Update app.py

app.py CHANGED
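The hunk below begins at line 7 of app.py, so the file's first six lines sit outside the diff. Judging from the names the changed code uses (faiss, pd, torch, gr) and the transformers import visible in the hunk header, those lines presumably look like the following sketch (an assumption, not part of the commit):

import faiss        # faiss.read_index / faiss.normalize_L2 below
import pandas as pd  # reads deberta_text_data.csv
import torch         # torch.no_grad() in generate_embeddings
import gradio as gr  # gr.Interface / gr.Textbox in the UI section
from transformers import AutoTokenizer, AutoModel, pipeline, AutoModelForCausalLM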
@@ -7,43 +7,46 @@ from transformers import AutoTokenizer, AutoModel, pipeline, AutoModelForCausalLM
 from huggingface_hub import login
 import os
 
-# ✅ Authenticate
+# ✅ Authenticate Hugging Face (only needed if using gated/private models)
 login(token=os.getenv("HF_TOKEN"))
 
-# ✅ Load FAISS
+# ✅ Load FAISS index and product text data
 index = faiss.read_index("deberta_faiss.index")
 text_data = pd.read_csv("deberta_text_data.csv")["Retrieved Text"].tolist()
 
-# ✅ Load DeBERTa
+# ✅ Load DeBERTa (for embedding queries)
 deberta_model_name = "microsoft/deberta-v3-base"
 deberta_tokenizer = AutoTokenizer.from_pretrained(deberta_model_name)
 deberta_model = AutoModel.from_pretrained(deberta_model_name).to("cpu")
 
-# ✅ Load
+# ✅ Load lightweight LLM (Falcon 1B)
 llm_model_name = "tiiuae/falcon-rw-1b"
 llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
-llm_model = AutoModelForCausalLM.from_pretrained(llm_model_name)
-llm_pipeline = pipeline(
-
-
+llm_model = AutoModelForCausalLM.from_pretrained(llm_model_name)
+llm_pipeline = pipeline(
+    "text-generation",
+    model=llm_model,
+    tokenizer=llm_tokenizer,
+    device=-1  # CPU
+)
+
+# ✅ Function to generate query embeddings
 def generate_embeddings(queries):
     tokens = deberta_tokenizer(queries, return_tensors="pt", padding=True, truncation=True).to("cpu")
     with torch.no_grad():
         outputs = deberta_model(**tokens).last_hidden_state.mean(dim=1).cpu().numpy().astype("float32")
     return outputs
 
-# ✅ RAG
+# ✅ RAG + LLM Response Generator
 def generate_response(user_query):
-    #
+    # Embed and retrieve
     query_embedding = generate_embeddings([user_query])
     faiss.normalize_L2(query_embedding)
-
-    # Step 2: FAISS retrieval
     distances, indices = index.search(query_embedding, k=5)
     retrieved_docs = [text_data[i] for i in indices[0]]
     context = ", ".join(set(retrieved_docs))
 
-    #
+    # Prompt LLM
     prompt = f"""
     Using the following product descriptions:
     {context}
@@ -60,22 +63,23 @@ def generate_response(user_query):
 
     **Your response:**
     """
-
-
-
-
+    response = llm_pipeline(
+        prompt,
+        max_new_tokens=256,
+        do_sample=True,
+        truncation=True,
+        pad_token_id=llm_tokenizer.eos_token_id
+    )[0]["generated_text"]
+    return response
 
 # ✅ Gradio UI
-
-
-
-
-
-
-
-
-
-
-
-# ✅ Public sharing enabled
-demo.launch()
+demo = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question about luxury home decor..."),
+    outputs="text",
+    title="Luxury Decor Assistant (RAG)",
+    description="Powered by DeBERTa + FAISS + Falcon-1B"
+)
+
+# ✅ Launch App (on Hugging Face, don't use share=True)
+demo.launch()
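Two notes on the committed code. First, faiss.normalize_L2 on the query side suggests deberta_faiss.index was built over embeddings normalized the same way, typically with an inner-product index so that search scores are cosine similarities. A minimal sketch of that presumed offline build step (hypothetical script, not part of this commit; embeddings_utils is a hypothetical module holding the same mean-pooled DeBERTa encoder as generate_embeddings in app.py):

# build_index.py — hypothetical offline step producing the artifacts app.py loads
import faiss
import pandas as pd
from embeddings_utils import generate_embeddings  # hypothetical module, same encoder as app.py

text_data = pd.read_csv("deberta_text_data.csv")["Retrieved Text"].tolist()
vectors = generate_embeddings(text_data)   # float32, shape (N, 768); batch in chunks for large N
faiss.normalize_L2(vectors)                # in-place; makes inner product equal cosine similarity
index = faiss.IndexFlatIP(vectors.shape[1])
index.add(vectors)
faiss.write_index(index, "deberta_faiss.index")

Second, the text-generation pipeline includes the prompt in "generated_text" by default (return_full_text=True), so generate_response echoes the whole prompt back to the Gradio UI; slicing it off is a common follow-up fix, e.g.:

answer = response[len(prompt):].strip()  # keep only the completion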