dine24 committed
Commit 89adc1e · verified · Parent(s): e605997

Update app.py

Files changed (1): app.py (+34 -30)
app.py CHANGED
@@ -7,43 +7,46 @@ from transformers import AutoTokenizer, AutoModel, pipeline, AutoModelForCausalLM
 from huggingface_hub import login
 import os
 
-# ✅ Authenticate securely using Hugging Face token (set in Secrets)
+# ✅ Authenticate Hugging Face (only needed if using gated/private models)
 login(token=os.getenv("HF_TOKEN"))
 
-# ✅ Load FAISS Index and text data
+# ✅ Load FAISS index and product text data
 index = faiss.read_index("deberta_faiss.index")
 text_data = pd.read_csv("deberta_text_data.csv")["Retrieved Text"].tolist()
 
-# ✅ Load DeBERTa Model (for embeddings)
+# ✅ Load DeBERTa (for embedding queries)
 deberta_model_name = "microsoft/deberta-v3-base"
 deberta_tokenizer = AutoTokenizer.from_pretrained(deberta_model_name)
 deberta_model = AutoModel.from_pretrained(deberta_model_name).to("cpu")
 
-# ✅ Load Falcon RW 1B Model (lightweight alternative to Mistral for Spaces)
+# ✅ Load lightweight LLM (Falcon 1B)
 llm_model_name = "tiiuae/falcon-rw-1b"
 llm_tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
-llm_model = AutoModelForCausalLM.from_pretrained(llm_model_name).to("cpu")
-llm_pipeline = pipeline("text-generation", model=llm_model, tokenizer=llm_tokenizer, device=-1)
-
-# ✅ Embedding generation function
+llm_model = AutoModelForCausalLM.from_pretrained(llm_model_name)
+llm_pipeline = pipeline(
+    "text-generation",
+    model=llm_model,
+    tokenizer=llm_tokenizer,
+    device=-1  # CPU
+)
+
+# ✅ Function to generate query embeddings
 def generate_embeddings(queries):
     tokens = deberta_tokenizer(queries, return_tensors="pt", padding=True, truncation=True).to("cpu")
     with torch.no_grad():
         outputs = deberta_model(**tokens).last_hidden_state.mean(dim=1).cpu().numpy().astype("float32")
     return outputs
 
-# ✅ RAG Pipeline
+# ✅ RAG + LLM Response Generator
 def generate_response(user_query):
-    # Step 1: Embed query
+    # Embed and retrieve
     query_embedding = generate_embeddings([user_query])
     faiss.normalize_L2(query_embedding)
-
-    # Step 2: FAISS retrieval
     distances, indices = index.search(query_embedding, k=5)
     retrieved_docs = [text_data[i] for i in indices[0]]
     context = ", ".join(set(retrieved_docs))
 
-    # Step 3: Prompt construction
+    # Prompt LLM
     prompt = f"""
     Using the following product descriptions:
     {context}
@@ -60,22 +63,23 @@ def generate_response(user_query):
 
     **Your response:**
     """
-
-    # ✅ Step 4: Generate using Falcon RW 1B
-    result = llm_pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)[0]["generated_text"]
-    return result.split("**Your response:**")[-1].strip()
+    response = llm_pipeline(
+        prompt,
+        max_new_tokens=256,
+        do_sample=True,
+        truncation=True,
+        pad_token_id=llm_tokenizer.eos_token_id
+    )[0]["generated_text"]
+    return response
 
 # ✅ Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("### 🛋️ Luxury Decor Assistant (RAG)\nAsk about luxury home styling ideas based on curated descriptions.")
-    with gr.Row():
-        query = gr.Textbox(label="Your Question", lines=3, placeholder="e.g., Suggest ideas for a luxury bedroom makeover")
-    with gr.Row():
-        output = gr.Textbox(label="Assistant Response", lines=10)
-    with gr.Row():
-        submit = gr.Button("Generate Answer")
-
-    submit.click(fn=generate_response, inputs=query, outputs=output)
-
-# ✅ Public sharing enabled
-demo.launch()
+demo = gr.Interface(
+    fn=generate_response,
+    inputs=gr.Textbox(lines=2, placeholder="Ask a question about luxury home decor..."),
+    outputs="text",
+    title="Luxury Decor Assistant (RAG)",
+    description="Powered by DeBERTa + FAISS + Falcon-1B"
+)
+
+# ✅ Launch App (on Hugging Face, don't use share=True)
+demo.launch()
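
Note: app.py assumes deberta_faiss.index and deberta_text_data.csv already exist in the repo. For context, here is a minimal sketch of how such artifacts could be built offline with the same mean-pooled DeBERTa embeddings the app uses. The script itself, the IndexFlatIP index type, and the example texts are assumptions for illustration, not part of this commit:

# Hypothetical offline index-building sketch (assumed; not part of this commit).
# Mirrors app.py's mean-pooled DeBERTa embeddings and L2 normalization, and
# assumes an inner-product index so normalized vectors give cosine similarity.
import faiss
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
model = AutoModel.from_pretrained("microsoft/deberta-v3-base").to("cpu")

texts = [
    "Velvet emerald sofa with brass legs",      # placeholder rows; the real
    "Hand-knotted silk rug in ivory and gold",  # descriptions live in the CSV
]

# Embed exactly as generate_embeddings() does in app.py
tokens = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
with torch.no_grad():
    embeddings = model(**tokens).last_hidden_state.mean(dim=1).numpy().astype("float32")

faiss.normalize_L2(embeddings)                  # match the normalized queries in app.py
index = faiss.IndexFlatIP(embeddings.shape[1])  # assumed index type (cosine via inner product)
index.add(embeddings)

faiss.write_index(index, "deberta_faiss.index")
pd.DataFrame({"Retrieved Text": texts}).to_csv("deberta_text_data.csv", index=False)

With those two files in place, the refactored generate_response can be smoke-tested from a Python shell, e.g. print(generate_response("Suggest ideas for a luxury bedroom makeover")), before the Gradio UI is involved.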