Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,14 +16,52 @@ openai.api_key = os.getenv("OPENAI_API_KEY")  # SECURITY(review): the original line passed a live-looking "sk-proj-…" secret key as the argument to os.getenv — the secret was both leaked in this diff (revoke it) and misused: os.getenv takes the environment-variable NAME, not the key value.
|
|
16 |
persist_directory = "./docs/chroma/"
|
17 |
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ✅ Function to Retrieve Top-K Relevant Documents
def retrieve_documents(question, k=5):
    """Retrieve top K relevant documents from ChromaDB.

    Returns a list of page-content strings, or a single placeholder
    message when the similarity search comes back empty.
    """
    hits = vectordb.similarity_search(question, k=k)
    texts = [hit.page_content for hit in hits]
    return texts if texts else ["No relevant documents found."]
26 |
|
|
|
27 |
# β
Function to Generate AI Response
|
28 |
def generate_response(question, context):
|
29 |
"""Generate AI response using OpenAI GPT-4"""
|
@@ -61,7 +99,7 @@ iface = gr.Interface(
|
|
61 |
gr.Textbox(label="Generated Response"),
|
62 |
gr.Textbox(label="Retrieved Documents")
|
63 |
],
|
64 |
-
title="RAG-Based Question Answering System
|
65 |
description="Enter a question and retrieve relevant documents with AI-generated response."
|
66 |
)
|
67 |
|
|
|
# --- Vector-store setup --------------------------------------------------
# NOTE(review): in the pasted diff, `persist_directory` and `vectordb`
# were created at (old) lines 16-17 *before* the imports and the
# `embedding_model` definition added below, and were then created a
# second time — i.e. `embedding_model` was referenced before assignment
# (unless it was also defined earlier, outside this view — confirm) and
# the Chroma store was built twice.  The single, ordered setup below
# keeps the same module-level side effects.

from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Load the embedding model
model_name = "intfloat/e5-small"
embedding_model = HuggingFaceEmbeddings(model_name=model_name)

# Define the ChromaDB persist directory
persist_directory = "./docs/chroma/"

# ✅ Load ChromaDB (or create if empty)
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding_model)

# ✅ Check if documents exist.
# NOTE(review): `_collection` is a private attribute of the LangChain
# Chroma wrapper — there is no public count API on this version; re-check
# on upgrade.
if vectordb._collection.count() == 0:
    print("⚠️ No documents found in ChromaDB. Re-indexing dataset...")

    # Sample dataset (Replace with real RunGalileo dataset)
    documents = [
        Document(page_content="HVAC systems help regulate indoor temperature."),
        Document(page_content="Chiller plants are used in large buildings for cooling."),
        Document(page_content="BACnet is a common protocol for building automation."),
        Document(page_content="Heat pumps are essential in modern energy-efficient HVAC designs."),
        Document(page_content="Smart thermostats improve energy efficiency through AI-based control."),
    ]

    # ✅ Insert documents into ChromaDB
    vectordb.add_documents(documents)

    print("✅ Documents successfully indexed into ChromaDB.")
else:
    print(f"✅ ChromaDB contains {vectordb._collection.count()} documents.")
# ✅ Function to Retrieve Top-K Relevant Documents
def retrieve_documents(question, k=5):
    """Retrieve top K relevant documents from ChromaDB.

    Parameters
    ----------
    question : str
        The user query to embed and match against the store.
    k : int
        Maximum number of documents to return (default 5).

    Returns
    -------
    list[str]
        Page contents of the matched documents, or a one-element list
        with a warning message when nothing matches.
    """
    docs = vectordb.similarity_search(question, k=k)

    if not docs:
        # FIX: the pasted source carried a mojibake prefix ("β οΈ") —
        # restored the intended ⚠️ emoji in the user-facing message.
        return ["⚠️ No relevant documents found. Try a different query."]

    return [doc.page_content for doc in docs]
63 |
|
64 |
+
|
65 |
# β
Function to Generate AI Response
|
66 |
def generate_response(question, context):
|
67 |
"""Generate AI response using OpenAI GPT-4"""
|
|
|
99 |
gr.Textbox(label="Generated Response"),
|
100 |
gr.Textbox(label="Retrieved Documents")
|
101 |
],
|
102 |
+
title="RAG-Based Question Answering System ",
|
103 |
description="Enter a question and retrieve relevant documents with AI-generated response."
|
104 |
)
|
105 |
|