import gradio as gr
import openai
import os
import nltk
import shutil
import numpy as np
import torch
from datasets import load_dataset
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from sentence_transformers import SentenceTransformer
from sklearn.metrics import mean_squared_error, roc_auc_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ✅ Load Pretrained Embedding Model
model_name = "bert-base-uncased"
device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={"device": device})
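# Note: "bert-base-uncased" is not a native sentence-transformers checkpoint, so it gets wrapped
# with default mean pooling; a retrieval-tuned model such as
# "sentence-transformers/all-MiniLM-L6-v2" would likely give stronger embeddings.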
# ✅ Set OpenAI API Key (read from the OPENAI_API_KEY environment variable)
openai.api_key = os.getenv("OPENAI_API_KEY")

# ✅ Download NLTK Dependencies
nltk.download('punkt')
nltk.download('punkt_tab')  # newer NLTK releases use punkt_tab for sent_tokenize

# ✅ Load RagBench Datasets
ragbench = {}
for dataset in ['covidqa', 'cuad', 'delucionqa', 'emanual', 'expertqa', 'finqa', 'hagrid',
                'hotpotqa', 'msmarco', 'pubmedqa', 'tatqa', 'techqa']:
    ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
print("Datasets Loaded ✅")
# ✅ Function to Chunk Documents
def chunk_documents_semantic(documents, max_chunk_size=500):
    """Greedily pack whole sentences into chunks of at most ~max_chunk_size characters."""
    chunks = []
    for doc in documents:
        sentences = nltk.sent_tokenize(doc)
        current_chunk = ""
        for sentence in sentences:
            if len(current_chunk) + len(sentence) <= max_chunk_size:
                current_chunk += sentence + " "
            else:
                chunks.append(current_chunk.strip())
                current_chunk = sentence + " "
        if current_chunk:
            chunks.append(current_chunk.strip())
    return chunks
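# Illustrative behaviour (hypothetical input):
#   chunk_documents_semantic(["First sentence. Second sentence."], max_chunk_size=20)
#   -> ["First sentence.", "Second sentence."]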
# ✅ Chunk the Entire Dataset
chunked_ragbench = {}
for dataset_name in ragbench.keys():
    chunked_ragbench[dataset_name] = {}
    for split in ragbench[dataset_name].keys():
        original_documents_full = ragbench[dataset_name][split]['documents']
        chunked_documents_full = chunk_documents_semantic(original_documents_full)
        # Key by dataset and split so one dataset's splits do not overwrite another's
        chunked_ragbench[dataset_name][split] = chunked_documents_full
print("Chunking Completed ✅")
# ✅ Setup ChromaDB
persist_directory = "chroma_db_directory"
if os.path.exists(persist_directory):
    shutil.rmtree(persist_directory)

# Flatten the chunks from every dataset/split and index all of them
all_chunks = [chunk for splits in chunked_ragbench.values() for chunks in splits.values() for chunk in chunks]
documents = [Document(page_content=chunk) for chunk in all_chunks]
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory=persist_directory
)
vectordb.persist()
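# The index is written under persist_directory; in a later session it could be reloaded with
# something like Chroma(persist_directory=persist_directory, embedding_function=embedding_model)
# instead of being rebuilt from scratch.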
# ✅ Retrieve Documents
def retrieve_documents(question, k=5):
    docs = vectordb.similarity_search(question, k=k)
    if not docs:
        return ["⚠️ No relevant documents found. Try a different query."]
    return [doc.page_content for doc in docs]
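# Example (hypothetical query against the indexed chunks):
#   retrieve_documents("What are common COVID-19 symptoms?", k=3)
#   -> up to 3 chunk strings ranked by embedding similarity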
# ✅ Generate AI Response
def generate_response(question, context):
    if not context or "No relevant documents found." in context:
        return "No relevant context available. Try a different query."
    full_prompt = f"Context: {context}\n\nQuestion: {question}"
    try:
        # The client also picks up OPENAI_API_KEY from the environment
        client = openai.OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an AI assistant that answers user queries based on the given context."},
                {"role": "user", "content": full_prompt}
            ],
            max_tokens=300,
            temperature=0.7
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"
# ✅ Compute Context Relevance, Utilization, Completeness, Adherence
def compute_cosine_similarity(text1, text2):
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([text1, text2])
    return cosine_similarity(vectors[0], vectors[1])[0][0]

def context_relevance(question, relevant_documents):
    combined_docs = " ".join(relevant_documents)
    return compute_cosine_similarity(question, combined_docs)

def context_utilization(response, relevant_documents):
    combined_docs = " ".join(relevant_documents)
    return compute_cosine_similarity(response, combined_docs)

def completeness(response, ground_truth_answer):
    return compute_cosine_similarity(response, ground_truth_answer)

def adherence(response, relevant_documents):
    combined_docs = " ".join(relevant_documents)
    response_tokens = set(response.split())
    if not response_tokens:
        return 0.0
    relevant_tokens = set(combined_docs.split())
    supported_tokens = response_tokens.intersection(relevant_tokens)
    return len(supported_tokens) / len(response_tokens)

def compute_rmse(predicted_values, ground_truth_values):
    return np.sqrt(mean_squared_error(ground_truth_values, predicted_values))
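# Worked example for the adherence heuristic (fraction of response tokens found in the context):
#   adherence("the cat sat", ["the cat sat on the mat"]) == 1.0   # all 3 response tokens are supported
#   adherence("the dog sat", ["the cat sat on the mat"]) == 2/3   # "dog" is unsupported
# compute_rmse would compare such predicted scores against annotated scores from the dataset,
# but no ground-truth metric values are wired into this demo.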
# ✅ Full RAG Pipeline
def rag_pipeline(question):
    retrieved_docs = retrieve_documents(question, k=5)
    context = " ".join(retrieved_docs)
    response = generate_response(question, context)
    # Compute Evaluation Metrics (the ground truth below is a placeholder, not pulled from the dataset)
    ground_truth_answer = "Sample ground truth answer from dataset"
    # Cast numpy floats to plain floats for the JSON output
    predicted_metrics = {
        "context_relevance": float(context_relevance(question, retrieved_docs)),
        "context_utilization": float(context_utilization(response, retrieved_docs)),
        "completeness": float(completeness(response, ground_truth_answer)),
        "adherence": float(adherence(response, retrieved_docs))
    }
    return response, "\n\n".join(retrieved_docs), predicted_metrics
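# Example round trip (hypothetical): rag_pipeline("What does the CUAD dataset cover?") returns
# (generated answer, retrieved chunks separated by blank lines, dict of the four metric scores).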
# ✅ Gradio UI Interface
iface = gr.Interface(
    fn=rag_pipeline,
    inputs=gr.Textbox(label="Enter your question"),
    outputs=[
        gr.Textbox(label="Generated Response"),
        gr.Textbox(label="Retrieved Documents"),
        gr.JSON(label="Evaluation Metrics")
    ],
    title="RAG-Based QA System for RunGalileo",
    description="Enter a question to retrieve relevant documents along with an AI-generated response and evaluation metrics."
)
# ✅ Launch the Gradio App
if __name__ == "__main__":
    iface.launch()