# Hugging Face Spaces app — eye-disease medical chatbot (Gradio + LangChain + Groq).
# Imports for chatbot functionality and document processing.
import os

import gradio as gr
from dotenv import load_dotenv
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langdetect import detect

# Load the Groq API key from a local .env file (never hard-code secrets).
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Initialize the EyeDiseaseChatbot class | |
class EyeDiseaseChatbot:
    """Retrieval-augmented chatbot for eye-health questions.

    At construction time it scrapes a fixed list of public medical reference
    sites, splits the text into small chunks, and indexes them in a Chroma
    vector store. Questions are answered by a Groq-hosted LLM using the
    retrieved chunks as context, in the same language as the question
    (English and Arabic fallbacks are handled explicitly).
    """

    def __init__(self, groq_api_key):
        """Initialize models and build the vector index (performs network I/O).

        Args:
            groq_api_key: API key for the Groq chat model.
        """
        self.llm = ChatGroq(api_key=groq_api_key, model_name="meta-llama/llama-4-scout-17b-16e-instruct")
        self.embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # Small chunks with overlap keep retrieved snippets focused on one topic.
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=30)
        self.persist_directory = "chroma_db"
        self.vector_db = self.process_web_content()

    def process_web_content(self):
        """Scrape the reference sites, chunk the pages, and index them.

        Returns:
            Chroma: vector store built over the scraped document chunks.

        Raises:
            ValueError: if none of the sites could be loaded.
        """
        medical_sites = [
            "https://www.webmd.com/",
            "https://www.mayoclinic.org/diseases-conditions/",
            "https://medlineplus.gov/",
            "https://www.healthline.com/health",
            "https://www.cdc.gov/diseasesconditions/",
        ]
        all_documents = []
        for site in medical_sites:
            try:
                all_documents.extend(WebBaseLoader(site).load())
            except Exception as e:
                # Best-effort: one unreachable site must not abort indexing.
                print(f"⚠️ Failed to load data from {site}: {e}")
        if not all_documents:
            raise ValueError("❌ No data could be loaded from the websites.")
        chunks = self.text_splitter.split_documents(all_documents)
        return Chroma.from_documents(chunks, self.embedding_model, persist_directory=self.persist_directory)

    def generate_answer(self, question):
        """Answer *question* in its own language using retrieved context.

        Args:
            question: free-text user question (any language langdetect knows).

        Returns:
            str: the LLM's answer, or a fixed "no answer" message (English or
            Arabic, matching the question's language) when retrieval finds
            nothing usable.
        """
        question_language = detect(question)
        similar_docs = self.vector_db.similarity_search(question, k=2)
        context = "\n".join(doc.page_content for doc in similar_docs) if similar_docs else None
        # Bail out before building the prompt/chain when there is no context.
        if not context:
            return (
                "No answer is currently available."
                if question_language == "en"
                else "لا يوجد إجابة متاحة حاليًا."
            )
        qna_template = """You are an ophthalmologist specializing in eye health.
You answer questions **in the same language as the input question** ({language}), based on your medical knowledge and the available context.
- If no answer is available in the context, respond in the same language as the question:
**English:** "No answer is currently available."
**Arabic:** "لا يوجد إجابة متاحة حاليًا."
- Analyze the question medically before answering, relying on reliable scientific information.
- Keep your answers precise and to the point, avoiding unnecessary details.
### Medical Context:
{context}
### Question:
{question}
### Answer (in the same language {language}):"""
        qna_prompt = PromptTemplate(
            template=qna_template,
            input_variables=['context', 'question', 'language']
        )
        # "stuff" chain: all retrieved documents are inserted into one prompt.
        stuff_chain = load_qa_chain(self.llm, chain_type="stuff", prompt=qna_prompt)
        # Stream the response and accumulate the chunks into one answer string.
        output = ""
        for chunk in stuff_chain.stream({
            "input_documents": similar_docs,
            "question": question,
            "language": question_language,
        }):
            output += chunk["output_text"]
        return output
# Build the chatbot once at import time (scrapes and indexes the sites),
# then expose it through a minimal Gradio text-in/text-out interface.
chatbot_instance = EyeDiseaseChatbot(groq_api_key=GROQ_API_KEY)


def chat_interface(question):
    """Gradio callback: forward the user's question to the shared chatbot."""
    return chatbot_instance.generate_answer(question)


demo = gr.Interface(fn=chat_interface, inputs="text", outputs="text", title="Medical Chatbot")
demo.launch()