Academic_papers / app.py
Abi2124's picture
Update app.py
916ed06 verified
import gradio as gr
import torch
from sentence_transformers import SentenceTransformer
import numpy as np
from transformers import pipeline
import logging
import sys
# Set up logging and debug print to console
logging.basicConfig(level=logging.ERROR, stream=sys.stdout)
logger = logging.getLogger(__name__)
# Use a lighter summarization model
try:
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
print("Summarizer loaded successfully")
except Exception as e:
logger.error(f"Summarizer loading failed: {e}")
print(f"Summarizer error: {e}")
summarizer = None
# Sample dataset
papers = [
{
"title": "Machine Learning in Healthcare",
"abstract": "Explores the use of ML to improve diagnostic accuracy.",
"content": "This paper discusses how machine learning models can enhance healthcare diagnostics by analyzing patient data..."
},
{
"title": "Natural Language Processing Advances",
"abstract": "Reviews recent NLP techniques and their applications.",
"content": "Recent advances in NLP have enabled better text understanding, with models like BERT leading the way..."
},
{
"title": "Climate Change Mitigation",
"abstract": "Analyzes strategies for reducing carbon emissions.",
"content": "This study proposes new strategies for mitigating climate change through technology and policy..."
}
]
# Load sentence transformer
try:
model = SentenceTransformer('all-MiniLM-L6-v2')
print("SentenceTransformer loaded successfully")
except Exception as e:
logger.error(f"SentenceTransformer loading failed: {e}")
print(f"SentenceTransformer error: {e}")
model = None
# Pre-compute embeddings
paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
if paper_embeddings is not None:
print("Embeddings computed successfully")
else:
print("Embeddings computation failed")
def semantic_search(query):
if model is None or summarizer is None or paper_embeddings is None:
return {"title": "Error", "abstract": "Error", "summary": "Model or embeddings failed to load. Check console.", "link": ""}
try:
print(f"Processing query: {query}")
query_embedding = model.encode([query], convert_to_tensor=True)
# Use torch.matmul and keep as tensor
similarities = torch.matmul(paper_embeddings, query_embedding.T)
# Get the index using torch.argmax
top_idx = torch.argmax(similarities).item() # Convert to scalar
top_paper = papers[top_idx]
summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
print(f"Found paper: {top_paper['title']}")
return {
"title": top_paper["title"],
"abstract": top_paper["abstract"],
"summary": summary,
"link": "https://example.com/paper"
}
except Exception as e:
logger.error(f"Search failed: {e}")
print(f"Search error: {e}")
return {"title": "Error", "abstract": "Error", "summary": str(e), "link": ""}
def search_interface(query):
if not query:
return "Please enter a search query.", "Error", "Error", "Error"
result = semantic_search(query)
return (
"βœ… Search Complete!" if "Error" not in result["title"] else f"❌ Error: {result['summary']}",
result["title"],
result["abstract"],
result["summary"]
)
with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
gr.Markdown("# πŸ” Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
with gr.Row():
query_input = gr.Textbox(label="πŸ“ Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
search_btn = gr.Button("πŸ”Ž Search")
with gr.Row():
status = gr.Textbox(label="βœ… Status", interactive=False)
title_output = gr.Textbox(label="πŸ“‘ Paper Title", interactive=False)
with gr.Row():
abstract_output = gr.Textbox(label="πŸ“ Abstract", interactive=False, lines=3)
summary_output = gr.Textbox(label="πŸ“‹ Summary", interactive=False, lines=3)
search_btn.click(
fn=search_interface,
inputs=query_input,
outputs=[status, title_output, abstract_output, summary_output]
)
demo.launch()