Spaces:
Sleeping
Sleeping
File size: 4,463 Bytes
d090cc4 6370c8d d090cc4 3e11c25 470923a 3e11c25 470923a 3e11c25 d090cc4 470923a 3e11c25 470923a 3e11c25 470923a 3e11c25 d090cc4 126e9d0 d090cc4 126e9d0 3e11c25 470923a 3e11c25 470923a 3e11c25 d090cc4 126e9d0 3e11c25 470923a d090cc4 063c063 470923a d090cc4 3e11c25 470923a 916ed06 6370c8d 916ed06 3e11c25 126e9d0 470923a 3e11c25 470923a 3e11c25 d090cc4 3e11c25 d090cc4 3e11c25 d090cc4 3e11c25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import gradio as gr
import torch
from sentence_transformers import SentenceTransformer
import numpy as np
from transformers import pipeline
import logging
import sys
# Send all ERROR-and-above log records to stdout so they appear in the
# Space's console output alongside the plain print() diagnostics below.
logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
logger = logging.getLogger(__name__)
# Summarization pipeline. The distilled 6-layer BART checkpoint is used to
# keep the memory footprint small. A value of None signals "failed to load"
# to the search code, which reports the problem instead of crashing.
summarizer = None
try:
    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
    print("Summarizer loaded successfully")
except Exception as e:
    logger.error(f"Summarizer loading failed: {e}")
    print(f"Summarizer error: {e}")
# Sample in-memory corpus. Each paper is a dict with:
#   "title"    - display title shown in the UI
#   "abstract" - one-line abstract shown in the UI
#   "content"  - the text that is embedded for search and fed to the summarizer
papers = [
    {
        "title": "Machine Learning in Healthcare",
        "abstract": "Explores the use of ML to improve diagnostic accuracy.",
        "content": "This paper discusses how machine learning models can enhance healthcare diagnostics by analyzing patient data..."
    },
    {
        "title": "Natural Language Processing Advances",
        "abstract": "Reviews recent NLP techniques and their applications.",
        "content": "Recent advances in NLP have enabled better text understanding, with models like BERT leading the way..."
    },
    {
        "title": "Climate Change Mitigation",
        "abstract": "Analyzes strategies for reducing carbon emissions.",
        "content": "This study proposes new strategies for mitigating climate change through technology and policy..."
    }
]
# Sentence-embedding model used for semantic search. As with the summarizer,
# None marks a failed load so downstream code can degrade gracefully.
model = None
try:
    model = SentenceTransformer('all-MiniLM-L6-v2')
    print("SentenceTransformer loaded successfully")
except Exception as e:
    logger.error(f"SentenceTransformer loading failed: {e}")
    print(f"SentenceTransformer error: {e}")
# Pre-compute document embeddings once at startup so each query only has to
# embed itself. Wrapped in try/except for consistency with the model-loading
# steps above: an encode failure should leave paper_embeddings as None (the
# search code checks for that) rather than crash the whole app at import time.
paper_embeddings = None
if model is not None:
    try:
        paper_embeddings = model.encode(
            [paper["content"] for paper in papers], convert_to_tensor=True
        )
    except Exception as e:
        logger.error(f"Embedding computation failed: {e}")
        print(f"Embedding error: {e}")
if paper_embeddings is not None:
    print("Embeddings computed successfully")
else:
    print("Embeddings computation failed")
def semantic_search(query):
    """Find the paper most relevant to ``query`` and summarize it.

    Args:
        query: Free-text research query string.

    Returns:
        dict with keys "title", "abstract", "summary", "link". On any
        failure (models missing, inference error) every field carries an
        error indicator instead; this function never raises.
    """
    if model is None or summarizer is None or paper_embeddings is None:
        return {"title": "Error", "abstract": "Error", "summary": "Model or embeddings failed to load. Check console.", "link": ""}
    try:
        print(f"Processing query: {query}")
        query_embedding = model.encode([query], convert_to_tensor=True)
        # Rank by cosine similarity rather than a raw dot product:
        # SentenceTransformer.encode does not unit-normalize embeddings by
        # default, so dot products are biased toward larger-magnitude vectors.
        # query_embedding is (1, dim) and broadcasts against (n_papers, dim).
        similarities = torch.nn.functional.cosine_similarity(
            paper_embeddings, query_embedding, dim=1
        )
        top_idx = torch.argmax(similarities).item()  # tensor -> Python int index
        top_paper = papers[top_idx]
        summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
        print(f"Found paper: {top_paper['title']}")
        return {
            "title": top_paper["title"],
            "abstract": top_paper["abstract"],
            "summary": summary,
            "link": "https://example.com/paper"  # placeholder; no real link in the sample data
        }
    except Exception as e:
        logger.error(f"Search failed: {e}")
        print(f"Search error: {e}")
        return {"title": "Error", "abstract": "Error", "summary": str(e), "link": ""}
def search_interface(query):
    """Gradio callback: run semantic_search and unpack it into four outputs.

    Args:
        query: Text from the query Textbox; falsy values short-circuit.

    Returns:
        (status, title, abstract, summary) tuple of strings for the four
        output components.
    """
    if not query:
        return "Please enter a search query.", "Error", "Error", "Error"
    result = semantic_search(query)
    # NOTE(review): the original status literals were mojibake split across
    # two source lines (a syntax error); reconstructed here as the check/cross
    # emoji the surrounding text implies — confirm against the intended UI.
    status = (
        "✅ Search Complete!"
        if "Error" not in result["title"]
        else f"❌ Error: {result['summary']}"
    )
    return status, result["title"], result["abstract"], result["summary"]
# --- Gradio UI -------------------------------------------------------------
# NOTE(review): the "π" characters in the labels below appear to be mojibake
# of the original emoji; they are preserved as-is since the originals cannot
# be recovered from this copy. The Status label, which was split across two
# lines (a syntax error), is reconstructed as a checkmark emoji.
with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
    gr.Markdown("# π Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
    with gr.Row():
        query_input = gr.Textbox(label="π Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
        search_btn = gr.Button("π Search")
    with gr.Row():
        status = gr.Textbox(label="✅ Status", interactive=False)
        title_output = gr.Textbox(label="π Paper Title", interactive=False)
    with gr.Row():
        abstract_output = gr.Textbox(label="π Abstract", interactive=False, lines=3)
        summary_output = gr.Textbox(label="π Summary", interactive=False, lines=3)
    # Wire the button to the search callback; outputs map 1:1 to the tuple
    # returned by search_interface.
    search_btn.click(
        fn=search_interface,
        inputs=query_input,
        outputs=[status, title_output, abstract_output, summary_output]
    )
demo.launch()