Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,14 +8,14 @@ import logging
|
|
8 |
logging.basicConfig(level=logging.ERROR)
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
-
#
|
12 |
try:
|
13 |
-
summarizer = pipeline("summarization", model="
|
14 |
except Exception as e:
|
15 |
logger.error(f"Summarizer loading failed: {e}")
|
16 |
summarizer = None
|
17 |
|
18 |
-
# Sample dataset
|
19 |
papers = [
|
20 |
{
|
21 |
"title": "Machine Learning in Healthcare",
|
@@ -34,14 +34,14 @@ papers = [
|
|
34 |
}
|
35 |
]
|
36 |
|
37 |
-
# Load
|
38 |
try:
|
39 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
40 |
except Exception as e:
|
41 |
logger.error(f"SentenceTransformer loading failed: {e}")
|
42 |
model = None
|
43 |
|
44 |
-
# Pre-compute embeddings
|
45 |
paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
|
46 |
|
47 |
def semantic_search(query):
|
@@ -49,11 +49,11 @@ def semantic_search(query):
|
|
49 |
return {"title": "Error", "abstract": "Error", "summary": "Model loading failed. Check logs.", "link": ""}
|
50 |
|
51 |
try:
|
52 |
-
query_embedding = model.encode(query, convert_to_tensor=True)
|
53 |
similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
|
54 |
top_idx = np.argmax(similarities)
|
55 |
top_paper = papers[top_idx]
|
56 |
-
summary = summarizer(top_paper["content"], max_length=
|
57 |
return {
|
58 |
"title": top_paper["title"],
|
59 |
"abstract": top_paper["abstract"],
|
@@ -67,7 +67,6 @@ def semantic_search(query):
|
|
67 |
def search_interface(query):
|
68 |
if not query:
|
69 |
return "Please enter a search query.", "Error", "Error", "Error"
|
70 |
-
|
71 |
result = semantic_search(query)
|
72 |
return (
|
73 |
"✅ Search Complete!" if "Error" not in result["title"] else f"β Error: {result['summary']}",
|
@@ -76,22 +75,17 @@ def search_interface(query):
|
|
76 |
result["summary"]
|
77 |
)
|
78 |
|
79 |
-
# Gradio UI
|
80 |
with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
|
81 |
gr.Markdown("# π Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
|
82 |
-
|
83 |
with gr.Row():
|
84 |
query_input = gr.Textbox(label="π Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
|
85 |
search_btn = gr.Button("π Search")
|
86 |
-
|
87 |
with gr.Row():
|
88 |
status = gr.Textbox(label="✅ Status", interactive=False)
|
89 |
title_output = gr.Textbox(label="π Paper Title", interactive=False)
|
90 |
-
|
91 |
with gr.Row():
|
92 |
abstract_output = gr.Textbox(label="π Abstract", interactive=False, lines=3)
|
93 |
summary_output = gr.Textbox(label="π Summary", interactive=False, lines=3)
|
94 |
-
|
95 |
search_btn.click(
|
96 |
fn=search_interface,
|
97 |
inputs=query_input,
|
|
|
8 |
logging.basicConfig(level=logging.ERROR)
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
+
# Use a lighter summarization model to reduce resource use
|
12 |
try:
|
13 |
+
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6") # Lighter than bart-large-cnn
|
14 |
except Exception as e:
|
15 |
logger.error(f"Summarizer loading failed: {e}")
|
16 |
summarizer = None
|
17 |
|
18 |
+
# Sample dataset
|
19 |
papers = [
|
20 |
{
|
21 |
"title": "Machine Learning in Healthcare",
|
|
|
34 |
}
|
35 |
]
|
36 |
|
37 |
+
# Load sentence transformer
|
38 |
try:
|
39 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
40 |
except Exception as e:
|
41 |
logger.error(f"SentenceTransformer loading failed: {e}")
|
42 |
model = None
|
43 |
|
44 |
+
# Pre-compute embeddings
|
45 |
paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
|
46 |
|
47 |
def semantic_search(query):
|
|
|
49 |
return {"title": "Error", "abstract": "Error", "summary": "Model loading failed. Check logs.", "link": ""}
|
50 |
|
51 |
try:
|
52 |
+
query_embedding = model.encode([query], convert_to_tensor=True) # Ensure list input
|
53 |
similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
|
54 |
top_idx = np.argmax(similarities)
|
55 |
top_paper = papers[top_idx]
|
56 |
+
summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
|
57 |
return {
|
58 |
"title": top_paper["title"],
|
59 |
"abstract": top_paper["abstract"],
|
|
|
67 |
def search_interface(query):
|
68 |
if not query:
|
69 |
return "Please enter a search query.", "Error", "Error", "Error"
|
|
|
70 |
result = semantic_search(query)
|
71 |
return (
|
72 |
"✅ Search Complete!" if "Error" not in result["title"] else f"β Error: {result['summary']}",
|
|
|
75 |
result["summary"]
|
76 |
)
|
77 |
|
|
|
78 |
with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
|
79 |
gr.Markdown("# π Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
|
|
|
80 |
with gr.Row():
|
81 |
query_input = gr.Textbox(label="π Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
|
82 |
search_btn = gr.Button("π Search")
|
|
|
83 |
with gr.Row():
|
84 |
status = gr.Textbox(label="✅ Status", interactive=False)
|
85 |
title_output = gr.Textbox(label="π Paper Title", interactive=False)
|
|
|
86 |
with gr.Row():
|
87 |
abstract_output = gr.Textbox(label="π Abstract", interactive=False, lines=3)
|
88 |
summary_output = gr.Textbox(label="π Summary", interactive=False, lines=3)
|
|
|
89 |
search_btn.click(
|
90 |
fn=search_interface,
|
91 |
inputs=query_input,
|