Abi2124 committed on
Commit
126e9d0
·
verified ·
1 Parent(s): 3e11c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -13
app.py CHANGED
@@ -8,14 +8,14 @@ import logging
8
  logging.basicConfig(level=logging.ERROR)
9
  logger = logging.getLogger(__name__)
10
 
11
- # Load a lightweight model for summarization (simulating LLM processing)
12
  try:
13
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
14
  except Exception as e:
15
  logger.error(f"Summarizer loading failed: {e}")
16
  summarizer = None
17
 
18
- # Sample dataset of academic papers (title, abstract, content)
19
  papers = [
20
  {
21
  "title": "Machine Learning in Healthcare",
@@ -34,14 +34,14 @@ papers = [
34
  }
35
  ]
36
 
37
- # Load pre-trained sentence transformer for embeddings
38
  try:
39
  model = SentenceTransformer('all-MiniLM-L6-v2')
40
  except Exception as e:
41
  logger.error(f"SentenceTransformer loading failed: {e}")
42
  model = None
43
 
44
- # Pre-compute embeddings for the papers' content
45
  paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
46
 
47
  def semantic_search(query):
@@ -49,11 +49,11 @@ def semantic_search(query):
49
  return {"title": "Error", "abstract": "Error", "summary": "Model loading failed. Check logs.", "link": ""}
50
 
51
  try:
52
- query_embedding = model.encode(query, convert_to_tensor=True)
53
  similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
54
  top_idx = np.argmax(similarities)
55
  top_paper = papers[top_idx]
56
- summary = summarizer(top_paper["content"], max_length=50, min_length=25, do_sample=False)[0]["summary_text"]
57
  return {
58
  "title": top_paper["title"],
59
  "abstract": top_paper["abstract"],
@@ -67,7 +67,6 @@ def semantic_search(query):
67
  def search_interface(query):
68
  if not query:
69
  return "Please enter a search query.", "Error", "Error", "Error"
70
-
71
  result = semantic_search(query)
72
  return (
73
  "βœ… Search Complete!" if "Error" not in result["title"] else f"❌ Error: {result['summary']}",
@@ -76,22 +75,17 @@ def search_interface(query):
76
  result["summary"]
77
  )
78
 
79
- # Gradio UI
80
  with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
81
  gr.Markdown("# πŸ” Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
82
-
83
  with gr.Row():
84
  query_input = gr.Textbox(label="πŸ“ Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
85
  search_btn = gr.Button("πŸ”Ž Search")
86
-
87
  with gr.Row():
88
  status = gr.Textbox(label="βœ… Status", interactive=False)
89
  title_output = gr.Textbox(label="πŸ“‘ Paper Title", interactive=False)
90
-
91
  with gr.Row():
92
  abstract_output = gr.Textbox(label="πŸ“ Abstract", interactive=False, lines=3)
93
  summary_output = gr.Textbox(label="πŸ“‹ Summary", interactive=False, lines=3)
94
-
95
  search_btn.click(
96
  fn=search_interface,
97
  inputs=query_input,
 
8
  logging.basicConfig(level=logging.ERROR)
9
  logger = logging.getLogger(__name__)
10
 
11
+ # Use a lighter summarization model to reduce resource use
12
  try:
13
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6") # Lighter than bart-large-cnn
14
  except Exception as e:
15
  logger.error(f"Summarizer loading failed: {e}")
16
  summarizer = None
17
 
18
+ # Sample dataset
19
  papers = [
20
  {
21
  "title": "Machine Learning in Healthcare",
 
34
  }
35
  ]
36
 
37
+ # Load sentence transformer
38
  try:
39
  model = SentenceTransformer('all-MiniLM-L6-v2')
40
  except Exception as e:
41
  logger.error(f"SentenceTransformer loading failed: {e}")
42
  model = None
43
 
44
+ # Pre-compute embeddings
45
  paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
46
 
47
  def semantic_search(query):
 
49
  return {"title": "Error", "abstract": "Error", "summary": "Model loading failed. Check logs.", "link": ""}
50
 
51
  try:
52
+ query_embedding = model.encode([query], convert_to_tensor=True) # Ensure list input
53
  similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
54
  top_idx = np.argmax(similarities)
55
  top_paper = papers[top_idx]
56
+ summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
57
  return {
58
  "title": top_paper["title"],
59
  "abstract": top_paper["abstract"],
 
67
  def search_interface(query):
68
  if not query:
69
  return "Please enter a search query.", "Error", "Error", "Error"
 
70
  result = semantic_search(query)
71
  return (
72
  "βœ… Search Complete!" if "Error" not in result["title"] else f"❌ Error: {result['summary']}",
 
75
  result["summary"]
76
  )
77
 
 
78
  with gr.Blocks(title="Semantic Search Engine for Academic Papers") as demo:
79
  gr.Markdown("# πŸ” Semantic Search Engine for Academic Papers\nSearch for academic papers by entering a research query.")
 
80
  with gr.Row():
81
  query_input = gr.Textbox(label="πŸ“ Enter Research Query", placeholder="e.g., 'machine learning in healthcare'")
82
  search_btn = gr.Button("πŸ”Ž Search")
 
83
  with gr.Row():
84
  status = gr.Textbox(label="βœ… Status", interactive=False)
85
  title_output = gr.Textbox(label="πŸ“‘ Paper Title", interactive=False)
 
86
  with gr.Row():
87
  abstract_output = gr.Textbox(label="πŸ“ Abstract", interactive=False, lines=3)
88
  summary_output = gr.Textbox(label="πŸ“‹ Summary", interactive=False, lines=3)
 
89
  search_btn.click(
90
  fn=search_interface,
91
  inputs=query_input,