Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,16 +3,19 @@ from sentence_transformers import SentenceTransformer
|
|
3 |
import numpy as np
|
4 |
from transformers import pipeline
|
5 |
import logging
|
|
|
6 |
|
7 |
-
# Set up logging
|
8 |
-
logging.basicConfig(level=logging.ERROR)
|
9 |
logger = logging.getLogger(__name__)
|
10 |
|
11 |
-
# Use a lighter summarization model
|
12 |
try:
|
13 |
-
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
|
|
|
14 |
except Exception as e:
|
15 |
logger.error(f"Summarizer loading failed: {e}")
|
|
|
16 |
summarizer = None
|
17 |
|
18 |
# Sample dataset
|
@@ -37,24 +40,31 @@ papers = [
|
|
37 |
# Load sentence transformer
|
38 |
try:
|
39 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
40 |
except Exception as e:
|
41 |
logger.error(f"SentenceTransformer loading failed: {e}")
|
|
|
42 |
model = None
|
43 |
|
44 |
# Pre-compute embeddings
|
45 |
paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
|
|
|
|
|
|
|
|
|
46 |
|
47 |
def semantic_search(query):
|
48 |
-
# Check if any component is missing
|
49 |
if model is None or summarizer is None or paper_embeddings is None:
|
50 |
-
return {"title": "Error", "abstract": "Error", "summary": "Model or embeddings failed to load. Check
|
51 |
|
52 |
try:
|
53 |
-
|
|
|
54 |
similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
|
55 |
top_idx = np.argmax(similarities)
|
56 |
top_paper = papers[top_idx]
|
57 |
summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
|
|
|
58 |
return {
|
59 |
"title": top_paper["title"],
|
60 |
"abstract": top_paper["abstract"],
|
@@ -63,6 +73,7 @@ def semantic_search(query):
|
|
63 |
}
|
64 |
except Exception as e:
|
65 |
logger.error(f"Search failed: {e}")
|
|
|
66 |
return {"title": "Error", "abstract": "Error", "summary": str(e), "link": ""}
|
67 |
|
68 |
def search_interface(query):
|
|
|
3 |
import numpy as np
|
4 |
from transformers import pipeline
|
5 |
import logging
|
6 |
+
import sys
|
7 |
|
8 |
+
# Set up logging and debug print to console
|
9 |
+
logging.basicConfig(level=logging.ERROR, stream=sys.stdout)
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
12 |
+
# Use a lighter summarization model
|
13 |
try:
|
14 |
+
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
|
15 |
+
print("Summarizer loaded successfully")
|
16 |
except Exception as e:
|
17 |
logger.error(f"Summarizer loading failed: {e}")
|
18 |
+
print(f"Summarizer error: {e}")
|
19 |
summarizer = None
|
20 |
|
21 |
# Sample dataset
|
|
|
40 |
# Load sentence transformer
|
41 |
try:
|
42 |
model = SentenceTransformer('all-MiniLM-L6-v2')
|
43 |
+
print("SentenceTransformer loaded successfully")
|
44 |
except Exception as e:
|
45 |
logger.error(f"SentenceTransformer loading failed: {e}")
|
46 |
+
print(f"SentenceTransformer error: {e}")
|
47 |
model = None
|
48 |
|
49 |
# Pre-compute embeddings
|
50 |
paper_embeddings = model.encode([paper["content"] for paper in papers], convert_to_tensor=True) if model else None
|
51 |
+
if paper_embeddings is not None:
|
52 |
+
print("Embeddings computed successfully")
|
53 |
+
else:
|
54 |
+
print("Embeddings computation failed")
|
55 |
|
56 |
def semantic_search(query):
|
|
|
57 |
if model is None or summarizer is None or paper_embeddings is None:
|
58 |
+
return {"title": "Error", "abstract": "Error", "summary": "Model or embeddings failed to load. Check console.", "link": ""}
|
59 |
|
60 |
try:
|
61 |
+
print(f"Processing query: {query}")
|
62 |
+
query_embedding = model.encode([query], convert_to_tensor=True)
|
63 |
similarities = np.dot(paper_embeddings, query_embedding.T).cpu().numpy()
|
64 |
top_idx = np.argmax(similarities)
|
65 |
top_paper = papers[top_idx]
|
66 |
summary = summarizer(top_paper["content"], max_length=30, min_length=10, do_sample=False)[0]["summary_text"]
|
67 |
+
print(f"Found paper: {top_paper['title']}")
|
68 |
return {
|
69 |
"title": top_paper["title"],
|
70 |
"abstract": top_paper["abstract"],
|
|
|
73 |
}
|
74 |
except Exception as e:
|
75 |
logger.error(f"Search failed: {e}")
|
76 |
+
print(f"Search error: {e}")
|
77 |
return {"title": "Error", "abstract": "Error", "summary": str(e), "link": ""}
|
78 |
|
79 |
def search_interface(query):
|