HUANG-Stephanie committed on
Commit
45c1bf0
·
verified ·
1 Parent(s): fe7b387

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -8
app.py CHANGED
@@ -59,7 +59,8 @@ async def index(files: List[UploadFile] = File(...)):
59
  content = await file.read()
60
  pdf_image_list = convert_from_bytes(content)
61
  images.extend(pdf_image_list)
62
-
 
63
  dataloader = DataLoader(
64
  images,
65
  batch_size=4,
@@ -130,11 +131,9 @@ async def search(query: str, k: int = 1):
130
 
131
  @app.post("/search_by_cv")
132
  async def search_by_cv(file: UploadFile = File(...), k: int = 10):
133
- # Lire le fichier PDF uploadé
134
  content = await file.read()
135
  pdf_image_list = convert_from_bytes(content)
136
 
137
- # Générer les embeddings pour les pages du PDF uploadé
138
  qs = []
139
  dataloader = DataLoader(
140
  pdf_image_list,
@@ -148,14 +147,11 @@ async def search_by_cv(file: UploadFile = File(...), k: int = 10):
148
  embeddings_query = model(**batch_query)
149
  qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
150
 
151
- # Comparer les embeddings du CV uploadé avec ceux déjà indexés
152
  retriever_evaluator = CustomEvaluator(is_multi_vector=True)
153
  scores = retriever_evaluator.evaluate(qs, ds)
154
 
155
- # Trouver les indices des résultats les plus pertinents
156
  top_k_indices = scores.argsort(axis=1)[0][-k-1:-1][::-1]
157
 
158
- # Préparer les résultats sous forme d'images
159
  results = []
160
  for idx in top_k_indices:
161
  img_byte_arr = BytesIO()
@@ -163,10 +159,8 @@ async def search_by_cv(file: UploadFile = File(...), k: int = 10):
163
  img_base64 = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
164
  results.append({"image": img_base64, "page": f"Page {idx}"})
165
 
166
- # Générer le PDF des résultats
167
  pdf_buffer = generate_pdf(results)
168
 
169
- # Utiliser StreamingResponse pour renvoyer le fichier PDF généré
170
  response = StreamingResponse(pdf_buffer, media_type='application/pdf')
171
  response.headers['Content-Disposition'] = 'attachment; filename="results.pdf"'
172
 
 
59
  content = await file.read()
60
  pdf_image_list = convert_from_bytes(content)
61
  images.extend(pdf_image_list)
62
+
63
+ # Create embeddings for each file and load in memory storage
64
  dataloader = DataLoader(
65
  images,
66
  batch_size=4,
 
131
 
132
  @app.post("/search_by_cv")
133
  async def search_by_cv(file: UploadFile = File(...), k: int = 10):
 
134
  content = await file.read()
135
  pdf_image_list = convert_from_bytes(content)
136
 
 
137
  qs = []
138
  dataloader = DataLoader(
139
  pdf_image_list,
 
147
  embeddings_query = model(**batch_query)
148
  qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
149
 
 
150
  retriever_evaluator = CustomEvaluator(is_multi_vector=True)
151
  scores = retriever_evaluator.evaluate(qs, ds)
152
 
 
153
  top_k_indices = scores.argsort(axis=1)[0][-k-1:-1][::-1]
154
 
 
155
  results = []
156
  for idx in top_k_indices:
157
  img_byte_arr = BytesIO()
 
159
  img_base64 = base64.b64encode(img_byte_arr.getvalue()).decode('utf-8')
160
  results.append({"image": img_base64, "page": f"Page {idx}"})
161
 
 
162
  pdf_buffer = generate_pdf(results)
163
 
 
164
  response = StreamingResponse(pdf_buffer, media_type='application/pdf')
165
  response.headers['Content-Disposition'] = 'attachment; filename="results.pdf"'
166