indhupamula committed on
Commit
b2c7949
·
verified ·
1 Parent(s): 981c05b

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +33 -15
backend.py CHANGED
@@ -1,14 +1,15 @@
1
- from fastapi import FastAPI, UploadFile, File
2
- import requests
3
  from transformers import pipeline
4
- from sentence_transformers import CrossEncoder
5
  import pandas as pd
6
  import os
7
 
8
  app = FastAPI()
9
 
 
10
  ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
11
- text_model = CrossEncoder("cross-encoder/stsb-roberta-large")
12
 
13
  TEXT_DB = "text_plagiarism.csv"
14
 
@@ -25,17 +26,34 @@ def save_to_database(content, plagiarism_score):
25
 
26
  @app.post("/check_text")
27
  def check_text(text: str):
28
- stored_texts = load_database()["content"].tolist()
29
- if stored_texts:
30
- similarity_scores = text_model.predict([[text, stored] for stored in stored_texts])
31
- highest_similarity = max(similarity_scores)
32
- else:
33
- highest_similarity = 0
34
- save_to_database(text, highest_similarity * 100)
35
- return {"plagiarism_score": highest_similarity * 100}
 
 
 
 
 
36
 
37
  @app.post("/detect_ai")
38
  def detect_ai(text: str):
39
- result = ai_detector(text)
40
- return result[0]
41
-
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, UploadFile, File
2
+ import fitz # PyMuPDF for PDF handling
3
  from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer, util
5
  import pandas as pd
6
  import os
7
 
8
  app = FastAPI()
9
 
10
+ # Load AI detection model
11
  ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
12
+ text_model = SentenceTransformer("all-MiniLM-L6-v2")
13
 
14
  TEXT_DB = "text_plagiarism.csv"
15
 
 
26
 
27
@app.post("/check_text")
def check_text(text: str):
    """Score ``text`` against previously stored texts and record the result.

    The submitted text and every stored text are embedded with the
    module-level ``text_model``; the highest cosine similarity is reported
    as a percentage. The text and its score are then passed to
    ``save_to_database``.

    Args:
        text: The text to check.

    Returns:
        ``{"plagiarism_score": <float in [0, 100]>}``. The score is 0 when
        there is nothing stored to compare against.

    Raises:
        HTTPException: Status 500 if loading, encoding, or saving fails.
    """
    try:
        # NOTE(review): load_database() is defined outside this view; it is
        # assumed to return something whose "content" column has .tolist().
        raw_entries = load_database()["content"].tolist()
        # Empty cells typically come back as NaN floats rather than strings;
        # the encoder only accepts text, so drop anything that is not a
        # non-blank string.
        stored_texts = [
            entry for entry in raw_entries
            if isinstance(entry, str) and entry.strip()
        ]

        highest_similarity = 0.0
        if stored_texts:
            query_embedding = text_model.encode(text, convert_to_tensor=True)
            corpus_embeddings = text_model.encode(stored_texts, convert_to_tensor=True)
            similarities = util.pytorch_cos_sim(query_embedding, corpus_embeddings).tolist()[0]
            if similarities:
                # Cosine similarity lies in [-1, 1] (and may overshoot by
                # rounding error); clamp so the score is a valid percentage.
                highest_similarity = max(0.0, min(100.0, max(similarities) * 100))

        save_to_database(text, highest_similarity)
        return {"plagiarism_score": highest_similarity}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error checking plagiarism: {str(e)}") from e
42
 
43
@app.post("/detect_ai")
def detect_ai(text: str):
    """Classify ``text`` with the module-level ``ai_detector`` pipeline.

    Args:
        text: The text to classify.

    Returns:
        ``{"AI_Detection": <label>, "Confidence": <score>}`` taken from the
        first result the pipeline returns.

    Raises:
        HTTPException: Status 500 if the pipeline call fails.
    """
    try:
        # truncation=True is forwarded to the tokenizer so inputs longer than
        # the model's maximum sequence length are cut instead of raising.
        result = ai_detector(text, truncation=True)
        top = result[0]
        return {"AI_Detection": top["label"], "Confidence": top["score"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in AI Detection: {str(e)}") from e
50
+
51
@app.post("/upload_pdf")
async def upload_pdf(file: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and run ``check_text`` on it.

    Args:
        file: The uploaded PDF.

    Returns:
        Whatever ``check_text`` returns for the extracted text.

    Raises:
        HTTPException: Status 500 with a "PDF Processing Error" detail if the
            upload cannot be read or parsed. Errors raised by ``check_text``
            propagate unchanged.
    """
    try:
        # Use the async read so the event loop is not blocked on the upload.
        pdf_content = await file.read()
        # The context manager closes the document even if extraction fails.
        with fitz.open(stream=pdf_content, filetype="pdf") as doc:
            text = " ".join(page.get_text("text") for page in doc)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"PDF Processing Error: {str(e)}") from e

    # Kept outside the try block: check_text raises its own HTTPException,
    # which must not be re-wrapped as a PDF processing error.
    return check_text(text)