Spaces:
Running
Running
Rename database.csv to backend.py
Browse files- backend.py +41 -0
- database.csv +0 -1
backend.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, UploadFile, File
|
2 |
+
import requests
|
3 |
+
from transformers import pipeline
|
4 |
+
from sentence_transformers import CrossEncoder
|
5 |
+
import pandas as pd
|
6 |
+
import os
|
7 |
+
|
8 |
+
# Path of the CSV file used as the store of previously checked texts.
TEXT_DB = "text_plagiarism.csv"

# Web application object; the route decorators below register on it.
app = FastAPI()

# Both models are constructed once at import time so that requests reuse them.
# Text-classification pipeline backed by the "roberta-base-openai-detector" model.
ai_detector = pipeline("text-classification", model="roberta-base-openai-detector")
# Cross-encoder that scores (text, text) pairs; used for the similarity check.
text_model = CrossEncoder("cross-encoder/stsb-roberta-large")
|
14 |
+
|
15 |
+
def load_database():
    """Return the contents of the text database as a DataFrame.

    If the CSV file named by ``TEXT_DB`` does not exist yet, it is first
    created with only the ``content`` and ``plagiarism_score`` header
    columns, and then read back like any other database file.
    """
    if os.path.exists(TEXT_DB):
        return pd.read_csv(TEXT_DB)

    # First use: write a header-only file so the read below always succeeds.
    header_only = pd.DataFrame(columns=["content", "plagiarism_score"])
    header_only.to_csv(TEXT_DB, index=False)
    return pd.read_csv(TEXT_DB)
|
19 |
+
|
20 |
+
def save_to_database(content, plagiarism_score):
    """Append one record to the text database and rewrite the CSV file.

    Args:
        content: The text that was checked.
        plagiarism_score: The score stored alongside ``content``.
    """
    record = pd.DataFrame(
        [{"content": content, "plagiarism_score": plagiarism_score}]
    )
    # The whole table is reloaded, extended by one row and written back.
    updated = pd.concat([load_database(), record], ignore_index=True)
    updated.to_csv(TEXT_DB, index=False)
|
25 |
+
|
26 |
+
@app.post("/check_text")
def check_text(text: str):
    """Score *text* against every stored text, then store it.

    Each stored ``content`` value is paired with ``text`` and scored by
    ``text_model``; the highest score, multiplied by 100, is the result.
    When nothing usable is stored yet the score is ``0.0``. The submitted
    text and its score are appended to the database before returning.

    Args:
        text: The text to check.

    Returns:
        A dict ``{"plagiarism_score": <float>}``.
    """
    # read_csv turns blank cells into NaN and numeric-looking cells into
    # numbers; neither is a valid input text for the model, so drop missing
    # values and coerce the remainder to str.
    stored_texts = load_database()["content"].dropna().astype(str).tolist()

    if stored_texts:
        pairs = [[text, stored] for stored in stored_texts]
        # predict() yields NumPy scalars; convert to a built-in float so the
        # value can be serialised in the JSON response and written to CSV.
        highest_similarity = float(max(text_model.predict(pairs)))
    else:
        highest_similarity = 0.0

    plagiarism_score = highest_similarity * 100
    save_to_database(text, plagiarism_score)
    return {"plagiarism_score": plagiarism_score}
|
36 |
+
|
37 |
+
@app.post("/detect_ai")
def detect_ai(text: str):
    """Classify *text* with the ``ai_detector`` pipeline.

    Args:
        text: The text to classify.

    Returns:
        The first element of the pipeline's output for ``text``.
    """
    # truncation=True is forwarded to the tokenizer so that input longer than
    # the model's maximum sequence length is cut down instead of making the
    # pipeline raise. NOTE(review): only the leading part of very long text is
    # therefore classified — confirm this is acceptable for callers.
    predictions = ai_detector(text, truncation=True)
    return predictions[0]
|
41 |
+
|
database.csv
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
text
|
|
|
|