3v324v23's picture
Auto-deploy from GitHub
4418e3c
import os
from entity_recognition import extract_entities
from pydantic import BaseModel
from wordcloud import WordCloud
# Define paths
# All folders are relative paths, so they resolve against the current
# working directory of the running process.
# Source documents: list_files() and process_file() read from this folder.
TEXT_FOLDER = "jfk_text"
# get_summary() looks for "summary_<file_name>" inside this folder.
SUMMARY_FOLDER = "summaryoutput"
# NOTE(review): not referenced by any active code visible in this section.
MINDMAP_FOLDER = "mindmap_output"
# process_file() creates this folder on demand and writes word cloud PNGs here.
WORDCLOUD_FOLDER = "wordcloud_output"
# Request model
class TextRequest(BaseModel):
    """Request body holding a single string field named ``text``."""

    text: str
def list_files():
    """Return the names of the Markdown (.md) entries in TEXT_FOLDER.

    Returns:
        A sorted list of entry names ending in ".md". The list is empty when
        TEXT_FOLDER does not exist or is not a directory.
    """
    # Try the listing directly instead of checking existence first: this
    # avoids the check-then-use race and also covers the case where the path
    # exists but is a regular file.
    try:
        entries = os.listdir(TEXT_FOLDER)
    except (FileNotFoundError, NotADirectoryError):
        return []
    # os.listdir makes no ordering guarantee; sort so callers get a stable list.
    return sorted(name for name in entries if name.endswith(".md"))
def read_file(file_path):
    """Return the full contents of *file_path*, decoded as UTF-8 text."""
    with open(file_path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents
def get_summary(file_name):
    """Return the stored summary for *file_name*, or a fallback message.

    The summary is expected at "summary_<file_name>" inside SUMMARY_FOLDER.
    When no such path exists, the string "Summary not found." is returned.
    """
    candidate = os.path.join(SUMMARY_FOLDER, f"summary_{file_name}")
    # Guard clause: bail out early when there is nothing to read.
    if not os.path.exists(candidate):
        return "Summary not found."
    return read_file(candidate)
def process_file(file_name):
    """Load a document from TEXT_FOLDER and build its derived outputs.

    Args:
        file_name: Name of a file inside TEXT_FOLDER.

    Returns:
        A 4-tuple ``(text, summary, entities, wordcloud_path)``:
        the file's text, the result of ``get_summary(file_name)``, a dict
        ``{"entities": extract_entities(text)}``, and the path of the PNG
        word cloud written under WORDCLOUD_FOLDER.

        On any failure the tuple has the same length:
        ``(error_msg, error_msg, {"entities": {}}, None)``, so callers can
        unpack the result identically in both cases.
    """
    try:
        # 1. Validate input and paths. The emptiness check comes first so an
        #    empty/None selection never reaches the path join.
        if not file_name:
            raise FileNotFoundError("Invalid file selection")
        source_path = os.path.join(TEXT_FOLDER, file_name)
        if not os.path.isfile(source_path):
            raise FileNotFoundError("Invalid file selection")

        # 2. Read file
        text = read_file(source_path)

        # 3. Prepare the output location
        os.makedirs(WORDCLOUD_FOLDER, exist_ok=True)
        wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")

        # 4. Create visualizations
        wc = WordCloud(width=800, height=400, background_color="white").generate(text)
        wc.to_file(wordcloud_path)

        return (
            text,
            get_summary(file_name),
            {"entities": extract_entities(text)},
            wordcloud_path,
        )
    except Exception as e:
        # Deliberately broad: every failure is reported to the caller as text
        # instead of propagating. The tuple must match the success path's
        # arity (four values).
        error_msg = f"Error: {str(e)}"
        return error_msg, error_msg, {"entities": {}}, None
# def process_file(file_name):
# try:
# # 1. Validate input and paths
# if not file_name or not os.path.exists(os.path.join(TEXT_FOLDER, file_name)):
# raise FileNotFoundError("Invalid file selection")
# # 2. Read file
# text = read_file(os.path.join(TEXT_FOLDER, file_name))
# # 3. Generate outputs
# wordcloud_path = os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")
# os.makedirs(WORDCLOUD_FOLDER, exist_ok=True)
# # 4. Create visualizations
# wc = WordCloud(width=800, height=400, background_color="white").generate(text)
# wc.to_file(wordcloud_path)
# # 5. Generate mind map HTML
# mindmap_html = generate_mind_map(text)
# return (
# text,
# get_summary(file_name),
# {"entities": extract_entities(text)},
# wordcloud_path, # Word Cloud image path
# mindmap_html # Mind Map HTML content
# )
# except Exception as e:
# error_msg = f"Error: {str(e)}"
# return error_msg, error_msg, {"entities": {}}, None, f"<div>{error_msg}</div>"
# # def process_file(file_name):
# """Process file and return all outputs including mind map."""
# try:
# if not file_name: # Check if file_name is empty
# raise ValueError("No file selected")
# file_path = os.path.join(TEXT_FOLDER, file_name)
# if not os.path.exists(file_path):
# raise FileNotFoundError(f"File {file_name} not found in {TEXT_FOLDER}")
# text = read_file(file_path)
# return (
# text, # Full text
# get_summary(file_name), # Summary
# {"entities": extract_entities(text)}, # Entities
# generate_word_cloud(text, os.path.join(WORDCLOUD_FOLDER, f"wordcloud_{file_name}.png")), # Word Cloud
# generate_mind_map(text) # Mind Map (returns HTML)
# )
# except Exception as e:
# error_msg = f"Error: {str(e)}"
# return error_msg, error_msg, {"entities": {}}, None, "<div>Error generating visualization</div>"
# return summary, entities, wordcloud_path
# from entity_recognition import extract_entities
# from wordcloud import WordCloud
# from summarization import summarizer
# def process_file(filename):
# file_path = f"your_data_folder/{filename}" # Update this to the correct file path
# try:
# with open(file_path, "r", encoding="utf-8") as f:
# text = f.read()
# # Summarize the text
# chunks = [text[i:i+500] for i in range(0, len(text), 500)]
# summaries = []
# for chunk in chunks:
# summary = summarizer(chunk, max_length=130, min_length=30, do_sample=False, truncation=True)
# summaries.append(summary[0]['summary_text'])
# # Extract entities
# entities = extract_entities(text)
# # Generate word cloud
# wordcloud = WordCloud(width=800, height=600, max_font_size=40, min_font_size=10, background_color="white").generate(text)
# img_path = f"wordcloud_output/wordcloud_{filename}.png" # Ensure the path is valid
# wordcloud.to_file(img_path)
# return text, " ".join(summaries), entities, img_path # ✅ Returning exactly 4 values
# except Exception as e:
# return f"Error processing file: {str(e)}", "", {}, ""