|
from fastapi import FastAPI, HTTPException |
|
from pydantic import BaseModel |
|
from fastapi.responses import FileResponse |
|
import gradio as gr |
|
from entity_recognition import extract_entities |
|
from wordcloud import WordCloud |
|
from summarization import summarizer |
|
from utils import list_files, process_file |
|
import pygraphviz as pgv |
|
import os |
|
|
|
|
|
# FastAPI application object; the @app.post(...) handlers in this file
# register their routes on it.
app = FastAPI()
|
|
|
|
|
# Request body model used by the /summarize, /entities and /wordcloud
# handlers in this file: a JSON object with a single string field.
class TextRequest(BaseModel):
    text: str  # raw document text to analyse
|
|
|
def wrap_label(label: str, max_length: int = 15) -> str:
    """Split a long label into several lines at natural break characters.

    A label no longer than ``max_length`` is returned unchanged. Otherwise the
    label is scanned left to right and a new line is started at the first
    break character (space, ``-``, ``_`` or ``/``) seen once the current line
    has reached ``max_length`` characters. Lines are therefore *at least*
    ``max_length`` long before they break and may exceed it; a label without
    any break character is returned as a single line.

    Args:
        label: Text to wrap.
        max_length: Minimum line length before a break is allowed.

    Returns:
        The wrapped lines joined with the literal two-character sequence
        backslash + ``n`` (not a real newline); presumably this is the
        Graphviz label line-break escape, since the result is used as a node
        label. Lines that would be empty after stripping whitespace are
        omitted.
    """
    if len(label) <= max_length:
        return label

    break_chars = frozenset(" -_/")
    lines = []
    current = []

    for char in label:
        if len(current) >= max_length and char in break_chars:
            lines.append("".join(current).strip())
            # The break character opens the next line; a leading space is
            # removed by strip() later, other break characters are kept.
            current = [char]
        else:
            current.append(char)

    lines.append("".join(current).strip())

    # Drop whitespace-only segments so runs of spaces or trailing blanks do
    # not turn into empty label lines or a dangling line break.
    return "\\n".join(segment for segment in lines if segment)
|
|
|
def _add_entity_nodes(graph, parent, id_prefix, values):
    """Attach one "note"-shaped leaf node per value beneath ``parent``.

    Node ids are ``f"{id_prefix}_{index}"``; each label is wrapped at 15
    characters via ``wrap_label``.
    """
    for ent_idx, value in enumerate(values):
        ent_node = f"{id_prefix}_{ent_idx}"
        graph.add_node(
            ent_node,
            label=wrap_label(value, 15),
            shape="note",
            fillcolor="#E1F5FE",
        )
        graph.add_edge(parent, ent_node)


def _svg_to_png(svg_path, png_path):
    """Rasterize ``svg_path`` to ``png_path`` with the external ``convert`` command."""
    import subprocess  # local import: only this helper needs it

    # Argument list instead of a shell string: no shell parsing of the paths,
    # and check=True turns a failed conversion into an exception instead of
    # silently leaving a missing or stale PNG behind.
    subprocess.run(
        ["convert", "-density", "300", "-resize", "5000x5000", svg_path, png_path],
        check=True,
    )


def generate_high_res_mindmap(text):
    """Generate a high-resolution mind map image for the entities in ``text``.

    The graph has a central ``DOCUMENT`` node, one node per entity category
    and one leaf per entity. Categories holding more than 8 entities are split
    into "Group N" sub-nodes of up to 8 entities each. To keep the picture
    readable, only the first 6 categories and the first 5 groups of a
    category are drawn; anything beyond that is silently left out.

    Args:
        text: Document text handed to ``extract_entities``. The result is
            used as a mapping of category name to a sliceable sequence of
            entity strings (it is iterated with ``.items()``, sliced, and
            each entity is passed to ``wrap_label``).

    Returns:
        Path of the rendered PNG (``mindmaps/mindmap.png``). The path is
        fixed, so every call overwrites the previous result.

    Raises:
        subprocess.CalledProcessError: The ``convert`` command exited with a
            non-zero status.
        FileNotFoundError: The ``convert`` executable is not available.
    """
    entities = extract_entities(text)

    # NOTE(review): several attributes below (rankdir, ranksep, nodesep,
    # concentrate) look dot-specific while the drawing step uses the sfdp
    # engine - confirm they have the intended effect.
    G = pgv.AGraph(
        directed=True,
        rankdir="TB",
        size="100,120",
        dpi="300",
        bgcolor="white",
        pad="1.0",
        ranksep="2.0",
        nodesep="1.5",
        splines="ortho",
        overlap="false",
        concentrate="true",
        quantum="0.5",
        fontname="Helvetica",
    )

    # Defaults for every node; individual add_node calls override shape,
    # fill colour and size where needed.
    G.node_attr.update({
        "fontsize": "28",
        "fontname": "Helvetica Bold",
        "shape": "Mrecord",
        "style": "filled,rounded",
        "fillcolor": "#E1F5FE",
        "color": "#0288D1",
        "height": "0.6",
        "width": "1.8",
        "penwidth": "2.0",
    })

    # Defaults for every edge.
    G.edge_attr.update({
        "color": "#757575",
        "penwidth": "2.5",
        "arrowsize": "1.2",
        "fontname": "Helvetica",
        "fontsize": "24",
        "fontcolor": "#616161",
    })

    # Root of the mind map.
    G.add_node(
        "DOCUMENT",
        shape="doubleoctagon",
        fillcolor="#4FC3F7",
        fontsize="36",
        width="3.0",
        height="1.2",
    )

    max_main_categories = 6
    max_subcategories = 5
    max_entities = 8

    for cat_idx, (category, values) in enumerate(entities.items()):
        if cat_idx >= max_main_categories:
            break

        cat_node = f"CAT_{cat_idx}"
        G.add_node(
            cat_node,
            label=wrap_label(category.upper(), 18),
            shape="tab",
            fillcolor="#81D4FA",
        )
        G.add_edge("DOCUMENT", cat_node, label="contains", penwidth="3.0")

        if len(values) > max_entities:
            # Too many entities for one fan-out: chunk them into groups of
            # max_entities and hang each chunk under its own "Group N" node.
            grouped_values = [
                values[i:i + max_entities]
                for i in range(0, len(values), max_entities)
            ]
            for sub_idx, group in enumerate(grouped_values[:max_subcategories]):
                sub_node = f"SUB_{cat_idx}_{sub_idx}"
                G.add_node(
                    sub_node,
                    label=f"Group {sub_idx+1}",
                    shape="folder",
                    fillcolor="#B3E5FC",
                )
                G.add_edge(cat_node, sub_node)
                _add_entity_nodes(G, sub_node, f"ENT_{cat_idx}_{sub_idx}", group)
        else:
            # At most max_entities values here, so no further cap is needed.
            _add_entity_nodes(G, cat_node, f"ENT_{cat_idx}", values)

    output_dir = "mindmaps"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "mindmap.svg")
    png_path = os.path.join(output_dir, "mindmap.png")

    # Lay out with sfdp and write an SVG first; the PNG is produced from it.
    G.draw(
        output_path,
        format="svg",
        prog="sfdp",
        args="-Goverlap=prism -Gepsilon=0.0001 -Gmaxiter=5000 -Gbgcolor=white",
    )

    _svg_to_png(output_path, png_path)

    return png_path
|
|
|
@app.post("/summarize")
def summarize_text(request: TextRequest):
    # Summarize the submitted text piecewise: the text is cut into consecutive
    # 500-character slices, each slice is summarized on its own, and the
    # partial summaries are joined with single spaces under the "summary" key.
    # Any exception raised while summarizing is reported as an HTTP 500.
    slice_len = 500
    body = request.text
    pieces = [
        body[start:start + slice_len]
        for start in range(0, len(body), slice_len)
    ]

    try:
        partials = [
            summarizer(
                piece,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            for piece in pieces
        ]
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")

    return {"summary": " ".join(partials)}
|
|
|
@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    # Run entity extraction on the submitted text and return whatever
    # extract_entities produces under the "entities" key.
    found = extract_entities(request.text)
    return {"entities": found}
|
|
|
@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    """Render a 1200x1200 word cloud of the submitted text and return it as a PNG.

    The image is written to ``wordcloud.png`` in the current working
    directory (overwritten on every call) and sent back as a file download.

    Raises:
        HTTPException: 400 when the word cloud cannot be generated from the
            text, e.g. because it contains no usable words.
    """
    cloud = WordCloud(
        width=1200,
        height=1200,
        max_font_size=120,
        min_font_size=20,
        background_color="white",
        colormap="viridis",
    )

    try:
        cloud.generate(request.text)
    except ValueError as e:
        # WordCloud rejects input without any words by raising ValueError;
        # report that as a client error rather than an unhandled 500.
        raise HTTPException(status_code=400, detail=f"Word cloud error: {str(e)}") from e

    img_path = "wordcloud.png"
    cloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")
|
|
|
|
|
# Stylesheet for the UI: lets the mind-map image grow to full width on a white,
# bordered background and widens the overall Gradio container.
_UI_CSS = """
.mindmap-container img {
    max-height: none !important;
    min-width: 100% !important;
    object-fit: contain !important;
    background: white !important;
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px !important;
    padding: 20px !important;
}
.gradio-container { max-width: 1400px !important; }
"""

with gr.Blocks(theme=gr.themes.Soft(), css=_UI_CSS) as iface:
    # Page heading.
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # Row 1: document picker and the button that starts processing.
    # The dropdown choices come from a single list_files() call made here.
    with gr.Row():
        file_dropdown = gr.Dropdown(choices=list_files(), label="Select Document", interactive=True)
        process_btn = gr.Button("Process Document", variant="primary")

    # Row 2: full document text next to its summary.
    with gr.Row():
        full_doc_text = gr.Textbox(label="Full Document Text", lines=15, max_lines=25)
        output_summary = gr.Textbox(label="AI Summary", lines=15, max_lines=25)

    # Row 3: extracted entities (JSON) next to the word cloud image.
    with gr.Row():
        output_entities = gr.JSON(label="Extracted Entities", show_label=True)
        output_wordcloud = gr.Image(label="Word Cloud", height=600, width=600)

    # Row 4: trigger for the mind map.
    with gr.Row():
        mindmap_btn = gr.Button("Generate Enhanced Mind Map", variant="primary")

    # Row 5: mind map image; the CSS class ties it to the stylesheet above.
    with gr.Row():
        output_mindmap = gr.Image(
            label="High-Resolution Mind Map",
            elem_classes="mindmap-container",
            height=800,
            width=800,
        )

    # "Process Document": process_file gets the selected dropdown value and its
    # results are routed to the text, summary, entities and word-cloud widgets.
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud],
    )

    # "Generate Enhanced Mind Map": built from the text currently shown in the
    # "Full Document Text" box.
    mindmap_btn.click(fn=generate_high_res_mindmap, inputs=full_doc_text, outputs=output_mindmap)
|
|
|
if __name__ == "__main__":
    # Script entry point: start the Gradio interface on 0.0.0.0:7860 with
    # debug enabled and no public share link. Only `iface` is launched in
    # this block; the FastAPI `app` is not started here.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    iface.launch(**launch_options)