from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.responses import FileResponse
import gradio as gr
from entity_recognition import extract_entities
from wordcloud import WordCloud
from summarization import summarizer
from utils import list_files, process_file
import pygraphviz as pgv
import os

# Initialize FastAPI
app = FastAPI()


# Request Model
class TextRequest(BaseModel):
    text: str


def wrap_label(label, max_length=15):
    """Improved label wrapping with hyphenation awareness"""
    if len(label) <= max_length:
        return label

    # Try to break at natural points
    break_chars = [" ", "-", "_", "/"]
    lines = []
    current = ""
    for char in label:
        if len(current) >= max_length and char in break_chars:
            lines.append(current.strip())
            current = char
        else:
            current += char
    if current:
        lines.append(current.strip())
    # Join with a literal "\n" escape, which Graphviz renders as a line break
    return "\\n".join(lines)


def generate_high_res_mindmap(text):
    """Generate high-resolution mind map with optimized layout"""
    entities = extract_entities(text)

    # Create graph with professional styling
    G = pgv.AGraph(
        directed=True,
        rankdir="TB",         # Top-to-bottom layout
        size="100,120",       # Larger canvas size
        dpi="300",            # Higher DPI for better resolution
        bgcolor="white",
        pad="1.0",
        ranksep="2.0",        # Increased spacing between ranks
        nodesep="1.5",        # Increased spacing between nodes
        splines="ortho",      # Orthogonal edges for cleaner look
        overlap="false",
        concentrate="true",
        quantum="0.5",
        fontname="Helvetica"
    )

    # Node styling
    G.node_attr.update({
        "fontsize": "28",
        "fontname": "Helvetica Bold",
        "shape": "Mrecord",       # Rounded rectangles with fields
        "style": "filled,rounded",
        "fillcolor": "#E1F5FE",   # Light blue
        "color": "#0288D1",       # Darker blue border
        "height": "0.6",
        "width": "1.8",
        "penwidth": "2.0"
    })

    # Edge styling
    G.edge_attr.update({
        "color": "#757575",       # Medium gray
        "penwidth": "2.5",
        "arrowsize": "1.2",
        "fontname": "Helvetica",
        "fontsize": "24",
        "fontcolor": "#616161"
    })

    # Add central document node
    G.add_node(
        "DOCUMENT",
        shape="doubleoctagon",
        fillcolor="#4FC3F7",
        fontsize="36",
        width="3.0",
        height="1.2"
    )

    # Process entities with hierarchical grouping
    max_main_categories = 6
    max_subcategories = 5
    max_entities = 8

    for cat_idx, (category, values) in enumerate(entities.items()):
        if cat_idx >= max_main_categories:
            break

        # Main category node
        cat_node = f"CAT_{cat_idx}"
        G.add_node(
            cat_node,
            label=wrap_label(category.upper(), 18),
            shape="tab",
            fillcolor="#81D4FA"
        )
        G.add_edge("DOCUMENT", cat_node, label="contains", penwidth="3.0")

        # Add subcategories if needed
        if len(values) > max_entities:
            grouped_values = [values[i:i + max_entities] for i in range(0, len(values), max_entities)]
            for sub_idx, group in enumerate(grouped_values):
                if sub_idx >= max_subcategories:
                    break
                sub_node = f"SUB_{cat_idx}_{sub_idx}"
                G.add_node(
                    sub_node,
                    label=f"Group {sub_idx + 1}",
                    shape="folder",
                    fillcolor="#B3E5FC"
                )
                G.add_edge(cat_node, sub_node)

                # Add entities to subcategory
                for ent_idx, value in enumerate(group):
                    ent_node = f"ENT_{cat_idx}_{sub_idx}_{ent_idx}"
                    G.add_node(
                        ent_node,
                        label=wrap_label(value, 15),
                        shape="note",
                        fillcolor="#E1F5FE"
                    )
                    G.add_edge(sub_node, ent_node)
        else:
            # Directly add entities to main category
            for ent_idx, value in enumerate(values):
                if ent_idx >= max_entities:
                    break
                ent_node = f"ENT_{cat_idx}_{ent_idx}"
                G.add_node(
                    ent_node,
                    label=wrap_label(value, 15),
                    shape="note",
                    fillcolor="#E1F5FE"
                )
                G.add_edge(cat_node, ent_node)

    # Generate high-resolution output
    output_dir = "mindmaps"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "mindmap.svg")  # Use SVG for scalability

    # Use sfdp layout engine for large graphs
    G.draw(
        output_path,
        format="svg",
        prog="sfdp",
        args="-Goverlap=prism -Gepsilon=0.0001 -Gmaxiter=5000 -Gbgcolor=white"
    )

    # Convert SVG to high-res PNG (optional; requires ImageMagick's `convert`)
    png_path = output_path.replace(".svg", ".png")
    os.system(f"convert -density 300 -resize 5000x5000 {output_path} {png_path}")

    return png_path


@app.post("/summarize")
def summarize_text(request: TextRequest):
    chunks = [request.text[i:i + 500] for i in range(0, len(request.text), 500)]
    summaries = []
    for chunk in chunks:
        try:
            summary = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True
            )
            summaries.append(summary[0]["summary_text"])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")
    return {"summary": " ".join(summaries)}


@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    return {"entities": extract_entities(request.text)}


@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    wordcloud = WordCloud(
        width=1200,
        height=1200,
        max_font_size=120,
        min_font_size=20,
        background_color="white",
        colormap="viridis"
    ).generate(request.text)
    img_path = "wordcloud.png"
    wordcloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")


# Gradio UI
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .mindmap-container img {
        max-height: none !important;
        min-width: 100% !important;
        object-fit: contain !important;
        background: white !important;
        border: 1px solid #e0e0e0 !important;
        border-radius: 8px !important;
        padding: 20px !important;
    }
    .gradio-container {
        max-width: 1400px !important;
    }
""") as iface:
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # File selection
    with gr.Row():
        file_dropdown = gr.Dropdown(
            choices=list_files(),
            label="Select Document",
            interactive=True
        )
        process_btn = gr.Button("Process Document", variant="primary")

    # Document display
    with gr.Row():
        full_doc_text = gr.Textbox(
            label="Full Document Text",
            lines=15,
            max_lines=25
        )
        output_summary = gr.Textbox(
            label="AI Summary",
            lines=15,
            max_lines=25
        )

    # Analysis results
    with gr.Row():
        output_entities = gr.JSON(
            label="Extracted Entities",
            show_label=True
        )
        output_wordcloud = gr.Image(
            label="Word Cloud",
            height=600,
            width=600
        )

    # Mind map section
    with gr.Row():
        mindmap_btn = gr.Button(
            "Generate Enhanced Mind Map",
            variant="primary"
        )
    with gr.Row():
        output_mindmap = gr.Image(
            label="High-Resolution Mind Map",
            elem_classes="mindmap-container",
            height=800,
            width=800
        )

    # Event handlers
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud]
    )
    mindmap_btn.click(
        fn=generate_high_res_mindmap,
        inputs=full_doc_text,
        outputs=output_mindmap
    )


if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )
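
# Usage sketch (illustrative only): this script launches the Gradio UI, while the
# FastAPI routes registered on `app` are not served here. To exercise the REST
# endpoints, the app could be run separately with uvicorn (assuming this file is
# saved as `app.py`; adjust the module name to match your filename):
#
#   uvicorn app:app --host 0.0.0.0 --port 8000
#
# and then queried, for example:
#
#   curl -X POST http://localhost:8000/summarize \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Your document text here"}'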