"""FastAPI + Gradio application for AI-assisted analysis of declassified documents.

Exposes REST endpoints for summarization, entity extraction, and word-cloud
generation, plus a Gradio UI that adds an entity mind-map visualization
rendered with Graphviz.
"""

import os

from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
from pydantic import BaseModel
import gradio as gr
import pygraphviz as pgv
from wordcloud import WordCloud

from entity_recognition import extract_entities
from summarization import summarizer
from utils import list_files, process_file

# Initialize FastAPI
app = FastAPI()


# Request Model
class TextRequest(BaseModel):
    # Raw document text to analyze.
    text: str


def wrap_label(label, max_length=15):
    """Wrap a Graphviz node label, breaking at natural points.

    Segments are broken at spaces, hyphens, underscores, and slashes once a
    segment reaches ``max_length`` characters. The break character is kept at
    the end of the current segment (fix: previously it was carried over to the
    start of the next line, so wrapped labels began with stray hyphens).

    Args:
        label: The label text to wrap.
        max_length: Soft maximum characters per line.

    Returns:
        The label joined with literal ``\\n`` sequences, which Graphviz
        interprets as in-node line breaks.
    """
    if len(label) <= max_length:
        return label

    # Characters considered natural break points.
    break_chars = [" ", "-", "_", "/"]
    lines = []
    current = ""
    for char in label:
        current += char
        # Break only once the segment is long enough AND we are at a
        # natural boundary; keep the boundary char on this segment so
        # hyphenated words read naturally.
        if len(current) >= max_length and char in break_chars:
            lines.append(current.strip())
            current = ""
    if current:
        lines.append(current.strip())
    # Literal backslash-n: Graphviz's own newline escape for labels,
    # NOT a Python newline.
    return "\\n".join(lines)


def generate_high_res_mindmap(text):
    """Generate a mind-map PNG of extracted entities (Spaces-friendly).

    Args:
        text: Document text to extract entities from.

    Returns:
        Path to the rendered PNG, path to a placeholder PNG on failure,
        or ``None`` when no entities were found.
    """
    # Resolve the writable scratch directory BEFORE the try-block so the
    # except-handler below can always reference it (previously `temp_dir`
    # was assigned inside the try, and an early failure such as
    # extract_entities() raising caused a NameError that masked the real
    # error and skipped the placeholder image).
    temp_dir = "/tmp/mindmaps"
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Verify we have entities to visualize.
        # NOTE(review): assumes extract_entities returns a dict of
        # category -> list-of-strings — confirm against entity_recognition.
        entities = extract_entities(text)
        if not any(entities.values()):
            print("No entities found to generate mind map")
            return None

        output_path = os.path.join(temp_dir, "mindmap.png")

        # Simplified graph creation that's more likely to work in Spaces.
        G = pgv.AGraph(
            directed=True,
            rankdir="TB",
            bgcolor="white",
            fontname="Helvetica",
            splines="ortho",
        )

        # Simplified node styling.
        G.node_attr.update({
            "fontsize": "12",
            "shape": "box",
            "style": "rounded,filled",
            "fillcolor": "#E1F5FE",
            "color": "#0288D1",
        })

        # Simplified edge styling.
        G.edge_attr.update({
            "color": "#757575",
            "penwidth": "1.5",
        })

        # Add central node.
        G.add_node("DOCUMENT", shape="ellipse", fillcolor="#4FC3F7", fontsize="14")

        # Add entities with a simplified structure, capped so the rendered
        # image stays legible.
        max_categories = 5
        max_entities = 10
        for cat_idx, (category, values) in enumerate(entities.items()):
            if cat_idx >= max_categories:
                break
            cat_node = f"CAT_{cat_idx}"
            G.add_node(cat_node, label=wrap_label(category, 15))
            G.add_edge("DOCUMENT", cat_node)
            for ent_idx, value in enumerate(values[:max_entities]):
                ent_node = f"ENT_{cat_idx}_{ent_idx}"
                G.add_node(ent_node, label=wrap_label(value, 12))
                G.add_edge(cat_node, ent_node)

        # Try multiple layout engines (dot is most reliable); the for-else
        # raises only when every engine failed.
        for engine in ['dot', 'neato', 'sfdp']:
            try:
                G.draw(output_path, format="png", prog=engine)
                print(f"Successfully generated with {engine}")
                break
            except Exception as e:
                print(f"Layout engine {engine} failed: {str(e)}")
                continue
        else:
            raise RuntimeError("All layout engines failed")

        # Verify the file was created.
        if not os.path.exists(output_path):
            raise FileNotFoundError("Mind map file was not generated")

        return output_path

    except Exception as e:
        print(f"Mind map generation failed: {str(e)}")
        # Return a placeholder image instead of crashing the UI.
        placeholder_path = os.path.join(temp_dir, "placeholder.png")
        if not os.path.exists(placeholder_path):
            # Create a simple placeholder (matplotlib imported lazily so the
            # dependency is only paid on the failure path).
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(figsize=(10, 10))
            ax.text(0.5, 0.5, "Mind Map Unavailable\n(Error: {})".format(str(e)),
                    ha='center', va='center')
            plt.axis('off')
            plt.savefig(placeholder_path, bbox_inches='tight', pad_inches=0.1)
            plt.close()
        return placeholder_path


@app.post("/summarize")
def summarize_text(request: TextRequest):
    """Summarize the request text in 500-character chunks and join results.

    Raises:
        HTTPException: 500 when the summarizer fails on any chunk.
    """
    chunks = [request.text[i:i+500] for i in range(0, len(request.text), 500)]
    summaries = []
    for chunk in chunks:
        try:
            summary = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True
            )
            summaries.append(summary[0]['summary_text'])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")
    return {"summary": " ".join(summaries)}


@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    """Return the named entities extracted from the request text."""
    return {"entities": extract_entities(request.text)}


@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    """Render a word cloud of the request text and return it as a PNG file."""
    wordcloud = WordCloud(
        width=1200,
        height=1200,
        max_font_size=120,
        min_font_size=20,
        background_color="white",
        colormap="viridis"
    ).generate(request.text)
    img_path = "wordcloud.png"
    wordcloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")


# Gradio UI
with gr.Blocks(theme=gr.themes.Soft(), css="""
    .mindmap-container img {
        max-height: none !important;
        min-width: 100% !important;
        object-fit: contain !important;
        background: white !important;
        border: 1px solid #e0e0e0 !important;
        border-radius: 8px !important;
        padding: 20px !important;
    }
    .gradio-container {
        max-width: 1400px !important;
    }
""") as iface:
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # File selection
    with gr.Row():
        file_dropdown = gr.Dropdown(
            choices=list_files(),
            label="Select Document",
            interactive=True
        )
        process_btn = gr.Button("Process Document", variant="primary")

    # Document display
    with gr.Row():
        full_doc_text = gr.Textbox(
            label="Full Document Text",
            lines=15,
            max_lines=25
        )
        output_summary = gr.Textbox(
            label="AI Summary",
            lines=15,
            max_lines=25
        )

    # Analysis results
    with gr.Row():
        output_entities = gr.JSON(
            label="Extracted Entities",
            show_label=True
        )
        output_wordcloud = gr.Image(
            label="Word Cloud",
            height=600,
            width=600
        )

    # Mind map section
    with gr.Row():
        mindmap_btn = gr.Button(
            "Generate Enhanced Mind Map",
            variant="primary"
        )
    with gr.Row():
        output_mindmap = gr.Image(
            label="High-Resolution Mind Map",
            elem_classes="mindmap-container",
            height=800,
            width=800
        )

    # Event handlers must be inside the Blocks context.
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud]
    )

    def display_mindmap(text):
        """Wrap mind-map generation for the UI: never raise, return a path or None."""
        try:
            img_path = generate_high_res_mindmap(text)
            if img_path and os.path.exists(img_path):
                return img_path
            return None
        except Exception as e:
            print(f"Display error: {e}")
            return None

    mindmap_btn.click(
        fn=display_mindmap,
        inputs=full_doc_text,
        outputs=output_mindmap
    )


if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )