Spaces:

Abdullraffayy
/

jfk_assassination_records_app

Sleeping

File size: 8,692 Bytes

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.responses import FileResponse
import gradio as gr
from entity_recognition import extract_entities
from wordcloud import WordCloud
from summarization import summarizer
from utils import list_files, process_file
import pygraphviz as pgv
import os

# Initialize FastAPI
# Module-level application object; the @app.post(...) route decorators
# further down in this file register their handlers on it.
app = FastAPI()

# Request Model
class TextRequest(BaseModel):
    """Request body shared by the POST endpoints in this file: one text field."""

    # Raw text the endpoint operates on.
    text: str

def wrap_label(label, max_length=15):
    """Wrap a node label onto several lines at natural break points.

    The label is split after spaces, hyphens, underscores and slashes, and
    the resulting pieces are packed greedily so that each line stays within
    ``max_length`` characters whenever a break point allows it.  A single
    piece that is longer than ``max_length`` is kept whole on its own line
    (words are never split in the middle).

    Args:
        label: Text to wrap.
        max_length: Preferred maximum number of characters per line.

    Returns:
        ``label`` unchanged when it already fits; otherwise the wrapped
        lines joined with the two-character sequence backslash + ``n``.
    """
    if len(label) <= max_length:
        return label

    break_chars = " -_/"

    # Cut the label into pieces that each end right after a break character,
    # so "-", "_" and "/" stay at the end of the line they belong to instead
    # of being pushed to the start of the next one.
    pieces = []
    start = 0
    for idx, char in enumerate(label):
        if char in break_chars:
            pieces.append(label[start:idx + 1])
            start = idx + 1
    if start < len(label):
        pieces.append(label[start:])

    lines = []
    current = ""
    for piece in pieces:
        candidate = current + piece
        # Surrounding whitespace is stripped on output, so measure without it.
        if current.strip() and len(candidate.strip()) > max_length:
            lines.append(current.strip())
            current = piece
        else:
            current = candidate

    # Skip a whitespace-only remainder so no empty trailing line is emitted.
    if current.strip():
        lines.append(current.strip())

    # Intentionally a literal backslash + "n" (not a newline character): the
    # caller passes this to Graphviz, which uses that escape as a line break.
    return "\\n".join(lines)

def _add_entity_nodes(G, parent, node_prefix, values):
    """Add one "note" leaf per value under ``parent``, named ``{node_prefix}_{index}``."""
    for ent_idx, value in enumerate(values):
        ent_node = f"{node_prefix}_{ent_idx}"
        G.add_node(ent_node,
                   # str(): the element type returned by extract_entities is
                   # not visible here; wrap_label needs a string.
                   label=wrap_label(str(value), 15),
                   shape="note",
                   fillcolor="#E1F5FE")
        G.add_edge(parent, ent_node)


def _svg_to_png(G, svg_path, layout_args):
    """Produce a PNG next to ``svg_path``: ImageMagick first, Graphviz as fallback."""
    import subprocess

    png_path = os.path.splitext(svg_path)[0] + ".png"
    try:
        # Argument list (no shell) and check=True so a missing or failing
        # `convert` is detected instead of being silently ignored.
        subprocess.run(
            ["convert", "-density", "300", "-resize", "5000x5000",
             svg_path, png_path],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # ImageMagick unavailable or conversion failed: let Graphviz render
        # the PNG itself so the returned path always points at a fresh file.
        G.draw(png_path, format="png", prog="sfdp", args=layout_args)
    return png_path


def generate_high_res_mindmap(text):
    """Build a mind map of the entities found in ``text`` and render it to PNG.

    The graph has a central "DOCUMENT" node, one node per entity category
    (at most 6), and the category's entities as leaves.  Categories with more
    than 8 entities are split into "Group N" sub-nodes of up to 8 entities
    each (at most 5 groups per category).

    The SVG is written to ``mindmaps/mindmap.svg`` and the PNG to
    ``mindmaps/mindmap.png``; both fixed paths are overwritten on every call.

    Args:
        text: Document text passed to ``extract_entities``.

    Returns:
        Path of the generated PNG file.
    """
    entities = extract_entities(text)

    # Create graph with professional styling
    # NOTE(review): Graphviz documents rankdir/ranksep/nodesep as dot-layout
    # attributes; with prog="sfdp" below they may have no effect -- confirm.
    G = pgv.AGraph(
        directed=True,
        rankdir="TB",  # Top-to-bottom layout
        size="100,120",  # Larger canvas size
        dpi="300",  # Higher DPI for better resolution
        bgcolor="white",
        pad="1.0",
        ranksep="2.0",  # Increased spacing between ranks
        nodesep="1.5",  # Increased spacing between nodes
        splines="ortho",  # Orthogonal edges for cleaner look
        overlap="false",
        concentrate="true",
        quantum="0.5",
        fontname="Helvetica"
    )

    # Node styling (defaults; every node added below overrides the shape)
    G.node_attr.update({
        "fontsize": "28",
        "fontname": "Helvetica Bold",
        "shape": "Mrecord",  # Rounded rectangles with fields
        "style": "filled,rounded",
        "fillcolor": "#E1F5FE",  # Light blue
        "color": "#0288D1",  # Darker blue border
        "height": "0.6",
        "width": "1.8",
        "penwidth": "2.0"
    })

    # Edge styling
    G.edge_attr.update({
        "color": "#757575",  # Medium gray
        "penwidth": "2.5",
        "arrowsize": "1.2",
        "fontname": "Helvetica",
        "fontsize": "24",
        "fontcolor": "#616161"
    })

    # Add central document node
    G.add_node("DOCUMENT",
               shape="doubleoctagon",
               fillcolor="#4FC3F7",
               fontsize="36",
               width="3.0",
               height="1.2")

    # Limits that keep the rendered graph readable
    max_main_categories = 6
    max_subcategories = 5
    max_entities = 8

    for cat_idx, (category, values) in enumerate(entities.items()):
        if cat_idx >= max_main_categories:
            break

        # Materialise so slicing/len work for any iterable of values.
        values = list(values)

        # Main category node
        cat_node = f"CAT_{cat_idx}"
        G.add_node(cat_node,
                   label=wrap_label(category.upper(), 18),
                   shape="tab",
                   fillcolor="#81D4FA")
        G.add_edge("DOCUMENT", cat_node,
                   label="contains",
                   penwidth="3.0")

        if len(values) <= max_entities:
            # Few enough entities: hang them directly off the category.
            _add_entity_nodes(G, cat_node, f"ENT_{cat_idx}", values)
            continue

        # Too many entities: split into fixed-size groups under the category.
        groups = [values[i:i + max_entities]
                  for i in range(0, len(values), max_entities)]
        for sub_idx, group in enumerate(groups[:max_subcategories]):
            sub_node = f"SUB_{cat_idx}_{sub_idx}"
            G.add_node(sub_node,
                       label=f"Group {sub_idx+1}",
                       shape="folder",
                       fillcolor="#B3E5FC")
            G.add_edge(cat_node, sub_node)
            _add_entity_nodes(G, sub_node, f"ENT_{cat_idx}_{sub_idx}", group)

    # Generate high-resolution output
    output_dir = "mindmaps"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "mindmap.svg")  # Use SVG for scalability

    # Use sfdp layout engine for large graphs
    layout_args = "-Goverlap=prism -Gepsilon=0.0001 -Gmaxiter=5000 -Gbgcolor=white"
    G.draw(output_path,
           format="svg",
           prog="sfdp",
           args=layout_args)

    # Convert SVG to high-res PNG (falls back to Graphviz if convert fails)
    return _svg_to_png(G, output_path, layout_args)

@app.post("/summarize")
def summarize_text(request: TextRequest):
    """Summarize the posted text chunk by chunk and join the partial summaries.

    The text is cut into consecutive 500-character slices; each non-blank
    slice is passed to ``summarizer`` and the resulting ``summary_text``
    values are joined with single spaces.

    Args:
        request: Body carrying the text to summarize.

    Returns:
        ``{"summary": <joined summaries>}``; the summary is an empty string
        when the text is empty or contains only whitespace.

    Raises:
        HTTPException: Status 500 if summarizing any chunk fails.
    """
    chunk_size = 500
    text = request.text
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    summaries = []
    for chunk in chunks:
        # A whitespace-only slice has nothing to summarize; skip it rather
        # than sending it to the model.
        if not chunk.strip():
            continue
        try:
            summary = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True
            )
            summaries.append(summary[0]['summary_text'])
        except Exception as e:
            # Endpoint boundary: report any failure as a 500 while keeping
            # the original exception chained for server-side tracebacks.
            raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}") from e
    return {"summary": " ".join(summaries)}

@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    """Run ``extract_entities`` on the posted text and return it under "entities"."""
    found = extract_entities(request.text)
    return {"entities": found}

@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    """Render a word cloud of the posted text and return it as a PNG file.

    The image is written to ``wordcloud.png`` in the current working
    directory (overwritten on every call) and sent back as the response.

    Args:
        request: Body carrying the text to visualise.

    Returns:
        A ``FileResponse`` serving the generated PNG.

    Raises:
        HTTPException: Status 400 if the word cloud cannot be built from the
            text (``WordCloud.generate`` raised ``ValueError``, e.g. because
            the text contains no usable words).
    """
    try:
        wordcloud = WordCloud(
            width=1200,
            height=1200,
            max_font_size=120,
            min_font_size=20,
            background_color="white",
            colormap="viridis"
        ).generate(request.text)
    except ValueError as e:
        # Bad input rather than a server fault: answer with a client error
        # instead of letting it surface as an unhandled 500.
        raise HTTPException(status_code=400, detail=f"Word cloud error: {str(e)}") from e

    img_path = "wordcloud.png"
    wordcloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")

# Gradio UI
# Builds the Blocks interface bound to `iface`. The css argument styles the
# image inside elements carrying the "mindmap-container" class and caps the
# overall container width at 1400px.
with gr.Blocks(theme=gr.themes.Soft(), css="""
.mindmap-container img {
    max-height: none !important;
    min-width: 100% !important;
    object-fit: contain !important;
    background: white !important;
    border: 1px solid #e0e0e0 !important;
    border-radius: 8px !important;
    padding: 20px !important;
}
.gradio-container { max-width: 1400px !important; }
""") as iface:
    
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # File selection
    # Dropdown choices come from list_files(), called once while the UI is built.
    with gr.Row():
        file_dropdown = gr.Dropdown(
            choices=list_files(), 
            label="Select Document",
            interactive=True
        )
        process_btn = gr.Button("Process Document", variant="primary")

    # Document display
    # Two side-by-side text boxes: the document text and its summary.
    with gr.Row():
        full_doc_text = gr.Textbox(
            label="Full Document Text",
            lines=15,
            max_lines=25
        )
        output_summary = gr.Textbox(
            label="AI Summary",
            lines=15,
            max_lines=25
        )

    # Analysis results
    with gr.Row():
        output_entities = gr.JSON(
            label="Extracted Entities",
            show_label=True
        )
        output_wordcloud = gr.Image(
            label="Word Cloud",
            height=600,
            width=600
            
        )

    # Mind map section
    with gr.Row():
        mindmap_btn = gr.Button(
            "Generate Enhanced Mind Map",
            variant="primary"
        )
    
    with gr.Row():
        # elem_classes ties this image to the .mindmap-container CSS rule above.
        output_mindmap = gr.Image(
            label="High-Resolution Mind Map",
            elem_classes="mindmap-container",
            height=800,
            width=800
        )

    # Event handlers
    # process_file receives the selected dropdown value; its result is routed
    # to the four outputs in the order listed.
    # NOTE(review): presumably process_file returns (text, summary, entities,
    # word cloud image) in that order -- verify against utils.process_file.
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud]
    )

    # The mind map is generated from whatever is currently in the
    # "Full Document Text" box.
    mindmap_btn.click(
        fn=generate_high_res_mindmap,
        inputs=full_doc_text,
        outputs=output_mindmap
    )

# Script entry point: start the Gradio interface when run directly.
# NOTE(review): only `iface` is launched here; nothing in the visible code
# starts or mounts the FastAPI `app`, so its routes are presumably served by
# a separate ASGI server invocation -- confirm.
if __name__ == "__main__":
    iface.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False,
        debug=True
    )