3v324v23's picture
Auto-deploy from GitHub
58125e7
raw
history blame
8.69 kB
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.responses import FileResponse
import gradio as gr
from entity_recognition import extract_entities
from wordcloud import WordCloud
from summarization import summarizer
from utils import list_files, process_file
import pygraphviz as pgv
import os
# Initialize FastAPI: module-level application object that the @app.post route decorators in this file register on
app = FastAPI()
# Request model: the JSON body accepted by the text-processing endpoints.
class TextRequest(BaseModel):
    # Raw text to analyse.
    text: str
def wrap_label(label, max_length=15):
    """Wrap a node label by breaking at natural separator characters.

    Characters are accumulated into the current line. Once that line holds at
    least ``max_length`` characters, the next space, hyphen, underscore or
    slash starts a new line; the separator itself is carried over to the
    start of the new line. Surrounding whitespace is stripped from every
    line. Because a break only happens at a separator, a line may be longer
    than ``max_length``.

    Args:
        label: Text to wrap.
        max_length: Soft limit on the length of each line.

    Returns:
        ``label`` unchanged when it already fits within ``max_length``;
        otherwise the non-empty lines joined by the two-character sequence
        backslash + ``n`` (the line-break escape used in Graphviz labels).
        Whitespace-only segments are dropped, so the result never starts or
        ends with a line-break escape and never contains an empty line.
    """
    if len(label) <= max_length:
        return label

    # Natural points at which a line may be broken.
    break_chars = " -_/"
    lines = []
    current = ""
    for char in label:
        if len(current) >= max_length and char in break_chars:
            lines.append(current.strip())
            current = char
        else:
            current += char
    lines.append(current.strip())

    # A label ending in a separator (or made only of whitespace) produces
    # empty segments; skipping them avoids stray blank lines in the node.
    return "\\n".join(line for line in lines if line)
def _add_entity_node(graph, parent, node_id, value):
    """Add one entity leaf node under ``parent`` and connect the two."""
    graph.add_node(node_id,
                   label=wrap_label(value, 15),
                   shape="note",
                   fillcolor="#E1F5FE")
    graph.add_edge(parent, node_id)


def generate_high_res_mindmap(text):
    """Generate high-resolution mind map with optimized layout.

    Entities extracted from ``text`` are drawn as a graph rooted at a
    "DOCUMENT" node. Each category (at most 6) gets its own node; below it
    come either the entities directly (when there are at most 8) or
    "Group N" nodes holding up to 8 entities each (at most 5 groups, any
    further entities are left out). The graph is written to
    ``mindmaps/mindmap.svg`` and then rasterised to ``mindmaps/mindmap.png``.

    Args:
        text: Document text handed to ``extract_entities``. The result is
            assumed to be a mapping of category name -> list of entity
            strings (it is iterated with ``.items()``, sliced, and its
            members are passed to ``wrap_label``) -- TODO confirm against
            ``entity_recognition``.

    Returns:
        Path of the generated PNG file.
    """
    # Local import so this function does not depend on the module's import block.
    import subprocess

    entities = extract_entities(text)

    # Create graph with professional styling.
    # NOTE(review): rankdir/ranksep/nodesep are documented by Graphviz as
    # applying to the "dot" engine; with prog="sfdp" below they are probably
    # ignored -- confirm before relying on them.
    G = pgv.AGraph(
        directed=True,
        rankdir="TB",      # Top-to-bottom layout
        size="100,120",    # Larger canvas size
        dpi="300",         # Higher DPI for better resolution
        bgcolor="white",
        pad="1.0",
        ranksep="2.0",     # Increased spacing between ranks
        nodesep="1.5",     # Increased spacing between nodes
        splines="ortho",   # Orthogonal edges for cleaner look
        overlap="false",
        concentrate="true",
        quantum="0.5",
        fontname="Helvetica"
    )

    # Default node styling (every node added below overrides the shape).
    G.node_attr.update({
        "fontsize": "28",
        "fontname": "Helvetica Bold",
        "shape": "Mrecord",       # Rounded rectangles with fields
        "style": "filled,rounded",
        "fillcolor": "#E1F5FE",   # Light blue
        "color": "#0288D1",       # Darker blue border
        "height": "0.6",
        "width": "1.8",
        "penwidth": "2.0"
    })

    # Default edge styling.
    G.edge_attr.update({
        "color": "#757575",       # Medium gray
        "penwidth": "2.5",
        "arrowsize": "1.2",
        "fontname": "Helvetica",
        "fontsize": "24",
        "fontcolor": "#616161"
    })

    # Add central document node.
    G.add_node("DOCUMENT",
               shape="doubleoctagon",
               fillcolor="#4FC3F7",
               fontsize="36",
               width="3.0",
               height="1.2")

    # Process entities with hierarchical grouping.
    max_main_categories = 6
    max_subcategories = 5
    max_entities = 8

    for cat_idx, (category, values) in enumerate(entities.items()):
        if cat_idx >= max_main_categories:
            break

        # Main category node.
        cat_node = f"CAT_{cat_idx}"
        G.add_node(cat_node,
                   label=wrap_label(category.upper(), 18),
                   shape="tab",
                   fillcolor="#81D4FA")
        G.add_edge("DOCUMENT", cat_node,
                   label="contains",
                   penwidth="3.0")

        if len(values) > max_entities:
            # Too many entities for one level: split them into fixed-size
            # groups and keep only the first ``max_subcategories`` groups.
            grouped_values = [
                values[i:i + max_entities]
                for i in range(0, len(values), max_entities)
            ]
            for sub_idx, group in enumerate(grouped_values[:max_subcategories]):
                sub_node = f"SUB_{cat_idx}_{sub_idx}"
                G.add_node(sub_node,
                           label=f"Group {sub_idx+1}",
                           shape="folder",
                           fillcolor="#B3E5FC")
                G.add_edge(cat_node, sub_node)

                # Add entities to subcategory.
                for ent_idx, value in enumerate(group):
                    _add_entity_node(
                        G, sub_node, f"ENT_{cat_idx}_{sub_idx}_{ent_idx}", value
                    )
        else:
            # Directly add entities to main category; this branch already
            # guarantees at most ``max_entities`` values, so no cap is needed.
            for ent_idx, value in enumerate(values):
                _add_entity_node(G, cat_node, f"ENT_{cat_idx}_{ent_idx}", value)

    # Generate high-resolution output.
    output_dir = "mindmaps"
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "mindmap.svg")  # Use SVG for scalability

    # Use sfdp layout engine for large graphs.
    layout_args = "-Goverlap=prism -Gepsilon=0.0001 -Gmaxiter=5000 -Gbgcolor=white"
    G.draw(output_path, format="svg", prog="sfdp", args=layout_args)

    # Convert SVG to high-res PNG with ImageMagick. An argument list (no
    # shell) plus check=True makes a missing or failing ``convert`` visible
    # instead of silently returning a path to a missing or stale file.
    png_path = os.path.splitext(output_path)[0] + ".png"
    convert_cmd = [
        "convert", "-density", "300", "-resize", "5000x5000",
        output_path, png_path,
    ]
    try:
        subprocess.run(convert_cmd, check=True)
    except (OSError, subprocess.CalledProcessError):
        # ImageMagick is unavailable or failed: let Graphviz render the PNG
        # directly so the returned path always points at a fresh image.
        G.draw(png_path, format="png", prog="sfdp", args=layout_args)

    return png_path
# POST /summarize: split the text into 500-character pieces, summarise each
# piece, and return the summaries joined by single spaces under "summary".
# Any failure while summarising a piece is reported as an HTTP 500.
@app.post("/summarize")
def summarize_text(request: TextRequest):
    chunk_size = 500
    source = request.text
    pieces = [
        source[start:start + chunk_size]
        for start in range(0, len(source), chunk_size)
    ]

    collected = []
    for piece in pieces:
        try:
            result = summarizer(
                piece,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            collected.append(result[0]['summary_text'])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}")

    joined = " ".join(collected)
    return {"summary": joined}
# POST /entities: run entity extraction on the submitted text and return the
# result under the "entities" key.
@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    found = extract_entities(request.text)
    return {"entities": found}
@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    """Render a word cloud of the submitted text and return it as a PNG file.

    The image is written to ``wordcloud.png`` in the working directory and
    sent back as a file response.

    Raises:
        HTTPException: 400 when the word cloud cannot be built from the text
            (for example, when it contains no usable words).
    """
    cloud = WordCloud(
        width=1200,
        height=1200,
        max_font_size=120,
        min_font_size=20,
        background_color="white",
        colormap="viridis"
    )
    try:
        cloud.generate(request.text)
    except ValueError as exc:
        # WordCloud raises ValueError when there is nothing to plot; report
        # it as a client error instead of an unhandled server error.
        raise HTTPException(
            status_code=400,
            detail=f"Word cloud error: {exc}",
        ) from exc

    # NOTE(review): the fixed file name is shared by all requests; concurrent
    # calls could overwrite each other's image -- confirm whether that matters.
    img_path = "wordcloud.png"
    cloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")
# Gradio UI: one Blocks app with a document picker, text/summary panes,
# entity and word-cloud outputs, and a mind-map section.
# NOTE(review): the source's indentation was lost; the row nesting below
# (in particular the process button sharing a row with the dropdown) is
# inferred -- confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), css="""
.mindmap-container img {
max-height: none !important;
min-width: 100% !important;
object-fit: contain !important;
background: white !important;
border: 1px solid #e0e0e0 !important;
border-radius: 8px !important;
padding: 20px !important;
}
.gradio-container { max-width: 1400px !important; }
""") as iface:
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # Row 1: choose a document and trigger processing.
    with gr.Row():
        file_dropdown = gr.Dropdown(choices=list_files(), label="Select Document", interactive=True)
        process_btn = gr.Button("Process Document", variant="primary")

    # Row 2: full text next to its summary.
    with gr.Row():
        full_doc_text = gr.Textbox(label="Full Document Text", lines=15, max_lines=25)
        output_summary = gr.Textbox(label="AI Summary", lines=15, max_lines=25)

    # Row 3: extracted entities next to the word cloud.
    with gr.Row():
        output_entities = gr.JSON(label="Extracted Entities", show_label=True)
        output_wordcloud = gr.Image(label="Word Cloud", height=600, width=600)

    # Rows 4-5: mind-map trigger and its image output.
    with gr.Row():
        mindmap_btn = gr.Button("Generate Enhanced Mind Map", variant="primary")
    with gr.Row():
        output_mindmap = gr.Image(
            label="High-Resolution Mind Map",
            elem_classes="mindmap-container",
            height=800,
            width=800,
        )

    # Wiring: processing fills the four result widgets; the mind map is built
    # from whatever text is currently in the full-text box.
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud],
    )
    mindmap_btn.click(fn=generate_high_res_mindmap, inputs=full_doc_text, outputs=output_mindmap)
if __name__ == "__main__":
    # Launch the Gradio interface when the file is run as a script:
    # bound to 0.0.0.0 on port 7860, no share link, debug enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    iface.launch(**launch_options)