File size: 3,008 Bytes
5098eae
2339301
 
 
5098eae
2339301
 
5098eae
2339301
5098eae
2339301
 
 
 
 
5098eae
2339301
 
 
 
 
 
 
 
 
 
 
5098eae
2339301
 
 
 
 
 
 
 
 
 
 
 
5098eae
 
 
 
 
 
 
 
2339301
 
 
 
 
5098eae
8216643
5098eae
 
 
 
5a246ea
5098eae
5a246ea
5098eae
 
 
 
 
 
2339301
5098eae
4418e3c
5098eae
 
 
 
 
 
 
 
 
 
2339301
5098eae
4418e3c
5098eae
 
 
 
 
 
58125e7
 
5098eae
4418e3c
8216643
5098eae
63c3641
5098eae
 
 
5a246ea
4418e3c
 
a29865d
2339301
5098eae
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from fastapi.responses import FileResponse
import gradio as gr
from entity_recognition import extract_entities
from wordcloud import WordCloud
from summarization import summarizer
from utils import list_files, process_file

# Initialize FastAPI
app = FastAPI()

# Request Model
class TextRequest(BaseModel):
    """Request body shared by all POST endpoints: one block of raw text."""

    text: str  # the document text to summarize / analyze / visualize

@app.post("/summarize")
def summarize_text(request: TextRequest):
    """Summarize the request text chunk-by-chunk and join the results.

    The text is split into fixed 500-character chunks (presumably to stay
    within the summarizer model's input limit — TODO confirm against the
    model config) and each chunk is summarized independently.

    Raises:
        HTTPException(400): if the text is empty or whitespace-only.
        HTTPException(500): if the summarizer fails on any chunk; the
            original exception is chained as the cause.
    """
    if not request.text.strip():
        # Reject empty input explicitly instead of silently returning "".
        raise HTTPException(status_code=400, detail="Text must not be empty.")
    chunks = [request.text[i:i + 500] for i in range(0, len(request.text), 500)]
    summaries = []
    # Hoist the try out of the loop: any chunk failure aborts the whole
    # request anyway, and this avoids per-iteration handler setup.
    try:
        for chunk in chunks:
            summary = summarizer(
                chunk,
                max_length=130,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(summary[0]['summary_text'])
    except Exception as e:
        # Chain the cause so the real traceback survives into server logs.
        raise HTTPException(status_code=500, detail=f"Summarization error: {str(e)}") from e
    return {"summary": " ".join(summaries)}

@app.post("/entities")
def extract_entities_endpoint(request: TextRequest):
    """Run named-entity extraction over the submitted text."""
    found = extract_entities(request.text)
    return {"entities": found}

@app.post("/wordcloud")
def generate_word_cloud(request: TextRequest):
    """Render a word-cloud PNG for the submitted text and return it.

    Raises:
        HTTPException(400): if the text yields no plottable words —
            WordCloud.generate raises ValueError in that case, which
            previously escaped as an unhandled 500.
    """
    try:
        wordcloud = WordCloud(
            width=1200,
            height=1200,
            max_font_size=120,
            min_font_size=20,
            background_color="white",
            colormap="viridis"
        ).generate(request.text)
    except ValueError as e:
        # WordCloud signals "no words to plot" via ValueError.
        raise HTTPException(status_code=400, detail=f"Cannot build word cloud: {e}") from e
    # NOTE(review): a single fixed path is shared by all requests, so
    # concurrent calls can overwrite each other's image before it is
    # served — consider a per-request temp file.
    img_path = "wordcloud.png"
    wordcloud.to_file(img_path)
    return FileResponse(img_path, media_type="image/png", filename="wordcloud.png")

# Gradio UI — all components and event wiring live inside this Blocks context.
with gr.Blocks(theme=gr.themes.Soft(), css="""

""") as iface:
    
    gr.Markdown("# JFK Document Analysis Suite")
    gr.Markdown("Analyze declassified documents with AI-powered tools")

    # File selection: dropdown of available documents plus a trigger button.
    with gr.Row():
        file_dropdown = gr.Dropdown(
            choices=list_files(),  # populated once at app start, not per-request
            label="Select Document",
            interactive=True
        )
        process_btn = gr.Button("Process Document", variant="primary")

    # Document display: raw text side-by-side with its generated summary.
    with gr.Row():
        full_doc_text = gr.Textbox(
            label="Full Document Text",
            lines=15,
            max_lines=25
        )
        output_summary = gr.Textbox(
            label="AI Summary",
            lines=15,
            max_lines=25
        )

    # Analysis results: extracted entities (JSON) and the word-cloud image.
    with gr.Row():
        output_entities = gr.JSON(
            label="Extracted Entities",
            show_label=True
        )
        output_wordcloud = gr.Image(
            label="Word Cloud",
            height=600,
            width=600
        )



    # Event handlers must be inside the Blocks context
    # process_file is expected to return a 4-tuple matching `outputs` order
    # (text, summary, entities, wordcloud) — TODO confirm in utils.process_file.
    process_btn.click(
        fn=process_file,
        inputs=file_dropdown,
        outputs=[full_doc_text, output_summary, output_entities, output_wordcloud]
    )


if __name__ == "__main__":
    # Serve the Gradio UI on all interfaces, port 7860, without a share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "debug": True,
    }
    iface.launch(**launch_options)