ylingag commited on
Commit
ef62cb6
·
verified ·
1 Parent(s): d23c10a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +287 -0
app.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import torch
5
+ import nltk
6
+ import spacy
7
+ import re
8
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
9
+
10
# Download necessary NLTK data for sentence tokenization
# NOTE(review): punkt is downloaded but nltk tokenization is never called below —
# sentence splitting uses spaCy; confirm whether this download is still needed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Load spaCy model
nlp = spacy.load('en_core_web_sm')
# NOTE(review): en_core_web_sm already provides sentence boundaries via its parser;
# adding a rule-based sentencizer on top is presumably for speed/robustness — verify.
nlp.add_pipe('sentencizer')

# Global loading of models and NLP components
fin_model = None   # sentiment-analysis pipeline; populated by load_models()
summarizer = None  # summarization pipeline; populated by load_models()
ner_model = None   # token-classification (NER) pipeline; populated by load_models()
# NOTE(review): auth_token is read here but never passed to any pipeline call —
# confirm whether the NER model actually requires authentication.
auth_token = os.environ.get("HF_Token")  # For NER model loading
25
+
26
def load_models():
    """Load the sentiment, summarization and NER pipelines into module globals.

    Each model is loaded independently: a failure leaves the corresponding
    global (``fin_model`` / ``summarizer`` / ``ner_model``) set to None so
    the app degrades gracefully instead of crashing at startup.
    """
    global fin_model, summarizer, ner_model

    def _load(task, model_name, name, title):
        # Load one transformers pipeline; return None (with a warning) on failure.
        print(f"Loading {name} model...")
        try:
            loaded = pipeline(task, model=model_name)
            print(f"{title} model loaded successfully.")
            return loaded
        except Exception as e:
            print(f"Warning: Failed to load {name} model: {e}")
            print(f"Will continue without {name} capability.")
            return None

    fin_model = _load("sentiment-analysis", "ylingag/ISOM5240_financial_tone",
                      "sentiment", "Sentiment")
    summarizer = _load("summarization", "knkarthick/MEETING_SUMMARY",
                       "summarization", "Summarization")
    ner_model = _load("ner", "dslim/bert-base-NER", "NER", "NER")
57
+
58
def split_in_sentences(text):
    """Return the sentences of *text* as a list of stripped strings (via spaCy)."""
    return [sentence.text.strip() for sentence in nlp(text).sents]
62
+
63
def make_spans(text, results):
    """Pair each sentence of *text* with a human-readable sentiment label.

    ``results`` is the list of dicts produced by the sentiment pipeline for
    the sentences of *text*, in the same order.  Generic ``LABEL_n`` names
    are normalized to Negative/Neutral/Positive; any other label (e.g. a
    model that already emits readable names) passes through unchanged.
    Returns a list of (sentence, label) tuples for gr.HighlightedText.
    """
    # Dict lookup replaces the previous duplicated if/elif chain.
    label_names = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
    labels = [label_names.get(result['label'], result['label']) for result in results]
    return list(zip(split_in_sentences(text), labels))
80
+
81
def text_to_sentiment(text):
    """Return the overall sentiment label for *text*.

    Returns a status message string when the model is unavailable, the
    input is empty, or inference fails; otherwise one of the model's
    labels, with generic ``LABEL_n`` names normalized to
    Negative/Neutral/Positive (kept consistent with make_spans).
    """
    global fin_model
    if not fin_model:
        return "Sentiment model not available."

    if not text or not text.strip():
        return "Please enter text for analysis."

    try:
        sentiment = fin_model(text)[0]["label"]
        # Same normalization table as make_spans; unknown labels pass through.
        label_names = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}
        return label_names.get(sentiment, sentiment)
    except Exception as e:
        print(f"Error during overall sentiment analysis: {e}")
        return f"Error: {str(e)}"
104
+
105
def summarize_text(text):
    """Generate a short summary of *text*.

    Returns a status message string when the summarizer is unavailable,
    the text is too short (< 50 chars stripped), or inference fails.
    """
    global summarizer
    if not summarizer:
        return "Summarization model not available."

    if not text or len(text.strip()) < 50:
        return "Text too short for summarization."

    try:
        return summarizer(text)[0]['summary_text']
    except Exception as e:
        print(f"Error during summarization: {e}")
        return f"Summarization error: {str(e)}"
120
+
121
def fin_ext(text):
    """Return per-sentence (sentence, sentiment) spans for highlighting.

    Returns None when the sentiment model is unavailable, the input is
    empty, or inference fails.
    """
    global fin_model
    if not fin_model or not text:
        return None

    try:
        sentence_results = fin_model(split_in_sentences(text))
        return make_spans(text, sentence_results)
    except Exception as e:
        print(f"Error during sentence-level sentiment analysis: {e}")
        return None
133
+
134
def identify_entities(text):
    """Identify entities using the NER model, with spaCy as backup.

    Returns a list of (text, label_or_None) spans suitable for
    gr.HighlightedText, or None when *text* is empty.  Non-entity stretches
    of the original text are emitted with a None label so the highlighted
    output reconstructs the full input.
    """
    global ner_model
    if not text:
        return None

    try:
        # First, try to use the transformer-based NER model.
        if ner_model:
            entities = ner_model(text)

            spans = []
            last_end = 0

            # Process entities in order of appearance in the text.
            for entity in sorted(entities, key=lambda x: x['start']):
                start = entity['start']
                end = entity['end']
                entity_type = entity['entity']

                # Skip entities overlapping a span already emitted; a
                # negative slice below would corrupt the output.
                if start < last_end:
                    continue

                # Plain text before the entity.
                if start > last_end:
                    spans.append((text[last_end:start], None))

                # Use the original text slice rather than entity['word'],
                # which may contain WordPiece artifacts such as '##'.
                spans.append((text[start:end], entity_type))
                last_end = end

            # Remaining text after the last entity.
            if last_end < len(text):
                spans.append((text[last_end:], None))

            return spans

        # Transformer model unavailable: fall back to spaCy NER.
        else:
            doc = nlp(text)
            spans = []
            last_end = 0

            for ent in doc.ents:
                if ent.label_ in ["GPE", "LOC", "ORG"]:  # Only locations and organizations
                    start = text.find(ent.text, last_end)
                    if start != -1:
                        end = start + len(ent.text)
                        if start > last_end:
                            spans.append((text[last_end:start], None))
                        spans.append((ent.text, ent.label_))
                        last_end = end

            if last_end < len(text):
                spans.append((text[last_end:], None))

            return spans

    except Exception as e:
        print(f"Error during entity identification: {e}")
        # Fallback to spaCy if the transformer path raised.
        try:
            doc = nlp(text)
            spans = [(ent.text, ent.label_)
                     for ent in doc.ents
                     if ent.label_ in ["GPE", "LOC", "ORG"]]

            # If no entities found, return the whole text unhighlighted.
            if not spans:
                spans = [(text, None)]

            return spans
        except Exception:
            # Last resort: nothing highlighted.
            return [(text, None)]
213
+
214
def analyze_financial_text(text):
    """Run every analysis on *text* and return the four UI outputs.

    Returns (sentence_spans, summary, entity_spans, overall_sentiment),
    matching the Gradio output components in order.  Empty/blank input
    yields placeholder values without invoking any model.
    """
    if not text or not text.strip():
        return None, "No summary available.", None, "No sentiment available."

    summary = summarize_text(text)                 # abstract summary
    overall_sentiment = text_to_sentiment(text)    # whole-document tone
    sentence_spans = fin_ext(text)                 # per-sentence highlights
    entity_spans = identify_entities(text)         # company/location highlights

    return sentence_spans, summary, entity_spans, overall_sentiment
232
+
233
# Try to load models at app startup.
# load_models() already catches per-model failures; this outer guard only
# protects against unexpected errors so the UI below can still be built.
try:
    load_models()
except Exception as e:
    print(f"Initial model loading failed: {e}")
    # Gradio interface will still start, but functionality will be limited
239
+
240
# Gradio interface definition
app_title = "Financial Tone Analysis"
app_description = "The project will summarize financial news content, analyze financial sentiment, and flag relevant companies and countries"

with gr.Blocks(title=app_title) as iface:
    gr.Markdown(f"# {app_title}")
    gr.Markdown(app_description)

    # --- Input row: free-form news text plus the analysis trigger ---
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                lines=10,
                label="Financial News Text",
                placeholder="Enter a longer financial news text here for analysis...",
                value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month."
            )
            analyze_btn = gr.Button("Start Analysis", variant="primary")

    # --- Summary row ---
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text Summary")
            summary_output = gr.Textbox(label="Summary", lines=3)

    # --- Sentiment row: overall document tone plus per-sentence highlights ---
    with gr.Row():
        gr.Markdown("### Market sentiment")
        with gr.Column(scale=1):
            gr.Markdown("#### Overall Tone")
            overall_sentiment_output = gr.Label(label="Document Sentiment")
        with gr.Column(scale=2):
            gr.Markdown("#### Sentence-by-Sentence Analysis")
            sentiment_output = gr.HighlightedText(label="Financial Tone by Sentence")

    # --- Entities row ---
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Interested Parties")
            entities_output = gr.HighlightedText(label="Identified Companies & Locations")

    # Set up the click event for the analyze button.
    # The outputs list order must match analyze_financial_text's return tuple.
    analyze_btn.click(
        fn=analyze_financial_text,
        inputs=[input_text],
        outputs=[sentiment_output, summary_output, entities_output, overall_sentiment_output]
    )
283
+
284
if __name__ == "__main__":
    print("Starting Gradio application...")
    # share=True will generate a public link
    # NOTE(review): on Hugging Face Spaces share=True is ignored with a warning;
    # it only matters when running locally — confirm the intended deployment.
    iface.launch(share=True)