Divyansh Kushwaha committed on
Commit f5dd236 · 1 Parent(s): e6ec654

Files updated

Files changed (5)
  1. Dockerfile +17 -0
  2. api.py +366 -0
  3. main.py +0 -0
  4. requirements.txt +8 -0
  5. utils.py +173 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.9-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Copy requirements and install dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY . .
+
+ # Expose the port FastAPI will run on
+ EXPOSE 8000
+
+ # Command to run the FastAPI app (the FastAPI instance is defined in api.py; main.py is empty)
+ CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
api.py ADDED
@@ -0,0 +1,366 @@
+ from fastapi import FastAPI, Query, HTTPException
+ from fastapi.responses import JSONResponse, FileResponse
+ from elevenlabs import ElevenLabs
+ from langchain.schema import HumanMessage
+ import json
+ from utils import (
+     get_llm,
+     extract_titles_and_summaries,
+     perform_sentiment_analysis,
+     extract_topics_with_hf,
+     compare_articles
+ )
+
+ app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
+
+ # In-memory storage for API keys, supplied at runtime via /setAPIKeys
+ api_keys = {
+     "groq_api_key": None,
+     "elevenlabs_api_key": None,
+     "huggingface_api_key": None,
+     "voice_id": None
+ }
+
+ JSON_FILE_PATH = "final_summary.json"
+ AUDIO_FILE_PATH = "hindi_summary.mp3"
+
+ @app.post("/setAPIKeys")
+ def set_api_keys(
+     groq_api_key: str = Query(..., description="Enter your Groq API Key"),
+     elevenlabs_api_key: str = Query(..., description="Enter your ElevenLabs API Key"),
+     huggingface_api_key: str = Query(..., description="Enter your HuggingFace API Key"),
+     voice_id: str = Query(..., description="Enter your ElevenLabs Voice ID")
+ ):
+     if not groq_api_key or not elevenlabs_api_key or not huggingface_api_key or not voice_id:
+         raise HTTPException(status_code=400, detail="All API keys are required.")
+
+     # Update API keys in temporary storage; the secrets are not echoed back
+     api_keys["groq_api_key"] = groq_api_key
+     api_keys["elevenlabs_api_key"] = elevenlabs_api_key
+     api_keys["huggingface_api_key"] = huggingface_api_key
+     api_keys["voice_id"] = voice_id
+
+     return {"message": "API keys updated successfully"}
+
+ def generate_summary(company_name):
+     # Keys arrive at runtime, so validate and build the LLM here rather than
+     # at import time (a module-level check would fail before /setAPIKeys can run)
+     if not all(api_keys.values()):
+         raise HTTPException(status_code=400, detail="API keys are required. Please use /setAPIKeys to provide them.")
+     llm = get_llm(api_keys["groq_api_key"])
+
+     news_articles = extract_titles_and_summaries(company_name)
+     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
+     news_articles = extract_topics_with_hf(news_articles)
+     final_summary = compare_articles(news_articles, sentiment_counts, llm)
+
+     ELEVEN_LABS_API_KEY = api_keys.get("elevenlabs_api_key", "")
+     VOICE_ID = api_keys.get("voice_id", "")
+     hindi_text = ""
+
+     if ELEVEN_LABS_API_KEY and VOICE_ID:
+         client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
+
+         hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
+         hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
+         hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
+
+         try:
+             audio = client.text_to_speech.convert(
+                 voice_id=VOICE_ID,
+                 output_format="mp3_44100_128",
+                 text=hindi_text,
+                 model_id="eleven_multilingual_v2",
+             )
+             # convert() yields audio chunks; join the stream once and reuse the bytes
+             audio_bytes = b"".join(audio)
+             with open(AUDIO_FILE_PATH, "wb") as f:
+                 f.write(audio_bytes)
+         except Exception as e:
+             print(f"Error generating audio: {e}")
+
+     with open(JSON_FILE_PATH, "w") as f:
+         json.dump(final_summary, f, indent=4)
+
+     return {
+         'Company': final_summary["Company"],
+         'Articles': [
+             {
+                 'Title': article.get('Title', 'No Title'),
+                 'Summary': article.get('Summary', 'No Summary'),
+                 'Sentiment': article.get('Sentiment', 'Unknown'),
+                 'Score': article.get('Score', 0.0),
+                 'Topics': article.get('Topics', [])
+             }
+             for article in final_summary["Articles"]
+         ],
+         'Comparative Sentiment Score': {
+             'Sentiment Distribution': sentiment_counts,
+             'Coverage Differences': final_summary["Comparative Sentiment Score"].get("Coverage Differences", []),
+             'Topic Overlap': {
+                 'Common Topics': final_summary["Comparative Sentiment Score"].get("Topic Overlap", {}).get("Common Topics", []),
+                 'Unique Topics': final_summary["Comparative Sentiment Score"].get("Topic Overlap", {}).get("Unique Topics", {})
+             }
+         },
+         'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
+         # Return the Hindi text rather than raw MP3 bytes, which are not JSON-serializable;
+         # the audio file itself is served by /downloadHindiAudio
+         'Hindi Summary': hindi_text
+     }
+
+ @app.get("/")
+ def home():
+     return {"message": "Welcome to the Company Sentiment API"}
+
+ @app.get("/generateSummary")
+ def get_summary(company_name: str = Query(..., description="Enter company name")):
+     structured_summary = generate_summary(company_name)
+     return structured_summary
+
+ @app.get("/downloadJson")
+ def download_json():
+     return FileResponse(JSON_FILE_PATH, media_type="application/json", filename="final_summary.json")
+
+ @app.get("/downloadHindiAudio")
+ def download_audio():
+     return FileResponse(AUDIO_FILE_PATH, media_type="audio/mp3", filename="hindi_summary.mp3")
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
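Once the container (or a local `uvicorn api:app`) is up, the endpoints are meant to be called in order: register keys first, then request a summary. A minimal client sketch using `requests`; the base URL, key values, and company name are illustrative placeholders (the voice ID is the one used in the original code):

import requests

BASE = "http://localhost:8000"

# Register the runtime keys first; /generateSummary returns a 400 until this is done
requests.post(f"{BASE}/setAPIKeys", params={
    "groq_api_key": "gsk-...",           # placeholder
    "elevenlabs_api_key": "el-...",      # placeholder
    "huggingface_api_key": "hf_...",     # placeholder
    "voice_id": "9BWtsMINqrJLrRacOk9x",
}).raise_for_status()

# Generate the structured summary for a company
resp = requests.get(f"{BASE}/generateSummary", params={"company_name": "Tesla"})
resp.raise_for_status()
print(resp.json()["Final Sentiment Analysis"])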
main.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi
+ uvicorn
+ requests
+ bs4
+ transformers
+ langchain
+ langchain_groq
+ elevenlabs
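Note that `bs4` provides BeautifulSoup, and the transformers pipelines used in utils.py additionally need a backend such as `torch`, which is not listed here. A quick sanity check that everything imports cleanly in the target environment (a sketch; module names here match the imports in the code, not necessarily the pip package names):

import importlib

for mod in ["fastapi", "uvicorn", "requests", "bs4",
            "transformers", "langchain", "langchain_groq", "elevenlabs"]:
    importlib.import_module(mod)
print("All dependencies importable")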
utils.py ADDED
@@ -0,0 +1,173 @@
+ import json
+ import requests
+ from bs4 import BeautifulSoup
+ from transformers import pipeline
+ from langchain.schema import HumanMessage
+ from langchain_groq import ChatGroq
+
+ def get_llm(api_key):
+     if not api_key:
+         raise ValueError("Groq API key is required to initialize the LLM.")
+     return ChatGroq(api_key=api_key, model="llama-3.1-8b-instant")
+
+ def extract_titles_and_summaries(company_name, num_articles=10):
+     url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
+     try:
+         response = requests.get(url, timeout=10)
+         if response.status_code != 200:
+             print(f"Failed to fetch the webpage. Status code: {response.status_code}")
+             return {"Company": company_name, "Articles": []}
+
+         soup = BeautifulSoup(response.content, "html.parser")
+         articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
+         extracted_articles = []
+
+         for article in articles:
+             title_tag = article.find('h2')
+             if title_tag:
+                 link_tag = title_tag.find('a')
+                 title = link_tag.get_text(strip=True) if link_tag else "No Title Found"
+             else:
+                 title = "No Title Found"
+
+             summary_tag = article.find('p')
+             summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
+
+             extracted_articles.append({
+                 "Title": title,
+                 "Summary": summary
+             })
+
+         return {
+             "Company": company_name,
+             "Articles": extracted_articles
+         }
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         # Return the same shape as the success path so callers can use .get("Articles")
+         return {"Company": company_name, "Articles": []}
+
+ def perform_sentiment_analysis(news_data):
+     articles = news_data.get("Articles", [])
+     # Note: the model is reloaded on every call; cache it at module level if this becomes a bottleneck
+     pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
+     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
+
+     # Collapse the model's five-way labels into three buckets
+     sentiment_map = {
+         "positive": "Positive",
+         "negative": "Negative",
+         "neutral": "Neutral",
+         "very positive": "Positive",
+         "very negative": "Negative"
+     }
+
+     for article in articles:
+         content = f"{article['Title']} {article['Summary']}"
+         sentiment_result = pipe(content)[0]
+
+         sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
+         score = float(sentiment_result["score"])
+
+         article["Sentiment"] = sentiment
+         article["Score"] = score
+
+         if sentiment in sentiment_counts:
+             sentiment_counts[sentiment] += 1
+
+     return news_data, sentiment_counts
+
+ def extract_topics_with_hf(news_data):
+     structured_data = {
+         "Company": news_data.get("Company", "Unknown"),
+         "Articles": []
+     }
+     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
+     articles = news_data.get("Articles", [])
+     for article in articles:
+         content = f"{article['Title']} {article['Summary']}"
+         topics_result = topic_pipe(content, top_k=3)
+         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+
+         structured_data["Articles"].append({
+             "Title": article["Title"],
+             "Summary": article["Summary"],
+             "Sentiment": article.get("Sentiment", "Unknown"),
+             "Score": article.get("Score", 0.0),
+             "Topics": topics
+         })
+     return structured_data
+
+ def generate_final_sentiment(news_data, sentiment_counts, llm):
+     company_name = news_data["Company"]
+     total_articles = sum(sentiment_counts.values())
+     combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
+     prompt = f"""
+     Based on the analysis of {total_articles} articles about the company "{company_name}":
+     - Positive articles: {sentiment_counts['Positive']}
+     - Negative articles: {sentiment_counts['Negative']}
+     - Neutral articles: {sentiment_counts['Neutral']}
+     The following are the summarized key points from the articles: "{combined_summaries}".
+     Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
+     Respond **ONLY** with a well-structured, very concise, and very short paragraph in plain text, focused on the overall sentiment.
+     """
+     response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)
+     # response is an AIMessage; .content is the plain-text summary string
+     return response.content if response else "Sentiment analysis summary not available."
+
+ def extract_json(response):
+     try:
+         return json.loads(response)
+     except json.JSONDecodeError:
+         return {}
+
+ def compare_articles(news_data, sentiment_counts, llm):
+     articles = news_data.get("Articles", [])
+     all_topics = [set(article["Topics"]) for article in articles]
+     common_topics = set.intersection(*all_topics) if all_topics else set()
+     topics_prompt = f"""
+     Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
+     even if they are phrased differently. The topics from each article are:
+     {all_topics}
+
+     Respond **ONLY** with a JSON format:
+     {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
+     """
+     response = llm.invoke([HumanMessage(content=topics_prompt)]).content
+     # Fall back to the literal set intersection if the model's reply is not valid JSON
+     contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics
+
+     total_articles = sum(sentiment_counts.values())
+     comparison_prompt = f"""
+     Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
+     - Sentiment distribution: {sentiment_counts}
+     - Commonly discussed topics across articles: {contextual_common_topics}
+
+     Consider the following:
+     1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
+     2. Overall implications for the company's reputation, stock potential, and public perception.
+     3. How sentiment varies across articles and its impact.
+
+     Respond **ONLY** with a concise and insightful summary in this JSON format:
+     {{
+         "Coverage Differences": [
+             {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
+             {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}}
+         ]
+     }}
+     """
+     response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
+     coverage_differences = extract_json(response).get("Coverage Differences", [])
+     final_sentiment = generate_final_sentiment(news_data, sentiment_counts, llm)
+     return {
+         "Company": news_data["Company"],
+         "Articles": articles,
+         "Comparative Sentiment Score": {
+             "Sentiment Distribution": sentiment_counts,
+             "Coverage Differences": coverage_differences,
+             "Topic Overlap": {
+                 "Common Topics": contextual_common_topics,
+                 "Unique Topics": {
+                     f"Article {i+1}": list(topics - set(contextual_common_topics))
+                     for i, topics in enumerate(all_topics)
+                 }
+             }
+         },
+         "Final Sentiment Analysis": final_sentiment
+     }
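The same pipeline can also be exercised without the HTTP layer, which is handy for testing the scraping and model steps in isolation. A sketch mirroring generate_summary in api.py, assuming a valid Groq key and locally available models; the key and company name are placeholders:

from utils import (
    get_llm,
    extract_titles_and_summaries,
    perform_sentiment_analysis,
    extract_topics_with_hf,
    compare_articles,
)

llm = get_llm("gsk-...")  # placeholder Groq API key

news = extract_titles_and_summaries("Tesla", num_articles=5)
news, counts = perform_sentiment_analysis(news)
news = extract_topics_with_hf(news)
result = compare_articles(news, counts, llm)

print(counts)
print(result["Final Sentiment Analysis"])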