# Importing libraries
from bs4 import BeautifulSoup
import requests
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq
import json
from dotenv import load_dotenv
import os
from transformers import pipeline

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv('GROQ_API_KEY')

# Initialize the LLM model
llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")

# Function to extract news titles and summaries from Economic Times
def extract_titles_and_summaries(company_name, num_articles=10):
    url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(f"Failed to fetch the webpage. Status code: {response.status_code}")
            return {"Company": company_name, "Articles": []}

        soup = BeautifulSoup(response.content, "html.parser")
        articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
        extracted_articles = []

        for article in articles:
            title_tag = article.find('h2')
            if title_tag:
                link_tag = title_tag.find('a')
                title = link_tag.get_text(strip=True) if link_tag else "No Title Found"
            else:
                title = "No Title Found"

            summary_tag = article.find('p')
            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"

            extracted_articles.append({
                "Title": title,
                "Summary": summary
            })

        return {
            "Company": company_name,
            "Articles": extracted_articles
        }
    except Exception as e:
        print(f"An error occurred: {e}")
        return {"Company": company_name, "Articles": []}
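
# Example return shape (values illustrative):
# {"Company": "TCS", "Articles": [{"Title": "...", "Summary": "..."}]}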

# Function to perform sentiment analysis on extracted news articles
def perform_sentiment_analysis(news_data):
    articles = news_data.get("Articles", [])
    # device=-1 runs on CPU; set device to a GPU index (e.g., 0) if one is available
    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=-1)
    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        sentiment_result = pipe(content, truncation=True)[0]  # truncate inputs beyond the model's max length

        sentiment_map = {
            "positive": "Positive",
            "negative": "Negative",
            "neutral": "Neutral",
            "very positive": "Positive",
            "very negative": "Negative"
        }

        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
        score = float(sentiment_result["score"])

        article["Sentiment"] = sentiment
        article["Score"] = score

        if sentiment in sentiment_counts:
            sentiment_counts[sentiment] += 1

    return news_data, sentiment_counts

# Function to extract topics from articles using Hugging Face model
def extract_topics_with_hf(news_data):
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": []
    }
    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=-1)
    articles = news_data.get("Articles", [])

    for article in articles:
        content = f"{article['Title']} {article['Summary']}"
        topics_result = topic_pipe(content, top_k=3, truncation=True)
        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]

        structured_data["Articles"].append({
            "Title": article["Title"],
            "Summary": article["Summary"],
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics
        })
    return structured_data

# Function to generate a final sentiment summary using LLM
def generate_final_sentiment(news_data, sentiment_counts):
    company_name = news_data["Company"]
    total_articles = sum(sentiment_counts.values())
    combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])

    prompt = f"""
    Based on the analysis of {total_articles} articles about the company "{company_name}":
    - Positive articles: {sentiment_counts['Positive']}
    - Negative articles: {sentiment_counts['Negative']}
    - Neutral articles: {sentiment_counts['Neutral']}
    The following are the summarized key points from the articles: "{combined_summaries}".
    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
    Respond **ONLY** with a single, well-structured, concise paragraph in plain text, focusing on the overall sentiment.
    """

    response = llm.invoke([HumanMessage(content=prompt)], max_tokens=200)
    # response is an AIMessage; fall back to a default string if the call returns nothing
    return response.content if response else "Sentiment analysis summary not available."

# Function to extract JSON response from LLM output
def extract_json(response):
    try:
        # LLM replies often wrap JSON in extra text; parse from the first '{' to the last '}'
        start, end = response.find("{"), response.rfind("}")
        if start == -1 or end == -1:
            return {}
        return json.loads(response[start:end + 1])
    except json.JSONDecodeError:
        return {}

# Function to compare articles based on common topics and sentiment variations
def compare_articles(news_data, sentiment_counts):
    articles = news_data.get("Articles", [])
    all_topics = [set(article["Topics"]) for article in articles]
    common_topics = set.intersection(*all_topics) if all_topics else set()

    topics_prompt = f"""
    Analyze the following article topics and identify **only three** key themes that are common across multiple articles, 
    even if they are phrased differently. The topics from each article are:
    {all_topics}

    Respond **ONLY** with a JSON format:
    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
    """
    
    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
    contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics

    total_articles = sum(sentiment_counts.values())

    comparison_prompt = f"""
    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
    - Sentiment distribution: {sentiment_counts}
    - Commonly discussed topics across articles: {contextual_common_topics}

    Consider the following:
    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
    2. Overall implications for the company's reputation, stock potential, and public perception.
    3. How sentiment varies across articles and its impact.

    Respond **ONLY** with a concise and insightful summary in this JSON format:
    {{
        "Coverage Differences": [
            {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
            {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}}
        ]
    }}
    """

    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
    coverage_differences = extract_json(response).get("Coverage Differences", [])

    final_sentiment = generate_final_sentiment(news_data, sentiment_counts)

    return {
        "Company": news_data["Company"],
        "Articles": articles,
        "Comparative Sentiment Score": {
            "Sentiment Distribution": sentiment_counts,
            "Coverage Differences": coverage_differences,  
            "Topic Overlap": {
                "Common Topics": contextual_common_topics,
                "Unique Topics": {
                    f"Article {i+1}": list(topics - set(contextual_common_topics))
                    for i, topics in enumerate(all_topics)
                }
            }
        },
        "Final Sentiment Analysis": final_sentiment
    }
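
# Minimal usage sketch (assumptions: GROQ_API_KEY is set in .env, the Economic Times
# page is reachable, and "TCS" is only an illustrative company name):
if __name__ == "__main__":
    news_data = extract_titles_and_summaries("TCS")
    if news_data.get("Articles"):
        news_data, sentiment_counts = perform_sentiment_analysis(news_data)
        news_data = extract_topics_with_hf(news_data)
        report = compare_articles(news_data, sentiment_counts)
        print(json.dumps(report, indent=2))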