Divyansh Kushwaha committed on
Commit
fdfc67e
·
1 Parent(s): 94c21a4
Files changed (1) hide show
  1. utils.py +39 -6
utils.py CHANGED
@@ -1,7 +1,6 @@
1
  import json
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from transformers import pipeline
5
  from langchain.schema import HumanMessage
6
  from langchain_groq import ChatGroq
7
  from dotenv import load_dotenv
@@ -48,8 +47,9 @@ def extract_titles_and_summaries(company_name, num_articles=10):
48
  return []
49
 
50
  def perform_sentiment_analysis(news_data):
 
51
  articles = news_data.get("Articles", [])
52
- pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
53
  sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
54
 
55
  for article in articles:
@@ -75,24 +75,57 @@ def perform_sentiment_analysis(news_data):
75
 
76
  return news_data, sentiment_counts
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def extract_topics_with_hf(news_data):
79
  structured_data = {
80
  "Company": news_data.get("Company", "Unknown"),
81
  "Articles": []
82
  }
83
- topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
84
  articles = news_data.get("Articles", [])
85
  for article in articles:
86
  content = f"{article['Title']} {article['Summary']}"
87
- topics_result = topic_pipe(content, top_k=3)
88
- topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  structured_data["Articles"].append({
91
  "Title": article["Title"],
92
  "Summary": article["Summary"],
93
  "Sentiment": article.get("Sentiment", "Unknown"),
94
  "Score": article.get("Score", 0.0),
95
- "Topics": topics
96
  })
97
  return structured_data
98
 
 
1
  import json
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
  from langchain.schema import HumanMessage
5
  from langchain_groq import ChatGroq
6
  from dotenv import load_dotenv
 
47
  return []
48
 
49
  def perform_sentiment_analysis(news_data):
50
+ from transformers import pipeline
51
  articles = news_data.get("Articles", [])
52
+ pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis",device=1)
53
  sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
54
 
55
  for article in articles:
 
75
 
76
  return news_data, sentiment_counts
77
 
78
+ # def extract_topics_with_hf(news_data):
79
+ # structured_data = {
80
+ # "Company": news_data.get("Company", "Unknown"),
81
+ # "Articles": []
82
+ # }
83
+ # topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification",device=1)
84
+ # articles = news_data.get("Articles", [])
85
+ # for article in articles:
86
+ # content = f"{article['Title']} {article['Summary']}"
87
+ # topics_result = topic_pipe(content, top_k=3)
88
+ # topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
89
+
90
+ # structured_data["Articles"].append({
91
+ # "Title": article["Title"],
92
+ # "Summary": article["Summary"],
93
+ # "Sentiment": article.get("Sentiment", "Unknown"),
94
+ # "Score": article.get("Score", 0.0),
95
+ # "Topics": topics
96
+ # })
97
+ # return structured_data
98
+
99
def extract_topics_with_hf(news_data):
    """Annotate each article with up to three one-word topics via the Groq LLM.

    Args:
        news_data: dict with a "Company" key and an "Articles" list; each
            article dict is expected to carry "Title" and "Summary"
            (and optionally "Sentiment"/"Score" from the sentiment pass).

    Returns:
        dict with "Company" and "Articles"; every article gains a "Topics"
        list, falling back to ["Unknown"] when extraction or parsing fails.
    """
    structured_data = {
        "Company": news_data.get("Company", "Unknown"),
        "Articles": [],
    }
    for article in news_data.get("Articles", []):
        # Use .get with defaults so a malformed article can't KeyError here,
        # consistent with how Sentiment/Score are read below.
        title = article.get("Title", "")
        summary = article.get("Summary", "")
        content = f"{title} {summary}"
        # Prompt for Groq AI: ask for exactly three one-word topics as JSON.
        prompt = f"""
        Analyze the following content: "{content}"
        Extract and return **exactly three key topics** most relevant to this content.
        The topics should be of one word after analyzing the content.
        Respond in a JSON format like this:
        {{"Topics": ["topic1", "topic2", "topic3"]}}
        """
        try:
            # llm is the module-level ChatGroq client configured at import time.
            response = llm.invoke([HumanMessage(content=prompt)]).content
            # Models often wrap the JSON in ``` fences or surround it with
            # prose; isolate the outermost {...} object before parsing so
            # valid answers are not silently discarded.
            start = response.find("{")
            end = response.rfind("}")
            payload = response[start:end + 1] if 0 <= start < end else response
            topics_result = json.loads(payload).get("Topics", ["Unknown"])
        except Exception as e:
            # Best-effort: any failure (network, model, JSON) degrades to
            # "Unknown" rather than aborting the whole batch.
            print(f"Error while extracting topics: {e}")
            topics_result = ["Unknown"]

        structured_data["Articles"].append({
            "Title": title,
            "Summary": summary,
            "Sentiment": article.get("Sentiment", "Unknown"),
            "Score": article.get("Score", 0.0),
            "Topics": topics_result,
        })
    return structured_data
131