Divyansh Kushwaha
committed
Commit · fdfc67e
1 Parent(s): 94c21a4
Updated
utils.py
CHANGED
@@ -1,7 +1,6 @@
 import json
 import requests
 from bs4 import BeautifulSoup
-from transformers import pipeline
 from langchain.schema import HumanMessage
 from langchain_groq import ChatGroq
 from dotenv import load_dotenv
@@ -48,8 +47,9 @@ def extract_titles_and_summaries(company_name, num_articles=10):
         return []

 def perform_sentiment_analysis(news_data):
+    from transformers import pipeline
     articles = news_data.get("Articles", [])
-    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
+    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=1)
     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

     for article in articles:
@@ -75,24 +75,57 @@ def perform_sentiment_analysis(news_data):

     return news_data, sentiment_counts

+# def extract_topics_with_hf(news_data):
+#     structured_data = {
+#         "Company": news_data.get("Company", "Unknown"),
+#         "Articles": []
+#     }
+#     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=1)
+#     articles = news_data.get("Articles", [])
+#     for article in articles:
+#         content = f"{article['Title']} {article['Summary']}"
+#         topics_result = topic_pipe(content, top_k=3)
+#         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+
+#         structured_data["Articles"].append({
+#             "Title": article["Title"],
+#             "Summary": article["Summary"],
+#             "Sentiment": article.get("Sentiment", "Unknown"),
+#             "Score": article.get("Score", 0.0),
+#             "Topics": topics
+#         })
+#     return structured_data
+
 def extract_topics_with_hf(news_data):
     structured_data = {
         "Company": news_data.get("Company", "Unknown"),
         "Articles": []
     }
-    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
     articles = news_data.get("Articles", [])
     for article in articles:
         content = f"{article['Title']} {article['Summary']}"
-        topics_result = topic_pipe(content, top_k=3)
-        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+        # Define the prompt for Groq AI
+        prompt = f"""
+        Analyze the following content: "{content}"
+        Extract and return **exactly three key topics** most relevant to this content.
+        The topics should be of one word after analyzing the content.
+        Respond in a JSON format like this:
+        {{"Topics": ["topic1", "topic2", "topic3"]}}
+        """
+        try:
+            # Use Groq AI to invoke the model
+            response = llm.invoke([HumanMessage(content=prompt)]).content
+            topics_result = json.loads(response).get("Topics", ["Unknown"])  # Parse JSON response
+        except Exception as e:
+            print(f"Error while extracting topics: {e}")
+            topics_result = ["Unknown"]

         structured_data["Articles"].append({
             "Title": article["Title"],
             "Summary": article["Summary"],
             "Sentiment": article.get("Sentiment", "Unknown"),
             "Score": article.get("Score", 0.0),
-            "Topics": topics
+            "Topics": topics_result
         })
     return structured_data

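Note: the rewritten extract_topics_with_hf calls a module-level `llm` client that is defined elsewhere in utils.py and is not shown in this diff. Below is a minimal sketch of how that client is presumably wired up and of the invoke-and-parse pattern the new code relies on; the Groq model name is an assumption, not taken from the commit.

# Sketch only (assumptions: ChatGroq model name; GROQ_API_KEY supplied via a local .env file).
import json
from dotenv import load_dotenv
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq

load_dotenv()  # makes GROQ_API_KEY from .env visible to ChatGroq
llm = ChatGroq(model="llama-3.1-8b-instant")  # placeholder model name

prompt = 'Respond in JSON like this: {"Topics": ["topic1", "topic2", "topic3"]}'
try:
    # Same call pattern as the new extract_topics_with_hf: invoke, then parse the JSON reply.
    response = llm.invoke([HumanMessage(content=prompt)]).content
    topics = json.loads(response).get("Topics", ["Unknown"])
except Exception:
    # A network failure or a non-JSON reply falls back to the same sentinel the diff uses.
    topics = ["Unknown"]
print(topics)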