Divyansh Kushwaha
committed
Commit · fdfc67e
1 Parent(s): 94c21a4
Updated
utils.py
CHANGED
@@ -1,7 +1,6 @@
 import json
 import requests
 from bs4 import BeautifulSoup
-from transformers import pipeline
 from langchain.schema import HumanMessage
 from langchain_groq import ChatGroq
 from dotenv import load_dotenv
@@ -48,8 +47,9 @@ def extract_titles_and_summaries(company_name, num_articles=10):
         return []

 def perform_sentiment_analysis(news_data):
+    from transformers import pipeline
     articles = news_data.get("Articles", [])
-    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
+    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis", device=1)
     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}

     for article in articles:
@@ -75,24 +75,57 @@ def perform_sentiment_analysis(news_data):

     return news_data, sentiment_counts

+# def extract_topics_with_hf(news_data):
+#     structured_data = {
+#         "Company": news_data.get("Company", "Unknown"),
+#         "Articles": []
+#     }
+#     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification", device=1)
+#     articles = news_data.get("Articles", [])
+#     for article in articles:
+#         content = f"{article['Title']} {article['Summary']}"
+#         topics_result = topic_pipe(content, top_k=3)
+#         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+
+#         structured_data["Articles"].append({
+#             "Title": article["Title"],
+#             "Summary": article["Summary"],
+#             "Sentiment": article.get("Sentiment", "Unknown"),
+#             "Score": article.get("Score", 0.0),
+#             "Topics": topics
+#         })
+#     return structured_data
+
 def extract_topics_with_hf(news_data):
     structured_data = {
         "Company": news_data.get("Company", "Unknown"),
         "Articles": []
     }
-    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
     articles = news_data.get("Articles", [])
     for article in articles:
         content = f"{article['Title']} {article['Summary']}"
-        topics_result = topic_pipe(content, top_k=3)
-        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+        # Define the prompt for Groq AI
+        prompt = f"""
+        Analyze the following content: "{content}"
+        Extract and return **exactly three key topics** most relevant to this content.
+        The topics should be of one word after analyzing the content.
+        Respond in a JSON format like this:
+        {{"Topics": ["topic1", "topic2", "topic3"]}}
+        """
+        try:
+            # Use Groq AI to invoke the model
+            response = llm.invoke([HumanMessage(content=prompt)]).content
+            topics_result = json.loads(response).get("Topics", ["Unknown"])  # Parse JSON response
+        except Exception as e:
+            print(f"Error while extracting topics: {e}")
+            topics_result = ["Unknown"]

         structured_data["Articles"].append({
             "Title": article["Title"],
             "Summary": article["Summary"],
             "Sentiment": article.get("Sentiment", "Unknown"),
             "Score": article.get("Score", 0.0),
-            "Topics": topics
+            "Topics": topics_result
         })
     return structured_data

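Note: the rewritten extract_topics_with_hf calls a module-level `llm` client that is defined elsewhere in utils.py and is not shown in this diff. Below is a minimal sketch of how that client is presumably wired up and of the invoke-and-parse pattern the new code relies on; the Groq model name is an assumption, not taken from the commit.

# Sketch only (assumptions: ChatGroq model name; GROQ_API_KEY supplied via a local .env file).
import json
from dotenv import load_dotenv
from langchain.schema import HumanMessage
from langchain_groq import ChatGroq

load_dotenv()  # makes GROQ_API_KEY from .env visible to ChatGroq
llm = ChatGroq(model="llama-3.1-8b-instant")  # placeholder model name

prompt = 'Respond in JSON like this: {"Topics": ["topic1", "topic2", "topic3"]}'
try:
    # Same call pattern as the new extract_topics_with_hf: invoke, then parse the JSON reply.
    response = llm.invoke([HumanMessage(content=prompt)]).content
    topics = json.loads(response).get("Topics", ["Unknown"])
except Exception:
    # A network failure or a non-JSON reply falls back to the same sentinel the diff uses.
    topics = ["Unknown"]
print(topics)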