carpeadiam committed on
Commit 2d7977e · verified · 1 Parent(s): 79b0d58

Create StockSentimentNews.py

Files changed (1)
  1. StockSentimentNews.py +163 -0
StockSentimentNews.py ADDED
@@ -0,0 +1,163 @@
+ import os
+ import time
+ import json
+ import requests
+ from collections import Counter
+ from bs4 import BeautifulSoup
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+
+ def sentiment_analysis(querystring, headers):
+
+     # Load FinBERT
+     model_name = "yiyanghkust/finbert-tone"
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForSequenceClassification.from_pretrained(model_name)
+     classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
+
+     def calculate_sentiment_scores(sentiment_data):
+         # Keep only the numeric label counts; 'details' holds the per-article records
+         counts = {k: v for k, v in sentiment_data.items() if k != 'details'}
+         total = sum(counts.values())
+         confidences = [d["confidence"] for d in sentiment_data.get("details", [])]
+
+         return {
+             "overall": max(counts, key=counts.get) if total > 0 else "neutral",
+             "positive_percent": counts.get("positive", 0) / total * 100 if total > 0 else 0,
+             "negative_percent": counts.get("negative", 0) / total * 100 if total > 0 else 0,
+             # Ratio of positive to negative articles; infinite when no negatives were seen
+             "sentiment_ratio": counts.get("positive", 0) / counts.get("negative", 0) if counts.get("negative", 0) else float("inf"),
+             "average_confidence": sum(confidences) / len(confidences) if confidences else 0
+         }
+
+     # API setup
+     url = "https://indian-stock-exchange-api2.p.rapidapi.com/stock"
+
+     # Step 1: Get stock data
+     print("Fetching stock data...")
+     response = requests.get(url, headers=headers, params=querystring)
+     data = response.json()
+     news_data = data.get("recentNews", [])  # list of article dicts
+     print(f"Found {len(news_data)} news articles")
+
+     # Step 2: Extract URLs
+     urls = [item["url"] for item in news_data if isinstance(item, dict) and "url" in item]
+     print(f"Processing {len(urls)} articles...")
+
+     # Step 3: Analyze sentiment for each article
+     summary = Counter()
+     details = []
+
+     for i, news_item in enumerate(news_data):
+         news_url = news_item.get("url")
+         headline = news_item.get("headline", "")
+         intro = news_item.get("intro", "")
+
+         content_for_sentiment = ""
+         if news_url:
+             try:
+                 print(f"\n[{i+1}/{len(news_data)}] Analyzing: {news_url[:60]}...")
+                 html = requests.get(news_url, timeout=10).text
+                 soup = BeautifulSoup(html, "html.parser")
+
+                 # Grab <p> tags and keep only substantial paragraphs
+                 paragraphs = soup.find_all("p")
+                 if not paragraphs:
+                     raise ValueError("No content found in paragraphs")
+
+                 content_for_sentiment = " ".join(p.get_text() for p in paragraphs if len(p.get_text()) > 40).strip()
+                 if len(content_for_sentiment) < 100:
+                     print("→ Content too short from web scraping, falling back to headline/intro")
+                     content_for_sentiment = headline + ". " + intro
+
+             except Exception as e:
+                 print(f"❌ Error scraping {news_url}: {e}. Falling back to headline/intro for sentiment analysis.")
+                 content_for_sentiment = headline + ". " + intro
+         else:
+             print(f"\n[{i+1}/{len(news_data)}] No URL provided, using headline/intro for sentiment analysis.")
+             content_for_sentiment = headline + ". " + intro
+
+         if not content_for_sentiment.strip():
+             print("→ No content available for sentiment analysis, skipping.")
+             continue
+
+         # FinBERT accepts at most 512 tokens; slicing characters is a rough proxy for that limit
+         content_for_sentiment = content_for_sentiment[:1000]
+         result = classifier(content_for_sentiment[:512])[0]
+         label = result["label"].lower()
+         score = round(result["score"], 3)
+
+         summary[label] += 1
+         details.append({
+             "url": news_url,
+             "title": news_item.get("title", "No title"),  # use the item's title if available
+             "sentiment": label,
+             "confidence": score,
+             "content_length": len(content_for_sentiment),
+             "image_222x148": news_item.get("image_222x148"),
+             "intro": intro,
+             "headline": headline
+         })
+
+         print(f"→ Sentiment: {label.upper()} (confidence: {score:.1%})")
+         time.sleep(1.2)  # throttle requests so news sites aren't hammered
+
+     # Step 4: Generate comprehensive output
+     sentiment_scores = calculate_sentiment_scores({
+         "positive": summary["positive"],
+         "negative": summary["negative"],
+         "neutral": summary["neutral"],
+         "details": details
+     })
+
+     output = {
+         "metadata": {
+             "total_articles": len(news_data),
+             "processed_articles": len(details),
+             "processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
+         },
+         "sentiment_metrics": {
+             "overall_score": sentiment_scores["overall"],  # a string label, not a number
+             "positive_score": round(sentiment_scores["positive_percent"], 2),
+             "negative_score": round(sentiment_scores["negative_percent"], 2),
+             "sentiment_ratio": round(sentiment_scores["sentiment_ratio"], 2),
+             "average_confidence": round(sentiment_scores["average_confidence"], 2)
+         },
+         "article_details": details
+     }
+
+     # Print formatted results
+     print("\n=== SENTIMENT ANALYSIS RESULTS ===")
+     print(f"Overall Sentiment Score: {output['sentiment_metrics']['overall_score']}")
+     print(f"Positive/Negative Ratio: {output['sentiment_metrics']['sentiment_ratio']:.2f}")
+     print(f"Average Confidence: {output['sentiment_metrics']['average_confidence']:.1%}")
+
+     with open("sentiment_results.json", "w") as f:
+         json.dump(output, f, indent=2)
+     print("Results saved to sentiment_results.json")
+     return output
+
+ def main(querystring):
+     """
+     Run the sentiment analysis for a single stock.
+
+     Args:
+         querystring: Dictionary containing the stock name (e.g. {'name': 'HDFC BANK'})
+
+     Returns:
+         Dictionary containing sentiment analysis results
+     """
+     try:
+         headers = {
+             "x-rapidapi-host": "indian-stock-exchange-api2.p.rapidapi.com",
+             # Read the key from the environment (RAPIDAPI_KEY is a name chosen here)
+             # rather than committing a live credential to the repository
+             "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", "")
+         }
+
+         # Run the sentiment analysis
+         return sentiment_analysis(querystring, headers)
+
+     except Exception as e:
+         print(f"Error in main function: {e}")
+         return {"error": str(e)}
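A minimal usage sketch, assuming the module is importable as StockSentimentNews and that the RapidAPI key is exported in the RAPIDAPI_KEY environment variable (a name chosen in this revision, not mandated by RapidAPI); the query shape follows the main() docstring:

    import os
    from StockSentimentNews import main

    os.environ.setdefault("RAPIDAPI_KEY", "<your-rapidapi-key>")  # placeholder, not a real key
    results = main({"name": "HDFC BANK"})  # example stock name from the docstring

    if "error" not in results:
        metrics = results["sentiment_metrics"]
        print(f"Overall: {metrics['overall_score']} "
              f"({metrics['positive_score']}% positive, {metrics['negative_score']}% negative)")

The same payload is also written to sentiment_results.json in the working directory, with the metadata, sentiment_metrics, and article_details keys built in Step 4 above.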