Spaces:
Running
Running
Create StockSentimentAnalyser.py
Browse files- StockSentimentAnalyser.py +676 -0
StockSentimentAnalyser.py
ADDED
@@ -0,0 +1,676 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
4 |
+
from transformers import pipeline
|
5 |
+
from collections import Counter
|
6 |
+
import time
|
7 |
+
import numpy as np
|
8 |
+
import yfinance as yf
|
9 |
+
import pandas as pd
|
10 |
+
from datetime import datetime, timedelta
|
11 |
+
import json
|
12 |
+
from typing import Dict, List, Tuple
|
13 |
+
import re # Add this import
|
14 |
+
import warnings
|
15 |
+
warnings.filterwarnings('ignore')
|
16 |
+
|
17 |
+
# Load the FinBERT tone model once, at import time. The methods of
# StockSentimentAnalyzer below call the module-level `classifier` directly.
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
)
|
22 |
+
|
23 |
+
class StockSentimentAnalyzer:
|
24 |
+
def __init__(self):
    """Create the HTTP session and store the stock-API connection settings.

    The RapidAPI key is taken from the ``RAPIDAPI_KEY`` environment variable
    when that variable is set. Otherwise the key that was previously embedded
    in this file is used, so existing setups keep working unchanged.
    """
    import os  # local import so this method does not depend on file-level edits

    # Shared session with a browser-like User-Agent for page scraping.
    self.session = requests.Session()
    self.session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    })

    # API setup for Indian stock data
    self.api_url = "https://indian-stock-exchange-api2.p.rapidapi.com/stock"
    self.api_headers = {
        "x-rapidapi-host": "indian-stock-exchange-api2.p.rapidapi.com",
        # SECURITY: a credential committed to source control should be
        # rotated; prefer supplying RAPIDAPI_KEY through the environment.
        "x-rapidapi-key": os.environ.get(
            "RAPIDAPI_KEY",
            "a12f59fc40msh153da8fdf3885b6p100406jsn57d1d84b0d06",
        ),
    }

    # Symbol of the most recent get_comprehensive_analysis() call.
    self.symbol = None
|
37 |
+
|
38 |
+
def get_stock_data(self, symbol: str, period: str = "1mo") -> pd.DataFrame:
    """Download price history for *symbol* through yfinance.

    A symbol that ends with neither ``.NS`` nor ``.BO`` gets ``.NS``
    appended before the lookup.

    Args:
        symbol: Ticker symbol, with or without an exchange suffix.
        period: History period string passed to ``Ticker.history``.

    Returns:
        The DataFrame returned by ``Ticker.history``; an empty DataFrame if
        anything raises (the error is printed).
    """
    try:
        has_exchange_suffix = symbol.endswith(('.NS', '.BO'))
        if not has_exchange_suffix:
            symbol = symbol + '.NS'

        return yf.Ticker(symbol).history(period=period)
    except Exception as e:
        print(f"Error fetching stock data for {symbol}: {e}")
        return pd.DataFrame()
|
51 |
+
|
52 |
+
def get_news_from_api(self, company_name: str) -> List[Dict]:
    """Fetch recent news entries for *company_name* from the stock API.

    Sends a GET request to ``self.api_url`` using ``self.api_headers`` and
    returns whatever the JSON response stores under ``"recentNews"``.

    Args:
        company_name: Value sent as the ``name`` query parameter.

    Returns:
        The ``recentNews`` value from the response, or an empty list when
        the request fails or times out, the body is not a JSON object, or
        no news is present.
    """
    querystring = {"name": company_name}
    try:
        # Without a timeout a stalled connection would block the whole
        # analysis indefinitely.
        response = requests.get(
            self.api_url,
            headers=self.api_headers,
            params=querystring,
            timeout=10,
        )
        data = response.json()
        if not isinstance(data, dict):
            print("Error fetching news from API: unexpected response format")
            return []
        # Default to a list so the "no news" result has the same type as the
        # error path and the declared return type.
        return data.get("recentNews") or []
    except Exception as e:
        print(f"Error fetching news from API: {e}")
        return []
|
63 |
+
|
64 |
+
def scrape_news_sentiment(self, company_name: str, symbol: str) -> Dict:
    """Collect news for a company and score each item with the classifier.

    Two sources are used: article URLs returned by
    ``self.get_news_from_api`` (each page is downloaded and its paragraph
    text classified) and up to five headlines scraped from the Economic
    Times topic page for the company.

    Args:
        company_name: Name used for the API query and the topic-page URL.
        symbol: Not used by this method; kept for interface compatibility.

    Returns:
        Dict of parallel lists: ``headlines``, ``sources``,
        ``sentiment_scores`` (signed: positive label -> +score, negative
        label -> -score, anything else -> 0), ``dates`` and ``urls``.
        A failure on one article, or on the topic page, is printed and
        that item is skipped.
    """
    news_data = {
        'headlines': [],
        'sources': [],
        'sentiment_scores': [],
        'dates': [],
        'urls': []
    }

    def _classify(text):
        """Return (lowercase label, confidence, signed polarity) for *text*."""
        result = classifier(text)[0]
        label = result['label'].lower()
        score = result['score']
        if label == "positive":
            polarity = score
        elif label == "negative":
            polarity = -score
        else:
            polarity = 0
        return label, score, polarity

    def _record(headline, source, polarity, url):
        """Append one scored item to every parallel list in news_data."""
        news_data['headlines'].append(headline)
        news_data['sources'].append(source)
        news_data['sentiment_scores'].append(polarity)
        news_data['dates'].append(datetime.now())
        news_data['urls'].append(url)

    # Get news from API
    api_news = self.get_news_from_api(company_name)
    urls = [item["url"] for item in api_news if isinstance(item, dict) and "url" in item]

    print(f"Found {len(urls)} news articles from API")

    # Process each URL
    for i, news_url in enumerate(urls):
        try:
            print(f"\n[{i+1}/{len(urls)}] Analyzing: {news_url[:60]}...")
            html = requests.get(news_url, timeout=10).text
            soup = BeautifulSoup(html, "html.parser")

            # `soup.title.string` is None for an empty <title> or one with
            # nested tags; a None headline would later break the keyword
            # scan, so always end up with a real string here.
            title = soup.title.get_text().strip() if soup.title else ""
            if not title:
                title = "No title"

            # Keep only paragraphs long enough to be article text.
            paragraphs = soup.find_all("p")
            if not paragraphs:
                print("→ No content found")
                continue

            content = " ".join(p.get_text() for p in paragraphs if len(p.get_text()) > 40)
            content = content.strip()
            if len(content) < 100:
                print("→ Content too short")
                continue

            # Only the first 512 characters are classified. This is a
            # character cap, not a token cap; it normally keeps the input
            # within the model's sequence limit.
            label, score, polarity = _classify(content[:512])

            _record(title, 'API', polarity, news_url)

            print(f"→ Sentiment: {label.upper()} (confidence: {score:.1%})")
            time.sleep(1.2)  # polite delay

        except Exception as e:
            print(f"❌ Error: {str(e)}")
            continue

    # Economic Times
    try:
        et_url = f"https://economictimes.indiatimes.com/topic/{company_name.replace(' ', '-')}"
        response = self.session.get(et_url, timeout=10)
        soup = BeautifulSoup(response.content, 'html.parser')

        headlines = soup.find_all(['h2', 'h3', 'h4'], class_=re.compile('.*title.*|.*headline.*'))
        for headline in headlines[:5]:  # Limit to 5 headlines
            text = headline.get_text().strip()
            if len(text) > 10:
                _label, _score, polarity = _classify(text)
                _record(text, 'Economic Times', polarity, et_url)
    except Exception as e:
        print(f"Error scraping Economic Times: {e}")

    return news_data
|
159 |
+
|
160 |
+
def calculate_news_sentiment_score(self, news_data: Dict) -> Dict:
    """Aggregate per-article polarities and headline keywords into scores.

    Args:
        news_data: Dict with ``sentiment_scores`` (signed polarities) and
            ``headlines``. Headlines may contain ``None`` or non-string
            values; these are treated as empty or converted to text.

    Returns:
        Dict with ``positive_score``, ``negative_score``, ``fear_score``,
        ``confidence_score`` and ``overall_sentiment_score``, each rounded
        to two decimals. Every value is 50 when there are no sentiment
        scores.
    """
    if not news_data['sentiment_scores']:
        return {
            'positive_score': 50,
            'negative_score': 50,
            'fear_score': 50,
            'confidence_score': 50,
            'overall_sentiment_score': 50
        }

    sentiments = news_data['sentiment_scores']
    # Lowercase once, and tolerate missing or non-string headlines, which
    # would otherwise raise AttributeError in the keyword scan.
    headlines = [
        "" if headline is None else str(headline).lower()
        for headline in news_data['headlines']
    ]

    # Share of clearly positive / clearly negative articles, in percent.
    total = len(sentiments)
    positive_count = sum(1 for s in sentiments if s > 0.1)
    negative_count = sum(1 for s in sentiments if s < -0.1)
    positive_score = (positive_count / total) * 100
    negative_score = (negative_count / total) * 100

    # Mean absolute polarity, in percent.
    confidence_score = (sum(abs(s) for s in sentiments) / total) * 100

    # Keyword-based fear/confidence signals.
    # NOTE(review): matching is by substring, so e.g. 'up' also matches
    # inside 'group'. Kept as-is to preserve existing scores.
    fear_keywords = ['fall', 'drop', 'crash', 'loss', 'decline', 'bear', 'sell', 'down', 'negative', 'risk']
    confidence_keywords = ['rise', 'gain', 'bull', 'buy', 'up', 'positive', 'growth', 'profit', 'strong']

    fear_mentions = sum(
        1 for headline in headlines for keyword in fear_keywords
        if keyword in headline
    )
    confidence_mentions = sum(
        1 for headline in headlines for keyword in confidence_keywords
        if keyword in headline
    )

    # Mentions per headline, scaled by 200 and capped at 100.
    fear_score = min(100, (fear_mentions / len(headlines)) * 200) if headlines else 50
    confidence_boost = min(100, (confidence_mentions / len(headlines)) * 200) if headlines else 50

    # Overall sentiment: neutral 50, shifted by the two differences.
    overall_sentiment = 50 + ((positive_score - negative_score) * 0.3) + ((confidence_boost - fear_score) * 0.2)

    return {
        'positive_score': round(positive_score, 2),
        'negative_score': round(negative_score, 2),
        'fear_score': round(fear_score, 2),
        'confidence_score': round(confidence_score, 2),
        'overall_sentiment_score': round(min(100, max(0, overall_sentiment)), 2)
    }
|
213 |
+
|
214 |
+
def calculate_volatility_score(self, stock_data: pd.DataFrame) -> float:
    """Combine three volatility measures into a single capped score.

    The measures are the annualised standard deviation of daily returns,
    the 14-row average true range relative to the last close, and the
    full high-low range relative to the last close. They are weighted
    0.4 / 0.4 / 0.2 and the sum is capped at 100.

    Short histories are handled explicitly. Previously a history too short
    for the 14-row window produced NaN, and ``min(100, nan)`` then returned
    100. Now the ATR is averaged over the rows that exist, and a measure
    that is still NaN contributes 0.

    Args:
        stock_data: Price history with ``High``, ``Low`` and ``Close``.

    Returns:
        Score rounded to two decimals; 0 for an empty frame.
    """
    if stock_data.empty:
        return 0

    def _nan_to_zero(value):
        """Treat an undefined (NaN) measure as contributing nothing."""
        return 0.0 if np.isnan(value) else value

    close = stock_data['Close']
    last_close = close.iloc[-1]

    # Standard deviation of returns (annualized, in percent)
    returns = close.pct_change().dropna()
    std_vol = _nan_to_zero(returns.std() * np.sqrt(252) * 100)

    # Average True Range volatility. np.fmax ignores the NaN previous-close
    # on the first row, so that row's true range falls back to high - low.
    prev_close = close.shift()
    high_low = stock_data['High'] - stock_data['Low']
    high_close = np.abs(stock_data['High'] - prev_close)
    low_close = np.abs(stock_data['Low'] - prev_close)
    true_range = np.fmax(high_low, np.fmax(high_close, low_close))
    atr = true_range.rolling(14, min_periods=1).mean().iloc[-1]
    atr_vol = _nan_to_zero((atr / last_close) * 100)

    # Price range volatility
    price_range = _nan_to_zero(
        ((stock_data['High'].max() - stock_data['Low'].min()) / last_close) * 100
    )

    # Combine and cap at 100
    volatility_score = min(100, (std_vol * 0.4 + atr_vol * 0.4 + price_range * 0.2))
    return round(volatility_score, 2)
|
239 |
+
|
240 |
+
def calculate_momentum_score(self, stock_data: pd.DataFrame) -> float:
    """Blend RSI, moving-average position and price changes into a score.

    Weights: 14-row RSI 0.4, moving-average score 0.3, and 0.1 each for the
    1-, 5- and 20-row percentage changes (each rescaled around 50 and
    clamped to 0-100). A price change whose look-back exceeds the available
    history counts as 0; this now includes the 1-row change, which used to
    raise IndexError on a single-row history.

    Args:
        stock_data: Price history with a ``Close`` column.

    Returns:
        Score rounded to two decimals; 50 for an empty frame.
    """
    if stock_data.empty:
        return 50

    close = stock_data['Close']
    row_count = len(stock_data)
    current_price = close.iloc[-1]

    # RSI calculation (simple rolling means of gains and losses)
    delta = close.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    # RSI is NaN until 14 rows exist; fall back to the neutral value.
    current_rsi = rsi.iloc[-1] if not np.isnan(rsi.iloc[-1]) else 50

    def _pct_change_over(lookback):
        """Percent change versus the close *lookback* rows ago, or 0."""
        if row_count <= lookback:
            return 0
        base = close.iloc[-(lookback + 1)]
        return ((current_price - base) / base) * 100

    # Price momentum (% change over different periods)
    mom_1d = _pct_change_over(1)
    mom_5d = _pct_change_over(5)
    mom_20d = _pct_change_over(20)

    # Moving average trends
    ma_5 = close.rolling(5).mean().iloc[-1]
    ma_20 = close.rolling(20).mean().iloc[-1] if row_count > 20 else ma_5

    # NaN averages make every comparison False, leaving the neutral 50.
    ma_score = 50
    if current_price > ma_5 > ma_20:
        ma_score = 75
    elif current_price > ma_5:
        ma_score = 65
    elif current_price < ma_5 < ma_20:
        ma_score = 25
    elif current_price < ma_5:
        ma_score = 35

    # Combine scores
    momentum_score = (current_rsi * 0.4 + ma_score * 0.3 +
                      min(max(mom_1d * 2 + 50, 0), 100) * 0.1 +
                      min(max(mom_5d + 50, 0), 100) * 0.1 +
                      min(max(mom_20d * 0.5 + 50, 0), 100) * 0.1)

    return round(momentum_score, 2)
|
280 |
+
|
281 |
+
def calculate_liquidity_score(self, stock_data: pd.DataFrame) -> float:
    """Score volume behaviour on a 0-100 scale.

    Starts from 50, adds 0.3 times the percentage by which the mean volume
    of the last five rows differs from the overall mean, and adds 25 times
    the correlation between daily price changes and daily volume changes.
    The result is clamped to 0-100.

    Args:
        stock_data: Price history with ``Close`` and ``Volume`` columns.

    Returns:
        Score rounded to two decimals; 0 for an empty frame.
    """
    if stock_data.empty:
        return 0

    volumes = stock_data['Volume']
    overall_mean = volumes.mean()
    last_five_mean = volumes.tail(5).mean()

    # Relative change of recent volume versus the whole period, in percent.
    if overall_mean > 0:
        volume_trend = (last_five_mean - overall_mean) / overall_mean * 100
    else:
        volume_trend = 0

    # Correlation of price moves with volume moves; undefined -> 0.
    price_volume_corr = stock_data['Close'].pct_change().corr(volumes.pct_change())
    if np.isnan(price_volume_corr):
        price_volume_corr = 0

    raw_score = 50 + volume_trend * 0.3 + price_volume_corr * 25
    clamped = min(100, max(0, raw_score))

    return round(clamped, 2)
|
303 |
+
|
304 |
+
def calculate_technical_strength_score(self, stock_data: pd.DataFrame) -> float:
    """Average up to three indicator scores into a technical-strength score.

    Indicators, each clamped to 0-100:
      * position of the last close inside the 20-row high/low range,
      * deviation of the last close from the volume-weighted average price,
      * position of the last close inside the 20-row Bollinger Bands.

    An indicator that cannot be computed (NaN or infinite, for example
    with fewer than 20 rows or a zero-width range) is left out of the
    average. Previously such values were clamped to 0 and pulled the
    result down.

    Args:
        stock_data: Price history with ``High``, ``Low``, ``Close`` and
            ``Volume`` columns.

    Returns:
        Mean of the available indicator scores rounded to two decimals;
        50 for an empty frame or when no indicator is available.
    """
    if stock_data.empty:
        return 50

    close = stock_data['Close']
    volume = stock_data['Volume']
    current_price = close.iloc[-1]
    scores = []

    def _add_indicator(value):
        """Clamp a usable indicator value to 0-100 and record it."""
        if np.isfinite(value):
            scores.append(min(100, max(0, value)))

    # Support/Resistance levels: price position within the 20-row range
    range_high = stock_data['High'].rolling(20).max().iloc[-1]
    range_low = stock_data['Low'].rolling(20).min().iloc[-1]
    _add_indicator(((current_price - range_low) / (range_high - range_low)) * 100)

    # Volume-weighted average price deviation
    vwap = (close * volume).sum() / volume.sum()
    _add_indicator(50 + ((current_price - vwap) / vwap) * 100)

    # Bollinger Bands position (20-row mean +/- 2 standard deviations)
    ma_20 = close.rolling(20).mean().iloc[-1]
    std_20 = close.rolling(20).std().iloc[-1]
    upper_band = ma_20 + (std_20 * 2)
    lower_band = ma_20 - (std_20 * 2)
    _add_indicator(((current_price - lower_band) / (upper_band - lower_band)) * 100)

    if not scores:
        return 50
    return round(np.mean(scores), 2)
|
337 |
+
|
338 |
+
def calculate_market_correlation_score(self, symbol: str, stock_data: pd.DataFrame) -> float:
    """Score how closely the stock's daily returns track the ``^NSEI`` index.

    One month of index history is fetched through yfinance, both series
    are restricted to their shared dates, and the correlation of their
    daily returns is mapped linearly from [-1, 1] to [0, 100].

    Args:
        symbol: Not used by this method; kept for interface compatibility.
        stock_data: Price history with a ``Close`` column.

    Returns:
        Score rounded to two decimals. The neutral value 50 is returned
        when either frame is empty, fewer than five dates overlap, the
        correlation is NaN, or anything raises (the error is printed).
    """
    neutral = 50
    try:
        # Get Nifty 50 data for comparison
        nifty_data = yf.Ticker("^NSEI").history(period="1mo")

        if nifty_data.empty or stock_data.empty:
            return neutral

        # Align both series on the dates they share
        shared_dates = stock_data.index.intersection(nifty_data.index)
        if len(shared_dates) < 5:
            return neutral

        stock_returns = stock_data.loc[shared_dates]['Close'].pct_change().dropna()
        index_returns = nifty_data.loc[shared_dates]['Close'].pct_change().dropna()

        corr = stock_returns.corr(index_returns)
        if np.isnan(corr):
            return neutral

        # +1 (moves with the market) -> 100, -1 (moves against it) -> 0
        return round((corr + 1) * 50, 2)
    except Exception as e:
        print(f"Error calculating market correlation: {e}")
        return neutral
|
370 |
+
|
371 |
+
def calculate_growth_potential_score(self, stock_data: pd.DataFrame) -> float:
    """Average price-growth and volume-growth components into one score.

    Components, each clamped to 0-100:
      * "weekly": change of the last close versus ``Close.iloc[-5]``,
        doubled and centred on 50 (needs at least 5 rows),
      * "monthly": change versus ``Close.iloc[-20]``, centred on 50
        (needs at least 20 rows),
      * volume: change of the mean of the last five volumes versus the
        mean of the first five, halved and centred on 50 (skipped when
        the early mean is not positive).

    Args:
        stock_data: Price history with ``Close`` and ``Volume`` columns.

    Returns:
        Mean of the available components rounded to two decimals; 50 for
        an empty frame or when no component is available.
    """
    if stock_data.empty:
        return 50

    closes = stock_data['Close']
    volumes = stock_data['Volume']
    latest_close = closes.iloc[-1]
    row_count = len(stock_data)

    components = []

    # (rows back / minimum rows, multiplier applied to the percent change)
    price_windows = ((5, 2), (20, 1))
    for rows_back, multiplier in price_windows:
        if row_count < rows_back:
            continue
        base_price = closes.iloc[-rows_back]
        pct_growth = ((latest_close - base_price) / base_price) * 100
        components.append(min(100, max(0, 50 + pct_growth * multiplier)))

    # Volume growth trend: last five rows against the first five
    late_volume = volumes.tail(5).mean()
    early_volume = volumes.head(5).mean()
    if early_volume > 0:
        volume_growth = ((late_volume - early_volume) / early_volume) * 100
        components.append(min(100, max(0, 50 + volume_growth * 0.5)))

    combined = np.mean(components) if components else 50
    return round(combined, 2)
|
404 |
+
|
405 |
+
def calculate_stability_score(self, stock_data: pd.DataFrame) -> float:
    """Score price steadiness from return dispersion and opening gaps.

    The base score is ``100 - |std / mean| * 100`` of the daily returns,
    floored at 0, or 50 when the mean return is exactly zero. A penalty of
    up to 50 points is then subtracted: the average absolute gap between
    each open and the previous close, relative to the mean close, times
    1000.

    NOTE(review): whenever the return standard deviation is at least as
    large as the absolute mean return, the base score is already 0.

    Args:
        stock_data: Price history with ``Open`` and ``Close`` columns.

    Returns:
        Score rounded to two decimals; 50 for an empty frame.
    """
    if stock_data.empty:
        return 50

    closes = stock_data['Close']

    # Coefficient of variation of daily returns (lower = steadier)
    daily_returns = closes.pct_change().dropna()
    mean_return = daily_returns.mean()
    std_return = daily_returns.std()

    if mean_return == 0:
        score = 50
    else:
        variation = abs(std_return / mean_return)
        score = max(0, 100 - variation * 100)

    # Penalise gaps between an open and the previous close
    opening_gaps = (stock_data['Open'] - closes.shift()).abs().dropna()
    mean_gap = opening_gaps.mean()
    mean_close = closes.mean()

    if mean_close > 0:
        penalty = min(50, (mean_gap / mean_close) * 1000)
        score = max(0, score - penalty)

    return round(score, 2)
|
433 |
+
|
434 |
+
def calculate_risk_score(self, analysis: Dict) -> float:
    """Average the risk-related scores of an analysis into one risk score.

    Volatility and fear count as they are; liquidity, technical strength
    and stability are inverted (``100 - score``) so that a higher value
    always means more risk.

    Args:
        analysis: Dict containing ``volatility_score``, ``fear_score``,
            ``liquidity_score``, ``technical_strength_score`` and
            ``stability_score``.

    Returns:
        Mean of the five factors rounded to two decimals.
    """
    inverted_keys = ('liquidity_score', 'technical_strength_score', 'stability_score')

    factors = [analysis['volatility_score'], analysis['fear_score']]
    factors.extend(100 - analysis[key] for key in inverted_keys)

    return round(np.mean(factors), 2)
|
444 |
+
|
445 |
+
def calculate_investment_attractiveness(self, analysis: Dict) -> float:
    """Average sentiment, growth, momentum and inverted risk into one score.

    Args:
        analysis: Dict containing ``overall_sentiment_score``,
            ``growth_potential_score``, ``momentum_score`` and
            ``risk_score``.

    Returns:
        Mean of the four factors rounded to two decimals; risk enters as
        ``100 - risk_score``.
    """
    direct_keys = ('overall_sentiment_score', 'growth_potential_score', 'momentum_score')

    factors = [analysis[key] for key in direct_keys]
    factors.append(100 - analysis['risk_score'])

    return round(np.mean(factors), 2)
|
454 |
+
|
455 |
+
def get_comprehensive_analysis(self, symbol: str, company_name: str = None) -> Dict:
    """Run the full pipeline for one stock and return every computed score.

    Stores *symbol* on the instance, fetches price history, gathers and
    scores news, computes all technical and sentiment scores, then derives
    the risk score, risk level, risk factors and investment attractiveness.

    Args:
        symbol: Ticker symbol as entered by the caller.
        company_name: Name used for news lookups. When omitted, the symbol
            with any ``.NS`` / ``.BO`` suffix removed is used.

    Returns:
        The analysis dict, or an empty dict when no price data could be
        fetched.
    """
    self.symbol = symbol
    # Fall back to the bare symbol when no company name was supplied.
    if not company_name:
        company_name = symbol.replace('.NS', '').replace('.BO', '')

    banner = '=' * 80
    print(f"\n{banner}")
    print(f"🔍 ANALYZING: {company_name.upper()} ({symbol})")
    print(f"{banner}")

    # Get stock data
    print("📊 Fetching stock data...")
    stock_data = self.get_stock_data(symbol)

    if stock_data.empty:
        print("❌ Could not fetch stock data. Please check the symbol.")
        return {}

    # Get news sentiment
    print("📰 Scraping news sentiment...")
    news_data = self.scrape_news_sentiment(company_name, symbol)

    # Calculate all scores
    print("🧮 Calculating sentiment scores...")

    # Basic stock info: last close versus the one before it
    closes = stock_data['Close']
    current_price = closes.iloc[-1]
    if len(stock_data) > 1:
        prev_close = closes.iloc[-2]
    else:
        prev_close = current_price
    price_change = current_price - prev_close
    if prev_close != 0:
        price_change_pct = (price_change / prev_close) * 100
    else:
        price_change_pct = 0

    analysis = {
        'symbol': symbol,
        'company_name': company_name,
        'analysis_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'current_price': round(current_price, 2),
        'price_change': round(price_change, 2),
        'price_change_pct': round(price_change_pct, 2),
        'volume': int(stock_data['Volume'].iloc[-1]),
        'market_cap_approx': 'N/A',  # Would need additional API for exact market cap

        # Price/volume based scores
        'volatility_score': self.calculate_volatility_score(stock_data),
        'momentum_score': self.calculate_momentum_score(stock_data),
        'liquidity_score': self.calculate_liquidity_score(stock_data),
        'technical_strength_score': self.calculate_technical_strength_score(stock_data),
        'market_correlation_score': self.calculate_market_correlation_score(symbol, stock_data),
        'growth_potential_score': self.calculate_growth_potential_score(stock_data),
        'stability_score': self.calculate_stability_score(stock_data),

        # News sentiment scores
        **self.calculate_news_sentiment_score(news_data),

        # Additional metrics
        'news_count': len(news_data['headlines']),
        'recent_headlines': news_data['headlines'][:5]  # Top 5 headlines
    }

    # Risk score and the level it falls into (first threshold reached wins)
    analysis['risk_score'] = self.calculate_risk_score(analysis)

    risk_bands = ((75, "VERY HIGH"), (60, "HIGH"), (40, "MODERATE"), (25, "LOW"))
    for threshold, level in risk_bands:
        if analysis['risk_score'] >= threshold:
            analysis['risk_level'] = level
            break
    else:
        analysis['risk_level'] = "VERY LOW"

    # Individual warning flags derived from the scores
    risk_checks = (
        (analysis['volatility_score'] > 70, "High market volatility"),
        (analysis['fear_score'] > 60, "Elevated market fear"),
        (analysis['negative_score'] > 60, "Negative sentiment trend"),
        (analysis['market_correlation_score'] < 30, "Low market correlation"),
        (analysis['stability_score'] < 40, "Low stability indicators"),
    )
    analysis['risk_factors'] = [message for triggered, message in risk_checks if triggered]

    # Calculate investment attractiveness
    analysis['investment_attractiveness_score'] = self.calculate_investment_attractiveness(analysis)

    return analysis
|
546 |
+
|
547 |
+
def generate_recommendation(self, analysis: Dict) -> str:
    """Turn an analysis dict into a one-line trading recommendation.

    Rules are checked from strongest buy to strongest sell. STRONG SELL is
    tested before SELL: in the opposite order, a very negative and very
    risky stock (for example sentiment 20 with risk 80) matched the milder
    SELL rule first and never reached STRONG SELL.

    Args:
        analysis: Dict containing ``overall_sentiment_score``,
            ``risk_score``, ``momentum_score`` and
            ``investment_attractiveness_score``. An empty dict is allowed.

    Returns:
        The recommendation text, or an "insufficient data" message for an
        empty analysis.
    """
    if not analysis:
        return "Unable to generate recommendation - insufficient data"

    sentiment = analysis['overall_sentiment_score']
    risk = analysis['risk_score']
    momentum = analysis['momentum_score']
    attractiveness = analysis['investment_attractiveness_score']

    if sentiment > 70 and risk < 40 and momentum > 60 and attractiveness > 65:
        return "🟢 STRONG BUY - High sentiment, low risk, strong momentum"
    if sentiment > 60 and risk < 50 and attractiveness > 55:
        return "🟢 BUY - Positive sentiment with manageable risk"
    if 40 < sentiment < 60 and risk < 60:
        return "🟡 HOLD - Neutral sentiment, monitor closely"
    # The stronger signal must be checked first so SELL cannot shadow it.
    if sentiment < 30 or risk > 75:
        return "🔴 STRONG SELL - Very negative sentiment or very high risk"
    if sentiment < 40 and risk > 60:
        return "🔴 SELL - Negative sentiment with high risk"
    return "🟡 HOLD - Mixed signals, proceed with caution"
|
570 |
+
|
571 |
+
def display_analysis(self, analysis: Dict):
    """Print a formatted report for *analysis* and save it as a JSON file.

    The output file is named after the symbol stored in the analysis
    itself, not after ``self.symbol``. ``self.symbol`` changes with every
    call to ``get_comprehensive_analysis``, so an older analysis displayed
    later would otherwise be written under the wrong symbol.

    Args:
        analysis: Dict produced by ``get_comprehensive_analysis``. When
            empty, a notice is printed and nothing is saved.
    """
    if not analysis:
        print("❌ No analysis data available")
        return

    banner = '=' * 80
    print(f"\n{banner}")
    print("📈 COMPREHENSIVE STOCK ANALYSIS REPORT")
    print(f"{banner}")

    # Basic Info
    print("\n📊 BASIC INFORMATION:")
    print(f"Company: {analysis['company_name']}")
    print(f"Symbol: {analysis['symbol']}")
    print(f"Current Price: ₹{analysis['current_price']}")
    print(f"Price Change: ₹{analysis['price_change']} ({analysis['price_change_pct']:+.2f}%)")
    print(f"Volume: {analysis['volume']:,}")
    print(f"Analysis Date: {analysis['analysis_date']}")

    # Score sections: (section title, ((row label, analysis key), ...))
    score_sections = (
        ("\n🎯 SENTIMENT SCORES (0-100):", (
            ("Overall Sentiment Score", 'overall_sentiment_score'),
            ("Positive Score", 'positive_score'),
            ("Negative Score", 'negative_score'),
            ("Fear Score", 'fear_score'),
            ("Confidence Score", 'confidence_score'),
        )),
        ("\n⚙️ TECHNICAL SCORES (0-100):", (
            ("Volatility Score", 'volatility_score'),
            ("Momentum Score", 'momentum_score'),
            ("Technical Strength", 'technical_strength_score'),
            ("Liquidity Score", 'liquidity_score'),
            ("Market Correlation", 'market_correlation_score'),
        )),
        ("\n🚀 ADVANCED SCORES (0-100):", (
            ("Growth Potential", 'growth_potential_score'),
            ("Stability Score", 'stability_score'),
            ("Risk Score", 'risk_score'),
            ("Investment Attractiveness", 'investment_attractiveness_score'),
        )),
    )
    for section_title, rows in score_sections:
        print(section_title)
        for label, key in rows:
            print(f"{label}: {analysis[key]}/100")

    # Recommendation
    recommendation = self.generate_recommendation(analysis)
    print("\n💡 RECOMMENDATION:")
    print(f"{recommendation}")

    # News Analysis
    print("\n📰 NEWS ANALYSIS:")
    print(f"Headlines Analyzed: {analysis['news_count']}")
    if analysis['recent_headlines']:
        print("\n📋 Recent Headlines:")
        for i, headline in enumerate(analysis['recent_headlines'], 1):
            print(f"{i}. {headline}")

    # Risk Assessment
    print("\n⚠️ RISK ASSESSMENT:")
    print(f"Risk Level: {analysis['risk_level']}")
    print("Key Risk Factors:")
    for risk_factor in analysis['risk_factors']:
        print(f"- {risk_factor}")

    # Save analysis to JSON, named after the analysis's own symbol
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f"analysis_{analysis['symbol']}_{timestamp}.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(analysis, f, indent=4)
    print(f"\n💾 Analysis saved to {output_file}")
|
638 |
+
|
639 |
+
def main():
    """Run the interactive prompt loop for the stock analyzer.

    Repeatedly asks for a stock symbol and an optional company name, runs
    the analysis and displays it. The loop ends when the user types
    ``quit``, when standard input is closed, or on Ctrl-C. Closed input
    used to be caught by the generic error handler, which made the loop
    spin forever.
    """
    analyzer = StockSentimentAnalyzer()

    print("🚀 Welcome to Stock Sentiment Analyzer!")
    print("Enter stock symbols (e.g., RELIANCE, TCS, HDFCBANK)")
    print("The system will automatically add .NS for NSE stocks")
    print("Type 'quit' to exit\n")

    while True:
        try:
            # Symbols are normalised to upper case, so compare against QUIT.
            user_input = input("Enter stock symbol: ").strip().upper()

            if user_input == 'QUIT':
                print("👋 Thank you for using Stock Sentiment Analyzer!")
                break

            if not user_input:
                print("❌ Please enter a valid stock symbol")
                continue

            # Get company name (optional)
            company_name = input("Enter company name (optional, press Enter to skip): ").strip()

            # Perform analysis
            analysis = analyzer.get_comprehensive_analysis(user_input, company_name if company_name else None)

            # Display results
            if analysis:
                analyzer.display_analysis(analysis)

        except (EOFError, KeyboardInterrupt):
            # No more input can arrive (stdin closed) or the user
            # interrupted: leave the loop instead of retrying.
            print("\n👋 Thank you for using Stock Sentiment Analyzer!")
            break
        except Exception as e:
            print(f"❌ Error: {str(e)}")
            print("Please try again with a different stock symbol")


if __name__ == "__main__":
    main()
|