# centiMent / StockSentimentNews.py
import requests
from bs4 import BeautifulSoup
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from collections import Counter
import time
import json
import os
def sentiment_analysis(querystring, headers):
# Load FinBERT
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
    def calculate_sentiment_scores(sentiment_data):
        # Keep only the numeric label counts; 'details' holds the per-article records
        counts = {k: v for k, v in sentiment_data.items() if k != "details"}
        total = sum(counts.values())
        positive = counts.get("positive", 0)
        negative = counts.get("negative", 0)
        # Per-article confidence scores live inside the detail records
        confidences = [d["confidence"] for d in sentiment_data.get("details", [])]
        return {
            "overall": max(counts, key=counts.get) if total > 0 else "neutral",
            "positive_percent": positive / total * 100 if total > 0 else 0,
            "negative_percent": negative / total * 100 if total > 0 else 0,
            # With no negative articles the ratio is unbounded
            "sentiment_ratio": positive / negative if negative else (float("inf") if positive else 0.0),
            "average_confidence": sum(confidences) / len(confidences) if confidences else 0,
        }
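
    # A quick worked example of the scoring above (hypothetical counts):
    # calculate_sentiment_scores({"positive": 3, "negative": 1, "neutral": 1, "details": [...]})
    # -> overall "positive", positive_percent 60.0, negative_percent 20.0,
    #    sentiment_ratio 3.0, and average_confidence taken over the detail records.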
# API setup
url = "https://indian-stock-exchange-api2.p.rapidapi.com/stock"
# Step 1: Get stock data
print("Fetching stock data...")
response = requests.get(url, headers=headers, params=querystring)
data = response.json()
    news_data = data.get("recentNews", [])  # list of article dicts
print(f"Found {len(news_data)} news articles")
# Step 2: Extract URLs
urls = [item["url"] for item in news_data if isinstance(item, dict) and "url" in item]
print(f"Processing {len(urls)} articles...")
# Step 3: Analyze sentiment for each article
summary = Counter()
details = []
for i, news_item in enumerate(news_data):
news_url = news_item.get("url")
headline = news_item.get("headline", "")
intro = news_item.get("intro", "")
content_for_sentiment = ""
if news_url:
try:
print(f"\n[{i+1}/{len(urls)}] Analyzing: {news_url[:60]}...")
html = requests.get(news_url, timeout=10).text
soup = BeautifulSoup(html, "html.parser")
# Grab <p> tags and filter
paragraphs = soup.find_all("p")
if not paragraphs:
raise ValueError("No content found in paragraphs")
content_for_sentiment = " ".join(p.get_text() for p in paragraphs if len(p.get_text()) > 40)
content_for_sentiment = content_for_sentiment.strip()
if len(content_for_sentiment) < 100:
print("β†’ Content too short from web scraping, falling back to headline/intro")
                    content_for_sentiment = headline + ". " + intro
except Exception as e:
print(f"❌ Error scraping {news_url}: {str(e)}. Falling back to headline/intro for sentiment analysis.")
content_for_sentiment = headline + " ." + intro
else:
print(f"\n[{i+1}/{len(urls)}] No URL provided, using headline/intro for sentiment analysis.")
content_for_sentiment = headline + " ." + intro
if not content_for_sentiment.strip():
print("β†’ No content available for sentiment analysis, skipping.")
continue
        # Cap article text at 1000 characters; truncation=True also keeps the
        # tokenizer within FinBERT's 512-token input limit
        content_for_sentiment = content_for_sentiment[:1000]
        result = classifier(content_for_sentiment, truncation=True)[0]
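        # The pipeline returns e.g. [{'label': 'Positive', 'score': 0.98}] for
        # finbert-tone; lowercase the label so the Counter keys are uniform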
label = result['label'].lower()
score = round(result['score'], 3)
summary[label] += 1
details.append({
"url": news_url,
"title": news_item.get("title", "No title"), # Use title from news_item if available
"sentiment": label,
"confidence": score,
"content_length": len(content_for_sentiment),
"image_222x148": news_item.get("image_222x148"),
"intro": intro,
"headline": headline
})
print(f"β†’ Sentiment: {label.upper()} (confidence: {score:.1%})")
        time.sleep(1.2)  # polite delay between article fetches
# Step 4: Generate comprehensive output
sentiment_scores = calculate_sentiment_scores({
"positive": summary["positive"],
"negative": summary["negative"],
"neutral": summary["neutral"],
"details": details
})
output = {
"metadata": {
"total_articles": len(urls),
"processed_articles": len(details),
"processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
},
"sentiment_metrics": {
"overall_score": sentiment_scores["overall"], # Removed round() for string label
"positive_score": round(sentiment_scores["positive_percent"], 2),
"negative_score": round(sentiment_scores["negative_percent"], 2),
"sentiment_ratio": round(sentiment_scores["sentiment_ratio"], 2),
"average_confidence": round(sentiment_scores["average_confidence"], 2)
},
"article_details": details
}
# Print formatted results
print("\n=== SENTIMENT ANALYSIS RESULTS ===")
print(f"Overall Sentiment Score: {output['sentiment_metrics']['overall_score']}") # Updated print statement
print(f"Positive/Negative Ratio: {output['sentiment_metrics']['sentiment_ratio']:.2f}")
print(f"Average Confidence: {output['sentiment_metrics']['average_confidence']:.1f}%")
with open("sentiment_results.json", "w") as f:
json.dump(output, f, indent=2)
print("Results saved to sentiment_results.json")
return output
def mainOne(querystring):
"""
Main function that takes querystring as parameter and runs sentiment analysis
Args:
querystring: Dictionary containing stock name (e.g. {'name': 'HDFC BANK'})
Returns:
Dictionary containing sentiment analysis results
"""
try:
        headers = {
            "x-rapidapi-host": "indian-stock-exchange-api2.p.rapidapi.com",
            # Read the RapidAPI key from the environment rather than committing a secret
            "x-rapidapi-key": os.environ.get("RAPIDAPI_KEY", ""),
        }
# Run the sentiment analysis
results = sentiment_analysis(querystring, headers)
return results
except Exception as e:
print(f"Error in main function: {str(e)}")
return {"error": str(e)}