Spaces:

RAHULJUNEJA33
/

Financial_Report_Sentiment_Analyzer

Running

App Files Files Community

Financial_Report_Sentiment_Analyzer / app.py

RAHULJUNEJA33

Update app.py

23f3ac6 verified 3 months ago

raw

history blame contribute delete

6.05 kB

	import html
	import os
	import re
	from collections import defaultdict

	import fitz  # PyMuPDF for PDF reading
	import spacy  # Replace NLTK with spaCy for sentence tokenization
	import streamlit as st
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# Load spaCy model for sentence tokenization
	# Streamlit re-executes this script on every interaction, so the pipeline is
	# cached the same way the FinBERT model is instead of being reloaded each run.
	# show_spinner=False keeps this call from drawing a spinner ahead of the
	# st.set_page_config call further down the script.
	@st.cache_resource(show_spinner=False)
	def load_spacy_pipeline():
	    """Load the small English spaCy pipeline and keep it cached across reruns."""
	    return spacy.load("en_core_web_sm")


	nlp = load_spacy_pipeline()

	# Streamlit App Configuration
	# The same title is shown in the browser tab and as the page heading.
	APP_TITLE = "📊 Financial Report Sentiment Analyzer"

	# Short explainer rendered under the heading.
	INTRO_MARKDOWN = """
	### What is FinBERT?
	FinBERT is a language model fine-tuned for financial text analysis. It classifies sentiment as Positive, Neutral, or Negative for key financial aspects:
	1. Assets – What the company owns
	2. Liabilities – What the company owes
	3. Equity – Net worth (Assets - Liabilities)
	---
	"""

	st.set_page_config(page_title=APP_TITLE, layout="wide")
	st.title(APP_TITLE)

	st.markdown(INTRO_MARKDOWN)

	# File Upload
	# Only PDF and plain-text reports are accepted by the uploader.
	uploaded_file = st.file_uploader(
	    "📂 Upload Financial Report (.pdf or .txt)",
	    type=["pdf", "txt"],
	)

	# ✅ Custom CSS for Better Report Preview
	# Styles the scrollable <div class='report-preview'> box used for the preview.
	PREVIEW_CSS = """
	<style>
	.report-preview {
	border: 1px solid #ccc;
	padding: 10px;
	max-height: 300px;
	overflow-y: scroll;
	background-color: #f9f9f9;
	color: #333 !important;
	white-space: pre-wrap;
	line-height: 1.6;
	font-family: Arial, sans-serif;
	}
	</style>
	"""
	st.markdown(PREVIEW_CSS, unsafe_allow_html=True)

	# ✅ Load FinBERT Model (Optimized with Streamlit Caching)
	@st.cache_resource
	def load_model():
	    """Load the FinBERT-tone tokenizer and sequence classifier.

	    Both come from the same "yiyanghkust/finbert-tone" checkpoint. The
	    st.cache_resource decorator keeps the loaded pair around so script
	    reruns reuse it.

	    Returns:
	        A ``(tokenizer, model)`` tuple.
	    """
	    checkpoint = "yiyanghkust/finbert-tone"
	    return (
	        AutoTokenizer.from_pretrained(checkpoint),
	        AutoModelForSequenceClassification.from_pretrained(checkpoint),
	    )

	tokenizer, model = load_model()
	# Output index -> class name for yiyanghkust/finbert-tone. The model card
	# documents LABEL_0 = neutral, LABEL_1 = positive, LABEL_2 = negative; the
	# previous table (0 Positive, 1 Negative, 2 Neutral) mislabelled every class.
	# Anything that indexes the model's probability vector must use this order.
	label_mapping = {0: 'Neutral', 1: 'Positive', 2: 'Negative'}

	# ✅ Extract Text from Uploaded File
	def extract_text(file):
	    """Return the text content of an uploaded report.

	    Args:
	        file: Object with a ``name`` attribute and a ``read()`` method that
	            returns bytes (the script passes the uploader's return value).

	    Returns:
	        The extracted text. PDFs yield the text of every page joined with
	        newlines; anything else is decoded as text. On any failure an error
	        is shown in the app and an empty string is returned.
	    """
	    try:
	        raw = file.read()
	        # Compare case-insensitively so "REPORT.PDF" is parsed as a PDF
	        # instead of being decoded as text.
	        if file.name.lower().endswith('.pdf'):
	            with fitz.open(stream=raw, filetype="pdf") as doc:
	                return "\n".join(page.get_text() for page in doc)
	        try:
	            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
	            return raw.decode('utf-8-sig')
	        except UnicodeDecodeError:
	            # Typical for Windows-exported text files; never fail on stray bytes.
	            return raw.decode('cp1252', errors='replace')
	    except Exception as e:
	        # UI boundary: report the problem and let the app continue.
	        st.error(f"❌ Error reading file: {e}")
	        return ""

	if uploaded_file:
	    report_text = extract_text(uploaded_file)
	    st.write("### 📄 Uploaded Report Preview:")
	    # The report is untrusted input and the div is rendered with
	    # unsafe_allow_html=True, so escape it: any markup inside the file is
	    # shown as text instead of being injected into the page.
	    preview_text = html.escape(report_text[:5000])
	    st.markdown(f"<div class='report-preview'>{preview_text}</div>", unsafe_allow_html=True)

	# ✅ Sentiment Analysis Function
	def analyze_sentiment(sentence):
	    """Classify one sentence with the loaded FinBERT model.

	    Args:
	        sentence: Text to classify; input beyond 512 tokens is truncated.

	    Returns:
	        ``(label, probs)`` where ``label`` is 'Positive', 'Negative' or
	        'Neutral' and ``probs`` is ``[p_positive, p_negative, p_neutral]``,
	        the order in which the results section prints them.
	    """
	    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
	    with torch.no_grad():
	        outputs = model(**inputs)
	    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)[0].tolist()

	    report_order = ('Positive', 'Negative', 'Neutral')
	    # Take the class names from the checkpoint's own config so the label and
	    # the probabilities cannot drift out of sync with the model's output order.
	    id2label = getattr(model.config, 'id2label', None) or {}
	    class_names = [str(id2label.get(i, '')).capitalize() for i in range(len(probs))]
	    if sorted(class_names) != sorted(report_order):
	        # Config carries no usable names: fall back to the order documented
	        # on the finbert-tone model card (0 neutral, 1 positive, 2 negative).
	        class_names = ['Neutral', 'Positive', 'Negative']

	    by_label = dict(zip(class_names, probs))
	    label = max(by_label, key=by_label.get)
	    return label, [by_label[name] for name in report_order]

	# ✅ Extract Sentences Matching Financial Keywords (using spaCy)
	def extract_sentences(text, keywords):
	    """Return the sentences of ``text`` that mention any of ``keywords``.

	    Sentences are split with the spaCy pipeline. A keyword matches as a whole
	    word or phrase, case-insensitively.

	    Args:
	        text: Full report text.
	        keywords: Iterable of literal words/phrases to look for.

	    Returns:
	        List of matching sentence strings; empty when there are no keywords,
	        nothing matches, or tokenization fails (an error is shown in the app).
	    """
	    keywords = list(keywords)
	    if not keywords:
	        # An empty alternation would match at every word boundary.
	        return []
	    try:
	        # Join with a bare '|' (regex alternation). The previous '\|' is a
	        # literal pipe, so the pattern only matched the whole keyword list
	        # written out as one string.
	        pattern = re.compile(
	            r'\b(?:' + '|'.join(map(re.escape, keywords)) + r')\b',
	            re.IGNORECASE,
	        )
	        return [sent.text for sent in nlp(text).sents if pattern.search(sent.text)]
	    except Exception as e:
	        st.error(f"❌ Error in sentence tokenization: {e}")
	        return []

	# ✅ Analyze Sentiment for a Specific Financial Category
	def analyze_category(text, category_name, keywords):
	    """Summarise sentiment over the sentences that mention a category.

	    Args:
	        text: Full report text.
	        category_name: Name used in the warning shown when nothing matches.
	        keywords: Words/phrases that select the category's sentences.

	    Returns:
	        ``(percentages, negatives)``: ``percentages`` maps 'Positive',
	        'Negative' and 'Neutral' to their share (0-100) of the matched
	        sentences, and ``negatives`` lists ``(sentence, probs)`` for every
	        sentence labelled 'Negative'. Returns ``(None, [])`` after showing a
	        warning when no sentence matches.
	    """
	    matched = extract_sentences(text, keywords)
	    if not matched:
	        st.warning(f"⚠️ No relevant sentences found for {category_name}")
	        return None, []

	    counts = defaultdict(int)
	    flagged = []
	    for candidate in matched:
	        verdict, distribution = analyze_sentiment(candidate)
	        counts[verdict] += 1
	        if verdict == 'Negative':
	            flagged.append((candidate, distribution))

	    n_scored = sum(counts.values())
	    shares = {
	        name: (counts.get(name, 0) / n_scored) * 100 if n_scored else 0
	        for name in ('Positive', 'Negative', 'Neutral')
	    }
	    return shares, flagged

	# ✅ Financial Categories & Keywords
	# Category name -> words/phrases that mark a sentence as belonging to it.
	# Insertion order is the order in which the categories are reported.
	categories = {
	    'Assets': [
	        'asset',
	        'current assets',
	        'fixed assets',
	        'cash equivalents',
	        'inventory',
	        'receivables',
	        'property',
	        'investments',
	    ],
	    'Liabilities': [
	        'liability',
	        'debt',
	        'accounts payable',
	        'loans payable',
	        'taxes payable',
	        'borrowings',
	        'creditors',
	        'obligations',
	    ],
	    'Equity': [
	        'equity',
	        'shareholders equity',
	        'stockholders equity',
	        'common stock',
	        'retained earnings',
	        'net worth',
	        'share capital',
	    ],
	}

	# ✅ Sentiment Analysis Results
	# report_text only exists once a file has been uploaded, so everything here
	# is gated on uploaded_file; the `and` short-circuits before report_text is
	# read when nothing has been uploaded yet.
	if uploaded_file and not report_text.strip():
	    # Extraction failed or the file has no text layer: say so once instead
	    # of emitting one "no relevant sentences" warning per category.
	    st.warning("⚠️ No text could be extracted from the uploaded report.")
	elif uploaded_file:
	    st.write("## 📝 Sentiment Analysis Results:")

	    for category, keywords in categories.items():
	        st.write(f"### 🔍 {category}")
	        sentiment_percentages, negative_sentences = analyze_category(report_text, category, keywords)
	        if sentiment_percentages is None:
	            # analyze_category already showed a warning for this category.
	            continue

	        # Display Sentiment Metrics
	        cols = st.columns(3)
	        cols[0].metric(label="✅ Positive", value=f"{sentiment_percentages['Positive']:.1f}%")
	        cols[1].metric(label="⚠️ Negative", value=f"{sentiment_percentages['Negative']:.1f}%")
	        cols[2].metric(label="ℹ️ Neutral", value=f"{sentiment_percentages['Neutral']:.1f}%")

	        # Show Negative Sentences (if any)
	        if negative_sentences:
	            with st.expander("🔻 View Negative Sentences"):
	                for idx, (sentence, probs) in enumerate(negative_sentences, 1):
	                    st.write(f"{idx}. {sentence}")
	                    # probs is read as [Positive, Negative, Neutral].
	                    st.caption(f"Probabilities → Positive: {probs[0]:.2f}, Negative: {probs[1]:.2f}, Neutral: {probs[2]:.2f}")
	        else:
	            st.success("✅ No negative sentences detected.")