# app.py -- last commit "Update app.py" (23f3ac6, verified) by RAHULJUNEJA33.
import html
import os
import re
from collections import defaultdict

import fitz # PyMuPDF for PDF reading
import spacy # Replace NLTK with spaCy for sentence tokenization
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load spaCy model for sentence tokenization
nlp = spacy.load("en_core_web_sm")
# Streamlit App Configuration
st.set_page_config(page_title="πŸ“Š Financial Report Sentiment Analyzer", layout="wide")
st.title("πŸ“Š Financial Report Sentiment Analyzer")
st.markdown("""
### What is FinBERT?
**FinBERT** is a language model fine-tuned for financial text analysis. It classifies sentiment as **Positive, Neutral, or Negative** for key financial aspects:
1. **Assets** – What the company owns
2. **Liabilities** – What the company owes
3. **Equity** – Net worth (Assets - Liabilities)
---
""")
# File Upload
uploaded_file = st.file_uploader("πŸ“‚ Upload Financial Report (.pdf or .txt)", type=["pdf", "txt"])
# βœ… Custom CSS for Better Report Preview
st.markdown("""
<style>
.report-preview {
border: 1px solid #ccc;
padding: 10px;
max-height: 300px;
overflow-y: scroll;
background-color: #f9f9f9;
color: #333 !important;
white-space: pre-wrap;
line-height: 1.6;
font-family: Arial, sans-serif;
}
</style>
""", unsafe_allow_html=True)
# βœ… Load FinBERT Model (Optimized with Streamlit Caching)
@st.cache_resource
def load_model():
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
return tokenizer, model
tokenizer, model = load_model()
label_mapping = {0: 'Positive', 1: 'Negative', 2: 'Neutral'}
# βœ… Extract Text from Uploaded File
def extract_text(file):
try:
if file.name.endswith('.pdf'):
with fitz.open(stream=file.read(), filetype="pdf") as doc:
return "\n".join([page.get_text() for page in doc])
else:
return file.read().decode('utf-8')
except Exception as e:
st.error(f"❌ Error reading file: {e}")
return ""
if uploaded_file:
report_text = extract_text(uploaded_file)
st.write("### πŸ“„ Uploaded Report Preview:")
st.markdown(f"<div class='report-preview'>{report_text[:5000]}</div>", unsafe_allow_html=True)
# βœ… Sentiment Analysis Function
def analyze_sentiment(sentence):
inputs = tokenizer(sentence, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
outputs = model(**inputs)
probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
label_idx = torch.argmax(probs, dim=1).item()
return label_mapping[label_idx], probs.tolist()[0]
# βœ… Extract Sentences Matching Financial Keywords (using spaCy)
def extract_sentences(text, keywords):
try:
doc = nlp(text)
sentences = [sent.text for sent in doc.sents] # Use spaCy for sentence tokenization
pattern = re.compile(r'\b(' + '|'.join(map(re.escape, keywords)) + r')\b', re.IGNORECASE)
return [s for s in sentences if pattern.search(s)]
except Exception as e:
st.error(f"❌ Error in sentence tokenization: {e}")
return []
# βœ… Analyze Sentiment for a Specific Financial Category
def analyze_category(text, category_name, keywords):
sentences = extract_sentences(text, keywords)
if not sentences:
st.warning(f"⚠️ No relevant sentences found for {category_name}")
return None, []
sentiment_scores = defaultdict(int)
negative_sentences = []
for sentence in sentences:
label, probs = analyze_sentiment(sentence)
sentiment_scores[label] += 1
if label == 'Negative':
negative_sentences.append((sentence, probs))
total = sum(sentiment_scores.values())
sentiment_percentages = {
'Positive': (sentiment_scores.get('Positive', 0) / total) * 100 if total else 0,
'Negative': (sentiment_scores.get('Negative', 0) / total) * 100 if total else 0,
'Neutral': (sentiment_scores.get('Neutral', 0) / total) * 100 if total else 0
}
return sentiment_percentages, negative_sentences
# βœ… Financial Categories & Keywords
categories = {
'Assets': ['asset', 'current assets', 'fixed assets', 'cash equivalents', 'inventory', 'receivables', 'property', 'investments'],
'Liabilities': ['liability', 'debt', 'accounts payable', 'loans payable', 'taxes payable', 'borrowings', 'creditors', 'obligations'],
'Equity': ['equity', 'shareholders equity', 'stockholders equity', 'common stock', 'retained earnings', 'net worth', 'share capital']
}
# βœ… Sentiment Analysis Results
st.write("## πŸ“ Sentiment Analysis Results:")
for category, keywords in categories.items():
st.write(f"### πŸ” {category}")
result = analyze_category(report_text, category, keywords)
if result[0] is None:
continue
sentiment_percentages, negative_sentences = result
# Display Sentiment Metrics
cols = st.columns(3)
cols[0].metric(label="βœ… Positive", value=f"{sentiment_percentages['Positive']:.1f}%")
cols[1].metric(label="⚠️ Negative", value=f"{sentiment_percentages['Negative']:.1f}%")
cols[2].metric(label="ℹ️ Neutral", value=f"{sentiment_percentages['Neutral']:.1f}%")
# Show Negative Sentences (if any)
if negative_sentences:
with st.expander("πŸ”» View Negative Sentences"):
for idx, (sentence, probs) in enumerate(negative_sentences, 1):
st.write(f"**{idx}.** *{sentence}*")
st.caption(f"Probabilities β†’ Positive: {probs[0]:.2f}, Negative: {probs[1]:.2f}, Neutral: {probs[2]:.2f}")
else:
st.success("βœ… No negative sentences detected.")