import gradio as gr
import json
import os
import nltk
import spacy
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Make sure the NLTK "punkt" tokenizer data is available locally.
# nltk.data.find raises LookupError when the resource is missing; in that
# case it is downloaded here, once, at import time.
# NOTE(review): split_in_sentences below splits sentences with spaCy (`nlp`),
# not NLTK — confirm whether this download is still needed.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Shared spaCy pipeline. It is used by split_in_sentences for sentence
# boundaries and by identify_entities as the entity-recognition fallback.
# A 'sentencizer' component is appended to the loaded pipeline.
nlp = spacy.load('en_core_web_sm')
nlp.add_pipe('sentencizer')

# Module-level model handles. They start as None and are filled in by
# load_models(); a handle stays None when its model could not be loaded, and
# the analysis functions check for that before using it.
fin_model = None  # sentiment-analysis pipeline
summarizer = None  # summarization pipeline
ner_model = None  # ner (token classification) pipeline
# Hugging Face token read from the HF_Token environment variable.
# NOTE(review): load_models() does not pass this to any pipeline — confirm
# whether it is still needed or should be forwarded.
auth_token = os.environ.get("HF_Token")

def load_models():
    """Load the three Hugging Face pipelines into the module-level handles.

    Populates ``fin_model`` (sentiment-analysis), ``summarizer``
    (summarization) and ``ner_model`` (ner). Each pipeline is loaded
    independently: when one raises, the failure is printed and that handle is
    set to None so the remaining features keep working.
    """
    global fin_model, summarizer, ner_model

    def _attempt(task, checkpoint, start_msg, success_msg, failure_prefix,
                 degraded_msg=None):
        # Load one pipeline; report progress and return None on any failure.
        print(start_msg)
        try:
            loaded = pipeline(task, model=checkpoint)
            print(success_msg)
            return loaded
        except Exception as err:
            print(f"{failure_prefix}{err}")
            if degraded_msg is not None:
                print(degraded_msg)
            return None

    # Sentiment analysis model
    fin_model = _attempt(
        "sentiment-analysis",
        "ylingag/ISOM5240_financial_tone",
        "Loading sentiment model...",
        "Sentiment model loaded successfully.",
        "Failed to load sentiment model: ",
    )

    # Summarization model
    summarizer = _attempt(
        "summarization",
        "knkarthick/MEETING_SUMMARY",
        "Loading summarization model...",
        "Summarization model loaded successfully.",
        "Warning: Failed to load summarization model: ",
        "Will continue without summarization capability.",
    )

    # NER model
    ner_model = _attempt(
        "ner",
        "dslim/bert-base-NER",
        "Loading NER model...",
        "NER model loaded successfully.",
        "Warning: Failed to load NER model: ",
        "Will continue without NER capability.",
    )

def split_in_sentences(text):
    """Return the sentences of ``text`` as a list of stripped strings.

    Sentence boundaries come from the shared spaCy pipeline ``nlp``.
    """
    sentences = []
    for span in nlp(text).sents:
        sentences.append(str(span).strip())
    return sentences

def make_spans(text, results):
    """Pair each sentence of ``text`` with its sentiment label.

    ``results`` is a sequence of dicts carrying a ``'label'`` key, one per
    sentence. Generic ``LABEL_0`` / ``LABEL_1`` / ``LABEL_2`` labels are shown
    as Negative / Neutral / Positive; any other label is passed through
    unchanged. Returns a list of ``(sentence, label)`` tuples; pairing stops
    at the shorter of the two sequences.
    """
    readable = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    labels = []
    for prediction in results:
        tag = prediction['label']
        # Only generic LABEL_n tags are translated; unknown ones stay as-is.
        if tag.startswith("LABEL_"):
            tag = readable.get(tag, tag)
        labels.append(tag)
    return list(zip(split_in_sentences(text), labels))

def text_to_sentiment(text):
    """Classify the overall sentiment of ``text``.

    Returns the label of the first prediction from ``fin_model``. Generic
    ``LABEL_0`` / ``LABEL_1`` / ``LABEL_2`` labels are shown as Negative /
    Neutral / Positive; any other label is returned unchanged.

    Instead of raising, the function returns a human-readable message when
    the model is unavailable, when ``text`` is empty or whitespace only, or
    when the model call fails (``"Error: ..."``).
    """
    if not fin_model:
        return "Sentiment model not available."

    if not text or not text.strip():
        return "Please enter text for analysis."

    readable = {
        "LABEL_0": "Negative",
        "LABEL_1": "Neutral",
        "LABEL_2": "Positive",
    }
    try:
        # The whole document goes through in one call; truncation=True asks
        # the tokenizer to cut over-long inputs to the model's maximum length
        # so a long article is still classified instead of erroring out.
        sentiment = fin_model(text, truncation=True)[0]["label"]
        if sentiment.startswith("LABEL_"):
            sentiment = readable.get(sentiment, sentiment)
        return sentiment
    except Exception as e:
        print(f"Error during overall sentiment analysis: {e}")
        return f"Error: {str(e)}"

def summarize_text(text):
    """Summarize ``text`` with the ``summarizer`` pipeline.

    Returns the ``'summary_text'`` of the first result. Instead of raising,
    the function returns a human-readable message when the model is
    unavailable, when the stripped text is shorter than 50 characters, or
    when the model call fails (``"Summarization error: ..."``).
    """
    if not summarizer:
        return "Summarization model not available."

    if not text or len(text.strip()) < 50:
        return "Text too short for summarization."

    try:
        # truncation=True lets the pipeline cut inputs that exceed the
        # model's maximum length, so long articles yield a summary of their
        # leading part rather than an error.
        resp = summarizer(text, truncation=True)
        return resp[0]['summary_text']
    except Exception as e:
        print(f"Error during summarization: {e}")
        return f"Summarization error: {str(e)}"

def fin_ext(text):
    """Run sentence-level sentiment on ``text`` for highlighted display.

    Splits the text into sentences, classifies them with ``fin_model`` in one
    call and returns the ``(sentence, label)`` pairs built by ``make_spans``.
    Returns None when the model is unavailable, the text is empty, or any
    step raises (the error is printed).
    """
    if not (fin_model and text):
        return None

    try:
        sentences = split_in_sentences(text)
        predictions = fin_model(sentences)
        return make_spans(text, predictions)
    except Exception as e:
        print(f"Error during sentence-level sentiment analysis: {e}")
        return None

def _bio_base_label(tag):
    """Drop a leading ``B-`` / ``I-`` marker from an NER tag, if present."""
    if isinstance(tag, str) and tag[:2] in ("B-", "I-"):
        return tag[2:]
    return tag


def _fill_plain_text(text, labelled):
    """Build HighlightedText spans that cover all of ``text``.

    ``labelled`` holds sorted, non-overlapping ``(start, end, label)``
    character ranges; the text between them is emitted with label None.
    """
    spans = []
    cursor = 0
    for start, end, label in labelled:
        if start > cursor:
            spans.append((text[cursor:start], None))
        # Slice from the original text so the rendered spans always join
        # back into exactly the input.
        spans.append((text[start:end], label))
        cursor = end
    if cursor < len(text):
        spans.append((text[cursor:], None))
    return spans


def _merge_ner_tokens(text, entities):
    """Collapse token-level NER predictions into whole-entity ranges.

    Accepts the dicts produced by the ``ner`` pipeline (keys ``start``,
    ``end`` and ``entity`` or ``entity_group``; ``word`` is optional). A
    piece is merged into the previous range when it is a continuation
    (``I-`` tag or ``##`` wordpiece) of the same entity type and only
    whitespace separates the two. Pieces overlapping an earlier range are
    skipped because overlapping highlights cannot be rendered.
    """
    merged = []
    for item in sorted(entities, key=lambda ent: ent['start']):
        start, end = item['start'], item['end']
        raw_tag = item.get('entity_group') or item.get('entity')
        label = _bio_base_label(raw_tag)
        if merged:
            previous = merged[-1]
            if start < previous[1]:
                continue
            is_continuation = (
                (isinstance(raw_tag, str) and raw_tag.startswith("I-"))
                or str(item.get('word', '')).startswith("##")
            )
            gap_is_blank = not text[previous[1]:start].strip()
            if is_continuation and label == previous[2] and gap_is_blank:
                previous[1] = end
                continue
        merged.append([start, end, label])
    return merged


def _spacy_entity_spans(text):
    """Highlight GPE / LOC / ORG entities found by the spaCy pipeline."""
    labelled = []
    cursor = 0
    for ent in nlp(text).ents:
        if ent.label_ in ("GPE", "LOC", "ORG") and ent.start_char >= cursor:
            labelled.append((ent.start_char, ent.end_char, ent.label_))
            cursor = ent.end_char
    return _fill_plain_text(text, labelled)


def identify_entities(text):
    """Identify entities in ``text`` for highlighted display.

    Uses the transformer ``ner_model`` when it is loaded and falls back to
    spaCy (GPE / LOC / ORG only) when it is missing or raises. In every case
    the returned spans cover the whole input, with label None on non-entity
    text.

    Returns:
        None for empty input, otherwise a list of ``(text, label)`` tuples.
        If both back ends fail the whole text is returned as one unlabelled
        span.
    """
    if not text:
        return None

    try:
        if ner_model:
            return _fill_plain_text(text, _merge_ner_tokens(text, ner_model(text)))
    except Exception as e:
        # Includes predictions without usable character offsets; fall
        # through to the spaCy path below.
        print(f"Error during entity identification: {e}")

    try:
        return _spacy_entity_spans(text)
    except Exception as e:
        print(f"Error during spaCy entity fallback: {e}")
        # Last resort: show the text without any highlights.
        return [(text, None)]

def analyze_financial_text(text):
    """Run every analysis step on ``text`` and bundle the results.

    Returns a 4-tuple ``(sentiment_spans, summary, entity_spans,
    overall_sentiment)``. For empty or whitespace-only input no model is
    called and placeholder values are returned instead.
    """
    if not (text and text.strip()):
        return None, "No summary available.", None, "No sentiment available."

    # Steps run in this order: summary, overall tone, per-sentence tone,
    # entities.
    summary = summarize_text(text)
    overall_sentiment = text_to_sentiment(text)
    return fin_ext(text), summary, identify_entities(text), overall_sentiment

# Load the models once, when the module is imported (app startup).
# load_models() already handles a failure of each individual pipeline; this
# outer guard catches anything else it might raise so the import never aborts.
try:
    load_models()
except Exception as e:
    print(f"Initial model loading failed: {e}")
    # Gradio interface will still start, but functionality will be limited

# Gradio interface definition: page title and introductory text.
app_title = "Financial Tone Analysis"
app_description = "The project will summarize financial news content, analyze financial sentiment, and flag relevant companies and countries"

# Page layout, top to bottom: input box and button, summary, sentiment
# (overall label plus per-sentence highlights), then entity highlights.
with gr.Blocks(title=app_title) as iface:
    gr.Markdown(f"# {app_title}")
    gr.Markdown(app_description)
    
    # Input area, pre-filled with a sample news paragraph.
    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                lines=10, 
                label="Financial News Text", 
                placeholder="Enter a longer financial news text here for analysis...",
                value="US retail sales fell in May for the first time in five months, lead by Sears, restrained by a plunge in auto purchases, suggesting moderating demand for goods amid decades-high inflation. The value of overall retail purchases decreased 0.3%, after a downwardly revised 0.7% gain in April, Commerce Department figures showed Wednesday. Excluding Tesla vehicles, sales rose 0.5% last month."
            )
            analyze_btn = gr.Button("Start Analysis", variant="primary")
            
    # Summary produced by summarize_text (or its status/error message).
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Text Summary")
            summary_output = gr.Textbox(label="Summary", lines=3)
            
    # Sentiment: overall label on the left, per-sentence highlights on the right.
    with gr.Row():
        gr.Markdown("### Market sentiment")
        with gr.Column(scale=1):
            gr.Markdown("#### Overall Tone")
            overall_sentiment_output = gr.Label(label="Document Sentiment")
        with gr.Column(scale=2):
            gr.Markdown("#### Sentence-by-Sentence Analysis")
            sentiment_output = gr.HighlightedText(label="Financial Tone by Sentence")
            
    # Entity highlights returned by identify_entities.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Interested Parties")
            entities_output = gr.HighlightedText(label="Identified Companies & Locations")
    
    # Set up the click event for the analyze button.
    # The order of `outputs` must match the tuple returned by
    # analyze_financial_text: (sentiment_spans, summary, entity_spans,
    # overall_sentiment).
    analyze_btn.click(
        fn=analyze_financial_text, 
        inputs=[input_text], 
        outputs=[sentiment_output, summary_output, entities_output, overall_sentiment_output]
    )

# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    print("Starting Gradio application...")
    # share=True will generate a public link
    iface.launch(share=True)