Spaces:

jatin7237
/

News-App

Running

App Files Files Community

Divyansh Kushwaha committed on Mar 21

Commit

f5dd236

1 Parent(s): e6ec654

Files updated

Browse files

Files changed (5) hide show

Dockerfile +17 -0
api.py +366 -0
main.py +0 -0
requirements.txt +8 -0
utils.py +173 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+FROM python:3.9-slim
+# Set the working directory
+WORKDIR /app
+# Copy requirements first so the dependency layer is reused when only the code changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code
+COPY . .
+# Expose the port FastAPI will run on
+EXPOSE 8000
+# Command to run the FastAPI app. The `app` object is defined in api.py; main.py is
+# empty, so the previous "main:app" target could not be loaded by uvicorn.
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]

api.py ADDED Viewed

	@@ -0,0 +1,366 @@

+# from fastapi import FastAPI, Query
+# from fastapi.responses import JSONResponse, FileResponse
+# import json
+# import os
+# from bs4 import BeautifulSoup
+# from dotenv import load_dotenv
+# import requests
+# from transformers import pipeline
+# from elevenlabs import ElevenLabs
+# from langchain_groq import ChatGroq
+# from langchain.schema import HumanMessage
+# app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
+# load_dotenv()
+# GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+# ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
+# llm = ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
+# JSON_FILE_PATH = "final_summary.json"
+# AUDIO_FILE_PATH = "hindi_summary.mp3"
+# def extract_titles_and_summaries(company_name, num_articles=10):
+#     url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
+#     try:
+#         response = requests.get(url)
+#         if response.status_code != 200:
+#             print(f"Failed to fetch the webpage. Status code: {response.status_code}")
+#             return []
+#         soup = BeautifulSoup(response.content, "html.parser")
+#         articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
+#         extracted_articles = []
+#         for article in articles:
+#             title_tag = article.find('h2').find('a')
+#             title = title_tag.get_text(strip=True) if title_tag else "No Title Found"
+#             summary_tag = article.find('p')
+#             summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
+#             extracted_articles.append({
+#                 "Title": title,
+#                 "Summary": summary
+#             })
+#         return {
+#             "Company": company_name,
+#             "Articles": extracted_articles
+#         }
+#     except Exception as e:
+#         print(f"An error occurred: {e}")
+#         return []
+# def perform_sentiment_analysis(news_data):
+#     articles = news_data.get("Articles", [])
+#     pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
+#     sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
+#     for article in articles:
+#         content = f"{article['Title']} {article['Summary']}"
+#         sentiment_result = pipe(content)[0]
+#         sentiment_map = {
+#             "positive": "Positive",
+#             "negative": "Negative",
+#             "neutral": "Neutral",
+#             "very positive":"Positive",
+#             "very negative":"Negative"
+#         }
+#         sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
+#         score = float(sentiment_result["score"])
+#         article["Sentiment"] = sentiment
+#         article["Score"] = score
+#         if sentiment in sentiment_counts:
+#             sentiment_counts[sentiment] += 1
+#     return news_data, sentiment_counts
+# def extract_topics_with_hf(news_data):
+#     structured_data = {
+#         "Company": news_data.get("Company", "Unknown"),
+#         "Articles": []
+#     }
+#     topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
+#     articles = news_data.get("Articles", [])
+#     for article in articles:
+#         content = f"{article['Title']} {article['Summary']}"
+#         topics_result = topic_pipe(content, top_k=3)
+#         topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+#         structured_data["Articles"].append({
+#             "Title": article["Title"],
+#             "Summary": article["Summary"],
+#             "Sentiment": article.get("Sentiment", "Unknown"),
+#             "Score": article.get("Score", 0.0),
+#             "Topics": topics
+#         })
+#     return structured_data
+# def generate_final_sentiment(news_data, sentiment_counts):
+#     company_name = news_data["Company"]
+#     total_articles = sum(sentiment_counts.values())
+#     combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
+#     prompt = f"""
+#     Based on the analysis of {total_articles} articles about the company "{company_name}":
+#     - Positive articles: {sentiment_counts['Positive']}
+#     - Negative articles: {sentiment_counts['Negative']}
+#     - Neutral articles: {sentiment_counts['Neutral']}
+#     The following are the summarized key points from the articles: "{combined_summaries}".
+#     Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
+#     Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
+#     """
+#     response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
+#     final_sentiment = response if response else "Sentiment analysis summary not available."
+#     return final_sentiment.content
+# def extract_json(response):
+#     try:
+#         return json.loads(response)
+#     except json.JSONDecodeError:
+#         return {}
+# def compare_articles(news_data, sentiment_counts):
+#     articles = news_data.get("Articles", [])
+#     all_topics = [set(article["Topics"]) for article in articles]
+#     common_topics = set.intersection(*all_topics) if all_topics else set()
+#     topics_prompt = f"""
+#     Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
+#     even if they are phrased differently. The topics from each article are:
+#     {all_topics}
+#     Respond **ONLY** with a JSON format:
+#     {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
+#     """
+#     response = llm.invoke([HumanMessage(content=topics_prompt)]).content
+#     contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics
+#     total_articles = sum(sentiment_counts.values())
+#     comparison_prompt = f"""
+#     Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
+#     - Sentiment distribution: {sentiment_counts}
+#     - Commonly discussed topics across articles: {contextual_common_topics}
+#     Consider the following:
+#     1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
+#     2. Overall implications for the company's reputation, stock potential, and public perception.
+#     3. How sentiment varies across articles and its impact.
+#     Respond **ONLY** with a concise and insightful summary in this JSON format:
+#     {{
+#         "Coverage Differences": [
+#             {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
+#             {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}},
+#             ...
+#         ]
+#     }}
+#     """
+#     response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
+#     coverage_differences = extract_json(response).get("Coverage Differences", [])
+#     final_sentiment = generate_final_sentiment(news_data, sentiment_counts)
+#     return {
+#         "Company": news_data["Company"],
+#         "Articles": articles,
+#         "Comparative Sentiment Score": {
+#             "Sentiment Distribution": sentiment_counts,
+#             "Coverage Differences": coverage_differences,
+#             "Topic Overlap": {
+#                 "Common Topics": contextual_common_topics,
+#                 "Unique Topics": {
+#                     f"Article {i+1}": list(topics - set(contextual_common_topics))
+#                     for i, topics in enumerate(all_topics)
+#                 }
+#             }
+#         },
+#         "Final Sentiment Analysis": final_sentiment
+#     }
+# def generate_summary(company_name):
+#     news_articles = extract_titles_and_summaries(company_name)
+#     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
+#     news_articles = extract_topics_with_hf(news_articles)
+#     final_summary = compare_articles(news_articles, sentiment_counts)
+#     hindi_prompt = f"Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
+#     hindi_summary = llm.invoke([HumanMessage(content=hindi_prompt)]).content
+#     client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
+#     audio = client.text_to_speech.convert(
+#         voice_id="9BWtsMINqrJLrRacOk9x",
+#         output_format="mp3_44100_128",
+#         text=hindi_summary,
+#         model_id="eleven_multilingual_v2",
+#     )
+#     with open(AUDIO_FILE_PATH, "wb") as f:
+#         f.write(b"".join(audio))
+#     return final_summary["Final Sentiment Analysis"]
+# @app.get("/")
+# def home():
+#     return {"message": "Welcome to the Company Sentiment API"}
+# @app.get("/generateSummary")
+# def get_summary(company_name: str = Query(..., description="Enter company name")):
+#     summary = generate_summary(company_name)
+#     return {"final_summary": summary}
+# @app.get("/downloadJson")
+# def download_json():
+#     return FileResponse(JSON_FILE_PATH, media_type="application/json", filename="final_summary.json")
+# @app.get("/downloadHindiAudio")
+# def download_audio():
+#     return FileResponse(AUDIO_FILE_PATH, media_type="audio/mp3", filename="hindi_summary.mp3")
+# if __name__ == "__main__":
+#     import uvicorn
+#     uvicorn.run(app, host="0.0.0.0", port=8000)
+from fastapi import FastAPI, Query,HTTPException
+from fastapi.responses import JSONResponse, FileResponse
+from elevenlabs import ElevenLabs
+from langchain.schema import HumanMessage
+import json
+from utils import (
+    get_llm,
+    extract_titles_and_summaries,
+    perform_sentiment_analysis,
+    extract_topics_with_hf,
+    compare_articles
+)
+app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")  # Application object the route decorators below register handlers on.
+api_keys = {  # Module-level, in-memory credential store; every entry starts as None and is filled in by POST /setAPIKeys.
+    "groq_api_key": None,  # Passed to get_llm() to build the Groq chat client.
+    "elevenlabs_api_key": None,  # Passed to the ElevenLabs client in generate_summary().
+    "huggingface_api_key": None,  # Stored and checked for presence; not otherwise read in this file.
+    "voice_id":None  # ElevenLabs voice used for the text-to-speech request.
+}
+@app.post("/setAPIKeys")
+def set_api_keys(
+    groq_api_key: str = Query(..., description="Enter your Groq API Key"),
+    elevenlabs_api_key: str = Query(..., description="Enter your ElevenLabs API Key"),
+    huggingface_api_key: str = Query(..., description="Enter your HuggingFace API Key"),
+    voice_id: str= Query(..., description="Enter your ElevenLabs Voice ID")
+):
+    if not groq_api_key or not elevenlabs_api_key or not huggingface_api_key or not voice_id:
+        raise HTTPException(status_code=400, detail="All API keys are required.")
+    # Update API keys in temporary storage
+    api_keys["groq_api_key"] = groq_api_key
+    api_keys["elevenlabs_api_key"] = elevenlabs_api_key
+    api_keys["huggingface_api_key"] = huggingface_api_key
+    api_keys["voice_id"] = voice_id
+    return {"message": "API keys updated successfully", "keys": api_keys}
+if not api_keys["groq_api_key"] or not api_keys["elevenlabs_api_key"] or not api_keys["huggingface_api_key"] or not api_keys['voice_id']:
+        raise HTTPException(status_code=400, detail="API keys are required. Please use /setAPIKeys to provide them.")
+llm = get_llm(api_keys["groq_api_key"])
+JSON_FILE_PATH = "final_summary.json"
+AUDIO_FILE_PATH = "hindi_summary.mp3"
+def generate_summary(company_name):
+    news_articles = extract_titles_and_summaries(company_name)
+    news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
+    news_articles = extract_topics_with_hf(news_articles)
+    final_summary = compare_articles(news_articles, sentiment_counts,llm)
+    ELEVEN_LABS_API_KEY = api_keys.get("elevenlabs_api_key", "")
+    VOICE_ID = api_keys.get("voice_id","")
+    hindi_text = ""
+    if ELEVEN_LABS_API_KEY and VOICE_ID:
+        client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
+        hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
+        hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
+        hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
+        try:
+            audio = client.text_to_speech.convert(
+                voice_id=VOICE_ID,
+                output_format="mp3_44100_128",
+                text=hindi_text,
+                model_id="eleven_multilingual_v2",
+            )
+            hindi_summary = b"".join(audio)  # Store the audio content as binary data
+            with open(AUDIO_FILE_PATH, "wb") as f:
+                f.write(b"".join(audio))
+        except Exception as e:
+            print(f"Error generating audio: {e}")
+            hindi_summary = None
+    with open(JSON_FILE_PATH,"w") as f:
+        json.dump(final_summary,f,indent=4)
+    return {
+        'Company': final_summary["Company"],
+        'Articles': [
+            {
+                'Title': article.get('Title', 'No Title'),
+                'Summary': article.get('Summary', 'No Summary'),
+                'Sentiment': article.get('Sentiment', 'Unknown'),
+                'Score': article.get('Score', 0.0),
+                'Topics': article.get('Topics', [])
+            }
+            for article in final_summary["Articles"]
+        ],
+        'Comparative Sentiment Score': {
+            'Sentiment Distribution': sentiment_counts,
+            'Coverage Differences': final_summary["Comparative Sentiment Score"].get("Coverage Differences", []),
+            'Topic Overlap': {
+                'Common Topics': final_summary["Comparative Sentiment Score"].get("Topic Overlap", {}).get("Common Topics", []),
+                'Unique Topics': final_summary["Comparative Sentiment Score"].get("Topic Overlap", {}).get("Unique Topics", {})
+            }
+        },
+        'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
+        'Hindi Summary': hindi_summary
+    }
+@app.get("/")
+def home():
+    return {"message": "Welcome to the Company Sentiment API"}
+@app.get("/generateSummary")
+def get_summary(company_name: str = Query(..., description="Enter company name")):
+    structured_summary = generate_summary(company_name)
+    return structured_summary
+@app.get("/downloadJson")
+def download_json():
+    return FileResponse(JSON_FILE_PATH, media_type="application/json", filename="final_summary.json")
+@app.get("/downloadHindiAudio")
+def download_audio():
+    return FileResponse(AUDIO_FILE_PATH, media_type="audio/mp3", filename="hindi_summary.mp3")
+if __name__ == "__main__":  # Only when this file is executed as a script, not when it is imported.
+    import uvicorn  # Imported here so it is only needed for direct execution.
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # Serve `app` on all interfaces, port 8000.

main.py ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi
+uvicorn
+requests
+bs4
+transformers
+# transformers.pipeline() needs a deep-learning backend at runtime
+torch
+langchain
+langchain_groq
+elevenlabs

utils.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import json
+import requests
+from bs4 import BeautifulSoup
+from transformers import pipeline
+from langchain.schema import HumanMessage
+from langchain_groq import ChatGroq
+def get_llm(api_key):
+    if not api_key:
+        raise ValueError("Groq API key is required to initialize llm.")
+    return ChatGroq(api_key=api_key, model="llama-3.1-8b-instant")
+def extract_titles_and_summaries(company_name, num_articles=10):
+    url = f"https://economictimes.indiatimes.com/topic/{company_name}/news"
+    try:
+        response = requests.get(url)
+        if response.status_code != 200:
+            print(f"Failed to fetch the webpage. Status code: {response.status_code}")
+            return []
+        soup = BeautifulSoup(response.content, "html.parser")
+        articles = soup.find_all('div', class_='clr flt topicstry story_list', limit=num_articles)
+        extracted_articles = []
+        for article in articles:
+            title_tag = article.find('h2')
+            if title_tag:
+                link_tag = title_tag.find('a')
+                title = link_tag.get_text(strip=True) if link_tag else "No Title Found"
+            else:
+                title = "No Title Found"
+            summary_tag = article.find('p')
+            summary = summary_tag.get_text(strip=True) if summary_tag else "No Summary Found"
+            extracted_articles.append({
+                "Title": title,
+                "Summary": summary
+            })
+        return {
+            "Company": company_name,
+            "Articles": extracted_articles
+        }
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return []
+def perform_sentiment_analysis(news_data):
+    articles = news_data.get("Articles", [])
+    pipe = pipeline("text-classification", model="tabularisai/multilingual-sentiment-analysis")
+    sentiment_counts = {"Positive": 0, "Negative": 0, "Neutral": 0}
+    for article in articles:
+        content = f"{article['Title']} {article['Summary']}"
+        sentiment_result = pipe(content)[0]
+        sentiment_map = {
+            "positive": "Positive",
+            "negative": "Negative",
+            "neutral": "Neutral",
+            "very positive": "Positive",
+            "very negative": "Negative"
+        }
+        sentiment = sentiment_map.get(sentiment_result["label"].lower(), "Unknown")
+        score = float(sentiment_result["score"])
+        article["Sentiment"] = sentiment
+        article["Score"] = score
+        if sentiment in sentiment_counts:
+            sentiment_counts[sentiment] += 1
+    return news_data, sentiment_counts
+def extract_topics_with_hf(news_data):
+    structured_data = {
+        "Company": news_data.get("Company", "Unknown"),
+        "Articles": []
+    }
+    topic_pipe = pipeline("text-classification", model="valurank/distilroberta-topic-classification")
+    articles = news_data.get("Articles", [])
+    for article in articles:
+        content = f"{article['Title']} {article['Summary']}"
+        topics_result = topic_pipe(content, top_k=3)
+        topics = [topic["label"] for topic in topics_result] if topics_result else ["Unknown"]
+        structured_data["Articles"].append({
+            "Title": article["Title"],
+            "Summary": article["Summary"],
+            "Sentiment": article.get("Sentiment", "Unknown"),
+            "Score": article.get("Score", 0.0),
+            "Topics": topics
+        })
+    return structured_data
+def generate_final_sentiment(news_data, sentiment_counts,llm):
+    company_name = news_data["Company"]
+    total_articles = sum(sentiment_counts.values())
+    combined_summaries = " ".join([article["Summary"] for article in news_data["Articles"]])
+    prompt = f"""
+    Based on the analysis of {total_articles} articles about the company "{company_name}":
+    - Positive articles: {sentiment_counts['Positive']}
+    - Negative articles: {sentiment_counts['Negative']}
+    - Neutral articles: {sentiment_counts['Neutral']}
+    The following are the summarized key points from the articles: "{combined_summaries}".
+    Provide a single, concise summary that integrates the overall sentiment analysis and key news highlights while maintaining a natural flow. Explain its implications for the company's reputation, stock potential, and public perception.
+    Respond **ONLY** with a well-structured very concised and very short paragraph in plain text, focus on overall sentiment.
+    """
+    response = llm.invoke([HumanMessage(content=prompt)],max_tokens=200)
+    final_sentiment = response if response else "Sentiment analysis summary not available."
+    return final_sentiment.content   # it's a string
+def extract_json(response):
+    try:
+        return json.loads(response)
+    except json.JSONDecodeError:
+        return {}
+def compare_articles(news_data, sentiment_counts,llm):
+    articles = news_data.get("Articles", [])
+    all_topics = [set(article["Topics"]) for article in articles]
+    common_topics = set.intersection(*all_topics) if all_topics else set()
+    topics_prompt = f"""
+    Analyze the following article topics and identify **only three** key themes that are common across multiple articles,
+    even if they are phrased differently. The topics from each article are:
+    {all_topics}
+    Respond **ONLY** with a JSON format:
+    {{"CommonTopics": ["topic1", "topic2", "topic3"]}}
+    """
+    response = llm.invoke([HumanMessage(content=topics_prompt)]).content
+    contextual_common_topics = extract_json(response).get("CommonTopics", list(common_topics))[:3]  # Limit to 3 topics
+    total_articles = sum(sentiment_counts.values())
+    comparison_prompt = f"""
+    Provide a high-level summary comparing {total_articles} news articles about "{news_data['Company']}":
+    - Sentiment distribution: {sentiment_counts}
+    - Commonly discussed topics across articles: {contextual_common_topics}
+    Consider the following:
+    1. Notable contrasts between articles (e.g., major differences in topics and perspectives).
+    2. Overall implications for the company's reputation, stock potential, and public perception.
+    3. How sentiment varies across articles and its impact.
+    Respond **ONLY** with a concise and insightful summary in this JSON format:
+    {{
+        "Coverage Differences": [
+            {{"Comparison": "Brief contrast between Articles 1 & 2", "Impact": "Concise impact statement"}},
+            {{"Comparison": "Brief contrast between Articles 3 & 4", "Impact": "Concise impact statement"}}
+        ]
+    }}
+    """
+    response = llm.invoke([HumanMessage(content=comparison_prompt)]).content
+    coverage_differences = extract_json(response).get("Coverage Differences", [])
+    final_sentiment = generate_final_sentiment(news_data, sentiment_counts,llm)
+    return {
+        "Company": news_data["Company"],
+        "Articles": articles,
+        "Comparative Sentiment Score": {
+            "Sentiment Distribution": sentiment_counts,
+            "Coverage Differences": coverage_differences,
+            "Topic Overlap": {
+                "Common Topics": contextual_common_topics,
+                "Unique Topics": {
+                    f"Article {i+1}": list(topics - set(contextual_common_topics))
+                    for i, topics in enumerate(all_topics)
+                }
+            }
+        },
+        "Final Sentiment Analysis": final_sentiment
+    }