Divyansh Kushwaha committed
Commit 2199ce9 · Parent(s): 5352272 · "Updated"

Files changed:
- .gitignore +2 -0
- api.py +28 -17
- app.py +19 -24
- requirements.txt +4 -1
.gitignore CHANGED
@@ -1 +1,3 @@
 .env
+gcp_key.json
+.gitignore
api.py CHANGED
@@ -1,6 +1,7 @@
 from fastapi import FastAPI, Query,HTTPException
 from fastapi.responses import JSONResponse, FileResponse
-from …
+from google.cloud import texttospeech
+from google.oauth2.service_account import Credentials
 from langchain.schema import HumanMessage
 from langchain_groq import ChatGroq
 import json
@@ -15,8 +16,8 @@ from utils import (
 
 load_dotenv()
 GROQ_API_KEY = os.getenv('GROQ_API_KEY')
-…
-…
+PRIVATE_KEY = os.getenv('PRIVATE_KEY').replace("\\n", "\n")
+CLIENT_EMAIL = os.getenv('CLIENT_EMAIL')
 
 app = FastAPI(title="Company Sentiment API", description="Get company news summaries with sentiment analysis")
 
@@ -25,13 +26,23 @@ llm=ChatGroq(api_key=GROQ_API_KEY, model="llama-3.1-8b-instant")
 JSON_FILE_PATH = "final_summary.json"
 AUDIO_FILE_PATH = "hindi_summary.mp3"
 
+
+def get_tts_client():
+    credentials = Credentials.from_service_account_info({
+        "type": "service_account",
+        "private_key": PRIVATE_KEY,
+        "client_email": CLIENT_EMAIL,
+        "token_uri": "https://oauth2.googleapis.com/token"
+    })
+    return texttospeech.TextToSpeechClient(credentials=credentials)
+
 def generate_summary(company_name):
     news_articles = extract_titles_and_summaries(company_name)
     news_articles, sentiment_counts = perform_sentiment_analysis(news_articles)
     news_articles = extract_topics_with_hf(news_articles)
     final_summary = compare_articles(news_articles, sentiment_counts)
     hindi_text = ""
-    if …
+    if PRIVATE_KEY and CLIENT_EMAIL:
         hindi_prompt = f"Just Translate this text into Hindi: {final_summary['Final Sentiment Analysis']}"
         hindi_response = llm.invoke([HumanMessage(content=hindi_prompt)]).content
         hindi_text = hindi_response.strip() if hindi_response else "Translation not available."
@@ -41,24 +52,24 @@ def generate_summary(company_name):
         print("Hindi Text not generated")
 
     try:
-        client = …
-        …
-        …
-        …
-        …
-            model_id="eleven_multilingual_v2",
+        client = get_tts_client()
+        input_text = texttospeech.SynthesisInput(text=hindi_text)
+        voice = texttospeech.VoiceSelectionParams(
+            language_code="hi-IN",
+            name="hi-IN-Chirp3-HD-Kore"
         )
-        …
-        …
-        with open(AUDIO_FILE_PATH, "wb") as …
-            …
+        audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
+        response = client.synthesize_speech(input=input_text, voice=voice, audio_config=audio_config)
+        with open(AUDIO_FILE_PATH, "wb") as out:
+            out.write(response.audio_content)
+        print(f"Audio content written to file: {AUDIO_FILE_PATH}")
 
     except Exception as e:
         print(f"Error generating audio: {e}")
-        audio_bytes = None
     if not os.path.exists(AUDIO_FILE_PATH):
         print(f"Audio file could not be found at {AUDIO_FILE_PATH}.")
-    …
+
+    final_summary["Audio"] = AUDIO_FILE_PATH
 
     with open(JSON_FILE_PATH,"w",encoding="utf-8") as f:
         json.dump(final_summary,f,ensure_ascii=False, indent=4)
@@ -84,7 +95,7 @@ def generate_summary(company_name):
             }
         },
         'Final Sentiment Analysis': final_summary["Final Sentiment Analysis"],
-        '…
+        'Audio': AUDIO_FILE_PATH
     }
 
 @app.get("/")
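The rewritten audio path needs two new environment variables next to the existing Groq key. A minimal `.env` sketch (variable names taken from the diff above; values are placeholders):

```
GROQ_API_KEY=<your-groq-api-key>
CLIENT_EMAIL=<service-account>@<project>.iam.gserviceaccount.com
PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n"
```

The `.replace("\\n", "\n")` in api.py assumes the private key is stored with literal `\n` escapes, as sketched here. The credentials and voice settings can also be smoke-tested outside FastAPI; the sketch below reuses only the calls the diff itself makes (the sample text and output file name are arbitrary):

```python
import os
from dotenv import load_dotenv
from google.cloud import texttospeech
from google.oauth2.service_account import Credentials

load_dotenv()

# Build credentials the same way get_tts_client() in api.py does
credentials = Credentials.from_service_account_info({
    "type": "service_account",
    "private_key": os.getenv("PRIVATE_KEY").replace("\\n", "\n"),
    "client_email": os.getenv("CLIENT_EMAIL"),
    "token_uri": "https://oauth2.googleapis.com/token",
})
client = texttospeech.TextToSpeechClient(credentials=credentials)

# One-off Hindi synthesis with the voice api.py selects
response = client.synthesize_speech(
    input=texttospeech.SynthesisInput(text="नमस्ते"),
    voice=texttospeech.VoiceSelectionParams(
        language_code="hi-IN",
        name="hi-IN-Chirp3-HD-Kore",
    ),
    audio_config=texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    ),
)
with open("smoke_test.mp3", "wb") as out:
    out.write(response.audio_content)
print("Wrote smoke_test.mp3")
```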
app.py CHANGED
@@ -1,26 +1,23 @@
 import streamlit as st
 import requests
 
-BASE_URL = "…
+BASE_URL = "http://127.0.0.1:8000"
 st.title("Company Sentiment Analysis")
 
 company_name = st.text_input(
-    "Enter the company name:",
+    "Enter the company name:",
     placeholder="Example: Microsoft, Apple, Tesla"
 )
 
 def display_articles(articles):
     for i, article in enumerate(articles, start=1):
-        st.markdown(f"…
-        st.write(f"- **Title:** {article['Title']}")
+        st.markdown(f"##### **Article {i}: {article['Title']}**")
         st.write(f"- **Summary:** {article['Summary']}")
-        st.write(f"- **Sentiment:** {article['Sentiment']}")
-        st.write(f"- **Score:** {article['Score']:.2f}")
+        st.write(f"- **Sentiment:** {article['Sentiment']} | **Score:** {article['Score']:.2f}")
         st.write(f"- **Topics:** {', '.join(article['Topics'])}")
-        st.markdown("---")
 
 def display_sentiment_distribution(sentiment_distribution):
-    st.…
+    st.markdown("#### **Sentiment Distribution:**")
     sentiment_data = {
         "Sentiment": list(sentiment_distribution.keys()),
         "Count": list(sentiment_distribution.values())
@@ -28,38 +25,35 @@ def display_sentiment_distribution(sentiment_distribution):
     st.table(sentiment_data)
 
 def display_coverage_differences(coverage_differences):
-    …
-    …
-    …
-    …
-    st.markdown("---")
+    if coverage_differences:
+        st.markdown("#### **Coverage Differences:**")
+        for diff in coverage_differences:
+            st.write(f"- **{diff['Comparison']}:** {diff['Impact']}")
 
 def display_topic_overlap(topic_overlap):
-    st.…
+    st.markdown("#### **Topic Overlap:**")
     st.write(f"- **Common Topics:** {', '.join(topic_overlap['Common Topics'])}")
-    st.…
+    st.markdown("- **Unique Topics by Article:**")
     for article, topics in topic_overlap["Unique Topics"].items():
         st.write(f"  - **{article}:** {', '.join(topics)}")
-    st.markdown("---")
 
 if st.button("Generate Summary"):
     if company_name:
         try:
             summary_url = f"{BASE_URL}/generateSummary?company_name={company_name}"
             response = requests.post(summary_url)
-
+
             if response.status_code == 200:
                 data = response.json()
-                …
-                …
-                st.markdown(f"### **Company: {data.get('Company', 'Unknown')}**")
+
+                st.markdown(f"#### **Company: {data.get('Company', 'Unknown')}**")
 
                 # Articles
-                st.markdown("…
+                st.markdown("#### **Articles:**")
                 display_articles(data.get("Articles", []))
 
                 # Comparative Sentiment Score
-                st.markdown("…
+                st.markdown("#### **Comparative Sentiment Score:**")
                 sentiment_distribution = data.get("Comparative Sentiment Score", {}).get("Sentiment Distribution", {})
                 display_sentiment_distribution(sentiment_distribution)
 
@@ -70,12 +64,13 @@ if st.button("Generate Summary"):
                 display_topic_overlap(topic_overlap)
 
                 # Final Sentiment Analysis
-                st.markdown("…
+                st.markdown("#### **Final Sentiment Analysis:**")
                 st.write(data.get("Final Sentiment Analysis", "No sentiment analysis available."))
 
                 # Hindi Summary
-                st.markdown("…
+                st.markdown("#### **Hindi Summary:**")
                 st.write(data.get("Hindi Summary", "No Hindi summary available."))
+                st.audio(f"{BASE_URL}/downloadHindiAudio", format="audio/mp3")
 
             else:
                 st.error(f"Error: {response.status_code}, {response.text}")
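Since app.py now hard-codes `BASE_URL = "http://127.0.0.1:8000"`, the front end expects the FastAPI backend on local port 8000. A typical two-terminal run (the uvicorn command is an assumption; the commit itself includes no launch script):

```
uvicorn api:app --host 127.0.0.1 --port 8000   # terminal 1: backend
streamlit run app.py                           # terminal 2: front end
```

Note that the new `st.audio(f"{BASE_URL}/downloadHindiAudio", format="audio/mp3")` line streams the MP3 from the backend, so the player only works while the API process is running.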
requirements.txt CHANGED
@@ -7,4 +7,7 @@ langchain
 langchain_groq
 elevenlabs
 torch
-python-dotenv
+python-dotenv
+google-cloud-texttospeech
+google-auth
+streamlit
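The additions line up with the code changes: `google-cloud-texttospeech` and `google-auth` back the new TTS client in api.py, and `streamlit` covers app.py. A fresh environment picks everything up with the usual:

```
pip install -r requirements.txt
```

(`elevenlabs` is still listed even though api.py no longer appears to use it; presumably it can be dropped in a later commit.)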