MaroofTechSorcerer committed
Commit 854f1c9 · verified · 1 Parent(s): aec967d

Update app.py

Files changed (1):
  1. app.py +183 -238
app.py CHANGED
@@ -8,86 +8,100 @@ import plotly.express as px
  import logging
  import warnings
  import whisper
+ from pydub import AudioSegment
+ import time
  import base64
  import io
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
  import streamlit.components.v1 as components

- # Try importing torchaudio, fallback to pydub
- try:
-     import torchaudio
-     USE_TORCHAUDIO = True
- except ImportError:
-     from pydub import AudioSegment
-     USE_TORCHAUDIO = False
-     st.warning("torchaudio not found. Using pydub (slower). Install torchaudio: pip install torchaudio")
-
- # Suppress warnings and set logging
- logging.getLogger("torch").setLevel(logging.ERROR)
- logging.getLogger("transformers").setLevel(logging.ERROR)
+ # Suppress warnings for a clean console
+ logging.getLogger("torch").setLevel(logging.CRITICAL)
+ logging.getLogger("transformers").setLevel(logging.CRITICAL)
  warnings.filterwarnings("ignore")
  os.environ["TOKENIZERS_PARALLELISM"] = "false"

- # Streamlit config
- st.set_page_config(layout="wide", page_title="Voice Sentiment Analysis")
- st.title("🎙 Voice Sentiment Analysis")
- st.markdown("Fast, accurate detection of emotions, sentiment, and sarcasm from voice or text.")
+ # Check if CUDA is available, otherwise use CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Set Streamlit app layout
+ st.set_page_config(layout="wide", page_title="Voice Based Sentiment Analysis")
+
+ # Interface design
+ st.title("🎙 Voice Based Sentiment Analysis")
+ st.write("Detect emotions, sentiment, and sarcasm from your voice with optimized speed and accuracy using OpenAI Whisper.")

- # Global model cache
+ # Emotion Detection Function
  @st.cache_resource
- def load_models():
+ def get_emotion_classifier():
      try:
-         # Load Whisper model with CPU optimization
-         whisper_model = whisper.load_model("base")
-
-         # Load emotion detection model
-         emotion_tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
-         emotion_model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion")
-         emotion_classifier = pipeline("text-classification", model=emotion_model, tokenizer=emotion_tokenizer,
-                                       top_k=None, device=-1) # CPU only
-
-         # Load sarcasm detection model
-         sarcasm_tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
-         sarcasm_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony")
-         sarcasm_classifier = pipeline("text-classification", model=sarcasm_model, tokenizer=sarcasm_tokenizer,
-                                       device=-1) # CPU only
-
-         return whisper_model, emotion_classifier, sarcasm_classifier
+         tokenizer = AutoTokenizer.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion", use_fast=True)
+         model = AutoModelForSequenceClassification.from_pretrained("bhadresh-savani/distilbert-base-uncased-emotion").to(device)
+         if torch.cuda.is_available():
+             model = model.half() # Use fp16 on GPU
+         classifier = pipeline("text-classification",
+                               model=model,
+                               tokenizer=tokenizer,
+                               top_k=None,
+                               device=0 if torch.cuda.is_available() else -1)
+         return classifier
      except Exception as e:
-         st.error(f"Failed to load models: {str(e)}")
-         raise
-
- whisper_model, emotion_classifier, sarcasm_classifier = load_models()
+         st.error(f"Failed to load emotion model: {str(e)}")
+         return None

- # Emotion detection
- async def perform_emotion_detection(text):
-     if not text or len(text.strip()) < 3:
-         return {}, "neutral", {}, "NEUTRAL"
-
+ def perform_emotion_detection(text):
      try:
-         results = emotion_classifier(text)[0]
-         emotions_dict = {r['label']: r['score'] for r in results}
-         filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
-         top_emotion = max(filtered_emotions, key=filtered_emotions.get, default="neutral")
-
+         if not text or len(text.strip()) < 3:
+             return {}, "neutral", {}, "NEUTRAL"
+         emotion_classifier = get_emotion_classifier()
+         if not emotion_classifier:
+             return {}, "neutral", {}, "NEUTRAL"
+         emotion_results = emotion_classifier(text)[0]
+         emotion_map = {
+             "joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨",
+             "sadness": "😭", "surprise": "😲"
+         }
          positive_emotions = ["joy"]
          negative_emotions = ["anger", "disgust", "fear", "sadness"]
-         sentiment = ("POSITIVE" if top_emotion in positive_emotions else
-                      "NEGATIVE" if top_emotion in negative_emotions else "NEUTRAL")
-
-         emotion_map = {"joy": "😊", "anger": "😡", "disgust": "🤢", "fear": "😨", "sadness": "😭", "surprise": "😲"}
+         neutral_emotions = ["surprise"]
+         emotions_dict = {result['label']: result['score'] for result in emotion_results}
+         filtered_emotions = {k: v for k, v in emotions_dict.items() if v > 0.01}
+         if not filtered_emotions:
+             filtered_emotions = emotions_dict
+         top_emotion = max(filtered_emotions, key=filtered_emotions.get)
+         if top_emotion in positive_emotions:
+             sentiment = "POSITIVE"
+         elif top_emotion in negative_emotions:
+             sentiment = "NEGATIVE"
+         else:
+             sentiment = "NEUTRAL"
          return emotions_dict, top_emotion, emotion_map, sentiment
      except Exception as e:
          st.error(f"Emotion detection failed: {str(e)}")
          return {}, "neutral", {}, "NEUTRAL"

- # Sarcasm detection
- async def perform_sarcasm_detection(text):
-     if not text or len(text.strip()) < 3:
-         return False, 0.0
-
+ # Sarcasm Detection Function
+ @st.cache_resource
+ def get_sarcasm_classifier():
+     try:
+         tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-irony", use_fast=True)
+         model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-irony").to(device)
+         if torch.cuda.is_available():
+             model = model.half() # Use fp16 on GPU
+         classifier = pipeline("text-classification", model=model, tokenizer=tokenizer,
+                               device=0 if torch.cuda.is_available() else -1)
+         return classifier
+     except Exception as e:
+         st.error(f"Failed to load sarcasm model: {str(e)}")
+         return None
+
+ def perform_sarcasm_detection(text):
      try:
+         if not text or len(text.strip()) < 3:
+             return False, 0.0
+         sarcasm_classifier = get_sarcasm_classifier()
+         if not sarcasm_classifier:
+             return False, 0.0
          result = sarcasm_classifier(text)[0]
          is_sarcastic = result['label'] == "LABEL_1"
          sarcasm_score = result['score'] if is_sarcastic else 1 - result['score']
@@ -96,248 +110,179 @@ async def perform_sarcasm_detection(text):
          st.error(f"Sarcasm detection failed: {str(e)}")
          return False, 0.0

- # Audio validation
+ # Validate audio quality
  def validate_audio(audio_path):
      try:
-         if USE_TORCHAUDIO:
-             waveform, sample_rate = torchaudio.load(audio_path)
-             if waveform.abs().max() < 0.01:
-                 st.warning("Audio volume too low.")
-                 return False
-             if waveform.shape[1] / sample_rate < 1:
-                 st.warning("Audio too short.")
-                 return False
-         else:
-             sound = AudioSegment.from_file(audio_path)
-             if sound.dBFS < -55:
-                 st.warning("Audio volume too low.")
-                 return False
-             if len(sound) < 1000:
-                 st.warning("Audio too short.")
-                 return False
+         sound = AudioSegment.from_file(audio_path)
+         if sound.dBFS < -55:
+             st.warning("Audio volume is too low.")
+             return False
+         if len(sound) < 1000:
+             st.warning("Audio is too short.")
+             return False
          return True
      except Exception as e:
          st.error(f"Invalid audio file: {str(e)}")
          return False

- # Audio transcription
- @st.cache_data
+ # Speech Recognition with Whisper
+ @st.cache_resource
+ def load_whisper_model():
+     try:
+         model = whisper.load_model("base").to(device)
+         return model
+     except Exception as e:
+         st.error(f"Failed to load Whisper model: {str(e)}")
+         return None
+
  def transcribe_audio(audio_path):
+     temp_wav_path = None
      try:
-         if USE_TORCHAUDIO:
-             waveform, sample_rate = torchaudio.load(audio_path)
-             if sample_rate != 16000:
-                 resampler = torchaudio.transforms.Resample(sample_rate, 16000)
-                 waveform = resampler(waveform)
-             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                 torchaudio.save(temp_file.name, waveform, 16000)
-                 result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
-         else:
-             sound = AudioSegment.from_file(audio_path)
-             sound = sound.set_frame_rate(16000).set_channels(1)
-             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-                 sound.export(temp_file.name, format="wav")
-                 result = whisper_model.transcribe(temp_file.name, language="en", no_speech_threshold=0.6)
-         os.remove(temp_file.name)
+         sound = AudioSegment.from_file(audio_path).set_frame_rate(16000).set_channels(1)
+         temp_wav_path = os.path.join(tempfile.gettempdir(), f"temp_{int(time.time())}.wav")
+         sound.export(temp_wav_path, format="wav")
+         model = load_whisper_model()
+         if not model:
+             return ""
+         result = model.transcribe(temp_wav_path, language="en", fp16=torch.cuda.is_available())
          return result["text"].strip()
      except Exception as e:
          st.error(f"Transcription failed: {str(e)}")
          return ""
+     finally:
+         if temp_wav_path and os.path.exists(temp_wav_path):
+             os.remove(temp_wav_path)

- # Process uploaded audio
+ # Process uploaded audio files
  def process_uploaded_audio(audio_file):
+     if not audio_file:
+         return None
+     temp_file_path = None
      try:
          ext = audio_file.name.split('.')[-1].lower()
          if ext not in ['wav', 'mp3', 'ogg']:
-             st.error("Unsupported format. Use WAV, MP3, or OGG.")
+             st.error("Unsupported audio format. Use WAV, MP3, or OGG.")
              return None
-         with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as temp_file:
-             temp_file.write(audio_file.getvalue())
-             temp_file_path = temp_file.name
+         temp_file_path = os.path.join(tempfile.gettempdir(), f"uploaded_{int(time.time())}.{ext}")
+         with open(temp_file_path, "wb") as f:
+             f.write(audio_file.getvalue())
          if not validate_audio(temp_file_path):
-             os.remove(temp_file_path)
              return None
          return temp_file_path
      except Exception as e:
-         st.error(f"Error processing audio: {str(e)}")
+         st.error(f"Error processing uploaded audio: {str(e)}")
          return None
-
- # Process base64 audio
- def process_base64_audio(base64_data):
-     try:
-         if not base64_data.startswith("data:audio"):
-             st.error("Invalid audio data.")
-             return None
-         base64_binary = base64_data.split(',')[1]
-         binary_data = base64.b64decode(base64_binary)
-         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
-             temp_file.write(binary_data)
-             temp_file_path = temp_file.name
-         if not validate_audio(temp_file_path):
+     finally:
+         if temp_file_path and os.path.exists(temp_file_path) and not st.session_state.get('keep_temp', False):
              os.remove(temp_file_path)
-             return None
-         return temp_file_path
-     except Exception as e:
-         st.error(f"Error processing audio data: {str(e)}")
-         return None
+
+ # Show model information
+ def show_model_info():
+     st.sidebar.header("🧠 About the Models")
+     with st.sidebar.expander("Model Details"):
+         st.markdown("""
+         - *Emotion*: DistilBERT (bhadresh-savani/distilbert-base-uncased-emotion)
+         - *Sarcasm*: RoBERTa (cardiffnlp/twitter-roberta-base-irony)
+         - *Speech*: OpenAI Whisper (base)
+         """)

  # Custom audio recorder
  def custom_audio_recorder():
+     st.warning("Recording requires microphone access and a modern browser.")
      audio_recorder_html = """
      <script>
-     let recorder, audioBlob, isRecording = false;
-     const recordButton = document.getElementById('record-button');
-     const audioPlayback = document.getElementById('audio-playback');
-     const audioData = document.getElementById('audio-data');
-
+     let recorder, stream;
      async function startRecording() {
          try {
-             const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+             stream = await navigator.mediaDevices.getUserMedia({ audio: true });
              recorder = new MediaRecorder(stream);
              const chunks = [];
              recorder.ondataavailable = e => chunks.push(e.data);
              recorder.onstop = () => {
-                 audioBlob = new Blob(chunks, { type: 'audio/wav' });
-                 audioPlayback.src = URL.createObjectURL(audioBlob);
+                 const blob = new Blob(chunks, { type: 'audio/wav' });
                  const reader = new FileReader();
-                 reader.readAsDataURL(audioBlob);
                  reader.onloadend = () => {
-                     audioData.value = reader.result;
                      window.parent.postMessage({type: "streamlit:setComponentValue", value: reader.result}, "*");
                  };
+                 reader.readAsDataURL(blob);
                  stream.getTracks().forEach(track => track.stop());
              };
             recorder.start();
-             isRecording = true;
-             recordButton.textContent = 'Stop Recording';
-             recordButton.classList.add('recording');
-         } catch (e) {
-             alert('Recording failed: ' + e.message);
-         }
+             document.getElementById('record-btn').textContent = 'Stop Recording';
+         } catch (e) { alert('Recording failed: ' + e.message); }
      }
-
      function stopRecording() {
          recorder.stop();
-         isRecording = false;
-         recordButton.textContent = 'Start Recording';
-         recordButton.classList.remove('recording');
+         document.getElementById('record-btn').textContent = 'Start Recording';
+     }
+     function toggleRecording() {
+         if (!recorder || recorder.state === 'inactive') startRecording();
+         else stopRecording();
      }
-
-     document.getElementById('record-button').onclick = () => {
-         isRecording ? stopRecording() : startRecording();
-     };
      </script>
-     <style>
-     .recorder-container { text-align: center; padding: 15px; }
-     .record-button { background: #ff4b4b; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; }
-     .record-button.recording { background: #d32f2f; animation: pulse 1.5s infinite; }
-     @keyframes pulse { 0% { opacity: 1; } 50% { opacity: 0.7; } 100% { opacity: 1; } }
-     audio { margin-top: 10px; width: 100%; }
-     </style>
-     <div class="recorder-container">
-         <button id="record-button">Start Recording</button>
-         <audio id="audio-playback" controls></audio>
-         <input type="hidden" id="audio-data">
-     </div>
+     <button id="record-btn" onclick="toggleRecording()">Start Recording</button>
      """
-     return components.html(audio_recorder_html, height=150)
+     return components.html(audio_recorder_html, height=100)

- # Display results
+ # Display analysis results
  def display_analysis_results(transcribed_text):
-     async def run_analyses():
-         emotion_task = perform_emotion_detection(transcribed_text)
-         sarcasm_task = perform_sarcasm_detection(transcribed_text)
-         return await asyncio.gather(emotion_task, sarcasm_task)
-
-     with st.spinner("Analyzing..."):
-         with ThreadPoolExecutor() as executor:
-             loop = asyncio.get_event_loop()
-             (emotions_dict, top_emotion, emotion_map, sentiment), (is_sarcastic, sarcasm_score) = loop.run_until_complete(run_analyses())
-
+     emotions_dict, top_emotion, emotion_map, sentiment = perform_emotion_detection(transcribed_text)
+     is_sarcastic, sarcasm_score = perform_sarcasm_detection(transcribed_text)
      st.header("Results")
-     st.subheader("Transcribed Text")
-     st.text_area("Text", transcribed_text, height=100, disabled=True)
-
-     col1, col2 = st.columns([1, 2])
+     st.text_area("Transcribed Text", transcribed_text, height=100, disabled=True)
+     col1, col2 = st.columns(2)
      with col1:
          st.subheader("Sentiment")
-         sentiment_icon = "👍" if sentiment == "POSITIVE" else "👎" if sentiment == "NEGATIVE" else "😐"
-         st.markdown(f"{sentiment_icon} **{sentiment}**")
-
-         st.subheader("Sarcasm")
-         sarcasm_icon = "😏" if is_sarcastic else "😐"
-         st.markdown(f"{sarcasm_icon} **{'Detected' if is_sarcastic else 'Not Detected'}** (Score: {sarcasm_score:.2f})")
-
+         st.write(f"{sentiment} ({top_emotion})")
      with col2:
-         st.subheader("Emotions")
-         if emotions_dict:
-             st.markdown(f"*Dominant:* {emotion_map.get(top_emotion, '❓')} **{top_emotion.capitalize()}** ({emotions_dict[top_emotion]:.2f})")
-             emotions = list(emotions_dict.keys())[:5]
-             scores = list(emotions_dict.values())[:5]
-             fig = px.bar(x=emotions, y=scores, labels={'x': 'Emotion', 'y': 'Score'}, color=emotions,
-                          color_discrete_sequence=px.colors.qualitative.Set2)
-             fig.update_layout(yaxis_range=[0, 1], showlegend=False, height=300)
-             st.plotly_chart(fig, use_container_width=True)
-         else:
-             st.write("No emotions detected.")
+         st.subheader("Sarcasm")
+         st.write(f"{'Detected' if is_sarcastic else 'Not Detected'} (Score: {sarcasm_score:.2f})")
+         if emotions_dict:
+             fig = px.bar(x=list(emotions_dict.keys()), y=list(emotions_dict.values()), labels={'x': 'Emotion', 'y': 'Score'})
+             st.plotly_chart(fig)

-     with st.expander("Details"):
-         st.markdown("""
-         - **Speech**: Whisper-base (fast, ~10-15% WER)
-         - **Emotions**: DistilBERT (joy, anger, etc.)
-         - **Sarcasm**: RoBERTa (irony detection)
-         - **Tips**: Clear audio, minimal noise
-         """)
+ # Process base64 audio
+ def process_base64_audio(base64_data):
+     temp_file_path = None
+     try:
+         audio_bytes = base64.b64decode(base64_data.split(',')[1])
+         temp_file_path = os.path.join(tempfile.gettempdir(), f"rec_{int(time.time())}.wav")
+         with open(temp_file_path, "wb") as f:
+             f.write(audio_bytes)
+         if not validate_audio(temp_file_path):
+             return None
+         return temp_file_path
+     except Exception as e:
+         st.error(f"Error processing recorded audio: {str(e)}")
+         return None
+     finally:
+         if temp_file_path and os.path.exists(temp_file_path):
+             os.remove(temp_file_path)

- # Main app
+ # Main App Logic
  def main():
-     if 'debug_info' not in st.session_state:
-         st.session_state.debug_info = []
-
-     tab1, tab2, tab3 = st.tabs(["📁 Upload Audio", "🎙 Record Audio", "✍️ Text Input"])
-
+     tab1, tab2 = st.tabs(["Upload Audio", "Record Audio"])
      with tab1:
-         audio_file = st.file_uploader("Upload audio", type=["wav", "mp3", "ogg"])
+         audio_file = st.file_uploader("Upload Audio", type=["wav", "mp3", "ogg"])
          if audio_file:
-             st.audio(audio_file.getvalue())
-             if st.button("Analyze", key="upload_analyze"):
-                 progress = st.progress(0)
-                 temp_path = process_uploaded_audio(audio_file)
+             st.audio(audio_file)
+             if st.button("Analyze Uploaded Audio"):
+                 with st.spinner("Analyzing..."):
+                     temp_path = process_uploaded_audio(audio_file)
+                     if temp_path:
+                         text = transcribe_audio(temp_path)
+                         if text:
+                             display_analysis_results(text)
+     with tab2:
+         audio_data = custom_audio_recorder()
+         if audio_data and st.button("Analyze Recorded Audio"):
+             with st.spinner("Analyzing..."):
+                 temp_path = process_base64_audio(audio_data)
                  if temp_path:
-                     progress.progress(50)
                      text = transcribe_audio(temp_path)
                      if text:
-                         progress.progress(100)
                          display_analysis_results(text)
-                     else:
-                         st.error("Transcription failed.")
-                     if os.path.exists(temp_path):
-                         os.remove(temp_path)
-                 progress.empty()
-
-     with tab2:
-         st.markdown("Record audio using your microphone.")
-         audio_data = custom_audio_recorder()
-         if audio_data and st.button("Analyze", key="record_analyze"):
-             progress = st.progress(0)
-             temp_path = process_base64_audio(audio_data)
-             if temp_path:
-                 progress.progress(50)
-                 text = transcribe_audio(temp_path)
-                 if text:
-                     progress.progress(100)
-                     display_analysis_results(text)
-                 else:
-                     st.error("Transcription failed.")
-                 if os.path.exists(temp_path):
-                     os.remove(temp_path)
-             progress.empty()
-
-     with tab3:
-         manual_text = st.text_area("Enter text:", placeholder="Type text to analyze...")
-         if st.button("Analyze", key="text_analyze") and manual_text:
-             display_analysis_results(manual_text)
+     show_model_info()

  if __name__ == "__main__":
-     main()
-
+     main()