raygiles3 committed on
Commit
0e20c4d
·
verified ·
1 Parent(s): 75c077d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -66
app.py CHANGED
@@ -1,66 +1,22 @@
1
- import base64
2
- import json
3
- from flask import Flask, render_template, request
4
- from worker import speech_to_text, text_to_speech, openai_process_message
5
- from flask_cors import CORS
6
- import os
7
-
8
- app = Flask(__name__)
9
- cors = CORS(app, resources={r"/*": {"origins": "*"}})
10
-
11
-
12
- @app.route('/', methods=['GET'])
13
- def index():
14
- return render_template('index.html')
15
-
16
-
17
- @app.route('/speech-to-text', methods=['POST'])
18
- def speech_to_text_route():
19
- print("processing speech-to-text")
20
- audio_binary = request.data # Get the user's speech from their request
21
- text = speech_to_text(audio_binary) # Call speech_to_text function to transcribe the speech
22
-
23
- # Return the response back to the user in JSON format
24
- response = app.response_class(
25
- response=json.dumps({'text': text}),
26
- status=200,
27
- mimetype='application/json'
28
- )
29
- print(response)
30
- print(response.data)
31
- return response
32
-
33
-
34
- @app.route('/process-message', methods=['POST'])
35
- def process_message_route():
36
- user_message = request.json['userMessage'] # Get user's message from their request
37
- print('user_message', user_message)
38
-
39
- voice = request.json['voice'] # Get user's preferred voice from their request
40
- print('voice', voice)
41
-
42
- # Call openai_process_message function to process the user's message and get a response back
43
- openai_response_text = openai_process_message(user_message)
44
-
45
- # Clean the response to remove any empty lines
46
- openai_response_text = os.linesep.join([s for s in openai_response_text.splitlines() if s])
47
-
48
- # Call our text_to_speech function to convert the OpenAI API's response to speech
49
- openai_response_speech = text_to_speech(openai_response_text, voice)
50
-
51
- # convert openai_response_speech to base64 string so it can be sent back in the JSON response
52
- openai_response_speech = base64.b64encode(openai_response_speech).decode('utf-8')
53
-
54
- # Send a JSON response back to the user containing their message's response both in text and speech formats
55
- response = app.response_class(
56
- response=json.dumps({"openaiResponseText": openai_response_text, "openaiResponseSpeech": openai_response_speech}),
57
- status=200,
58
- mimetype='application/json'
59
- )
60
-
61
- print(response)
62
- return response
63
-
64
-
65
- if __name__ == "__main__":
66
- app.run(port=8000, host='0.0.0.0')
 
1
+ import torch
2
+ from transformers import pipeline
3
+
4
+ # Initialize the speech-to-text pipeline from Hugging Face Transformers
5
+ # This uses the "openai/whisper-tiny.en" model for automatic speech recognition (ASR)
6
+ # The `chunk_length_s` parameter specifies the chunk length in seconds for processing
7
+ pipe = pipeline(
8
+ "automatic-speech-recognition",
9
+ model="openai/whisper-tiny.en",
10
+ chunk_length_s=30,
11
+ )
12
+
13
+ # Define the path to the audio file that needs to be transcribed
14
+ sample = 'downloaded_audio.mp3'
15
+
16
+ # Perform speech recognition on the audio file
17
+ # The `batch_size=8` parameter indicates how many chunks are processed at a time
18
+ # The "text" entry of the pipeline's result (the transcribed text) is stored in `prediction`
19
+ prediction = pipe(sample, batch_size=8)["text"]
20
+
21
+ # Print the transcribed text to the console
22
+ print(prediction)