Athspi committed on
Commit
36b9aca
·
verified ·
1 Parent(s): a12d4b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -14
app.py CHANGED
@@ -1,11 +1,10 @@
1
- import os
2
  import json
3
  import base64
4
  import requests
5
- from flask import Flask, request, send_file
6
-
7
- app = Flask(__name__)
8
 
 
9
  API_KEY = os.getenv("GEMINI_API_KEY")
10
  API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent"
11
 
@@ -35,23 +34,30 @@ def generate_audio(text):
35
 
36
  response = requests.post(API_URL, headers=headers, data=json.dumps(payload))
37
  if response.status_code != 200:
38
- raise Exception(f"API Error: {response.text}")
39
 
40
  res_json = response.json()
41
  data = res_json["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
42
  audio_bytes = base64.b64decode(data)
43
 
44
- output_path = "out.wav"
45
- with open(output_path, "wb") as f:
46
  f.write(audio_bytes)
47
 
48
- return output_path
 
 
 
 
 
49
 
50
- @app.route("/speak", methods=["POST"])
51
- def speak():
52
- text = request.json.get("text", "Have a wonderful day!")
53
- output_path = generate_audio(text)
54
- return send_file(output_path, mimetype="audio/wav")
 
 
55
 
56
  if __name__ == "__main__":
57
- app.run(host="0.0.0.0", port=7860)
 
1
+ import os
2
  import json
3
  import base64
4
  import requests
5
+ import gradio as gr
 
 
6
 
7
+ # Load API key from Hugging Face secret
8
  API_KEY = os.getenv("GEMINI_API_KEY")
9
  API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent"
10
 
 
34
 
35
  response = requests.post(API_URL, headers=headers, data=json.dumps(payload))
36
  if response.status_code != 200:
37
+ return f"API Error: {response.text}"
38
 
39
  res_json = response.json()
40
  data = res_json["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
41
  audio_bytes = base64.b64decode(data)
42
 
43
+ file_path = "output.wav"
44
+ with open(file_path, "wb") as f:
45
  f.write(audio_bytes)
46
 
47
+ return file_path
48
+
49
+ # Gradio Interface
50
+ def speak_gradio(text):
51
+ result = generate_audio(text)
52
+ return result if result.endswith(".wav") else None
53
 
54
+ iface = gr.Interface(
55
+ fn=speak_gradio,
56
+ inputs=gr.Textbox(label="Enter text to speak", placeholder="Say something cheerful..."),
57
+ outputs=gr.Audio(label="Gemini TTS Output", type="filepath"),
58
+ title="Gemini TTS (Kore Voice)",
59
+ description="Powered by Gemini 2.5 Flash Preview TTS API. Cheerfully speaks your input!"
60
+ )
61
 
62
  if __name__ == "__main__":
63
+ iface.launch()