# Gysyeysg / app.py  (Hugging Face file view; uploader avatar: Athspi)
# Commit 36b9aca (verified): "Update app.py" -- 1.76 kB
import base64
import json
import os
import wave

import gradio as gr
import requests
# The Gemini API key is read from the environment (configured as a
# Hugging Face secret); it is None when the variable is not set.
API_KEY = os.environ.get("GEMINI_API_KEY")

# generateContent endpoint of the Gemini 2.5 Flash preview TTS model.
API_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-2.5-flash-preview-tts:generateContent"
)

# Headers sent with every request: JSON body plus the API key.
headers = {"Content-Type": "application/json"}
headers["x-goog-api-key"] = API_KEY
def _sample_rate_from_mime(mime_type, default=24000):
    """Return the ``rate=`` parameter of an audio MIME type, or *default*.

    Example input: ``"audio/L16;codec=pcm;rate=24000"`` -> ``24000``.
    """
    for param in (mime_type or "").split(";")[1:]:
        key, _, value = param.strip().partition("=")
        if key.strip().lower() == "rate" and value.strip().isdigit():
            return int(value)
    return default


def generate_audio(text, *, voice="Kore", timeout=60):
    """Synthesize *text* with the Gemini TTS API and save it as a WAV file.

    Args:
        text: The text to speak; it is sent as ``"Say cheerfully: <text>"``.
        voice: Name of the prebuilt Gemini voice to use.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The path of the written file (``"output.wav"``) on success, or a
        string starting with ``"API Error: "`` describing the failure.
    """
    payload = {
        "contents": [{
            "parts": [{
                "text": f"Say cheerfully: {text}"
            }]
        }],
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {
                        "voiceName": voice
                    }
                }
            }
        }
    }

    # A timeout keeps a stalled connection from blocking the worker forever;
    # network failures are reported in the same style as HTTP errors.
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        return f"API Error: {exc}"

    if response.status_code != 200:
        return f"API Error: {response.text}"

    # A 200 response can still lack audio (e.g. no candidates), so treat any
    # missing key / malformed payload as an API error instead of crashing.
    try:
        inline_data = response.json()["candidates"][0]["content"]["parts"][0]["inlineData"]
        audio_bytes = base64.b64decode(inline_data["data"])
        sample_rate = _sample_rate_from_mime(inline_data.get("mimeType"))
    except (KeyError, IndexError, TypeError, AttributeError, ValueError) as exc:
        return f"API Error: unexpected response format ({exc!r})"

    # NOTE: the output path is fixed, so concurrent requests overwrite each
    # other's file.
    file_path = "output.wav"
    if audio_bytes[:4] == b"RIFF":
        # Already a RIFF/WAVE container: store it unchanged.
        with open(file_path, "wb") as f:
            f.write(audio_bytes)
    else:
        # Per the Gemini speech-generation docs the API returns headerless
        # 16-bit mono PCM; wrap it in a WAV container so players can read it.
        with wave.open(file_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_bytes)
    return file_path
# Gradio Interface
def speak_gradio(text):
    """Gradio callback: turn *text* into speech and return the audio path.

    Args:
        text: The text entered by the user.

    Returns:
        Path of the generated ``.wav`` file.

    Raises:
        gr.Error: If ``generate_audio`` returned an error message instead of
            a ``.wav`` path; Gradio shows the message to the user.
    """
    result = generate_audio(text)
    if not result.endswith(".wav"):
        # generate_audio reports failures as a message string rather than a
        # path; surface it instead of silently returning no audio.
        raise gr.Error(result)
    return result
# Build the input and output components first, then wire them to the callback.
_text_input = gr.Textbox(
    label="Enter text to speak",
    placeholder="Say something cheerful...",
)
_audio_output = gr.Audio(label="Gemini TTS Output", type="filepath")

iface = gr.Interface(
    fn=speak_gradio,
    inputs=_text_input,
    outputs=_audio_output,
    title="Gemini TTS (Kore Voice)",
    description="Powered by Gemini 2.5 Flash Preview TTS API. Cheerfully speaks your input!",
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()