import base64
import json
import os
import wave

import gradio as gr
import requests


# Gemini API key read from the environment; None when GEMINI_API_KEY is unset.
API_KEY = os.getenv("GEMINI_API_KEY")

# REST endpoint of the Gemini 2.5 Flash preview text-to-speech model.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-tts:generateContent"

# Headers sent with every request: JSON body plus the API key.
headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": API_KEY,
}


def _pcm_sample_rate(mime_type, default=24000):
    """Return the ``rate=`` parameter of an audio MIME type, or *default*."""
    for param in (mime_type or "").split(";")[1:]:
        key, _, value = param.partition("=")
        if key.strip().lower() == "rate" and value.strip().isdigit():
            return int(value.strip())
    return default


def generate_audio(text):
    """Synthesize *text* with the Gemini TTS endpoint and save it as a WAV file.

    The request asks for audio output spoken by the prebuilt "Kore" voice and
    prefixes the text with "Say cheerfully: ".

    Args:
        text: The text to be spoken.

    Returns:
        The path of the written file ("output.wav") on success, or a string
        starting with "API Error: " when the request fails or the response
        does not contain audio data.
    """
    payload = {
        "contents": [{
            "parts": [{
                "text": f"Say cheerfully: {text}"
            }]
        }],
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {
                        "voiceName": "Kore"
                    }
                }
            }
        }
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.post(
            API_URL, headers=headers, data=json.dumps(payload), timeout=60
        )
    except requests.RequestException as exc:
        return f"API Error: {exc}"

    if response.status_code != 200:
        return f"API Error: {response.text}"

    try:
        inline_data = response.json()["candidates"][0]["content"]["parts"][0]["inlineData"]
        audio_bytes = base64.b64decode(inline_data["data"])
    except (KeyError, IndexError, TypeError, ValueError):
        # E.g. a response without candidates or without an audio part.
        # Truncated so a large body does not flood the message.
        return f"API Error: unexpected response: {response.text[:500]}"

    file_path = "output.wav"
    if audio_bytes[:4] == b"RIFF":
        # Already a WAV container: write it through unchanged.
        with open(file_path, "wb") as f:
            f.write(audio_bytes)
    else:
        # The Gemini speech-generation docs describe the payload as raw
        # 16-bit mono PCM at 24 kHz, so it needs a WAV header to be playable.
        # NOTE(review): the rate is taken from the part's mimeType when it
        # carries a "rate=" parameter - confirm against real responses.
        sample_rate = _pcm_sample_rate(inline_data.get("mimeType"))
        with wave.open(file_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(sample_rate)
            wav_file.writeframes(audio_bytes)

    return file_path


def speak_gradio(text):
    """Gradio callback: turn *text* into speech and return the audio file path.

    Args:
        text: Contents of the input textbox.

    Returns:
        The path of the generated ".wav" file, or None when *text* is blank
        (nothing to speak, so no API call is made).

    Raises:
        gr.Error: When generate_audio() returns anything other than a ".wav"
            path, so the message is shown in the UI instead of being dropped.
    """
    if not text or not text.strip():
        return None

    result = generate_audio(text)
    if result.endswith(".wav"):
        return result
    raise gr.Error(result)


# Gradio UI: one text box in, one audio player out, backed by speak_gradio.
iface = gr.Interface(
    fn=speak_gradio,
    inputs=gr.Textbox(label="Enter text to speak", placeholder="Say something cheerful..."),
    outputs=gr.Audio(label="Gemini TTS Output", type="filepath"),
    title="Gemini TTS (Kore Voice)",
    description="Powered by Gemini 2.5 Flash Preview TTS API. Cheerfully speaks your input!"
)


# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()