File size: 1,764 Bytes
36b9aca a12d4b6 36b9aca a12d4b6 36b9aca a12d4b6 36b9aca a12d4b6 36b9aca a12d4b6 36b9aca a12d4b6 36b9aca a12d4b6 36b9aca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import base64
import json
import os
import wave

import gradio as gr
import requests
# The Gemini API key is supplied through the environment (Hugging Face secret).
API_KEY = os.environ.get("GEMINI_API_KEY")

# generateContent endpoint of the Gemini 2.5 Flash preview TTS model.
API_URL = (
    "https://generativelanguage.googleapis.com/v1beta/models/"
    "gemini-2.5-flash-preview-tts:generateContent"
)

# Headers sent with every request; the API key travels in x-goog-api-key.
headers = dict(
    [
        ("Content-Type", "application/json"),
        ("x-goog-api-key", API_KEY),
    ]
)
def _pcm_sample_rate(mime_type, default=24000):
    """Return the ``rate=`` parameter of an audio MIME type, or *default*."""
    for param in mime_type.split(";")[1:]:
        key, _, value = param.strip().partition("=")
        if key.lower() == "rate" and value.isdigit() and int(value) > 0:
            return int(value)
    return default


def generate_audio(text):
    """Synthesize *text* with the Gemini TTS endpoint and save it as a WAV file.

    Args:
        text: Text to be spoken; it is sent with a "Say cheerfully: " prefix
            and the prebuilt "Kore" voice.

    Returns:
        The path of the written file ("output.wav") on success, otherwise a
        message starting with "API Error: " that describes the failure.
    """
    payload = {
        "contents": [{
            "parts": [{
                "text": f"Say cheerfully: {text}"
            }]
        }],
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {
                        "voiceName": "Kore"
                    }
                }
            }
        }
    }

    try:
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=60
        )
    except requests.RequestException as exc:
        return f"API Error: {exc}"
    if response.status_code != 200:
        return f"API Error: {response.text}"

    # A 200 reply can still lack audio (e.g. no candidates), so report that
    # through the same error-string channel instead of raising KeyError.
    try:
        inline = response.json()["candidates"][0]["content"]["parts"][0]["inlineData"]
        audio_bytes = base64.b64decode(inline["data"])
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        return f"API Error: unexpected response format ({exc!r})"

    file_path = "output.wav"
    if audio_bytes[:4] == b"RIFF":
        # Already a WAV container: write it through unchanged.
        with open(file_path, "wb") as f:
            f.write(audio_bytes)
    else:
        # Per the Gemini TTS docs the payload is headerless 16-bit mono PCM
        # (24 kHz by default), so it needs a WAV header to be playable.
        # NOTE(review): the rate is read from mimeType when it carries a
        # "rate=" parameter -- confirm against a live response.
        rate = _pcm_sample_rate(inline.get("mimeType") or "")
        with wave.open(file_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(rate)
            wav_file.writeframes(audio_bytes)
    return file_path
# Gradio Interface
def speak_gradio(text):
    """Gradio callback: speak *text* and return the path of the audio file.

    Args:
        text: Text typed into the UI text box.

    Returns:
        Path of the generated ``.wav`` file.

    Raises:
        gr.Error: If *text* is blank, or if ``generate_audio`` returns
            something other than a ``.wav`` path (its error message). Gradio
            shows the message to the user instead of an empty audio player.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    result = generate_audio(text)
    if not result.endswith(".wav"):
        # generate_audio reports failures as a message string, not a path.
        raise gr.Error(result)
    return result
# UI wiring: a single text box feeds speak_gradio, whose returned file path
# is played back by the audio component.
_text_input = gr.Textbox(
    label="Enter text to speak",
    placeholder="Say something cheerful...",
)
_audio_output = gr.Audio(label="Gemini TTS Output", type="filepath")

iface = gr.Interface(
    title="Gemini TTS (Kore Voice)",
    description="Powered by Gemini 2.5 Flash Preview TTS API. Cheerfully speaks your input!",
    fn=speak_gradio,
    inputs=_text_input,
    outputs=_audio_output,
)
if __name__ == "__main__":
    # Start the Gradio server only when executed as a script, not on import.
    iface.launch()