Update app.py
Browse files
app.py
CHANGED
@@ -1,49 +1 @@
|
|
1 |
-
|
2 |
-
import os
import wave

import gradio as gr
from google import genai
from google.genai import types
|
6 |
-
|
7 |
-
# Shared Gemini client used by generate_audio(). The API key is read from the
# GEMINI_API_KEY environment variable (set as a secret on Hugging Face);
# os.getenv returns None when the variable is not set.
client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
|
9 |
-
|
10 |
-
# Function to save PCM data to WAV
|
11 |
-
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio to a WAV container.

    Args:
        filename: Destination passed straight to ``wave.open`` in ``"wb"``
            mode (a path string or a writable binary file object).
        pcm: Raw PCM frame bytes to write.
        channels: Number of audio channels recorded in the WAV header.
        rate: Sampling rate in Hz recorded in the WAV header.
        sample_width: Bytes per sample recorded in the WAV header.
    """
    with wave.open(filename, "wb") as wf:
        # The header parameters must be set before any frames are written.
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
|
17 |
-
|
18 |
-
# Main function to generate speech
|
19 |
-
def generate_audio(text):
    """Synthesize speech for *text* with Gemini TTS and return a WAV file path.

    Args:
        text: The text to be spoken; it is embedded in a
            "Say cheerfully: ..." prompt.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the audio.

    Raises:
        gr.Error: If *text* is blank or the response contains no audio data.
    """
    # Local import keeps this function self-contained.
    import tempfile

    if not text or not text.strip():
        raise gr.Error("Please enter some text to speak.")

    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=f"Say cheerfully: {text}",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        )
    )

    # Walk the first candidate defensively: a blocked or empty response would
    # otherwise surface as an opaque IndexError/AttributeError.
    pcm = None
    candidates = response.candidates or []
    if candidates and candidates[0].content is not None:
        for part in candidates[0].content.parts or []:
            inline = part.inline_data
            if inline is not None and inline.data:
                pcm = inline.data
                break
    if pcm is None:
        raise gr.Error("The model returned no audio for this request.")

    # Use a unique file per request; a fixed "out.wav" would be overwritten
    # when several users call the app at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        file_name = tmp.name
    wave_file(file_name, pcm)
    return file_name
|
39 |
-
|
40 |
-
# Gradio interface
|
41 |
-
# Single-textbox UI: the entered text is passed to generate_audio and the
# returned file path is rendered by the audio output component.
iface = gr.Interface(
    fn=generate_audio,
    inputs=gr.Textbox(label="Text to Speak"),
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    title="Gemini 2.5 Flash TTS",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|
|
1 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|