Athspi committed on
Commit
2d4c672
·
verified ·
1 Parent(s): 6802088

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -49
app.py CHANGED
@@ -1,49 +1 @@
1
- import os
2
- import wave
3
- from google import genai
4
- from google.genai import types
5
- import gradio as gr
6
-
7
- # Load API key from Hugging Face environment variable
8
- client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
9
-
10
- # Function to save PCM data to WAV
11
- def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
12
- with wave.open(filename, "wb") as wf:
13
- wf.setnchannels(channels)
14
- wf.setsampwidth(sample_width)
15
- wf.setframerate(rate)
16
- wf.writeframes(pcm)
17
-
18
- # Main function to generate speech
19
- def generate_audio(text):
20
- response = client.models.generate_content(
21
- model="gemini-2.5-flash-preview-tts",
22
- contents=f"Say cheerfully: {text}",
23
- config=types.GenerateContentConfig(
24
- response_modalities=["AUDIO"],
25
- speech_config=types.SpeechConfig(
26
- voice_config=types.VoiceConfig(
27
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
28
- voice_name='Kore',
29
- )
30
- )
31
- ),
32
- )
33
- )
34
-
35
- data = response.candidates[0].content.parts[0].inline_data.data
36
- file_name = "out.wav"
37
- wave_file(file_name, data)
38
- return file_name
39
-
40
- # Gradio interface
41
- iface = gr.Interface(
42
- fn=generate_audio,
43
- inputs=gr.Textbox(label="Text to Speak"),
44
- outputs=gr.Audio(type="filepath", label="Generated Audio"),
45
- title="Gemini 2.5 Flash TTS",
46
- )
47
-
48
- if __name__ == "__main__":
49
- iface.launch()
 
1
+