Athspi committed on
Commit
ab5a1ff
·
verified ·
1 Parent(s): a41c224

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import wave
4
+ from google import genai
5
+ from google.genai import types
6
+ from google.colab import userdata
7
+
8
+ # Set up the wave file to save the output:
9
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio bytes to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination path (or writable binary file object) handed
            to ``wave.open``.
        pcm: Bytes-like object holding the audio frames to write.
        channels: Number of audio channels stored in the header.
        rate: Sampling rate in frames per second.
        sample_width: Size of one sample in bytes.
    """
    writer = wave.open(filename, "wb")
    with writer:
        # One call configures the header: (nchannels, sampwidth, framerate,
        # nframes, comptype, compname). The frame count is a placeholder;
        # writeframes() patches the header with the real value.
        writer.setparams(
            (channels, sample_width, rate, 0, "NONE", "not compressed")
        )
        writer.writeframes(pcm)
15
+
16
+ # Retrieve the API key from Colab's Secrets Manager. NOTE(review): `userdata` is imported from `google.colab`, which is presumably only importable inside a Colab runtime — confirm before running this app.py anywhere else.
17
+ GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')  # the secret is looked up under the name 'GOOGLE_API_KEY'
18
+ client = genai.Client(api_key=GOOGLE_API_KEY)  # module-level google-genai client; synthesize_speech calls client.models.generate_content on it
19
+
20
def synthesize_speech(text):
    """Synthesize speech for ``text`` and return the path of the saved WAV file.

    The text is sent to the ``gemini-2.5-flash-preview-tts`` model with the
    prebuilt ``Kore`` voice. The audio bytes found in the first part of the
    first candidate are written to a WAV file in the current working
    directory through ``wave_file``.

    Args:
        text: The text to be spoken.

    Returns:
        The name of the WAV file that was written, of the form
        ``out_<unix-timestamp>_<random-hex>.wav``.

    Raises:
        gr.Error: If ``text`` is empty or blank, or if the model response
            does not carry any audio data.
    """
    # Function-scope import: uuid is stdlib and is only needed here.
    import uuid

    # Reject blank input up front instead of spending an API call on it.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=f"Say cheerfully: {text}",
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore',
                    )
                )
            ),
        )
    )

    # Any link of this chain may be missing or empty (no candidates, no
    # content, no parts, no inline data). Turn that into one readable error
    # rather than an IndexError/AttributeError/TypeError from deep inside.
    try:
        data = response.candidates[0].content.parts[0].inline_data.data
    except (AttributeError, IndexError, TypeError):
        data = None
    if not data:
        raise gr.Error("The model did not return any audio for this text.")

    # A timestamp with one-second resolution is not unique: two requests in
    # the same second would overwrite each other's file. Keep the timestamp
    # for readability and append a random suffix to guarantee uniqueness.
    timestamp = int(time.time())
    file_name = f'out_{timestamp}_{uuid.uuid4().hex}.wav'

    # NOTE(review): assumes the returned bytes are raw PCM matching the
    # wave_file defaults (mono, 24 kHz, 16-bit) — confirm against the API docs.
    wave_file(file_name, data)

    return file_name
45
+
46
# UI wiring: one text box in, one audio player out, served by synthesize_speech.
_text_input = gr.Textbox(label="Enter text for speech synthesis")
_audio_output = gr.Audio(label="Generated Audio")

iface = gr.Interface(
    fn=synthesize_speech,
    inputs=_text_input,
    outputs=_audio_output,
    title="Text-to-Speech Interface",
)

# Start the Gradio server as soon as the module is executed.
iface.launch()