File size: 2,036 Bytes
4a54590
5011794
7b02fdc
 
 
 
c9eaebb
7b02fdc
 
 
 
 
 
 
 
 
 
 
 
 
4a54590
7b02fdc
4a54590
7b02fdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import wave
import gradio as gr
import google.generativeai as genai

# Read the Gemini API key from the environment (e.g. a Hugging Face secret).
# Note that the environment variable looked up is GEMINI_API_KEY.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")

# Fail fast at import time when the key is missing or empty. The message names
# the variable that is actually read above, so the user knows what to set.
if not GOOGLE_API_KEY:
    raise ValueError("Please set your GEMINI_API_KEY environment variable.")

# Configure Generative AI
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Gemini TTS model
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")

# Function to save raw PCM data to WAV file
def save_wave(filename, pcm_data, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination handed to ``wave.open`` in write mode.
        pcm_data: Raw PCM frames, written unchanged after the header.
        channels: Number of audio channels (default 1).
        rate: Frame rate in Hz (default 24000).
        sample_width: Bytes per sample (default 2).
    """
    writer = wave.open(filename, 'wb')
    with writer:
        # One call sets channels, sample width and frame rate (in that order);
        # the frame count and compression fields are the writer's defaults.
        header = (channels, sample_width, rate, 0, 'NONE', 'not compressed')
        writer.setparams(header)
        writer.writeframes(pcm_data)

# Function to handle TTS generation
def generate_tts(text):
    """Convert ``text`` to speech and return a WAV path plus a status message.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected without calling the model.

    Returns:
        A ``(audio_path, status)`` tuple. ``audio_path`` is the path of the
        WAV file that was written, or ``None`` when no audio was produced.
        ``status`` is a human-readable message for the UI.

    Any exception raised while generating or saving the audio is caught and
    reported through ``status`` instead of propagating to the caller.
    """
    # Guard against None as well as blank strings; None has no .strip().
    if not text or not text.strip():
        return None, "Please enter some text."

    try:
        # Imported here so the block needs no change to the file's imports.
        import tempfile

        # NOTE(review): `response_modality` and the "audio/wav" MIME type may
        # not be accepted by this SDK's generate_content() — confirm against
        # the google.generativeai documentation. The call is left unchanged.
        response = model.generate_content(
            text,
            generation_config={"response_mime_type": "audio/wav"},
            response_modality="AUDIO"
        )

        # Extract audio data from response
        audio_data = response.candidates[0].content.parts[0].inline_data.data
        if not audio_data:
            # Do not write an empty WAV file and report success.
            return None, "Error: the model returned no audio data."

        # Use a unique file per request: a single fixed name would be
        # overwritten when several users generate audio at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_filename = tmp.name
        save_wave(output_filename, audio_data)

        return output_filename, "Audio generated successfully!"

    except Exception as e:
        # UI boundary: surface the failure as a status message.
        return None, f"Error: {str(e)}"

# Gradio Interface
with gr.Blocks() as demo:
    # Page heading. The emoji (studio microphone, U+1F399 U+FE0F) is written
    # with escapes so it cannot be corrupted by a file-encoding mismatch.
    gr.Markdown("## \U0001F399\uFE0F Gemini 2.5 Text-to-Speech Demo")

    # Input row: the text to be spoken.
    with gr.Row():
        text_input = gr.Textbox(label="Enter text to convert to speech")

    # Action row: the button that triggers generation.
    with gr.Row():
        submit_button = gr.Button("Generate Speech")

    # Output row: the audio (passed as a file path) and a status message.
    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # generate_tts returns (audio_path, status), matching the two outputs.
    submit_button.click(
        fn=generate_tts,
        inputs=[text_input],
        outputs=[audio_output, status_output]
    )

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()