# Hugging Face Space (status: Running)
import os
import tempfile
import wave

import google.generativeai as genai
import gradio as gr
# Read the API key from the environment (e.g. Hugging Face Secrets).
# GEMINI_API_KEY is the primary variable; GOOGLE_API_KEY is accepted as a
# fallback because earlier error text told users to set that name.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail at import time: nothing below can work without a key.
    raise ValueError(
        "Please set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable."
    )

# Configure Generative AI
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Gemini TTS model
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
def save_wave(
    filename: str,
    pcm_data: bytes,
    channels: int = 1,
    rate: int = 24000,
    sample_width: int = 2,
) -> None:
    """Write raw PCM frames to *filename* as a WAV file.

    Args:
        filename: Destination path; an existing file is overwritten.
        pcm_data: Raw PCM audio bytes, written as-is after the WAV header.
        channels: Number of audio channels recorded in the header.
        rate: Sample rate in Hz recorded in the header.
        sample_width: Bytes per sample recorded in the header.
    """
    # The context manager closes the file, which finalizes the WAV header
    # with the correct frame count.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm_data)
def generate_tts(text):
    """Generate speech for *text* and return ``(wav_path, status_message)``.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input is rejected without calling the model.

    Returns:
        A 2-tuple. On success: the path of a newly written WAV file and a
        success message. On rejected input or any failure: ``None`` and a
        message describing the problem.
    """
    # Guard against None as well as blank input so .strip() cannot raise.
    if text is None or not text.strip():
        return None, "Please enter some text."

    try:
        # NOTE(review): confirm that this SDK's generate_content() accepts
        # the `response_modality` keyword and the "audio/wav" MIME type; if
        # it does not, the resulting exception is reported via the status
        # message below rather than crashing the app.
        response = model.generate_content(
            text,
            generation_config={"response_mime_type": "audio/wav"},
            response_modality="AUDIO",
        )

        # Extract audio data from the first part of the first candidate.
        audio_data = response.candidates[0].content.parts[0].inline_data.data

        # Use a unique file per request: a fixed name such as "output.wav"
        # would be overwritten when two requests run at the same time.
        # delete=False because the caller needs the file after we return.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_filename = tmp.name
        save_wave(output_filename, audio_data)

        return output_filename, "Audio generated successfully!"
    except Exception as e:
        # UI boundary: surface every failure as a status message instead of
        # letting the exception propagate to the caller.
        return None, f"Error: {e}"
# Gradio Interface: a text box, a button, and the audio/status outputs.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Gemini 2.5 Text-to-Speech Demo")

    with gr.Row():
        text_input = gr.Textbox(label="Enter text to convert to speech")

    with gr.Row():
        submit_button = gr.Button("Generate Speech")

    with gr.Row():
        # type="filepath" matches generate_tts, which returns a WAV path.
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # generate_tts returns (audio_path, status_message), mapped in order
    # onto the two output components.
    submit_button.click(
        fn=generate_tts,
        inputs=[text_input],
        outputs=[audio_output, status_output],
    )
# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()