# Aittsg / app.py (Athspi)
# Commit c9eaebb (verified): "Update app.py" - 2.04 kB
import os
import wave
import gradio as gr
import google.generativeai as genai
# Read the Gemini API key from the environment (for example a Hugging Face
# Space secret). GEMINI_API_KEY is the primary name; GOOGLE_API_KEY is accepted
# as a fallback so that either spelling works.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail fast at import time: nothing below can work without a key.
    raise ValueError(
        "Please set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable."
    )

# Configure the Generative AI client with the key.
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize the Gemini TTS model.
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
def save_wave(filename, pcm_data, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio frames to a WAV file.

    Args:
        filename: Destination handed straight to ``wave.open`` in ``"wb"`` mode.
        pcm_data: Raw PCM frames as a bytes-like object.
        channels: Channel count recorded in the WAV header.
        rate: Frame rate in Hz recorded in the WAV header.
        sample_width: Bytes per sample recorded in the WAV header.
    """
    # The context manager closes the file (and finalizes the header) even if
    # writing the frames fails.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm_data)
def generate_tts(text):
    """Convert ``text`` to speech and store the result as a WAV file.

    Args:
        text: The text to synthesize. ``None`` and blank strings are rejected.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is
        ``"output.wav"`` on success and ``None`` when the input is blank, the
        response carries no audio, or generation raises.
    """
    # Guard against None as well as empty / whitespace-only input so the
    # handler never raises before reaching the try block.
    if not text or not text.strip():
        return None, "Please enter some text."

    try:
        # NOTE(review): confirm that this SDK's generate_content() accepts the
        # `response_modality` keyword and an audio `response_mime_type` —
        # TODO verify against the SDK documentation.
        response = model.generate_content(
            text,
            generation_config={"response_mime_type": "audio/wav"},
            response_modality="AUDIO",
        )
        # Extract audio data from the first part of the first candidate.
        audio_data = response.candidates[0].content.parts[0].inline_data.data
        if not audio_data:
            # Avoid writing an empty WAV file and reporting it as a success.
            return None, "Error: the model returned no audio data."

        output_filename = "output.wav"
        save_wave(output_filename, audio_data)
        return output_filename, "Audio generated successfully!"
    except Exception as e:
        # UI boundary: surface any failure in the status box instead of
        # crashing the request.
        return None, f"Error: {e}"
# Gradio interface: a text box, a trigger button, and an audio player with a
# status line, wired to generate_tts.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ Gemini 2.5 Text-to-Speech Demo")

    with gr.Row():
        text_input = gr.Textbox(label="Enter text to convert to speech")

    with gr.Row():
        submit_button = gr.Button("Generate Speech")

    with gr.Row():
        # type="filepath": the handler returns a path to the WAV file.
        audio_output = gr.Audio(label="Generated Audio", type="filepath")
        status_output = gr.Textbox(label="Status")

    # generate_tts returns (audio_path, status_message), matching the order of
    # the outputs list.
    submit_button.click(
        fn=generate_tts,
        inputs=[text_input],
        outputs=[audio_output, status_output],
    )

# Launch the Gradio app only when run as a script.
if __name__ == "__main__":
    demo.launch()