Spaces:

Athspi-aitools
/

Aittsg

Running

File size: 2,036 Bytes

4a54590
5011794
7b02fdc
 
 
 
c9eaebb
7b02fdc
 
 
 
 
 
 
 
 
 
 
 
 
4a54590
7b02fdc
4a54590
7b02fdc

import os
import wave
import gradio as gr
import google.generativeai as genai

# Read the API key from the environment (e.g. a Hugging Face Space secret).
# GEMINI_API_KEY is the primary name; GOOGLE_API_KEY is accepted as a fallback
# so that either name a user is told to set actually works.
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")

# Fail fast at import time, naming the variables that are really consulted.
if not GOOGLE_API_KEY:
    raise ValueError(
        "Please set your GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable."
    )

# Configure Generative AI
genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Gemini TTS model
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")

# Function to save raw PCM data to WAV file
def save_wave(filename, pcm_data, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination passed straight to ``wave.open``.
        pcm_data: Bytes-like object holding the audio frames to write.
        channels: Number of audio channels (default 1).
        rate: Frame rate in Hz (default 24000).
        sample_width: Bytes per sample (default 2).
    """
    wav_writer = wave.open(filename, mode="wb")
    try:
        # setparams applies channels, sample width and frame rate in one call;
        # the frame count is left at 0 and corrected by the writer on close.
        wav_writer.setparams(
            (channels, sample_width, rate, 0, "NONE", "not compressed")
        )
        wav_writer.writeframes(pcm_data)
    finally:
        wav_writer.close()

# Function to handle TTS generation
def generate_tts(text):
    """Synthesize speech for ``text`` and report the outcome to the UI.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input is
            rejected without calling the model.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path of
        the WAV file that was written, or ``None`` when no audio was produced,
        in which case ``status_message`` explains why.
    """
    import tempfile  # local import: only this function needs it

    # Guard against None as well as blank strings; calling .strip() on None
    # would raise before a friendly message could be returned.
    if not text or not text.strip():
        return None, "Please enter some text."

    try:
        # NOTE(review): `response_modality` does not appear to be a documented
        # keyword of `GenerativeModel.generate_content`, and "audio/wav" may not
        # be an accepted `response_mime_type` -- confirm against the installed
        # SDK version. Any resulting error is reported via the handler below.
        response = model.generate_content(
            text,
            generation_config={"response_mime_type": "audio/wav"},
            response_modality="AUDIO"
        )

        # Extract audio data from response, reporting an empty response
        # explicitly instead of surfacing a bare "list index out of range".
        candidates = response.candidates
        if not candidates or not candidates[0].content.parts:
            return None, "Error: the model returned no audio data."
        audio_data = candidates[0].content.parts[0].inline_data.data
        if not audio_data:
            return None, "Error: the model returned no audio data."

        # Use a unique file per request: a single shared "output.wav" would be
        # overwritten when several requests are served at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_filename = tmp.name
        save_wave(output_filename, audio_data)

        return output_filename, "Audio generated successfully!"

    except Exception as e:
        # UI boundary: turn any failure into a status message rather than
        # letting the exception propagate to the caller.
        return None, f"Error: {str(e)}"

# Gradio Interface
with gr.Blocks() as demo:
    # Page heading
    gr.Markdown(value="## 🎙️ Gemini 2.5 Text-to-Speech Demo")

    # Row 1: the text to be spoken
    with gr.Row():
        text_input = gr.Textbox(label="Enter text to convert to speech")

    # Row 2: the trigger button
    with gr.Row():
        submit_button = gr.Button(value="Generate Speech")

    # Row 3: the generated audio next to a status line
    with gr.Row():
        audio_output = gr.Audio(type="filepath", label="Generated Audio")
        status_output = gr.Textbox(label="Status")

    # generate_tts returns (audio path, status message), matching the two
    # output components in that order.
    submit_button.click(
        generate_tts,
        inputs=[text_input],
        outputs=[audio_output, status_output],
    )

# Launch Gradio app only when this file is executed as a script
if __name__ == "__main__":
    demo.launch()