import os
import time
import uuid

import google.generativeai as genai
import gradio as gr

# Read once at import time from the process environment; the value is None
# when the variable is unset, which synthesize_speech() reports to the user.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")


def create_unique_wav_file(audio_data):
    """Save audio bytes to a uniquely named ``.wav`` file and return its path.

    The bytes are written verbatim: no WAV (RIFF) header is added here.
    NOTE(review): this assumes ``audio_data`` is already a complete WAV
    payload -- confirm against what the speech API actually returns.

    Args:
        audio_data: Bytes-like object holding the audio to store.

    Returns:
        Path of the newly created file inside the ``audio_outputs`` directory.

    Raises:
        gr.Error: If the output directory or the file cannot be created or
            written (the underlying exception is chained as the cause).
    """
    output_dir = "audio_outputs"

    # A one-second timestamp alone is not unique: two requests handled within
    # the same second would share a name and the later one would overwrite the
    # earlier one. The timestamp is kept for sortability and a random UUID
    # suffix makes the name collision-free.
    timestamp = int(time.time())
    file_name = os.path.join(
        output_dir, f"speech_output_{timestamp}_{uuid.uuid4().hex}.wav"
    )

    try:
        os.makedirs(output_dir, exist_ok=True)
        # "xb" is exclusive creation: it refuses to clobber an existing file
        # instead of silently replacing it.
        with open(file_name, "xb") as f:
            f.write(audio_data)
    except Exception as e:
        print(f"Error saving wave file: {e}")
        raise gr.Error(f"Could not save audio file. Error: {e}") from e

    return file_name


def synthesize_speech(text):
    """Synthesize speech for ``text`` via the Gemini SDK and return a file path.

    The API key is taken from the module-level ``GOOGLE_API_KEY`` constant.

    Args:
        text: The text to speak; must contain at least one non-whitespace
            character.

    Returns:
        Path to the audio file written by ``create_unique_wav_file``.

    Raises:
        gr.Error: If the API key is missing, the text is empty, the response
            carries no audio, the audio cannot be saved, or the API call
            fails for any other reason.
    """
    if not GOOGLE_API_KEY:
        raise gr.Error("Google API Key not found. Please ensure you have set the GOOGLE_API_KEY secret in your Hugging Face Space settings.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        genai.configure(api_key=GOOGLE_API_KEY)

        # NOTE(review): 'tts-1' does not look like a Gemini model name, and
        # `audio_content` could not be confirmed as an attribute of the
        # generate_content() response -- verify both against the SDK docs.
        model = genai.GenerativeModel(model_name='tts-1')
        prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
        response = model.generate_content(prompt)

        if not response.audio_content:
            raise gr.Error("The API did not return audio data. Please check your text or try again.")
        return create_unique_wav_file(response.audio_content)

    except gr.Error:
        # These already carry a precise user-facing message (no audio in the
        # response, or the file could not be saved). Without this clause the
        # generic handler below would re-wrap them in a misleading
        # "check your network connection / API key" message.
        raise
    except Exception as e:
        print(f"An error occurred: {e}")
        raise gr.Error(f"Failed to synthesize speech. Please check your network connection and that your API key is valid. Error: {e}") from e


# Declarative UI: components are rendered in the order they are created
# inside the Blocks context, so statement order below is the page layout.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown(
        """
        # ✨ Gemini Text-to-Speech Synthesizer
        This app uses an API key stored securely in Hugging Face secrets.
        Just enter the text you want to convert to speech!
        """
    )

    # Input: free-form text to be spoken.
    text_input = gr.Textbox(
        label="Text to Synthesize",
        placeholder="Hello! Welcome to the text-to-speech demonstration.",
        lines=4,
    )

    submit_btn = gr.Button("Generate Speech", variant="primary")

    # Output: synthesize_speech returns a path, hence type="filepath".
    audio_output = gr.Audio(label="Generated Audio", type="filepath")

    # Wire the button to the synthesis function.
    submit_btn.click(
        fn=synthesize_speech,
        inputs=[text_input],
        outputs=audio_output
    )

    # Clickable sample sentences that populate the text box.
    gr.Examples(
        examples=[
            "The weather is wonderful today, perfect for a walk in the park.",
            "I am so excited to try out this new text-to-speech feature!",
            "Congratulations on your amazing achievement!",
            "This is a demonstration of high-quality speech synthesis."
        ],
        inputs=[text_input],
        label="Example Prompts"
    )


# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()