Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,11 @@ import google.generativeai as genai
|
|
3 |
import time
|
4 |
import os
|
5 |
|
|
|
|
|
|
|
|
|
|
|
6 |
# --- Helper Function ---
|
7 |
def create_unique_wav_file(audio_data):
|
8 |
"""Saves audio data to a uniquely named WAV file and returns the path."""
|
@@ -25,32 +30,30 @@ def create_unique_wav_file(audio_data):
|
|
25 |
|
26 |
|
27 |
# --- Core API Logic ---
|
28 |
-
def synthesize_speech(
|
29 |
"""
|
30 |
Synthesizes speech from text using the Gemini API.
|
31 |
-
This function
|
32 |
-
Gemini client, calls the Text-to-Speech API, and saves the resulting audio.
|
33 |
"""
|
34 |
-
# 1. Validate Inputs
|
35 |
-
if not
|
36 |
-
raise gr.Error("API Key
|
37 |
if not text or not text.strip():
|
38 |
raise gr.Error("Please enter some text to synthesize.")
|
39 |
|
40 |
try:
|
41 |
-
# 2. Configure the Gemini API
|
42 |
-
|
43 |
-
genai.configure(api_key=api_key)
|
44 |
|
45 |
# 3. Call the Text-to-Speech Model
|
46 |
# We use the 'tts-1' model, which is optimized for this task.
|
47 |
-
# The prompt itself instructs the model on the desired tone.
|
48 |
model = genai.GenerativeModel(model_name='tts-1')
|
49 |
|
50 |
# The API can be instructed on tone and style directly in the prompt.
|
51 |
prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
|
52 |
|
53 |
-
|
|
|
54 |
|
55 |
# 4. Process the Response and Save the Audio File
|
56 |
# The audio data is conveniently located in the `audio_content` attribute.
|
@@ -64,33 +67,24 @@ def synthesize_speech(api_key, text):
|
|
64 |
except Exception as e:
|
65 |
# Provide a more informative error message in the UI.
|
66 |
print(f"An error occurred: {e}")
|
67 |
-
raise gr.Error(f"Failed to synthesize speech. Please check your API key
|
68 |
|
69 |
# --- Gradio User Interface ---
|
70 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
71 |
gr.Markdown(
|
72 |
"""
|
73 |
# ✨ Gemini Text-to-Speech Synthesizer
|
74 |
-
|
75 |
-
|
76 |
"""
|
77 |
)
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
scale=1
|
86 |
-
)
|
87 |
-
# Input for the text to be synthesized.
|
88 |
-
text_input = gr.Textbox(
|
89 |
-
label="Text to Synthesize",
|
90 |
-
placeholder="Hello! Welcome to the text-to-speech demonstration.",
|
91 |
-
lines=3,
|
92 |
-
scale=2
|
93 |
-
)
|
94 |
|
95 |
# Button to trigger the synthesis process.
|
96 |
submit_btn = gr.Button("Generate Speech", variant="primary")
|
@@ -99,9 +93,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
99 |
audio_output = gr.Audio(label="Generated Audio", type="filepath")
|
100 |
|
101 |
# Connect the button click event to the core function.
|
|
|
102 |
submit_btn.click(
|
103 |
fn=synthesize_speech,
|
104 |
-
inputs=[
|
105 |
outputs=audio_output
|
106 |
)
|
107 |
|
@@ -118,6 +113,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
118 |
)
|
119 |
|
120 |
# --- Main execution block ---
|
121 |
-
# To
|
|
|
122 |
if __name__ == "__main__":
|
123 |
iface.launch()
|
|
|
3 |
import time
|
4 |
import os
|
5 |
|
6 |
+
# --- Load API Key from Hugging Face Secrets ---
|
7 |
+
# IMPORTANT: For this to work on Hugging Face Spaces, you must go to your Space's
|
8 |
+
# settings and add a secret named "GOOGLE_API_KEY" with your Google AI API key as the value.
|
9 |
+
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
|
10 |
+
|
11 |
# --- Helper Function ---
|
12 |
def create_unique_wav_file(audio_data):
|
13 |
"""Saves audio data to a uniquely named WAV file and returns the path."""
|
|
|
30 |
|
31 |
|
32 |
# --- Core API Logic ---
|
33 |
+
def synthesize_speech(text):
|
34 |
"""
|
35 |
Synthesizes speech from text using the Gemini API.
|
36 |
+
This function uses the API key loaded from Hugging Face secrets.
|
|
|
37 |
"""
|
38 |
+
# 1. Validate Inputs (API Key and Text)
|
39 |
+
if not GOOGLE_API_KEY:
|
40 |
+
raise gr.Error("Google API Key not found. Please ensure you have set the GOOGLE_API_KEY secret in your Hugging Face Space settings.")
|
41 |
if not text or not text.strip():
|
42 |
raise gr.Error("Please enter some text to synthesize.")
|
43 |
|
44 |
try:
|
45 |
+
# 2. Configure the Gemini API with the loaded key
|
46 |
+
genai.configure(api_key=GOOGLE_API_KEY)
|
|
|
47 |
|
48 |
# 3. Call the Text-to-Speech Model
|
49 |
# We use the 'tts-1' model, which is optimized for this task.
|
|
|
50 |
model = genai.GenerativeModel(model_name='tts-1')
|
51 |
|
52 |
# The API can be instructed on tone and style directly in the prompt.
|
53 |
prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
|
54 |
|
55 |
+
# The 'tts-1' model implicitly returns its audio in audio/wav format.
|
56 |
+
response = model.generate_content(prompt)
|
57 |
|
58 |
# 4. Process the Response and Save the Audio File
|
59 |
# The audio data is conveniently located in the `audio_content` attribute.
|
|
|
67 |
except Exception as e:
|
68 |
# Provide a more informative error message in the UI.
|
69 |
print(f"An error occurred: {e}")
|
70 |
+
raise gr.Error(f"Failed to synthesize speech. Please check your network connection and that your API key is valid. Error: {e}")
|
71 |
|
72 |
# --- Gradio User Interface ---
|
73 |
with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
74 |
gr.Markdown(
|
75 |
"""
|
76 |
# ✨ Gemini Text-to-Speech Synthesizer
|
77 |
+
This app uses an API key stored securely in Hugging Face secrets.
|
78 |
+
Just enter the text you want to convert to speech!
|
79 |
"""
|
80 |
)
|
81 |
|
82 |
+
# Input for the text to be synthesized.
|
83 |
+
text_input = gr.Textbox(
|
84 |
+
label="Text to Synthesize",
|
85 |
+
placeholder="Hello! Welcome to the text-to-speech demonstration.",
|
86 |
+
lines=4,
|
87 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
# Button to trigger the synthesis process.
|
90 |
submit_btn = gr.Button("Generate Speech", variant="primary")
|
|
|
93 |
audio_output = gr.Audio(label="Generated Audio", type="filepath")
|
94 |
|
95 |
# Connect the button click event to the core function.
|
96 |
+
# The API key is now handled internally and not needed as an input.
|
97 |
submit_btn.click(
|
98 |
fn=synthesize_speech,
|
99 |
+
inputs=[text_input],
|
100 |
outputs=audio_output
|
101 |
)
|
102 |
|
|
|
113 |
)
|
114 |
|
115 |
# --- Main execution block ---
|
116 |
+
# To deploy, push this file and a requirements.txt to a Hugging Face Space
|
117 |
+
# and set the GOOGLE_API_KEY in the repository secrets.
|
118 |
if __name__ == "__main__":
|
119 |
iface.launch()
|