Spaces:

Athspi
/

Gsgsgsg

Running

App Files Files Community

Athspi committed 3 days ago

Commit

b4357ba

verified ·

1 Parent(s): 8274800

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -31

app.py CHANGED Viewed

@@ -3,6 +3,11 @@ import google.generativeai as genai
 import time
 import os
 # --- Helper Function ---
 def create_unique_wav_file(audio_data):
     """Saves audio data to a uniquely named WAV file and returns the path."""
@@ -25,32 +30,30 @@ def create_unique_wav_file(audio_data):
 # --- Core API Logic ---
-def synthesize_speech(api_key, text):
     """
     Synthesizes speech from text using the Gemini API.
-    This function takes an API key and text, validates them, configures the
-    Gemini client, calls the Text-to-Speech API, and saves the resulting audio.
     """
-    # 1. Validate Inputs
-    if not api_key:
-        raise gr.Error("API Key is required. Please enter your Google AI API Key.")
     if not text or not text.strip():
         raise gr.Error("Please enter some text to synthesize.")
     try:
-        # 2. Configure the Gemini API
-        # This sets up the API key for all subsequent genai calls.
-        genai.configure(api_key=api_key)
         # 3. Call the Text-to-Speech Model
         # We use the 'tts-1' model which is optimized for this task.
-        # The prompt itself instructs the model on the desired tone.
         model = genai.GenerativeModel(model_name='tts-1')
         # The API can be instructed on tone and style directly in the prompt.
         prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
-        response = model.generate_content(prompt, response_mime_type="audio/wav")
         # 4. Process the Response and Save the Audio File
         # The audio data is conveniently located in the `audio_content` attribute.
@@ -64,33 +67,24 @@ def synthesize_speech(api_key, text):
     except Exception as e:
         # Provide a more informative error message in the UI.
         print(f"An error occurred: {e}")
-        raise gr.Error(f"Failed to synthesize speech. Please check your API key and network connection. Error: {e}")
 # --- Gradio User Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     gr.Markdown(
         """
         # ✨ Gemini Text-to-Speech Synthesizer
-        Enter your Google AI API Key and the text you want to convert to speech.
-        The audio will be generated with a cheerful tone.
         """
     )
-    with gr.Row():
-        # Input for the user's API key. Type="password" hides the input.
-        api_key_input = gr.Textbox(
-            label="Google AI API Key",
-            type="password",
-            placeholder="Enter your API key here...",
-            scale=1
-        )
-        # Input for the text to be synthesized.
-        text_input = gr.Textbox(
-            label="Text to Synthesize",
-            placeholder="Hello! Welcome to the text-to-speech demonstration.",
-            lines=3,
-            scale=2
-        )
     # Button to trigger the synthesis process.
     submit_btn = gr.Button("Generate Speech", variant="primary")
@@ -99,9 +93,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     # Connect the button click event to the core function.
     submit_btn.click(
         fn=synthesize_speech,
-        inputs=[api_key_input, text_input],
         outputs=audio_output
     )
@@ -118,6 +113,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     )
 # --- Main execution block ---
-# To run this script, save it as app.py and run `python app.py` in your terminal.
 if __name__ == "__main__":
     iface.launch()

 import time
 import os
+# --- Load API Key from Hugging Face Secrets ---
+# IMPORTANT: For this to work on Hugging Face Spaces, you must go to your Space's
+# settings and add a secret named "GOOGLE_API_KEY" with your Google AI API key as the value.
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
 # --- Helper Function ---
 def create_unique_wav_file(audio_data):
     """Saves audio data to a uniquely named WAV file and returns the path."""
 # --- Core API Logic ---
+def synthesize_speech(text):
     """
     Synthesizes speech from text using the Gemini API.
+    This function uses the API key loaded from Hugging Face secrets.
     """
+    # 1. Validate Inputs (API Key and Text)
+    if not GOOGLE_API_KEY:
+        raise gr.Error("Google API Key not found. Please ensure you have set the GOOGLE_API_KEY secret in your Hugging Face Space settings.")
     if not text or not text.strip():
         raise gr.Error("Please enter some text to synthesize.")
     try:
+        # 2. Configure the Gemini API with the loaded key
+        genai.configure(api_key=GOOGLE_API_KEY)
         # 3. Call the Text-to-Speech Model
         # We use the 'tts-1' model which is optimized for this task.
         model = genai.GenerativeModel(model_name='tts-1')
         # The API can be instructed on tone and style directly in the prompt.
         prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
+        # NOTE(review): no response MIME type is requested in this call; this
+        # assumes the model returns audio/wav by default — confirm against the API docs.
+        response = model.generate_content(prompt)
         # 4. Process the Response and Save the Audio File
         # The audio data is conveniently located in the `audio_content` attribute.
     except Exception as e:
         # Provide a more informative error message in the UI.
         print(f"An error occurred: {e}")
+        raise gr.Error(f"Failed to synthesize speech. Please check your network connection and that your API key is valid. Error: {e}")
 # --- Gradio User Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     gr.Markdown(
         """
         # ✨ Gemini Text-to-Speech Synthesizer
+        This app uses an API key stored securely in Hugging Face secrets.
+        Just enter the text you want to convert to speech!
         """
     )
+    # Input for the text to be synthesized.
+    text_input = gr.Textbox(
+        label="Text to Synthesize",
+        placeholder="Hello! Welcome to the text-to-speech demonstration.",
+        lines=4,
+    )
     # Button to trigger the synthesis process.
     submit_btn = gr.Button("Generate Speech", variant="primary")
     audio_output = gr.Audio(label="Generated Audio", type="filepath")
     # Connect the button click event to the core function.
+    # The API key is now handled internally and not needed as an input.
     submit_btn.click(
         fn=synthesize_speech,
+        inputs=[text_input],
         outputs=audio_output
     )
     )
 # --- Main execution block ---
+# To deploy, push this file and a requirements.txt to a Hugging Face Space,
+# then add a secret named GOOGLE_API_KEY in the Space's settings.
 if __name__ == "__main__":
     iface.launch()