Spaces:

Athspi
/

Gsgsgsg

Running

App Files Files Community

Athspi committed 20 days ago

Commit

68d8b0d

verified ·

1 Parent(s): ab5a1ff

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -46

app.py CHANGED Viewed

@@ -1,53 +1,123 @@
 import gradio as gr
 import time
-import wave
-from google import genai
-from google.genai import types
-from google.colab import userdata
-# Set up the wave file to save the output:
-def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
-   with wave.open(filename, "wb") as wf:
-      wf.setnchannels(channels)
-      wf.setsampwidth(sample_width)
-      wf.setframerate(rate)
-      wf.writeframes(pcm)
-# Retrieve the API key from Colab's Secrets Manage
-GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
-client = genai.Client(api_key=GOOGLE_API_KEY)
-def synthesize_speech(text):
-    """Synthesizes speech from the given text and saves it to a wave file."""
-    response = client.models.generate_content(
-        model="gemini-2.5-flash-preview-tts",
-        contents=f"Say cheerfully: {text}",
-        config=types.GenerateContentConfig(
-            response_modalities=["AUDIO"],
-            speech_config=types.SpeechConfig(
-                voice_config=types.VoiceConfig(
-                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                        voice_name='Kore',
-                    )
-                )
-            ),
-        )
-    )
-    data = response.candidates[0].content.parts[0].inline_data.data
-    # Create a dynamic filename using a timestamp
     timestamp = int(time.time())
-    file_name = f'out_{timestamp}.wav'
-    wave_file(file_name, data)
-    return file_name
-iface = gr.Interface(
-    fn=synthesize_speech,
-    inputs=gr.Textbox(label="Enter text for speech synthesis"),
-    outputs=gr.Audio(label="Generated Audio"),
-    title="Text-to-Speech Interface"
-)
-iface.launch()

 import gradio as gr
+import google.generativeai as genai
 import time
+import os
+# --- Helper Function ---
+def create_unique_wav_file(audio_data):
+    """Write audio bytes to a new, uniquely named .wav file and return its path.
+
+    The file is created inside the relative ``audio_outputs`` directory, which
+    is created on demand.
+
+    Args:
+        audio_data: Bytes written to the file verbatim. No WAV header is added
+            here, so the caller must pass data that already is a complete WAV
+            file (NOTE(review): confirm the API really returns WAV, not raw PCM).
+
+    Returns:
+        The path of the file that was written.
+
+    Raises:
+        gr.Error: If the directory or the file cannot be created or written.
+    """
+    output_dir = "audio_outputs"
     timestamp = int(time.time())
+    # A one-second timestamp alone is not unique: two requests in the same
+    # second would get the same name and overwrite each other, so a random
+    # suffix is appended.
+    suffix = os.urandom(4).hex()
+    file_name = os.path.join(output_dir, f'speech_output_{timestamp}_{suffix}.wav')
+    try:
+        os.makedirs(output_dir, exist_ok=True)
+        # 'xb' fails instead of silently replacing an existing file.
+        with open(file_name, 'xb') as f:
+            f.write(audio_data)
+    except (OSError, TypeError) as e:
+        print(f"Error saving wave file: {e}")
+        raise gr.Error(f"Could not save audio file. Error: {e}") from e
+    return file_name
+# --- Core API Logic ---
+def synthesize_speech(api_key, text):
+    """Validate the inputs, request speech for *text*, and return a WAV path.
+
+    Args:
+        api_key: Google AI API key; passed to ``genai.configure``.
+        text: The text to speak. It is embedded in a prompt that asks for a
+            cheerful and friendly voice.
+
+    Returns:
+        The path of the audio file written by ``create_unique_wav_file``.
+
+    Raises:
+        gr.Error: If an input is blank, the response carries no audio, the
+            file cannot be saved, or the API call fails for any other reason.
+    """
+    # 1. Validate inputs before any API call is made.
+    if not api_key or not api_key.strip():
+        raise gr.Error("API Key is required. Please enter your Google AI API Key.")
+    if not text or not text.strip():
+        raise gr.Error("Please enter some text to synthesize.")
+    try:
+        # 2. Configure the Gemini API.
+        # This sets the key for all subsequent genai calls.
+        # NOTE(review): that looks like process-wide state, so concurrent
+        # users with different keys may interfere - confirm.
+        genai.configure(api_key=api_key.strip())
+        # 3. Call the model. The prompt itself carries the desired tone.
+        # NOTE(review): the 'tts-1' model name, the `response_mime_type`
+        # keyword and the `audio_content` attribute below could not be
+        # confirmed from this file - verify against the SDK documentation.
+        model = genai.GenerativeModel(model_name='tts-1')
+        prompt = f"Speak the following text in a cheerful and friendly voice: '{text}'"
+        response = model.generate_content(prompt, response_mime_type="audio/wav")
+        # 4. Save the returned audio, or report that none came back.
+        if response.audio_content:
+            return create_unique_wav_file(response.audio_content)
+        raise gr.Error("The API did not return audio data. Please check your text or try again.")
+    except gr.Error:
+        # Already a user-facing message (raised above or by the file helper);
+        # re-raise unchanged so it is not re-wrapped as an API-key/network error.
+        raise
+    except Exception as e:
+        # Anything else is reported in the UI with the underlying cause.
+        print(f"An error occurred: {e}")
+        raise gr.Error(f"Failed to synthesize speech. Please check your API key and network connection. Error: {e}") from e
+# --- Gradio User Interface ---
+# Build the page: a Markdown header, a row holding the two text inputs, the
+# submit button, the audio output, and a list of example prompts. Components
+# are created in this order inside the Blocks context.
+with gr.Blocks(theme=gr.themes.Soft()) as iface:
+    gr.Markdown(
+        """
+        # ✨ Gemini Text-to-Speech Synthesizer
+        Enter your Google AI API Key and the text you want to convert to speech.
+        The audio will be generated with a cheerful tone.
+        """
+    )
+    with gr.Row():
+        # Input for the user's API key. type="password" hides the input.
+        api_key_input = gr.Textbox(
+            label="Google AI API Key",
+            type="password",
+            placeholder="Enter your API key here...",
+            scale=1
+        )
+        # Input for the text to be synthesized.
+        text_input = gr.Textbox(
+            label="Text to Synthesize",
+            placeholder="Hello! Welcome to the text-to-speech demonstration.",
+            lines=3,
+            scale=2
+        )
+    # Button to trigger the synthesis process.
+    submit_btn = gr.Button("Generate Speech", variant="primary")
+    # Component to display the generated audio; synthesize_speech returns a
+    # file path, hence type="filepath".
+    audio_output = gr.Audio(label="Generated Audio", type="filepath")
+    # Connect the button click event to the core function. The inputs list
+    # order matches the (api_key, text) parameter order of synthesize_speech.
+    submit_btn.click(
+        fn=synthesize_speech,
+        inputs=[api_key_input, text_input],
+        outputs=audio_output
+    )
+    # Provide example text for users to try easily. Only the text box is
+    # listed as an input, so the examples do not supply an API key.
+    gr.Examples(
+        examples=[
+            "The weather is wonderful today, perfect for a walk in the park.",
+            "I am so excited to try out this new text-to-speech feature!",
+            "Congratulations on your amazing achievement!",
+            "This is a demonstration of high-quality speech synthesis."
+        ],
+        inputs=[text_input],
+        label="Example Prompts"
+    )
+# --- Main execution block ---
+# To run this script, save it as app.py and run `python app.py` in your terminal.
+# The guard keeps the app from launching when this module is imported.
+if __name__ == "__main__":
+    iface.launch()