Spaces:

Athspi
/

Gsgsgsg

Running

App Files Community

Athspi committed 19 days ago

Commit

059047d

verified ·

1 Parent(s): ee8b748

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -19

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 import google.generativeai as genai
-from google.generativeai import types
 import time
 import os
 import wave
@@ -30,7 +30,7 @@ def create_unique_wav_file(pcm_data, channels=1, rate=24000, sample_width=2):
         print(f"Error saving wave file: {e}")
         raise gr.Error(f"Could not save audio file. Error: {e}")
-# --- Core API Logic (Rewritten based on new documentation) ---
 def synthesize_speech(text, voice):
     """
     Synthesizes speech from text using the Gemini API's native TTS capabilities.
@@ -44,28 +44,32 @@ def synthesize_speech(text, voice):
         raise gr.Error("Please select a voice.")
     try:
-        # 2. Configure the Gemini client directly
-        client = genai.Client(api_key=GOOGLE_API_KEY)
-        # 3. Construct the API call as per the new TTS documentation
-        prompt = f"Say cheerfully: {text}"
-        response = client.models.generate_content(
-           model="gemini-2.5-flash-preview-tts",
            contents=prompt,
-           config=types.GenerateContentConfig(
-              response_modalities=["AUDIO"],
-              speech_config=types.SpeechConfig(
-                 voice_config=types.VoiceConfig(
-                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
-                       voice_name=voice,
-                    )
-                 )
-              ),
-           )
         )
-        # 4. Extract audio data from the new response structure
         if response.candidates and response.candidates[0].content.parts:
             audio_data = response.candidates[0].content.parts[0].inline_data.data
             audio_file_path = create_unique_wav_file(audio_data)

 import gradio as gr
 import google.generativeai as genai
+from google.generativeai.types import GenerationConfig
 import time
 import os
 import wave
         print(f"Error saving wave file: {e}")
         raise gr.Error(f"Could not save audio file. Error: {e}")
+# --- Core API Logic (Corrected Pattern) ---
 def synthesize_speech(text, voice):
     """
     Synthesizes speech from text using the Gemini API's native TTS capabilities.
         raise gr.Error("Please select a voice.")
     try:
+        # 2. Configure the API key once
+        genai.configure(api_key=GOOGLE_API_KEY)
+        # 3. Instantiate the correct model
+        model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
+        # 4. Construct the GenerationConfig
+        tts_config = GenerationConfig(
+            response_modalities=["AUDIO"],
+            speech_config={
+                "voice_config": {
+                    "prebuilt_voice_config": {
+                        "voice_name": voice
+                    }
+                }
+            }
+        )
+        # 5. Generate content with the model and config
+        prompt = f"Say cheerfully: {text}"
+        response = model.generate_content(
            contents=prompt,
+           generation_config=tts_config
         )
+        # 6. Extract audio data from the response structure
         if response.candidates and response.candidates[0].content.parts:
             audio_data = response.candidates[0].content.parts[0].inline_data.data
             audio_file_path = create_unique_wav_file(audio_data)