Spaces:

Athspi
/

Gsgsgsg

Running

Athspi committed on Jun 26

Commit

e4ca1d6

verified ·

1 Parent(s): 059047d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ def create_unique_wav_file(pcm_data, channels=1, rate=24000, sample_width=2):
         print(f"Error saving wave file: {e}")
         raise gr.Error(f"Could not save audio file. Error: {e}")
-# --- Core API Logic (Corrected Pattern) ---
 def synthesize_speech(text, voice):
     """
     Synthesizes speech from text using the Gemini API's native TTS capabilities.
@@ -50,9 +50,8 @@ def synthesize_speech(text, voice):
         # 3. Instantiate the correct model
         model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
-        # 4. Construct the GenerationConfig
-        tts_config = GenerationConfig(
-            response_modalities=["AUDIO"],
             speech_config={
                 "voice_config": {
                     "prebuilt_voice_config": {
@@ -62,11 +61,12 @@ def synthesize_speech(text, voice):
             }
         )
-        # 5. Generate content with the model and config
         prompt = f"Say cheerfully: {text}"
         response = model.generate_content(
            contents=prompt,
-           generation_config=tts_config
         )
         # 6. Extract audio data from the response structure

         print(f"Error saving wave file: {e}")
         raise gr.Error(f"Could not save audio file. Error: {e}")
+# --- Core API Logic (Corrected API Call Structure) ---
 def synthesize_speech(text, voice):
     """
     Synthesizes speech from text using the Gemini API's native TTS capabilities.
         # 3. Instantiate the correct model
         model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
+        # 4. Construct the GenerationConfig with ONLY the speech_config
+        tts_generation_config = GenerationConfig(
             speech_config={
                 "voice_config": {
                     "prebuilt_voice_config": {
             }
         )
+        # 5. Generate content, passing response_modalities directly
         prompt = f"Say cheerfully: {text}"
         response = model.generate_content(
            contents=prompt,
+           generation_config=tts_generation_config,
+           response_modalities=["AUDIO"]  # CORRECTED: This is a direct argument
         )
         # 6. Extract audio data from the response structure