Athspi committed on
Commit
e4ca1d6
·
verified ·
1 Parent(s): 059047d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -30,7 +30,7 @@ def create_unique_wav_file(pcm_data, channels=1, rate=24000, sample_width=2):
30
  print(f"Error saving wave file: {e}")
31
  raise gr.Error(f"Could not save audio file. Error: {e}")
32
 
33
- # --- Core API Logic (Corrected Pattern) ---
34
  def synthesize_speech(text, voice):
35
  """
36
  Synthesizes speech from text using the Gemini API's native TTS capabilities.
@@ -50,9 +50,8 @@ def synthesize_speech(text, voice):
50
  # 3. Instantiate the correct model
51
  model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
52
 
53
- # 4. Construct the GenerationConfig
54
- tts_config = GenerationConfig(
55
- response_modalities=["AUDIO"],
56
  speech_config={
57
  "voice_config": {
58
  "prebuilt_voice_config": {
@@ -62,11 +61,12 @@ def synthesize_speech(text, voice):
62
  }
63
  )
64
 
65
- # 5. Generate content with the model and config
66
  prompt = f"Say cheerfully: {text}"
67
  response = model.generate_content(
68
  contents=prompt,
69
- generation_config=tts_config
 
70
  )
71
 
72
  # 6. Extract audio data from the response structure
 
30
  print(f"Error saving wave file: {e}")
31
  raise gr.Error(f"Could not save audio file. Error: {e}")
32
 
33
+ # --- Core API Logic (Corrected API Call Structure) ---
34
  def synthesize_speech(text, voice):
35
  """
36
  Synthesizes speech from text using the Gemini API's native TTS capabilities.
 
50
  # 3. Instantiate the correct model
51
  model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
52
 
53
+ # 4. Construct the GenerationConfig with ONLY the speech_config
54
+ tts_generation_config = GenerationConfig(
 
55
  speech_config={
56
  "voice_config": {
57
  "prebuilt_voice_config": {
 
61
  }
62
  )
63
 
64
+ # 5. Generate content, passing response_modalities directly
65
  prompt = f"Say cheerfully: {text}"
66
  response = model.generate_content(
67
  contents=prompt,
68
+ generation_config=tts_generation_config,
69
+ response_modalities=["AUDIO"] # CORRECTED: This is a direct argument
70
  )
71
 
72
  # 6. Extract audio data from the response structure