Update app.py
Browse files
app.py
CHANGED
@@ -30,7 +30,7 @@ def create_unique_wav_file(pcm_data, channels=1, rate=24000, sample_width=2):
|
|
30 |
print(f"Error saving wave file: {e}")
|
31 |
raise gr.Error(f"Could not save audio file. Error: {e}")
|
32 |
|
33 |
-
# --- Core API Logic (Corrected) ---
|
34 |
def synthesize_speech(text, voice):
|
35 |
"""
|
36 |
Synthesizes speech from text using the Gemini API's native TTS capabilities.
|
@@ -50,9 +50,8 @@ def synthesize_speech(text, voice):
|
|
50 |
# 3. Instantiate the correct model
|
51 |
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
|
52 |
|
53 |
-
# 4. Construct the GenerationConfig
|
54 |
-
|
55 |
-
response_modalities=["AUDIO"],
|
56 |
speech_config={
|
57 |
"voice_config": {
|
58 |
"prebuilt_voice_config": {
|
@@ -62,11 +61,12 @@ def synthesize_speech(text, voice):
|
|
62 |
}
|
63 |
)
|
64 |
|
65 |
-
# 5. Generate content
|
66 |
prompt = f"Say cheerfully: {text}"
|
67 |
response = model.generate_content(
|
68 |
contents=prompt,
|
69 |
-
generation_config=
|
|
|
70 |
)
|
71 |
|
72 |
# 6. Extract audio data from the response structure
|
|
|
30 |
print(f"Error saving wave file: {e}")
|
31 |
raise gr.Error(f"Could not save audio file. Error: {e}")
|
32 |
|
33 |
+
# --- Core API Logic (Corrected API Call Structure) ---
|
34 |
def synthesize_speech(text, voice):
|
35 |
"""
|
36 |
Synthesizes speech from text using the Gemini API's native TTS capabilities.
|
|
|
50 |
# 3. Instantiate the correct model
|
51 |
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
|
52 |
|
53 |
+
# 4. Construct the GenerationConfig with ONLY the speech_config
|
54 |
+
tts_generation_config = GenerationConfig(
|
|
|
55 |
speech_config={
|
56 |
"voice_config": {
|
57 |
"prebuilt_voice_config": {
|
|
|
61 |
}
|
62 |
)
|
63 |
|
64 |
+
# 5. Generate content, passing response_modalities directly
|
65 |
prompt = f"Say cheerfully: {text}"
|
66 |
response = model.generate_content(
|
67 |
contents=prompt,
|
68 |
+
generation_config=tts_generation_config,
|
69 |
+
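response_modalities=["AUDIO"] # NOTE(review): passed here as a direct keyword argument; confirm that generate_content() accepts it in the installed SDK, otherwise this call raises TypeError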
|
70 |
)
|
71 |
|
72 |
# 6. Extract audio data from the response structure
|