Athspi committed on
Commit
059047d
·
verified ·
1 Parent(s): ee8b748

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import google.generativeai as genai
3
- from google.generativeai import types
4
  import time
5
  import os
6
  import wave
@@ -30,7 +30,7 @@ def create_unique_wav_file(pcm_data, channels=1, rate=24000, sample_width=2):
30
  print(f"Error saving wave file: {e}")
31
  raise gr.Error(f"Could not save audio file. Error: {e}")
32
 
33
- # --- Core API Logic (Rewritten based on new documentation) ---
34
  def synthesize_speech(text, voice):
35
  """
36
  Synthesizes speech from text using the Gemini API's native TTS capabilities.
@@ -44,28 +44,32 @@ def synthesize_speech(text, voice):
44
  raise gr.Error("Please select a voice.")
45
 
46
  try:
47
- # 2. Configure the Gemini client directly
48
- client = genai.Client(api_key=GOOGLE_API_KEY)
49
 
50
- # 3. Construct the API call as per the new TTS documentation
51
- prompt = f"Say cheerfully: {text}"
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
- response = client.models.generate_content(
54
- model="gemini-2.5-flash-preview-tts",
 
55
  contents=prompt,
56
- config=types.GenerateContentConfig(
57
- response_modalities=["AUDIO"],
58
- speech_config=types.SpeechConfig(
59
- voice_config=types.VoiceConfig(
60
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
61
- voice_name=voice,
62
- )
63
- )
64
- ),
65
- )
66
  )
67
 
68
- # 4. Extract audio data from the new response structure
69
  if response.candidates and response.candidates[0].content.parts:
70
  audio_data = response.candidates[0].content.parts[0].inline_data.data
71
  audio_file_path = create_unique_wav_file(audio_data)
 
1
  import gradio as gr
2
  import google.generativeai as genai
3
+ from google.generativeai.types import GenerationConfig
4
  import time
5
  import os
6
  import wave
 
30
  print(f"Error saving wave file: {e}")
31
  raise gr.Error(f"Could not save audio file. Error: {e}")
32
 
33
+ # --- Core API Logic (Corrected Pattern) ---
34
  def synthesize_speech(text, voice):
35
  """
36
  Synthesizes speech from text using the Gemini API's native TTS capabilities.
 
44
  raise gr.Error("Please select a voice.")
45
 
46
  try:
47
+ # 2. Configure the API key once
48
+ genai.configure(api_key=GOOGLE_API_KEY)
49
 
50
+ # 3. Instantiate the correct model
51
+ model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-tts")
52
+
53
+ # 4. Construct the GenerationConfig
54
+ tts_config = GenerationConfig(
55
+ response_modalities=["AUDIO"],
56
+ speech_config={
57
+ "voice_config": {
58
+ "prebuilt_voice_config": {
59
+ "voice_name": voice
60
+ }
61
+ }
62
+ }
63
+ )
64
 
65
+ # 5. Generate content with the model and config
66
+ prompt = f"Say cheerfully: {text}"
67
+ response = model.generate_content(
68
  contents=prompt,
69
+ generation_config=tts_config
 
 
 
 
 
 
 
 
 
70
  )
71
 
72
+ # 6. Extract audio data from the response structure
73
  if response.candidates and response.candidates[0].content.parts:
74
  audio_data = response.candidates[0].content.parts[0].inline_data.data
75
  audio_file_path = create_unique_wav_file(audio_data)