Athspi committed on
Commit
280b5d0
·
verified ·
1 Parent(s): 385365a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -26
app.py CHANGED
@@ -11,28 +11,31 @@ from werkzeug.utils import secure_filename
11
  from gtts import gTTS, lang
12
  from kokoro import KPipeline
13
 
14
- from google import genai
15
- from google.genai import types
16
 
17
- # API key setup
18
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
19
  if not GEMINI_API_KEY:
20
  raise ValueError("GEMINI_API_KEY environment variable not set")
21
 
22
- client = genai.Client(api_key=GEMINI_API_KEY)
23
 
24
- # App config
25
  app = Flask(__name__, static_folder='static')
26
  CORS(app)
27
 
28
- # Language support
29
- KOKORO_LANGUAGES = {"American English": "a", "British English": "b", "Mandarin Chinese": "z",
30
- "Spanish": "e", "French": "f", "Hindi": "h", "Italian": "i", "Brazilian Portuguese": "p"}
 
 
31
  GTTS_LANGUAGES = lang.tts_langs()
32
  GTTS_LANGUAGES['ja'] = 'Japanese'
33
- SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
34
 
35
- GEMINI_VOICE = "Kore"
 
36
 
37
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
38
  with wave.open(filename, "wb") as wf:
@@ -65,8 +68,8 @@ def translate_audio():
65
  if audio_file.mimetype not in allowed_mime_types:
66
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
67
 
68
- model = genai.GenerativeModel("gemini-2.0-flash")
69
-
70
  audio_blob = {
71
  'mime_type': audio_file.mimetype,
72
  'data': audio_file.read()
@@ -84,36 +87,38 @@ def translate_audio():
84
 
85
  # Try Gemini 2.5 TTS
86
  try:
87
- response = client.models.generate_content(
88
- model="gemini-2.5-flash-preview-tts",
89
  contents=translated_text,
90
- config=types.GenerateContentConfig(
91
- response_modalities=["AUDIO"],
92
- speech_config=types.SpeechConfig(
93
- voice_config=types.VoiceConfig(
94
- prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=GEMINI_VOICE)
95
- )
96
  )
97
  )
98
  )
99
- data = response.candidates[0].content.parts[0].inline_data.data
 
100
  temp_output_path = os.path.join(tempfile.gettempdir(), "tts_gemini.wav")
101
  wave_file(temp_output_path, data)
102
- except Exception:
103
- # Fallback: Kokoro or gTTS
 
 
104
  if target_language in KOKORO_LANGUAGES:
105
  lang_code = KOKORO_LANGUAGES[target_language]
106
  pipeline = KPipeline(lang_code=lang_code)
107
  generator = pipeline(translated_text, voice="af_heart", speed=1)
108
 
109
  audio_segments = [audio for _, _, audio in generator if audio is not None]
110
-
111
  if audio_segments:
112
  audio_data = np.concatenate(audio_segments)
113
  temp_output_path = os.path.join(tempfile.gettempdir(), "tts_kokoro.wav")
114
  sf.write(temp_output_path, audio_data, 24000)
115
  else:
116
- raise ValueError("No audio generated by Kokoro")
117
  else:
118
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
119
  tts = gTTS(translated_text, lang=lang_code)
@@ -127,7 +132,7 @@ def translate_audio():
127
  })
128
 
129
  except Exception as e:
130
- app.logger.error(f"Error: {str(e)}")
131
  return jsonify({'error': str(e)}), 500
132
 
133
  @app.route('/download/<filename>')
 
11
  from gtts import gTTS, lang
12
  from kokoro import KPipeline
13
 
14
+ import google.generativeai as genai
15
+ from google.generativeai.types import GenerateContentConfig, SpeechConfig, VoiceConfig, PrebuiltVoiceConfig
16
 
17
+ # Load API key
18
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
19
  if not GEMINI_API_KEY:
20
  raise ValueError("GEMINI_API_KEY environment variable not set")
21
 
22
+ genai.configure(api_key=GEMINI_API_KEY)
23
 
24
+ # Flask app setup
25
  app = Flask(__name__, static_folder='static')
26
  CORS(app)
27
 
28
+ # Supported languages
29
+ KOKORO_LANGUAGES = {
30
+ "American English": "a", "British English": "b", "Mandarin Chinese": "z",
31
+ "Spanish": "e", "French": "f", "Hindi": "h", "Italian": "i", "Brazilian Portuguese": "p"
32
+ }
33
  GTTS_LANGUAGES = lang.tts_langs()
34
  GTTS_LANGUAGES['ja'] = 'Japanese'
35
+ SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES.keys()) | set(GTTS_LANGUAGES.values()))
36
 
37
+ # Voice name for Gemini TTS
38
+ DEFAULT_GEMINI_VOICE = "Kore"
39
 
40
  def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
41
  with wave.open(filename, "wb") as wf:
 
68
  if audio_file.mimetype not in allowed_mime_types:
69
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
70
 
71
+ # Transcribe audio with Gemini
72
+ model = genai.GenerativeModel("models/gemini-1.5-flash")
73
  audio_blob = {
74
  'mime_type': audio_file.mimetype,
75
  'data': audio_file.read()
 
87
 
88
  # Try Gemini 2.5 TTS
89
  try:
90
+ tts_response = genai.generate_content(
91
+ model="models/gemini-2.5-flash-preview-tts",
92
  contents=translated_text,
93
+ generation_config=GenerateContentConfig(
94
+ response_mime_type="audio/wav"
95
+ ),
96
+ speech_config=SpeechConfig(
97
+ voice_config=VoiceConfig(
98
+ prebuilt_voice=PrebuiltVoiceConfig(voice_name=DEFAULT_GEMINI_VOICE)
99
  )
100
  )
101
  )
102
+
103
+ data = tts_response.candidates[0].content.parts[0].inline_data.data
104
  temp_output_path = os.path.join(tempfile.gettempdir(), "tts_gemini.wav")
105
  wave_file(temp_output_path, data)
106
+
107
+ except Exception as gemini_tts_error:
108
+ app.logger.warning(f"Gemini TTS failed: {gemini_tts_error}")
109
+ # Fallback to Kokoro or gTTS
110
  if target_language in KOKORO_LANGUAGES:
111
  lang_code = KOKORO_LANGUAGES[target_language]
112
  pipeline = KPipeline(lang_code=lang_code)
113
  generator = pipeline(translated_text, voice="af_heart", speed=1)
114
 
115
  audio_segments = [audio for _, _, audio in generator if audio is not None]
 
116
  if audio_segments:
117
  audio_data = np.concatenate(audio_segments)
118
  temp_output_path = os.path.join(tempfile.gettempdir(), "tts_kokoro.wav")
119
  sf.write(temp_output_path, audio_data, 24000)
120
  else:
121
+ raise ValueError("No audio generated by Kokoro.")
122
  else:
123
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
124
  tts = gTTS(translated_text, lang=lang_code)
 
132
  })
133
 
134
  except Exception as e:
135
+ app.logger.error(f"Processing error: {str(e)}")
136
  return jsonify({'error': str(e)}), 500
137
 
138
  @app.route('/download/<filename>')