Athspi committed on
Commit
70e979d
·
verified ·
1 Parent(s): c3a2ea4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -22,7 +22,6 @@ genai.configure(api_key=GEMINI_API_KEY)
22
  KOKORO_LANGUAGES = {
23
  "American English": "a",
24
  "British English": "b",
25
- "Japanese": "j",
26
  "Mandarin Chinese": "z",
27
  "Spanish": "e",
28
  "French": "f",
@@ -32,9 +31,11 @@ KOKORO_LANGUAGES = {
32
  }
33
 
34
  GTTS_LANGUAGES = lang.tts_langs()
 
35
 
36
- # Combine languages and remove duplicates
37
- SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
 
38
 
39
  @app.route('/')
40
  def serve_index():
@@ -62,7 +63,7 @@ def translate_audio():
62
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
63
 
64
  # Transcribe audio using Gemini
65
- model = genai.GenerativeModel("gemini-2.0-flash")
66
 
67
  # Create proper audio blob
68
  audio_blob = {
@@ -81,7 +82,7 @@ def translate_audio():
81
  response = model.generate_content(prompt)
82
  translated_text = response.text.strip()
83
 
84
- # Generate TTS (corrected version)
85
  if target_language in KOKORO_LANGUAGES:
86
  lang_code = KOKORO_LANGUAGES[target_language]
87
  pipeline = KPipeline(lang_code=lang_code)
@@ -90,18 +91,17 @@ def translate_audio():
90
  # Collect all audio segments
91
  audio_segments = []
92
  for _, _, audio in generator:
93
- if audio is not None: # Explicit None check
94
  audio_segments.append(audio)
95
 
96
  if audio_segments:
97
- # Concatenate audio tensors
98
  audio_data = np.concatenate(audio_segments)
99
  _, temp_output_path = tempfile.mkstemp(suffix=".wav")
100
  sf.write(temp_output_path, audio_data, 24000)
101
  else:
102
  raise ValueError("No audio generated by Kokoro")
103
  else:
104
- # Fallback to gTTS
105
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
  tts = gTTS(translated_text, lang=lang_code)
107
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
 
22
  KOKORO_LANGUAGES = {
23
  "American English": "a",
24
  "British English": "b",
 
25
  "Mandarin Chinese": "z",
26
  "Spanish": "e",
27
  "French": "f",
 
31
  }
32
 
33
  GTTS_LANGUAGES = lang.tts_langs()
34
+ GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
35
 
36
+ SUPPORTED_LANGUAGES = sorted(
37
+ list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
38
+ )
39
 
40
  @app.route('/')
41
  def serve_index():
 
63
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
64
 
65
  # Transcribe audio using Gemini
66
+ model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
67
 
68
  # Create proper audio blob
69
  audio_blob = {
 
82
  response = model.generate_content(prompt)
83
  translated_text = response.text.strip()
84
 
85
+ # Generate TTS
86
  if target_language in KOKORO_LANGUAGES:
87
  lang_code = KOKORO_LANGUAGES[target_language]
88
  pipeline = KPipeline(lang_code=lang_code)
 
91
  # Collect all audio segments
92
  audio_segments = []
93
  for _, _, audio in generator:
94
+ if audio is not None:
95
  audio_segments.append(audio)
96
 
97
  if audio_segments:
 
98
  audio_data = np.concatenate(audio_segments)
99
  _, temp_output_path = tempfile.mkstemp(suffix=".wav")
100
  sf.write(temp_output_path, audio_data, 24000)
101
  else:
102
  raise ValueError("No audio generated by Kokoro")
103
  else:
104
+ # Standard gTTS handling
105
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
  tts = gTTS(translated_text, lang=lang_code)
107
  _, temp_output_path = tempfile.mkstemp(suffix=".mp3")