Spaces:

Athspi-ai
/

Audio-translation

Running

App Files Community

Athspi committed on Mar 9

Commit

70e979d

verified ·

1 Parent(s): c3a2ea4

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -22,7 +22,6 @@ genai.configure(api_key=GEMINI_API_KEY)
 KOKORO_LANGUAGES = {
     "American English": "a",
     "British English": "b",
-    "Japanese": "j",
     "Mandarin Chinese": "z",
     "Spanish": "e",
     "French": "f",
@@ -32,9 +31,11 @@ KOKORO_LANGUAGES = {
 }
 GTTS_LANGUAGES = lang.tts_langs()
-# Combine languages and remove duplicates
-SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
 @app.route('/')
 def serve_index():
@@ -62,7 +63,7 @@ def translate_audio():
             return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
         # Transcribe audio using Gemini
-        model = genai.GenerativeModel("gemini-2.0-flash")
         # Create proper audio blob
         audio_blob = {
@@ -81,7 +82,7 @@ def translate_audio():
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
-        # Generate TTS (corrected version)
         if target_language in KOKORO_LANGUAGES:
             lang_code = KOKORO_LANGUAGES[target_language]
             pipeline = KPipeline(lang_code=lang_code)
@@ -90,18 +91,17 @@ def translate_audio():
             # Collect all audio segments
             audio_segments = []
             for _, _, audio in generator:
-                if audio is not None:  # Explicit None check
                     audio_segments.append(audio)
             if audio_segments:
-                # Concatenate audio tensors
                 audio_data = np.concatenate(audio_segments)
                 _, temp_output_path = tempfile.mkstemp(suffix=".wav")
                 sf.write(temp_output_path, audio_data, 24000)
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
-            # Fallback to gTTS
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")

 KOKORO_LANGUAGES = {
     "American English": "a",
     "British English": "b",
     "Mandarin Chinese": "z",
     "Spanish": "e",
     "French": "f",
 }
 GTTS_LANGUAGES = lang.tts_langs()
+GTTS_LANGUAGES['ja'] = 'Japanese'  # Explicit Japanese support
+SUPPORTED_LANGUAGES = sorted(
+    list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
+)
 @app.route('/')
 def serve_index():
             return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
         # Transcribe audio using Gemini
+        model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
         # Create proper audio blob
         audio_blob = {
         response = model.generate_content(prompt)
         translated_text = response.text.strip()
+        # Generate TTS
         if target_language in KOKORO_LANGUAGES:
             lang_code = KOKORO_LANGUAGES[target_language]
             pipeline = KPipeline(lang_code=lang_code)
             # Collect all audio segments
             audio_segments = []
             for _, _, audio in generator:
+                if audio is not None:
                     audio_segments.append(audio)
             if audio_segments:
                 audio_data = np.concatenate(audio_segments)
                 _, temp_output_path = tempfile.mkstemp(suffix=".wav")
                 sf.write(temp_output_path, audio_data, 24000)
             else:
                 raise ValueError("No audio generated by Kokoro")
         else:
+            # Standard gTTS handling
             lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
             tts = gTTS(translated_text, lang=lang_code)
             _, temp_output_path = tempfile.mkstemp(suffix=".mp3")