Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,6 @@ genai.configure(api_key=GEMINI_API_KEY)
|
|
22 |
KOKORO_LANGUAGES = {
|
23 |
"American English": "a",
|
24 |
"British English": "b",
|
25 |
-
"Japanese": "j",
|
26 |
"Mandarin Chinese": "z",
|
27 |
"Spanish": "e",
|
28 |
"French": "f",
|
@@ -32,9 +31,11 @@ KOKORO_LANGUAGES = {
|
|
32 |
}
|
33 |
|
34 |
GTTS_LANGUAGES = lang.tts_langs()
|
|
|
35 |
|
36 |
-
|
37 |
-
|
|
|
38 |
|
39 |
@app.route('/')
|
40 |
def serve_index():
|
@@ -62,7 +63,7 @@ def translate_audio():
|
|
62 |
return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
|
63 |
|
64 |
# Transcribe audio using Gemini
|
65 |
-
model = genai.GenerativeModel("gemini-2.0-
|
66 |
|
67 |
# Create proper audio blob
|
68 |
audio_blob = {
|
@@ -81,7 +82,7 @@ def translate_audio():
|
|
81 |
response = model.generate_content(prompt)
|
82 |
translated_text = response.text.strip()
|
83 |
|
84 |
-
# Generate TTS
|
85 |
if target_language in KOKORO_LANGUAGES:
|
86 |
lang_code = KOKORO_LANGUAGES[target_language]
|
87 |
pipeline = KPipeline(lang_code=lang_code)
|
@@ -90,18 +91,17 @@ def translate_audio():
|
|
90 |
# Collect all audio segments
|
91 |
audio_segments = []
|
92 |
for _, _, audio in generator:
|
93 |
-
if audio is not None:
|
94 |
audio_segments.append(audio)
|
95 |
|
96 |
if audio_segments:
|
97 |
-
# Concatenate audio tensors
|
98 |
audio_data = np.concatenate(audio_segments)
|
99 |
_, temp_output_path = tempfile.mkstemp(suffix=".wav")
|
100 |
sf.write(temp_output_path, audio_data, 24000)
|
101 |
else:
|
102 |
raise ValueError("No audio generated by Kokoro")
|
103 |
else:
|
104 |
-
#
|
105 |
lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
|
106 |
tts = gTTS(translated_text, lang=lang_code)
|
107 |
_, temp_output_path = tempfile.mkstemp(suffix=".mp3")
|
|
|
22 |
KOKORO_LANGUAGES = {
|
23 |
"American English": "a",
|
24 |
"British English": "b",
|
|
|
25 |
"Mandarin Chinese": "z",
|
26 |
"Spanish": "e",
|
27 |
"French": "f",
|
|
|
31 |
}
|
32 |
|
33 |
GTTS_LANGUAGES = lang.tts_langs()
|
34 |
+
GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
|
35 |
|
36 |
+
SUPPORTED_LANGUAGES = sorted(
|
37 |
+
list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
|
38 |
+
)
|
39 |
|
40 |
@app.route('/')
|
41 |
def serve_index():
|
|
|
63 |
return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
|
64 |
|
65 |
# Transcribe audio using Gemini
|
66 |
+
model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
|
67 |
|
68 |
# Create proper audio blob
|
69 |
audio_blob = {
|
|
|
82 |
response = model.generate_content(prompt)
|
83 |
translated_text = response.text.strip()
|
84 |
|
85 |
+
# Generate TTS
|
86 |
if target_language in KOKORO_LANGUAGES:
|
87 |
lang_code = KOKORO_LANGUAGES[target_language]
|
88 |
pipeline = KPipeline(lang_code=lang_code)
|
|
|
91 |
# Collect all audio segments
|
92 |
audio_segments = []
|
93 |
for _, _, audio in generator:
|
94 |
+
if audio is not None:
|
95 |
audio_segments.append(audio)
|
96 |
|
97 |
if audio_segments:
|
|
|
98 |
audio_data = np.concatenate(audio_segments)
|
99 |
_, temp_output_path = tempfile.mkstemp(suffix=".wav")
|
100 |
sf.write(temp_output_path, audio_data, 24000)
|
101 |
else:
|
102 |
raise ValueError("No audio generated by Kokoro")
|
103 |
else:
|
104 |
+
# Standard gTTS handling
|
105 |
lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
|
106 |
tts = gTTS(translated_text, lang=lang_code)
|
107 |
_, temp_output_path = tempfile.mkstemp(suffix=".mp3")
|