Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,28 +11,31 @@ from werkzeug.utils import secure_filename
|
|
11 |
from gtts import gTTS, lang
|
12 |
from kokoro import KPipeline
|
13 |
|
14 |
-
|
15 |
-
from google.
|
16 |
|
17 |
-
# API key
|
18 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
19 |
if not GEMINI_API_KEY:
|
20 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
21 |
|
22 |
-
|
23 |
|
24 |
-
#
|
25 |
app = Flask(__name__, static_folder='static')
|
26 |
CORS(app)
|
27 |
|
28 |
-
#
|
29 |
-
KOKORO_LANGUAGES = {
|
30 |
-
|
|
|
|
|
31 |
GTTS_LANGUAGES = lang.tts_langs()
|
32 |
GTTS_LANGUAGES['ja'] = 'Japanese'
|
33 |
-
SUPPORTED_LANGUAGES = sorted(
|
34 |
|
35 |
-
|
|
|
36 |
|
37 |
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
|
38 |
with wave.open(filename, "wb") as wf:
|
@@ -65,8 +68,8 @@ def translate_audio():
|
|
65 |
if audio_file.mimetype not in allowed_mime_types:
|
66 |
return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
|
67 |
|
68 |
-
|
69 |
-
|
70 |
audio_blob = {
|
71 |
'mime_type': audio_file.mimetype,
|
72 |
'data': audio_file.read()
|
@@ -84,36 +87,38 @@ def translate_audio():
|
|
84 |
|
85 |
# Try Gemini 2.5 TTS
|
86 |
try:
|
87 |
-
|
88 |
-
model="gemini-2.5-flash-preview-tts",
|
89 |
contents=translated_text,
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
)
|
96 |
)
|
97 |
)
|
98 |
)
|
99 |
-
|
|
|
100 |
temp_output_path = os.path.join(tempfile.gettempdir(), "tts_gemini.wav")
|
101 |
wave_file(temp_output_path, data)
|
102 |
-
|
103 |
-
|
|
|
|
|
104 |
if target_language in KOKORO_LANGUAGES:
|
105 |
lang_code = KOKORO_LANGUAGES[target_language]
|
106 |
pipeline = KPipeline(lang_code=lang_code)
|
107 |
generator = pipeline(translated_text, voice="af_heart", speed=1)
|
108 |
|
109 |
audio_segments = [audio for _, _, audio in generator if audio is not None]
|
110 |
-
|
111 |
if audio_segments:
|
112 |
audio_data = np.concatenate(audio_segments)
|
113 |
temp_output_path = os.path.join(tempfile.gettempdir(), "tts_kokoro.wav")
|
114 |
sf.write(temp_output_path, audio_data, 24000)
|
115 |
else:
|
116 |
-
raise ValueError("No audio generated by Kokoro")
|
117 |
else:
|
118 |
lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
|
119 |
tts = gTTS(translated_text, lang=lang_code)
|
@@ -127,7 +132,7 @@ def translate_audio():
|
|
127 |
})
|
128 |
|
129 |
except Exception as e:
|
130 |
-
app.logger.error(f"
|
131 |
return jsonify({'error': str(e)}), 500
|
132 |
|
133 |
@app.route('/download/<filename>')
|
|
|
11 |
from gtts import gTTS, lang
|
12 |
from kokoro import KPipeline
|
13 |
|
14 |
+
import google.generativeai as genai
|
15 |
+
from google.generativeai.types import GenerateContentConfig, SpeechConfig, VoiceConfig, PrebuiltVoiceConfig
|
16 |
|
17 |
+
# Load API key
|
18 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
19 |
if not GEMINI_API_KEY:
|
20 |
raise ValueError("GEMINI_API_KEY environment variable not set")
|
21 |
|
22 |
+
genai.configure(api_key=GEMINI_API_KEY)
|
23 |
|
24 |
+
# Flask app setup
|
25 |
app = Flask(__name__, static_folder='static')
|
26 |
CORS(app)
|
27 |
|
28 |
+
# Supported languages
|
29 |
+
KOKORO_LANGUAGES = {
|
30 |
+
"American English": "a", "British English": "b", "Mandarin Chinese": "z",
|
31 |
+
"Spanish": "e", "French": "f", "Hindi": "h", "Italian": "i", "Brazilian Portuguese": "p"
|
32 |
+
}
|
33 |
GTTS_LANGUAGES = lang.tts_langs()
|
34 |
GTTS_LANGUAGES['ja'] = 'Japanese'
|
35 |
+
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES.keys()) | set(GTTS_LANGUAGES.values()))
|
36 |
|
37 |
+
# Voice name for Gemini TTS
|
38 |
+
DEFAULT_GEMINI_VOICE = "Kore"
|
39 |
|
40 |
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
|
41 |
with wave.open(filename, "wb") as wf:
|
|
|
68 |
if audio_file.mimetype not in allowed_mime_types:
|
69 |
return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
|
70 |
|
71 |
+
# Transcribe audio with Gemini
|
72 |
+
model = genai.GenerativeModel("models/gemini-1.5-flash")
|
73 |
audio_blob = {
|
74 |
'mime_type': audio_file.mimetype,
|
75 |
'data': audio_file.read()
|
|
|
87 |
|
88 |
# Try Gemini 2.5 TTS
|
89 |
try:
|
90 |
+
tts_response = genai.generate_content(
|
91 |
+
model="models/gemini-2.5-flash-preview-tts",
|
92 |
contents=translated_text,
|
93 |
+
generation_config=GenerateContentConfig(
|
94 |
+
response_mime_type="audio/wav"
|
95 |
+
),
|
96 |
+
speech_config=SpeechConfig(
|
97 |
+
voice_config=VoiceConfig(
|
98 |
+
prebuilt_voice=PrebuiltVoiceConfig(voice_name=DEFAULT_GEMINI_VOICE)
|
99 |
)
|
100 |
)
|
101 |
)
|
102 |
+
|
103 |
+
data = tts_response.candidates[0].content.parts[0].inline_data.data
|
104 |
temp_output_path = os.path.join(tempfile.gettempdir(), "tts_gemini.wav")
|
105 |
wave_file(temp_output_path, data)
|
106 |
+
|
107 |
+
except Exception as gemini_tts_error:
|
108 |
+
app.logger.warning(f"Gemini TTS failed: {gemini_tts_error}")
|
109 |
+
# Fallback to Kokoro or gTTS
|
110 |
if target_language in KOKORO_LANGUAGES:
|
111 |
lang_code = KOKORO_LANGUAGES[target_language]
|
112 |
pipeline = KPipeline(lang_code=lang_code)
|
113 |
generator = pipeline(translated_text, voice="af_heart", speed=1)
|
114 |
|
115 |
audio_segments = [audio for _, _, audio in generator if audio is not None]
|
|
|
116 |
if audio_segments:
|
117 |
audio_data = np.concatenate(audio_segments)
|
118 |
temp_output_path = os.path.join(tempfile.gettempdir(), "tts_kokoro.wav")
|
119 |
sf.write(temp_output_path, audio_data, 24000)
|
120 |
else:
|
121 |
+
raise ValueError("No audio generated by Kokoro.")
|
122 |
else:
|
123 |
lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
|
124 |
tts = gTTS(translated_text, lang=lang_code)
|
|
|
132 |
})
|
133 |
|
134 |
except Exception as e:
|
135 |
+
app.logger.error(f"Processing error: {str(e)}")
|
136 |
return jsonify({'error': str(e)}), 500
|
137 |
|
138 |
@app.route('/download/<filename>')
|