Athspi's picture
Update app.py
385365a verified
raw
history blame
5.39 kB
import os
import tempfile
import wave
import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from werkzeug.utils import secure_filename
from gtts import gTTS, lang
from kokoro import KPipeline
from google import genai
from google.genai import types
# --- Gemini client -----------------------------------------------------------
# The key is read once at import time; the module refuses to load without it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
client = genai.Client(api_key=GEMINI_API_KEY)

# --- Flask application -------------------------------------------------------
app = Flask(__name__, static_folder='static')
CORS(app)

# --- Language tables ---------------------------------------------------------
# Display name -> Kokoro pipeline language code.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, with Japanese forced to be present.
GTTS_LANGUAGES = {**lang.tts_langs(), 'ja': 'Japanese'}

# Every display name offered to the client, de-duplicated and sorted.
SUPPORTED_LANGUAGES = sorted({*KOKORO_LANGUAGES, *GTTS_LANGUAGES.values()})

# Prebuilt voice name passed to the Gemini TTS model.
GEMINI_VOICE = "Kore"
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Destination path (or file-like object accepted by
            ``wave.open``).
        pcm: Bytes-like object holding the raw audio frames.
        channels: Number of audio channels written to the header.
        rate: Sampling rate in Hz written to the header.
        sample_width: Bytes per sample written to the header.
    """
    # (nchannels, sampwidth, framerate, nframes, comptype, compname);
    # nframes starts at 0 and is patched by writeframes().
    header = (channels, sample_width, rate, 0, "NONE", "not compressed")
    with wave.open(filename, "wb") as writer:
        writer.setparams(header)
        writer.writeframes(pcm)
@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')
@app.route('/languages')
def get_languages():
    """Return ``SUPPORTED_LANGUAGES`` to the client as a JSON array."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload
def _gemini_text(contents):
    """Run ``gemini-2.0-flash`` on ``contents`` and return its stripped text.

    Raises:
        ValueError: If the model returned no text at all.
    """
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=contents,
    )
    # ``response.text`` may be None (e.g. blocked or empty candidates); fail
    # with a clear message instead of an AttributeError on ``.strip()``.
    text = (response.text or "").strip()
    if not text:
        raise ValueError("Gemini returned an empty response")
    return text


def _new_temp_audio_path(prefix, suffix):
    """Reserve a unique file in the temp directory and return its path.

    A unique name per request prevents concurrent requests from overwriting
    each other's audio. The file lives in ``tempfile.gettempdir()`` and is
    not removed by this module.
    """
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
    os.close(fd)
    return path


def _synthesize_speech(text, target_language):
    """Turn ``text`` into an audio file and return the file's path.

    Gemini TTS is tried first. If it fails for any reason, Kokoro is used
    when ``target_language`` is one of ``KOKORO_LANGUAGES``; otherwise gTTS
    is used (defaulting to English when the language name is unknown).

    Raises:
        ValueError: If Kokoro is selected but yields no audio.
    """
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=GEMINI_VOICE)
                    )
                )
            )
        )
        pcm = response.candidates[0].content.parts[0].inline_data.data
        output_path = _new_temp_audio_path("tts_gemini_", ".wav")
        wave_file(output_path, pcm)
        return output_path
    except Exception:
        # Deliberate best-effort: any Gemini TTS failure falls through to the
        # local engines, but the cause is recorded rather than swallowed.
        app.logger.warning("Gemini TTS failed; falling back", exc_info=True)

    if target_language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
        generator = pipeline(text, voice="af_heart", speed=1)
        audio_segments = [audio for _, _, audio in generator if audio is not None]
        if not audio_segments:
            raise ValueError("No audio generated by Kokoro")
        output_path = _new_temp_audio_path("tts_kokoro_", ".wav")
        sf.write(output_path, np.concatenate(audio_segments), 24000)
        return output_path

    # gTTS is keyed by language code; map the display name back to its code.
    lang_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
        'en',
    )
    tts = gTTS(text, lang=lang_code)
    output_path = _new_temp_audio_path("tts_gtts_", ".mp3")
    tts.save(output_path)
    return output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it, and synthesize speech.

    Expects a multipart form with an ``audio`` file and an optional
    ``language`` field (default ``'English'``).

    Returns:
        JSON with ``transcription``, ``translation`` and ``audio_url`` on
        success; ``{'error': ...}`` with status 400 for a bad upload or 500
        for any processing failure.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400
        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')
        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400
        allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
        if audio_file.mimetype not in allowed_mime_types:
            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

        audio_bytes = audio_file.read()
        if not audio_bytes:
            return jsonify({'error': 'Invalid audio file'}), 400

        # The google-genai SDK has no ``GenerativeModel``/``start_chat``; the
        # instruction and the audio are sent together in a single request.
        transcription = _gemini_text([
            "You are a professional transcriber. Transcribe this audio accurately and verbatim.",
            types.Part.from_bytes(data=audio_bytes, mime_type=audio_file.mimetype),
        ])

        translated_text = _gemini_text(
            f"Translate the following text to {target_language}:\n\n{transcription}"
        )

        output_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(output_path)}'
        })
    except Exception as e:
        # Top-level boundary for the request: log with traceback, report 500.
        app.logger.exception("Error: %s", e)
        return jsonify({'error': str(e)}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send a previously generated TTS audio file as an attachment.

    Only sanitised names beginning with ``tts_`` are served, so other files
    in the shared temp directory cannot be fetched through this route.

    Args:
        filename: Base name of a file in ``tempfile.gettempdir()``.

    Returns:
        The file as a download, or a JSON 404 when the name is not allowed
        or the file does not exist.
    """
    # Reject anything secure_filename would alter (path separators, "..",
    # odd characters) and anything this app did not produce.
    if secure_filename(filename) != filename or not filename.startswith("tts_"):
        return jsonify({'error': 'File not found'}), 404

    # Generated files are WAV or MP3; advertise the matching MIME type
    # instead of always claiming MP3.
    mimetype = "audio/wav" if filename.lower().endswith(".wav") else "audio/mpeg"
    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
# Script entry point: start Flask's built-in server on all interfaces.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")