Spaces:
Running
Running
File size: 6,635 Bytes
5f33e0e 9ffbfd1 c07d698 9ffbfd1 bfc5175 7cc4829 bfc5175 dbe8a71 bfc5175 6ebed08 5ddb059 7cc4829 bfc5175 5ddb059 bfc5175 dbe8a71 bfc5175 9ffbfd1 bfc5175 6ebed08 9ffbfd1 bfc5175 317b2f2 6ebed08 dbed07a 63a0fca bfc5175 dbed07a 965bd2d bfc5175 5ddb059 6c131f6 5ddb059 bfc5175 5ddb059 9ffbfd1 bfc5175 5ddb059 9ffbfd1 dbed07a 9ffbfd1 5ddb059 9ffbfd1 bfc5175 9ffbfd1 dbed07a 9ffbfd1 6ebed08 bfc5175 9ffbfd1 bfc5175 9ffbfd1 bfc5175 9ffbfd1 bfc5175 9ffbfd1 bfc5175 5ddb059 7cc4829 bfc5175 5ddb059 bfc5175 5ddb059 9ffbfd1 5ddb059 dbe8a71 bfc5175 7cc4829 9ffbfd1 7cc4829 9ffbfd1 ef2c8e0 dbed07a dbe8a71 bfc5175 7cc4829 9ffbfd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import os
import tempfile
import wave

import numpy as np
import soundfile as sf
from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
from google import genai as genai_sdk
from google import generativeai as genai
from google.genai import types
from gtts import gTTS, lang
from kokoro import KPipeline
from werkzeug.utils import secure_filename
# Flask app setup
app = Flask(__name__, static_folder='static')
CORS(app)

# Gemini API configuration
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")

# Two Gemini SDKs are used side by side:
#   * `genai` (google.generativeai) is configured globally and backs the
#     `genai.GenerativeModel` calls used for transcription and translation.
#   * `genai_sdk` (google.genai) provides the `Client` object whose
#     `client.models.generate_content` call is used for text-to-speech.
# The legacy `google.generativeai` module has no `Client` attribute, so the
# client must be built from `google.genai`.
genai.configure(api_key=GEMINI_API_KEY)
client = genai_sdk.Client(api_key=GEMINI_API_KEY)
# Language support
# Display name -> language code passed to KPipeline(lang_code=...).
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, with a Japanese entry forced in.
GTTS_LANGUAGES = lang.tts_langs()
GTTS_LANGUAGES['ja'] = 'Japanese'

# Every display name offered to the client, de-duplicated and sorted.
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values()))

# BCP-47 code -> prebuilt Gemini TTS voice name; every locale currently
# uses the same voice.
GEMINI_VOICES = dict.fromkeys(
    (
        "ar-EG", "de-DE", "en-US", "es-US", "fr-FR",
        "hi-IN", "id-ID", "it-IT", "ja-JP", "ko-KR",
        "pt-BR", "ru-RU", "nl-NL", "pl-PL", "th-TH",
        "tr-TR", "vi-VN", "ro-RO", "uk-UA", "bn-BD",
        "en-IN", "mr-IN", "ta-IN", "te-IN",
    ),
    "Kore",
)
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to `filename` as a WAV file.

    Args:
        filename: Destination path (or anything `wave.open` accepts for writing).
        pcm: Raw PCM frame data as bytes.
        channels: Number of audio channels.
        rate: Sampling rate in Hz.
        sample_width: Bytes per sample.
    """
    writer = wave.open(filename, "wb")
    try:
        writer.setframerate(rate)
        writer.setsampwidth(sample_width)
        writer.setnchannels(channels)
        writer.writeframes(pcm)
    finally:
        # Closing finalises the WAV header, exactly as leaving a `with` block would.
        writer.close()
@app.route('/')
def serve_index():
    """Serve `index.html` from the application's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')
@app.route('/languages')
def get_languages():
    """Return the sorted list of supported language display names as JSON."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload
def _new_audio_path(prefix, suffix):
    """Create an empty, uniquely named file in the temp directory and return its path."""
    # mkstemp guarantees a unique name, so two requests that upload files with
    # the same name can no longer overwrite each other's output.
    fd, path = tempfile.mkstemp(prefix=prefix, suffix=suffix)
    os.close(fd)
    return path


def _discard(path):
    """Best-effort removal of a partially written output file."""
    try:
        os.remove(path)
    except OSError:
        pass


def _synthesize_speech(text, target_language):
    """Render `text` to an audio file and return the file's path.

    Engine selection, in order of preference:
      1. Gemini TTS, when the language maps to a voice in GEMINI_VOICES.
      2. Kokoro, when the language is a key of KOKORO_LANGUAGES.
      3. gTTS otherwise (falling back to the 'en' voice for unknown names).

    Raises:
        ValueError: if Kokoro yields no audio segments.
    """
    voice_name = GEMINI_VOICES.get(get_bcp47_code(target_language))
    if voice_name:
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice_name)
                    )
                )
            )
        )
        pcm = response.candidates[0].content.parts[0].inline_data.data
        out_path = _new_audio_path('tts_', '.wav')
        try:
            wave_file(out_path, pcm)
        except Exception:
            _discard(out_path)
            raise
        return out_path

    if target_language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
        segments = [
            audio
            for _, _, audio in pipeline(text, voice="af_heart", speed=1)
            if audio is not None
        ]
        if not segments:
            raise ValueError("No audio generated by Kokoro")
        out_path = _new_audio_path('kokoro_', '.wav')
        try:
            sf.write(out_path, np.concatenate(segments), 24000)
        except Exception:
            _discard(out_path)
            raise
        return out_path

    # gTTS is keyed by language code, so reverse-look-up the display name.
    lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
    tts = gTTS(text, lang=lang_code)
    out_path = _new_audio_path('gtts_', '.mp3')
    try:
        tts.save(out_path)
    except Exception:
        _discard(out_path)
        raise
    return out_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it, and synthesize speech.

    Expects a multipart form with:
      * `audio`: the audio file (WAV, MPEG, MP4 or WebM MIME type).
      * `language`: target language display name (defaults to 'English').

    Returns:
        200 with JSON `{transcription, translation, audio_url}` on success,
        400 with JSON `{error}` for a missing/invalid/unsupported upload,
        500 with JSON `{error}` if any processing step raises.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400
        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')
        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400
        allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
        if audio_file.mimetype not in allowed_mime_types:
            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

        # Step 1: transcription. The instruction is sent first, then the audio
        # itself as a second turn of the same chat.
        model = genai.GenerativeModel("gemini-2.0-flash")
        audio_blob = {'mime_type': audio_file.mimetype, 'data': audio_file.read()}
        convo = model.start_chat()
        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
        response = convo.send_message(audio_blob)
        transcription = response.text.strip()

        # Step 2: translation of the transcript into the requested language.
        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Step 3: speech synthesis; the file is fetched later via /download/<name>.
        temp_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_path)}'
        })
    except Exception as e:
        # Top-level boundary for the request: log with traceback, report as 500.
        app.logger.exception("Error processing request: %s", e)
        return jsonify({'error': str(e)}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send a previously generated audio file from the temp directory.

    Only names that look like this app's own output are served: the name must
    already be in `secure_filename` form and start with one of the prefixes
    used when the audio is synthesized (`tts_`, `kokoro_`, `gtts_`). Anything
    else in the shared temp directory is reported as not found.

    Args:
        filename: Base name of the file, as returned in `audio_url`.

    Returns:
        The file as an attachment named `translated_<filename>`, or
        404 with JSON `{error}` when the file is missing or not allowed.
    """
    is_generated_name = (
        secure_filename(filename) == filename
        and filename.startswith(('tts_', 'kokoro_', 'gtts_'))
    )
    file_path = os.path.join(tempfile.gettempdir(), filename)
    if not is_generated_name or not os.path.isfile(file_path):
        return jsonify({'error': 'File not found'}), 404

    # Generated files are either MP3 (gTTS) or WAV (Gemini TTS / Kokoro);
    # advertise the type that matches the actual container.
    mimetype = "audio/mpeg" if filename.lower().endswith('.mp3') else "audio/wav"
    try:
        return send_file(
            file_path,
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        # The file can disappear between the existence check and the send.
        return jsonify({'error': 'File not found'}), 404
# Language display name -> BCP-47 locale code.
_BCP47_BY_LANGUAGE = {
    "Arabic": "ar-EG",
    "German": "de-DE",
    "English": "en-US",
    "Spanish": "es-US",
    "French": "fr-FR",
    "Hindi": "hi-IN",
    "Indonesian": "id-ID",
    "Italian": "it-IT",
    "Japanese": "ja-JP",
    "Korean": "ko-KR",
    "Portuguese": "pt-BR",
    "Russian": "ru-RU",
    "Dutch": "nl-NL",
    "Polish": "pl-PL",
    "Thai": "th-TH",
    "Turkish": "tr-TR",
    "Vietnamese": "vi-VN",
    "Romanian": "ro-RO",
    "Ukrainian": "uk-UA",
    "Bengali": "bn-BD",
    "Indian English": "en-IN",
    "Marathi": "mr-IN",
    "Tamil": "ta-IN",
    "Telugu": "te-IN",
}


def get_bcp47_code(language):
    """Return the BCP-47 code for a language display name, or None if unknown."""
    return _BCP47_BY_LANGUAGE.get(language)
if __name__ == '__main__':
    # Listen on all interfaces, port 7860, when run directly as a script.
    app.run(host="0.0.0.0", port=7860)