Spaces:
Running
Running
import os | |
import tempfile | |
import numpy as np | |
import soundfile as sf | |
import wave | |
from flask import Flask, request, jsonify, send_file, send_from_directory | |
from flask_cors import CORS | |
import google.generativeai as genai | |
from google.generativeai import types | |
# Flask application; static assets are looked up in the ``static`` folder,
# and flask_cors is applied to the app with its default settings.
app = Flask(__name__, static_folder='static')
CORS(app)

# The Gemini API key must come from the environment. Abort at import time
# when it is missing or empty rather than failing on the first request.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY environment variable not set")

genai.configure(api_key=GEMINI_API_KEY)
# Maps each language's display name (as offered to the client) to its
# BCP-47 language tag. Insertion order is the order clients receive.
SUPPORTED_LANGUAGES = {
    "Arabic (Egyptian)": "ar-EG",
    "German (Germany)": "de-DE",
    "English (US)": "en-US",
    "Spanish (US)": "es-US",
    "French (France)": "fr-FR",
    "Hindi (India)": "hi-IN",
    "Indonesian (Indonesia)": "id-ID",
    "Italian (Italy)": "it-IT",
    "Japanese (Japan)": "ja-JP",
    "Korean (Korea)": "ko-KR",
    "Portuguese (Brazil)": "pt-BR",
    "Russian (Russia)": "ru-RU",
    "Dutch (Netherlands)": "nl-NL",
    "Polish (Poland)": "pl-PL",
    "Thai (Thailand)": "th-TH",
    "Turkish (Turkey)": "tr-TR",
    "Vietnamese (Vietnam)": "vi-VN",
    "Romanian (Romania)": "ro-RO",
    "Ukrainian (Ukraine)": "uk-UA",
    "Bengali (Bangladesh)": "bn-BD",
    "English (India)": "en-IN",
    "Marathi (India)": "mr-IN",
    "Tamil (India)": "ta-IN",
    "Telugu (India)": "te-IN",
}
def serve_index():
    """Return ``index.html`` from the application's static folder.

    NOTE(review): no route decorator is visible in this view of the file;
    presumably this handler is registered on ``app`` elsewhere — confirm.
    """
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')
def get_languages():
    """Return the display names of all supported languages as a JSON array.

    The names are the keys of ``SUPPORTED_LANGUAGES`` in definition order.

    NOTE(review): no route decorator is visible in this view of the file;
    presumably this handler is registered on ``app`` elsewhere — confirm.
    """
    language_names = [*SUPPORTED_LANGUAGES]
    return jsonify(language_names)
def translate_audio():
    """Transcribe an uploaded audio clip, translate it, and synthesize speech.

    Reads the ``audio`` upload and the optional ``language`` form field
    (default ``'English (US)'``) from the current Flask request, then makes
    three Gemini calls: transcription, text translation, and text-to-speech.
    The synthesized audio is written to a temporary ``.wav`` file whose base
    name is returned inside ``audio_url``.

    NOTE(review): no route decorator is visible in this view of the file;
    presumably this handler is registered on ``app`` elsewhere — confirm.

    Returns:
        A JSON response with ``transcription``, ``translation`` and
        ``audio_url`` on success; a JSON ``error`` with status 400 when the
        upload is missing, empty, or has an unsupported MIME type; a JSON
        ``error`` with status 500 when any later step raises.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English (US)')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Validate MIME type
        allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
        if audio_file.mimetype not in allowed_mime_types:
            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

        # Read audio data
        audio_data = audio_file.read()

        # Transcribe audio using Gemini: the instruction is sent first, then
        # the audio itself as a second message in the same chat.
        model = genai.GenerativeModel("gemini-2.0-flash")
        audio_blob = {
            'mime_type': audio_file.mimetype,
            'data': audio_data
        }
        convo = model.start_chat()
        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
        response = convo.send_message(audio_blob)
        transcription = response.text.strip()

        # Translate text using Gemini
        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Generate TTS using Gemini
        # NOTE(review): `genai.Client` and `types.GenerateContentConfig` /
        # `types.SpeechConfig` look like the newer `google-genai` SDK API;
        # confirm the imported `google.generativeai` module exposes them,
        # otherwise this step always ends in the 500 handler below.
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Generate speech
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=translated_text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name='Kore'  # You can change the voice as needed
                        )
                    )
                ),
            )
        )

        # Extract audio data
        audio_output = response.candidates[0].content.parts[0].inline_data.data

        # Save audio to temporary file. mkstemp() hands back an already-open
        # OS-level descriptor; close it right away because the file is
        # reopened by path below, and leaving it open leaks one descriptor
        # per request.
        temp_fd, temp_output_path = tempfile.mkstemp(suffix=".wav")
        os.close(temp_fd)

        # Assumes the TTS payload is raw mono 16-bit PCM at 24 kHz — TODO confirm.
        with wave.open(temp_output_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(audio_output)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })
    except Exception as e:
        # Request boundary: log with traceback and report the failure as JSON.
        app.logger.exception("Error processing request: %s", e)
        return jsonify({'error': str(e)}), 500
def download_file(filename):
    """Send a generated ``.wav`` file from the system temp directory.

    NOTE(review): no route decorator is visible in this view of the file;
    the ``/download/<name>`` URLs built by ``translate_audio`` suggest this
    handler is registered under that path — confirm.

    Args:
        filename: Client-supplied base name of the file to download.

    Returns:
        The file as an ``audio/wav`` attachment named
        ``translated_<name>``, or a JSON ``error`` with status 404 when the
        name is not an existing ``.wav`` file directly inside the temp
        directory.
    """
    temp_dir = os.path.realpath(tempfile.gettempdir())
    resolved_path = os.path.realpath(os.path.join(temp_dir, filename))

    # ``filename`` is untrusted: serve only ``.wav`` files that resolve to a
    # direct child of the temp directory, so neither ``..`` segments,
    # absolute paths, symlinks, nor unrelated temp files can be fetched.
    is_direct_child = os.path.dirname(resolved_path) == temp_dir
    if not (is_direct_child and resolved_path.endswith(".wav")):
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            resolved_path,
            mimetype="audio/wav",
            as_attachment=True,
            download_name=f"translated_{os.path.basename(resolved_path)}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
if __name__ == '__main__':
    # Direct execution: start Flask's built-in server on all interfaces,
    # port 7860.
    app.run(port=7860, host="0.0.0.0")