Spaces:
Running
Running
File size: 3,453 Bytes
dbe8a71 11a3089 d0dd39c ab0df5d dbe8a71 c07d698 7cc4829 dbe8a71 75b45e0 7cc4829 413a70d 7cc4829 ab0df5d dbe8a71 7cc4829 75b45e0 7cc4829 dbe8a71 7cc4829 dbe8a71 7cc4829 c07d698 d0dd39c 11a3089 c07d698 75b45e0 11a3089 c07d698 073ce19 c07d698 11a3089 c07d698 75b45e0 c07d698 7cc4829 75b45e0 7cc4829 dbe8a71 7cc4829 dbe8a71 7cc4829 ef2c8e0 7cc4829 dbe8a71 7cc4829 d0dd39c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
import base64
from flask import Flask, request, jsonify, send_file, send_from_directory
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
from werkzeug.utils import secure_filename
from flask_cors import CORS
# Flask application; files in the 'static' folder are exposed at the URL root.
app = Flask(__name__, static_url_path='', static_folder='static')
CORS(app)

# Gemini credentials are read from the environment; startup fails without them.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# gTTS language table (language code -> display name) and the display names
# in sorted order, as returned by the /languages endpoint.
GTTS_LANGUAGES = lang.tts_langs()
SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')
@app.route('/languages')
def get_languages():
    """Return the sorted list of supported language names as JSON."""
    languages_response = jsonify(SUPPORTED_LANGUAGES)
    return languages_response
def _audio_mime_type(filename, declared_mimetype):
    """Pick the MIME type to attach to an uploaded audio payload.

    Args:
        filename: Sanitised upload filename (may be empty).
        declared_mimetype: Content type the client sent with the upload
            (may be empty).

    Returns:
        ``"audio/<extension>"`` (lower-cased) when the filename has an
        extension, otherwise the client-declared type, or ``None`` when
        neither is available.
    """
    extension = os.path.splitext(filename)[1].lstrip('.').lower()
    if extension:
        return f"audio/{extension}"
    return declared_mimetype or None


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio file, translate it and synthesise speech.

    Expects a multipart POST with an ``audio`` file part and an optional
    ``language`` form field (a gTTS language display name, default
    ``'English'``).

    Returns:
        On success, JSON with ``transcription``, ``translation`` and
        ``audio_url`` (a ``/download/<name>`` path for the generated MP3).
        ``400`` with an ``error`` message when the upload is missing, empty
        or of undeterminable type; ``500`` with the exception text when any
        later step fails.

    The generated MP3 is left in the system temp directory so that the
    download endpoint can serve it; it is not removed here.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        filename = secure_filename(audio_file.filename)
        mime_type = _audio_mime_type(filename, audio_file.mimetype)
        if mime_type is None:
            return jsonify({'error': 'Invalid audio file'}), 400

        # Read the upload straight from the request instead of writing it to
        # a client-named file in the shared temp directory: that avoided
        # neither name collisions between concurrent requests nor leftover
        # files, and failed outright when the sanitised name was empty.
        raw_audio = audio_file.read()
        if not raw_audio:
            return jsonify({'error': 'Invalid audio file'}), 400
        audio_data = base64.b64encode(raw_audio).decode("utf-8")

        # Transcribe with Gemini
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        prompt = """Accurately transcribe this audio file. Return only the raw text without formatting."""
        response = model.generate_content(
            [
                prompt,
                {
                    "mime_type": mime_type,
                    "data": audio_data,
                },
            ]
        )
        transcription = response.text.strip()

        # Translate with Gemini
        translate_prompt = f"Translate to {target_language} preserving meaning: {transcription}"
        translated_response = model.generate_content(translate_prompt)
        translated_text = translated_response.text.strip()

        # Generate TTS. Map the display name back to its gTTS code; names
        # that are not in the table fall back to English ('en').
        lang_code = next(
            (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
            'en',
        )
        tts = gTTS(translated_text, lang=lang_code)

        # mkstemp returns an open descriptor; close it so it is not leaked,
        # since gTTS writes to the path itself.
        output_fd, temp_output_path = tempfile.mkstemp(suffix=".mp3")
        os.close(output_fd)
        try:
            tts.save(temp_output_path)
        except Exception:
            # Do not leave an empty/partial MP3 behind when synthesis fails.
            if os.path.exists(temp_output_path):
                os.remove(temp_output_path)
            raise

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}',
        })
    except Exception as e:
        # Top-level boundary for the request: record the traceback, then
        # report the failure to the client as before.
        app.logger.exception("Audio translation request failed")
        return jsonify({'error': str(e)}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated MP3 from the system temp directory as an attachment.

    Args:
        filename: Name of the file inside the temp directory, taken from
            the URL.

    Returns:
        The file as an ``audio/mpeg`` attachment named
        ``translated_<filename>``, or a JSON ``404`` when the name is not a
        plain ``.mp3`` filename or the file does not exist.
    """
    # The temp directory is shared with other programs, so only serve bare
    # ``.mp3`` names (the suffix the translate endpoint generates). This
    # also rejects directory-like names such as '..', which would otherwise
    # raise an error the handler below does not catch.
    is_plain_name = os.path.basename(filename) == filename
    if not is_plain_name or not filename.lower().endswith('.mp3'):
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype="audio/mpeg",
            as_attachment=True,
            download_name=f"translated_{filename}",
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404
if __name__ == '__main__':
    # Listen on all interfaces; the port comes from $PORT, defaulting to 5000.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))