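# app.py -- last change "Update app.py" (commit 75b45e0, verified), uploaded by Athspi; 3.45 kB.
# (Originally file-viewer page chrome: avatar caption, "raw", "history", "blame" links.)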
import os
import base64
from flask import Flask, request, jsonify, send_file, send_from_directory
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
from werkzeug.utils import secure_filename
from flask_cors import CORS
# Flask application; files under the 'static' folder are exposed at the URL root.
app = Flask(__name__, static_url_path='', static_folder='static')
CORS(app)

# Gemini credentials come from the environment; refuse to start without them.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# gTTS language table (language code -> language name) and the sorted list of
# language names that the /languages endpoint returns.
GTTS_LANGUAGES = lang.tts_langs()
SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())
@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')
@app.route('/languages')
def get_languages():
    """Return the sorted language names in ``SUPPORTED_LANGUAGES`` as JSON."""
    response = jsonify(SUPPORTED_LANGUAGES)
    return response
@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it and synthesise speech.

    Expects a multipart POST with:
      * ``audio``    -- the audio file to process (required).
      * ``language`` -- target language name, matched against the names in
        ``GTTS_LANGUAGES`` (optional, defaults to ``'English'``).

    Returns:
        A JSON object with ``transcription``, ``translation`` and
        ``audio_url`` (a ``/download/<name>.mp3`` path for the generated
        speech).  Responds with 400 when the upload is missing or has no
        usable filename, and with 500 plus the exception message for any
        other failure.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # The sanitised name is only used to derive the MIME subtype; a name
        # that sanitises to nothing (e.g. "...") cannot provide one.
        filename = secure_filename(audio_file.filename)
        if not filename:
            return jsonify({'error': 'Invalid audio file'}), 400

        # Read the upload straight from the request instead of writing it to
        # a predictable path in the shared temp directory: that avoided
        # neither name collisions between concurrent requests nor leaking the
        # file (it was never deleted and was reachable via /download/).
        audio_data = base64.b64encode(audio_file.read()).decode("utf-8")

        # Transcribe with Gemini
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        prompt = """Accurately transcribe this audio file. Return only the raw text without formatting."""
        response = model.generate_content(
            [
                prompt,
                {
                    # MIME types are case-insensitive; normalise the subtype.
                    "mime_type": "audio/" + filename.split('.')[-1].lower(),
                    "data": audio_data
                }
            ]
        )
        transcription = response.text.strip()

        # Translate with Gemini
        translate_prompt = f"Translate to {target_language} preserving meaning: {transcription}"
        translated_response = model.generate_content(translate_prompt)
        translated_text = translated_response.text.strip()

        # Generate TTS; unknown language names fall back to the 'en' voice.
        lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
        tts = gTTS(translated_text, lang=lang_code)

        # mkstemp returns an open descriptor that the caller must close;
        # gTTS reopens the file by path, so release the descriptor first.
        output_fd, temp_output_path = tempfile.mkstemp(suffix=".mp3")
        os.close(output_fd)
        try:
            tts.save(temp_output_path)
        except Exception:
            # Do not leave an empty/partial MP3 behind when synthesis fails.
            if os.path.exists(temp_output_path):
                os.remove(temp_output_path)
            raise

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })
    except Exception as e:
        # Top-level request boundary: record the traceback server-side and
        # keep the existing JSON error contract for the client.
        app.logger.exception("Audio translation failed")
        return jsonify({'error': str(e)}), 500
@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated MP3 from the system temp directory as an attachment.

    Args:
        filename: Base name of the file, as handed out in the ``audio_url``
            field of the ``/translate`` response.

    Returns:
        The file as an ``audio/mpeg`` attachment named
        ``translated_<filename>``, or a JSON ``{'error': 'File not found'}``
        body with status 404 when the name is not an ``.mp3`` file, is
        unsafe, or does not exist.
    """
    # Imported locally so this handler does not depend on a new file-level import.
    from werkzeug.exceptions import NotFound

    # Only the MP3s produced by /translate are meant to be downloadable; the
    # temp directory is shared with unrelated files.
    if not filename.endswith('.mp3'):
        return jsonify({'error': 'File not found'}), 404

    try:
        # send_from_directory joins the path safely and raises NotFound for
        # names that escape the directory or are not regular files.
        return send_from_directory(
            tempfile.gettempdir(),
            filename,
            mimetype="audio/mpeg",
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except (FileNotFoundError, NotFound):
        return jsonify({'error': 'File not found'}), 404
if __name__ == '__main__':
    # Listen on all interfaces; use the PORT environment variable when set,
    # otherwise port 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=listen_port)