"""Flask service that transcribes uploaded audio, translates it, and speaks it.

Endpoints:
    GET  /                     static ``index.html``
    GET  /languages            sorted list of language names known to gTTS
    POST /translate            transcribe + translate an uploaded audio file
    GET  /download/<filename>  fetch an MP3 produced by ``/translate``
"""

import base64
import os
import tempfile

import google.generativeai as genai
from flask import Flask, jsonify, request, send_file, send_from_directory
from flask_cors import CORS
from gtts import gTTS, lang
from werkzeug.utils import secure_filename

app = Flask(__name__, static_folder='static', static_url_path='')
CORS(app)

# Configure Gemini API
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Language configurations
GTTS_LANGUAGES = lang.tts_langs()
SUPPORTED_LANGUAGES = sorted(GTTS_LANGUAGES.values())

# Reverse map (language name -> gTTS code), built once instead of scanning
# GTTS_LANGUAGES on every request. Iterating in reverse makes the FIRST code
# win when two codes share a name, matching a first-match linear search.
_LANGUAGE_CODES = {
    name: code for code, name in reversed(list(GTTS_LANGUAGES.items()))
}
_DEFAULT_LANGUAGE_CODE = 'en'

_GEMINI_MODEL_NAME = "gemini-1.5-pro-latest"
_TRANSCRIPTION_PROMPT = (
    "Accurately transcribe this audio file. \n"
    "Return only the raw text without formatting."
)

# Generated speech files carry a recognisable prefix/suffix so the download
# endpoint can refuse to serve anything else living in the temp directory.
_TTS_FILE_PREFIX = "tts_"
_TTS_FILE_SUFFIX = ".mp3"


def _resolve_audio_mime_type(filename, declared_type):
    """Return the MIME type to send to Gemini, or ``None`` if unknown.

    The (sanitised) file extension is preferred; when the name has no
    extension, the content type declared by the client for the upload is
    used if it is an ``audio/*`` type.
    """
    extension = os.path.splitext(filename)[1].lstrip('.').lower()
    if extension:
        return f"audio/{extension}"
    if declared_type and declared_type.startswith("audio/"):
        return declared_type
    return None


def _synthesize_speech(text, lang_code):
    """Write *text* as speech to a new temp MP3 and return the file's path."""
    fd, output_path = tempfile.mkstemp(
        prefix=_TTS_FILE_PREFIX, suffix=_TTS_FILE_SUFFIX
    )
    # mkstemp returns an OPEN descriptor; gTTS reopens the file by path, so
    # the descriptor must be closed here or it leaks on every request.
    os.close(fd)
    try:
        gTTS(text, lang=lang_code).save(output_path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise
    return output_path


@app.route('/')
def serve_index():
    """Serve the front-end entry page from the static folder."""
    return send_from_directory(app.static_folder, 'index.html')


@app.route('/languages')
def get_languages():
    """Return the sorted list of language names supported by gTTS as JSON."""
    return jsonify(SUPPORTED_LANGUAGES)


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio file, translate it, and synthesize speech.

    Expects a multipart form with an ``audio`` file and an optional
    ``language`` field (a gTTS language name, default ``'English'``).

    Returns:
        JSON with ``transcription``, ``translation`` and ``audio_url`` on
        success; ``{'error': ...}`` with status 400 for a missing/invalid
        upload, or status 500 if any later step raises.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # The sanitised name is only used to derive the audio format; the
        # upload itself is read straight from the request stream, so nothing
        # is written to (or left behind in) the shared temp directory.
        filename = secure_filename(audio_file.filename)
        mime_type = _resolve_audio_mime_type(filename, audio_file.mimetype)
        if mime_type is None:
            return jsonify({'error': 'Could not determine audio format'}), 400

        audio_data = base64.b64encode(audio_file.read()).decode("utf-8")

        # Transcribe with Gemini
        model = genai.GenerativeModel(_GEMINI_MODEL_NAME)
        response = model.generate_content(
            [
                _TRANSCRIPTION_PROMPT,
                {"mime_type": mime_type, "data": audio_data},
            ]
        )
        transcription = response.text.strip()

        # Translate with Gemini
        translate_prompt = (
            f"Translate to {target_language} preserving meaning: "
            f"{transcription}"
        )
        translated_response = model.generate_content(translate_prompt)
        translated_text = translated_response.text.strip()

        # Generate TTS; unknown language names fall back to English voice.
        lang_code = _LANGUAGE_CODES.get(target_language, _DEFAULT_LANGUAGE_CODE)
        temp_output_path = _synthesize_speech(translated_text, lang_code)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # Top-level boundary for this endpoint: record the traceback so the
        # failure is diagnosable, then report it to the client as before.
        app.logger.exception("Translation request failed")
        return jsonify({'error': str(e)}), 500


@app.route('/download/<filename>')
def download_file(filename):
    """Send a previously generated speech file as an MP3 attachment.

    Only names that look like files created by ``/translate`` are served;
    anything else (including names altered by ``secure_filename``) gets a
    404 so other contents of the temp directory are not exposed.
    """
    is_generated_file = (
        filename == secure_filename(filename)
        and filename.startswith(_TTS_FILE_PREFIX)
        and filename.endswith(_TTS_FILE_SUFFIX)
    )
    if not is_generated_file:
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype="audio/mpeg",
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))