# Source: Hugging Face Space "Athspi-ai/Audio-translation" (application file, 4,280 bytes).
# The web view's page chrome and per-line commit-hash gutter were scrape residue, not program text.

import os
import base64
from flask import Flask, request, jsonify, send_file, send_from_directory
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
import soundfile as sf
from werkzeug.utils import secure_filename
from flask_cors import CORS

# Flask application object; files under the ``static`` folder back the front end.
app = Flask(__name__, static_folder="static")
# Enable flask-cors on the app with its default settings.
CORS(app)

# Gemini client setup: the API key is read from the environment and is
# mandatory -- the module refuses to load without it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Language configurations
# Display name -> single-letter language code used by the Kokoro TTS branch.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Japanese": "j",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, as reported by gTTS itself.
GTTS_LANGUAGES = lang.tts_langs()

# Every display name a client may request, sorted for stable presentation.
# A set union is used because several names (e.g. "Spanish", "French") exist
# in both tables and would otherwise be listed twice.
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values()))

@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')

@app.route('/languages')
def get_languages():
    """Return the supported target-language names as a JSON array."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload

def _audio_mime_type(filename, declared=None):
    """Pick the MIME type to send to Gemini for an uploaded audio file.

    Args:
        filename: Client-supplied file name (may be empty or lack an extension).
        declared: Content type the client declared for the upload, if any.

    Returns:
        ``audio/<ext>`` when the name has a plain alphanumeric extension,
        otherwise the declared type if it is an audio type, otherwise
        ``audio/mpeg``.
    """
    ext = os.path.splitext(filename or "")[1].lstrip(".").lower()
    if ext.isalnum():
        return "audio/" + ext
    if declared and declared.startswith("audio/"):
        return declared
    return "audio/mpeg"


def _new_temp_audio_path(suffix):
    """Create an empty temp file with *suffix* and return its path.

    ``mkstemp`` hands back an open OS-level descriptor; it is closed here so
    that each request does not leak one.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio file, translate it, and synthesize speech.

    Expects a multipart form with an ``audio`` file and an optional
    ``language`` field (default ``'English'``).

    Returns:
        JSON with ``transcription``, ``translation`` and ``audio_url`` on
        success; ``{'error': ...}`` with status 400 for a missing/empty upload
        or 500 for any failure during processing.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Read the upload in memory instead of saving it under a client-chosen
        # name in the shared temp directory (collisions between concurrent
        # requests, files never cleaned up).
        raw_audio = audio_file.read()
        if not raw_audio:
            return jsonify({'error': 'Invalid audio file'}), 400
        audio_data = base64.b64encode(raw_audio).decode("utf-8")
        mime_type = _audio_mime_type(audio_file.filename, audio_file.mimetype)

        # Transcribe with Gemini
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        prompt = """Accurately transcribe this audio file. Return only the raw text without any formatting, 
                   punctuation, or additional commentary. Preserve the original language and meaning."""

        response = model.generate_content(
            [
                prompt,
                {
                    "mime_type": mime_type,
                    "data": audio_data
                }
            ]
        )
        transcription = response.text.strip()

        # Translate with Gemini
        translate_prompt = f"""Translate this text to {target_language} preserving exact meaning and cultural nuances.
                            Return only the translated text without any explanations or formatting: {transcription}"""

        translated_response = model.generate_content(translate_prompt)
        translated_text = translated_response.text.strip()

        # Generate TTS
        if target_language in KOKORO_LANGUAGES:
            lang_code = KOKORO_LANGUAGES[target_language]
            # NOTE(review): no Kokoro synthesis is wired in yet, so the .wav
            # created here is an empty placeholder -- TODO implement using
            # lang_code and translated_text.
            temp_output_path = _new_temp_audio_path(".wav")
        else:
            # Map the display name back to its gTTS code; unknown names fall
            # back to English.
            lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
            tts = gTTS(translated_text, lang=lang_code)
            temp_output_path = _new_temp_audio_path(".mp3")
            tts.save(temp_output_path)

        # The output file is left in the temp directory so the client can
        # fetch it afterwards through the download route.
        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # Top-level request boundary: record the traceback, then report the
        # failure to the client as before.
        app.logger.exception("Audio translation request failed")
        return jsonify({'error': str(e)}), 500

@app.route('/download/<filename>')
def download_file(filename):
    """Send a previously generated audio file from the temp directory.

    Args:
        filename: Base name of the file, as returned in ``audio_url``.

    Returns:
        The file as an attachment named ``translated_<filename>``, or a JSON
        ``{'error': 'File not found'}`` with status 404 when the name is not
        a plain ``.mp3``/``.wav`` file name or the file does not exist.
    """
    not_found = (jsonify({'error': 'File not found'}), 404)

    # Only the two audio formats the translate route produces are served, so
    # unrelated files that happen to live in the temp directory stay private.
    mimetypes_by_ext = {'.mp3': 'audio/mpeg', '.wav': 'audio/wav'}

    # Reject anything that is not already a sanitized bare file name
    # (e.g. "..", names with path separators or control characters).
    if not filename or secure_filename(filename) != filename:
        return not_found

    extension = os.path.splitext(filename)[1].lower()
    mimetype = mimetypes_by_ext.get(extension)
    if mimetype is None:
        return not_found

    file_path = os.path.join(tempfile.gettempdir(), filename)
    if not os.path.isfile(file_path):
        return not_found

    try:
        return send_file(
            file_path,
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        # The file can disappear between the check above and the send.
        return not_found

if __name__ == '__main__':
    # Listen on all interfaces; the port comes from the PORT environment
    # variable, defaulting to 5000.
    listen_port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=listen_port)