File size: 4,145 Bytes
dbe8a71
7cc4829
dbe8a71
 
 
 
9dbf879
 
7cc4829
 
dbe8a71
7cc4829
 
 
 
413a70d
 
7cc4829
dbe8a71
 
7cc4829
dbe8a71
413a70d
 
 
 
dbe8a71
7cc4829
9dbf879
 
 
 
 
 
 
 
 
 
 
 
7cc4829
 
 
 
 
 
 
 
 
 
dbe8a71
7cc4829
 
dbe8a71
7cc4829
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8b6f22
7cc4829
dbe8a71
 
7cc4829
 
 
 
9dbf879
7cc4829
 
 
 
 
9dbf879
7cc4829
 
 
 
 
 
 
 
 
 
 
dbe8a71
7cc4829
 
dbe8a71
7cc4829
 
 
 
 
 
 
 
ef2c8e0
7cc4829
 
dbe8a71
7cc4829
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
from flask import Flask, request, jsonify, send_file, send_from_directory
from faster_whisper import WhisperModel
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
import soundfile as sf
from kokoro import KPipeline
from werkzeug.utils import secure_filename
from flask_cors import CORS

# Flask application object; its static folder is 'static'.
app = Flask(__name__, static_folder='static')
# Wrap the app with flask_cors; no options are passed, so its defaults apply.
CORS(app)

# Configure Gemini API
# The key is read from the environment once, at import time. Importing this
# module without GEMINI_API_KEY set fails with ValueError.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Initialize Whisper model
# The model is loaded once at import time and shared by all requests.
# First attempt: automatic device selection with float16 computation. If that
# raises ValueError, retry on CPU with int8.
# NOTE(review): presumably the ValueError signals that float16 is unsupported
# on the selected device - confirm against the faster-whisper documentation.
model_size = "Systran/faster-whisper-large-v3"
try:
    whisper_model = WhisperModel(model_size, device="auto", compute_type="float16")
except ValueError:
    whisper_model = WhisperModel(model_size, device="cpu", compute_type="int8")

# Language configurations
# Display name -> single-letter language code passed to Kokoro's KPipeline.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Japanese": "j",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, as reported by gTTS itself.
GTTS_LANGUAGES = lang.tts_langs()

# Sorted display names accepted as a translation target. A set union is used
# because the same name (e.g. "Spanish") can be offered by both engines and
# must appear only once in the list served to clients.
SUPPORTED_LANGUAGES = sorted(set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values()))

@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')

@app.route('/languages')
def get_languages():
    """Return the supported target-language names as a JSON array."""
    language_names = SUPPORTED_LANGUAGES
    return jsonify(language_names)

def _remove_quietly(path):
    """Delete *path*, logging (not raising) if the file cannot be removed."""
    try:
        os.remove(path)
    except OSError as cleanup_error:
        app.logger.warning("Could not remove temporary file %s: %s", path, cleanup_error)


def _synthesize_speech(text, target_language):
    """Render *text* as speech in a new temporary file and return its path.

    Languages listed in KOKORO_LANGUAGES are synthesised with Kokoro and
    written as a 24 kHz WAV file; every other language goes through gTTS and
    is written as MP3 (falling back to gTTS code 'en' when the name is not a
    gTTS language).

    Raises:
        RuntimeError: if Kokoro produces no audio for the text.
    """
    if target_language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
        # mkstemp returns an open descriptor; close it so it is not leaked.
        fd, output_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        wrote_audio = False
        try:
            # Kokoro yields the text in chunks. Append every chunk to the same
            # file so long translations are not cut off after the first one.
            # NOTE(review): chunks are assumed to be mono 1-D arrays - confirm.
            # NOTE(review): voice "af_heart" is used for every Kokoro language;
            # confirm it is suitable for non-English targets.
            with sf.SoundFile(output_path, mode="w", samplerate=24000, channels=1) as wav_file:
                for _, _, audio in pipeline(text, voice="af_heart", speed=1):
                    # Compare with None explicitly: truth-testing an audio
                    # array is ambiguous and can raise.
                    if audio is None:
                        continue
                    wav_file.write(audio)
                    wrote_audio = True
        except Exception:
            _remove_quietly(output_path)
            raise
        if not wrote_audio:
            _remove_quietly(output_path)
            raise RuntimeError("Speech synthesis produced no audio")
        return output_path

    lang_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
        'en',
    )
    # Build the gTTS object before creating the file so that a constructor
    # failure does not leave an empty temp file behind.
    tts = gTTS(text, lang=lang_code)
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        tts.save(output_path)
    except Exception:
        _remove_quietly(output_path)
        raise
    return output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded recording, translate it, and synthesise speech.

    Expects a multipart POST with an ``audio`` file and an optional
    ``language`` form field (default ``'English'``).

    Returns:
        200 with JSON ``{'transcription', 'translation', 'audio_url'}``, where
        ``audio_url`` points at the ``/download/<filename>`` route;
        400 with ``{'error': ...}`` when no usable audio file was uploaded;
        500 with ``{'error': ...}`` when any processing step raises.
    """
    temp_input_path = None
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Save the upload under a unique temp name. Only the sanitised
        # extension of the client-supplied name is kept, so concurrent uploads
        # with the same filename cannot overwrite each other and an unusable
        # filename cannot break the save.
        extension = os.path.splitext(secure_filename(audio_file.filename))[1]
        fd, temp_input_path = tempfile.mkstemp(suffix=extension)
        os.close(fd)
        audio_file.save(temp_input_path)

        # Transcribe audio
        segments, _info = whisper_model.transcribe(temp_input_path, beam_size=5)
        transcription = " ".join(segment.text for segment in segments)

        # Translate text
        model = genai.GenerativeModel("gemini-2.0-flash")
        prompt = f"Translate to {target_language} preserving meaning and cultural nuances:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Generate TTS
        temp_output_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # Top-level boundary for the request: log with traceback and report
        # the failure to the client as JSON.
        app.logger.exception("Error processing request: %s", e)
        return jsonify({'error': str(e)}), 500
    finally:
        # The uploaded recording is only needed for transcription.
        if temp_input_path is not None:
            _remove_quietly(temp_input_path)

# Extensions of the audio files produced by the /translate route, mapped to
# the Content-Type they must be served with.
_DOWNLOAD_MIMETYPES = {
    '.mp3': 'audio/mpeg',
    '.wav': 'audio/wav',
}


@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated audio file from the system temp directory.

    Only plain ``.wav`` / ``.mp3`` file names are served, so the route cannot
    be used to read unrelated files from the temp directory, and each file is
    sent with the mimetype matching its format.

    Args:
        filename: Base name of the file, as returned in ``audio_url`` by the
            ``/translate`` route.

    Returns:
        The file as an attachment named ``translated_<filename>``, or a JSON
        ``{'error': 'File not found'}`` with status 404.
    """
    extension = os.path.splitext(filename)[1].lower()
    mimetype = _DOWNLOAD_MIMETYPES.get(extension)
    # secure_filename leaves a safe base name unchanged; any difference means
    # the request contained path or other unsafe characters.
    if mimetype is None or secure_filename(filename) != filename:
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype=mimetype,
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404

if __name__ == '__main__':
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows code execution from the browser. Enable
    # it explicitly for local development with FLASK_DEBUG=1.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)