File size: 4,712 Bytes
5f33e0e
9ffbfd1
f49c906
280b5d0
d7f2b05
 
 
 
 
 
7cc4829
9e7d27b
 
 
 
5ddb059
 
9e7d27b
280b5d0
e51d62b
d7f2b05
 
 
 
 
 
 
 
 
 
9e7d27b
e51d62b
d7f2b05
 
 
 
 
 
 
9e7d27b
dbed07a
9e7d27b
965bd2d
9e7d27b
 
d7f2b05
e51d62b
9e7d27b
5ddb059
6c131f6
9e7d27b
 
d7f2b05
9e7d27b
d7f2b05
 
9e7d27b
 
5ddb059
9e7d27b
 
 
 
5ddb059
9e7d27b
e51d62b
d7f2b05
 
9e7d27b
 
d7f2b05
9e7d27b
f49c906
d7f2b05
9ffbfd1
9e7d27b
 
 
 
 
 
 
 
d7f2b05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9e7d27b
 
 
 
 
d7f2b05
9e7d27b
 
 
e51d62b
9e7d27b
 
 
 
 
d7f2b05
9e7d27b
 
 
 
 
dbe8a71
9e7d27b
9ffbfd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import numpy as np
from flask import Flask, request, jsonify, send_file, send_from_directory
import google.generativeai as genai
from gtts import gTTS, lang
import tempfile
import soundfile as sf
from kokoro import KPipeline
from werkzeug.utils import secure_filename
from flask_cors import CORS

# Flask application object; static assets are looked up in the ``static`` folder.
app = Flask(__name__, static_folder='static')
# Wrap the app with flask_cors using its default options.
CORS(app)

# Configure Gemini API: the key is read from the environment, and start-up
# aborts immediately when it is missing or empty.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY in (None, ""):
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Language configurations
# Display name -> language code passed to Kokoro's KPipeline.
KOKORO_LANGUAGES = {
    "American English": "a",
    "British English": "b",
    "Mandarin Chinese": "z",
    "Spanish": "e",
    "French": "f",
    "Hindi": "h",
    "Italian": "i",
    "Brazilian Portuguese": "p",
}

# gTTS language code -> display name, with Japanese pinned explicitly.
GTTS_LANGUAGES = {**lang.tts_langs(), 'ja': 'Japanese'}

# Every display name known to either engine, de-duplicated and sorted.
SUPPORTED_LANGUAGES = sorted(
    set(KOKORO_LANGUAGES) | set(GTTS_LANGUAGES.values())
)

@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_root = app.static_folder
    return send_from_directory(static_root, 'index.html')

@app.route('/languages')
def get_languages():
    """Return the module-level ``SUPPORTED_LANGUAGES`` list as a JSON response."""
    payload = jsonify(SUPPORTED_LANGUAGES)
    return payload

def _new_temp_audio_path(suffix):
    """Create an empty temporary file ending in *suffix* and return its path.

    ``tempfile.mkstemp`` hands back an already-open OS-level file descriptor
    together with the path.  The writers used in this module only need the
    path, so the descriptor is closed right away instead of being leaked on
    every request.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _synthesize_speech(text, target_language):
    """Render *text* as speech and return the path of the generated file.

    Languages listed in ``KOKORO_LANGUAGES`` are synthesized with Kokoro and
    written as a 24 kHz ``.wav``; everything else goes through gTTS and is
    saved as ``.mp3``.  A language name gTTS does not know falls back to the
    ``'en'`` voice.

    Raises:
        ValueError: if Kokoro yields no audio segments for the text.
    """
    if target_language in KOKORO_LANGUAGES:
        pipeline = KPipeline(lang_code=KOKORO_LANGUAGES[target_language])
        generator = pipeline(text, voice="af_heart", speed=1)

        # Each item yielded by the pipeline is a 3-tuple whose last element
        # is the audio for one segment (or None); keep only real audio.
        audio_segments = [audio for _, _, audio in generator if audio is not None]
        if not audio_segments:
            raise ValueError("No audio generated by Kokoro")

        output_path = _new_temp_audio_path(".wav")
        try:
            sf.write(output_path, np.concatenate(audio_segments), 24000)
        except Exception:
            # Do not leave an empty or partial file behind in the temp dir.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise
        return output_path

    # Standard gTTS handling: map the display name back to its gTTS code.
    lang_code = next(
        (code for code, name in GTTS_LANGUAGES.items() if name == target_language),
        'en',
    )
    tts = gTTS(text, lang=lang_code)
    output_path = _new_temp_audio_path(".mp3")
    try:
        tts.save(output_path)
    except Exception:
        # Do not leave an empty or partial file behind in the temp dir.
        if os.path.exists(output_path):
            os.remove(output_path)
        raise
    return output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded audio clip, translate it, and synthesize speech.

    Reads from the multipart form:
        audio: the uploaded audio file; its MIME type must be one of
            ``audio/wav``, ``audio/mpeg``, ``audio/mp4`` or ``audio/webm``.
        language: display name of the target language (default ``'English'``).

    Returns:
        On success, a JSON object with ``transcription``, ``translation`` and
        ``audio_url`` (a ``/download/<name>`` path for the generated audio).
        A 400 JSON error when the upload is missing, unnamed or of an
        unsupported type, and a 500 JSON error carrying the exception text
        when any later step fails.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Validate MIME type
        allowed_mime_types = ('audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm')
        if audio_file.mimetype not in allowed_mime_types:
            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

        # Transcribe audio using Gemini
        model = genai.GenerativeModel("gemini-2.0-flash")

        # Inline audio payload: MIME type plus the raw uploaded bytes.
        audio_blob = {
            'mime_type': audio_file.mimetype,
            'data': audio_file.read()
        }

        # Get transcription: first message sets the instruction, second
        # message carries the audio itself.
        convo = model.start_chat()
        convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
        response = convo.send_message(audio_blob)
        transcription = response.text.strip()

        # Translate text using Gemini
        # NOTE(review): target_language is client-supplied and is interpolated
        # into the prompt without validation -- consider checking it against
        # SUPPORTED_LANGUAGES if free-form targets are not intended.
        prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
        response = model.generate_content(prompt)
        translated_text = response.text.strip()

        # Generate TTS
        temp_output_path = _synthesize_speech(translated_text, target_language)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # logger.exception records the traceback along with the message.
        app.logger.exception("Error processing request: %s", e)
        return jsonify({'error': str(e)}), 500

@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated audio file from the system temp directory.

    Only plain ``.mp3`` / ``.wav`` file names are served (the two formats
    written by the ``/translate`` endpoint), and the response MIME type
    follows the extension rather than always claiming MP3.

    Args:
        filename: base name of the file inside ``tempfile.gettempdir()``.

    Returns:
        The file as an attachment named ``translated_<filename>``, or a 404
        JSON error when the name is not acceptable or the file is missing.
    """
    audio_mime_types = {'.mp3': 'audio/mpeg', '.wav': 'audio/wav'}
    extension = os.path.splitext(filename)[1].lower()

    # Reject anything that is not already a sanitised bare file name
    # (e.g. "..", or names with path separators) and any non-audio
    # extension, so unrelated temp files cannot be fetched.
    if secure_filename(filename) != filename or extension not in audio_mime_types:
        return jsonify({'error': 'File not found'}), 404

    try:
        return send_file(
            os.path.join(tempfile.gettempdir(), filename),
            mimetype=audio_mime_types[extension],
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        return jsonify({'error': 'File not found'}), 404

if __name__ == '__main__':
    # Script entry point: start Flask's built-in server, listening on every
    # network interface at port 7860.
    app.run(port=7860, host="0.0.0.0")