File size: 5,204 Bytes
5f33e0e
385365a
9ffbfd1
385365a
9e7d27b
385365a
f49c906
385365a
280b5d0
9e7d27b
7cc4829
9e7d27b
 
 
 
 
5ddb059
 
9e7d27b
280b5d0
e51d62b
9e7d27b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e51d62b
9e7d27b
dbed07a
9e7d27b
965bd2d
9e7d27b
 
 
e51d62b
9e7d27b
5ddb059
6c131f6
9e7d27b
 
385365a
9e7d27b
 
385365a
9e7d27b
 
5ddb059
9e7d27b
 
 
 
5ddb059
9e7d27b
 
 
 
e51d62b
9e7d27b
 
 
 
f49c906
9ffbfd1
9e7d27b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e51d62b
9e7d27b
280b5d0
385365a
e51d62b
385365a
9e7d27b
 
e51d62b
9e7d27b
 
 
 
 
 
 
dbe8a71
9e7d27b
 
 
 
 
 
 
 
 
e51d62b
9e7d27b
 
 
 
 
 
 
 
 
 
 
dbe8a71
9e7d27b
9ffbfd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os
import tempfile
import numpy as np
import soundfile as sf
import wave

from flask import Flask, request, jsonify, send_file, send_from_directory
from flask_cors import CORS
import google.generativeai as genai
from google.generativeai import types

# Flask application object; its static folder is the local "static" directory.
app = Flask(__name__, static_folder='static')
CORS(app)

# The Gemini API key is read from the environment. Startup is aborted when the
# variable is missing or empty, before any route can be served.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY environment variable not set")
genai.configure(api_key=GEMINI_API_KEY)

# Mapping of language display name -> BCP-47 code.
# Insertion order is kept as listed here; get_languages() returns the keys in
# this order.
SUPPORTED_LANGUAGES = dict((
    ("Arabic (Egyptian)", "ar-EG"),
    ("German (Germany)", "de-DE"),
    ("English (US)", "en-US"),
    ("Spanish (US)", "es-US"),
    ("French (France)", "fr-FR"),
    ("Hindi (India)", "hi-IN"),
    ("Indonesian (Indonesia)", "id-ID"),
    ("Italian (Italy)", "it-IT"),
    ("Japanese (Japan)", "ja-JP"),
    ("Korean (Korea)", "ko-KR"),
    ("Portuguese (Brazil)", "pt-BR"),
    ("Russian (Russia)", "ru-RU"),
    ("Dutch (Netherlands)", "nl-NL"),
    ("Polish (Poland)", "pl-PL"),
    ("Thai (Thailand)", "th-TH"),
    ("Turkish (Turkey)", "tr-TR"),
    ("Vietnamese (Vietnam)", "vi-VN"),
    ("Romanian (Romania)", "ro-RO"),
    ("Ukrainian (Ukraine)", "uk-UA"),
    ("Bengali (Bangladesh)", "bn-BD"),
    ("English (India)", "en-IN"),
    ("Marathi (India)", "mr-IN"),
    ("Tamil (India)", "ta-IN"),
    ("Telugu (India)", "te-IN"),
))

@app.route('/')
def serve_index():
    """Serve ``index.html`` from the application's static folder."""
    static_dir = app.static_folder
    return send_from_directory(static_dir, 'index.html')

@app.route('/languages')
def get_languages():
    """Return the display names of all supported languages as a JSON array."""
    language_names = [name for name in SUPPORTED_LANGUAGES]
    return jsonify(language_names)

# MIME types accepted for the uploaded recording.
_ALLOWED_AUDIO_MIME_TYPES = frozenset(
    {'audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm'}
)


def _transcribe_audio(model, mime_type, audio_data):
    """Return the model's transcription of the given audio bytes."""
    convo = model.start_chat()
    # The instruction is sent first; the audio follows in the same chat.
    convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
    response = convo.send_message({'mime_type': mime_type, 'data': audio_data})
    return response.text.strip()


def _translate_text(model, text, target_language):
    """Return the model's translation of ``text`` into ``target_language``."""
    prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{text}"
    return model.generate_content(prompt).text.strip()


def _synthesize_speech(text):
    """Request speech audio for ``text`` and return the raw audio bytes."""
    # NOTE(review): genai.Client and types.GenerateContentConfig look like the
    # interface of the newer `google-genai` package (`from google import
    # genai`), whereas this file imports `google.generativeai`. Confirm the
    # installed SDK exposes them; otherwise this call fails at runtime.
    client = genai.Client(api_key=GEMINI_API_KEY)
    response = client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=translated_text_or(text),
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore'  # You can change the voice as needed
                    )
                )
            ),
        )
    ) if False else client.models.generate_content(
        model="gemini-2.5-flash-preview-tts",
        contents=text,
        config=types.GenerateContentConfig(
            response_modalities=["AUDIO"],
            speech_config=types.SpeechConfig(
                voice_config=types.VoiceConfig(
                    prebuilt_voice_config=types.PrebuiltVoiceConfig(
                        voice_name='Kore'  # You can change the voice as needed
                    )
                )
            ),
        )
    )
    return response.candidates[0].content.parts[0].inline_data.data


def _write_wav_file(pcm_frames):
    """Write ``pcm_frames`` to a new temporary ``.wav`` file and return its path."""
    temp_fd, temp_output_path = tempfile.mkstemp(suffix=".wav")
    # mkstemp hands back an already-open OS descriptor. wave.open() below
    # reopens the file by path, so the descriptor must be closed here or one
    # descriptor leaks per request.
    os.close(temp_fd)
    try:
        with wave.open(temp_output_path, "wb") as wf:
            # 1 channel, 2-byte samples, 24000 Hz. Assumes the TTS response is
            # raw PCM in this layout -- TODO confirm against the model docs.
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(pcm_frames)
    except Exception:
        # Do not leave a partially written file behind in the temp directory.
        os.remove(temp_output_path)
        raise
    return temp_output_path


@app.route('/translate', methods=['POST'])
def translate_audio():
    """Transcribe an uploaded recording, translate it and synthesize speech.

    Expects a multipart POST with an ``audio`` file and an optional
    ``language`` form field (default ``'English (US)'``) that must be one of
    the keys of ``SUPPORTED_LANGUAGES``.

    Returns:
        A JSON object with ``transcription``, ``translation`` and
        ``audio_url`` (a ``/download/<name>`` path for the generated WAV).
        Responds with status 400 when the upload is missing or invalid, its
        MIME type is not accepted, or the language is not supported, and with
        status 500 and the exception text when any later step raises.
    """
    try:
        if 'audio' not in request.files:
            return jsonify({'error': 'No audio file uploaded'}), 400

        audio_file = request.files['audio']
        target_language = request.form.get('language', 'English (US)')

        if not audio_file or audio_file.filename == '':
            return jsonify({'error': 'Invalid audio file'}), 400

        # Validate MIME type
        if audio_file.mimetype not in _ALLOWED_AUDIO_MIME_TYPES:
            return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400

        # The language name is interpolated into the translation prompt, so
        # only names from the supported list are accepted.
        if target_language not in SUPPORTED_LANGUAGES:
            return jsonify({'error': f'Unsupported language: {target_language}'}), 400

        audio_data = audio_file.read()

        model = genai.GenerativeModel("gemini-2.0-flash")
        transcription = _transcribe_audio(model, audio_file.mimetype, audio_data)
        translated_text = _translate_text(model, transcription, target_language)

        audio_output = _synthesize_speech(translated_text)
        temp_output_path = _write_wav_file(audio_output)

        return jsonify({
            'transcription': transcription,
            'translation': translated_text,
            'audio_url': f'/download/{os.path.basename(temp_output_path)}'
        })

    except Exception as e:
        # Top-level boundary for the request: log with traceback and report
        # the failure to the client as JSON.
        app.logger.exception("Error processing request: %s", e)
        return jsonify({'error': str(e)}), 500

@app.route('/download/<filename>')
def download_file(filename):
    """Send a generated WAV file from the system temp directory as a download.

    Only bare ``*.wav`` file names that exist as regular files in
    ``tempfile.gettempdir()`` are served; anything else gets a JSON 404.

    Args:
        filename: File name taken from the URL path.

    Returns:
        The file as an ``audio/wav`` attachment named ``translated_<filename>``,
        or a JSON error body with status 404.
    """
    not_found = ({'error': 'File not found'}, 404)

    # The temp directory is shared with other programs, so refuse anything
    # that is not a plain ".wav" name (no directory components).
    if os.path.basename(filename) != filename or not filename.endswith('.wav'):
        return jsonify(not_found[0]), not_found[1]

    file_path = os.path.join(tempfile.gettempdir(), filename)
    if not os.path.isfile(file_path):
        return jsonify(not_found[0]), not_found[1]

    try:
        return send_file(
            file_path,
            mimetype="audio/wav",
            as_attachment=True,
            download_name=f"translated_{filename}"
        )
    except FileNotFoundError:
        # The file can disappear between the check above and the send.
        return jsonify(not_found[0]), not_found[1]

if __name__ == "__main__":
    # Listen on every network interface, port 7860.
    app.run(port=7860, host="0.0.0.0")