Athspi committed on
Commit
bfc5175
·
verified ·
1 Parent(s): 9ffbfd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -47
app.py CHANGED
@@ -1,122 +1,145 @@
1
  import os
2
  import numpy as np
3
- from flask import Flask, request, jsonify, send_file, send_from_directory
4
- import google.generativeai as genai
5
- from gtts import gTTS, lang
6
  import tempfile
7
  import soundfile as sf
8
- from kokoro import KPipeline
9
- from werkzeug.utils import secure_filename
 
10
  from flask_cors import CORS
 
 
 
 
11
 
 
 
 
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
 
15
- # Configure Gemini API
16
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
17
  if not GEMINI_API_KEY:
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
19
  genai.configure(api_key=GEMINI_API_KEY)
 
20
 
21
- # Language configurations
22
  KOKORO_LANGUAGES = {
23
- "American English": "a",
24
- "British English": "b",
25
- "Mandarin Chinese": "z",
26
- "Spanish": "e",
27
- "French": "f",
28
- "Hindi": "h",
29
- "Italian": "i",
30
- "Brazilian Portuguese": "p"
31
  }
32
-
33
  GTTS_LANGUAGES = lang.tts_langs()
34
- GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- SUPPORTED_LANGUAGES = sorted(
37
- list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
38
- )
39
 
40
  @app.route('/')
41
  def serve_index():
42
  return send_from_directory(app.static_folder, 'index.html')
43
 
 
44
  @app.route('/languages')
45
  def get_languages():
46
  return jsonify(SUPPORTED_LANGUAGES)
47
 
 
48
  @app.route('/translate', methods=['POST'])
49
  def translate_audio():
50
  try:
51
  if 'audio' not in request.files:
52
  return jsonify({'error': 'No audio file uploaded'}), 400
53
-
54
  audio_file = request.files['audio']
55
  target_language = request.form.get('language', 'English')
56
-
57
  if not audio_file or audio_file.filename == '':
58
  return jsonify({'error': 'Invalid audio file'}), 400
59
 
60
- # Validate MIME type
61
  allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
62
  if audio_file.mimetype not in allowed_mime_types:
63
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
64
 
65
- # Transcribe audio using Gemini
66
  model = genai.GenerativeModel("gemini-2.0-flash")
67
-
68
- # Create proper audio blob
69
- audio_blob = {
70
- 'mime_type': audio_file.mimetype,
71
- 'data': audio_file.read()
72
- }
73
-
74
- # Get transcription
75
  convo = model.start_chat()
76
  convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
77
  response = convo.send_message(audio_blob)
78
  transcription = response.text.strip()
79
 
80
- # Translate text using Gemini
81
  prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
82
  response = model.generate_content(prompt)
83
  translated_text = response.text.strip()
84
-
85
- # Generate TTS
86
- if target_language in KOKORO_LANGUAGES:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  lang_code = KOKORO_LANGUAGES[target_language]
88
  pipeline = KPipeline(lang_code=lang_code)
89
  generator = pipeline(translated_text, voice="af_heart", speed=1)
90
-
91
- # Collect all audio segments
92
  audio_segments = []
93
  for _, _, audio in generator:
94
  if audio is not None:
95
  audio_segments.append(audio)
96
-
97
  if audio_segments:
98
  audio_data = np.concatenate(audio_segments)
99
- _, temp_output_path = tempfile.mkstemp(suffix=".wav")
100
- sf.write(temp_output_path, audio_data, 24000)
101
  else:
102
  raise ValueError("No audio generated by Kokoro")
103
  else:
104
- # Standard gTTS handling
105
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
  tts = gTTS(translated_text, lang=lang_code)
107
- _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
108
- tts.save(temp_output_path)
109
-
110
  return jsonify({
111
  'transcription': transcription,
112
  'translation': translated_text,
113
- 'audio_url': f'/download/{os.path.basename(temp_output_path)}'
114
  })
115
-
116
  except Exception as e:
117
  app.logger.error(f"Error processing request: {str(e)}")
118
  return jsonify({'error': str(e)}), 500
119
 
 
120
  @app.route('/download/<filename>')
121
  def download_file(filename):
122
  try:
@@ -129,5 +152,17 @@ def download_file(filename):
129
  except FileNotFoundError:
130
  return jsonify({'error': 'File not found'}), 404
131
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  if __name__ == '__main__':
133
  app.run(host="0.0.0.0", port=7860)
 
1
  import os
  import tempfile
  import wave

  import numpy as np
  import soundfile as sf
  from flask import Flask, request, jsonify, send_file, send_from_directory
  from flask_cors import CORS
  from google import genai as genai_sdk
  from google import generativeai as genai
  from google.genai import types
  from gtts import gTTS, lang
  from kokoro import KPipeline
  from werkzeug.utils import secure_filename
16
+
17
+ # Flask app setup
18
  app = Flask(__name__, static_folder='static')
19
  CORS(app)
20
 
21
+ # Gemini API configuration
22
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
23
  if not GEMINI_API_KEY:
24
  raise ValueError("GEMINI_API_KEY environment variable not set")
25
  genai.configure(api_key=GEMINI_API_KEY)
26
+ client = genai.Client(api_key=GEMINI_API_KEY)
27
 
28
+ # Language support
29
  KOKORO_LANGUAGES = {
30
+ "American English": "a", "British English": "b", "Mandarin Chinese": "z",
31
+ "Spanish": "e", "French": "f", "Hindi": "h", "Italian": "i", "Brazilian Portuguese": "p"
 
 
 
 
 
 
32
  }
 
33
  GTTS_LANGUAGES = lang.tts_langs()
34
+ GTTS_LANGUAGES['ja'] = 'Japanese'
35
+ SUPPORTED_LANGUAGES = sorted(list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values()))))
36
+
37
+ GEMINI_VOICES = {
38
+ "ar-EG": "Kore", "de-DE": "Kore", "en-US": "Kore", "es-US": "Kore", "fr-FR": "Kore",
39
+ "hi-IN": "Kore", "id-ID": "Kore", "it-IT": "Kore", "ja-JP": "Kore", "ko-KR": "Kore",
40
+ "pt-BR": "Kore", "ru-RU": "Kore", "nl-NL": "Kore", "pl-PL": "Kore", "th-TH": "Kore",
41
+ "tr-TR": "Kore", "vi-VN": "Kore", "ro-RO": "Kore", "uk-UA": "Kore", "bn-BD": "Kore",
42
+ "en-IN": "Kore", "mr-IN": "Kore", "ta-IN": "Kore", "te-IN": "Kore"
43
+ }
44
+
45
+
46
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM frames to ``filename`` as a WAV file.

    Args:
        filename: Target handed straight to ``wave.open`` in ``"wb"`` mode.
        pcm: Bytes-like object holding the raw PCM frames; written as-is.
        channels: Number of audio channels recorded in the header.
        rate: Sampling rate in frames per second.
        sample_width: Width of one sample in bytes.
    """
    # The context manager closes the writer, which finalises the WAV header.
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
52
 
 
 
 
53
 
54
  @app.route('/')
55
  def serve_index():
56
  return send_from_directory(app.static_folder, 'index.html')
57
 
58
+
59
  @app.route('/languages')
60
  def get_languages():
61
  return jsonify(SUPPORTED_LANGUAGES)
62
 
63
+
64
  @app.route('/translate', methods=['POST'])
65
  def translate_audio():
66
  try:
67
  if 'audio' not in request.files:
68
  return jsonify({'error': 'No audio file uploaded'}), 400
69
+
70
  audio_file = request.files['audio']
71
  target_language = request.form.get('language', 'English')
72
+
73
  if not audio_file or audio_file.filename == '':
74
  return jsonify({'error': 'Invalid audio file'}), 400
75
 
 
76
  allowed_mime_types = ['audio/wav', 'audio/mpeg', 'audio/mp4', 'audio/webm']
77
  if audio_file.mimetype not in allowed_mime_types:
78
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
79
 
 
80
  model = genai.GenerativeModel("gemini-2.0-flash")
81
+ audio_blob = {'mime_type': audio_file.mimetype, 'data': audio_file.read()}
82
+
 
 
 
 
 
 
83
  convo = model.start_chat()
84
  convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
85
  response = convo.send_message(audio_blob)
86
  transcription = response.text.strip()
87
 
 
88
  prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
89
  response = model.generate_content(prompt)
90
  translated_text = response.text.strip()
91
+
92
+ voice_name = GEMINI_VOICES.get(get_bcp47_code(target_language), None)
93
+
94
+ if voice_name:
95
+ response = client.models.generate_content(
96
+ model="gemini-2.5-flash-preview-tts",
97
+ contents=translated_text,
98
+ config=types.GenerateContentConfig(
99
+ response_modalities=["AUDIO"],
100
+ speech_config=types.SpeechConfig(
101
+ voice_config=types.VoiceConfig(
102
+ prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name=voice_name)
103
+ )
104
+ )
105
+ )
106
+ )
107
+ data = response.candidates[0].content.parts[0].inline_data.data
108
+ temp_path = os.path.join(tempfile.gettempdir(), f'tts_{secure_filename(audio_file.filename)}.wav')
109
+ wave_file(temp_path, data)
110
+ elif target_language in KOKORO_LANGUAGES:
111
  lang_code = KOKORO_LANGUAGES[target_language]
112
  pipeline = KPipeline(lang_code=lang_code)
113
  generator = pipeline(translated_text, voice="af_heart", speed=1)
114
+
 
115
  audio_segments = []
116
  for _, _, audio in generator:
117
  if audio is not None:
118
  audio_segments.append(audio)
119
+
120
  if audio_segments:
121
  audio_data = np.concatenate(audio_segments)
122
+ temp_path = os.path.join(tempfile.gettempdir(), f'kokoro_{secure_filename(audio_file.filename)}.wav')
123
+ sf.write(temp_path, audio_data, 24000)
124
  else:
125
  raise ValueError("No audio generated by Kokoro")
126
  else:
 
127
  lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
128
  tts = gTTS(translated_text, lang=lang_code)
129
+ temp_path = os.path.join(tempfile.gettempdir(), f'gtts_{secure_filename(audio_file.filename)}.mp3')
130
+ tts.save(temp_path)
131
+
132
  return jsonify({
133
  'transcription': transcription,
134
  'translation': translated_text,
135
+ 'audio_url': f'/download/{os.path.basename(temp_path)}'
136
  })
137
+
138
  except Exception as e:
139
  app.logger.error(f"Error processing request: {str(e)}")
140
  return jsonify({'error': str(e)}), 500
141
 
142
+
143
  @app.route('/download/<filename>')
144
  def download_file(filename):
145
  try:
 
152
  except FileNotFoundError:
153
  return jsonify({'error': 'File not found'}), 404
154
 
155
+
156
# Display-language name -> BCP-47 tag. Built once at import time instead of
# on every call.
_BCP47_CODES = {
    "Arabic": "ar-EG",
    "German": "de-DE",
    "English": "en-US",
    "Spanish": "es-US",
    "French": "fr-FR",
    "Hindi": "hi-IN",
    "Indonesian": "id-ID",
    "Italian": "it-IT",
    "Japanese": "ja-JP",
    "Korean": "ko-KR",
    "Portuguese": "pt-BR",
    "Russian": "ru-RU",
    "Dutch": "nl-NL",
    "Polish": "pl-PL",
    "Thai": "th-TH",
    "Turkish": "tr-TR",
    "Vietnamese": "vi-VN",
    "Romanian": "ro-RO",
    "Ukrainian": "uk-UA",
    "Bengali": "bn-BD",
    "Indian English": "en-IN",
    "Marathi": "mr-IN",
    "Tamil": "ta-IN",
    "Telugu": "te-IN",
}


def get_bcp47_code(language):
    """Return the BCP-47 tag for a display language name.

    Args:
        language: Display name such as ``"French"``; matching is exact and
            case-sensitive.

    Returns:
        The mapped tag (e.g. ``"fr-FR"``), or ``None`` when the name is not
        in the table.
    """
    return _BCP47_CODES.get(language)
165
+
166
+
167
  if __name__ == '__main__':
168
  app.run(host="0.0.0.0", port=7860)