Athspi committed on
Commit
d7f2b05
·
verified ·
1 Parent(s): 9e7d27b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -78
app.py CHANGED
@@ -1,15 +1,14 @@
1
  import os
2
- import tempfile
3
  import numpy as np
4
- import soundfile as sf
5
- import wave
6
-
7
  from flask import Flask, request, jsonify, send_file, send_from_directory
8
- from flask_cors import CORS
9
  import google.generativeai as genai
10
- from google.generativeai import types
 
 
 
 
 
11
 
12
- # Initialize Flask app
13
  app = Flask(__name__, static_folder='static')
14
  CORS(app)
15
 
@@ -19,51 +18,42 @@ if not GEMINI_API_KEY:
19
  raise ValueError("GEMINI_API_KEY environment variable not set")
20
  genai.configure(api_key=GEMINI_API_KEY)
21
 
22
- # Supported languages and their BCP-47 codes
23
- SUPPORTED_LANGUAGES = {
24
- "Arabic (Egyptian)": "ar-EG",
25
- "German (Germany)": "de-DE",
26
- "English (US)": "en-US",
27
- "Spanish (US)": "es-US",
28
- "French (France)": "fr-FR",
29
- "Hindi (India)": "hi-IN",
30
- "Indonesian (Indonesia)": "id-ID",
31
- "Italian (Italy)": "it-IT",
32
- "Japanese (Japan)": "ja-JP",
33
- "Korean (Korea)": "ko-KR",
34
- "Portuguese (Brazil)": "pt-BR",
35
- "Russian (Russia)": "ru-RU",
36
- "Dutch (Netherlands)": "nl-NL",
37
- "Polish (Poland)": "pl-PL",
38
- "Thai (Thailand)": "th-TH",
39
- "Turkish (Turkey)": "tr-TR",
40
- "Vietnamese (Vietnam)": "vi-VN",
41
- "Romanian (Romania)": "ro-RO",
42
- "Ukrainian (Ukraine)": "uk-UA",
43
- "Bengali (Bangladesh)": "bn-BD",
44
- "English (India)": "en-IN",
45
- "Marathi (India)": "mr-IN",
46
- "Tamil (India)": "ta-IN",
47
- "Telugu (India)": "te-IN"
48
  }
49
 
 
 
 
 
 
 
 
50
  @app.route('/')
51
  def serve_index():
52
  return send_from_directory(app.static_folder, 'index.html')
53
 
54
  @app.route('/languages')
55
  def get_languages():
56
- return jsonify(list(SUPPORTED_LANGUAGES.keys()))
57
 
58
  @app.route('/translate', methods=['POST'])
59
  def translate_audio():
60
  try:
61
  if 'audio' not in request.files:
62
  return jsonify({'error': 'No audio file uploaded'}), 400
63
-
64
  audio_file = request.files['audio']
65
- target_language = request.form.get('language', 'English (US)')
66
-
67
  if not audio_file or audio_file.filename == '':
68
  return jsonify({'error': 'Invalid audio file'}), 400
69
 
@@ -72,16 +62,16 @@ def translate_audio():
72
  if audio_file.mimetype not in allowed_mime_types:
73
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
74
 
75
- # Read audio data
76
- audio_data = audio_file.read()
77
-
78
  # Transcribe audio using Gemini
79
  model = genai.GenerativeModel("gemini-2.0-flash")
 
 
80
  audio_blob = {
81
  'mime_type': audio_file.mimetype,
82
- 'data': audio_data
83
  }
84
 
 
85
  convo = model.start_chat()
86
  convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
87
  response = convo.send_message(audio_blob)
@@ -91,47 +81,38 @@ def translate_audio():
91
  prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
92
  response = model.generate_content(prompt)
93
  translated_text = response.text.strip()
94
-
95
- # Generate TTS using Gemini
96
- # Initialize Gemini client
97
- client = genai.Client(api_key=GEMINI_API_KEY)
98
-
99
- # Determine language code
100
- lang_code = SUPPORTED_LANGUAGES.get(target_language, 'en-US')
101
-
102
- # Generate speech
103
- response = client.models.generate_content(
104
- model="gemini-2.5-flash-preview-tts",
105
- contents=translated_text,
106
- config=types.GenerateContentConfig(
107
- response_modalities=["AUDIO"],
108
- speech_config=types.SpeechConfig(
109
- voice_config=types.VoiceConfig(
110
- prebuilt_voice_config=types.PrebuiltVoiceConfig(
111
- voice_name='Kore' # You can change the voice as needed
112
- )
113
- )
114
- ),
115
- )
116
- )
117
-
118
- # Extract audio data
119
- audio_output = response.candidates[0].content.parts[0].inline_data.data
120
-
121
- # Save audio to temporary file
122
- temp_fd, temp_output_path = tempfile.mkstemp(suffix=".wav")
123
- with wave.open(temp_output_path, "wb") as wf:
124
- wf.setnchannels(1)
125
- wf.setsampwidth(2)
126
- wf.setframerate(24000)
127
- wf.writeframes(audio_output)
128
-
129
  return jsonify({
130
  'transcription': transcription,
131
  'translation': translated_text,
132
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
133
  })
134
-
135
  except Exception as e:
136
  app.logger.error(f"Error processing request: {str(e)}")
137
  return jsonify({'error': str(e)}), 500
@@ -141,7 +122,7 @@ def download_file(filename):
141
  try:
142
  return send_file(
143
  os.path.join(tempfile.gettempdir(), filename),
144
- mimetype="audio/wav",
145
  as_attachment=True,
146
  download_name=f"translated_{filename}"
147
  )
 
1
  import os
 
2
  import numpy as np
 
 
 
3
  from flask import Flask, request, jsonify, send_file, send_from_directory
 
4
  import google.generativeai as genai
5
+ from gtts import gTTS, lang
6
+ import tempfile
7
+ import soundfile as sf
8
+ from kokoro import KPipeline
9
+ from werkzeug.utils import secure_filename
10
+ from flask_cors import CORS
11
 
 
12
  app = Flask(__name__, static_folder='static')
13
  CORS(app)
14
 
 
18
  raise ValueError("GEMINI_API_KEY environment variable not set")
19
  genai.configure(api_key=GEMINI_API_KEY)
20
 
21
+ # Language configurations
22
+ KOKORO_LANGUAGES = {
23
+ "American English": "a",
24
+ "British English": "b",
25
+ "Mandarin Chinese": "z",
26
+ "Spanish": "e",
27
+ "French": "f",
28
+ "Hindi": "h",
29
+ "Italian": "i",
30
+ "Brazilian Portuguese": "p"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
 
33
+ GTTS_LANGUAGES = lang.tts_langs()
34
+ GTTS_LANGUAGES['ja'] = 'Japanese' # Explicit Japanese support
35
+
36
+ SUPPORTED_LANGUAGES = sorted(
37
+ list(set(list(KOKORO_LANGUAGES.keys()) + list(GTTS_LANGUAGES.values())))
38
+ )
39
+
40
  @app.route('/')
41
  def serve_index():
42
  return send_from_directory(app.static_folder, 'index.html')
43
 
44
  @app.route('/languages')
45
  def get_languages():
46
+ return jsonify(SUPPORTED_LANGUAGES)
47
 
48
  @app.route('/translate', methods=['POST'])
49
  def translate_audio():
50
  try:
51
  if 'audio' not in request.files:
52
  return jsonify({'error': 'No audio file uploaded'}), 400
53
+
54
  audio_file = request.files['audio']
55
+ target_language = request.form.get('language', 'English')
56
+
57
  if not audio_file or audio_file.filename == '':
58
  return jsonify({'error': 'Invalid audio file'}), 400
59
 
 
62
  if audio_file.mimetype not in allowed_mime_types:
63
  return jsonify({'error': f'Unsupported file type: {audio_file.mimetype}'}), 400
64
 
 
 
 
65
  # Transcribe audio using Gemini
66
  model = genai.GenerativeModel("gemini-2.0-flash")
67
+
68
+ # Create proper audio blob
69
  audio_blob = {
70
  'mime_type': audio_file.mimetype,
71
+ 'data': audio_file.read()
72
  }
73
 
74
+ # Get transcription
75
  convo = model.start_chat()
76
  convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
77
  response = convo.send_message(audio_blob)
 
81
  prompt = f"Translate the following text to {target_language} preserving meaning and cultural nuances. Respond only with the translation:\n\n{transcription}"
82
  response = model.generate_content(prompt)
83
  translated_text = response.text.strip()
84
+
85
+ # Generate TTS
86
+ if target_language in KOKORO_LANGUAGES:
87
+ lang_code = KOKORO_LANGUAGES[target_language]
88
+ pipeline = KPipeline(lang_code=lang_code)
89
+ generator = pipeline(translated_text, voice="af_heart", speed=1)
90
+
91
+ # Collect all audio segments
92
+ audio_segments = []
93
+ for _, _, audio in generator:
94
+ if audio is not None:
95
+ audio_segments.append(audio)
96
+
97
+ if audio_segments:
98
+ audio_data = np.concatenate(audio_segments)
99
+ _, temp_output_path = tempfile.mkstemp(suffix=".wav")
100
+ sf.write(temp_output_path, audio_data, 24000)
101
+ else:
102
+ raise ValueError("No audio generated by Kokoro")
103
+ else:
104
+ # Standard gTTS handling
105
+ lang_code = next((k for k, v in GTTS_LANGUAGES.items() if v == target_language), 'en')
106
+ tts = gTTS(translated_text, lang=lang_code)
107
+ _, temp_output_path = tempfile.mkstemp(suffix=".mp3")
108
+ tts.save(temp_output_path)
109
+
 
 
 
 
 
 
 
 
 
110
  return jsonify({
111
  'transcription': transcription,
112
  'translation': translated_text,
113
  'audio_url': f'/download/{os.path.basename(temp_output_path)}'
114
  })
115
+
116
  except Exception as e:
117
  app.logger.error(f"Error processing request: {str(e)}")
118
  return jsonify({'error': str(e)}), 500
 
122
  try:
123
  return send_file(
124
  os.path.join(tempfile.gettempdir(), filename),
125
+ mimetype="audio/mpeg",
126
  as_attachment=True,
127
  download_name=f"translated_{filename}"
128
  )