Athspi committed on
Commit
1ba1d48
·
verified ·
1 Parent(s): 2df8446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -132,6 +132,9 @@ LANGUAGE_NAME_TO_CODE = {
132
  # Reverse mapping of language codes to full language names
133
  CODE_TO_LANGUAGE_NAME = {v: k for k, v in LANGUAGE_NAME_TO_CODE.items()}
134
 
 
 
 
135
  def detect_language(audio_file):
136
  """Detect the language of the audio file."""
137
  # Load the Whisper model (use "base" for faster detection)
@@ -182,10 +185,16 @@ def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faste
182
  detected_language = language
183
  else:
184
  # Use the selected Whisper model
185
- if model_size == "Systran/faster-whisper-large-v3":
186
  # Use faster-whisper for the Systran model
187
- model = WhisperModel(model_size, device="cuda" if torch.cuda.is_available() else "cpu")
188
- segments, info = model.transcribe(processed_audio_path, beam_size=5)
 
 
 
 
 
 
189
  transcription = " ".join([segment.text for segment in segments])
190
  detected_language_code = info.language
191
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")
 
132
  # Reverse mapping of language codes to full language names
133
  CODE_TO_LANGUAGE_NAME = {v: k for k, v in LANGUAGE_NAME_TO_CODE.items()}
134
 
135
+ # Device and compute type for faster-whisper
136
+ device, torch_dtype = ("cuda", "float32") if torch.cuda.is_available() else ("cpu", "int8")
137
+
138
  def detect_language(audio_file):
139
  """Detect the language of the audio file."""
140
  # Load the Whisper model (use "base" for faster detection)
 
185
  detected_language = language
186
  else:
187
  # Use the selected Whisper model
188
+ if model_size == "Systran Faster Whisper Large v3":
189
  # Use faster-whisper for the Systran model
190
+ model = WhisperModel(MODELS[model_size], device=device, compute_type=torch_dtype)
191
+ segments, info = model.transcribe(
192
+ processed_audio_path,
193
+ task="transcribe",
194
+ word_timestamps=True,
195
+ repetition_penalty=1.1,
196
+ temperature=[0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8, 1.0],
197
+ )
198
  transcription = " ".join([segment.text for segment in segments])
199
  detected_language_code = info.language
200
  detected_language = CODE_TO_LANGUAGE_NAME.get(detected_language_code, "Unknown Language")