DeeeeeeM committed on
Commit
7716a94
·
1 Parent(s): 5b9ff42

added minor changes

Browse files
Files changed (2) hide show
  1. app.py +9 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,12 +6,15 @@ import gradio as gr
6
  import torch
7
  import stable_whisper
8
  from stable_whisper.text_output import result_to_any, sec2srt
 
9
 
10
  def process_media(
11
  model_size, source_lang, upload, model_type,
12
  max_chars, max_words, extend_in, extend_out, collapse_gaps,
13
  max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
14
  ):
 
 
15
  # ----- is file empty? checker ----- #
16
  if upload is None:
17
  return None, None, None, None
@@ -27,7 +30,8 @@ def process_media(
27
  model = stable_whisper.load_model(model_size, device=device)
28
 
29
  try:
30
- result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, denoiser="demucs", no_speech_threshold=0.9)
 
31
  #result.save_as_json(word_transcription_path)
32
  except Exception as e:
33
  return None, None, None, None
@@ -87,6 +91,9 @@ def process_media(
87
  audio_out = temp_path if mime and mime.startswith("audio") else None
88
  video_out = temp_path if mime and mime.startswith("video") else None
89
 
 
 
 
90
  return audio_out, video_out, transcript_txt, srt_file_path
91
 
92
  def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
@@ -300,7 +307,7 @@ with gr.Blocks() as interface:
300
  )
301
  model_size = gr.Dropdown(
302
  choices=[
303
- "large-v3-turbo",
304
  "large-v3",
305
  "large-v2",
306
  "large",
 
6
  import torch
7
  import stable_whisper
8
  from stable_whisper.text_output import result_to_any, sec2srt
9
+ import time
10
 
11
  def process_media(
12
  model_size, source_lang, upload, model_type,
13
  max_chars, max_words, extend_in, extend_out, collapse_gaps,
14
  max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
15
  ):
16
+ start_time = time.time()
17
+
18
  # ----- is file empty? checker ----- #
19
  if upload is None:
20
  return None, None, None, None
 
30
  model = stable_whisper.load_model(model_size, device=device)
31
 
32
  try:
33
+ result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, no_speech_threshold=0.9)
34
+ #remove background music/noise: denoiser="demucs"
35
  #result.save_as_json(word_transcription_path)
36
  except Exception as e:
37
  return None, None, None, None
 
91
  audio_out = temp_path if mime and mime.startswith("audio") else None
92
  video_out = temp_path if mime and mime.startswith("video") else None
93
 
94
+ elapsed = time.time() - start_time
95
+ print(f"process_media completed in {elapsed:.2f} seconds")
96
+
97
  return audio_out, video_out, transcript_txt, srt_file_path
98
 
99
  def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
 
307
  )
308
  model_size = gr.Dropdown(
309
  choices=[
310
+ "deepdml/faster-whisper-large-v3-turbo-ct2",
311
  "large-v3",
312
  "large-v2",
313
  "large",
requirements.txt CHANGED
@@ -2,5 +2,6 @@ gradio>=3.0.0
2
  stable-ts
3
  stable-ts[fw]
4
  demucs
5
- torch==2.1.2
6
  numpy<2
 
 
2
  stable-ts
3
  stable-ts[fw]
4
  demucs
5
+ torch==2.6.0
6
  numpy<2
7
+ chardet