DeeeeeeM committed
Commit 7716a94 · Parent(s): 5b9ff42
added minor changes

Files changed:
- app.py +9 -2
- requirements.txt +2 -1
app.py CHANGED

@@ -6,12 +6,15 @@ import gradio as gr
 import torch
 import stable_whisper
 from stable_whisper.text_output import result_to_any, sec2srt
+import time
 
 def process_media(
     model_size, source_lang, upload, model_type,
     max_chars, max_words, extend_in, extend_out, collapse_gaps,
     max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
 ):
+    start_time = time.time()
+
     # ----- is file empty? checker ----- #
     if upload is None:
         return None, None, None, None
@@ -27,7 +30,8 @@ def process_media(
     model = stable_whisper.load_model(model_size, device=device)
 
     try:
-        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False,
+        result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, no_speech_threshold=0.9)
+        #remove background music/noise: denoiser="demucs"
         #result.save_as_json(word_transcription_path)
     except Exception as e:
         return None, None, None, None
@@ -87,6 +91,9 @@ def process_media(
     audio_out = temp_path if mime and mime.startswith("audio") else None
     video_out = temp_path if mime and mime.startswith("video") else None
 
+    elapsed = time.time() - start_time
+    print(f"process_media completed in {elapsed:.2f} seconds")
+
     return audio_out, video_out, transcript_txt, srt_file_path
 
 def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
@@ -300,7 +307,7 @@ with gr.Blocks() as interface:
     )
     model_size = gr.Dropdown(
         choices=[
-            "large-v3-turbo",
+            "deepdml/faster-whisper-large-v3-turbo-ct2",
             "large-v3",
             "large-v2",
             "large",
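The `+` lines above add wall-clock timing around process_media and pass no_speech_threshold=0.9 to model.transcribe. A minimal standalone sketch of the same pattern, assuming the stable-ts API shown in the diff; the file paths and language code are placeholders, not values from this Space:

# Sketch only: isolates the timing + transcription pattern introduced in this commit.
# "input.mp3", "output.srt", and language="en" are placeholder values.
import time

import torch
import stable_whisper

device = "cuda" if torch.cuda.is_available() else "cpu"
model = stable_whisper.load_model("large-v3", device=device)

start_time = time.time()
# vad=True enables voice-activity detection, regroup=False keeps Whisper's raw
# segmentation, and no_speech_threshold=0.9 discards segments that are very
# likely silence or noise.
result = model.transcribe("input.mp3", language="en", vad=True, regroup=False,
                          no_speech_threshold=0.9)
result.to_srt_vtt("output.srt")
print(f"transcribe completed in {time.time() - start_time:.2f} seconds")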
requirements.txt CHANGED

@@ -2,5 +2,6 @@ gradio>=3.0.0
 stable-ts
 stable-ts[fw]
 demucs
-torch==2.
+torch==2.6.0
 numpy<2
+chardet
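requirements.txt now pins torch==2.6.0 and adds chardet, alongside the stable-ts[fw] extra that pulls in faster-whisper; that backend is what the new "deepdml/faster-whisper-large-v3-turbo-ct2" dropdown entry relies on. A rough sketch of loading such a CTranslate2 checkpoint through stable-ts, assuming the installed release exposes load_faster_whisper and patches transcribe (older releases named the patched method transcribe_stable):

# Hedged sketch: using a faster-whisper (CTranslate2) checkpoint via stable-ts.
# Requires the stable-ts[fw] extra; method names can differ across stable-ts
# releases, so treat this as an outline rather than the Space's exact code.
import torch
import stable_whisper

device = "cuda" if torch.cuda.is_available() else "cpu"
model = stable_whisper.load_faster_whisper(
    "deepdml/faster-whisper-large-v3-turbo-ct2", device=device
)
# "input.mp3" and "output.srt" are placeholder paths.
result = model.transcribe("input.mp3", language="en")
result.to_srt_vtt("output.srt")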