Spaces:
Running
on
Zero
Running
on
Zero
Update whisper_cs_dev.py
Browse files
Deleted the fake model hack
- whisper_cs_dev.py +5 -20
whisper_cs_dev.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
11 |
import glob
|
12 |
import ctypes
|
13 |
|
14 |
-
from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH,
|
15 |
|
16 |
def load_cudnn():
|
17 |
|
@@ -93,15 +93,9 @@ def load_model(use_v2_fast, device, compute_type):
|
|
93 |
device = device,
|
94 |
)
|
95 |
|
96 |
-
# HACK we need to do this for strange reasons.
|
97 |
-
# If we don't do this, we get:
|
98 |
-
#Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
|
99 |
-
#fake_model = whisper_ts.load_model(MODEL_PATH_V2, device=device)
|
100 |
-
fake_model = None
|
101 |
-
|
102 |
if DEBUG_MODE: print(f"Exiting load_model function...")
|
103 |
|
104 |
-
return model
|
105 |
|
106 |
|
107 |
def split_input_stereo_channels(audio_path):
|
@@ -202,19 +196,10 @@ def transcribe_audio_no_fast_model(model, audio_path):
|
|
202 |
if DEBUG_MODE: print(f"Exited transcribe_audio_no_fast_model function.")
|
203 |
|
204 |
|
205 |
-
def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
|
206 |
|
207 |
if DEBUG_MODE: print(f"Entering transcribe_channels function...")
|
208 |
|
209 |
-
# HACK we need to do this for strange reasons.
|
210 |
-
# If we don't do this, we get:
|
211 |
-
#Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
|
212 |
-
#fake_result = whisper_ts.transcribe(
|
213 |
-
# fake_model,
|
214 |
-
# FAKE_AUDIO_PATH,
|
215 |
-
# beam_size=1,
|
216 |
-
#)
|
217 |
-
|
218 |
if DEBUG_MODE: print(f"Preparing to transcribe...")
|
219 |
|
220 |
if use_v2_fast:
|
@@ -354,10 +339,10 @@ def generate(audio_path, use_v2_fast):
|
|
354 |
|
355 |
load_cudnn()
|
356 |
device, compute_type = get_settings()
|
357 |
-
model
|
358 |
split_input_stereo_channels(audio_path)
|
359 |
left_waveform, right_waveform = process_waveforms()
|
360 |
-
left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast
|
361 |
output = post_process_transcripts(left_result, right_result, use_v2_fast)
|
362 |
cleanup_temp_files(LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH)
|
363 |
|
|
|
11 |
import glob
|
12 |
import ctypes
|
13 |
|
14 |
+
from settings import DEBUG_MODE, MODEL_PATH_V2_FAST, MODEL_PATH_V2, LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH, RESAMPLING_FREQ
|
15 |
|
16 |
def load_cudnn():
|
17 |
|
|
|
93 |
device = device,
|
94 |
)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
if DEBUG_MODE: print(f"Exiting load_model function...")
|
97 |
|
98 |
+
return model
|
99 |
|
100 |
|
101 |
def split_input_stereo_channels(audio_path):
|
|
|
196 |
if DEBUG_MODE: print(f"Exited transcribe_audio_no_fast_model function.")
|
197 |
|
198 |
|
199 |
+
def transcribe_channels(left_waveform, right_waveform, model, use_v2_fast):
|
200 |
|
201 |
if DEBUG_MODE: print(f"Entering transcribe_channels function...")
|
202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
if DEBUG_MODE: print(f"Preparing to transcribe...")
|
204 |
|
205 |
if use_v2_fast:
|
|
|
339 |
|
340 |
load_cudnn()
|
341 |
device, compute_type = get_settings()
|
342 |
+
model = load_model(use_v2_fast, device, compute_type)
|
343 |
split_input_stereo_channels(audio_path)
|
344 |
left_waveform, right_waveform = process_waveforms()
|
345 |
+
left_result, right_result = transcribe_channels(left_waveform, right_waveform, model, use_v2_fast)
|
346 |
output = post_process_transcripts(left_result, right_result, use_v2_fast)
|
347 |
cleanup_temp_files(LEFT_CHANNEL_TEMP_PATH, RIGHT_CHANNEL_TEMP_PATH)
|
348 |
|