DeeeeeeM committed on
Commit
7716a94
·
1 Parent(s): 5b9ff42

added minor changes

Browse files
Files changed (2) hide show
  1. app.py +9 -2
  2. requirements.txt +2 -1
app.py CHANGED
@@ -6,12 +6,15 @@ import gradio as gr
6
  import torch
7
  import stable_whisper
8
  from stable_whisper.text_output import result_to_any, sec2srt
 
9
 
10
  def process_media(
11
  model_size, source_lang, upload, model_type,
12
  max_chars, max_words, extend_in, extend_out, collapse_gaps,
13
  max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
14
  ):
 
 
15
  # ----- is file empty? checker ----- #
16
  if upload is None:
17
  return None, None, None, None
@@ -27,7 +30,8 @@ def process_media(
27
  model = stable_whisper.load_model(model_size, device=device)
28
 
29
  try:
30
- result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, denoiser="demucs", no_speech_threshold=0.9)
 
31
  #result.save_as_json(word_transcription_path)
32
  except Exception as e:
33
  return None, None, None, None
@@ -87,6 +91,9 @@ def process_media(
87
  audio_out = temp_path if mime and mime.startswith("audio") else None
88
  video_out = temp_path if mime and mime.startswith("video") else None
89
 
 
 
 
90
  return audio_out, video_out, transcript_txt, srt_file_path
91
 
92
  def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
@@ -300,7 +307,7 @@ with gr.Blocks() as interface:
300
  )
301
  model_size = gr.Dropdown(
302
  choices=[
303
- "large-v3-turbo",
304
  "large-v3",
305
  "large-v2",
306
  "large",
 
6
  import torch
7
  import stable_whisper
8
  from stable_whisper.text_output import result_to_any, sec2srt
9
+ import time
10
 
11
  def process_media(
12
  model_size, source_lang, upload, model_type,
13
  max_chars, max_words, extend_in, extend_out, collapse_gaps,
14
  max_lines_per_segment, line_penalty, longest_line_char_penalty, *args
15
  ):
16
+ start_time = time.time()
17
+
18
  # ----- is file empty? checker ----- #
19
  if upload is None:
20
  return None, None, None, None
 
30
  model = stable_whisper.load_model(model_size, device=device)
31
 
32
  try:
33
+ result = model.transcribe(temp_path, language=source_lang, vad=True, regroup=False, no_speech_threshold=0.9)
34
+ #remove background music/noise: denoiser="demucs"
35
  #result.save_as_json(word_transcription_path)
36
  except Exception as e:
37
  return None, None, None, None
 
91
  audio_out = temp_path if mime and mime.startswith("audio") else None
92
  video_out = temp_path if mime and mime.startswith("video") else None
93
 
94
+ elapsed = time.time() - start_time
95
+ print(f"process_media completed in {elapsed:.2f} seconds")
96
+
97
  return audio_out, video_out, transcript_txt, srt_file_path
98
 
99
  def optimize_text(text, max_lines_per_segment, line_penalty, longest_line_char_penalty):
 
307
  )
308
  model_size = gr.Dropdown(
309
  choices=[
310
+ "deepdml/faster-whisper-large-v3-turbo-ct2",
311
  "large-v3",
312
  "large-v2",
313
  "large",
requirements.txt CHANGED
@@ -2,5 +2,6 @@ gradio>=3.0.0
2
  stable-ts
3
  stable-ts[fw]
4
  demucs
5
- torch==2.1.2
6
  numpy<2
 
 
2
  stable-ts
3
  stable-ts[fw]
4
  demucs
5
+ torch==2.6.0
6
  numpy<2
7
+ chardet