sanchit-gandhi committed · verified · Commit e12608e · Parent: 01499ab

Update app.py

Files changed (1): app.py (+3 −3)
app.py CHANGED

@@ -1,5 +1,5 @@
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
- from transformers.utils import is_flash_attn_2_available
+ from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
  from transformers.pipelines.audio_utils import ffmpeg_read
  import torch
  import gradio as gr
@@ -11,7 +11,7 @@ MAX_AUDIO_MINS = 30 # maximum audio input in minutes
 
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
- attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
+ attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa" if is_torch_sdpa_available() else "eager"
 
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
      "openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
@@ -124,7 +124,7 @@ if __name__ == "__main__":
 
      <p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
      Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form"> chunked long-form transcription algorithm</a>
-     in 🤗 Transformers, as well as Flash Attention. To use Distil-Whisper yourself, check the code examples on the
+     in 🤗 Transformers. To use Distil-Whisper yourself, check the code examples on the
      <a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
      usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
  """