sanchit-gandhi committed · verified · Commit e12608e · Parent: 01499ab

Update app.py

Files changed (1): app.py (+3 −3)
app.py CHANGED

@@ -1,5 +1,5 @@
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
- from transformers.utils import is_flash_attn_2_available
+ from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
  from transformers.pipelines.audio_utils import ffmpeg_read
  import torch
  import gradio as gr
@@ -11,7 +11,7 @@ MAX_AUDIO_MINS = 30 # maximum audio input in minutes
 
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
- attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
+ attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa" if is_torch_sdpa_available() else "eager"
 
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
      "openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
@@ -124,7 +124,7 @@ if __name__ == "__main__":
 
      <p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
      Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form"> chunked long-form transcription algorithm</a>
-     in 🤗 Transformers, as well as Flash Attention. To use Distil-Whisper yourself, check the code examples on the
+     in 🤗 Transformers. To use Distil-Whisper yourself, check the code examples on the
      <a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
      usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
  """