whisper-vs-distil-whisper

Runtime error

sanchit-gandhi committed on Feb 19, 2024

Commit

2ab8d12

verified ·

1 Parent(s): 50837d0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,20 +10,15 @@ MAX_AUDIO_MINS = 30  # maximum audio input in minutes
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-use_flash_attention_2 = is_flash_attn_2_available()
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "openai/whisper-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
 )
 distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    "distil-whisper/distil-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, use_flash_attention_2=use_flash_attention_2
 )
-if not use_flash_attention_2:
-    # use flash attention from pytorch sdpa
-    model = model.to_bettertransformer()
-    distilled_model = distilled_model.to_bettertransformer()
 processor = AutoProcessor.from_pretrained("openai/whisper-large-v2")
 model.to(device)

 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
 model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    "openai/whisper-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 distilled_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    "distil-whisper/distil-large-v2", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
 )
 processor = AutoProcessor.from_pretrained("openai/whisper-large-v2")
 model.to(device)