Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
2 |
-
from transformers.utils import is_flash_attn_2_available
|
3 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
4 |
import torch
|
5 |
import gradio as gr
|
@@ -11,7 +11,7 @@ MAX_AUDIO_MINS = 30 # maximum audio input in minutes
|
|
11 |
|
12 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
13 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
14 |
-
attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa"
|
15 |
|
16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
17 |
"openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
|
@@ -124,7 +124,7 @@ if __name__ == "__main__":
|
|
124 |
|
125 |
<p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
|
126 |
Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form"> chunked long-form transcription algorithm</a>
|
127 |
-
in 🤗 Transformers
|
128 |
<a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
|
129 |
usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
|
130 |
"""
|
|
|
1 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
2 |
+
from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available
|
3 |
from transformers.pipelines.audio_utils import ffmpeg_read
|
4 |
import torch
|
5 |
import gradio as gr
|
|
|
11 |
|
12 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
13 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
14 |
+
attn_implementation = "flash_attention_2" if is_flash_attn_2_available() else "sdpa" if is_torch_sdpa_available() else "eager"
|
15 |
|
16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
17 |
"openai/whisper-large-v3", torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True, attn_implementation=attn_implementation
|
|
|
124 |
|
125 |
<p>In this demo, we perform a speed comparison between Whisper and Distil-Whisper in order to test this claim.
|
126 |
Both models use the <a href="https://huggingface.co/distil-whisper/distil-large-v3#chunked-long-form"> chunked long-form transcription algorithm</a>
|
127 |
+
in 🤗 Transformers. To use Distil-Whisper yourself, check the code examples on the
|
128 |
<a href="https://github.com/huggingface/distil-whisper#1-usage"> Distil-Whisper repository</a>. To ensure fair
|
129 |
usage of the Space, we ask that audio file inputs are kept to < 30 mins.</p>
|
130 |
"""
|