Commit ea54579 · Parent(s): 954cfbc
Reduce peak GPU memory use during model init
Signed-off-by: Piotr Żelasko <[email protected]>
app.py CHANGED
```diff
@@ -16,10 +16,8 @@ MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
 CHUNK_SECONDS = 40.0 # max audio length seen by the model
 BATCH_SIZE = 8 # for parallel transcription of audio longer than CHUNK_SECONDS
 
-
-
-model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
-torch.set_default_dtype(torch.float32)
+
+model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
 
 
 def timestamp(idx: int):
```
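The retained init line casts the checkpoint to bfloat16 before moving it to the GPU. That ordering is what keeps the peak down: with `.to(device)` first, the float32 weights land on the GPU and the bfloat16 cast briefly holds both copies there. Below is a minimal sketch of the effect, assuming a CUDA device and using a plain `nn.Sequential` stand-in plus a hypothetical `peak_mb` helper rather than SALM itself:

```python
# Sketch only (not the Space's code): compare peak GPU memory for the two
# cast orders. The nn.Sequential model and peak_mb helper are stand-ins.
import torch
import torch.nn as nn

device = "cuda"


def peak_mb(fn):
    # Run fn and report the peak GPU memory allocated during it, in MiB.
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats(device)
    fn()
    return torch.cuda.max_memory_allocated(device) / 2**20


def cast_on_gpu():
    # Move float32 weights to the GPU first, then cast: during the cast
    # the float32 tensors and their bfloat16 copies coexist on the GPU.
    m = nn.Sequential(*[nn.Linear(4096, 4096) for _ in range(8)])
    m = m.to(device).bfloat16()


def cast_on_cpu():
    # Cast to bfloat16 while still on the CPU, then move: the GPU only
    # ever sees the half-size weights.
    m = nn.Sequential(*[nn.Linear(4096, 4096) for _ in range(8)])
    m = m.bfloat16().to(device)


print(f"to(device) then bfloat16(): peak {peak_mb(cast_on_gpu):.0f} MiB")
print(f"bfloat16() then to(device): peak {peak_mb(cast_on_cpu):.0f} MiB")
```

Under the (assumed) premise that `SALM.from_pretrained` materializes weights on the CPU, `.bfloat16().eval().to(device)` means the GPU only ever receives the half-size copy.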
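The removed `torch.set_default_dtype(torch.float32)` reads like the restore half of a set/restore bracket around model init; the other removed lines are blank in this view, so what opened the bracket is not recoverable here. Where such a bracket is still wanted, a scoped context manager restores the default even if loading raises. The `default_dtype` helper below is a hypothetical sketch, not code from this Space:

```python
import contextlib

import torch


@contextlib.contextmanager
def default_dtype(dtype: torch.dtype):
    # Hypothetical helper, not from the Space: temporarily switch the
    # global default dtype and always restore the previous one on exit.
    prev = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        torch.set_default_dtype(prev)


# Tensors created inside the block default to bfloat16 ...
with default_dtype(torch.bfloat16):
    assert torch.empty(1).dtype == torch.bfloat16
# ... and the previous default is back afterwards, even on error.
assert torch.get_default_dtype() == torch.float32
```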