piotrzelasko committed on
Commit
ea54579
·
1 Parent(s): 954cfbc

Reduce peak GPU memory use during model init

Browse files

Signed-off-by: Piotr Żelasko <[email protected]>

Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -16,10 +16,8 @@ MAX_AUDIO_MINUTES = 10 # wont try to transcribe if longer than this
16
  CHUNK_SECONDS = 40.0 # max audio length seen by the model
17
  BATCH_SIZE = 8 # for parallel transcription of audio longer than CHUNK_SECONDS
18
 
19
- with device:
20
- torch.set_default_dtype(torch.bfloat16) # speed up start-up time
21
- model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
22
- torch.set_default_dtype(torch.float32)
23
 
24
 
25
  def timestamp(idx: int):
 
16
  CHUNK_SECONDS = 40.0 # max audio length seen by the model
17
  BATCH_SIZE = 8 # for parallel transcription of audio longer than CHUNK_SECONDS
18
 
19
+
20
+ model = SALM.from_pretrained("nvidia/canary-qwen-2.5b").bfloat16().eval().to(device)
 
 
21
 
22
 
23
  def timestamp(idx: int):