camparchimedes committed
Commit 23b8d9f · verified · 1 Parent(s): cb06cac

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -112,8 +112,8 @@ def transcribe_audio(audio_file, chunk_length_s=30):
     attention_mask = torch.ones(inputs.input_features.shape[:2], dtype=torch.long, device=device)
 
     # Check the dimensions and values of the attention mask
-    assert attention_mask.shape == (1, input_features.shape[1]), "Attention mask dimensions do not match the input features."
-    assert (attention_mask.sum().item() == input_features.shape[1]), "Attention mask has incorrect values."
+    assert attention_mask.shape == (2, input_features.shape[2]), "Attention mask dimensions do not match the input features."
+    assert (attention_mask.sum().item() == input_features.shape[2]), "Attention mask has incorrect values."
 
     # ASR model inference on the chunk
     with torch.no_grad():
@@ -124,8 +124,8 @@ def transcribe_audio(audio_file, chunk_length_s=30):
     )
 
     # new processor object with desired configuration
-    new_processor = processor.add_special_tokens({'pad_token': '[PAD]'})
-    chunk_text = new_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+    #new_processor = processor.add_special_tokens({'eos_token': '[EOS]'})
+    chunk_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
     full_text.append(chunk_text)
 
     # Combine the transcribed text from all chunks
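
For reference, a minimal, self-contained sketch of the chunk-decoding step this commit touches, assuming a standard transformers Whisper setup. The "openai/whisper-small" checkpoint, the dummy audio array, and the variable names outside the diff are placeholders, not app.py's actual configuration; the sketch builds the attention mask the same way as line 112 and decodes with the original processor and skip_special_tokens=True, as in the updated line 128.

import numpy as np
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Placeholder setup; app.py's real checkpoint, model, and device handling are not shown in this diff.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Dummy 5-second, 16 kHz waveform standing in for one audio chunk.
audio = np.zeros(16000 * 5, dtype=np.float32)
inputs = processor(audio, sampling_rate=16000, return_tensors="pt")
input_features = inputs.input_features.to(device)

# Attention mask over the first two feature dimensions, mirroring line 112 of app.py.
attention_mask = torch.ones(input_features.shape[:2], dtype=torch.long, device=device)

# ASR inference on the chunk (generation arguments elided in the diff are omitted here too).
with torch.no_grad():
    generated_ids = model.generate(input_features)

# Decode with the original processor and drop special tokens, as in the updated line 128.
chunk_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(chunk_text)

With skip_special_tokens=True, the decoder's special tokens (start-of-transcript, language, and timestamp markers) are stripped from the returned string, so plain transcript text comes back without the separate processor configuration used on the old line 127.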