Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -104,16 +104,16 @@ def transcribe_audio(audio_file, chunk_length_s=30):
|
|
104 |
if chunk_waveform.shape[0] > 1:
|
105 |
chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)
|
106 |
|
107 |
-
# Process
|
108 |
inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, return_tensors="pt")
|
109 |
input_features = inputs.input_features
|
110 |
|
111 |
# Create attention mask
|
112 |
attention_mask = torch.ones(inputs.input_features.shape[:2], dtype=torch.long, device=device)
|
113 |
|
114 |
-
#
|
115 |
-
|
116 |
-
|
117 |
|
118 |
# ASR model inference on the chunk
|
119 |
with torch.no_grad():
|
@@ -264,7 +264,7 @@ def save_to_pdf(text, summary):
|
|
264 |
iface = gr.Blocks()
|
265 |
|
266 |
with iface:
|
267 |
-
gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/
|
268 |
gr.Markdown("**Switch Work's JoJo-versjon som webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download**")
|
269 |
|
270 |
with gr.Tabs():
|
|
|
104 |
if chunk_waveform.shape[0] > 1:
|
105 |
chunk_waveform = torch.mean(chunk_waveform, dim=0, keepdim=True)
|
106 |
|
107 |
+
# Process chunk with the processor's feature extractor (yields input_features)
|
108 |
inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, return_tensors="pt")
|
109 |
input_features = inputs.input_features
|
110 |
|
111 |
# Create attention mask
|
112 |
attention_mask = torch.ones(inputs.input_features.shape[:2], dtype=torch.long, device=device)
|
113 |
|
114 |
+
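# Set the attention mask to zero for padding tokens
# NOTE(review): input_features holds audio features, not token ids, and the mask only has shape input_features.shape[:2]; comparing against pad_token_id presumably mis-indexes when input_features has more than 2 dims — confirm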
|
115 |
+
attention_mask[input_features == processor.tokenizer.pad_token_id] = 0
|
116 |
+
|
117 |
|
118 |
# ASR model inference on the chunk
|
119 |
with torch.no_grad():
|
|
|
264 |
iface = gr.Blocks()
|
265 |
|
266 |
with iface:
|
267 |
+
gr.HTML('<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/resolve/main/banner_trans.png" width="100%" height="auto"/>')
|
268 |
gr.Markdown("**Switch Work's JoJo-versjon som webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download**")
|
269 |
|
270 |
with gr.Tabs():
|