Update app.py

app.py CHANGED
@@ -32,6 +32,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 import pandas as pd
 import numpy as np
 import re
+import spaces
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import gradio as gr
 from fpdf import FPDF
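The new `import spaces` pulls in Hugging Face's ZeroGPU helper package. On ZeroGPU Spaces the import is normally paired with the `spaces.GPU` decorator on the GPU-bound entry point; below is a minimal sketch of that pattern, assuming it is applied to this app's `transcribe_audio` (the decorator placement is an assumption, not part of this commit):

import spaces

# Assumed ZeroGPU pattern: the decorator allocates a GPU for the duration
# of each call, so all CUDA work should happen inside the wrapped function.
@spaces.GPU
def transcribe_audio(audio_file, chunk_length_s=30):
    ...  # chunked Whisper inference as in app.py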
@@ -45,6 +46,7 @@ PLACEHOLDER = """
 <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">En webapp for transkribering av lydfiler til norsk skrift. Språkmodell: NbAiLab/nb-whisper-large, Ekstra: oppsummering, pdf-download</p>
 </div>
 """
+
 # Suppress warnings
 warnings.filterwarnings("ignore")
 
@@ -114,10 +116,7 @@ def transcribe_audio(audio_file, chunk_length_s=30):
 
 
     # Tokenize the input batch with the processor
-    inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, return_tensors="pt", task="transcribe")
-
-    # Use the attention mask directly from the tokenizer output
-    attention_mask = inputs.attention_mask.to(device)
+    inputs = processor(chunk_waveform.squeeze(0).numpy(), sampling_rate=sample_rate, padding="max_length", return_tensors="pt", task="transcribe", device=device)
 
     # ASR model inference on the chunk
     with torch.no_grad():
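This last hunk collapses four lines into one: the manual attention-mask handling is removed, and padding="max_length" plus device=device are passed to the processor call instead. For context, here is a minimal sketch of how the processed inputs typically feed the generation step under the with torch.no_grad(): block; the model.generate and batch_decode calls follow standard transformers Whisper usage and are assumptions, not lines from this commit:

import torch

# Assumed context: processor/model are the NbAiLab/nb-whisper-large pair,
# and chunk_waveform / sample_rate come from the chunking loop in app.py.
inputs = processor(
    chunk_waveform.squeeze(0).numpy(),
    sampling_rate=sample_rate,
    return_tensors="pt",
)
with torch.no_grad():
    # input_features is the key the Whisper processor returns for audio.
    predicted_ids = model.generate(inputs.input_features.to(device))
chunk_text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

Recent transformers releases also let the Whisper feature extractor accept a device argument so the log-mel features are computed on GPU, which is presumably what the added device=device targets here.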
|