Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -27,20 +27,22 @@ def transcribe_audio(audio_file):
|
|
27 |
chunks = [audio_input[i:i + chunk_size] for i in range(0, len(audio_input), chunk_size)]
|
28 |
|
29 |
transcription = ""
|
30 |
-
for
|
31 |
-
|
|
|
32 |
inputs = inputs.to(device)
|
|
|
33 |
with torch.no_grad():
|
34 |
output = model.generate(
|
35 |
inputs.input_features,
|
36 |
-
max_length=1024, # Increase max_length
|
37 |
-
num_beams=
|
38 |
task="transcribe",
|
|
|
|
|
39 |
language="no"
|
40 |
)
|
41 |
-
transcription += processor.batch_decode(output, skip_special_tokens=True)
|
42 |
-
|
43 |
-
return transcription.strip()
|
44 |
|
45 |
# HTML banner image
|
46 |
banner_html = """
|
@@ -54,12 +56,13 @@ iface = gr.Blocks()
|
|
54 |
|
55 |
with iface:
|
56 |
gr.HTML(banner_html)
|
57 |
-
gr.Markdown("#
|
58 |
audio_input = gr.Audio(type="filepath")
|
|
|
59 |
transcription_output = gr.Textbox()
|
60 |
transcribe_button = gr.Button("Transcribe")
|
61 |
|
62 |
-
transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription_output)
|
63 |
|
64 |
# Launch interface
|
65 |
iface.launch(share=True, debug=True)
|
|
|
27 |
chunks = [audio_input[i:i + chunk_size] for i in range(0, len(audio_input), chunk_size)]
|
28 |
|
29 |
transcription = ""
|
30 |
+
for i in range(0, len(chunks), batch_size):
|
31 |
+
batch_chunks = chunks[i:i + batch_size]
|
32 |
+
inputs = processor(batch_chunks, sampling_rate=16000, return_tensors="pt", padding=True)
|
33 |
inputs = inputs.to(device)
|
34 |
+
attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
|
35 |
with torch.no_grad():
|
36 |
output = model.generate(
|
37 |
inputs.input_features,
|
38 |
+
max_length=1024, # Increase max_length for longer outputs
|
39 |
+
num_beams=7,
|
40 |
task="transcribe",
|
41 |
+
attention_mask=attention_mask,
|
42 |
+
forced_decoder_ids=None,  # forced_decoder_ids must not be set
|
43 |
language="no"
|
44 |
)
|
45 |
+
transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
|
|
|
|
|
46 |
|
47 |
# HTML banner image
|
48 |
banner_html = """
|
|
|
56 |
|
57 |
with iface:
|
58 |
gr.HTML(banner_html)
|
59 |
+
gr.Markdown("# ππ―π’ππ’π ππππ ππΌπΎπ¦Ύβ‘ @{NbAiLab/whisper-norwegian-medium}\nUpload audio file:β")
|
60 |
audio_input = gr.Audio(type="filepath")
|
61 |
+
batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, value=4, label="Batch Size")
|
62 |
transcription_output = gr.Textbox()
|
63 |
transcribe_button = gr.Button("Transcribe")
|
64 |
|
65 |
+
transcribe_button.click(fn=transcribe_audio, inputs=[audio_input, batch_size_input], outputs=transcription_output)
|
66 |
|
67 |
# Launch interface
|
68 |
iface.launch(share=True, debug=True)
|