Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -27,20 +27,22 @@ def transcribe_audio(audio_file):
|
|
| 27 |
chunks = [audio_input[i:i + chunk_size] for i in range(0, len(audio_input), chunk_size)]
|
| 28 |
|
| 29 |
transcription = ""
|
| 30 |
-
for
|
| 31 |
-
|
|
|
|
| 32 |
inputs = inputs.to(device)
|
|
|
|
| 33 |
with torch.no_grad():
|
| 34 |
output = model.generate(
|
| 35 |
inputs.input_features,
|
| 36 |
-
max_length=1024, # Increase max_length
|
| 37 |
-
num_beams=
|
| 38 |
task="transcribe",
|
|
|
|
|
|
|
| 39 |
language="no"
|
| 40 |
)
|
| 41 |
-
transcription += processor.batch_decode(output, skip_special_tokens=True)
|
| 42 |
-
|
| 43 |
-
return transcription.strip()
|
| 44 |
|
| 45 |
# HTML |banner image
|
| 46 |
banner_html = """
|
|
@@ -54,12 +56,13 @@ iface = gr.Blocks()
|
|
| 54 |
|
| 55 |
with iface:
|
| 56 |
gr.HTML(banner_html)
|
| 57 |
-
gr.Markdown("#
|
| 58 |
audio_input = gr.Audio(type="filepath")
|
|
|
|
| 59 |
transcription_output = gr.Textbox()
|
| 60 |
transcribe_button = gr.Button("Transcribe")
|
| 61 |
|
| 62 |
-
transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription_output)
|
| 63 |
|
| 64 |
# Launch interface
|
| 65 |
iface.launch(share=True, debug=True)
|
|
|
|
| 27 |
chunks = [audio_input[i:i + chunk_size] for i in range(0, len(audio_input), chunk_size)]
|
| 28 |
|
| 29 |
transcription = ""
|
| 30 |
+
for i in range(0, len(chunks), batch_size):
|
| 31 |
+
batch_chunks = chunks[i:i + batch_size]
|
| 32 |
+
inputs = processor(batch_chunks, sampling_rate=16000, return_tensors="pt", padding=True)
|
| 33 |
inputs = inputs.to(device)
|
| 34 |
+
attention_mask = inputs.attention_mask.to(device) if 'attention_mask' in inputs else None
|
| 35 |
with torch.no_grad():
|
| 36 |
output = model.generate(
|
| 37 |
inputs.input_features,
|
| 38 |
+
max_length=1024, # Increase max_length for longer outputs
|
| 39 |
+
num_beams=7,
|
| 40 |
task="transcribe",
|
| 41 |
+
attention_mask=attention_mask,
|
| 42 |
+
forced_decoder_ids=None # forced_decoder_ids must not be set
|
| 43 |
language="no"
|
| 44 |
)
|
| 45 |
+
transcription += " ".join(processor.batch_decode(output, skip_special_tokens=True)) + " "
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# HTML |banner image
|
| 48 |
banner_html = """
|
|
|
|
| 56 |
|
| 57 |
with iface:
|
| 58 |
gr.HTML(banner_html)
|
| 59 |
+
gr.Markdown("# ππ―π’ππ’π ππππ ππΌπΎπ¦Ύβ‘ @{NbAiLab/whisper-norwegian-medium}\nUpload audio file:β")
|
| 60 |
audio_input = gr.Audio(type="filepath")
|
| 61 |
+
batch_size_input = gr.Slider(minimum=1, maximum=16, step=1, default=4, label="Batch Size")
|
| 62 |
transcription_output = gr.Textbox()
|
| 63 |
transcribe_button = gr.Button("Transcribe")
|
| 64 |
|
| 65 |
+
transcribe_button.click(fn=transcribe_audio, inputs=[audio_input, batch_size_input], outputs=transcription_output)
|
| 66 |
|
| 67 |
# Launch interface
|
| 68 |
iface.launch(share=True, debug=True)
|