Spaces:
Build error
Build error
File size: 2,223 Bytes
8cb8264 592f7e1 930fb43 afe3d6c 8cb8264 592f7e1 8cb8264 f9cd637 592f7e1 f9cd637 592f7e1 f9cd637 5c56ed6 592f7e1 47661bd 592f7e1 47661bd aea18b3 47661bd aea18b3 47661bd 592f7e1 aea18b3 592f7e1 5e4096f 592f7e1 f9cd637 592f7e1 aea18b3 5e4096f 592f7e1 aea18b3 47661bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
import warnings
import torch
from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
import soundfile as sf
# Silence every warning raised from this point on.
warnings.filterwarnings("ignore")

# Single checkpoint id shared by the tokenizer, the model and the processor.
MODEL_ID = "NbAiLabBeta/nb-whisper-medium"

# Pick the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float32

# Load tokenizer + model + processor from the same checkpoint.
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
processor = WhisperProcessor.from_pretrained(MODEL_ID)

# Move the model weights onto the selected device.
model.to(device)
# Sample rate (Hz) passed to the processor and used to size chunks.
_TARGET_SAMPLE_RATE = 16000
# Chunk length in seconds; 28 s chunks seemed to work best in practice.
_CHUNK_SECONDS = 28


def _to_mono_at_rate(samples, sample_rate, target_rate):
    """Return *samples* as a 1-D float32 array sampled at *target_rate* Hz.

    Multi-channel input (frames x channels) is averaged down to one channel.
    Resampling uses plain linear interpolation: it needs no extra
    dependency, but applies no anti-aliasing filter.
    """
    import numpy as np  # local import: the module does not import numpy itself

    samples = np.asarray(samples, dtype=np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    if sample_rate != target_rate and len(samples) > 0:
        new_len = int(round(len(samples) * target_rate / sample_rate))
        src_times = np.arange(len(samples)) / sample_rate
        dst_times = np.arange(new_len) / target_rate
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)
    return samples


def transcribe_audio(audio_file):
    """Transcribe an audio file to Norwegian text.

    The audio is read from disk, converted to mono at 16 kHz, split into
    fixed-length chunks, and each chunk is decoded with beam search. The
    per-chunk texts are joined with single spaces.

    Args:
        audio_file: Path to the audio file. ``None`` or an empty path (for
            example, the button was pressed before a file was uploaded)
            yields an empty transcription instead of an error.

    Returns:
        The transcription as one string, stripped of surrounding whitespace.
    """
    if not audio_file:
        return ""

    audio_input, sample_rate = sf.read(audio_file, dtype="float32")
    # The processor is told the audio is 16 kHz mono, so make that true
    # before chunking; otherwise both chunk length and features are wrong.
    audio_input = _to_mono_at_rate(audio_input, sample_rate, _TARGET_SAMPLE_RATE)

    chunk_size = _TARGET_SAMPLE_RATE * _CHUNK_SECONDS
    pieces = []
    for start in range(0, len(audio_input), chunk_size):
        chunk = audio_input[start:start + chunk_size]
        inputs = processor(
            chunk, sampling_rate=_TARGET_SAMPLE_RATE, return_tensors="pt"
        )
        inputs = inputs.to(device)
        with torch.no_grad():
            output = model.generate(
                inputs.input_features,
                max_length=1024,  # raised upper bound to allow longer outputs
                num_beams=5,
                task="transcribe",
                language="no",
            )
        pieces.append(processor.batch_decode(output, skip_special_tokens=True)[0])
    return " ".join(pieces).strip()
# HTML banner image shown at the top of the interface.
# The image size is given as inline CSS: CSS declarations such as
# "height:auto" are not valid inside the `width` attribute.
banner_html = """
<div style="text-align: center;">
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
</div>
"""
# Gradio interface: banner, instructions, audio upload, result box and a
# button that runs the transcription on the uploaded file.
with gr.Blocks() as iface:
    gr.HTML(banner_html)
    gr.Markdown(
        "# Nvidia A100ππΌπΎπ¦Ύβ‘βπ§πΌβπ«@{NbAiLab/whisper-norwegian-medium}\n"
        "Upload audio file (*needs to be in .mp3 format before upload*)"
    )
    # The audio component hands the callback a file path, not raw samples.
    audio_input = gr.Audio(type="filepath")
    transcription_output = gr.Textbox()
    transcribe_button = gr.Button("Transcribe")
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=transcription_output,
    )
# Launch the interface with a public share link requested and debug mode on.
iface.launch(share=True, debug=True)