# NOTE: this file was captured from the Hugging Face Spaces file viewer.
# The viewer header (status "Build error", file size 2,274 bytes, per-line
# commit hashes and the line-number gutter) is page residue, not program text.
import gradio as gr
import warnings
import torch
from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
import soundfile as sf
from huggingface_hub import spaces
# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings("ignore")

# One checkpoint id is used for the tokenizer, the model and the processor.
MODEL_ID = "NbAiLabBeta/nb-whisper-medium"

# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float32

# Load tokenizer + model + processor from the Hub.
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
processor = WhisperProcessor.from_pretrained(MODEL_ID)

# Move the model weights to the selected device.
model.to(device)
# The processor is always called with sampling_rate=16000, so the waveform
# must really be 16 kHz before it is chunked.
_TARGET_SAMPLE_RATE = 16000
# 28 seconds chunks (seems to work best, per the original author's note).
_CHUNK_SECONDS = 28
# Whisper's decoder has 448 target positions by default; asking generate()
# for more (the original asked for 2048) cannot yield longer output and can
# index past the position embeddings. NOTE(review): confirm against
# model.config.max_target_positions for this checkpoint.
_MAX_DECODER_TOKENS = 448


def _to_mono_16k(waveform, sample_rate):
    """Return *waveform* as a 1-D float32 array sampled at 16 kHz."""
    import numpy as np  # local import: the file does not import numpy itself

    samples = np.asarray(waveform, dtype=np.float32)
    if samples.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files.
        samples = samples.mean(axis=1)
    if samples.size and sample_rate != _TARGET_SAMPLE_RATE:
        # Dependency-free linear resample onto a 16 kHz time grid.
        target_len = max(
            1, int(round(samples.shape[0] * _TARGET_SAMPLE_RATE / sample_rate))
        )
        src_times = np.arange(samples.shape[0]) / sample_rate
        dst_times = np.arange(target_len) / _TARGET_SAMPLE_RATE
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)
    return samples


def transcribe_audio(audio_file):
    """Transcribe an audio file to Norwegian text with the loaded Whisper model.

    The file is read with soundfile, downmixed to mono, resampled to 16 kHz
    when needed, cut into 28-second chunks, and each chunk is decoded with
    beam search. The chunk transcripts are joined with single spaces.

    Args:
        audio_file: Path of the audio file (as delivered by the Gradio
            ``filepath`` audio component). ``None`` or an empty path means
            nothing was uploaded.

    Returns:
        The transcription as a string; an empty string when no file was
        given or the file contains no samples.
    """
    if not audio_file:
        # The button can be clicked before anything has been uploaded.
        return ""

    waveform, sample_rate = sf.read(audio_file)
    samples = _to_mono_16k(waveform, sample_rate)
    if samples.size == 0:
        return ""

    chunk_size = _TARGET_SAMPLE_RATE * _CHUNK_SECONDS
    pieces = []
    for start in range(0, len(samples), chunk_size):
        chunk = samples[start:start + chunk_size]
        inputs = processor(
            chunk, sampling_rate=_TARGET_SAMPLE_RATE, return_tensors="pt"
        )
        inputs = inputs.to(device)
        with torch.no_grad():
            output = model.generate(
                inputs.input_features,
                max_length=_MAX_DECODER_TOKENS,
                num_beams=10,
                task="transcribe",
                language="no",
            )
        text = processor.batch_decode(output, skip_special_tokens=True)[0].strip()
        if text:
            pieces.append(text)
    return " ".join(pieces)
# HTML for the banner image shown at the top of the page.
# The sizing lives in a `style` attribute: the original put CSS
# ("87%; height:auto;") inside the `width` attribute, which is invalid HTML.
banner_html = """
<div style="text-align: center;">
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
</div>
"""
# Gradio interface: banner, heading, audio upload, output box and a button
# that runs transcribe_audio on the uploaded file.
with gr.Blocks() as iface:
    gr.HTML(banner_html)
    # NOTE(review): the emoji run in the heading is mojibake in this copy of
    # the file and is kept byte-for-byte; restore the intended emoji from the
    # original source. The heading also names NbAiLab/whisper-norwegian-medium
    # while the code loads NbAiLabBeta/nb-whisper-medium -- confirm which label
    # is intended.
    gr.Markdown("# Ola's AudioSwitch2Go ππ§βπ§πΌβπ«@{NbAiLab/whisper-norwegian-medium}\nUpload audio file (if .m4a ~simply rename it to .mp3 before upload)")
    audio_input = gr.Audio(type="filepath")
    transcription_output = gr.Textbox()
    transcribe_button = gr.Button("Transcribe")
    # Event listeners must be registered inside the Blocks context.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=transcription_output,
    )

# Launch the interface (the stray trailing "|" from the page scrape is removed).
iface.launch(share=True, debug=True)