Spaces:
Runtime error
Runtime error
File size: 1,585 Bytes
17cb7d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
"""Røst ASR demo."""
import warnings
import gradio as gr
import numpy as np
import samplerate
import torch
from punctfix import PunctFixer
from transformers import pipeline
# Suppress every FutureWarning for the lifetime of the process.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Heading and introductory text handed to the Gradio interface.
TITLE = "Røst ASR Demo"
DESCRIPTION = (
    "\n"
    "This is a demo of the Danish speech recognition model Røst. "
    "Speak into the microphone\n"
    "and see the text appear on the screen!\n"
)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Speech recognition pipeline backed by the Røst checkpoint; created once at
# import time and reused by every transcription request.
transcriber = pipeline(
    model="alexandrainst/roest-315m",
    task="automatic-speech-recognition",
    device=device,
)

# Post-processor applied to the raw transcription text (Danish), placed on the
# same device as the pipeline.
transcription_fixer = PunctFixer(device=device, language="da")
def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
    """Transcribe the audio.

    The audio is reduced to one dimension if needed, resampled to 16 kHz, run
    through the module-level ``transcriber`` pipeline and then passed to the
    module-level ``transcription_fixer`` for punctuation.

    Args:
        sampling_rate_and_audio:
            A tuple with the sampling rate and the audio. ``None`` is also
            accepted and treated as "no audio"; the Gradio audio input passes
            ``None`` when nothing was recorded or uploaded.

    Returns:
        The transcription. An empty string is returned when there is no audio
        to transcribe or when the pipeline does not return a single result
        dictionary.
    """
    # Guard against a submit with no recording/upload: unpacking ``None``
    # below would otherwise raise a TypeError.
    if sampling_rate_and_audio is None:
        return ""

    sampling_rate, audio = sampling_rate_and_audio

    # A zero-length clip has nothing to transcribe; skip the resampler and the
    # model entirely.
    if audio.size == 0:
        return ""

    # Average over the second axis for multi-dimensional input
    # (assumed to be the channel axis, i.e. a stereo -> mono mix-down).
    if audio.ndim > 1:
        audio = np.mean(audio, axis=1)

    # Convert from the incoming sampling rate to 16 kHz before transcription.
    audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")

    transcription = transcriber(inputs=audio)
    # Only a single dict result carries the "text" key read below; anything
    # else (e.g. a list of results) is treated as "no transcription".
    if not isinstance(transcription, dict):
        return ""

    cleaned_transcription = transcription_fixer.punctuate(
        text=transcription["text"]
    )
    return cleaned_transcription
# Web UI: one audio input (microphone recording or file upload) wired to
# `transcribe_audio`, with the resulting text shown in a textbox.
demo = gr.Interface(
    title=TITLE,
    description=DESCRIPTION,
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["microphone", "upload"]),
    outputs="textbox",
    allow_flagging="never",
)

# Start the app as soon as the script is executed.
demo.launch()
|