Spaces:

alexandrainst
/

roest-demo

Sleeping

App Files Files Community

saattrupdan committed on Sep 18, 2024

Commit

17cb7d3

1 Parent(s): 48609c3

feat: Add demo

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +59 -0
requirements.txt +74 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv/

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+"""Røst ASR demo."""
+import warnings
+import gradio as gr
+import numpy as np
+import samplerate
+import torch
+from punctfix import PunctFixer
+from transformers import pipeline
+warnings.filterwarnings("ignore", category=FutureWarning)
+TITLE = "Røst ASR Demo"
+DESCRIPTION = """
+This is a demo of the Danish speech recognition model Røst. Speak into the microphone
+and see the text appear on the screen!
+"""
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+transcriber = pipeline(
+    task="automatic-speech-recognition",
+    model="alexandrainst/roest-315m",
+    device=device
+)
+transcription_fixer = PunctFixer(language="da", device=device)
+def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray]) -> str:
+    """Transcribe the audio.
+    Args:
+        sampling_rate_and_audio:
+            A tuple with the sampling rate and the audio.
+    Returns:
+        The transcription.
+    """
+    sampling_rate, audio = sampling_rate_and_audio
+    if audio.ndim > 1:
+        audio = np.mean(audio, axis=1)
+    audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
+    transcription = transcriber(inputs=audio)
+    if not isinstance(transcription, dict):
+        return ""
+    cleaned_transcription = transcription_fixer.punctuate(
+        text=transcription["text"]
+    )
+    return cleaned_transcription
+demo = gr.Interface(
+    fn=transcribe_audio,
+    inputs=gr.Audio(sources=["microphone", "upload"]),
+    outputs="textbox",
+    title=TITLE,
+    description=DESCRIPTION,
+    allow_flagging="never",
+)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,74 @@

+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==24.2.0
+certifi==2024.8.30
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.3.0
+cycler==0.12.1
+exceptiongroup==1.2.2
+fastapi==0.115.0
+ffmpy==0.4.0
+filelock==3.16.1
+fonttools==4.53.1
+fsspec==2024.9.0
+gradio==4.44.0
+gradio_client==1.3.0
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.25.0
+hypothesis==6.112.1
+idna==3.10
+importlib_resources==6.4.5
+Jinja2==3.1.4
+kenlm @ https://github.com/kpu/kenlm/archive/master.zip#sha256=d23d300d559a45a5e3ede958dbbf2395231119c0b8cd97a1ea43480625894ff4
+kiwisolver==1.4.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.3
+numpy==1.26.4
+orjson==3.10.7
+packaging==24.1
+pandas==2.2.2
+pillow==10.4.0
+punctfix==0.11.1
+pyctcdecode==0.5.0
+pydantic==2.9.2
+pydantic_core==2.23.4
+pydub==0.25.1
+Pygments==2.18.0
+pygtrie==2.5.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-multipart==0.0.9
+pytz==2024.2
+PyYAML==6.0.2
+regex==2024.9.11
+requests==2.32.3
+rich==13.8.1
+ruff==0.6.5
+safetensors==0.4.5
+samplerate==0.2.1
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+sortedcontainers==2.4.0
+starlette==0.38.5
+sympy==1.13.2
+tokenizers==0.19.1
+tomlkit==0.12.0
+torch==2.4.1
+tqdm==4.66.5
+transformers==4.44.2
+typer==0.12.5
+typing_extensions==4.12.2
+tzdata==2024.1
+urllib3==2.2.3
+uvicorn==0.30.6
+websockets==12.0