|
import gradio as gr |
|
from transformers import pipeline |
|
import numpy as np |
|
import os |
|
from huggingface_hub import login |
|
import librosa |
|
import spaces |
|
|
|
# Authenticate against the Hugging Face Hub only when a token is supplied
# through the environment; with no (or an empty) token, login is skipped.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
|
|
|
# Hub identifier of the checkpoint served by this demo.
MODEL_ID = "badrex/w2v-bert-2.0-kinyarwanda-asr"

# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
# NOTE(review): no `device` is passed, so the pipeline uses its default
# device; `transcribe` is decorated with @spaces.GPU, which suggests GPU
# inference was intended - confirm.
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_ID)
|
|
|
|
|
@spaces.GPU
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, or ``None`` when the
            user submitted without recording/uploading anything.
            ``samples`` is expected to be a NumPy array; a 2-D array is
            assumed to be ``(num_samples, num_channels)`` - TODO confirm
            against the audio component's output format.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when there is no audio to transcribe.
    """
    # Nothing was provided: unpacking None would raise TypeError.
    if audio is None:
        return ""

    sr, y = audio

    # A zero-length clip has no peak to normalise by (np.max would raise).
    if y.size == 0:
        return ""

    # Down-mix multi-channel audio to mono by averaging over axis 1.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype() returns a new array, so the in-place scaling below never
    # touches the caller's buffer.
    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Skip when the clip is pure silence (or the
    # peak is NaN): dividing by zero would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
def list_example_files(directory, extensions=(".wav", ".mp3", ".ogg")):
    """Return example rows for the audio files found in *directory*.

    Args:
        directory: Path of the folder to scan (not recursive).
        extensions: Tuple of lower-case file suffixes to accept; matching
            is case-insensitive, so ``clip.WAV`` is accepted too.

    Returns:
        A list of single-element lists ``[[path], ...]``, one per matching
        regular file, sorted by file name so the order is stable
        (``os.listdir`` itself returns entries in arbitrary order).
        Returns an empty list when *directory* is not an existing directory.
    """
    if not os.path.isdir(directory):
        return []

    rows = []
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # Skip sub-directories that merely have an audio-looking name.
        if filename.lower().endswith(extensions) and os.path.isfile(path):
            rows.append([path])
    return rows


# Collect the bundled example clips (if any) to show in the interface.
examples_dir = "examples"
examples = list_example_files(examples_dir)

if os.path.isdir(examples_dir):
    print(f"Found {len(examples)} example files")
else:
    print("Examples directory not found")
|
|
|
|
|
# NOTE(review): several emoji in the two strings below look mis-encoded
# (mojibake). They are reproduced unchanged here - confirm the file is
# saved and read as UTF-8 and restore the intended characters.
_TITLE = "<div>ASRwanda ποΈ <br>Speech Recognition for Kinyarwanda</div>"

# HTML shown under the title: credits, a short model summary and usage hint.
_DESCRIPTION = """
<div class="centered-content">
<div>
<p>
Developed with β€ by <a href="https://badrex.github.io/" style="color: #2563eb;">Badr al-Absi</a> β
</p>
<br>
<p style="font-size: 15px; line-height: 1.8;">
Muraho ππΌ
<br>
<br>
This is a demo for ASRwanda, a Transformer-based automatic speech recognition (ASR) system for Kinyarwanda language.
The underlying ASR model was trained on 500 hours of transcribed speech provided by
<a href="https://digitalumuganda.com/" style="color: #2563eb;">Digital Umuganda</a> as part of the Kinyarwanda
<a href="https://www.kaggle.com/competitions/kinyarwanda-automatic-speech-recognition-track-a" style="color: #2563eb;"> ASR hackathon</a> on Kaggle.
<br>
<p style="font-size: 15px; line-height: 1.8;">
Simply <strong>upload an audio file</strong> π€ or <strong>record yourself speaking</strong> ποΈβΊοΈ to try out the model!
</p>
</div>
</div>
"""

# Single-function UI: an audio input wired to `transcribe`, with the
# transcription shown as plain text.
audio_input = gr.Audio()

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title=_TITLE,
    description=_DESCRIPTION,
    # Pass None rather than an empty list when no example clips were found.
    examples=examples or None,
    cache_examples=False,
    flagging_mode=None,
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()