Spaces:
Sleeping
Sleeping
File size: 2,771 Bytes
a355d31 f58319d b89d1a0 a355d31 86031f7 a7fc99e 86031f7 4ad281b 86031f7 4ad281b 86031f7 a7fc99e 86031f7 f58319d a355d31 780f607 a355d31 a7fc99e 86031f7 a355d31 b89d1a0 2ab57bd a355d31 86031f7 7769035 0a287e7 a355d31 2ab57bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import gradio as gr
from transformers import pipeline
import os
def transcribe(audio, language):
model_map = {
"hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
"igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
"yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
"zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
"xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
"afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
"bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
"shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
"luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
"swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
"lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
"amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
"kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
"oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
"akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
"ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
"wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
"bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}
# load eval pipeline
asr = pipeline("automatic-speech-recognition", model=model_map[language], device=0, token=os.getenv('HF_TOKEN'))
text = asr(audio)["text"]
return text
# Languages offered in the dropdown; each value is passed to `transcribe`
# as its `language` argument.
_LANGUAGES = [
    "hausa",
    "igbo",
    "yoruba",
    "zulu",
    "xhosa",
    "afrikaans",
    "bemba",
    "shona",
    "luganda",
    "swahili",
    "lingala",
    "amharic",
    "kinyarwanda",
    "oromo",
    "akan",
    "ewe",
    "wolof",
    "bambara",
]

# Text shown under the title. Built with implicit string concatenation rather
# than a backslash continuation inside the literal, so source indentation can
# never leak into the rendered text.
_DESCRIPTION = (
    "This space serves as a realtime demo for automatic speech recognition "
    "models developed by Mak-CAD under the auspices of Gates Foundation for "
    "18 African languages using open source data."
    "\nWe would appreciate your feedback on these models, you can share your "
    "feedback via this form https://forms.gle/RbzpwBFbC6Lcx5V78 :)"
)

# Two inputs (an uploaded or recorded clip delivered as a file path, and the
# language choice) feed `transcribe`; its return value is shown as text.
asr_app = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath"),
        gr.Dropdown(_LANGUAGES),
    ],
    outputs="text",
    title="ASR Africa",
    description=_DESCRIPTION,
)

# Start the web app as soon as the script is executed.
asr_app.launch()
|