File size: 2,771 Bytes
a355d31
f58319d
b89d1a0
a355d31
86031f7
a7fc99e
 
 
 
86031f7
 
 
4ad281b
86031f7
 
 
 
 
 
 
4ad281b
 
86031f7
 
a7fc99e
 
86031f7
f58319d
a355d31
 
 
 
 
780f607
a355d31
 
 
 
a7fc99e
 
86031f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a355d31
b89d1a0
2ab57bd
a355d31
86031f7
7769035
0a287e7
a355d31
 
2ab57bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
from transformers import pipeline
import os

# Hugging Face Hub model id used for each language offered by the app.
MODEL_MAP = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/whisper_BIG-C_BEMBA_189hr_v1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}

# Most recently used pipeline, keyed by model id. Deliberately hand-rolled
# instead of functools.lru_cache: the previous entry is dropped *before* the
# next model is loaded, so the old model can be released first rather than
# sitting next to the new one while it loads.
_PIPELINE_CACHE = {}


def _get_pipeline(model_id):
    """Return the ASR pipeline for *model_id*, loading it only when needed."""
    asr = _PIPELINE_CACHE.get(model_id)
    if asr is None:
        _PIPELINE_CACHE.clear()
        # device=0 is kept from the original code; it selects the first GPU.
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_id,
            device=0,
            token=os.getenv('HF_TOKEN'),
        )
        _PIPELINE_CACHE[model_id] = asr
    return asr


def transcribe(audio, language):
    """Transcribe an audio clip with the model registered for *language*.

    Args:
        audio: The audio input handed to the pipeline (the UI passes a file
            path), or ``None`` when nothing was uploaded or recorded.
        language: A key of ``MODEL_MAP`` (e.g. ``"hausa"``).

    Returns:
        The ``"text"`` field of the pipeline result.

    Raises:
        gr.Error: If no audio was provided or *language* is not a key of
            ``MODEL_MAP``. Raised instead of letting a ``KeyError`` or a
            library error escape, so the UI can show a readable message.
    """
    if audio is None:
        raise gr.Error("Please upload or record an audio clip before submitting.")
    if language not in MODEL_MAP:
        raise gr.Error("Please select one of the supported languages.")

    asr = _get_pipeline(MODEL_MAP[language])
    return asr(audio)["text"]

# Languages offered in the dropdown. The selected value is passed to
# `transcribe` as its `language` argument.
_LANGUAGE_CHOICES = [
    "hausa",
    "igbo",
    "yoruba",
    "zulu",
    "xhosa",
    "afrikaans",
    "bemba",
    "shona",
    "luganda",
    "swahili",
    "lingala",
    "amharic",
    "kinyarwanda",
    "oromo",
    "akan",
    "ewe",
    "wolof",
    "bambara",
]

# Text shown under the title. The four spaces before "\n" were already part of
# the original string (they came from the indentation after a backslash line
# continuation) and are kept so the displayed text stays the same apart from
# the spelling fix ("auspicies" -> "auspices").
_DESCRIPTION = (
    "This space serves as a realtime demo for automatic speech recognition models developed by Mak-CAD under the auspices of Gates Foundation for 18 African languages using open source data."
    "    \nWe would appreciate your feedback on these models, you can share your feedback via this form https://forms.gle/RbzpwBFbC6Lcx5V78 :)"
)

# Two inputs, in the order `transcribe` expects them: the audio clip (as a
# file path, from upload or microphone) and the language.
asr_app = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath"),
        gr.Dropdown(_LANGUAGE_CHOICES),
    ],
    outputs="text",
    title="ASR Africa",
    description=_DESCRIPTION,
)

# Launched at import time, exactly as before, so existing deployments that
# simply run this file keep working.
asr_app.launch()