File size: 2,664 Bytes
a355d31
f58319d
b89d1a0
a355d31
86031f7
a7fc99e
 
 
 
86031f7
 
 
660e348
86031f7
 
 
 
 
 
 
 
 
 
 
a7fc99e
 
86031f7
f58319d
a355d31
 
 
 
 
780f607
a355d31
 
 
 
a7fc99e
 
86031f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a355d31
b89d1a0
2ab57bd
a355d31
86031f7
 
a355d31
 
2ab57bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from transformers import pipeline
import os

# Hugging Face model repository used for each supported language.
# Keys are the lowercase language names passed in as `language`.
MODEL_MAP = {
    "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
    "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
    "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    "zulu": "asr-africa/W2V2-Bert_nchlt_speech_corpus_Fleurs_ZULU_63hr_v1",
    "xhosa": "asr-africa/wav2vec2_xls_r_300m_nchlt_speech_corpus_Fleurs_XHOSA_63hr_v1",
    "afrikaans": "asr-africa/mms-1B_all_nchlt_speech_corpus_Fleurs_CV_AFRIKAANS_57hr_v1",
    "bemba": "asr-africa/w2v-bert-2.0-BIG_C-AMMI-BEMBA_SPEECH_CORPUS-BEMBA-189hrs-V1",
    "shona": "asr-africa/W2V2_Bert_Afrivoice_FLEURS_Shona_100hr_v1",
    "luganda": "asr-africa/whisper-small-CV-Fleurs-lg-313hrs-v1",
    "swahili": "asr-africa/wav2vec2-xls-r-300m-CV_Fleurs_AMMI_ALFFA-sw-400hrs-v1",
    "lingala": "asr-africa/wav2vec2-xls-r-300m-Fleurs_AMMI_AFRIVOICE_LRSC-ln-109hrs-v2",
    "amharic": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-amh-200hrs-v1",
    "kinyarwanda": "asr-africa/facebook-mms-1b-all-common_voice_fleurs-rw-100hrs-v1",
    "oromo": "asr-africa/mms-1b-all-Sagalee-orm-85hrs-4",
    # The akan/ewe repositories were previously swapped, so each language was
    # transcribed with the other's model.
    "akan": "asr-africa/wav2vec2-xls-r-akan-100-hours",
    "ewe": "asr-africa/wav2vec2-xls-r-ewe-100-hours",
    "wolof": "asr-africa/w2v2-bert-Wolof-20-hours-Google-Fleurs-ALF-dataset",
    "bambara": "asr-africa/mms-bambara-50-hours-mixed-bambara-dataset",
}


def transcribe(audio, language):
    """Transcribe an audio clip with the ASR model registered for *language*.

    Args:
        audio: The audio input handed to the speech-recognition pipeline
            (the Gradio audio component in this file supplies a file path).
        language: One of the keys of ``MODEL_MAP`` (e.g. ``"hausa"``).

    Returns:
        The ``"text"`` field of the pipeline's result.

    Raises:
        gr.Error: If no audio was provided, no language was selected, or the
            language has no registered model. ``gr.Error`` messages are shown
            to the user in the Gradio UI.
    """
    # Validate before loading anything: building the pipeline is the expensive
    # step, so reject unusable requests first.
    if audio is None:
        raise gr.Error("Please upload or record an audio clip first.")
    if language is None:
        raise gr.Error("Please select a language.")
    if language not in MODEL_MAP:
        raise gr.Error(f"Unsupported language: {language!r}")

    # The pipeline is built on every call and pinned to device index 0.
    # NOTE(review): caching pipelines would speed up repeat requests but keeps
    # more models in memory -- confirm the hardware budget before adding it.
    asr = pipeline(
        "automatic-speech-recognition",
        model=MODEL_MAP[language],
        device=0,
        token=os.getenv('HF_TOKEN'),
    )

    return asr(audio)["text"]

# Languages offered in the dropdown. Each entry must be a language name that
# transcribe() can resolve to a model.
LANGUAGE_CHOICES = [
    "hausa",
    "igbo",
    "yoruba",
    "zulu",
    "xhosa",
    "afrikaans",
    "bemba",
    "shona",
    "luganda",
    "swahili",
    "lingala",
    "amharic",
    "kinyarwanda",
    "oromo",
    "akan",
    "ewe",
    "wolof",
    "bambara",
]

# Gradio UI: an audio input (upload or microphone, delivered to transcribe()
# as a file path) plus a language selector, producing plain text.
asr_app = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath"),
        gr.Dropdown(LANGUAGE_CHOICES),
    ],
    outputs="text",
    title="ASR Africa",
    # NOTE(review): the description says 19 languages, but 18 are offered in
    # the dropdown -- confirm which number is intended.
    description="This space serves as a realtime demo for automatic speech recognition models developed by Mak-CAD under the auspices of Gates Foundation for 19 African languages using open source data.",
)

# Start the web server for the demo.
asr_app.launch()