Spaces:
Sleeping
Sleeping
Commit
·
a7fc99e
1
Parent(s):
2ab57bd
Updated app.py: created mappings for model selection
Browse files
app.py
CHANGED
@@ -2,15 +2,26 @@ import gradio as gr
|
|
2 |
from transformers import pipeline, Wav2Vec2ProcessorWithLM
|
3 |
import os
|
4 |
|
5 |
-
def transcribe(audio,
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
# load processor
|
8 |
-
p = Wav2Vec2ProcessorWithLM.from_pretrained(
|
9 |
# load eval pipeline
|
10 |
-
asr = pipeline("automatic-speech-recognition", model=
|
11 |
else:
|
12 |
# load eval pipeline
|
13 |
-
asr = pipeline("automatic-speech-recognition", model=
|
14 |
|
15 |
text = asr(audio)["text"]
|
16 |
return text
|
@@ -21,23 +32,26 @@ asr_app = gr.Interface(
|
|
21 |
gr.Audio(sources=["upload", "microphone"], type="filepath"),
|
22 |
gr.Dropdown(
|
23 |
[
|
24 |
-
"
|
25 |
-
"
|
26 |
-
"
|
27 |
]
|
28 |
),
|
29 |
-
gr.Radio(["
|
30 |
],
|
31 |
examples=[
|
32 |
-
["./examples/CV/hausa/common_voice_ha_32885169.wav", "
|
33 |
-
["./examples/CV/hausa/
|
34 |
-
["./examples/CV/hausa/
|
35 |
-
["./examples/CV/
|
36 |
-
["./examples/CV/igbo/
|
37 |
-
["./examples/CV/igbo/
|
38 |
-
["./examples/CV/
|
39 |
-
["./examples/CV/
|
40 |
-
["./examples/CV/yoruba/
|
|
|
|
|
|
|
41 |
],
|
42 |
outputs="text",
|
43 |
title="NaijaVoices ASR",
|
|
|
2 |
from transformers import pipeline, Wav2Vec2ProcessorWithLM
|
3 |
import os
|
4 |
|
5 |
+
def transcribe(audio, language, model):
    """Transcribe an audio file with a NaijaVoices wav2vec2 checkpoint.

    Args:
        audio: Path to the audio file (Gradio ``type="filepath"`` input).
        language: One of ``"hausa"``, ``"igbo"``, ``"yoruba"`` — selects the
            Hub checkpoint to load.
        model: ``"w/ LM"`` or ``"w/o LM"`` — whether to decode with the
            n-gram language model published on the checkpoint's ``lm`` revision.

    Returns:
        str: The transcribed text.

    Raises:
        KeyError: If ``language`` or ``model`` is not one of the known choices.
    """
    # Language choice -> Hugging Face Hub checkpoint id.
    model_map = {
        "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
        "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
        "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
    }

    # UI radio choice -> Hub revision (the LM decoder lives on the "lm" branch).
    # Fixes the original's misspelled local name "revison_map".
    revision_map = {
        "w/ LM": "lm",
        "w/o LM": "main",
    }

    if revision_map[model] != "main":
        # Load a processor bundling the LM decoder from the "lm" revision.
        p = Wav2Vec2ProcessorWithLM.from_pretrained(
            model_map[language], revision=revision_map[model]
        )
        # NOTE(review): the pipeline still pulls model weights from the default
        # ("main") revision — only the processor comes from "lm". Pass
        # revision= here too if the "lm" branch carries different weights.
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_map[language],
            tokenizer=p.tokenizer,
            feature_extractor=p.feature_extractor,
            decoder=p.decoder,
            token=os.getenv('HF_TOKEN'),
        )
    else:
        # Plain CTC decoding, no external language model.
        asr = pipeline(
            "automatic-speech-recognition",
            model=model_map[language],
            token=os.getenv('HF_TOKEN'),
        )

    text = asr(audio)["text"]
    return text
|
|
|
32 |
gr.Audio(sources=["upload", "microphone"], type="filepath"),
|
33 |
gr.Dropdown(
|
34 |
[
|
35 |
+
"hausa",
|
36 |
+
"igbo",
|
37 |
+
"yoruba"
|
38 |
]
|
39 |
),
|
40 |
+
gr.Radio(["w/o LM","w/ LM"])
|
41 |
],
|
42 |
examples=[
|
43 |
+
["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/o LM"],
|
44 |
+
["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/ LM"],
|
45 |
+
["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/o LM"],
|
46 |
+
["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/ LM"],
|
47 |
+
["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/o LM"],
|
48 |
+
["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/ LM"],
|
49 |
+
["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/o LM"],
|
50 |
+
["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/ LM"],
|
51 |
+
["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/o LM"],
|
52 |
+
["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/ LM"],
|
53 |
+
["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/o LM"],
|
54 |
+
["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/ LM"]
|
55 |
],
|
56 |
outputs="text",
|
57 |
title="NaijaVoices ASR",
|