Alvin-Nahabwe committed on
Commit
a7fc99e
·
1 Parent(s): 2ab57bd

Updated app.py: created mappings for model selection

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -2,15 +2,26 @@ import gradio as gr
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM
3
  import os
4
 
5
- def transcribe(audio, model_id, model_revison):
6
- if model_revison != "main":
 
 
 
 
 
 
 
 
 
 
 
7
  # load processor
8
- p = Wav2Vec2ProcessorWithLM.from_pretrained(model_id, revision=model_revison)
9
  # load eval pipeline
10
- asr = pipeline("automatic-speech-recognition", model=model_id, tokenizer=p.tokenizer, feature_extractor=p.feature_extractor, decoder=p.decoder, token=os.getenv('HF_TOKEN'))
11
  else:
12
  # load eval pipeline
13
- asr = pipeline("automatic-speech-recognition", model=model_id, token=os.getenv('HF_TOKEN'))
14
 
15
  text = asr(audio)["text"]
16
  return text
@@ -21,23 +32,26 @@ asr_app = gr.Interface(
21
  gr.Audio(sources=["upload", "microphone"], type="filepath"),
22
  gr.Dropdown(
23
  [
24
- "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
25
- "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
26
- "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0"
27
  ]
28
  ),
29
- gr.Radio(["main","lm"])
30
  ],
31
  examples=[
32
- ["./examples/CV/hausa/common_voice_ha_32885169.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0", "lm"],
33
- ["./examples/CV/hausa/common_voice_ha_29417456.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0", "lm"],
34
- ["./examples/CV/hausa/common_voice_ha_28554827.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0", "main"],
35
- ["./examples/CV/igbo/common_voice_ig_31594237.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0", "lm"],
36
- ["./examples/CV/igbo/common_voice_ig_30710992.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0", "lm"],
37
- ["./examples/CV/igbo/common_voice_ig_30692048.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0", "main"],
38
- ["./examples/CV/yoruba/common_voice_yo_36914062.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0", "lm"],
39
- ["./examples/CV/yoruba/common_voice_yo_36841367.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0", "lm"],
40
- ["./examples/CV/yoruba/common_voice_yo_36526475.wav", "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0", "main"]
 
 
 
41
  ],
42
  outputs="text",
43
  title="NaijaVoices ASR",
 
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM
3
  import os
4
 
5
+ def transcribe(audio, language, model):
6
+ model_map = {
7
+ "hausa": "asr-africa/wav2vec2-xls-r-1b-naijavoices-hausa-500hr-v0",
8
+ "igbo": "asr-africa/wav2vec2-xls-r-1b-naijavoices-igbo-500hr-v0",
9
+ "yoruba": "asr-africa/wav2vec2-xls-r-1b-naijavoices-yoruba-500hr-v0",
10
+ }
11
+
12
+ revison_map = {
13
+ "w/ LM": "lm",
14
+ "w/o LM": "main",
15
+ }
16
+
17
+ if revison_map[model] != "main":
18
  # load processor
19
+ p = Wav2Vec2ProcessorWithLM.from_pretrained(model_map[language], revision=revison_map[model])
20
  # load eval pipeline
21
+ asr = pipeline("automatic-speech-recognition", model=model_map[language], tokenizer=p.tokenizer, feature_extractor=p.feature_extractor, decoder=p.decoder, token=os.getenv('HF_TOKEN'))
22
  else:
23
  # load eval pipeline
24
+ asr = pipeline("automatic-speech-recognition", model=model_map[language], token=os.getenv('HF_TOKEN'))
25
 
26
  text = asr(audio)["text"]
27
  return text
 
32
  gr.Audio(sources=["upload", "microphone"], type="filepath"),
33
  gr.Dropdown(
34
  [
35
+ "hausa",
36
+ "igbo",
37
+ "yoruba"
38
  ]
39
  ),
40
+ gr.Radio(["w/o LM","w/ LM"])
41
  ],
42
  examples=[
43
+ ["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/o LM"],
44
+ ["./examples/CV/hausa/common_voice_ha_32885169.wav", "hausa", "w/ LM"],
45
+ ["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/o LM"],
46
+ ["./examples/CV/hausa/common_voice_ha_29417456.wav", "hausa", "w/ LM"],
47
+ ["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/o LM"],
48
+ ["./examples/CV/igbo/common_voice_ig_31594237.wav", "igbo", "w/ LM"],
49
+ ["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/o LM"],
50
+ ["./examples/CV/igbo/common_voice_ig_30710992.wav", "igbo", "w/ LM"],
51
+ ["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/o LM"],
52
+ ["./examples/CV/yoruba/common_voice_yo_36914062.wav", "yoruba", "w/ LM"],
53
+ ["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/o LM"],
54
+ ["./examples/CV/yoruba/common_voice_yo_36841367.wav", "yoruba", "w/ LM"]
55
  ],
56
  outputs="text",
57
  title="NaijaVoices ASR",