ales committed on
Commit
6b93fd2
·
1 Parent(s): 51f7123

changed audio loading

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -2,7 +2,7 @@ from pprint import pformat
2
 
3
  from huggingface_hub import hf_hub_download
4
 
5
- import datasets as hfd
6
 
7
  import gradio as gr
8
 
@@ -11,15 +11,12 @@ from pipeline import PreTrainedPipeline
11
 
12
  HF_HUB_URL = 'ales/wav2vec2-cv-be'
13
  LM_HUB_FP = 'language_model/cv8be_5gram.bin'
 
14
 
15
 
16
  def main(audio_fp: str):
17
- # read and preprocess audio with huggingface.datasets
18
- ds = hfd.Dataset.from_dict({'path': [audio_fp]})
19
- ds = ds.cast_column('path', hfd.Audio(sampling_rate=16_000, mono=True))
20
- ds = ds.rename_column('path', 'audio')
21
- inputs = ds[0]['audio']['array']
22
- sampling_rate = ds[0]['audio']['sampling_rate']
23
 
24
  # download Language Model from HF Hub
25
  lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
@@ -34,7 +31,7 @@ def main(audio_fp: str):
34
  # add technical information to the output
35
  tech_data = pipeline_res
36
  del tech_data['text']
37
- tech_data['sampling_rate_orig'] = sampling_rate
38
  tech_data['inputs_shape'] = inputs.shape
39
  tech_data['inputs_max'] = inputs.max().item()
40
  tech_data['inputs_min'] = inputs.min().item()
@@ -43,6 +40,7 @@ def main(audio_fp: str):
43
 
44
  return text, tech_data_str
45
 
 
46
  article = """
47
  The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
48
 
@@ -61,8 +59,8 @@ iface = gr.Interface(
61
  ],
62
  title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
63
  description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
64
- 'Акустычная мадэль + моўная мадэль.'
65
- ),
66
  article=article,
67
  )
68
 
 
2
 
3
  from huggingface_hub import hf_hub_download
4
 
5
+ import librosa
6
 
7
  import gradio as gr
8
 
 
11
 
12
  HF_HUB_URL = 'ales/wav2vec2-cv-be'
13
  LM_HUB_FP = 'language_model/cv8be_5gram.bin'
14
+ MODEL_SAMPLING_RATE = 16_000 # 16kHz
15
 
16
 
17
  def main(audio_fp: str):
18
+ # read audio file
19
+ inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]
 
 
 
 
20
 
21
  # download Language Model from HF Hub
22
  lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
 
31
  # add technical information to the output
32
  tech_data = pipeline_res
33
  del tech_data['text']
34
+ tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
35
  tech_data['inputs_shape'] = inputs.shape
36
  tech_data['inputs_max'] = inputs.max().item()
37
  tech_data['inputs_min'] = inputs.min().item()
 
40
 
41
  return text, tech_data_str
42
 
43
+
44
  article = """
45
  The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
46
 
 
59
  ],
60
  title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
61
  description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
62
+ 'Акустычная мадэль + моўная мадэль.'
63
+ ),
64
  article=article,
65
  )
66