Spaces:

ales
/

wav2vec2-cv-be-lm

Runtime error

App Files Files Community

ales committed on Apr 14, 2022

Commit

6b93fd2

1 Parent(s): 51f7123

changed audio loading

Browse files

Files changed (1) hide show

app.py +8 -10

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ from pprint import pformat
 from huggingface_hub import hf_hub_download
-import datasets as hfd
 import gradio as gr
@@ -11,15 +11,12 @@ from pipeline import PreTrainedPipeline
 HF_HUB_URL = 'ales/wav2vec2-cv-be'
 LM_HUB_FP = 'language_model/cv8be_5gram.bin'
 def main(audio_fp: str):
-    # read and preprocess audio with huggingface.datasets
-    ds = hfd.Dataset.from_dict({'path': [audio_fp]})
-    ds = ds.cast_column('path', hfd.Audio(sampling_rate=16_000, mono=True))
-    ds = ds.rename_column('path', 'audio')
-    inputs = ds[0]['audio']['array']
-    sampling_rate = ds[0]['audio']['sampling_rate']
     # download Language Model from HF Hub
     lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
@@ -34,7 +31,7 @@ def main(audio_fp: str):
     # add technical information to the output
     tech_data = pipeline_res
     del tech_data['text']
-    tech_data['sampling_rate_orig'] = sampling_rate
     tech_data['inputs_shape'] = inputs.shape
     tech_data['inputs_max'] = inputs.max().item()
     tech_data['inputs_min'] = inputs.min().item()
@@ -43,6 +40,7 @@ def main(audio_fp: str):
     return text, tech_data_str
 article = """
 The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
@@ -61,8 +59,8 @@ iface = gr.Interface(
     ],
     title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
     description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
-                'Акустычная мадэль + моўная мадэль.'
-    ),
     article=article,
 )

 from huggingface_hub import hf_hub_download
+import librosa
 import gradio as gr
 HF_HUB_URL = 'ales/wav2vec2-cv-be'
 LM_HUB_FP = 'language_model/cv8be_5gram.bin'
+MODEL_SAMPLING_RATE = 16_000  # 16kHz
 def main(audio_fp: str):
+    # read audio file
+    inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]
     # download Language Model from HF Hub
     lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
     # add technical information to the output
     tech_data = pipeline_res
     del tech_data['text']
+    tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
     tech_data['inputs_shape'] = inputs.shape
     tech_data['inputs_max'] = inputs.max().item()
     tech_data['inputs_min'] = inputs.min().item()
     return text, tech_data_str
 article = """
 The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
     ],
     title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
     description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
+                 'Акустычная мадэль + моўная мадэль.'
+                 ),
     article=article,
 )