Spaces:
Runtime error
Runtime error
changed audio loading
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ from pprint import pformat
|
|
2 |
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
|
5 |
-
import datasets as hfd
|
6 |
|
7 |
import gradio as gr
|
8 |
|
@@ -11,15 +11,12 @@ from pipeline import PreTrainedPipeline
|
|
11 |
|
12 |
HF_HUB_URL = 'ales/wav2vec2-cv-be'
|
13 |
LM_HUB_FP = 'language_model/cv8be_5gram.bin'
|
|
|
14 |
|
15 |
|
16 |
def main(audio_fp: str):
|
17 |
-
# read
|
18 |
-
|
19 |
-
ds = ds.cast_column('path', hfd.Audio(sampling_rate=16_000, mono=True))
|
20 |
-
ds = ds.rename_column('path', 'audio')
|
21 |
-
inputs = ds[0]['audio']['array']
|
22 |
-
sampling_rate = ds[0]['audio']['sampling_rate']
|
23 |
|
24 |
# download Language Model from HF Hub
|
25 |
lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
|
@@ -34,7 +31,7 @@ def main(audio_fp: str):
|
|
34 |
# add technical information to the output
|
35 |
tech_data = pipeline_res
|
36 |
del tech_data['text']
|
37 |
-
tech_data['sampling_rate'] = sampling_rate
|
38 |
tech_data['inputs_shape'] = inputs.shape
|
39 |
tech_data['inputs_max'] = inputs.max().item()
|
40 |
tech_data['inputs_min'] = inputs.min().item()
|
@@ -43,6 +40,7 @@ def main(audio_fp: str):
|
|
43 |
|
44 |
return text, tech_data_str
|
45 |
|
|
|
46 |
article = """
|
47 |
The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
|
48 |
|
@@ -61,8 +59,8 @@ iface = gr.Interface(
|
|
61 |
],
|
62 |
title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
|
63 |
description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
|
64 |
-
|
65 |
-
|
66 |
article=article,
|
67 |
)
|
68 |
|
|
|
2 |
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
|
5 |
+
import librosa
|
6 |
|
7 |
import gradio as gr
|
8 |
|
|
|
11 |
|
12 |
HF_HUB_URL = 'ales/wav2vec2-cv-be'
|
13 |
LM_HUB_FP = 'language_model/cv8be_5gram.bin'
|
14 |
+
MODEL_SAMPLING_RATE = 16_000 # 16kHz
|
15 |
|
16 |
|
17 |
def main(audio_fp: str):
|
18 |
+
# read audio file
|
19 |
+
inputs = librosa.load(audio_fp, sr=MODEL_SAMPLING_RATE, mono=True)[0]
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# download Language Model from HF Hub
|
22 |
lm_fp = hf_hub_download(repo_id=HF_HUB_URL, filename=LM_HUB_FP)
|
|
|
31 |
# add technical information to the output
|
32 |
tech_data = pipeline_res
|
33 |
del tech_data['text']
|
34 |
+
tech_data['model_sampling_rate'] = MODEL_SAMPLING_RATE
|
35 |
tech_data['inputs_shape'] = inputs.shape
|
36 |
tech_data['inputs_max'] = inputs.max().item()
|
37 |
tech_data['inputs_min'] = inputs.min().item()
|
|
|
40 |
|
41 |
return text, tech_data_str
|
42 |
|
43 |
+
|
44 |
article = """
|
45 |
The model used can be found here: [ales/wav2vec2-cv-be](https://huggingface.co/ales/wav2vec2-cv-be)
|
46 |
|
|
|
59 |
],
|
60 |
title='wav2vec2 fine-tuned on CommonVoice 8 Be + Language Model',
|
61 |
description=('Мадэль распазнаваньня беларускага маўленьня, навучаная на датсэце Common Voice 8.\n'
|
62 |
+
'Акустычная мадэль + моўная мадэль.'
|
63 |
+
),
|
64 |
article=article,
|
65 |
)
|
66 |
|