speechbrain
/

asr-wav2vec2-librispeech

Automatic Speech Recognition

hf-asr-leaderboard

Model card Files Files and versions

Titouan committed on Jun 5, 2022

Commit

e1fee45

·

1 Parent(s): b02bcdd

init

Files changed (4) hide show

asr.ckpt +3 -0
hyperparams.yaml +67 -0
tokenizer.ckpt +3 -0
wav2vec2.ckpt +3 -0

asr.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d859bb4fa9e63d6645c8f4693437cf09896133a95f036d53d48d1a61ea443d7e
+size 8526821

hyperparams.yaml ADDED Viewed

	@@ -0,0 +1,67 @@

+# ################################
+# Model: wav2vec2 + DNN + CTC
+# Augmentation: SpecAugment
+# Authors: Sung-Lin Yeh 2021, Titouan Parcollet 2022
+# ################################
+# Hub identifier of the biggest Fairseq English wav2vec2 model
+# (passed as `source` to the wav2vec2 wrapper below).
+wav2vec2_hub: facebook/wav2vec2-large-960h-lv60-self
+sample_rate: 16000
+# Model parameters
+activation: !name:torch.nn.LeakyReLU
+dnn_layers: 2
+dnn_neurons: 1024
+# Single switch for freezing the wav2vec2 encoder; referenced by `wav2vec2.freeze`.
+freeze_wav2vec: True
+# Outputs
+output_neurons: 31  # Size of the output layer; blank is index 0 (bos/eos are 1/2, see below)
+# Decoding parameters
+blank_index: 0
+bos_index: 1
+eos_index: 2
+# DNN stack applied on top of the wav2vec2 features.
+# NOTE(review): the last input dimension (1024) presumably matches the
+# wav2vec2 output size -- confirm if `wav2vec2_hub` is changed.
+enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
+   input_shape: [null, null, 1024]
+   activation: !ref <activation>
+   dnn_blocks: !ref <dnn_layers>
+   dnn_neurons: !ref <dnn_neurons>
+wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
+   source: !ref <wav2vec2_hub>
+   output_norm: True
+   # Use the declared hyperparameter instead of a second hard-coded literal,
+   # so that changing `freeze_wav2vec` actually takes effect.
+   freeze: !ref <freeze_wav2vec>
+   save_path: model_checkpoints
+# Linear projection from the DNN output to the output tokens.
+ctc_lin: !new:speechbrain.nnet.linear.Linear
+   input_size: !ref <dnn_neurons>
+   n_neurons: !ref <output_neurons>
+log_softmax: !new:speechbrain.nnet.activations.Softmax
+   apply_log: True
+ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
+   blank_index: !ref <blank_index>
+# Groups `enc` and `ctc_lin` so the pretrainer can load them as one `asr` loadable.
+asr_model: !new:torch.nn.ModuleList
+    - [!ref <enc>, !ref <ctc_lin>]
+tokenizer: !new:speechbrain.dataio.encoder.CTCTextEncoder
+# Full inference chain: wav2vec2 -> enc -> ctc_lin.
+encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
+    wav2vec2: !ref <wav2vec2>
+    enc: !ref <enc>
+    ctc_lin: !ref <ctc_lin>
+# Greedy CTC decoding using the same blank index as the loss.
+decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
+    blank_id: !ref <blank_index>
+modules:
+   encoder: !ref <encoder>
+# NOTE(review): the loadable names presumably map to the checkpoint files
+# added alongside this file (wav2vec2.ckpt, asr.ckpt, tokenizer.ckpt) -- confirm.
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+   loadables:
+      wav2vec2: !ref <wav2vec2>
+      asr: !ref <asr_model>
+      tokenizer: !ref <tokenizer>

tokenizer.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28129425176b22cdc33e21f8b2edfdffc74b41ff441e54ec6daa8d5d739ab320
+size 426

wav2vec2.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4c9a695eee6c61eb6c0452d693db18e682642042525a88ca80430673a90abc8
+size 1261920693