csikasote commited on
Commit
294a0c7
·
verified ·
1 Parent(s): 910e50d

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base-960h
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # wav2vec2-base-librispeech-model
18
 
19
- This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.8515
22
- - Wer: 0.7236
23
 
24
  ## Model description
25
 
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base-960h
5
  tags:
6
+ - automatic-speech-recognition
7
+ - libri10h
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # wav2vec2-base-librispeech-model
21
 
22
+ This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on the LIBRI10H - ENG dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.8514
25
+ - Wer: 0.7245
26
 
27
  ## Model description
28
 
adapter.eng.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bb6010e8765394d31c9279a44c0f53a4e7d36c9187477109fa5148df2be0975
3
- size 38108196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5227271458e2148b60995be53604c884de9775ac1d560989c01006974d075768
3
+ size 19101500
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.5515139698982239,
4
- "eval_runtime": 155.9781,
5
  "eval_samples": 2604,
6
- "eval_samples_per_second": 16.695,
7
- "eval_steps_per_second": 4.174,
8
- "eval_wer": 0.4640569678163663,
9
- "total_flos": 3.512425533037396e+19,
10
- "train_loss": 0.26323221057394275,
11
- "train_runtime": 51849.7109,
12
  "train_samples": 2759,
13
- "train_samples_per_second": 5.321,
14
- "train_steps_per_second": 0.665
15
  }
 
1
  {
2
+ "epoch": 99.4231884057971,
3
+ "eval_loss": 0.8513913154602051,
4
+ "eval_runtime": 128.9183,
5
  "eval_samples": 2604,
6
+ "eval_samples_per_second": 20.199,
7
+ "eval_steps_per_second": 2.529,
8
+ "eval_wer": 0.7245295779130365,
9
+ "total_flos": 3.3406327721118188e+19,
10
+ "train_loss": 1.118092892668968,
11
+ "train_runtime": 45286.7863,
12
  "train_samples": 2759,
13
+ "train_samples_per_second": 6.092,
14
+ "train_steps_per_second": 0.38
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.5515139698982239,
4
- "eval_runtime": 155.9781,
5
  "eval_samples": 2604,
6
- "eval_samples_per_second": 16.695,
7
- "eval_steps_per_second": 4.174,
8
- "eval_wer": 0.4640569678163663
9
  }
 
1
  {
2
+ "epoch": 99.4231884057971,
3
+ "eval_loss": 0.8513913154602051,
4
+ "eval_runtime": 128.9183,
5
  "eval_samples": 2604,
6
+ "eval_samples_per_second": 20.199,
7
+ "eval_steps_per_second": 2.529,
8
+ "eval_wer": 0.7245295779130365
9
  }
runs/Mar09_21-57-37_srvrocgpu011.uct.ac.za/events.out.tfevents.1741595873.srvrocgpu011.uct.ac.za.3447107.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b617634a3dc30711bad72bf419557dfd9628b12843f221a6d328064b73bbe268
3
+ size 412
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 100.0,
3
- "total_flos": 3.512425533037396e+19,
4
- "train_loss": 0.26323221057394275,
5
- "train_runtime": 51849.7109,
6
  "train_samples": 2759,
7
- "train_samples_per_second": 5.321,
8
- "train_steps_per_second": 0.665
9
  }
 
1
  {
2
+ "epoch": 99.4231884057971,
3
+ "total_flos": 3.3406327721118188e+19,
4
+ "train_loss": 1.118092892668968,
5
+ "train_runtime": 45286.7863,
6
  "train_samples": 2759,
7
+ "train_samples_per_second": 6.092,
8
+ "train_steps_per_second": 0.38
9
  }
trainer_state.json CHANGED
@@ -1,1132 +1,1404 @@
1
  {
2
- "best_metric": 0.5514756441116333,
3
- "best_model_checkpoint": "/scratch/skscla001/speech/results/wav2vec2-base-librispeech-model/checkpoint-5500",
4
- "epoch": 100.0,
5
- "eval_steps": 500,
6
- "global_step": 34500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.4492753623188406,
13
- "grad_norm": 0.6375983953475952,
14
- "learning_rate": 0.00029939999999999996,
15
- "loss": 3.544,
16
- "step": 500
17
  },
18
  {
19
- "epoch": 1.4492753623188406,
20
- "eval_loss": 1.7567861080169678,
21
- "eval_runtime": 155.5701,
22
- "eval_samples_per_second": 16.738,
23
- "eval_steps_per_second": 4.185,
24
- "eval_wer": 0.9819787564148467,
25
- "step": 500
26
  },
27
  {
28
- "epoch": 2.898550724637681,
29
- "grad_norm": 0.6045961976051331,
30
- "learning_rate": 0.00029559705882352936,
31
- "loss": 1.42,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "step": 1000
33
  },
34
  {
35
- "epoch": 2.898550724637681,
36
- "eval_loss": 1.027501106262207,
37
- "eval_runtime": 153.8637,
38
- "eval_samples_per_second": 16.924,
39
- "eval_steps_per_second": 4.231,
40
- "eval_wer": 0.8167641325536062,
41
  "step": 1000
42
  },
43
  {
44
- "epoch": 4.3478260869565215,
45
- "grad_norm": 0.6760497689247131,
46
- "learning_rate": 0.000291185294117647,
47
- "loss": 1.0403,
48
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  {
51
- "epoch": 4.3478260869565215,
52
- "eval_loss": 0.8305047154426575,
53
- "eval_runtime": 154.0362,
54
- "eval_samples_per_second": 16.905,
55
- "eval_steps_per_second": 4.226,
56
- "eval_wer": 0.717329036877909,
57
- "step": 1500
58
  },
59
  {
60
- "epoch": 5.797101449275362,
61
- "grad_norm": 0.8079296946525574,
62
- "learning_rate": 0.0002867735294117647,
63
- "loss": 0.8574,
 
 
 
 
 
 
 
 
 
64
  "step": 2000
65
  },
66
  {
67
- "epoch": 5.797101449275362,
68
- "eval_loss": 0.729262113571167,
69
- "eval_runtime": 154.0605,
70
- "eval_samples_per_second": 16.902,
71
- "eval_steps_per_second": 4.226,
72
- "eval_wer": 0.6648565859092175,
73
  "step": 2000
74
  },
75
  {
76
- "epoch": 7.246376811594203,
77
- "grad_norm": 0.7156503200531006,
78
- "learning_rate": 0.00028236176470588235,
79
- "loss": 0.7315,
80
- "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
  {
83
- "epoch": 7.246376811594203,
84
- "eval_loss": 0.6631811857223511,
85
- "eval_runtime": 154.2602,
86
- "eval_samples_per_second": 16.881,
87
- "eval_steps_per_second": 4.22,
88
- "eval_wer": 0.6025380912598958,
89
- "step": 2500
90
  },
91
  {
92
- "epoch": 8.695652173913043,
93
- "grad_norm": 0.9667319655418396,
94
- "learning_rate": 0.00027794999999999995,
95
- "loss": 0.6389,
 
 
 
 
 
 
 
 
 
96
  "step": 3000
97
  },
98
  {
99
- "epoch": 8.695652173913043,
100
- "eval_loss": 0.6286384463310242,
101
- "eval_runtime": 154.4422,
102
- "eval_samples_per_second": 16.861,
103
- "eval_steps_per_second": 4.215,
104
- "eval_wer": 0.5695389266817839,
105
  "step": 3000
106
  },
107
  {
108
- "epoch": 10.144927536231885,
109
- "grad_norm": 1.996129035949707,
110
- "learning_rate": 0.0002735382352941176,
111
- "loss": 0.5679,
112
- "step": 3500
113
  },
114
  {
115
- "epoch": 10.144927536231885,
116
- "eval_loss": 0.6101946830749512,
117
- "eval_runtime": 154.3916,
118
- "eval_samples_per_second": 16.866,
119
- "eval_steps_per_second": 4.217,
120
- "eval_wer": 0.548911962445797,
121
- "step": 3500
122
  },
123
  {
124
- "epoch": 11.594202898550725,
125
- "grad_norm": 0.8161708116531372,
126
- "learning_rate": 0.0002691264705882353,
127
- "loss": 0.5085,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  "step": 4000
129
  },
130
  {
131
- "epoch": 11.594202898550725,
132
- "eval_loss": 0.5863102078437805,
133
- "eval_runtime": 155.3264,
134
- "eval_samples_per_second": 16.765,
135
- "eval_steps_per_second": 4.191,
136
- "eval_wer": 0.5215419501133787,
137
  "step": 4000
138
  },
139
  {
140
- "epoch": 13.043478260869565,
141
- "grad_norm": 0.7935850024223328,
142
- "learning_rate": 0.00026471470588235294,
143
- "loss": 0.4579,
144
- "step": 4500
 
 
 
 
 
 
 
 
 
145
  },
146
  {
147
- "epoch": 13.043478260869565,
148
- "eval_loss": 0.5660598874092102,
149
- "eval_runtime": 155.0665,
150
- "eval_samples_per_second": 16.793,
151
- "eval_steps_per_second": 4.198,
152
- "eval_wer": 0.4933365158929069,
153
- "step": 4500
154
  },
155
  {
156
- "epoch": 14.492753623188406,
157
- "grad_norm": 0.871003270149231,
158
- "learning_rate": 0.00026030294117647054,
159
- "loss": 0.4097,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  "step": 5000
161
  },
162
  {
163
- "epoch": 14.492753623188406,
164
- "eval_loss": 0.5645673871040344,
165
- "eval_runtime": 155.2693,
166
- "eval_samples_per_second": 16.771,
167
- "eval_steps_per_second": 4.193,
168
- "eval_wer": 0.48229701237220035,
169
  "step": 5000
170
  },
171
  {
172
- "epoch": 15.942028985507246,
173
- "grad_norm": 0.7843791246414185,
174
- "learning_rate": 0.0002558911764705882,
175
- "loss": 0.382,
176
- "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  },
178
  {
179
- "epoch": 15.942028985507246,
180
- "eval_loss": 0.5514756441116333,
181
- "eval_runtime": 155.0315,
182
- "eval_samples_per_second": 16.797,
183
- "eval_steps_per_second": 4.199,
184
- "eval_wer": 0.46441500576838923,
185
- "step": 5500
186
  },
187
  {
188
- "epoch": 17.391304347826086,
189
- "grad_norm": 0.7231972217559814,
190
- "learning_rate": 0.00025147941176470586,
191
- "loss": 0.3426,
192
  "step": 6000
193
  },
194
  {
195
- "epoch": 17.391304347826086,
196
- "eval_loss": 0.5585278868675232,
197
- "eval_runtime": 155.3086,
198
- "eval_samples_per_second": 16.767,
199
- "eval_steps_per_second": 4.192,
200
- "eval_wer": 0.4513864025142221,
201
  "step": 6000
202
  },
203
  {
204
- "epoch": 18.840579710144926,
205
- "grad_norm": 0.8039381504058838,
206
- "learning_rate": 0.0002470676470588235,
207
- "loss": 0.32,
208
- "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  },
210
  {
211
- "epoch": 18.840579710144926,
212
- "eval_loss": 0.5598042607307434,
213
- "eval_runtime": 155.6698,
214
- "eval_samples_per_second": 16.728,
215
- "eval_steps_per_second": 4.182,
216
- "eval_wer": 0.4475076580339738,
217
- "step": 6500
218
  },
219
  {
220
- "epoch": 20.28985507246377,
221
- "grad_norm": 2.395310640335083,
222
- "learning_rate": 0.00024265588235294113,
223
- "loss": 0.2926,
224
  "step": 7000
225
  },
226
  {
227
- "epoch": 20.28985507246377,
228
- "eval_loss": 0.6245768666267395,
229
- "eval_runtime": 155.7542,
230
- "eval_samples_per_second": 16.719,
231
- "eval_steps_per_second": 4.18,
232
- "eval_wer": 0.4587062895333572,
233
  "step": 7000
234
  },
235
  {
236
- "epoch": 21.73913043478261,
237
- "grad_norm": 0.9574357867240906,
238
- "learning_rate": 0.00023824411764705882,
239
- "loss": 0.2735,
240
- "step": 7500
241
  },
242
  {
243
- "epoch": 21.73913043478261,
244
- "eval_loss": 0.5887025594711304,
245
- "eval_runtime": 156.166,
246
- "eval_samples_per_second": 16.675,
247
- "eval_steps_per_second": 4.169,
248
- "eval_wer": 0.44390738751641007,
249
- "step": 7500
250
  },
251
  {
252
- "epoch": 23.18840579710145,
253
- "grad_norm": 0.852486252784729,
254
- "learning_rate": 0.00023383235294117643,
255
- "loss": 0.257,
256
- "step": 8000
257
  },
258
  {
259
- "epoch": 23.18840579710145,
260
- "eval_loss": 0.5977800488471985,
261
- "eval_runtime": 155.6723,
262
- "eval_samples_per_second": 16.727,
263
- "eval_steps_per_second": 4.182,
264
- "eval_wer": 0.4355133866412062,
265
- "step": 8000
266
  },
267
  {
268
- "epoch": 24.63768115942029,
269
- "grad_norm": 0.9156618118286133,
270
- "learning_rate": 0.0002294205882352941,
271
- "loss": 0.2409,
272
- "step": 8500
273
  },
274
  {
275
- "epoch": 24.63768115942029,
276
- "eval_loss": 0.5721296072006226,
277
- "eval_runtime": 155.7717,
278
- "eval_samples_per_second": 16.717,
279
- "eval_steps_per_second": 4.179,
280
- "eval_wer": 0.4214703425229741,
281
- "step": 8500
282
  },
283
  {
284
- "epoch": 26.08695652173913,
285
- "grad_norm": 0.67136549949646,
286
- "learning_rate": 0.00022500882352941172,
287
- "loss": 0.2246,
288
- "step": 9000
289
  },
290
  {
291
- "epoch": 26.08695652173913,
292
- "eval_loss": 0.5978689789772034,
293
- "eval_runtime": 156.0083,
294
- "eval_samples_per_second": 16.691,
295
- "eval_steps_per_second": 4.173,
296
- "eval_wer": 0.4186856028961292,
297
- "step": 9000
298
  },
299
  {
300
- "epoch": 27.536231884057973,
301
- "grad_norm": 0.8318625092506409,
302
- "learning_rate": 0.00022059705882352938,
303
- "loss": 0.2139,
304
- "step": 9500
305
  },
306
  {
307
- "epoch": 27.536231884057973,
308
- "eval_loss": 0.6102660298347473,
309
- "eval_runtime": 155.8743,
310
- "eval_samples_per_second": 16.706,
311
- "eval_steps_per_second": 4.176,
312
- "eval_wer": 0.4145482754505311,
313
- "step": 9500
314
  },
315
  {
316
- "epoch": 28.985507246376812,
317
- "grad_norm": 0.7386115193367004,
318
- "learning_rate": 0.00021618529411764702,
319
- "loss": 0.2014,
320
- "step": 10000
321
  },
322
  {
323
- "epoch": 28.985507246376812,
324
- "eval_loss": 0.643622100353241,
325
- "eval_runtime": 156.0699,
326
- "eval_samples_per_second": 16.685,
327
- "eval_steps_per_second": 4.171,
328
- "eval_wer": 0.41568206229860366,
329
- "step": 10000
330
  },
331
  {
332
- "epoch": 30.434782608695652,
333
- "grad_norm": Infinity,
334
- "learning_rate": 0.00021179117647058822,
335
- "loss": 0.1917,
336
- "step": 10500
337
  },
338
  {
339
- "epoch": 30.434782608695652,
340
- "eval_loss": 0.6471384763717651,
341
- "eval_runtime": 156.2304,
342
- "eval_samples_per_second": 16.668,
343
- "eval_steps_per_second": 4.167,
344
- "eval_wer": 0.41878505788280224,
345
- "step": 10500
346
  },
347
  {
348
- "epoch": 31.884057971014492,
349
- "grad_norm": 0.8037901520729065,
350
- "learning_rate": 0.00020737941176470585,
351
- "loss": 0.184,
352
- "step": 11000
353
  },
354
  {
355
- "epoch": 31.884057971014492,
356
- "eval_loss": 0.6409507989883423,
357
- "eval_runtime": 156.4464,
358
- "eval_samples_per_second": 16.645,
359
- "eval_steps_per_second": 4.161,
360
- "eval_wer": 0.4067510044953654,
361
- "step": 11000
362
  },
363
  {
364
- "epoch": 33.333333333333336,
365
- "grad_norm": 0.8534353375434875,
366
- "learning_rate": 0.0002029676470588235,
367
- "loss": 0.1752,
368
- "step": 11500
369
  },
370
  {
371
- "epoch": 33.333333333333336,
372
- "eval_loss": 0.6426236629486084,
373
- "eval_runtime": 156.2362,
374
- "eval_samples_per_second": 16.667,
375
- "eval_steps_per_second": 4.167,
376
- "eval_wer": 0.408640649242153,
377
- "step": 11500
378
  },
379
  {
380
- "epoch": 34.78260869565217,
381
- "grad_norm": 0.8873021006584167,
382
- "learning_rate": 0.00019855588235294114,
383
- "loss": 0.169,
384
- "step": 12000
385
  },
386
  {
387
- "epoch": 34.78260869565217,
388
- "eval_loss": 0.6633431315422058,
389
- "eval_runtime": 160.0974,
390
- "eval_samples_per_second": 16.265,
391
- "eval_steps_per_second": 4.066,
392
- "eval_wer": 0.40253411306042886,
393
- "step": 12000
394
  },
395
  {
396
- "epoch": 36.231884057971016,
397
- "grad_norm": 0.6732174754142761,
398
- "learning_rate": 0.0001941441176470588,
399
- "loss": 0.1612,
400
- "step": 12500
401
  },
402
  {
403
- "epoch": 36.231884057971016,
404
- "eval_loss": 0.6465741395950317,
405
- "eval_runtime": 156.2372,
406
- "eval_samples_per_second": 16.667,
407
- "eval_steps_per_second": 4.167,
408
- "eval_wer": 0.396765723833393,
409
- "step": 12500
410
  },
411
  {
412
- "epoch": 37.68115942028985,
413
- "grad_norm": 0.7956398725509644,
414
- "learning_rate": 0.00018973235294117647,
415
- "loss": 0.1553,
416
- "step": 13000
417
  },
418
  {
419
- "epoch": 37.68115942028985,
420
- "eval_loss": 0.6572560667991638,
421
- "eval_runtime": 156.6011,
422
- "eval_samples_per_second": 16.628,
423
- "eval_steps_per_second": 4.157,
424
- "eval_wer": 0.39410033019055574,
425
- "step": 13000
426
  },
427
  {
428
- "epoch": 39.130434782608695,
429
- "grad_norm": 0.8513861298561096,
430
- "learning_rate": 0.0001853205882352941,
431
- "loss": 0.15,
432
- "step": 13500
433
  },
434
  {
435
- "epoch": 39.130434782608695,
436
- "eval_loss": 0.6988933086395264,
437
- "eval_runtime": 156.3407,
438
- "eval_samples_per_second": 16.656,
439
- "eval_steps_per_second": 4.164,
440
- "eval_wer": 0.39557226399331663,
441
- "step": 13500
442
  },
443
  {
444
- "epoch": 40.57971014492754,
445
- "grad_norm": 1.6812894344329834,
446
- "learning_rate": 0.00018090882352941176,
447
- "loss": 0.1442,
448
- "step": 14000
449
  },
450
  {
451
- "epoch": 40.57971014492754,
452
- "eval_loss": 0.7209036946296692,
453
- "eval_runtime": 156.2269,
454
- "eval_samples_per_second": 16.668,
455
- "eval_steps_per_second": 4.167,
456
- "eval_wer": 0.40621394756733104,
457
- "step": 14000
458
  },
459
  {
460
- "epoch": 42.028985507246375,
461
- "grad_norm": 0.685930609703064,
462
- "learning_rate": 0.0001764970588235294,
463
- "loss": 0.1409,
464
- "step": 14500
465
  },
466
  {
467
- "epoch": 42.028985507246375,
468
- "eval_loss": 0.6950096487998962,
469
- "eval_runtime": 156.3243,
470
- "eval_samples_per_second": 16.658,
471
- "eval_steps_per_second": 4.164,
472
- "eval_wer": 0.3960894299240164,
473
- "step": 14500
474
  },
475
  {
476
- "epoch": 43.47826086956522,
477
- "grad_norm": 0.5274556279182434,
478
- "learning_rate": 0.0001720941176470588,
479
- "loss": 0.1356,
480
- "step": 15000
481
  },
482
  {
483
- "epoch": 43.47826086956522,
484
- "eval_loss": 0.6815584897994995,
485
- "eval_runtime": 160.3066,
486
- "eval_samples_per_second": 16.244,
487
- "eval_steps_per_second": 4.061,
488
- "eval_wer": 0.38626327724072085,
489
- "step": 15000
490
  },
491
  {
492
- "epoch": 44.927536231884055,
493
- "grad_norm": 0.735133945941925,
494
- "learning_rate": 0.00016768235294117647,
495
- "loss": 0.134,
496
- "step": 15500
497
  },
498
  {
499
- "epoch": 44.927536231884055,
500
- "eval_loss": 0.6895952820777893,
501
- "eval_runtime": 156.4083,
502
- "eval_samples_per_second": 16.649,
503
- "eval_steps_per_second": 4.162,
504
- "eval_wer": 0.386661097187413,
505
- "step": 15500
506
  },
507
  {
508
- "epoch": 46.3768115942029,
509
- "grad_norm": 0.8698641657829285,
510
- "learning_rate": 0.0001632705882352941,
511
- "loss": 0.1288,
512
- "step": 16000
513
  },
514
  {
515
- "epoch": 46.3768115942029,
516
- "eval_loss": 0.7073222994804382,
517
- "eval_runtime": 156.7831,
518
- "eval_samples_per_second": 16.609,
519
- "eval_steps_per_second": 4.152,
520
- "eval_wer": 0.3843537414965986,
521
- "step": 16000
522
  },
523
  {
524
- "epoch": 47.82608695652174,
525
- "grad_norm": 0.7717955708503723,
526
- "learning_rate": 0.00015885882352941177,
527
- "loss": 0.1263,
528
- "step": 16500
529
  },
530
  {
531
- "epoch": 47.82608695652174,
532
- "eval_loss": 0.7207434177398682,
533
- "eval_runtime": 156.8893,
534
- "eval_samples_per_second": 16.598,
535
- "eval_steps_per_second": 4.149,
536
- "eval_wer": 0.3835978835978836,
537
- "step": 16500
538
  },
539
  {
540
- "epoch": 49.27536231884058,
541
- "grad_norm": 1.0890934467315674,
542
- "learning_rate": 0.0001544647058823529,
543
- "loss": 0.1218,
544
- "step": 17000
545
  },
546
  {
547
- "epoch": 49.27536231884058,
548
- "eval_loss": 0.7430319786071777,
549
- "eval_runtime": 156.7354,
550
- "eval_samples_per_second": 16.614,
551
- "eval_steps_per_second": 4.153,
552
- "eval_wer": 0.3811711819230616,
553
- "step": 17000
554
  },
555
  {
556
- "epoch": 50.72463768115942,
557
- "grad_norm": 1.435145616531372,
558
- "learning_rate": 0.0001500529411764706,
559
- "loss": 0.1217,
560
- "step": 17500
561
  },
562
  {
563
- "epoch": 50.72463768115942,
564
- "eval_loss": 0.7588245868682861,
565
- "eval_runtime": 156.9854,
566
- "eval_samples_per_second": 16.588,
567
- "eval_steps_per_second": 4.147,
568
- "eval_wer": 0.38306082666984925,
569
- "step": 17500
570
  },
571
  {
572
- "epoch": 52.17391304347826,
573
- "grad_norm": 0.6202664971351624,
574
- "learning_rate": 0.0001456411764705882,
575
- "loss": 0.1183,
576
- "step": 18000
577
  },
578
  {
579
- "epoch": 52.17391304347826,
580
- "eval_loss": 0.7478466629981995,
581
- "eval_runtime": 156.6121,
582
- "eval_samples_per_second": 16.627,
583
- "eval_steps_per_second": 4.157,
584
- "eval_wer": 0.3813303099017385,
585
- "step": 18000
586
  },
587
  {
588
- "epoch": 53.6231884057971,
589
- "grad_norm": 0.7649337649345398,
590
- "learning_rate": 0.00014122941176470587,
591
- "loss": 0.113,
592
- "step": 18500
593
  },
594
  {
595
- "epoch": 53.6231884057971,
596
- "eval_loss": 0.7268975973129272,
597
- "eval_runtime": 156.8146,
598
- "eval_samples_per_second": 16.606,
599
- "eval_steps_per_second": 4.151,
600
- "eval_wer": 0.377869276365517,
601
- "step": 18500
602
  },
603
  {
604
- "epoch": 55.072463768115945,
605
- "grad_norm": 0.6549494862556458,
606
- "learning_rate": 0.0001368176470588235,
607
- "loss": 0.1109,
608
- "step": 19000
609
  },
610
  {
611
- "epoch": 55.072463768115945,
612
- "eval_loss": 0.7117257118225098,
613
- "eval_runtime": 156.5809,
614
- "eval_samples_per_second": 16.63,
615
- "eval_steps_per_second": 4.158,
616
- "eval_wer": 0.373473365954569,
617
- "step": 19000
618
  },
619
  {
620
- "epoch": 56.52173913043478,
621
- "grad_norm": 0.4416871666908264,
622
- "learning_rate": 0.00013241470588235292,
623
- "loss": 0.1102,
624
- "step": 19500
625
  },
626
  {
627
- "epoch": 56.52173913043478,
628
- "eval_loss": 0.7531840205192566,
629
- "eval_runtime": 156.1999,
630
- "eval_samples_per_second": 16.671,
631
- "eval_steps_per_second": 4.168,
632
- "eval_wer": 0.3689183275649441,
633
- "step": 19500
634
  },
635
  {
636
- "epoch": 57.971014492753625,
637
- "grad_norm": 0.8305051922798157,
638
- "learning_rate": 0.00012800294117647058,
639
- "loss": 0.1084,
640
- "step": 20000
641
  },
642
  {
643
- "epoch": 57.971014492753625,
644
- "eval_loss": 0.7607569694519043,
645
- "eval_runtime": 156.7941,
646
- "eval_samples_per_second": 16.608,
647
- "eval_steps_per_second": 4.152,
648
- "eval_wer": 0.3703504793730358,
649
- "step": 20000
650
  },
651
  {
652
- "epoch": 59.42028985507246,
653
- "grad_norm": 0.7849389314651489,
654
- "learning_rate": 0.00012359117647058824,
655
- "loss": 0.1042,
656
- "step": 20500
657
  },
658
  {
659
- "epoch": 59.42028985507246,
660
- "eval_loss": 0.7571032643318176,
661
- "eval_runtime": 156.51,
662
- "eval_samples_per_second": 16.638,
663
- "eval_steps_per_second": 4.159,
664
- "eval_wer": 0.3676651947328639,
665
- "step": 20500
666
  },
667
  {
668
- "epoch": 60.869565217391305,
669
- "grad_norm": 2.074193239212036,
670
- "learning_rate": 0.00011917941176470587,
671
- "loss": 0.1048,
672
- "step": 21000
673
  },
674
  {
675
- "epoch": 60.869565217391305,
676
- "eval_loss": 0.7744612097740173,
677
- "eval_runtime": 156.269,
678
- "eval_samples_per_second": 16.664,
679
- "eval_steps_per_second": 4.166,
680
- "eval_wer": 0.3682619246529021,
681
- "step": 21000
682
  },
683
  {
684
- "epoch": 62.31884057971015,
685
- "grad_norm": 0.620770275592804,
686
- "learning_rate": 0.00011476764705882352,
687
- "loss": 0.1005,
688
- "step": 21500
689
  },
690
  {
691
- "epoch": 62.31884057971015,
692
- "eval_loss": 0.784517228603363,
693
- "eval_runtime": 156.5747,
694
- "eval_samples_per_second": 16.631,
695
- "eval_steps_per_second": 4.158,
696
- "eval_wer": 0.37122568325575844,
697
- "step": 21500
698
  },
699
  {
700
- "epoch": 63.768115942028984,
701
- "grad_norm": 0.6090012192726135,
702
- "learning_rate": 0.00011035588235294117,
703
- "loss": 0.1006,
704
- "step": 22000
705
  },
706
  {
707
- "epoch": 63.768115942028984,
708
- "eval_loss": 0.7632699608802795,
709
- "eval_runtime": 156.8255,
710
- "eval_samples_per_second": 16.604,
711
- "eval_steps_per_second": 4.151,
712
- "eval_wer": 0.3664319528981183,
713
- "step": 22000
714
  },
715
  {
716
- "epoch": 65.21739130434783,
717
- "grad_norm": 0.7670443058013916,
718
- "learning_rate": 0.00010594411764705882,
719
- "loss": 0.0976,
720
- "step": 22500
721
  },
722
  {
723
- "epoch": 65.21739130434783,
724
- "eval_loss": 0.772113561630249,
725
- "eval_runtime": 156.8861,
726
- "eval_samples_per_second": 16.598,
727
- "eval_steps_per_second": 4.15,
728
- "eval_wer": 0.3638859052392887,
729
- "step": 22500
730
  },
731
  {
732
- "epoch": 66.66666666666667,
733
- "grad_norm": 0.7425007224082947,
734
- "learning_rate": 0.00010153235294117646,
735
- "loss": 0.096,
736
- "step": 23000
737
  },
738
  {
739
- "epoch": 66.66666666666667,
740
- "eval_loss": 0.7658870220184326,
741
- "eval_runtime": 156.557,
742
- "eval_samples_per_second": 16.633,
743
- "eval_steps_per_second": 4.158,
744
- "eval_wer": 0.36432350718065004,
745
- "step": 23000
746
  },
747
  {
748
- "epoch": 68.1159420289855,
749
- "grad_norm": 0.6578115820884705,
750
- "learning_rate": 9.712058823529411e-05,
751
- "loss": 0.0938,
752
- "step": 23500
753
  },
754
  {
755
- "epoch": 68.1159420289855,
756
- "eval_loss": 0.7658408284187317,
757
- "eval_runtime": 156.5578,
758
- "eval_samples_per_second": 16.633,
759
- "eval_steps_per_second": 4.158,
760
- "eval_wer": 0.3620161514898357,
761
- "step": 23500
762
  },
763
  {
764
- "epoch": 69.56521739130434,
765
- "grad_norm": 0.7457234859466553,
766
- "learning_rate": 9.271764705882353e-05,
767
- "loss": 0.0933,
768
- "step": 24000
769
  },
770
  {
771
- "epoch": 69.56521739130434,
772
- "eval_loss": 0.7692342400550842,
773
- "eval_runtime": 156.8993,
774
- "eval_samples_per_second": 16.597,
775
- "eval_steps_per_second": 4.149,
776
- "eval_wer": 0.35789871504157217,
777
- "step": 24000
778
  },
779
  {
780
- "epoch": 71.01449275362319,
781
- "grad_norm": 0.44514575600624084,
782
- "learning_rate": 8.830588235294118e-05,
783
- "loss": 0.092,
784
- "step": 24500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
785
  },
786
  {
787
- "epoch": 71.01449275362319,
788
- "eval_loss": 0.7784613966941833,
789
- "eval_runtime": 156.7679,
790
- "eval_samples_per_second": 16.611,
791
- "eval_steps_per_second": 4.153,
792
- "eval_wer": 0.36245375343119707,
793
- "step": 24500
794
  },
795
  {
796
- "epoch": 72.46376811594203,
797
- "grad_norm": 0.6682944893836975,
798
- "learning_rate": 8.389411764705882e-05,
799
- "loss": 0.089,
800
- "step": 25000
 
 
801
  },
802
  {
803
- "epoch": 72.46376811594203,
804
- "eval_loss": 0.7845346331596375,
805
- "eval_runtime": 156.6256,
806
- "eval_samples_per_second": 16.626,
807
- "eval_steps_per_second": 4.156,
808
- "eval_wer": 0.36149898555913595,
809
- "step": 25000
810
  },
811
  {
812
- "epoch": 73.91304347826087,
813
- "grad_norm": 0.6694007515907288,
814
- "learning_rate": 7.948235294117647e-05,
815
- "loss": 0.088,
816
- "step": 25500
 
 
817
  },
818
  {
819
- "epoch": 73.91304347826087,
820
- "eval_loss": 0.7972577214241028,
821
- "eval_runtime": 156.8625,
822
- "eval_samples_per_second": 16.601,
823
- "eval_steps_per_second": 4.15,
824
- "eval_wer": 0.3585948999482834,
825
- "step": 25500
826
  },
827
  {
828
- "epoch": 75.3623188405797,
829
- "grad_norm": 0.6369543671607971,
830
- "learning_rate": 7.507941176470587e-05,
831
- "loss": 0.0862,
832
- "step": 26000
 
 
833
  },
834
  {
835
- "epoch": 75.3623188405797,
836
- "eval_loss": 0.7805651426315308,
837
- "eval_runtime": 160.3239,
838
- "eval_samples_per_second": 16.242,
839
- "eval_steps_per_second": 4.061,
840
- "eval_wer": 0.3575804590842185,
841
- "step": 26000
 
 
 
 
 
 
 
842
  },
843
  {
844
- "epoch": 76.81159420289855,
845
- "grad_norm": 0.6956751346588135,
846
- "learning_rate": 7.066764705882352e-05,
847
- "loss": 0.0851,
848
- "step": 26500
849
  },
850
  {
851
- "epoch": 76.81159420289855,
852
- "eval_loss": 0.7946861982345581,
853
- "eval_runtime": 156.9716,
854
- "eval_samples_per_second": 16.589,
855
- "eval_steps_per_second": 4.147,
856
- "eval_wer": 0.3582567529935951,
857
- "step": 26500
858
  },
859
  {
860
- "epoch": 78.26086956521739,
861
- "grad_norm": 0.6056320667266846,
862
- "learning_rate": 6.625588235294117e-05,
863
- "loss": 0.0846,
864
- "step": 27000
865
  },
866
  {
867
- "epoch": 78.26086956521739,
868
- "eval_loss": 0.7801975607872009,
869
- "eval_runtime": 156.8934,
870
- "eval_samples_per_second": 16.597,
871
- "eval_steps_per_second": 4.149,
872
- "eval_wer": 0.3526276007479015,
873
- "step": 27000
874
  },
875
  {
876
- "epoch": 79.71014492753623,
877
- "grad_norm": 0.7737133502960205,
878
- "learning_rate": 6.184411764705882e-05,
879
- "loss": 0.0809,
880
- "step": 27500
881
  },
882
  {
883
- "epoch": 79.71014492753623,
884
- "eval_loss": 0.8093453049659729,
885
- "eval_runtime": 156.9304,
886
- "eval_samples_per_second": 16.593,
887
- "eval_steps_per_second": 4.148,
888
- "eval_wer": 0.3532243306679397,
889
- "step": 27500
890
  },
891
  {
892
- "epoch": 81.15942028985508,
893
- "grad_norm": 0.4759688377380371,
894
- "learning_rate": 5.7449999999999994e-05,
895
- "loss": 0.0813,
896
- "step": 28000
897
  },
898
  {
899
- "epoch": 81.15942028985508,
900
- "eval_loss": 0.8237009644508362,
901
- "eval_runtime": 156.9785,
902
- "eval_samples_per_second": 16.588,
903
- "eval_steps_per_second": 4.147,
904
- "eval_wer": 0.3571627481401918,
905
- "step": 28000
906
  },
907
  {
908
- "epoch": 82.6086956521739,
909
- "grad_norm": 0.6076968908309937,
910
- "learning_rate": 5.303823529411764e-05,
911
- "loss": 0.0785,
912
- "step": 28500
913
  },
914
  {
915
- "epoch": 82.6086956521739,
916
- "eval_loss": 0.8130338191986084,
917
- "eval_runtime": 157.2925,
918
- "eval_samples_per_second": 16.555,
919
- "eval_steps_per_second": 4.139,
920
- "eval_wer": 0.35330389465727813,
921
- "step": 28500
922
  },
923
  {
924
- "epoch": 84.05797101449275,
925
- "grad_norm": 0.452317476272583,
926
- "learning_rate": 4.862647058823529e-05,
927
- "loss": 0.0799,
928
- "step": 29000
929
  },
930
  {
931
- "epoch": 84.05797101449275,
932
- "eval_loss": 0.7957727909088135,
933
- "eval_runtime": 157.045,
934
- "eval_samples_per_second": 16.581,
935
- "eval_steps_per_second": 4.145,
936
- "eval_wer": 0.3510959939531368,
937
- "step": 29000
938
  },
939
  {
940
- "epoch": 85.5072463768116,
941
- "grad_norm": 0.48164471983909607,
942
- "learning_rate": 4.4214705882352936e-05,
943
- "loss": 0.0784,
944
- "step": 29500
945
  },
946
  {
947
- "epoch": 85.5072463768116,
948
- "eval_loss": 0.8108227252960205,
949
- "eval_runtime": 157.2491,
950
- "eval_samples_per_second": 16.56,
951
- "eval_steps_per_second": 4.14,
952
- "eval_wer": 0.35065839201177545,
953
- "step": 29500
954
  },
955
  {
956
- "epoch": 86.95652173913044,
957
- "grad_norm": 0.46430978178977966,
958
- "learning_rate": 3.9802941176470584e-05,
959
- "loss": 0.0767,
960
- "step": 30000
961
  },
962
  {
963
- "epoch": 86.95652173913044,
964
- "eval_loss": 0.8208354711532593,
965
- "eval_runtime": 157.1037,
966
- "eval_samples_per_second": 16.575,
967
- "eval_steps_per_second": 4.144,
968
- "eval_wer": 0.3510959939531368,
969
- "step": 30000
970
  },
971
  {
972
- "epoch": 88.40579710144928,
973
- "grad_norm": 0.678175687789917,
974
- "learning_rate": 3.539117647058823e-05,
975
- "loss": 0.0742,
976
- "step": 30500
977
  },
978
  {
979
- "epoch": 88.40579710144928,
980
- "eval_loss": 0.8270174860954285,
981
- "eval_runtime": 157.1113,
982
- "eval_samples_per_second": 16.574,
983
- "eval_steps_per_second": 4.144,
984
- "eval_wer": 0.3500815530890719,
985
- "step": 30500
986
  },
987
  {
988
- "epoch": 89.85507246376811,
989
- "grad_norm": 1.0150262117385864,
990
- "learning_rate": 3.097941176470588e-05,
991
- "loss": 0.0746,
992
- "step": 31000
993
  },
994
  {
995
- "epoch": 89.85507246376811,
996
- "eval_loss": 0.8121225237846375,
997
- "eval_runtime": 157.194,
998
- "eval_samples_per_second": 16.566,
999
- "eval_steps_per_second": 4.141,
1000
- "eval_wer": 0.3458646616541353,
1001
- "step": 31000
1002
  },
1003
  {
1004
- "epoch": 91.30434782608695,
1005
- "grad_norm": 0.6040120124816895,
1006
- "learning_rate": 2.6567647058823526e-05,
1007
- "loss": 0.073,
1008
- "step": 31500
1009
  },
1010
  {
1011
- "epoch": 91.30434782608695,
1012
- "eval_loss": 0.8151441812515259,
1013
- "eval_runtime": 157.4683,
1014
- "eval_samples_per_second": 16.537,
1015
- "eval_steps_per_second": 4.134,
1016
- "eval_wer": 0.34849027330230337,
1017
- "step": 31500
1018
  },
1019
  {
1020
- "epoch": 92.7536231884058,
1021
- "grad_norm": 0.4965975284576416,
1022
- "learning_rate": 2.2155882352941177e-05,
1023
- "loss": 0.0725,
1024
- "step": 32000
1025
- },
1026
- {
1027
- "epoch": 92.7536231884058,
1028
- "eval_loss": 0.8264575600624084,
1029
- "eval_runtime": 157.3691,
1030
- "eval_samples_per_second": 16.547,
1031
- "eval_steps_per_second": 4.137,
1032
- "eval_wer": 0.34773441540358835,
1033
- "step": 32000
1034
- },
1035
- {
1036
- "epoch": 94.20289855072464,
1037
- "grad_norm": 0.4107760190963745,
1038
- "learning_rate": 1.7752941176470586e-05,
1039
- "loss": 0.0717,
1040
- "step": 32500
1041
- },
1042
- {
1043
- "epoch": 94.20289855072464,
1044
- "eval_loss": 0.817254900932312,
1045
- "eval_runtime": 157.3782,
1046
- "eval_samples_per_second": 16.546,
1047
- "eval_steps_per_second": 4.137,
1048
- "eval_wer": 0.3445916378247205,
1049
- "step": 32500
1050
- },
1051
- {
1052
- "epoch": 95.65217391304348,
1053
- "grad_norm": 0.5178669691085815,
1054
- "learning_rate": 1.3341176470588234e-05,
1055
- "loss": 0.0709,
1056
- "step": 33000
1057
- },
1058
- {
1059
- "epoch": 95.65217391304348,
1060
- "eval_loss": 0.8134782314300537,
1061
- "eval_runtime": 157.5041,
1062
- "eval_samples_per_second": 16.533,
1063
- "eval_steps_per_second": 4.133,
1064
- "eval_wer": 0.34341806898197874,
1065
- "step": 33000
1066
- },
1067
- {
1068
- "epoch": 97.10144927536231,
1069
- "grad_norm": 0.4780011773109436,
1070
- "learning_rate": 8.929411764705881e-06,
1071
- "loss": 0.0704,
1072
- "step": 33500
1073
- },
1074
- {
1075
- "epoch": 97.10144927536231,
1076
- "eval_loss": 0.817874550819397,
1077
- "eval_runtime": 157.0668,
1078
- "eval_samples_per_second": 16.579,
1079
- "eval_steps_per_second": 4.145,
1080
- "eval_wer": 0.3431197040219597,
1081
- "step": 33500
1082
- },
1083
- {
1084
- "epoch": 98.55072463768116,
1085
- "grad_norm": 0.44165438413619995,
1086
- "learning_rate": 4.5176470588235295e-06,
1087
- "loss": 0.0699,
1088
- "step": 34000
1089
- },
1090
- {
1091
- "epoch": 98.55072463768116,
1092
- "eval_loss": 0.8134194612503052,
1093
- "eval_runtime": 157.034,
1094
- "eval_samples_per_second": 16.582,
1095
- "eval_steps_per_second": 4.146,
1096
- "eval_wer": 0.3426622110832637,
1097
- "step": 34000
1098
- },
1099
- {
1100
- "epoch": 100.0,
1101
- "grad_norm": 1.7415516376495361,
1102
- "learning_rate": 1.1470588235294118e-07,
1103
- "loss": 0.0691,
1104
- "step": 34500
1105
- },
1106
- {
1107
- "epoch": 100.0,
1108
- "eval_loss": 0.8155142664909363,
1109
- "eval_runtime": 157.1295,
1110
- "eval_samples_per_second": 16.572,
1111
- "eval_steps_per_second": 4.143,
1112
- "eval_wer": 0.34280144806460594,
1113
- "step": 34500
1114
- },
1115
- {
1116
- "epoch": 100.0,
1117
- "step": 34500,
1118
- "total_flos": 3.512425533037396e+19,
1119
- "train_loss": 0.26323221057394275,
1120
- "train_runtime": 51849.7109,
1121
- "train_samples_per_second": 5.321,
1122
- "train_steps_per_second": 0.665
1123
  }
1124
  ],
1125
- "logging_steps": 500,
1126
- "max_steps": 34500,
1127
  "num_input_tokens_seen": 0,
1128
  "num_train_epochs": 100,
1129
- "save_steps": 500,
1130
  "stateful_callbacks": {
1131
  "TrainerControl": {
1132
  "args": {
@@ -1139,8 +1411,8 @@
1139
  "attributes": {}
1140
  }
1141
  },
1142
- "total_flos": 3.512425533037396e+19,
1143
- "train_batch_size": 4,
1144
  "trial_name": null,
1145
  "trial_params": null
1146
  }
 
1
  {
2
+ "best_metric": 0.8512468338012695,
3
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/wav2vec2-base-librispeech-model/checkpoint-16800",
4
+ "epoch": 99.4231884057971,
5
+ "eval_steps": 200,
6
+ "global_step": 17200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.1565217391304348,
13
+ "grad_norm": 1.2893730401992798,
14
+ "learning_rate": 0.00029699999999999996,
15
+ "loss": 4.7426,
16
+ "step": 200
17
  },
18
  {
19
+ "epoch": 1.1565217391304348,
20
+ "eval_loss": 2.8968212604522705,
21
+ "eval_runtime": 126.7808,
22
+ "eval_samples_per_second": 20.539,
23
+ "eval_steps_per_second": 2.571,
24
+ "eval_wer": 1.0,
25
+ "step": 200
26
  },
27
  {
28
+ "epoch": 2.3130434782608695,
29
+ "grad_norm": 0.3036455512046814,
30
+ "learning_rate": 0.0002965058823529411,
31
+ "loss": 2.7493,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.3130434782608695,
36
+ "eval_loss": 2.26717472076416,
37
+ "eval_runtime": 126.2768,
38
+ "eval_samples_per_second": 20.621,
39
+ "eval_steps_per_second": 2.582,
40
+ "eval_wer": 0.9989457771412659,
41
+ "step": 400
42
+ },
43
+ {
44
+ "epoch": 3.4695652173913043,
45
+ "grad_norm": 0.44915950298309326,
46
+ "learning_rate": 0.0002929764705882353,
47
+ "loss": 2.0156,
48
+ "step": 600
49
+ },
50
+ {
51
+ "epoch": 3.4695652173913043,
52
+ "eval_loss": 1.6933485269546509,
53
+ "eval_runtime": 126.1204,
54
+ "eval_samples_per_second": 20.647,
55
+ "eval_steps_per_second": 2.585,
56
+ "eval_wer": 0.9759716752197956,
57
+ "step": 600
58
+ },
59
+ {
60
+ "epoch": 4.626086956521739,
61
+ "grad_norm": 0.3140698969364166,
62
+ "learning_rate": 0.0002894470588235294,
63
+ "loss": 1.7839,
64
+ "step": 800
65
+ },
66
+ {
67
+ "epoch": 4.626086956521739,
68
+ "eval_loss": 1.5435950756072998,
69
+ "eval_runtime": 126.0194,
70
+ "eval_samples_per_second": 20.663,
71
+ "eval_steps_per_second": 2.587,
72
+ "eval_wer": 0.960735171261487,
73
+ "step": 800
74
+ },
75
+ {
76
+ "epoch": 5.782608695652174,
77
+ "grad_norm": 0.40944600105285645,
78
+ "learning_rate": 0.0002859176470588235,
79
+ "loss": 1.6691,
80
  "step": 1000
81
  },
82
  {
83
+ "epoch": 5.782608695652174,
84
+ "eval_loss": 1.4462971687316895,
85
+ "eval_runtime": 130.6554,
86
+ "eval_samples_per_second": 19.93,
87
+ "eval_steps_per_second": 2.495,
88
+ "eval_wer": 0.9393921311214545,
89
  "step": 1000
90
  },
91
  {
92
+ "epoch": 6.939130434782609,
93
+ "grad_norm": 0.39339712262153625,
94
+ "learning_rate": 0.00028238823529411764,
95
+ "loss": 1.592,
96
+ "step": 1200
97
+ },
98
+ {
99
+ "epoch": 6.939130434782609,
100
+ "eval_loss": 1.3824982643127441,
101
+ "eval_runtime": 125.9506,
102
+ "eval_samples_per_second": 20.675,
103
+ "eval_steps_per_second": 2.588,
104
+ "eval_wer": 0.9289493575207861,
105
+ "step": 1200
106
+ },
107
+ {
108
+ "epoch": 8.092753623188406,
109
+ "grad_norm": 0.4750897288322449,
110
+ "learning_rate": 0.00027885882352941176,
111
+ "loss": 1.5384,
112
+ "step": 1400
113
+ },
114
+ {
115
+ "epoch": 8.092753623188406,
116
+ "eval_loss": 1.3446310758590698,
117
+ "eval_runtime": 126.0297,
118
+ "eval_samples_per_second": 20.662,
119
+ "eval_steps_per_second": 2.587,
120
+ "eval_wer": 0.9162986832159764,
121
+ "step": 1400
122
+ },
123
+ {
124
+ "epoch": 9.24927536231884,
125
+ "grad_norm": 0.9390007257461548,
126
+ "learning_rate": 0.0002753294117647059,
127
+ "loss": 1.4929,
128
+ "step": 1600
129
+ },
130
+ {
131
+ "epoch": 9.24927536231884,
132
+ "eval_loss": 1.317173957824707,
133
+ "eval_runtime": 126.3023,
134
+ "eval_samples_per_second": 20.617,
135
+ "eval_steps_per_second": 2.581,
136
+ "eval_wer": 0.9207144846242591,
137
+ "step": 1600
138
  },
139
  {
140
+ "epoch": 10.405797101449275,
141
+ "grad_norm": 0.3777698576450348,
142
+ "learning_rate": 0.0002718,
143
+ "loss": 1.4563,
144
+ "step": 1800
 
 
145
  },
146
  {
147
+ "epoch": 10.405797101449275,
148
+ "eval_loss": 1.2747116088867188,
149
+ "eval_runtime": 130.3532,
150
+ "eval_samples_per_second": 19.976,
151
+ "eval_steps_per_second": 2.501,
152
+ "eval_wer": 0.907427298404742,
153
+ "step": 1800
154
+ },
155
+ {
156
+ "epoch": 11.56231884057971,
157
+ "grad_norm": 0.4892979562282562,
158
+ "learning_rate": 0.00026827058823529406,
159
+ "loss": 1.4278,
160
  "step": 2000
161
  },
162
  {
163
+ "epoch": 11.56231884057971,
164
+ "eval_loss": 1.2533023357391357,
165
+ "eval_runtime": 126.6214,
166
+ "eval_samples_per_second": 20.565,
167
+ "eval_steps_per_second": 2.575,
168
+ "eval_wer": 0.9113657158769941,
169
  "step": 2000
170
  },
171
  {
172
+ "epoch": 12.718840579710145,
173
+ "grad_norm": 0.4739660620689392,
174
+ "learning_rate": 0.00026474117647058823,
175
+ "loss": 1.3945,
176
+ "step": 2200
177
+ },
178
+ {
179
+ "epoch": 12.718840579710145,
180
+ "eval_loss": 1.2253831624984741,
181
+ "eval_runtime": 126.6581,
182
+ "eval_samples_per_second": 20.559,
183
+ "eval_steps_per_second": 2.574,
184
+ "eval_wer": 0.8960297569320126,
185
+ "step": 2200
186
+ },
187
+ {
188
+ "epoch": 13.87536231884058,
189
+ "grad_norm": 0.4378398060798645,
190
+ "learning_rate": 0.00026121176470588235,
191
+ "loss": 1.3772,
192
+ "step": 2400
193
+ },
194
+ {
195
+ "epoch": 13.87536231884058,
196
+ "eval_loss": 1.2075964212417603,
197
+ "eval_runtime": 126.6197,
198
+ "eval_samples_per_second": 20.566,
199
+ "eval_steps_per_second": 2.575,
200
+ "eval_wer": 0.9005450133269682,
201
+ "step": 2400
202
+ },
203
+ {
204
+ "epoch": 15.028985507246377,
205
+ "grad_norm": 0.5404449105262756,
206
+ "learning_rate": 0.00025768235294117646,
207
+ "loss": 1.3473,
208
+ "step": 2600
209
+ },
210
+ {
211
+ "epoch": 15.028985507246377,
212
+ "eval_loss": 1.1940252780914307,
213
+ "eval_runtime": 126.7107,
214
+ "eval_samples_per_second": 20.551,
215
+ "eval_steps_per_second": 2.573,
216
+ "eval_wer": 0.8836973385845567,
217
+ "step": 2600
218
  },
219
  {
220
+ "epoch": 16.18550724637681,
221
+ "grad_norm": 0.4465963840484619,
222
+ "learning_rate": 0.0002541529411764706,
223
+ "loss": 1.3281,
224
+ "step": 2800
 
 
225
  },
226
  {
227
+ "epoch": 16.18550724637681,
228
+ "eval_loss": 1.1721361875534058,
229
+ "eval_runtime": 126.7359,
230
+ "eval_samples_per_second": 20.547,
231
+ "eval_steps_per_second": 2.572,
232
+ "eval_wer": 0.8849305804193022,
233
+ "step": 2800
234
+ },
235
+ {
236
+ "epoch": 17.342028985507245,
237
+ "grad_norm": 0.36305734515190125,
238
+ "learning_rate": 0.00025062352941176465,
239
+ "loss": 1.3126,
240
  "step": 3000
241
  },
242
  {
243
+ "epoch": 17.342028985507245,
244
+ "eval_loss": 1.1684266328811646,
245
+ "eval_runtime": 126.6868,
246
+ "eval_samples_per_second": 20.555,
247
+ "eval_steps_per_second": 2.573,
248
+ "eval_wer": 0.8771929824561403,
249
  "step": 3000
250
  },
251
  {
252
+ "epoch": 18.49855072463768,
253
+ "grad_norm": 0.8538926243782043,
254
+ "learning_rate": 0.0002470941176470588,
255
+ "loss": 1.294,
256
+ "step": 3200
257
  },
258
  {
259
+ "epoch": 18.49855072463768,
260
+ "eval_loss": 1.1741454601287842,
261
+ "eval_runtime": 126.8469,
262
+ "eval_samples_per_second": 20.529,
263
+ "eval_steps_per_second": 2.57,
264
+ "eval_wer": 0.8795799021362931,
265
+ "step": 3200
266
  },
267
  {
268
+ "epoch": 19.655072463768114,
269
+ "grad_norm": 0.4263986051082611,
270
+ "learning_rate": 0.00024356470588235294,
271
+ "loss": 1.277,
272
+ "step": 3400
273
+ },
274
+ {
275
+ "epoch": 19.655072463768114,
276
+ "eval_loss": 1.1416850090026855,
277
+ "eval_runtime": 126.7443,
278
+ "eval_samples_per_second": 20.545,
279
+ "eval_steps_per_second": 2.572,
280
+ "eval_wer": 0.8725384890798424,
281
+ "step": 3400
282
+ },
283
+ {
284
+ "epoch": 20.81159420289855,
285
+ "grad_norm": 0.5871867537498474,
286
+ "learning_rate": 0.00024003529411764703,
287
+ "loss": 1.2668,
288
+ "step": 3600
289
+ },
290
+ {
291
+ "epoch": 20.81159420289855,
292
+ "eval_loss": 1.13175368309021,
293
+ "eval_runtime": 126.8413,
294
+ "eval_samples_per_second": 20.53,
295
+ "eval_steps_per_second": 2.57,
296
+ "eval_wer": 0.8663722799061145,
297
+ "step": 3600
298
+ },
299
+ {
300
+ "epoch": 21.968115942028987,
301
+ "grad_norm": 0.455477237701416,
302
+ "learning_rate": 0.00023650588235294115,
303
+ "loss": 1.2456,
304
+ "step": 3800
305
+ },
306
+ {
307
+ "epoch": 21.968115942028987,
308
+ "eval_loss": 1.1195415258407593,
309
+ "eval_runtime": 127.194,
310
+ "eval_samples_per_second": 20.473,
311
+ "eval_steps_per_second": 2.563,
312
+ "eval_wer": 0.8677845407168715,
313
+ "step": 3800
314
+ },
315
+ {
316
+ "epoch": 23.121739130434783,
317
+ "grad_norm": 0.3989470601081848,
318
+ "learning_rate": 0.0002329764705882353,
319
+ "loss": 1.2317,
320
  "step": 4000
321
  },
322
  {
323
+ "epoch": 23.121739130434783,
324
+ "eval_loss": 1.1132415533065796,
325
+ "eval_runtime": 126.9937,
326
+ "eval_samples_per_second": 20.505,
327
+ "eval_steps_per_second": 2.567,
328
+ "eval_wer": 0.8625532084178701,
329
  "step": 4000
330
  },
331
  {
332
+ "epoch": 24.278260869565216,
333
+ "grad_norm": 0.5162937045097351,
334
+ "learning_rate": 0.00022944705882352938,
335
+ "loss": 1.2225,
336
+ "step": 4200
337
+ },
338
+ {
339
+ "epoch": 24.278260869565216,
340
+ "eval_loss": 1.1121087074279785,
341
+ "eval_runtime": 126.8632,
342
+ "eval_samples_per_second": 20.526,
343
+ "eval_steps_per_second": 2.57,
344
+ "eval_wer": 0.8715638302104467,
345
+ "step": 4200
346
  },
347
  {
348
+ "epoch": 25.434782608695652,
349
+ "grad_norm": 0.4300783574581146,
350
+ "learning_rate": 0.00022591764705882353,
351
+ "loss": 1.2059,
352
+ "step": 4400
 
 
353
  },
354
  {
355
+ "epoch": 25.434782608695652,
356
+ "eval_loss": 1.1003350019454956,
357
+ "eval_runtime": 131.7283,
358
+ "eval_samples_per_second": 19.768,
359
+ "eval_steps_per_second": 2.475,
360
+ "eval_wer": 0.8622747344551855,
361
+ "step": 4400
362
+ },
363
+ {
364
+ "epoch": 26.591304347826085,
365
+ "grad_norm": 0.4021275043487549,
366
+ "learning_rate": 0.00022238823529411762,
367
+ "loss": 1.1991,
368
+ "step": 4600
369
+ },
370
+ {
371
+ "epoch": 26.591304347826085,
372
+ "eval_loss": 1.0933383703231812,
373
+ "eval_runtime": 127.0907,
374
+ "eval_samples_per_second": 20.489,
375
+ "eval_steps_per_second": 2.565,
376
+ "eval_wer": 0.8598878147750328,
377
+ "step": 4600
378
+ },
379
+ {
380
+ "epoch": 27.747826086956522,
381
+ "grad_norm": 0.8139039874076843,
382
+ "learning_rate": 0.00021885882352941174,
383
+ "loss": 1.1832,
384
+ "step": 4800
385
+ },
386
+ {
387
+ "epoch": 27.747826086956522,
388
+ "eval_loss": 1.089185118675232,
389
+ "eval_runtime": 127.3985,
390
+ "eval_samples_per_second": 20.44,
391
+ "eval_steps_per_second": 2.559,
392
+ "eval_wer": 0.8555913593507578,
393
+ "step": 4800
394
+ },
395
+ {
396
+ "epoch": 28.904347826086955,
397
+ "grad_norm": 0.42960914969444275,
398
+ "learning_rate": 0.00021532941176470588,
399
+ "loss": 1.1732,
400
  "step": 5000
401
  },
402
  {
403
+ "epoch": 28.904347826086955,
404
+ "eval_loss": 1.0722780227661133,
405
+ "eval_runtime": 127.3657,
406
+ "eval_samples_per_second": 20.445,
407
+ "eval_steps_per_second": 2.56,
408
+ "eval_wer": 0.8469387755102041,
409
  "step": 5000
410
  },
411
  {
412
+ "epoch": 30.057971014492754,
413
+ "grad_norm": 0.4363812804222107,
414
+ "learning_rate": 0.00021179999999999997,
415
+ "loss": 1.1588,
416
+ "step": 5200
417
+ },
418
+ {
419
+ "epoch": 30.057971014492754,
420
+ "eval_loss": 1.0763438940048218,
421
+ "eval_runtime": 127.5044,
422
+ "eval_samples_per_second": 20.423,
423
+ "eval_steps_per_second": 2.557,
424
+ "eval_wer": 0.8478537613875959,
425
+ "step": 5200
426
+ },
427
+ {
428
+ "epoch": 31.214492753623187,
429
+ "grad_norm": 0.49785545468330383,
430
+ "learning_rate": 0.0002082705882352941,
431
+ "loss": 1.149,
432
+ "step": 5400
433
+ },
434
+ {
435
+ "epoch": 31.214492753623187,
436
+ "eval_loss": 1.0577690601348877,
437
+ "eval_runtime": 127.6923,
438
+ "eval_samples_per_second": 20.393,
439
+ "eval_steps_per_second": 2.553,
440
+ "eval_wer": 0.8409515853124876,
441
+ "step": 5400
442
+ },
443
+ {
444
+ "epoch": 32.37101449275362,
445
+ "grad_norm": 0.3857053220272064,
446
+ "learning_rate": 0.00020474117647058823,
447
+ "loss": 1.1398,
448
+ "step": 5600
449
+ },
450
+ {
451
+ "epoch": 32.37101449275362,
452
+ "eval_loss": 1.0509884357452393,
453
+ "eval_runtime": 127.2975,
454
+ "eval_samples_per_second": 20.456,
455
+ "eval_steps_per_second": 2.561,
456
+ "eval_wer": 0.8412101682778375,
457
+ "step": 5600
458
+ },
459
+ {
460
+ "epoch": 33.527536231884056,
461
+ "grad_norm": 0.4151206612586975,
462
+ "learning_rate": 0.00020121176470588233,
463
+ "loss": 1.1297,
464
+ "step": 5800
465
  },
466
  {
467
+ "epoch": 33.527536231884056,
468
+ "eval_loss": 1.0429149866104126,
469
+ "eval_runtime": 127.4154,
470
+ "eval_samples_per_second": 20.437,
471
+ "eval_steps_per_second": 2.559,
472
+ "eval_wer": 0.8413494052591797,
473
+ "step": 5800
474
  },
475
  {
476
+ "epoch": 34.68405797101449,
477
+ "grad_norm": 0.46378350257873535,
478
+ "learning_rate": 0.00019768235294117647,
479
+ "loss": 1.117,
480
  "step": 6000
481
  },
482
  {
483
+ "epoch": 34.68405797101449,
484
+ "eval_loss": 1.0374654531478882,
485
+ "eval_runtime": 127.3947,
486
+ "eval_samples_per_second": 20.44,
487
+ "eval_steps_per_second": 2.559,
488
+ "eval_wer": 0.8321796554879262,
489
  "step": 6000
490
  },
491
  {
492
+ "epoch": 35.84057971014493,
493
+ "grad_norm": 0.4410320222377777,
494
+ "learning_rate": 0.00019415294117647056,
495
+ "loss": 1.1119,
496
+ "step": 6200
497
+ },
498
+ {
499
+ "epoch": 35.84057971014493,
500
+ "eval_loss": 1.0412153005599976,
501
+ "eval_runtime": 127.3041,
502
+ "eval_samples_per_second": 20.455,
503
+ "eval_steps_per_second": 2.561,
504
+ "eval_wer": 0.8316227075625572,
505
+ "step": 6200
506
+ },
507
+ {
508
+ "epoch": 36.99710144927536,
509
+ "grad_norm": 1.033341884613037,
510
+ "learning_rate": 0.00019062352941176468,
511
+ "loss": 1.0986,
512
+ "step": 6400
513
+ },
514
+ {
515
+ "epoch": 36.99710144927536,
516
+ "eval_loss": 1.029122233390808,
517
+ "eval_runtime": 127.5003,
518
+ "eval_samples_per_second": 20.423,
519
+ "eval_steps_per_second": 2.557,
520
+ "eval_wer": 0.8341687552213868,
521
+ "step": 6400
522
+ },
523
+ {
524
+ "epoch": 38.15072463768116,
525
+ "grad_norm": 0.44958433508872986,
526
+ "learning_rate": 0.00018709411764705882,
527
+ "loss": 1.0858,
528
+ "step": 6600
529
+ },
530
+ {
531
+ "epoch": 38.15072463768116,
532
+ "eval_loss": 1.0151913166046143,
533
+ "eval_runtime": 127.4524,
534
+ "eval_samples_per_second": 20.431,
535
+ "eval_steps_per_second": 2.558,
536
+ "eval_wer": 0.824103910570076,
537
+ "step": 6600
538
+ },
539
+ {
540
+ "epoch": 39.30724637681159,
541
+ "grad_norm": 0.42733389139175415,
542
+ "learning_rate": 0.00018356470588235291,
543
+ "loss": 1.0781,
544
+ "step": 6800
545
  },
546
  {
547
+ "epoch": 39.30724637681159,
548
+ "eval_loss": 1.028800129890442,
549
+ "eval_runtime": 127.5392,
550
+ "eval_samples_per_second": 20.417,
551
+ "eval_steps_per_second": 2.556,
552
+ "eval_wer": 0.8245017305167681,
553
+ "step": 6800
554
  },
555
  {
556
+ "epoch": 40.46376811594203,
557
+ "grad_norm": 0.5106310248374939,
558
+ "learning_rate": 0.00018003529411764703,
559
+ "loss": 1.0712,
560
  "step": 7000
561
  },
562
  {
563
+ "epoch": 40.46376811594203,
564
+ "eval_loss": 1.009470820426941,
565
+ "eval_runtime": 127.3801,
566
+ "eval_samples_per_second": 20.443,
567
+ "eval_steps_per_second": 2.559,
568
+ "eval_wer": 0.8191112702390898,
569
  "step": 7000
570
  },
571
  {
572
+ "epoch": 41.620289855072464,
573
+ "grad_norm": 0.4691919982433319,
574
+ "learning_rate": 0.00017650588235294118,
575
+ "loss": 1.0646,
576
+ "step": 7200
577
  },
578
  {
579
+ "epoch": 41.620289855072464,
580
+ "eval_loss": 1.0001919269561768,
581
+ "eval_runtime": 127.5273,
582
+ "eval_samples_per_second": 20.419,
583
+ "eval_steps_per_second": 2.556,
584
+ "eval_wer": 0.8189521422604129,
585
+ "step": 7200
586
  },
587
  {
588
+ "epoch": 42.7768115942029,
589
+ "grad_norm": 0.4837665855884552,
590
+ "learning_rate": 0.00017297647058823527,
591
+ "loss": 1.052,
592
+ "step": 7400
593
  },
594
  {
595
+ "epoch": 42.7768115942029,
596
+ "eval_loss": 0.9987174868583679,
597
+ "eval_runtime": 127.7719,
598
+ "eval_samples_per_second": 20.38,
599
+ "eval_steps_per_second": 2.551,
600
+ "eval_wer": 0.8281616740263357,
601
+ "step": 7400
602
  },
603
  {
604
+ "epoch": 43.93333333333333,
605
+ "grad_norm": 0.4914618134498596,
606
+ "learning_rate": 0.00016944705882352941,
607
+ "loss": 1.0422,
608
+ "step": 7600
609
  },
610
  {
611
+ "epoch": 43.93333333333333,
612
+ "eval_loss": 0.9949682950973511,
613
+ "eval_runtime": 127.6961,
614
+ "eval_samples_per_second": 20.392,
615
+ "eval_steps_per_second": 2.553,
616
+ "eval_wer": 0.8155109997215261,
617
+ "step": 7600
618
  },
619
  {
620
+ "epoch": 45.08695652173913,
621
+ "grad_norm": 0.4811262786388397,
622
+ "learning_rate": 0.0001659176470588235,
623
+ "loss": 1.0345,
624
+ "step": 7800
625
  },
626
  {
627
+ "epoch": 45.08695652173913,
628
+ "eval_loss": 0.9906212091445923,
629
+ "eval_runtime": 127.616,
630
+ "eval_samples_per_second": 20.405,
631
+ "eval_steps_per_second": 2.555,
632
+ "eval_wer": 0.8113139992839241,
633
+ "step": 7800
634
  },
635
  {
636
+ "epoch": 46.243478260869566,
637
+ "grad_norm": 0.7449145317077637,
638
+ "learning_rate": 0.00016238823529411762,
639
+ "loss": 1.0209,
640
+ "step": 8000
641
  },
642
  {
643
+ "epoch": 46.243478260869566,
644
+ "eval_loss": 0.9777077436447144,
645
+ "eval_runtime": 127.811,
646
+ "eval_samples_per_second": 20.374,
647
+ "eval_steps_per_second": 2.551,
648
+ "eval_wer": 0.812129530174643,
649
+ "step": 8000
650
  },
651
  {
652
+ "epoch": 47.4,
653
+ "grad_norm": 0.48268821835517883,
654
+ "learning_rate": 0.00015885882352941177,
655
+ "loss": 1.0217,
656
+ "step": 8200
657
  },
658
  {
659
+ "epoch": 47.4,
660
+ "eval_loss": 0.9648416638374329,
661
+ "eval_runtime": 127.7579,
662
+ "eval_samples_per_second": 20.382,
663
+ "eval_steps_per_second": 2.552,
664
+ "eval_wer": 0.8048295341528424,
665
+ "step": 8200
666
  },
667
  {
668
+ "epoch": 48.55652173913043,
669
+ "grad_norm": 0.5607514977455139,
670
+ "learning_rate": 0.00015532941176470586,
671
+ "loss": 1.0067,
672
+ "step": 8400
673
  },
674
  {
675
+ "epoch": 48.55652173913043,
676
+ "eval_loss": 0.9864305853843689,
677
+ "eval_runtime": 127.8335,
678
+ "eval_samples_per_second": 20.37,
679
+ "eval_steps_per_second": 2.55,
680
+ "eval_wer": 0.8027210884353742,
681
+ "step": 8400
682
  },
683
  {
684
+ "epoch": 49.71304347826087,
685
+ "grad_norm": 0.6017518043518066,
686
+ "learning_rate": 0.00015179999999999998,
687
+ "loss": 1.0033,
688
+ "step": 8600
689
  },
690
  {
691
+ "epoch": 49.71304347826087,
692
+ "eval_loss": 0.9633412957191467,
693
+ "eval_runtime": 127.7324,
694
+ "eval_samples_per_second": 20.386,
695
+ "eval_steps_per_second": 2.552,
696
+ "eval_wer": 0.7977284481043879,
697
+ "step": 8600
698
  },
699
  {
700
+ "epoch": 50.869565217391305,
701
+ "grad_norm": 0.5103667974472046,
702
+ "learning_rate": 0.0001482705882352941,
703
+ "loss": 0.9925,
704
+ "step": 8800
705
  },
706
  {
707
+ "epoch": 50.869565217391305,
708
+ "eval_loss": 0.9522212147712708,
709
+ "eval_runtime": 127.8431,
710
+ "eval_samples_per_second": 20.369,
711
+ "eval_steps_per_second": 2.55,
712
+ "eval_wer": 0.7946254525201893,
713
+ "step": 8800
714
  },
715
  {
716
+ "epoch": 52.0231884057971,
717
+ "grad_norm": 0.5089967250823975,
718
+ "learning_rate": 0.0001447411764705882,
719
+ "loss": 0.9784,
720
+ "step": 9000
721
  },
722
  {
723
+ "epoch": 52.0231884057971,
724
+ "eval_loss": 0.9520332217216492,
725
+ "eval_runtime": 127.451,
726
+ "eval_samples_per_second": 20.431,
727
+ "eval_steps_per_second": 2.558,
728
+ "eval_wer": 0.7978875760830648,
729
+ "step": 9000
730
  },
731
  {
732
+ "epoch": 53.17971014492753,
733
+ "grad_norm": 0.4959864020347595,
734
+ "learning_rate": 0.00014121176470588236,
735
+ "loss": 0.9757,
736
+ "step": 9200
737
  },
738
  {
739
+ "epoch": 53.17971014492753,
740
+ "eval_loss": 0.940946638584137,
741
+ "eval_runtime": 127.57,
742
+ "eval_samples_per_second": 20.412,
743
+ "eval_steps_per_second": 2.555,
744
+ "eval_wer": 0.78828022437045,
745
+ "step": 9200
746
  },
747
  {
748
+ "epoch": 54.33623188405797,
749
+ "grad_norm": 0.518679678440094,
750
+ "learning_rate": 0.00013768235294117645,
751
+ "loss": 0.9648,
752
+ "step": 9400
753
  },
754
  {
755
+ "epoch": 54.33623188405797,
756
+ "eval_loss": 0.9465099573135376,
757
+ "eval_runtime": 127.6829,
758
+ "eval_samples_per_second": 20.394,
759
+ "eval_steps_per_second": 2.553,
760
+ "eval_wer": 0.788578589330469,
761
+ "step": 9400
762
  },
763
  {
764
+ "epoch": 55.492753623188406,
765
+ "grad_norm": 0.7649258375167847,
766
+ "learning_rate": 0.00013415294117647057,
767
+ "loss": 0.9553,
768
+ "step": 9600
769
  },
770
  {
771
+ "epoch": 55.492753623188406,
772
+ "eval_loss": 0.9416138529777527,
773
+ "eval_runtime": 127.6278,
774
+ "eval_samples_per_second": 20.403,
775
+ "eval_steps_per_second": 2.554,
776
+ "eval_wer": 0.7877829494370848,
777
+ "step": 9600
778
  },
779
  {
780
+ "epoch": 56.64927536231884,
781
+ "grad_norm": 0.4904441237449646,
782
+ "learning_rate": 0.00013062352941176468,
783
+ "loss": 0.955,
784
+ "step": 9800
785
  },
786
  {
787
+ "epoch": 56.64927536231884,
788
+ "eval_loss": 0.9271659255027771,
789
+ "eval_runtime": 127.4634,
790
+ "eval_samples_per_second": 20.429,
791
+ "eval_steps_per_second": 2.558,
792
+ "eval_wer": 0.7855153757409397,
793
+ "step": 9800
794
  },
795
  {
796
+ "epoch": 57.80579710144927,
797
+ "grad_norm": 0.46599653363227844,
798
+ "learning_rate": 0.00012709411764705883,
799
+ "loss": 0.9442,
800
+ "step": 10000
801
  },
802
  {
803
+ "epoch": 57.80579710144927,
804
+ "eval_loss": 0.9268618822097778,
805
+ "eval_runtime": 127.2806,
806
+ "eval_samples_per_second": 20.459,
807
+ "eval_steps_per_second": 2.561,
808
+ "eval_wer": 0.7777976687751124,
809
+ "step": 10000
810
  },
811
  {
812
+ "epoch": 58.96231884057971,
813
+ "grad_norm": 0.5592873096466064,
814
+ "learning_rate": 0.00012356470588235292,
815
+ "loss": 0.9346,
816
+ "step": 10200
817
  },
818
  {
819
+ "epoch": 58.96231884057971,
820
+ "eval_loss": 0.9154264330863953,
821
+ "eval_runtime": 127.5803,
822
+ "eval_samples_per_second": 20.411,
823
+ "eval_steps_per_second": 2.555,
824
+ "eval_wer": 0.7685085730198512,
825
+ "step": 10200
826
  },
827
  {
828
+ "epoch": 60.11594202898551,
829
+ "grad_norm": 0.4597800374031067,
830
+ "learning_rate": 0.00012003529411764705,
831
+ "loss": 0.9271,
832
+ "step": 10400
833
  },
834
  {
835
+ "epoch": 60.11594202898551,
836
+ "eval_loss": 0.9182903170585632,
837
+ "eval_runtime": 127.6408,
838
+ "eval_samples_per_second": 20.401,
839
+ "eval_steps_per_second": 2.554,
840
+ "eval_wer": 0.7704777817559773,
841
+ "step": 10400
842
  },
843
  {
844
+ "epoch": 61.27246376811594,
845
+ "grad_norm": 0.5329666137695312,
846
+ "learning_rate": 0.00011650588235294116,
847
+ "loss": 0.9165,
848
+ "step": 10600
849
  },
850
  {
851
+ "epoch": 61.27246376811594,
852
+ "eval_loss": 0.9233406186103821,
853
+ "eval_runtime": 128.0081,
854
+ "eval_samples_per_second": 20.342,
855
+ "eval_steps_per_second": 2.547,
856
+ "eval_wer": 0.7725862274734455,
857
+ "step": 10600
858
  },
859
  {
860
+ "epoch": 62.428985507246374,
861
+ "grad_norm": 0.546533465385437,
862
+ "learning_rate": 0.00011297647058823529,
863
+ "loss": 0.9165,
864
+ "step": 10800
865
  },
866
  {
867
+ "epoch": 62.428985507246374,
868
+ "eval_loss": 0.9042327404022217,
869
+ "eval_runtime": 127.7124,
870
+ "eval_samples_per_second": 20.39,
871
+ "eval_steps_per_second": 2.553,
872
+ "eval_wer": 0.7694235588972431,
873
+ "step": 10800
874
  },
875
  {
876
+ "epoch": 63.585507246376814,
877
+ "grad_norm": 0.49292466044425964,
878
+ "learning_rate": 0.0001094470588235294,
879
+ "loss": 0.9088,
880
+ "step": 11000
881
  },
882
  {
883
+ "epoch": 63.585507246376814,
884
+ "eval_loss": 0.9099429845809937,
885
+ "eval_runtime": 127.459,
886
+ "eval_samples_per_second": 20.43,
887
+ "eval_steps_per_second": 2.558,
888
+ "eval_wer": 0.7645900465449338,
889
+ "step": 11000
890
  },
891
  {
892
+ "epoch": 64.74202898550725,
893
+ "grad_norm": 0.7860192656517029,
894
+ "learning_rate": 0.00010591764705882352,
895
+ "loss": 0.9018,
896
+ "step": 11200
897
  },
898
  {
899
+ "epoch": 64.74202898550725,
900
+ "eval_loss": 0.8967615365982056,
901
+ "eval_runtime": 127.4599,
902
+ "eval_samples_per_second": 20.43,
903
+ "eval_steps_per_second": 2.558,
904
+ "eval_wer": 0.7602140271313204,
905
+ "step": 11200
906
  },
907
  {
908
+ "epoch": 65.89855072463769,
909
+ "grad_norm": 0.4933035373687744,
910
+ "learning_rate": 0.00010238823529411763,
911
+ "loss": 0.8985,
912
+ "step": 11400
913
  },
914
  {
915
+ "epoch": 65.89855072463769,
916
+ "eval_loss": 0.8918899297714233,
917
+ "eval_runtime": 127.4862,
918
+ "eval_samples_per_second": 20.426,
919
+ "eval_steps_per_second": 2.557,
920
+ "eval_wer": 0.7606118470780124,
921
+ "step": 11400
922
  },
923
  {
924
+ "epoch": 67.05217391304348,
925
+ "grad_norm": 0.5119895935058594,
926
+ "learning_rate": 9.885882352941176e-05,
927
+ "loss": 0.8851,
928
+ "step": 11600
929
  },
930
  {
931
+ "epoch": 67.05217391304348,
932
+ "eval_loss": 0.8957463502883911,
933
+ "eval_runtime": 127.6383,
934
+ "eval_samples_per_second": 20.401,
935
+ "eval_steps_per_second": 2.554,
936
+ "eval_wer": 0.75442574690695,
937
+ "step": 11600
938
  },
939
  {
940
+ "epoch": 68.20869565217392,
941
+ "grad_norm": 0.61966872215271,
942
+ "learning_rate": 9.532941176470588e-05,
943
+ "loss": 0.8834,
944
+ "step": 11800
945
  },
946
  {
947
+ "epoch": 68.20869565217392,
948
+ "eval_loss": 0.8949310183525085,
949
+ "eval_runtime": 127.6674,
950
+ "eval_samples_per_second": 20.397,
951
+ "eval_steps_per_second": 2.554,
952
+ "eval_wer": 0.7545848748856268,
953
+ "step": 11800
954
  },
955
  {
956
+ "epoch": 69.36521739130434,
957
+ "grad_norm": 0.5450541973114014,
958
+ "learning_rate": 9.18e-05,
959
+ "loss": 0.8779,
960
+ "step": 12000
961
  },
962
  {
963
+ "epoch": 69.36521739130434,
964
+ "eval_loss": 0.8951545357704163,
965
+ "eval_runtime": 127.7527,
966
+ "eval_samples_per_second": 20.383,
967
+ "eval_steps_per_second": 2.552,
968
+ "eval_wer": 0.7552213868003341,
969
+ "step": 12000
970
  },
971
  {
972
+ "epoch": 70.52173913043478,
973
+ "grad_norm": 0.46215394139289856,
974
+ "learning_rate": 8.82705882352941e-05,
975
+ "loss": 0.8708,
976
+ "step": 12200
977
  },
978
  {
979
+ "epoch": 70.52173913043478,
980
+ "eval_loss": 0.8882645964622498,
981
+ "eval_runtime": 127.6812,
982
+ "eval_samples_per_second": 20.395,
983
+ "eval_steps_per_second": 2.553,
984
+ "eval_wer": 0.75265544814417,
985
+ "step": 12200
986
  },
987
  {
988
+ "epoch": 71.67826086956522,
989
+ "grad_norm": 0.5442056059837341,
990
+ "learning_rate": 8.474117647058823e-05,
991
+ "loss": 0.8669,
992
+ "step": 12400
993
  },
994
  {
995
+ "epoch": 71.67826086956522,
996
+ "eval_loss": 0.8810063600540161,
997
+ "eval_runtime": 127.6211,
998
+ "eval_samples_per_second": 20.404,
999
+ "eval_steps_per_second": 2.554,
1000
+ "eval_wer": 0.7488761586505948,
1001
+ "step": 12400
1002
  },
1003
  {
1004
+ "epoch": 72.83478260869565,
1005
+ "grad_norm": 0.540812611579895,
1006
+ "learning_rate": 8.121176470588235e-05,
1007
+ "loss": 0.8616,
1008
+ "step": 12600
1009
  },
1010
  {
1011
+ "epoch": 72.83478260869565,
1012
+ "eval_loss": 0.8785393834114075,
1013
+ "eval_runtime": 127.672,
1014
+ "eval_samples_per_second": 20.396,
1015
+ "eval_steps_per_second": 2.553,
1016
+ "eval_wer": 0.7446393762183235,
1017
+ "step": 12600
1018
  },
1019
  {
1020
+ "epoch": 73.99130434782609,
1021
+ "grad_norm": 0.7264253497123718,
1022
+ "learning_rate": 7.768235294117647e-05,
1023
+ "loss": 0.8572,
1024
+ "step": 12800
1025
  },
1026
  {
1027
+ "epoch": 73.99130434782609,
1028
+ "eval_loss": 0.8806383013725281,
1029
+ "eval_runtime": 127.5992,
1030
+ "eval_samples_per_second": 20.408,
1031
+ "eval_steps_per_second": 2.555,
1032
+ "eval_wer": 0.7415761626287942,
1033
+ "step": 12800
1034
  },
1035
  {
1036
+ "epoch": 75.14492753623189,
1037
+ "grad_norm": 0.5088544487953186,
1038
+ "learning_rate": 7.415294117647058e-05,
1039
+ "loss": 0.8536,
1040
+ "step": 13000
1041
  },
1042
  {
1043
+ "epoch": 75.14492753623189,
1044
+ "eval_loss": 0.8745167851448059,
1045
+ "eval_runtime": 127.7753,
1046
+ "eval_samples_per_second": 20.38,
1047
+ "eval_steps_per_second": 2.551,
1048
+ "eval_wer": 0.7391295699566376,
1049
+ "step": 13000
1050
  },
1051
  {
1052
+ "epoch": 76.30144927536232,
1053
+ "grad_norm": 0.5131168961524963,
1054
+ "learning_rate": 7.06235294117647e-05,
1055
+ "loss": 0.8453,
1056
+ "step": 13200
1057
  },
1058
  {
1059
+ "epoch": 76.30144927536232,
1060
+ "eval_loss": 0.8805530071258545,
1061
+ "eval_runtime": 127.9225,
1062
+ "eval_samples_per_second": 20.356,
1063
+ "eval_steps_per_second": 2.548,
1064
+ "eval_wer": 0.7384731670445956,
1065
+ "step": 13200
1066
  },
1067
  {
1068
+ "epoch": 77.45797101449276,
1069
+ "grad_norm": 0.6087790131568909,
1070
+ "learning_rate": 6.709411764705882e-05,
1071
+ "loss": 0.8435,
1072
+ "step": 13400
1073
  },
1074
  {
1075
+ "epoch": 77.45797101449276,
1076
+ "eval_loss": 0.8695724606513977,
1077
+ "eval_runtime": 132.7677,
1078
+ "eval_samples_per_second": 19.613,
1079
+ "eval_steps_per_second": 2.455,
1080
+ "eval_wer": 0.7399053188526873,
1081
+ "step": 13400
1082
  },
1083
  {
1084
+ "epoch": 78.61449275362318,
1085
+ "grad_norm": 0.49738621711730957,
1086
+ "learning_rate": 6.356470588235294e-05,
1087
+ "loss": 0.8392,
1088
+ "step": 13600
1089
  },
1090
  {
1091
+ "epoch": 78.61449275362318,
1092
+ "eval_loss": 0.8718934059143066,
1093
+ "eval_runtime": 128.0532,
1094
+ "eval_samples_per_second": 20.335,
1095
+ "eval_steps_per_second": 2.546,
1096
+ "eval_wer": 0.7387317500099455,
1097
+ "step": 13600
1098
  },
1099
  {
1100
+ "epoch": 79.77101449275362,
1101
+ "grad_norm": 0.5539494156837463,
1102
+ "learning_rate": 6.003529411764706e-05,
1103
+ "loss": 0.8361,
1104
+ "step": 13800
1105
  },
1106
  {
1107
+ "epoch": 79.77101449275362,
1108
+ "eval_loss": 0.8683921694755554,
1109
+ "eval_runtime": 128.202,
1110
+ "eval_samples_per_second": 20.312,
1111
+ "eval_steps_per_second": 2.543,
1112
+ "eval_wer": 0.7372598162071846,
1113
+ "step": 13800
1114
  },
1115
  {
1116
+ "epoch": 80.92753623188406,
1117
+ "grad_norm": 0.5560426712036133,
1118
+ "learning_rate": 5.650588235294117e-05,
1119
+ "loss": 0.8339,
1120
+ "step": 14000
1121
+ },
1122
+ {
1123
+ "epoch": 80.92753623188406,
1124
+ "eval_loss": 0.8655583262443542,
1125
+ "eval_runtime": 128.1625,
1126
+ "eval_samples_per_second": 20.318,
1127
+ "eval_steps_per_second": 2.544,
1128
+ "eval_wer": 0.7349325695190357,
1129
+ "step": 14000
1130
+ },
1131
+ {
1132
+ "epoch": 82.08115942028985,
1133
+ "grad_norm": 0.5128791928291321,
1134
+ "learning_rate": 5.299411764705882e-05,
1135
+ "loss": 0.8264,
1136
+ "step": 14200
1137
+ },
1138
+ {
1139
+ "epoch": 82.08115942028985,
1140
+ "eval_loss": 0.8635972738265991,
1141
+ "eval_runtime": 128.0931,
1142
+ "eval_samples_per_second": 20.329,
1143
+ "eval_steps_per_second": 2.545,
1144
+ "eval_wer": 0.7316704459561603,
1145
+ "step": 14200
1146
  },
1147
  {
1148
+ "epoch": 83.23768115942029,
1149
+ "grad_norm": 0.7810338139533997,
1150
+ "learning_rate": 4.946470588235294e-05,
1151
+ "loss": 0.8184,
1152
+ "step": 14400
 
 
1153
  },
1154
  {
1155
+ "epoch": 83.23768115942029,
1156
+ "eval_loss": 0.8625103831291199,
1157
+ "eval_runtime": 128.3971,
1158
+ "eval_samples_per_second": 20.281,
1159
+ "eval_steps_per_second": 2.539,
1160
+ "eval_wer": 0.732943469785575,
1161
+ "step": 14400
1162
  },
1163
  {
1164
+ "epoch": 84.39420289855073,
1165
+ "grad_norm": 0.5399278998374939,
1166
+ "learning_rate": 4.593529411764705e-05,
1167
+ "loss": 0.8246,
1168
+ "step": 14600
 
 
1169
  },
1170
  {
1171
+ "epoch": 84.39420289855073,
1172
+ "eval_loss": 0.8625257611274719,
1173
+ "eval_runtime": 128.2959,
1174
+ "eval_samples_per_second": 20.297,
1175
+ "eval_steps_per_second": 2.541,
1176
+ "eval_wer": 0.7340971476309822,
1177
+ "step": 14600
1178
  },
1179
  {
1180
+ "epoch": 85.55072463768116,
1181
+ "grad_norm": 0.4938839077949524,
1182
+ "learning_rate": 4.240588235294118e-05,
1183
+ "loss": 0.8176,
1184
+ "step": 14800
 
 
1185
  },
1186
  {
1187
+ "epoch": 85.55072463768116,
1188
+ "eval_loss": 0.8633288741111755,
1189
+ "eval_runtime": 128.0601,
1190
+ "eval_samples_per_second": 20.334,
1191
+ "eval_steps_per_second": 2.546,
1192
+ "eval_wer": 0.732088156900187,
1193
+ "step": 14800
1194
  },
1195
  {
1196
+ "epoch": 86.7072463768116,
1197
+ "grad_norm": 0.5640541911125183,
1198
+ "learning_rate": 3.887647058823529e-05,
1199
+ "loss": 0.8167,
1200
+ "step": 15000
1201
+ },
1202
+ {
1203
+ "epoch": 86.7072463768116,
1204
+ "eval_loss": 0.8610928654670715,
1205
+ "eval_runtime": 128.3971,
1206
+ "eval_samples_per_second": 20.281,
1207
+ "eval_steps_per_second": 2.539,
1208
+ "eval_wer": 0.7309344790547798,
1209
+ "step": 15000
1210
  },
1211
  {
1212
+ "epoch": 87.86376811594202,
1213
+ "grad_norm": 0.5315191149711609,
1214
+ "learning_rate": 3.534705882352941e-05,
1215
+ "loss": 0.8123,
1216
+ "step": 15200
1217
  },
1218
  {
1219
+ "epoch": 87.86376811594202,
1220
+ "eval_loss": 0.8582242131233215,
1221
+ "eval_runtime": 128.0796,
1222
+ "eval_samples_per_second": 20.331,
1223
+ "eval_steps_per_second": 2.545,
1224
+ "eval_wer": 0.7285873413692963,
1225
+ "step": 15200
1226
  },
1227
  {
1228
+ "epoch": 89.01739130434783,
1229
+ "grad_norm": 0.5748764276504517,
1230
+ "learning_rate": 3.1817647058823525e-05,
1231
+ "loss": 0.8045,
1232
+ "step": 15400
1233
  },
1234
  {
1235
+ "epoch": 89.01739130434783,
1236
+ "eval_loss": 0.8577102422714233,
1237
+ "eval_runtime": 128.1955,
1238
+ "eval_samples_per_second": 20.313,
1239
+ "eval_steps_per_second": 2.543,
1240
+ "eval_wer": 0.7275331185105621,
1241
+ "step": 15400
1242
  },
1243
  {
1244
+ "epoch": 90.17391304347827,
1245
+ "grad_norm": 0.5497247576713562,
1246
+ "learning_rate": 2.8288235294117643e-05,
1247
+ "loss": 0.8121,
1248
+ "step": 15600
1249
  },
1250
  {
1251
+ "epoch": 90.17391304347827,
1252
+ "eval_loss": 0.8565927147865295,
1253
+ "eval_runtime": 128.1806,
1254
+ "eval_samples_per_second": 20.315,
1255
+ "eval_steps_per_second": 2.543,
1256
+ "eval_wer": 0.7265982416358356,
1257
+ "step": 15600
1258
  },
1259
  {
1260
+ "epoch": 91.33043478260869,
1261
+ "grad_norm": 0.5754753947257996,
1262
+ "learning_rate": 2.475882352941176e-05,
1263
+ "loss": 0.8061,
1264
+ "step": 15800
1265
  },
1266
  {
1267
+ "epoch": 91.33043478260869,
1268
+ "eval_loss": 0.8549688458442688,
1269
+ "eval_runtime": 128.2142,
1270
+ "eval_samples_per_second": 20.31,
1271
+ "eval_steps_per_second": 2.543,
1272
+ "eval_wer": 0.7265783506385011,
1273
+ "step": 15800
1274
  },
1275
  {
1276
+ "epoch": 92.48695652173913,
1277
+ "grad_norm": 0.7763922810554504,
1278
+ "learning_rate": 2.122941176470588e-05,
1279
+ "loss": 0.7999,
1280
+ "step": 16000
1281
  },
1282
  {
1283
+ "epoch": 92.48695652173913,
1284
+ "eval_loss": 0.8554069399833679,
1285
+ "eval_runtime": 128.218,
1286
+ "eval_samples_per_second": 20.309,
1287
+ "eval_steps_per_second": 2.543,
1288
+ "eval_wer": 0.725723037753113,
1289
+ "step": 16000
1290
  },
1291
  {
1292
+ "epoch": 93.64347826086957,
1293
+ "grad_norm": 0.5108122825622559,
1294
+ "learning_rate": 1.7699999999999997e-05,
1295
+ "loss": 0.7977,
1296
+ "step": 16200
1297
  },
1298
  {
1299
+ "epoch": 93.64347826086957,
1300
+ "eval_loss": 0.8529220819473267,
1301
+ "eval_runtime": 128.0979,
1302
+ "eval_samples_per_second": 20.328,
1303
+ "eval_steps_per_second": 2.545,
1304
+ "eval_wer": 0.7248876158650595,
1305
+ "step": 16200
1306
  },
1307
  {
1308
+ "epoch": 94.8,
1309
+ "grad_norm": 0.501833975315094,
1310
+ "learning_rate": 1.4188235294117647e-05,
1311
+ "loss": 0.7999,
1312
+ "step": 16400
1313
  },
1314
  {
1315
+ "epoch": 94.8,
1316
+ "eval_loss": 0.8523918390274048,
1317
+ "eval_runtime": 128.3137,
1318
+ "eval_samples_per_second": 20.294,
1319
+ "eval_steps_per_second": 2.541,
1320
+ "eval_wer": 0.7234753550543024,
1321
+ "step": 16400
1322
  },
1323
  {
1324
+ "epoch": 95.95652173913044,
1325
+ "grad_norm": 0.6044087409973145,
1326
+ "learning_rate": 1.0658823529411765e-05,
1327
+ "loss": 0.798,
1328
+ "step": 16600
1329
  },
1330
  {
1331
+ "epoch": 95.95652173913044,
1332
+ "eval_loss": 0.852449893951416,
1333
+ "eval_runtime": 129.1001,
1334
+ "eval_samples_per_second": 20.17,
1335
+ "eval_steps_per_second": 2.525,
1336
+ "eval_wer": 0.7250865258384055,
1337
+ "step": 16600
1338
  },
1339
  {
1340
+ "epoch": 97.11014492753623,
1341
+ "grad_norm": 0.5589261054992676,
1342
+ "learning_rate": 7.1294117647058815e-06,
1343
+ "loss": 0.7932,
1344
+ "step": 16800
1345
  },
1346
  {
1347
+ "epoch": 97.11014492753623,
1348
+ "eval_loss": 0.8512468338012695,
1349
+ "eval_runtime": 128.4107,
1350
+ "eval_samples_per_second": 20.279,
1351
+ "eval_steps_per_second": 2.539,
1352
+ "eval_wer": 0.7236941560249831,
1353
+ "step": 16800
1354
  },
1355
  {
1356
+ "epoch": 98.26666666666667,
1357
+ "grad_norm": 0.4919562339782715,
1358
+ "learning_rate": 3.6e-06,
1359
+ "loss": 0.793,
1360
+ "step": 17000
1361
  },
1362
  {
1363
+ "epoch": 98.26666666666667,
1364
+ "eval_loss": 0.8516792058944702,
1365
+ "eval_runtime": 128.4775,
1366
+ "eval_samples_per_second": 20.268,
1367
+ "eval_steps_per_second": 2.537,
1368
+ "eval_wer": 0.7228985161315988,
1369
+ "step": 17000
1370
  },
1371
  {
1372
+ "epoch": 99.4231884057971,
1373
+ "grad_norm": 0.5763407945632935,
1374
+ "learning_rate": 7.058823529411765e-08,
1375
+ "loss": 0.7989,
1376
+ "step": 17200
1377
  },
1378
  {
1379
+ "epoch": 99.4231884057971,
1380
+ "eval_loss": 0.8514899611473083,
1381
+ "eval_runtime": 128.5442,
1382
+ "eval_samples_per_second": 20.258,
1383
+ "eval_steps_per_second": 2.536,
1384
+ "eval_wer": 0.7236344830329793,
1385
+ "step": 17200
1386
  },
1387
  {
1388
+ "epoch": 99.4231884057971,
1389
+ "step": 17200,
1390
+ "total_flos": 3.3406327721118188e+19,
1391
+ "train_loss": 1.118092892668968,
1392
+ "train_runtime": 45286.7863,
1393
+ "train_samples_per_second": 6.092,
1394
+ "train_steps_per_second": 0.38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1395
  }
1396
  ],
1397
+ "logging_steps": 200,
1398
+ "max_steps": 17200,
1399
  "num_input_tokens_seen": 0,
1400
  "num_train_epochs": 100,
1401
+ "save_steps": 200,
1402
  "stateful_callbacks": {
1403
  "TrainerControl": {
1404
  "args": {
 
1411
  "attributes": {}
1412
  }
1413
  },
1414
+ "total_flos": 3.3406327721118188e+19,
1415
+ "train_batch_size": 8,
1416
  "trial_name": null,
1417
  "trial_params": null
1418
  }