csikasote commited on
Commit
5cb6051
·
verified ·
1 Parent(s): 7dd91e1

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base-960h
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,10 +19,10 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # wav2vec2-base-librispeech-model
18
 
19
- This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.8515
22
- - Wer: 0.7230
23
 
24
  ## Model description
25
 
 
3
  license: apache-2.0
4
  base_model: facebook/wav2vec2-base-960h
5
  tags:
6
+ - automatic-speech-recognition
7
+ - libri10h
8
+ - mms
9
  - generated_from_trainer
10
  metrics:
11
  - wer
 
19
 
20
  # wav2vec2-base-librispeech-model
21
 
22
+ This model is a fine-tuned version of [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) on the LIBRI10H - ENG dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.8515
25
+ - Wer: 0.7226
26
 
27
  ## Model description
28
 
adapter.eng.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5227271458e2148b60995be53604c884de9775ac1d560989c01006974d075768
3
- size 19101500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6d1cdf2d600a156e728c608d5a4e2e983b986c64d6d702eb30085c2a90208b
3
+ size 19101788
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 99.4231884057971,
3
- "eval_loss": 0.8513913154602051,
4
- "eval_runtime": 128.9183,
5
  "eval_samples": 2604,
6
- "eval_samples_per_second": 20.199,
7
- "eval_steps_per_second": 2.529,
8
- "eval_wer": 0.7245295779130365,
9
  "total_flos": 3.3406327721118188e+19,
10
- "train_loss": 1.118092892668968,
11
- "train_runtime": 45286.7863,
12
  "train_samples": 2759,
13
- "train_samples_per_second": 6.092,
14
- "train_steps_per_second": 0.38
15
  }
 
1
  {
2
  "epoch": 99.4231884057971,
3
+ "eval_loss": 0.8515061736106873,
4
+ "eval_runtime": 131.4192,
5
  "eval_samples": 2604,
6
+ "eval_samples_per_second": 19.814,
7
+ "eval_steps_per_second": 2.481,
8
+ "eval_wer": 0.7226001511715797,
9
  "total_flos": 3.3406327721118188e+19,
10
+ "train_loss": 1.1172501763632132,
11
+ "train_runtime": 45795.1729,
12
  "train_samples": 2759,
13
+ "train_samples_per_second": 6.025,
14
+ "train_steps_per_second": 0.376
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 99.4231884057971,
3
- "eval_loss": 0.8513913154602051,
4
- "eval_runtime": 128.9183,
5
  "eval_samples": 2604,
6
- "eval_samples_per_second": 20.199,
7
- "eval_steps_per_second": 2.529,
8
- "eval_wer": 0.7245295779130365
9
  }
 
1
  {
2
  "epoch": 99.4231884057971,
3
+ "eval_loss": 0.8515061736106873,
4
+ "eval_runtime": 131.4192,
5
  "eval_samples": 2604,
6
+ "eval_samples_per_second": 19.814,
7
+ "eval_steps_per_second": 2.481,
8
+ "eval_wer": 0.7226001511715797
9
  }
runs/Mar11_20-35-40_srvrocgpu011.uct.ac.za/events.out.tfevents.1741764214.srvrocgpu011.uct.ac.za.31183.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801fa5f12a4d0e798dae53184fb24d8efc3190ce186785a232c6eae194bab890
3
+ size 412
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 99.4231884057971,
3
  "total_flos": 3.3406327721118188e+19,
4
- "train_loss": 1.118092892668968,
5
- "train_runtime": 45286.7863,
6
  "train_samples": 2759,
7
- "train_samples_per_second": 6.092,
8
- "train_steps_per_second": 0.38
9
  }
 
1
  {
2
  "epoch": 99.4231884057971,
3
  "total_flos": 3.3406327721118188e+19,
4
+ "train_loss": 1.1172501763632132,
5
+ "train_runtime": 45795.1729,
6
  "train_samples": 2759,
7
+ "train_samples_per_second": 6.025,
8
+ "train_steps_per_second": 0.376
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8512468338012695,
3
- "best_model_checkpoint": "/scratch/skscla001/speech/results/wav2vec2-base-librispeech-model/checkpoint-16800",
4
  "epoch": 99.4231884057971,
5
  "eval_steps": 200,
6
  "global_step": 17200,
@@ -10,1388 +10,1388 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.1565217391304348,
13
- "grad_norm": 1.2893730401992798,
14
  "learning_rate": 0.00029699999999999996,
15
  "loss": 4.7426,
16
  "step": 200
17
  },
18
  {
19
  "epoch": 1.1565217391304348,
20
- "eval_loss": 2.8968212604522705,
21
- "eval_runtime": 126.7808,
22
- "eval_samples_per_second": 20.539,
23
- "eval_steps_per_second": 2.571,
24
  "eval_wer": 1.0,
25
  "step": 200
26
  },
27
  {
28
  "epoch": 2.3130434782608695,
29
- "grad_norm": 0.3036455512046814,
30
  "learning_rate": 0.0002965058823529411,
31
  "loss": 2.7493,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.3130434782608695,
36
- "eval_loss": 2.26717472076416,
37
- "eval_runtime": 126.2768,
38
- "eval_samples_per_second": 20.621,
39
- "eval_steps_per_second": 2.582,
40
- "eval_wer": 0.9989457771412659,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 3.4695652173913043,
45
- "grad_norm": 0.44915950298309326,
46
  "learning_rate": 0.0002929764705882353,
47
- "loss": 2.0156,
48
  "step": 600
49
  },
50
  {
51
  "epoch": 3.4695652173913043,
52
- "eval_loss": 1.6933485269546509,
53
- "eval_runtime": 126.1204,
54
- "eval_samples_per_second": 20.647,
55
- "eval_steps_per_second": 2.585,
56
- "eval_wer": 0.9759716752197956,
57
  "step": 600
58
  },
59
  {
60
  "epoch": 4.626086956521739,
61
- "grad_norm": 0.3140698969364166,
62
  "learning_rate": 0.0002894470588235294,
63
- "loss": 1.7839,
64
  "step": 800
65
  },
66
  {
67
  "epoch": 4.626086956521739,
68
- "eval_loss": 1.5435950756072998,
69
- "eval_runtime": 126.0194,
70
- "eval_samples_per_second": 20.663,
71
- "eval_steps_per_second": 2.587,
72
- "eval_wer": 0.960735171261487,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 5.782608695652174,
77
- "grad_norm": 0.40944600105285645,
78
  "learning_rate": 0.0002859176470588235,
79
- "loss": 1.6691,
80
  "step": 1000
81
  },
82
  {
83
  "epoch": 5.782608695652174,
84
- "eval_loss": 1.4462971687316895,
85
- "eval_runtime": 130.6554,
86
- "eval_samples_per_second": 19.93,
87
- "eval_steps_per_second": 2.495,
88
- "eval_wer": 0.9393921311214545,
89
  "step": 1000
90
  },
91
  {
92
  "epoch": 6.939130434782609,
93
- "grad_norm": 0.39339712262153625,
94
  "learning_rate": 0.00028238823529411764,
95
- "loss": 1.592,
96
  "step": 1200
97
  },
98
  {
99
  "epoch": 6.939130434782609,
100
- "eval_loss": 1.3824982643127441,
101
- "eval_runtime": 125.9506,
102
- "eval_samples_per_second": 20.675,
103
- "eval_steps_per_second": 2.588,
104
- "eval_wer": 0.9289493575207861,
105
  "step": 1200
106
  },
107
  {
108
  "epoch": 8.092753623188406,
109
- "grad_norm": 0.4750897288322449,
110
  "learning_rate": 0.00027885882352941176,
111
- "loss": 1.5384,
112
  "step": 1400
113
  },
114
  {
115
  "epoch": 8.092753623188406,
116
- "eval_loss": 1.3446310758590698,
117
- "eval_runtime": 126.0297,
118
- "eval_samples_per_second": 20.662,
119
- "eval_steps_per_second": 2.587,
120
- "eval_wer": 0.9162986832159764,
121
  "step": 1400
122
  },
123
  {
124
  "epoch": 9.24927536231884,
125
- "grad_norm": 0.9390007257461548,
126
  "learning_rate": 0.0002753294117647059,
127
- "loss": 1.4929,
128
  "step": 1600
129
  },
130
  {
131
  "epoch": 9.24927536231884,
132
- "eval_loss": 1.317173957824707,
133
- "eval_runtime": 126.3023,
134
- "eval_samples_per_second": 20.617,
135
- "eval_steps_per_second": 2.581,
136
- "eval_wer": 0.9207144846242591,
137
  "step": 1600
138
  },
139
  {
140
  "epoch": 10.405797101449275,
141
- "grad_norm": 0.3777698576450348,
142
  "learning_rate": 0.0002718,
143
- "loss": 1.4563,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 10.405797101449275,
148
- "eval_loss": 1.2747116088867188,
149
- "eval_runtime": 130.3532,
150
- "eval_samples_per_second": 19.976,
151
- "eval_steps_per_second": 2.501,
152
- "eval_wer": 0.907427298404742,
153
  "step": 1800
154
  },
155
  {
156
  "epoch": 11.56231884057971,
157
- "grad_norm": 0.4892979562282562,
158
  "learning_rate": 0.00026827058823529406,
159
- "loss": 1.4278,
160
  "step": 2000
161
  },
162
  {
163
  "epoch": 11.56231884057971,
164
- "eval_loss": 1.2533023357391357,
165
- "eval_runtime": 126.6214,
166
- "eval_samples_per_second": 20.565,
167
- "eval_steps_per_second": 2.575,
168
- "eval_wer": 0.9113657158769941,
169
  "step": 2000
170
  },
171
  {
172
  "epoch": 12.718840579710145,
173
- "grad_norm": 0.4739660620689392,
174
  "learning_rate": 0.00026474117647058823,
175
- "loss": 1.3945,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.718840579710145,
180
- "eval_loss": 1.2253831624984741,
181
- "eval_runtime": 126.6581,
182
- "eval_samples_per_second": 20.559,
183
- "eval_steps_per_second": 2.574,
184
- "eval_wer": 0.8960297569320126,
185
  "step": 2200
186
  },
187
  {
188
  "epoch": 13.87536231884058,
189
- "grad_norm": 0.4378398060798645,
190
  "learning_rate": 0.00026121176470588235,
191
- "loss": 1.3772,
192
  "step": 2400
193
  },
194
  {
195
  "epoch": 13.87536231884058,
196
- "eval_loss": 1.2075964212417603,
197
- "eval_runtime": 126.6197,
198
- "eval_samples_per_second": 20.566,
199
- "eval_steps_per_second": 2.575,
200
- "eval_wer": 0.9005450133269682,
201
  "step": 2400
202
  },
203
  {
204
  "epoch": 15.028985507246377,
205
- "grad_norm": 0.5404449105262756,
206
  "learning_rate": 0.00025768235294117646,
207
- "loss": 1.3473,
208
  "step": 2600
209
  },
210
  {
211
  "epoch": 15.028985507246377,
212
- "eval_loss": 1.1940252780914307,
213
- "eval_runtime": 126.7107,
214
- "eval_samples_per_second": 20.551,
215
- "eval_steps_per_second": 2.573,
216
- "eval_wer": 0.8836973385845567,
217
  "step": 2600
218
  },
219
  {
220
  "epoch": 16.18550724637681,
221
- "grad_norm": 0.4465963840484619,
222
  "learning_rate": 0.0002541529411764706,
223
- "loss": 1.3281,
224
  "step": 2800
225
  },
226
  {
227
  "epoch": 16.18550724637681,
228
- "eval_loss": 1.1721361875534058,
229
- "eval_runtime": 126.7359,
230
- "eval_samples_per_second": 20.547,
231
- "eval_steps_per_second": 2.572,
232
- "eval_wer": 0.8849305804193022,
233
  "step": 2800
234
  },
235
  {
236
  "epoch": 17.342028985507245,
237
- "grad_norm": 0.36305734515190125,
238
  "learning_rate": 0.00025062352941176465,
239
- "loss": 1.3126,
240
  "step": 3000
241
  },
242
  {
243
  "epoch": 17.342028985507245,
244
- "eval_loss": 1.1684266328811646,
245
- "eval_runtime": 126.6868,
246
- "eval_samples_per_second": 20.555,
247
- "eval_steps_per_second": 2.573,
248
- "eval_wer": 0.8771929824561403,
249
  "step": 3000
250
  },
251
  {
252
  "epoch": 18.49855072463768,
253
- "grad_norm": 0.8538926243782043,
254
  "learning_rate": 0.0002470941176470588,
255
- "loss": 1.294,
256
  "step": 3200
257
  },
258
  {
259
  "epoch": 18.49855072463768,
260
- "eval_loss": 1.1741454601287842,
261
- "eval_runtime": 126.8469,
262
- "eval_samples_per_second": 20.529,
263
- "eval_steps_per_second": 2.57,
264
- "eval_wer": 0.8795799021362931,
265
  "step": 3200
266
  },
267
  {
268
  "epoch": 19.655072463768114,
269
- "grad_norm": 0.4263986051082611,
270
  "learning_rate": 0.00024356470588235294,
271
- "loss": 1.277,
272
  "step": 3400
273
  },
274
  {
275
  "epoch": 19.655072463768114,
276
- "eval_loss": 1.1416850090026855,
277
- "eval_runtime": 126.7443,
278
- "eval_samples_per_second": 20.545,
279
- "eval_steps_per_second": 2.572,
280
- "eval_wer": 0.8725384890798424,
281
  "step": 3400
282
  },
283
  {
284
  "epoch": 20.81159420289855,
285
- "grad_norm": 0.5871867537498474,
286
  "learning_rate": 0.00024003529411764703,
287
- "loss": 1.2668,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 20.81159420289855,
292
- "eval_loss": 1.13175368309021,
293
- "eval_runtime": 126.8413,
294
- "eval_samples_per_second": 20.53,
295
- "eval_steps_per_second": 2.57,
296
- "eval_wer": 0.8663722799061145,
297
  "step": 3600
298
  },
299
  {
300
  "epoch": 21.968115942028987,
301
- "grad_norm": 0.455477237701416,
302
  "learning_rate": 0.00023650588235294115,
303
- "loss": 1.2456,
304
  "step": 3800
305
  },
306
  {
307
  "epoch": 21.968115942028987,
308
- "eval_loss": 1.1195415258407593,
309
- "eval_runtime": 127.194,
310
- "eval_samples_per_second": 20.473,
311
- "eval_steps_per_second": 2.563,
312
- "eval_wer": 0.8677845407168715,
313
  "step": 3800
314
  },
315
  {
316
  "epoch": 23.121739130434783,
317
- "grad_norm": 0.3989470601081848,
318
  "learning_rate": 0.0002329764705882353,
319
- "loss": 1.2317,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 23.121739130434783,
324
- "eval_loss": 1.1132415533065796,
325
- "eval_runtime": 126.9937,
326
- "eval_samples_per_second": 20.505,
327
- "eval_steps_per_second": 2.567,
328
- "eval_wer": 0.8625532084178701,
329
  "step": 4000
330
  },
331
  {
332
  "epoch": 24.278260869565216,
333
- "grad_norm": 0.5162937045097351,
334
  "learning_rate": 0.00022944705882352938,
335
- "loss": 1.2225,
336
  "step": 4200
337
  },
338
  {
339
  "epoch": 24.278260869565216,
340
- "eval_loss": 1.1121087074279785,
341
- "eval_runtime": 126.8632,
342
- "eval_samples_per_second": 20.526,
343
- "eval_steps_per_second": 2.57,
344
- "eval_wer": 0.8715638302104467,
345
  "step": 4200
346
  },
347
  {
348
  "epoch": 25.434782608695652,
349
- "grad_norm": 0.4300783574581146,
350
  "learning_rate": 0.00022591764705882353,
351
- "loss": 1.2059,
352
  "step": 4400
353
  },
354
  {
355
  "epoch": 25.434782608695652,
356
- "eval_loss": 1.1003350019454956,
357
- "eval_runtime": 131.7283,
358
- "eval_samples_per_second": 19.768,
359
- "eval_steps_per_second": 2.475,
360
- "eval_wer": 0.8622747344551855,
361
  "step": 4400
362
  },
363
  {
364
  "epoch": 26.591304347826085,
365
- "grad_norm": 0.4021275043487549,
366
  "learning_rate": 0.00022238823529411762,
367
- "loss": 1.1991,
368
  "step": 4600
369
  },
370
  {
371
  "epoch": 26.591304347826085,
372
- "eval_loss": 1.0933383703231812,
373
- "eval_runtime": 127.0907,
374
- "eval_samples_per_second": 20.489,
375
- "eval_steps_per_second": 2.565,
376
- "eval_wer": 0.8598878147750328,
377
  "step": 4600
378
  },
379
  {
380
  "epoch": 27.747826086956522,
381
- "grad_norm": 0.8139039874076843,
382
  "learning_rate": 0.00021885882352941174,
383
- "loss": 1.1832,
384
  "step": 4800
385
  },
386
  {
387
  "epoch": 27.747826086956522,
388
- "eval_loss": 1.089185118675232,
389
- "eval_runtime": 127.3985,
390
- "eval_samples_per_second": 20.44,
391
- "eval_steps_per_second": 2.559,
392
- "eval_wer": 0.8555913593507578,
393
  "step": 4800
394
  },
395
  {
396
  "epoch": 28.904347826086955,
397
- "grad_norm": 0.42960914969444275,
398
  "learning_rate": 0.00021532941176470588,
399
- "loss": 1.1732,
400
  "step": 5000
401
  },
402
  {
403
  "epoch": 28.904347826086955,
404
- "eval_loss": 1.0722780227661133,
405
- "eval_runtime": 127.3657,
406
- "eval_samples_per_second": 20.445,
407
- "eval_steps_per_second": 2.56,
408
- "eval_wer": 0.8469387755102041,
409
  "step": 5000
410
  },
411
  {
412
  "epoch": 30.057971014492754,
413
- "grad_norm": 0.4363812804222107,
414
  "learning_rate": 0.00021179999999999997,
415
- "loss": 1.1588,
416
  "step": 5200
417
  },
418
  {
419
  "epoch": 30.057971014492754,
420
- "eval_loss": 1.0763438940048218,
421
- "eval_runtime": 127.5044,
422
- "eval_samples_per_second": 20.423,
423
- "eval_steps_per_second": 2.557,
424
- "eval_wer": 0.8478537613875959,
425
  "step": 5200
426
  },
427
  {
428
  "epoch": 31.214492753623187,
429
- "grad_norm": 0.49785545468330383,
430
  "learning_rate": 0.0002082705882352941,
431
- "loss": 1.149,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 31.214492753623187,
436
- "eval_loss": 1.0577690601348877,
437
- "eval_runtime": 127.6923,
438
- "eval_samples_per_second": 20.393,
439
  "eval_steps_per_second": 2.553,
440
- "eval_wer": 0.8409515853124876,
441
  "step": 5400
442
  },
443
  {
444
  "epoch": 32.37101449275362,
445
- "grad_norm": 0.3857053220272064,
446
  "learning_rate": 0.00020474117647058823,
447
- "loss": 1.1398,
448
  "step": 5600
449
  },
450
  {
451
  "epoch": 32.37101449275362,
452
- "eval_loss": 1.0509884357452393,
453
- "eval_runtime": 127.2975,
454
- "eval_samples_per_second": 20.456,
455
- "eval_steps_per_second": 2.561,
456
- "eval_wer": 0.8412101682778375,
457
  "step": 5600
458
  },
459
  {
460
  "epoch": 33.527536231884056,
461
- "grad_norm": 0.4151206612586975,
462
  "learning_rate": 0.00020121176470588233,
463
- "loss": 1.1297,
464
  "step": 5800
465
  },
466
  {
467
  "epoch": 33.527536231884056,
468
- "eval_loss": 1.0429149866104126,
469
- "eval_runtime": 127.4154,
470
- "eval_samples_per_second": 20.437,
471
- "eval_steps_per_second": 2.559,
472
- "eval_wer": 0.8413494052591797,
473
  "step": 5800
474
  },
475
  {
476
  "epoch": 34.68405797101449,
477
- "grad_norm": 0.46378350257873535,
478
  "learning_rate": 0.00019768235294117647,
479
- "loss": 1.117,
480
  "step": 6000
481
  },
482
  {
483
  "epoch": 34.68405797101449,
484
- "eval_loss": 1.0374654531478882,
485
- "eval_runtime": 127.3947,
486
- "eval_samples_per_second": 20.44,
487
- "eval_steps_per_second": 2.559,
488
- "eval_wer": 0.8321796554879262,
489
  "step": 6000
490
  },
491
  {
492
  "epoch": 35.84057971014493,
493
- "grad_norm": 0.4410320222377777,
494
  "learning_rate": 0.00019415294117647056,
495
- "loss": 1.1119,
496
  "step": 6200
497
  },
498
  {
499
  "epoch": 35.84057971014493,
500
- "eval_loss": 1.0412153005599976,
501
- "eval_runtime": 127.3041,
502
- "eval_samples_per_second": 20.455,
503
- "eval_steps_per_second": 2.561,
504
- "eval_wer": 0.8316227075625572,
505
  "step": 6200
506
  },
507
  {
508
  "epoch": 36.99710144927536,
509
- "grad_norm": 1.033341884613037,
510
  "learning_rate": 0.00019062352941176468,
511
- "loss": 1.0986,
512
  "step": 6400
513
  },
514
  {
515
  "epoch": 36.99710144927536,
516
- "eval_loss": 1.029122233390808,
517
- "eval_runtime": 127.5003,
518
- "eval_samples_per_second": 20.423,
519
- "eval_steps_per_second": 2.557,
520
- "eval_wer": 0.8341687552213868,
521
  "step": 6400
522
  },
523
  {
524
  "epoch": 38.15072463768116,
525
- "grad_norm": 0.44958433508872986,
526
  "learning_rate": 0.00018709411764705882,
527
- "loss": 1.0858,
528
  "step": 6600
529
  },
530
  {
531
  "epoch": 38.15072463768116,
532
- "eval_loss": 1.0151913166046143,
533
- "eval_runtime": 127.4524,
534
- "eval_samples_per_second": 20.431,
535
- "eval_steps_per_second": 2.558,
536
- "eval_wer": 0.824103910570076,
537
  "step": 6600
538
  },
539
  {
540
  "epoch": 39.30724637681159,
541
- "grad_norm": 0.42733389139175415,
542
  "learning_rate": 0.00018356470588235291,
543
- "loss": 1.0781,
544
  "step": 6800
545
  },
546
  {
547
  "epoch": 39.30724637681159,
548
- "eval_loss": 1.028800129890442,
549
- "eval_runtime": 127.5392,
550
- "eval_samples_per_second": 20.417,
551
- "eval_steps_per_second": 2.556,
552
- "eval_wer": 0.8245017305167681,
553
  "step": 6800
554
  },
555
  {
556
  "epoch": 40.46376811594203,
557
- "grad_norm": 0.5106310248374939,
558
  "learning_rate": 0.00018003529411764703,
559
- "loss": 1.0712,
560
  "step": 7000
561
  },
562
  {
563
  "epoch": 40.46376811594203,
564
- "eval_loss": 1.009470820426941,
565
- "eval_runtime": 127.3801,
566
- "eval_samples_per_second": 20.443,
567
- "eval_steps_per_second": 2.559,
568
- "eval_wer": 0.8191112702390898,
569
  "step": 7000
570
  },
571
  {
572
  "epoch": 41.620289855072464,
573
- "grad_norm": 0.4691919982433319,
574
  "learning_rate": 0.00017650588235294118,
575
- "loss": 1.0646,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 41.620289855072464,
580
- "eval_loss": 1.0001919269561768,
581
- "eval_runtime": 127.5273,
582
- "eval_samples_per_second": 20.419,
583
- "eval_steps_per_second": 2.556,
584
- "eval_wer": 0.8189521422604129,
585
  "step": 7200
586
  },
587
  {
588
  "epoch": 42.7768115942029,
589
- "grad_norm": 0.4837665855884552,
590
  "learning_rate": 0.00017297647058823527,
591
- "loss": 1.052,
592
  "step": 7400
593
  },
594
  {
595
  "epoch": 42.7768115942029,
596
- "eval_loss": 0.9987174868583679,
597
- "eval_runtime": 127.7719,
598
- "eval_samples_per_second": 20.38,
599
- "eval_steps_per_second": 2.551,
600
- "eval_wer": 0.8281616740263357,
601
  "step": 7400
602
  },
603
  {
604
  "epoch": 43.93333333333333,
605
- "grad_norm": 0.4914618134498596,
606
  "learning_rate": 0.00016944705882352941,
607
- "loss": 1.0422,
608
  "step": 7600
609
  },
610
  {
611
  "epoch": 43.93333333333333,
612
- "eval_loss": 0.9949682950973511,
613
- "eval_runtime": 127.6961,
614
- "eval_samples_per_second": 20.392,
615
- "eval_steps_per_second": 2.553,
616
- "eval_wer": 0.8155109997215261,
617
  "step": 7600
618
  },
619
  {
620
  "epoch": 45.08695652173913,
621
- "grad_norm": 0.4811262786388397,
622
  "learning_rate": 0.0001659176470588235,
623
- "loss": 1.0345,
624
  "step": 7800
625
  },
626
  {
627
  "epoch": 45.08695652173913,
628
- "eval_loss": 0.9906212091445923,
629
- "eval_runtime": 127.616,
630
- "eval_samples_per_second": 20.405,
631
- "eval_steps_per_second": 2.555,
632
- "eval_wer": 0.8113139992839241,
633
  "step": 7800
634
  },
635
  {
636
  "epoch": 46.243478260869566,
637
- "grad_norm": 0.7449145317077637,
638
  "learning_rate": 0.00016238823529411762,
639
- "loss": 1.0209,
640
  "step": 8000
641
  },
642
  {
643
  "epoch": 46.243478260869566,
644
- "eval_loss": 0.9777077436447144,
645
- "eval_runtime": 127.811,
646
- "eval_samples_per_second": 20.374,
647
- "eval_steps_per_second": 2.551,
648
- "eval_wer": 0.812129530174643,
649
  "step": 8000
650
  },
651
  {
652
  "epoch": 47.4,
653
- "grad_norm": 0.48268821835517883,
654
  "learning_rate": 0.00015885882352941177,
655
- "loss": 1.0217,
656
  "step": 8200
657
  },
658
  {
659
  "epoch": 47.4,
660
- "eval_loss": 0.9648416638374329,
661
- "eval_runtime": 127.7579,
662
- "eval_samples_per_second": 20.382,
663
- "eval_steps_per_second": 2.552,
664
- "eval_wer": 0.8048295341528424,
665
  "step": 8200
666
  },
667
  {
668
  "epoch": 48.55652173913043,
669
- "grad_norm": 0.5607514977455139,
670
  "learning_rate": 0.00015532941176470586,
671
- "loss": 1.0067,
672
  "step": 8400
673
  },
674
  {
675
  "epoch": 48.55652173913043,
676
- "eval_loss": 0.9864305853843689,
677
- "eval_runtime": 127.8335,
678
- "eval_samples_per_second": 20.37,
679
- "eval_steps_per_second": 2.55,
680
- "eval_wer": 0.8027210884353742,
681
  "step": 8400
682
  },
683
  {
684
  "epoch": 49.71304347826087,
685
- "grad_norm": 0.6017518043518066,
686
  "learning_rate": 0.00015179999999999998,
687
- "loss": 1.0033,
688
  "step": 8600
689
  },
690
  {
691
  "epoch": 49.71304347826087,
692
- "eval_loss": 0.9633412957191467,
693
- "eval_runtime": 127.7324,
694
- "eval_samples_per_second": 20.386,
695
- "eval_steps_per_second": 2.552,
696
- "eval_wer": 0.7977284481043879,
697
  "step": 8600
698
  },
699
  {
700
  "epoch": 50.869565217391305,
701
- "grad_norm": 0.5103667974472046,
702
  "learning_rate": 0.0001482705882352941,
703
- "loss": 0.9925,
704
  "step": 8800
705
  },
706
  {
707
  "epoch": 50.869565217391305,
708
- "eval_loss": 0.9522212147712708,
709
- "eval_runtime": 127.8431,
710
- "eval_samples_per_second": 20.369,
711
- "eval_steps_per_second": 2.55,
712
- "eval_wer": 0.7946254525201893,
713
  "step": 8800
714
  },
715
  {
716
  "epoch": 52.0231884057971,
717
- "grad_norm": 0.5089967250823975,
718
  "learning_rate": 0.0001447411764705882,
719
  "loss": 0.9784,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 52.0231884057971,
724
- "eval_loss": 0.9520332217216492,
725
- "eval_runtime": 127.451,
726
- "eval_samples_per_second": 20.431,
727
- "eval_steps_per_second": 2.558,
728
- "eval_wer": 0.7978875760830648,
729
  "step": 9000
730
  },
731
  {
732
  "epoch": 53.17971014492753,
733
- "grad_norm": 0.4959864020347595,
734
  "learning_rate": 0.00014121176470588236,
735
- "loss": 0.9757,
736
  "step": 9200
737
  },
738
  {
739
  "epoch": 53.17971014492753,
740
- "eval_loss": 0.940946638584137,
741
- "eval_runtime": 127.57,
742
- "eval_samples_per_second": 20.412,
743
- "eval_steps_per_second": 2.555,
744
- "eval_wer": 0.78828022437045,
745
  "step": 9200
746
  },
747
  {
748
  "epoch": 54.33623188405797,
749
- "grad_norm": 0.518679678440094,
750
  "learning_rate": 0.00013768235294117645,
751
- "loss": 0.9648,
752
  "step": 9400
753
  },
754
  {
755
  "epoch": 54.33623188405797,
756
- "eval_loss": 0.9465099573135376,
757
- "eval_runtime": 127.6829,
758
- "eval_samples_per_second": 20.394,
759
- "eval_steps_per_second": 2.553,
760
- "eval_wer": 0.788578589330469,
761
  "step": 9400
762
  },
763
  {
764
  "epoch": 55.492753623188406,
765
- "grad_norm": 0.7649258375167847,
766
  "learning_rate": 0.00013415294117647057,
767
- "loss": 0.9553,
768
  "step": 9600
769
  },
770
  {
771
  "epoch": 55.492753623188406,
772
- "eval_loss": 0.9416138529777527,
773
- "eval_runtime": 127.6278,
774
- "eval_samples_per_second": 20.403,
775
- "eval_steps_per_second": 2.554,
776
- "eval_wer": 0.7877829494370848,
777
  "step": 9600
778
  },
779
  {
780
  "epoch": 56.64927536231884,
781
- "grad_norm": 0.4904441237449646,
782
  "learning_rate": 0.00013062352941176468,
783
- "loss": 0.955,
784
  "step": 9800
785
  },
786
  {
787
  "epoch": 56.64927536231884,
788
- "eval_loss": 0.9271659255027771,
789
- "eval_runtime": 127.4634,
790
- "eval_samples_per_second": 20.429,
791
- "eval_steps_per_second": 2.558,
792
- "eval_wer": 0.7855153757409397,
793
  "step": 9800
794
  },
795
  {
796
  "epoch": 57.80579710144927,
797
- "grad_norm": 0.46599653363227844,
798
  "learning_rate": 0.00012709411764705883,
799
- "loss": 0.9442,
800
  "step": 10000
801
  },
802
  {
803
  "epoch": 57.80579710144927,
804
- "eval_loss": 0.9268618822097778,
805
- "eval_runtime": 127.2806,
806
- "eval_samples_per_second": 20.459,
807
- "eval_steps_per_second": 2.561,
808
- "eval_wer": 0.7777976687751124,
809
  "step": 10000
810
  },
811
  {
812
  "epoch": 58.96231884057971,
813
- "grad_norm": 0.5592873096466064,
814
  "learning_rate": 0.00012356470588235292,
815
- "loss": 0.9346,
816
  "step": 10200
817
  },
818
  {
819
  "epoch": 58.96231884057971,
820
- "eval_loss": 0.9154264330863953,
821
- "eval_runtime": 127.5803,
822
- "eval_samples_per_second": 20.411,
823
- "eval_steps_per_second": 2.555,
824
- "eval_wer": 0.7685085730198512,
825
  "step": 10200
826
  },
827
  {
828
  "epoch": 60.11594202898551,
829
- "grad_norm": 0.4597800374031067,
830
  "learning_rate": 0.00012003529411764705,
831
- "loss": 0.9271,
832
  "step": 10400
833
  },
834
  {
835
  "epoch": 60.11594202898551,
836
- "eval_loss": 0.9182903170585632,
837
- "eval_runtime": 127.6408,
838
- "eval_samples_per_second": 20.401,
839
- "eval_steps_per_second": 2.554,
840
- "eval_wer": 0.7704777817559773,
841
  "step": 10400
842
  },
843
  {
844
  "epoch": 61.27246376811594,
845
- "grad_norm": 0.5329666137695312,
846
  "learning_rate": 0.00011650588235294116,
847
- "loss": 0.9165,
848
  "step": 10600
849
  },
850
  {
851
  "epoch": 61.27246376811594,
852
- "eval_loss": 0.9233406186103821,
853
- "eval_runtime": 128.0081,
854
- "eval_samples_per_second": 20.342,
855
- "eval_steps_per_second": 2.547,
856
- "eval_wer": 0.7725862274734455,
857
  "step": 10600
858
  },
859
  {
860
  "epoch": 62.428985507246374,
861
- "grad_norm": 0.546533465385437,
862
  "learning_rate": 0.00011297647058823529,
863
- "loss": 0.9165,
864
  "step": 10800
865
  },
866
  {
867
  "epoch": 62.428985507246374,
868
- "eval_loss": 0.9042327404022217,
869
- "eval_runtime": 127.7124,
870
- "eval_samples_per_second": 20.39,
871
- "eval_steps_per_second": 2.553,
872
- "eval_wer": 0.7694235588972431,
873
  "step": 10800
874
  },
875
  {
876
  "epoch": 63.585507246376814,
877
- "grad_norm": 0.49292466044425964,
878
  "learning_rate": 0.0001094470588235294,
879
- "loss": 0.9088,
880
  "step": 11000
881
  },
882
  {
883
  "epoch": 63.585507246376814,
884
- "eval_loss": 0.9099429845809937,
885
- "eval_runtime": 127.459,
886
- "eval_samples_per_second": 20.43,
887
- "eval_steps_per_second": 2.558,
888
- "eval_wer": 0.7645900465449338,
889
  "step": 11000
890
  },
891
  {
892
  "epoch": 64.74202898550725,
893
- "grad_norm": 0.7860192656517029,
894
  "learning_rate": 0.00010591764705882352,
895
- "loss": 0.9018,
896
  "step": 11200
897
  },
898
  {
899
  "epoch": 64.74202898550725,
900
- "eval_loss": 0.8967615365982056,
901
- "eval_runtime": 127.4599,
902
- "eval_samples_per_second": 20.43,
903
- "eval_steps_per_second": 2.558,
904
- "eval_wer": 0.7602140271313204,
905
  "step": 11200
906
  },
907
  {
908
  "epoch": 65.89855072463769,
909
- "grad_norm": 0.4933035373687744,
910
  "learning_rate": 0.00010238823529411763,
911
- "loss": 0.8985,
912
  "step": 11400
913
  },
914
  {
915
  "epoch": 65.89855072463769,
916
- "eval_loss": 0.8918899297714233,
917
- "eval_runtime": 127.4862,
918
- "eval_samples_per_second": 20.426,
919
- "eval_steps_per_second": 2.557,
920
- "eval_wer": 0.7606118470780124,
921
  "step": 11400
922
  },
923
  {
924
  "epoch": 67.05217391304348,
925
- "grad_norm": 0.5119895935058594,
926
  "learning_rate": 9.885882352941176e-05,
927
- "loss": 0.8851,
928
  "step": 11600
929
  },
930
  {
931
  "epoch": 67.05217391304348,
932
- "eval_loss": 0.8957463502883911,
933
- "eval_runtime": 127.6383,
934
- "eval_samples_per_second": 20.401,
935
- "eval_steps_per_second": 2.554,
936
- "eval_wer": 0.75442574690695,
937
  "step": 11600
938
  },
939
  {
940
  "epoch": 68.20869565217392,
941
- "grad_norm": 0.61966872215271,
942
  "learning_rate": 9.532941176470588e-05,
943
- "loss": 0.8834,
944
  "step": 11800
945
  },
946
  {
947
  "epoch": 68.20869565217392,
948
- "eval_loss": 0.8949310183525085,
949
- "eval_runtime": 127.6674,
950
- "eval_samples_per_second": 20.397,
951
- "eval_steps_per_second": 2.554,
952
- "eval_wer": 0.7545848748856268,
953
  "step": 11800
954
  },
955
  {
956
  "epoch": 69.36521739130434,
957
- "grad_norm": 0.5450541973114014,
958
  "learning_rate": 9.18e-05,
959
- "loss": 0.8779,
960
  "step": 12000
961
  },
962
  {
963
  "epoch": 69.36521739130434,
964
- "eval_loss": 0.8951545357704163,
965
- "eval_runtime": 127.7527,
966
- "eval_samples_per_second": 20.383,
967
- "eval_steps_per_second": 2.552,
968
- "eval_wer": 0.7552213868003341,
969
  "step": 12000
970
  },
971
  {
972
  "epoch": 70.52173913043478,
973
- "grad_norm": 0.46215394139289856,
974
  "learning_rate": 8.82705882352941e-05,
975
- "loss": 0.8708,
976
  "step": 12200
977
  },
978
  {
979
  "epoch": 70.52173913043478,
980
- "eval_loss": 0.8882645964622498,
981
- "eval_runtime": 127.6812,
982
- "eval_samples_per_second": 20.395,
983
- "eval_steps_per_second": 2.553,
984
- "eval_wer": 0.75265544814417,
985
  "step": 12200
986
  },
987
  {
988
  "epoch": 71.67826086956522,
989
- "grad_norm": 0.5442056059837341,
990
  "learning_rate": 8.474117647058823e-05,
991
- "loss": 0.8669,
992
  "step": 12400
993
  },
994
  {
995
  "epoch": 71.67826086956522,
996
- "eval_loss": 0.8810063600540161,
997
- "eval_runtime": 127.6211,
998
- "eval_samples_per_second": 20.404,
999
- "eval_steps_per_second": 2.554,
1000
- "eval_wer": 0.7488761586505948,
1001
  "step": 12400
1002
  },
1003
  {
1004
  "epoch": 72.83478260869565,
1005
- "grad_norm": 0.540812611579895,
1006
  "learning_rate": 8.121176470588235e-05,
1007
- "loss": 0.8616,
1008
  "step": 12600
1009
  },
1010
  {
1011
  "epoch": 72.83478260869565,
1012
- "eval_loss": 0.8785393834114075,
1013
- "eval_runtime": 127.672,
1014
- "eval_samples_per_second": 20.396,
1015
- "eval_steps_per_second": 2.553,
1016
- "eval_wer": 0.7446393762183235,
1017
  "step": 12600
1018
  },
1019
  {
1020
  "epoch": 73.99130434782609,
1021
- "grad_norm": 0.7264253497123718,
1022
  "learning_rate": 7.768235294117647e-05,
1023
- "loss": 0.8572,
1024
  "step": 12800
1025
  },
1026
  {
1027
  "epoch": 73.99130434782609,
1028
- "eval_loss": 0.8806383013725281,
1029
- "eval_runtime": 127.5992,
1030
- "eval_samples_per_second": 20.408,
1031
- "eval_steps_per_second": 2.555,
1032
- "eval_wer": 0.7415761626287942,
1033
  "step": 12800
1034
  },
1035
  {
1036
  "epoch": 75.14492753623189,
1037
- "grad_norm": 0.5088544487953186,
1038
  "learning_rate": 7.415294117647058e-05,
1039
- "loss": 0.8536,
1040
  "step": 13000
1041
  },
1042
  {
1043
  "epoch": 75.14492753623189,
1044
- "eval_loss": 0.8745167851448059,
1045
- "eval_runtime": 127.7753,
1046
- "eval_samples_per_second": 20.38,
1047
- "eval_steps_per_second": 2.551,
1048
- "eval_wer": 0.7391295699566376,
1049
  "step": 13000
1050
  },
1051
  {
1052
  "epoch": 76.30144927536232,
1053
- "grad_norm": 0.5131168961524963,
1054
  "learning_rate": 7.06235294117647e-05,
1055
- "loss": 0.8453,
1056
  "step": 13200
1057
  },
1058
  {
1059
  "epoch": 76.30144927536232,
1060
- "eval_loss": 0.8805530071258545,
1061
- "eval_runtime": 127.9225,
1062
- "eval_samples_per_second": 20.356,
1063
- "eval_steps_per_second": 2.548,
1064
- "eval_wer": 0.7384731670445956,
1065
  "step": 13200
1066
  },
1067
  {
1068
  "epoch": 77.45797101449276,
1069
- "grad_norm": 0.6087790131568909,
1070
  "learning_rate": 6.709411764705882e-05,
1071
  "loss": 0.8435,
1072
  "step": 13400
1073
  },
1074
  {
1075
  "epoch": 77.45797101449276,
1076
- "eval_loss": 0.8695724606513977,
1077
- "eval_runtime": 132.7677,
1078
- "eval_samples_per_second": 19.613,
1079
- "eval_steps_per_second": 2.455,
1080
- "eval_wer": 0.7399053188526873,
1081
  "step": 13400
1082
  },
1083
  {
1084
  "epoch": 78.61449275362318,
1085
- "grad_norm": 0.49738621711730957,
1086
  "learning_rate": 6.356470588235294e-05,
1087
- "loss": 0.8392,
1088
  "step": 13600
1089
  },
1090
  {
1091
  "epoch": 78.61449275362318,
1092
- "eval_loss": 0.8718934059143066,
1093
- "eval_runtime": 128.0532,
1094
- "eval_samples_per_second": 20.335,
1095
- "eval_steps_per_second": 2.546,
1096
- "eval_wer": 0.7387317500099455,
1097
  "step": 13600
1098
  },
1099
  {
1100
  "epoch": 79.77101449275362,
1101
- "grad_norm": 0.5539494156837463,
1102
  "learning_rate": 6.003529411764706e-05,
1103
- "loss": 0.8361,
1104
  "step": 13800
1105
  },
1106
  {
1107
  "epoch": 79.77101449275362,
1108
- "eval_loss": 0.8683921694755554,
1109
- "eval_runtime": 128.202,
1110
- "eval_samples_per_second": 20.312,
1111
- "eval_steps_per_second": 2.543,
1112
- "eval_wer": 0.7372598162071846,
1113
  "step": 13800
1114
  },
1115
  {
1116
  "epoch": 80.92753623188406,
1117
- "grad_norm": 0.5560426712036133,
1118
  "learning_rate": 5.650588235294117e-05,
1119
- "loss": 0.8339,
1120
  "step": 14000
1121
  },
1122
  {
1123
  "epoch": 80.92753623188406,
1124
- "eval_loss": 0.8655583262443542,
1125
- "eval_runtime": 128.1625,
1126
- "eval_samples_per_second": 20.318,
1127
- "eval_steps_per_second": 2.544,
1128
- "eval_wer": 0.7349325695190357,
1129
  "step": 14000
1130
  },
1131
  {
1132
  "epoch": 82.08115942028985,
1133
- "grad_norm": 0.5128791928291321,
1134
  "learning_rate": 5.299411764705882e-05,
1135
- "loss": 0.8264,
1136
  "step": 14200
1137
  },
1138
  {
1139
  "epoch": 82.08115942028985,
1140
- "eval_loss": 0.8635972738265991,
1141
- "eval_runtime": 128.0931,
1142
- "eval_samples_per_second": 20.329,
1143
- "eval_steps_per_second": 2.545,
1144
- "eval_wer": 0.7316704459561603,
1145
  "step": 14200
1146
  },
1147
  {
1148
  "epoch": 83.23768115942029,
1149
- "grad_norm": 0.7810338139533997,
1150
  "learning_rate": 4.946470588235294e-05,
1151
- "loss": 0.8184,
1152
  "step": 14400
1153
  },
1154
  {
1155
  "epoch": 83.23768115942029,
1156
- "eval_loss": 0.8625103831291199,
1157
- "eval_runtime": 128.3971,
1158
- "eval_samples_per_second": 20.281,
1159
- "eval_steps_per_second": 2.539,
1160
- "eval_wer": 0.732943469785575,
1161
  "step": 14400
1162
  },
1163
  {
1164
  "epoch": 84.39420289855073,
1165
- "grad_norm": 0.5399278998374939,
1166
  "learning_rate": 4.593529411764705e-05,
1167
- "loss": 0.8246,
1168
  "step": 14600
1169
  },
1170
  {
1171
  "epoch": 84.39420289855073,
1172
- "eval_loss": 0.8625257611274719,
1173
- "eval_runtime": 128.2959,
1174
- "eval_samples_per_second": 20.297,
1175
- "eval_steps_per_second": 2.541,
1176
- "eval_wer": 0.7340971476309822,
1177
  "step": 14600
1178
  },
1179
  {
1180
  "epoch": 85.55072463768116,
1181
- "grad_norm": 0.4938839077949524,
1182
  "learning_rate": 4.240588235294118e-05,
1183
  "loss": 0.8176,
1184
  "step": 14800
1185
  },
1186
  {
1187
  "epoch": 85.55072463768116,
1188
- "eval_loss": 0.8633288741111755,
1189
- "eval_runtime": 128.0601,
1190
- "eval_samples_per_second": 20.334,
1191
- "eval_steps_per_second": 2.546,
1192
- "eval_wer": 0.732088156900187,
1193
  "step": 14800
1194
  },
1195
  {
1196
  "epoch": 86.7072463768116,
1197
- "grad_norm": 0.5640541911125183,
1198
  "learning_rate": 3.887647058823529e-05,
1199
- "loss": 0.8167,
1200
  "step": 15000
1201
  },
1202
  {
1203
  "epoch": 86.7072463768116,
1204
- "eval_loss": 0.8610928654670715,
1205
- "eval_runtime": 128.3971,
1206
- "eval_samples_per_second": 20.281,
1207
- "eval_steps_per_second": 2.539,
1208
- "eval_wer": 0.7309344790547798,
1209
  "step": 15000
1210
  },
1211
  {
1212
  "epoch": 87.86376811594202,
1213
- "grad_norm": 0.5315191149711609,
1214
  "learning_rate": 3.534705882352941e-05,
1215
- "loss": 0.8123,
1216
  "step": 15200
1217
  },
1218
  {
1219
  "epoch": 87.86376811594202,
1220
- "eval_loss": 0.8582242131233215,
1221
- "eval_runtime": 128.0796,
1222
- "eval_samples_per_second": 20.331,
1223
- "eval_steps_per_second": 2.545,
1224
- "eval_wer": 0.7285873413692963,
1225
  "step": 15200
1226
  },
1227
  {
1228
  "epoch": 89.01739130434783,
1229
- "grad_norm": 0.5748764276504517,
1230
  "learning_rate": 3.1817647058823525e-05,
1231
- "loss": 0.8045,
1232
  "step": 15400
1233
  },
1234
  {
1235
  "epoch": 89.01739130434783,
1236
- "eval_loss": 0.8577102422714233,
1237
- "eval_runtime": 128.1955,
1238
- "eval_samples_per_second": 20.313,
1239
- "eval_steps_per_second": 2.543,
1240
- "eval_wer": 0.7275331185105621,
1241
  "step": 15400
1242
  },
1243
  {
1244
  "epoch": 90.17391304347827,
1245
- "grad_norm": 0.5497247576713562,
1246
  "learning_rate": 2.8288235294117643e-05,
1247
- "loss": 0.8121,
1248
  "step": 15600
1249
  },
1250
  {
1251
  "epoch": 90.17391304347827,
1252
- "eval_loss": 0.8565927147865295,
1253
- "eval_runtime": 128.1806,
1254
- "eval_samples_per_second": 20.315,
1255
- "eval_steps_per_second": 2.543,
1256
- "eval_wer": 0.7265982416358356,
1257
  "step": 15600
1258
  },
1259
  {
1260
  "epoch": 91.33043478260869,
1261
- "grad_norm": 0.5754753947257996,
1262
  "learning_rate": 2.475882352941176e-05,
1263
- "loss": 0.8061,
1264
  "step": 15800
1265
  },
1266
  {
1267
  "epoch": 91.33043478260869,
1268
- "eval_loss": 0.8549688458442688,
1269
- "eval_runtime": 128.2142,
1270
- "eval_samples_per_second": 20.31,
1271
- "eval_steps_per_second": 2.543,
1272
- "eval_wer": 0.7265783506385011,
1273
  "step": 15800
1274
  },
1275
  {
1276
  "epoch": 92.48695652173913,
1277
- "grad_norm": 0.7763922810554504,
1278
  "learning_rate": 2.122941176470588e-05,
1279
- "loss": 0.7999,
1280
  "step": 16000
1281
  },
1282
  {
1283
  "epoch": 92.48695652173913,
1284
- "eval_loss": 0.8554069399833679,
1285
- "eval_runtime": 128.218,
1286
- "eval_samples_per_second": 20.309,
1287
- "eval_steps_per_second": 2.543,
1288
- "eval_wer": 0.725723037753113,
1289
  "step": 16000
1290
  },
1291
  {
1292
  "epoch": 93.64347826086957,
1293
- "grad_norm": 0.5108122825622559,
1294
  "learning_rate": 1.7699999999999997e-05,
1295
- "loss": 0.7977,
1296
  "step": 16200
1297
  },
1298
  {
1299
  "epoch": 93.64347826086957,
1300
- "eval_loss": 0.8529220819473267,
1301
- "eval_runtime": 128.0979,
1302
- "eval_samples_per_second": 20.328,
1303
- "eval_steps_per_second": 2.545,
1304
- "eval_wer": 0.7248876158650595,
1305
  "step": 16200
1306
  },
1307
  {
1308
  "epoch": 94.8,
1309
- "grad_norm": 0.501833975315094,
1310
  "learning_rate": 1.4188235294117647e-05,
1311
- "loss": 0.7999,
1312
  "step": 16400
1313
  },
1314
  {
1315
  "epoch": 94.8,
1316
- "eval_loss": 0.8523918390274048,
1317
- "eval_runtime": 128.3137,
1318
- "eval_samples_per_second": 20.294,
1319
- "eval_steps_per_second": 2.541,
1320
- "eval_wer": 0.7234753550543024,
1321
  "step": 16400
1322
  },
1323
  {
1324
  "epoch": 95.95652173913044,
1325
- "grad_norm": 0.6044087409973145,
1326
  "learning_rate": 1.0658823529411765e-05,
1327
- "loss": 0.798,
1328
  "step": 16600
1329
  },
1330
  {
1331
  "epoch": 95.95652173913044,
1332
- "eval_loss": 0.852449893951416,
1333
- "eval_runtime": 129.1001,
1334
- "eval_samples_per_second": 20.17,
1335
- "eval_steps_per_second": 2.525,
1336
- "eval_wer": 0.7250865258384055,
1337
  "step": 16600
1338
  },
1339
  {
1340
  "epoch": 97.11014492753623,
1341
- "grad_norm": 0.5589261054992676,
1342
  "learning_rate": 7.1294117647058815e-06,
1343
- "loss": 0.7932,
1344
  "step": 16800
1345
  },
1346
  {
1347
  "epoch": 97.11014492753623,
1348
- "eval_loss": 0.8512468338012695,
1349
- "eval_runtime": 128.4107,
1350
- "eval_samples_per_second": 20.279,
1351
- "eval_steps_per_second": 2.539,
1352
- "eval_wer": 0.7236941560249831,
1353
  "step": 16800
1354
  },
1355
  {
1356
  "epoch": 98.26666666666667,
1357
- "grad_norm": 0.4919562339782715,
1358
  "learning_rate": 3.6e-06,
1359
- "loss": 0.793,
1360
  "step": 17000
1361
  },
1362
  {
1363
  "epoch": 98.26666666666667,
1364
- "eval_loss": 0.8516792058944702,
1365
- "eval_runtime": 128.4775,
1366
- "eval_samples_per_second": 20.268,
1367
- "eval_steps_per_second": 2.537,
1368
- "eval_wer": 0.7228985161315988,
1369
  "step": 17000
1370
  },
1371
  {
1372
  "epoch": 99.4231884057971,
1373
- "grad_norm": 0.5763407945632935,
1374
  "learning_rate": 7.058823529411765e-08,
1375
- "loss": 0.7989,
1376
  "step": 17200
1377
  },
1378
  {
1379
  "epoch": 99.4231884057971,
1380
- "eval_loss": 0.8514899611473083,
1381
- "eval_runtime": 128.5442,
1382
- "eval_samples_per_second": 20.258,
1383
- "eval_steps_per_second": 2.536,
1384
- "eval_wer": 0.7236344830329793,
1385
  "step": 17200
1386
  },
1387
  {
1388
  "epoch": 99.4231884057971,
1389
  "step": 17200,
1390
  "total_flos": 3.3406327721118188e+19,
1391
- "train_loss": 1.118092892668968,
1392
- "train_runtime": 45286.7863,
1393
- "train_samples_per_second": 6.092,
1394
- "train_steps_per_second": 0.38
1395
  }
1396
  ],
1397
  "logging_steps": 200,
 
1
  {
2
+ "best_metric": 0.8514918088912964,
3
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/wav2vec2-base-librispeech-model/checkpoint-17200",
4
  "epoch": 99.4231884057971,
5
  "eval_steps": 200,
6
  "global_step": 17200,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.1565217391304348,
13
+ "grad_norm": 1.2894129753112793,
14
  "learning_rate": 0.00029699999999999996,
15
  "loss": 4.7426,
16
  "step": 200
17
  },
18
  {
19
  "epoch": 1.1565217391304348,
20
+ "eval_loss": 2.8968138694763184,
21
+ "eval_runtime": 127.7821,
22
+ "eval_samples_per_second": 20.378,
23
+ "eval_steps_per_second": 2.551,
24
  "eval_wer": 1.0,
25
  "step": 200
26
  },
27
  {
28
  "epoch": 2.3130434782608695,
29
+ "grad_norm": 0.3344170153141022,
30
  "learning_rate": 0.0002965058823529411,
31
  "loss": 2.7493,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.3130434782608695,
36
+ "eval_loss": 2.271225690841675,
37
+ "eval_runtime": 126.821,
38
+ "eval_samples_per_second": 20.533,
39
+ "eval_steps_per_second": 2.571,
40
+ "eval_wer": 0.998687194175916,
41
  "step": 400
42
  },
43
  {
44
  "epoch": 3.4695652173913043,
45
+ "grad_norm": 0.4589632451534271,
46
  "learning_rate": 0.0002929764705882353,
47
+ "loss": 2.0118,
48
  "step": 600
49
  },
50
  {
51
  "epoch": 3.4695652173913043,
52
+ "eval_loss": 1.6904748678207397,
53
+ "eval_runtime": 126.1402,
54
+ "eval_samples_per_second": 20.644,
55
+ "eval_steps_per_second": 2.584,
56
+ "eval_wer": 0.9768468791025182,
57
  "step": 600
58
  },
59
  {
60
  "epoch": 4.626086956521739,
61
+ "grad_norm": 0.3284680247306824,
62
  "learning_rate": 0.0002894470588235294,
63
+ "loss": 1.7815,
64
  "step": 800
65
  },
66
  {
67
  "epoch": 4.626086956521739,
68
+ "eval_loss": 1.54061758518219,
69
+ "eval_runtime": 131.8205,
70
+ "eval_samples_per_second": 19.754,
71
+ "eval_steps_per_second": 2.473,
72
+ "eval_wer": 0.9587858535226956,
73
  "step": 800
74
  },
75
  {
76
  "epoch": 5.782608695652174,
77
+ "grad_norm": 0.39838629961013794,
78
  "learning_rate": 0.0002859176470588235,
79
+ "loss": 1.667,
80
  "step": 1000
81
  },
82
  {
83
  "epoch": 5.782608695652174,
84
+ "eval_loss": 1.4410459995269775,
85
+ "eval_runtime": 127.4156,
86
+ "eval_samples_per_second": 20.437,
87
+ "eval_steps_per_second": 2.559,
88
+ "eval_wer": 0.9384771452440626,
89
  "step": 1000
90
  },
91
  {
92
  "epoch": 6.939130434782609,
93
+ "grad_norm": 0.41073665022850037,
94
  "learning_rate": 0.00028238823529411764,
95
+ "loss": 1.5898,
96
  "step": 1200
97
  },
98
  {
99
  "epoch": 6.939130434782609,
100
+ "eval_loss": 1.3798913955688477,
101
+ "eval_runtime": 127.1442,
102
+ "eval_samples_per_second": 20.481,
103
+ "eval_steps_per_second": 2.564,
104
+ "eval_wer": 0.9282332816167402,
105
  "step": 1200
106
  },
107
  {
108
  "epoch": 8.092753623188406,
109
+ "grad_norm": 0.4452091157436371,
110
  "learning_rate": 0.00027885882352941176,
111
+ "loss": 1.5366,
112
  "step": 1400
113
  },
114
  {
115
  "epoch": 8.092753623188406,
116
+ "eval_loss": 1.3415042161941528,
117
+ "eval_runtime": 126.9587,
118
+ "eval_samples_per_second": 20.511,
119
+ "eval_steps_per_second": 2.568,
120
+ "eval_wer": 0.9165174841866571,
121
  "step": 1400
122
  },
123
  {
124
  "epoch": 9.24927536231884,
125
+ "grad_norm": 0.893618643283844,
126
  "learning_rate": 0.0002753294117647059,
127
+ "loss": 1.4917,
128
  "step": 1600
129
  },
130
  {
131
  "epoch": 9.24927536231884,
132
+ "eval_loss": 1.3143993616104126,
133
+ "eval_runtime": 127.039,
134
+ "eval_samples_per_second": 20.498,
135
+ "eval_steps_per_second": 2.566,
136
+ "eval_wer": 0.9205354656482476,
137
  "step": 1600
138
  },
139
  {
140
  "epoch": 10.405797101449275,
141
+ "grad_norm": 0.3896653652191162,
142
  "learning_rate": 0.0002718,
143
+ "loss": 1.455,
144
  "step": 1800
145
  },
146
  {
147
  "epoch": 10.405797101449275,
148
+ "eval_loss": 1.2746105194091797,
149
+ "eval_runtime": 127.2229,
150
+ "eval_samples_per_second": 20.468,
151
+ "eval_steps_per_second": 2.562,
152
+ "eval_wer": 0.9067907864900346,
153
  "step": 1800
154
  },
155
  {
156
  "epoch": 11.56231884057971,
157
+ "grad_norm": 0.46033382415771484,
158
  "learning_rate": 0.00026827058823529406,
159
+ "loss": 1.4266,
160
  "step": 2000
161
  },
162
  {
163
  "epoch": 11.56231884057971,
164
+ "eval_loss": 1.2521367073059082,
165
+ "eval_runtime": 127.5795,
166
+ "eval_samples_per_second": 20.411,
167
+ "eval_steps_per_second": 2.555,
168
+ "eval_wer": 0.9102319290289215,
169
  "step": 2000
170
  },
171
  {
172
  "epoch": 12.718840579710145,
173
+ "grad_norm": 0.4809955358505249,
174
  "learning_rate": 0.00026474117647058823,
175
+ "loss": 1.3925,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.718840579710145,
180
+ "eval_loss": 1.2212536334991455,
181
+ "eval_runtime": 127.1582,
182
+ "eval_samples_per_second": 20.478,
183
+ "eval_steps_per_second": 2.564,
184
+ "eval_wer": 0.8971436527827505,
185
  "step": 2200
186
  },
187
  {
188
  "epoch": 13.87536231884058,
189
+ "grad_norm": 0.4258709251880646,
190
  "learning_rate": 0.00026121176470588235,
191
+ "loss": 1.3754,
192
  "step": 2400
193
  },
194
  {
195
  "epoch": 13.87536231884058,
196
+ "eval_loss": 1.2028323411941528,
197
+ "eval_runtime": 126.9929,
198
+ "eval_samples_per_second": 20.505,
199
+ "eval_steps_per_second": 2.567,
200
+ "eval_wer": 0.8938019652305367,
201
  "step": 2400
202
  },
203
  {
204
  "epoch": 15.028985507246377,
205
+ "grad_norm": 0.5979081392288208,
206
  "learning_rate": 0.00025768235294117646,
207
+ "loss": 1.3452,
208
  "step": 2600
209
  },
210
  {
211
  "epoch": 15.028985507246377,
212
+ "eval_loss": 1.193106770515442,
213
+ "eval_runtime": 127.1193,
214
+ "eval_samples_per_second": 20.485,
215
+ "eval_steps_per_second": 2.565,
216
+ "eval_wer": 0.8825834427338187,
217
  "step": 2600
218
  },
219
  {
220
  "epoch": 16.18550724637681,
221
+ "grad_norm": 0.415189266204834,
222
  "learning_rate": 0.0002541529411764706,
223
+ "loss": 1.3265,
224
  "step": 2800
225
  },
226
  {
227
  "epoch": 16.18550724637681,
228
+ "eval_loss": 1.168208360671997,
229
+ "eval_runtime": 127.0071,
230
+ "eval_samples_per_second": 20.503,
231
+ "eval_steps_per_second": 2.567,
232
+ "eval_wer": 0.8860444762700401,
233
  "step": 2800
234
  },
235
  {
236
  "epoch": 17.342028985507245,
237
+ "grad_norm": 0.3661479651927948,
238
  "learning_rate": 0.00025062352941176465,
239
+ "loss": 1.3106,
240
  "step": 3000
241
  },
242
  {
243
  "epoch": 17.342028985507245,
244
+ "eval_loss": 1.1645121574401855,
245
+ "eval_runtime": 127.0406,
246
+ "eval_samples_per_second": 20.497,
247
+ "eval_steps_per_second": 2.566,
248
+ "eval_wer": 0.8752038827226797,
249
  "step": 3000
250
  },
251
  {
252
  "epoch": 18.49855072463768,
253
+ "grad_norm": 0.831349790096283,
254
  "learning_rate": 0.0002470941176470588,
255
+ "loss": 1.2917,
256
  "step": 3200
257
  },
258
  {
259
  "epoch": 18.49855072463768,
260
+ "eval_loss": 1.1686357259750366,
261
+ "eval_runtime": 127.3366,
262
+ "eval_samples_per_second": 20.45,
263
+ "eval_steps_per_second": 2.56,
264
+ "eval_wer": 0.8779886223495246,
265
  "step": 3200
266
  },
267
  {
268
  "epoch": 19.655072463768114,
269
+ "grad_norm": 0.4396457076072693,
270
  "learning_rate": 0.00024356470588235294,
271
+ "loss": 1.2745,
272
  "step": 3400
273
  },
274
  {
275
  "epoch": 19.655072463768114,
276
+ "eval_loss": 1.1385252475738525,
277
+ "eval_runtime": 127.3458,
278
+ "eval_samples_per_second": 20.448,
279
+ "eval_steps_per_second": 2.56,
280
+ "eval_wer": 0.8670485738154912,
281
  "step": 3400
282
  },
283
  {
284
  "epoch": 20.81159420289855,
285
+ "grad_norm": 0.5819875597953796,
286
  "learning_rate": 0.00024003529411764703,
287
+ "loss": 1.2639,
288
  "step": 3600
289
  },
290
  {
291
  "epoch": 20.81159420289855,
292
+ "eval_loss": 1.1300982236862183,
293
+ "eval_runtime": 127.3185,
294
+ "eval_samples_per_second": 20.453,
295
+ "eval_steps_per_second": 2.561,
296
+ "eval_wer": 0.8666109718741297,
297
  "step": 3600
298
  },
299
  {
300
  "epoch": 21.968115942028987,
301
+ "grad_norm": 0.4509641230106354,
302
  "learning_rate": 0.00023650588235294115,
303
+ "loss": 1.2432,
304
  "step": 3800
305
  },
306
  {
307
  "epoch": 21.968115942028987,
308
+ "eval_loss": 1.1173290014266968,
309
+ "eval_runtime": 127.5924,
310
+ "eval_samples_per_second": 20.409,
311
+ "eval_steps_per_second": 2.555,
312
+ "eval_wer": 0.8669690098261527,
313
  "step": 3800
314
  },
315
  {
316
  "epoch": 23.121739130434783,
317
+ "grad_norm": 0.3901135325431824,
318
  "learning_rate": 0.0002329764705882353,
319
+ "loss": 1.2294,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 23.121739130434783,
324
+ "eval_loss": 1.1098414659500122,
325
+ "eval_runtime": 127.6057,
326
+ "eval_samples_per_second": 20.407,
327
+ "eval_steps_per_second": 2.555,
328
+ "eval_wer": 0.8619564784978319,
329
  "step": 4000
330
  },
331
  {
332
  "epoch": 24.278260869565216,
333
+ "grad_norm": 0.5170231461524963,
334
  "learning_rate": 0.00022944705882352938,
335
+ "loss": 1.2203,
336
  "step": 4200
337
  },
338
  {
339
  "epoch": 24.278260869565216,
340
+ "eval_loss": 1.1076903343200684,
341
+ "eval_runtime": 127.7278,
342
+ "eval_samples_per_second": 20.387,
343
+ "eval_steps_per_second": 2.552,
344
+ "eval_wer": 0.8710864462744162,
345
  "step": 4200
346
  },
347
  {
348
  "epoch": 25.434782608695652,
349
+ "grad_norm": 0.43240106105804443,
350
  "learning_rate": 0.00022591764705882353,
351
+ "loss": 1.2037,
352
  "step": 4400
353
  },
354
  {
355
  "epoch": 25.434782608695652,
356
+ "eval_loss": 1.096359133720398,
357
+ "eval_runtime": 127.673,
358
+ "eval_samples_per_second": 20.396,
359
+ "eval_steps_per_second": 2.553,
360
+ "eval_wer": 0.8625929904125393,
361
  "step": 4400
362
  },
363
  {
364
  "epoch": 26.591304347826085,
365
+ "grad_norm": 0.40014615654945374,
366
  "learning_rate": 0.00022238823529411762,
367
+ "loss": 1.1965,
368
  "step": 4600
369
  },
370
  {
371
  "epoch": 26.591304347826085,
372
+ "eval_loss": 1.091011881828308,
373
+ "eval_runtime": 127.6577,
374
+ "eval_samples_per_second": 20.398,
375
+ "eval_steps_per_second": 2.554,
376
+ "eval_wer": 0.8580976250149183,
377
  "step": 4600
378
  },
379
  {
380
  "epoch": 27.747826086956522,
381
+ "grad_norm": 0.8588981628417969,
382
  "learning_rate": 0.00021885882352941174,
383
+ "loss": 1.181,
384
  "step": 4800
385
  },
386
  {
387
  "epoch": 27.747826086956522,
388
+ "eval_loss": 1.084170937538147,
389
+ "eval_runtime": 127.8024,
390
+ "eval_samples_per_second": 20.375,
391
+ "eval_steps_per_second": 2.551,
392
+ "eval_wer": 0.8532641126626089,
393
  "step": 4800
394
  },
395
  {
396
  "epoch": 28.904347826086955,
397
+ "grad_norm": 0.4440598785877228,
398
  "learning_rate": 0.00021532941176470588,
399
+ "loss": 1.1711,
400
  "step": 5000
401
  },
402
  {
403
  "epoch": 28.904347826086955,
404
+ "eval_loss": 1.0691804885864258,
405
+ "eval_runtime": 127.5978,
406
+ "eval_samples_per_second": 20.408,
407
+ "eval_steps_per_second": 2.555,
408
+ "eval_wer": 0.8465011735688427,
409
  "step": 5000
410
  },
411
  {
412
  "epoch": 30.057971014492754,
413
+ "grad_norm": 0.435140997171402,
414
  "learning_rate": 0.00021179999999999997,
415
+ "loss": 1.1573,
416
  "step": 5200
417
  },
418
  {
419
  "epoch": 30.057971014492754,
420
+ "eval_loss": 1.0723620653152466,
421
+ "eval_runtime": 133.3452,
422
+ "eval_samples_per_second": 19.528,
423
+ "eval_steps_per_second": 2.445,
424
+ "eval_wer": 0.8464017185821697,
425
  "step": 5200
426
  },
427
  {
428
  "epoch": 31.214492753623187,
429
+ "grad_norm": 0.5071683526039124,
430
  "learning_rate": 0.0002082705882352941,
431
+ "loss": 1.1472,
432
  "step": 5400
433
  },
434
  {
435
  "epoch": 31.214492753623187,
436
+ "eval_loss": 1.0529308319091797,
437
+ "eval_runtime": 127.6762,
438
+ "eval_samples_per_second": 20.395,
439
  "eval_steps_per_second": 2.553,
440
+ "eval_wer": 0.8404145283844532,
441
  "step": 5400
442
  },
443
  {
444
  "epoch": 32.37101449275362,
445
+ "grad_norm": 0.381583571434021,
446
  "learning_rate": 0.00020474117647058823,
447
+ "loss": 1.1375,
448
  "step": 5600
449
  },
450
  {
451
  "epoch": 32.37101449275362,
452
+ "eval_loss": 1.0505813360214233,
453
+ "eval_runtime": 127.9023,
454
+ "eval_samples_per_second": 20.359,
455
+ "eval_steps_per_second": 2.549,
456
+ "eval_wer": 0.8402554004057764,
457
  "step": 5600
458
  },
459
  {
460
  "epoch": 33.527536231884056,
461
+ "grad_norm": 0.4180471897125244,
462
  "learning_rate": 0.00020121176470588233,
463
+ "loss": 1.1276,
464
  "step": 5800
465
  },
466
  {
467
  "epoch": 33.527536231884056,
468
+ "eval_loss": 1.0432393550872803,
469
+ "eval_runtime": 127.7286,
470
+ "eval_samples_per_second": 20.387,
471
+ "eval_steps_per_second": 2.552,
472
+ "eval_wer": 0.839817798464415,
473
  "step": 5800
474
  },
475
  {
476
  "epoch": 34.68405797101449,
477
+ "grad_norm": 0.4830577075481415,
478
  "learning_rate": 0.00019768235294117647,
479
+ "loss": 1.1149,
480
  "step": 6000
481
  },
482
  {
483
  "epoch": 34.68405797101449,
484
+ "eval_loss": 1.0371085405349731,
485
+ "eval_runtime": 127.9927,
486
+ "eval_samples_per_second": 20.345,
487
+ "eval_steps_per_second": 2.547,
488
+ "eval_wer": 0.8330150773759797,
489
  "step": 6000
490
  },
491
  {
492
  "epoch": 35.84057971014493,
493
+ "grad_norm": 0.4170786142349243,
494
  "learning_rate": 0.00019415294117647056,
495
+ "loss": 1.1099,
496
  "step": 6200
497
  },
498
  {
499
  "epoch": 35.84057971014493,
500
+ "eval_loss": 1.0371705293655396,
501
+ "eval_runtime": 128.0525,
502
+ "eval_samples_per_second": 20.335,
503
+ "eval_steps_per_second": 2.546,
504
+ "eval_wer": 0.8340891912320484,
505
  "step": 6200
506
  },
507
  {
508
  "epoch": 36.99710144927536,
509
+ "grad_norm": 1.0729575157165527,
510
  "learning_rate": 0.00019062352941176468,
511
+ "loss": 1.0959,
512
  "step": 6400
513
  },
514
  {
515
  "epoch": 36.99710144927536,
516
+ "eval_loss": 1.0295618772506714,
517
+ "eval_runtime": 128.0568,
518
+ "eval_samples_per_second": 20.335,
519
+ "eval_steps_per_second": 2.546,
520
+ "eval_wer": 0.8369534948482317,
521
  "step": 6400
522
  },
523
  {
524
  "epoch": 38.15072463768116,
525
+ "grad_norm": 0.44851154088974,
526
  "learning_rate": 0.00018709411764705882,
527
+ "loss": 1.0838,
528
  "step": 6600
529
  },
530
  {
531
  "epoch": 38.15072463768116,
532
+ "eval_loss": 1.0135877132415771,
533
+ "eval_runtime": 128.1825,
534
+ "eval_samples_per_second": 20.315,
535
+ "eval_steps_per_second": 2.543,
536
+ "eval_wer": 0.8232485976846879,
537
  "step": 6600
538
  },
539
  {
540
  "epoch": 39.30724637681159,
541
+ "grad_norm": 0.44436436891555786,
542
  "learning_rate": 0.00018356470588235291,
543
+ "loss": 1.0761,
544
  "step": 6800
545
  },
546
  {
547
  "epoch": 39.30724637681159,
548
+ "eval_loss": 1.0354554653167725,
549
+ "eval_runtime": 127.9672,
550
+ "eval_samples_per_second": 20.349,
551
+ "eval_steps_per_second": 2.548,
552
+ "eval_wer": 0.8287782949437085,
553
  "step": 6800
554
  },
555
  {
556
  "epoch": 40.46376811594203,
557
+ "grad_norm": 0.5356501936912537,
558
  "learning_rate": 0.00018003529411764703,
559
+ "loss": 1.069,
560
  "step": 7000
561
  },
562
  {
563
  "epoch": 40.46376811594203,
564
+ "eval_loss": 1.0072308778762817,
565
+ "eval_runtime": 128.0877,
566
+ "eval_samples_per_second": 20.33,
567
+ "eval_steps_per_second": 2.545,
568
+ "eval_wer": 0.8211003699725504,
569
  "step": 7000
570
  },
571
  {
572
  "epoch": 41.620289855072464,
573
+ "grad_norm": 0.44935235381126404,
574
  "learning_rate": 0.00017650588235294118,
575
+ "loss": 1.0624,
576
  "step": 7200
577
  },
578
  {
579
  "epoch": 41.620289855072464,
580
+ "eval_loss": 1.0019198656082153,
581
+ "eval_runtime": 128.3344,
582
+ "eval_samples_per_second": 20.291,
583
+ "eval_steps_per_second": 2.54,
584
+ "eval_wer": 0.8216573178979194,
585
  "step": 7200
586
  },
587
  {
588
  "epoch": 42.7768115942029,
589
+ "grad_norm": 0.5230256915092468,
590
  "learning_rate": 0.00017297647058823527,
591
+ "loss": 1.0502,
592
  "step": 7400
593
  },
594
  {
595
  "epoch": 42.7768115942029,
596
+ "eval_loss": 1.0021299123764038,
597
+ "eval_runtime": 128.3833,
598
+ "eval_samples_per_second": 20.283,
599
+ "eval_steps_per_second": 2.539,
600
+ "eval_wer": 0.8328559493973028,
601
  "step": 7400
602
  },
603
  {
604
  "epoch": 43.93333333333333,
605
+ "grad_norm": 0.5322907567024231,
606
  "learning_rate": 0.00016944705882352941,
607
+ "loss": 1.0423,
608
  "step": 7600
609
  },
610
  {
611
  "epoch": 43.93333333333333,
612
+ "eval_loss": 0.9959840774536133,
613
+ "eval_runtime": 128.179,
614
+ "eval_samples_per_second": 20.315,
615
+ "eval_steps_per_second": 2.543,
616
+ "eval_wer": 0.8152723077535108,
617
  "step": 7600
618
  },
619
  {
620
  "epoch": 45.08695652173913,
621
+ "grad_norm": 0.4864259958267212,
622
  "learning_rate": 0.0001659176470588235,
623
+ "loss": 1.0334,
624
  "step": 7800
625
  },
626
  {
627
  "epoch": 45.08695652173913,
628
+ "eval_loss": 0.9902531504631042,
629
+ "eval_runtime": 128.4294,
630
+ "eval_samples_per_second": 20.276,
631
+ "eval_steps_per_second": 2.538,
632
+ "eval_wer": 0.8133826630067231,
633
  "step": 7800
634
  },
635
  {
636
  "epoch": 46.243478260869566,
637
+ "grad_norm": 0.7626239657402039,
638
  "learning_rate": 0.00016238823529411762,
639
+ "loss": 1.0203,
640
  "step": 8000
641
  },
642
  {
643
  "epoch": 46.243478260869566,
644
+ "eval_loss": 0.9787291288375854,
645
+ "eval_runtime": 128.4884,
646
+ "eval_samples_per_second": 20.266,
647
+ "eval_steps_per_second": 2.537,
648
+ "eval_wer": 0.811572582249274,
649
  "step": 8000
650
  },
651
  {
652
  "epoch": 47.4,
653
+ "grad_norm": 0.4770081043243408,
654
  "learning_rate": 0.00015885882352941177,
655
+ "loss": 1.0212,
656
  "step": 8200
657
  },
658
  {
659
  "epoch": 47.4,
660
+ "eval_loss": 0.9690199494361877,
661
+ "eval_runtime": 128.4277,
662
+ "eval_samples_per_second": 20.276,
663
+ "eval_steps_per_second": 2.538,
664
+ "eval_wer": 0.802880216414051,
665
  "step": 8200
666
  },
667
  {
668
  "epoch": 48.55652173913043,
669
+ "grad_norm": 0.5174329876899719,
670
  "learning_rate": 0.00015532941176470586,
671
+ "loss": 1.0062,
672
  "step": 8400
673
  },
674
  {
675
  "epoch": 48.55652173913043,
676
+ "eval_loss": 0.9864068627357483,
677
+ "eval_runtime": 128.5885,
678
+ "eval_samples_per_second": 20.251,
679
+ "eval_steps_per_second": 2.535,
680
+ "eval_wer": 0.8029995623980586,
681
  "step": 8400
682
  },
683
  {
684
  "epoch": 49.71304347826087,
685
+ "grad_norm": 0.628575325012207,
686
  "learning_rate": 0.00015179999999999998,
687
+ "loss": 1.0029,
688
  "step": 8600
689
  },
690
  {
691
  "epoch": 49.71304347826087,
692
+ "eval_loss": 0.965828537940979,
693
+ "eval_runtime": 128.611,
694
+ "eval_samples_per_second": 20.247,
695
+ "eval_steps_per_second": 2.535,
696
+ "eval_wer": 0.8000159127978677,
697
  "step": 8600
698
  },
699
  {
700
  "epoch": 50.869565217391305,
701
+ "grad_norm": 0.512400209903717,
702
  "learning_rate": 0.0001482705882352941,
703
+ "loss": 0.9922,
704
  "step": 8800
705
  },
706
  {
707
  "epoch": 50.869565217391305,
708
+ "eval_loss": 0.9551593065261841,
709
+ "eval_runtime": 128.6546,
710
+ "eval_samples_per_second": 20.24,
711
+ "eval_steps_per_second": 2.534,
712
+ "eval_wer": 0.7964156422803039,
713
  "step": 8800
714
  },
715
  {
716
  "epoch": 52.0231884057971,
717
+ "grad_norm": 0.5017286539077759,
718
  "learning_rate": 0.0001447411764705882,
719
  "loss": 0.9784,
720
  "step": 9000
721
  },
722
  {
723
  "epoch": 52.0231884057971,
724
+ "eval_loss": 0.9562975168228149,
725
+ "eval_runtime": 128.6669,
726
+ "eval_samples_per_second": 20.238,
727
+ "eval_steps_per_second": 2.534,
728
+ "eval_wer": 0.7978080120937264,
729
  "step": 9000
730
  },
731
  {
732
  "epoch": 53.17971014492753,
733
+ "grad_norm": 0.5114548206329346,
734
  "learning_rate": 0.00014121176470588236,
735
+ "loss": 0.9761,
736
  "step": 9200
737
  },
738
  {
739
  "epoch": 53.17971014492753,
740
+ "eval_loss": 0.9441680908203125,
741
+ "eval_runtime": 128.6496,
742
+ "eval_samples_per_second": 20.241,
743
+ "eval_steps_per_second": 2.534,
744
+ "eval_wer": 0.7898118311652146,
745
  "step": 9200
746
  },
747
  {
748
  "epoch": 54.33623188405797,
749
+ "grad_norm": 0.5005738735198975,
750
  "learning_rate": 0.00013768235294117645,
751
+ "loss": 0.9649,
752
  "step": 9400
753
  },
754
  {
755
  "epoch": 54.33623188405797,
756
+ "eval_loss": 0.9494587779045105,
757
+ "eval_runtime": 128.5953,
758
+ "eval_samples_per_second": 20.25,
759
+ "eval_steps_per_second": 2.535,
760
+ "eval_wer": 0.7898317221625493,
761
  "step": 9400
762
  },
763
  {
764
  "epoch": 55.492753623188406,
765
+ "grad_norm": 0.7633522152900696,
766
  "learning_rate": 0.00013415294117647057,
767
+ "loss": 0.9567,
768
  "step": 9600
769
  },
770
  {
771
  "epoch": 55.492753623188406,
772
+ "eval_loss": 0.9448326826095581,
773
+ "eval_runtime": 128.7309,
774
+ "eval_samples_per_second": 20.228,
775
+ "eval_steps_per_second": 2.532,
776
+ "eval_wer": 0.792676134781398,
777
  "step": 9600
778
  },
779
  {
780
  "epoch": 56.64927536231884,
781
+ "grad_norm": 0.5112641453742981,
782
  "learning_rate": 0.00013062352941176468,
783
+ "loss": 0.9556,
784
  "step": 9800
785
  },
786
  {
787
  "epoch": 56.64927536231884,
788
+ "eval_loss": 0.930349588394165,
789
+ "eval_runtime": 128.7424,
790
+ "eval_samples_per_second": 20.226,
791
+ "eval_steps_per_second": 2.532,
792
+ "eval_wer": 0.7851374467915821,
793
  "step": 9800
794
  },
795
  {
796
  "epoch": 57.80579710144927,
797
+ "grad_norm": 0.46061235666275024,
798
  "learning_rate": 0.00012709411764705883,
799
+ "loss": 0.9454,
800
  "step": 10000
801
  },
802
  {
803
  "epoch": 57.80579710144927,
804
+ "eval_loss": 0.9303532838821411,
805
+ "eval_runtime": 128.7544,
806
+ "eval_samples_per_second": 20.225,
807
+ "eval_steps_per_second": 2.532,
808
+ "eval_wer": 0.7783943986951506,
809
  "step": 10000
810
  },
811
  {
812
  "epoch": 58.96231884057971,
813
+ "grad_norm": 0.5678063035011292,
814
  "learning_rate": 0.00012356470588235292,
815
+ "loss": 0.9356,
816
  "step": 10200
817
  },
818
  {
819
  "epoch": 58.96231884057971,
820
+ "eval_loss": 0.9202280640602112,
821
+ "eval_runtime": 128.797,
822
+ "eval_samples_per_second": 20.218,
823
+ "eval_steps_per_second": 2.531,
824
+ "eval_wer": 0.7718303695747305,
825
  "step": 10200
826
  },
827
  {
828
  "epoch": 60.11594202898551,
829
+ "grad_norm": 0.4535027742385864,
830
  "learning_rate": 0.00012003529411764705,
831
+ "loss": 0.927,
832
  "step": 10400
833
  },
834
  {
835
  "epoch": 60.11594202898551,
836
+ "eval_loss": 0.9264362454414368,
837
+ "eval_runtime": 128.7883,
838
+ "eval_samples_per_second": 20.219,
839
+ "eval_steps_per_second": 2.531,
840
+ "eval_wer": 0.7730238294148069,
841
  "step": 10400
842
  },
843
  {
844
  "epoch": 61.27246376811594,
845
+ "grad_norm": 0.5285719633102417,
846
  "learning_rate": 0.00011650588235294116,
847
+ "loss": 0.9172,
848
  "step": 10600
849
  },
850
  {
851
  "epoch": 61.27246376811594,
852
+ "eval_loss": 0.9252079129219055,
853
+ "eval_runtime": 128.9172,
854
+ "eval_samples_per_second": 20.199,
855
+ "eval_steps_per_second": 2.529,
856
+ "eval_wer": 0.7736006683375104,
857
  "step": 10600
858
  },
859
  {
860
  "epoch": 62.428985507246374,
861
+ "grad_norm": 0.5567119121551514,
862
  "learning_rate": 0.00011297647058823529,
863
+ "loss": 0.9177,
864
  "step": 10800
865
  },
866
  {
867
  "epoch": 62.428985507246374,
868
+ "eval_loss": 0.9086942076683044,
869
+ "eval_runtime": 128.9307,
870
+ "eval_samples_per_second": 20.197,
871
+ "eval_steps_per_second": 2.528,
872
+ "eval_wer": 0.7682102080598321,
873
  "step": 10800
874
  },
875
  {
876
  "epoch": 63.585507246376814,
877
+ "grad_norm": 0.48296016454696655,
878
  "learning_rate": 0.0001094470588235294,
879
+ "loss": 0.9107,
880
  "step": 11000
881
  },
882
  {
883
  "epoch": 63.585507246376814,
884
+ "eval_loss": 0.911865770816803,
885
+ "eval_runtime": 128.9178,
886
+ "eval_samples_per_second": 20.199,
887
+ "eval_steps_per_second": 2.529,
888
+ "eval_wer": 0.7663006723157099,
889
  "step": 11000
890
  },
891
  {
892
  "epoch": 64.74202898550725,
893
+ "grad_norm": 0.813925564289093,
894
  "learning_rate": 0.00010591764705882352,
895
+ "loss": 0.9017,
896
  "step": 11200
897
  },
898
  {
899
  "epoch": 64.74202898550725,
900
+ "eval_loss": 0.9014148116111755,
901
+ "eval_runtime": 128.8475,
902
+ "eval_samples_per_second": 20.21,
903
+ "eval_steps_per_second": 2.53,
904
+ "eval_wer": 0.760890321040697,
905
  "step": 11200
906
  },
907
  {
908
  "epoch": 65.89855072463769,
909
+ "grad_norm": 0.5046563148498535,
910
  "learning_rate": 0.00010238823529411763,
911
+ "loss": 0.899,
912
  "step": 11400
913
  },
914
  {
915
  "epoch": 65.89855072463769,
916
+ "eval_loss": 0.8962476253509521,
917
+ "eval_runtime": 128.9757,
918
+ "eval_samples_per_second": 20.19,
919
+ "eval_steps_per_second": 2.528,
920
+ "eval_wer": 0.7597366431952898,
921
  "step": 11400
922
  },
923
  {
924
  "epoch": 67.05217391304348,
925
+ "grad_norm": 0.5046080946922302,
926
  "learning_rate": 9.885882352941176e-05,
927
+ "loss": 0.8854,
928
  "step": 11600
929
  },
930
  {
931
  "epoch": 67.05217391304348,
932
+ "eval_loss": 0.8975692987442017,
933
+ "eval_runtime": 129.5423,
934
+ "eval_samples_per_second": 20.102,
935
+ "eval_steps_per_second": 2.517,
936
+ "eval_wer": 0.7532521780642082,
937
  "step": 11600
938
  },
939
  {
940
  "epoch": 68.20869565217392,
941
+ "grad_norm": 0.6193573474884033,
942
  "learning_rate": 9.532941176470588e-05,
943
+ "loss": 0.8841,
944
  "step": 11800
945
  },
946
  {
947
  "epoch": 68.20869565217392,
948
+ "eval_loss": 0.8951582908630371,
949
+ "eval_runtime": 129.0232,
950
+ "eval_samples_per_second": 20.182,
951
+ "eval_steps_per_second": 2.527,
952
+ "eval_wer": 0.755380514779011,
953
  "step": 11800
954
  },
955
  {
956
  "epoch": 69.36521739130434,
957
+ "grad_norm": 0.5326569676399231,
958
  "learning_rate": 9.18e-05,
959
+ "loss": 0.8792,
960
  "step": 12000
961
  },
962
  {
963
  "epoch": 69.36521739130434,
964
+ "eval_loss": 0.8951340317726135,
965
+ "eval_runtime": 128.9952,
966
+ "eval_samples_per_second": 20.187,
967
+ "eval_steps_per_second": 2.527,
968
+ "eval_wer": 0.7534709790348888,
969
  "step": 12000
970
  },
971
  {
972
  "epoch": 70.52173913043478,
973
+ "grad_norm": 0.47887784242630005,
974
  "learning_rate": 8.82705882352941e-05,
975
+ "loss": 0.8697,
976
  "step": 12200
977
  },
978
  {
979
  "epoch": 70.52173913043478,
980
+ "eval_loss": 0.8912975192070007,
981
+ "eval_runtime": 129.1402,
982
+ "eval_samples_per_second": 20.164,
983
+ "eval_steps_per_second": 2.524,
984
+ "eval_wer": 0.7513227513227513,
985
  "step": 12200
986
  },
987
  {
988
  "epoch": 71.67826086956522,
989
+ "grad_norm": 0.5348592400550842,
990
  "learning_rate": 8.474117647058823e-05,
991
+ "loss": 0.8677,
992
  "step": 12400
993
  },
994
  {
995
  "epoch": 71.67826086956522,
996
+ "eval_loss": 0.8820046186447144,
997
+ "eval_runtime": 129.1569,
998
+ "eval_samples_per_second": 20.162,
999
+ "eval_steps_per_second": 2.524,
1000
+ "eval_wer": 0.7496320165493098,
1001
  "step": 12400
1002
  },
1003
  {
1004
  "epoch": 72.83478260869565,
1005
+ "grad_norm": 0.507841944694519,
1006
  "learning_rate": 8.121176470588235e-05,
1007
+ "loss": 0.862,
1008
  "step": 12600
1009
  },
1010
  {
1011
  "epoch": 72.83478260869565,
1012
+ "eval_loss": 0.8834201097488403,
1013
+ "eval_runtime": 129.1489,
1014
+ "eval_samples_per_second": 20.163,
1015
+ "eval_steps_per_second": 2.524,
1016
+ "eval_wer": 0.7446592672156582,
1017
  "step": 12600
1018
  },
1019
  {
1020
  "epoch": 73.99130434782609,
1021
+ "grad_norm": 0.7582190036773682,
1022
  "learning_rate": 7.768235294117647e-05,
1023
+ "loss": 0.8573,
1024
  "step": 12800
1025
  },
1026
  {
1027
  "epoch": 73.99130434782609,
1028
+ "eval_loss": 0.8823952674865723,
1029
+ "eval_runtime": 129.2373,
1030
+ "eval_samples_per_second": 20.149,
1031
+ "eval_steps_per_second": 2.522,
1032
+ "eval_wer": 0.7437044993435971,
1033
  "step": 12800
1034
  },
1035
  {
1036
  "epoch": 75.14492753623189,
1037
+ "grad_norm": 0.5389032363891602,
1038
  "learning_rate": 7.415294117647058e-05,
1039
+ "loss": 0.8527,
1040
  "step": 13000
1041
  },
1042
  {
1043
  "epoch": 75.14492753623189,
1044
+ "eval_loss": 0.8746750354766846,
1045
+ "eval_runtime": 129.2983,
1046
+ "eval_samples_per_second": 20.139,
1047
+ "eval_steps_per_second": 2.521,
1048
+ "eval_wer": 0.7387516410072801,
1049
  "step": 13000
1050
  },
1051
  {
1052
  "epoch": 76.30144927536232,
1053
+ "grad_norm": 0.5191702842712402,
1054
  "learning_rate": 7.06235294117647e-05,
1055
+ "loss": 0.8451,
1056
  "step": 13200
1057
  },
1058
  {
1059
  "epoch": 76.30144927536232,
1060
+ "eval_loss": 0.8806008100509644,
1061
+ "eval_runtime": 129.3573,
1062
+ "eval_samples_per_second": 20.13,
1063
+ "eval_steps_per_second": 2.52,
1064
+ "eval_wer": 0.7399053188526873,
1065
  "step": 13200
1066
  },
1067
  {
1068
  "epoch": 77.45797101449276,
1069
+ "grad_norm": 0.5819474458694458,
1070
  "learning_rate": 6.709411764705882e-05,
1071
  "loss": 0.8435,
1072
  "step": 13400
1073
  },
1074
  {
1075
  "epoch": 77.45797101449276,
1076
+ "eval_loss": 0.8713163137435913,
1077
+ "eval_runtime": 129.2981,
1078
+ "eval_samples_per_second": 20.14,
1079
+ "eval_steps_per_second": 2.521,
1080
+ "eval_wer": 0.7400644468313641,
1081
  "step": 13400
1082
  },
1083
  {
1084
  "epoch": 78.61449275362318,
1085
+ "grad_norm": 0.5077270865440369,
1086
  "learning_rate": 6.356470588235294e-05,
1087
+ "loss": 0.8393,
1088
  "step": 13600
1089
  },
1090
  {
1091
  "epoch": 78.61449275362318,
1092
+ "eval_loss": 0.8733929395675659,
1093
+ "eval_runtime": 129.3529,
1094
+ "eval_samples_per_second": 20.131,
1095
+ "eval_steps_per_second": 2.52,
1096
+ "eval_wer": 0.7386521860206071,
1097
  "step": 13600
1098
  },
1099
  {
1100
  "epoch": 79.77101449275362,
1101
+ "grad_norm": 0.5823322534561157,
1102
  "learning_rate": 6.003529411764706e-05,
1103
+ "loss": 0.8353,
1104
  "step": 13800
1105
  },
1106
  {
1107
  "epoch": 79.77101449275362,
1108
+ "eval_loss": 0.8702186346054077,
1109
+ "eval_runtime": 129.4516,
1110
+ "eval_samples_per_second": 20.116,
1111
+ "eval_steps_per_second": 2.518,
1112
+ "eval_wer": 0.7366829772844811,
1113
  "step": 13800
1114
  },
1115
  {
1116
  "epoch": 80.92753623188406,
1117
+ "grad_norm": 0.5790678858757019,
1118
  "learning_rate": 5.650588235294117e-05,
1119
+ "loss": 0.834,
1120
  "step": 14000
1121
  },
1122
  {
1123
  "epoch": 80.92753623188406,
1124
+ "eval_loss": 0.8660680651664734,
1125
+ "eval_runtime": 129.3194,
1126
+ "eval_samples_per_second": 20.136,
1127
+ "eval_steps_per_second": 2.521,
1128
+ "eval_wer": 0.7334805267136094,
1129
  "step": 14000
1130
  },
1131
  {
1132
  "epoch": 82.08115942028985,
1133
+ "grad_norm": 0.4909045994281769,
1134
  "learning_rate": 5.299411764705882e-05,
1135
+ "loss": 0.8265,
1136
  "step": 14200
1137
  },
1138
  {
1139
  "epoch": 82.08115942028985,
1140
+ "eval_loss": 0.8641631007194519,
1141
+ "eval_runtime": 129.3575,
1142
+ "eval_samples_per_second": 20.13,
1143
+ "eval_steps_per_second": 2.52,
1144
+ "eval_wer": 0.7312129530174643,
1145
  "step": 14200
1146
  },
1147
  {
1148
  "epoch": 83.23768115942029,
1149
+ "grad_norm": 0.7918466329574585,
1150
  "learning_rate": 4.946470588235294e-05,
1151
+ "loss": 0.8183,
1152
  "step": 14400
1153
  },
1154
  {
1155
  "epoch": 83.23768115942029,
1156
+ "eval_loss": 0.8637834191322327,
1157
+ "eval_runtime": 129.5477,
1158
+ "eval_samples_per_second": 20.101,
1159
+ "eval_steps_per_second": 2.516,
1160
+ "eval_wer": 0.733400962724271,
1161
  "step": 14400
1162
  },
1163
  {
1164
  "epoch": 84.39420289855073,
1165
+ "grad_norm": 0.5342910289764404,
1166
  "learning_rate": 4.593529411764705e-05,
1167
+ "loss": 0.8238,
1168
  "step": 14600
1169
  },
1170
  {
1171
  "epoch": 84.39420289855073,
1172
+ "eval_loss": 0.8643026351928711,
1173
+ "eval_runtime": 129.5572,
1174
+ "eval_samples_per_second": 20.099,
1175
+ "eval_steps_per_second": 2.516,
1176
+ "eval_wer": 0.7310538250387875,
1177
  "step": 14600
1178
  },
1179
  {
1180
  "epoch": 85.55072463768116,
1181
+ "grad_norm": 0.4855327904224396,
1182
  "learning_rate": 4.240588235294118e-05,
1183
  "loss": 0.8176,
1184
  "step": 14800
1185
  },
1186
  {
1187
  "epoch": 85.55072463768116,
1188
+ "eval_loss": 0.8639850616455078,
1189
+ "eval_runtime": 129.6515,
1190
+ "eval_samples_per_second": 20.085,
1191
+ "eval_steps_per_second": 2.514,
1192
+ "eval_wer": 0.7308946970601106,
1193
  "step": 14800
1194
  },
1195
  {
1196
  "epoch": 86.7072463768116,
1197
+ "grad_norm": 0.5488927364349365,
1198
  "learning_rate": 3.887647058823529e-05,
1199
+ "loss": 0.8183,
1200
  "step": 15000
1201
  },
1202
  {
1203
  "epoch": 86.7072463768116,
1204
+ "eval_loss": 0.8602821826934814,
1205
+ "eval_runtime": 129.637,
1206
+ "eval_samples_per_second": 20.087,
1207
+ "eval_steps_per_second": 2.515,
1208
+ "eval_wer": 0.7294028722600151,
1209
  "step": 15000
1210
  },
1211
  {
1212
  "epoch": 87.86376811594202,
1213
+ "grad_norm": 0.5371025800704956,
1214
  "learning_rate": 3.534705882352941e-05,
1215
+ "loss": 0.8121,
1216
  "step": 15200
1217
  },
1218
  {
1219
  "epoch": 87.86376811594202,
1220
+ "eval_loss": 0.8586457371711731,
1221
+ "eval_runtime": 130.1392,
1222
+ "eval_samples_per_second": 20.009,
1223
+ "eval_steps_per_second": 2.505,
1224
+ "eval_wer": 0.7269960615825277,
1225
  "step": 15200
1226
  },
1227
  {
1228
  "epoch": 89.01739130434783,
1229
+ "grad_norm": 0.5630968809127808,
1230
  "learning_rate": 3.1817647058823525e-05,
1231
+ "loss": 0.8033,
1232
  "step": 15400
1233
  },
1234
  {
1235
  "epoch": 89.01739130434783,
1236
+ "eval_loss": 0.8584564924240112,
1237
+ "eval_runtime": 129.7214,
1238
+ "eval_samples_per_second": 20.074,
1239
+ "eval_steps_per_second": 2.513,
1240
+ "eval_wer": 0.7264590046544934,
1241
  "step": 15400
1242
  },
1243
  {
1244
  "epoch": 90.17391304347827,
1245
+ "grad_norm": 0.5413855314254761,
1246
  "learning_rate": 2.8288235294117643e-05,
1247
+ "loss": 0.8116,
1248
  "step": 15600
1249
  },
1250
  {
1251
  "epoch": 90.17391304347827,
1252
+ "eval_loss": 0.8559712767601013,
1253
+ "eval_runtime": 129.7326,
1254
+ "eval_samples_per_second": 20.072,
1255
+ "eval_steps_per_second": 2.513,
1256
+ "eval_wer": 0.7254047817957593,
1257
  "step": 15600
1258
  },
1259
  {
1260
  "epoch": 91.33043478260869,
1261
+ "grad_norm": 0.5642918944358826,
1262
  "learning_rate": 2.475882352941176e-05,
1263
+ "loss": 0.8058,
1264
  "step": 15800
1265
  },
1266
  {
1267
  "epoch": 91.33043478260869,
1268
+ "eval_loss": 0.8553411364555359,
1269
+ "eval_runtime": 129.7535,
1270
+ "eval_samples_per_second": 20.069,
1271
+ "eval_steps_per_second": 2.512,
1272
+ "eval_wer": 0.7261606396944743,
1273
  "step": 15800
1274
  },
1275
  {
1276
  "epoch": 92.48695652173913,
1277
+ "grad_norm": 0.7495045065879822,
1278
  "learning_rate": 2.122941176470588e-05,
1279
+ "loss": 0.7992,
1280
  "step": 16000
1281
  },
1282
  {
1283
  "epoch": 92.48695652173913,
1284
+ "eval_loss": 0.854764461517334,
1285
+ "eval_runtime": 129.7705,
1286
+ "eval_samples_per_second": 20.066,
1287
+ "eval_steps_per_second": 2.512,
1288
+ "eval_wer": 0.7263396586704858,
1289
  "step": 16000
1290
  },
1291
  {
1292
  "epoch": 93.64347826086957,
1293
+ "grad_norm": 0.5202896595001221,
1294
  "learning_rate": 1.7699999999999997e-05,
1295
+ "loss": 0.7979,
1296
  "step": 16200
1297
  },
1298
  {
1299
  "epoch": 93.64347826086957,
1300
+ "eval_loss": 0.8527706265449524,
1301
+ "eval_runtime": 129.7564,
1302
+ "eval_samples_per_second": 20.068,
1303
+ "eval_steps_per_second": 2.512,
1304
+ "eval_wer": 0.7235748100409755,
1305
  "step": 16200
1306
  },
1307
  {
1308
  "epoch": 94.8,
1309
+ "grad_norm": 0.5076733231544495,
1310
  "learning_rate": 1.4188235294117647e-05,
1311
+ "loss": 0.7979,
1312
  "step": 16400
1313
  },
1314
  {
1315
  "epoch": 94.8,
1316
+ "eval_loss": 0.8528936505317688,
1317
+ "eval_runtime": 129.9107,
1318
+ "eval_samples_per_second": 20.045,
1319
+ "eval_steps_per_second": 2.509,
1320
+ "eval_wer": 0.7234952460516371,
1321
  "step": 16400
1322
  },
1323
  {
1324
  "epoch": 95.95652173913044,
1325
+ "grad_norm": 0.5990000367164612,
1326
  "learning_rate": 1.0658823529411765e-05,
1327
+ "loss": 0.7978,
1328
  "step": 16600
1329
  },
1330
  {
1331
  "epoch": 95.95652173913044,
1332
+ "eval_loss": 0.8526215553283691,
1333
+ "eval_runtime": 130.1031,
1334
+ "eval_samples_per_second": 20.015,
1335
+ "eval_steps_per_second": 2.506,
1336
+ "eval_wer": 0.7242312129530175,
1337
  "step": 16600
1338
  },
1339
  {
1340
  "epoch": 97.11014492753623,
1341
+ "grad_norm": 0.5280841588973999,
1342
  "learning_rate": 7.1294117647058815e-06,
1343
+ "loss": 0.7934,
1344
  "step": 16800
1345
  },
1346
  {
1347
  "epoch": 97.11014492753623,
1348
+ "eval_loss": 0.8518908619880676,
1349
+ "eval_runtime": 130.115,
1350
+ "eval_samples_per_second": 20.013,
1351
+ "eval_steps_per_second": 2.505,
1352
+ "eval_wer": 0.7238135020089908,
1353
  "step": 16800
1354
  },
1355
  {
1356
  "epoch": 98.26666666666667,
1357
+ "grad_norm": 0.49593880772590637,
1358
  "learning_rate": 3.6e-06,
1359
+ "loss": 0.7915,
1360
  "step": 17000
1361
  },
1362
  {
1363
  "epoch": 98.26666666666667,
1364
+ "eval_loss": 0.8519703149795532,
1365
+ "eval_runtime": 130.0289,
1366
+ "eval_samples_per_second": 20.026,
1367
+ "eval_steps_per_second": 2.507,
1368
+ "eval_wer": 0.7232963360782909,
1369
  "step": 17000
1370
  },
1371
  {
1372
  "epoch": 99.4231884057971,
1373
+ "grad_norm": 0.5583031177520752,
1374
  "learning_rate": 7.058823529411765e-08,
1375
+ "loss": 0.7996,
1376
  "step": 17200
1377
  },
1378
  {
1379
  "epoch": 99.4231884057971,
1380
+ "eval_loss": 0.8514918088912964,
1381
+ "eval_runtime": 130.2005,
1382
+ "eval_samples_per_second": 20.0,
1383
+ "eval_steps_per_second": 2.504,
1384
+ "eval_wer": 0.723037753112941,
1385
  "step": 17200
1386
  },
1387
  {
1388
  "epoch": 99.4231884057971,
1389
  "step": 17200,
1390
  "total_flos": 3.3406327721118188e+19,
1391
+ "train_loss": 1.1172501763632132,
1392
+ "train_runtime": 45795.1729,
1393
+ "train_samples_per_second": 6.025,
1394
+ "train_steps_per_second": 0.376
1395
  }
1396
  ],
1397
  "logging_steps": 200,