Applied AI committed · Commit 69de07b · verified · 1 Parent(s): cfb7adc

Model save

Files changed (5)
  1. README.md +3 -8
  2. all_results.json +4 -4
  3. model.safetensors +1 -1
  4. train_results.json +4 -4
  5. trainer_state.json +94 -94
README.md CHANGED
@@ -2,16 +2,11 @@
  license: mit
  base_model: gpt2
  tags:
- - alignment-handbook
  - trl
  - sft
  - generated_from_trainer
- - trl
- - sft
- - alignment-handbook
- - generated_from_trainer
  datasets:
- - appliedai-qx/sample-dataset-ah
+ - generator
  model-index:
  - name: gpt2
  results: []
@@ -22,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->

  # gpt2

- This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the appliedai-qx/sample-dataset-ah dataset.
+ This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on the generator dataset.

  ## Model description

@@ -62,5 +57,5 @@ The following hyperparameters were used during training:

  - Transformers 4.41.2
  - Pytorch 2.3.1+cu121
- - Datasets 2.19.2
+ - Datasets 2.20.0
  - Tokenizers 0.19.1
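For context, a minimal sketch of how a checkpoint like this one is typically loaded with the Transformers version pinned in the README (4.41.2). The repository id below is an assumption (the diff only shows the model name gpt2 and the committing account); substitute the actual Hub path.

# Minimal loading sketch. The repo id is assumed, not stated in this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "appliedai-qx/gpt2"  # assumption: account + model name from this page
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)

prompt = "Hello, world"
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))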
all_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 7357983621120000.0,
- "train_loss": 0.11611378409645774,
- "train_runtime": 16.2542,
+ "train_loss": 1.3832195509563794,
+ "train_runtime": 51.7657,
  "train_samples": 10000,
- "train_samples_per_second": 864.147,
- "train_steps_per_second": 13.535
+ "train_samples_per_second": 271.338,
+ "train_steps_per_second": 4.25
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:45be5d0365d26e836319f5b468c755d28110be195d1c62b8b3f5858bfd60a158
+ oid sha256:e754af9e9e09e482b62fce2b2ea79047dbb99a79bacaff80227b59eabaeaab00
  size 248894656
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 7357983621120000.0,
- "train_loss": 0.11611378409645774,
- "train_runtime": 16.2542,
+ "train_loss": 1.3832195509563794,
+ "train_runtime": 51.7657,
  "train_samples": 10000,
- "train_samples_per_second": 864.147,
- "train_steps_per_second": 13.535
+ "train_samples_per_second": 271.338,
+ "train_steps_per_second": 4.25
  }
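As a rough consistency check on the updated metrics, the reported steps-per-second times the runtime should reproduce the 220 optimizer steps recorded in trainer_state.json below; a small sketch:

# Sanity check on the new train_results.json values.
train_runtime = 51.7657          # seconds, from train_results.json
train_steps_per_second = 4.25    # from train_results.json
print(round(train_steps_per_second * train_runtime))  # -> 220, matching trainer_state.json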
trainer_state.json CHANGED
@@ -10,327 +10,327 @@
  "log_history": [
  {
  "epoch": 0.004545454545454545,
- "grad_norm": 3.78125,
+ "grad_norm": 3.71875,
  "learning_rate": 9.090909090909091e-06,
- "loss": 1.8611,
+ "loss": 1.8651,
  "step": 1
  },
  {
  "epoch": 0.022727272727272728,
- "grad_norm": 3.21875,
+ "grad_norm": 3.296875,
  "learning_rate": 4.545454545454546e-05,
- "loss": 1.8713,
+ "loss": 1.8719,
  "step": 5
  },
  {
  "epoch": 0.045454545454545456,
- "grad_norm": 1.96875,
+ "grad_norm": 1.7578125,
  "learning_rate": 9.090909090909092e-05,
- "loss": 1.8057,
+ "loss": 1.8046,
  "step": 10
  },
  {
  "epoch": 0.06818181818181818,
- "grad_norm": 1.2890625,
+ "grad_norm": 1.421875,
  "learning_rate": 0.00013636363636363637,
- "loss": 1.7424,
+ "loss": 1.7402,
  "step": 15
  },
  {
  "epoch": 0.09090909090909091,
- "grad_norm": 1.796875,
+ "grad_norm": 2.046875,
  "learning_rate": 0.00018181818181818183,
- "loss": 1.6841,
+ "loss": 1.6831,
  "step": 20
  },
  {
  "epoch": 0.11363636363636363,
- "grad_norm": 1.9375,
+ "grad_norm": 1.6875,
  "learning_rate": 0.0001998867339183008,
- "loss": 1.611,
+ "loss": 1.607,
  "step": 25
  },
  {
  "epoch": 0.13636363636363635,
- "grad_norm": 1.84375,
+ "grad_norm": 1.4296875,
  "learning_rate": 0.00019919548128307954,
- "loss": 1.5838,
+ "loss": 1.582,
  "step": 30
  },
  {
  "epoch": 0.1590909090909091,
- "grad_norm": 1.46875,
+ "grad_norm": 1.484375,
  "learning_rate": 0.00019788024462147788,
- "loss": 1.5374,
+ "loss": 1.5335,
  "step": 35
  },
  {
  "epoch": 0.18181818181818182,
- "grad_norm": 0.76953125,
+ "grad_norm": 0.921875,
  "learning_rate": 0.00019594929736144976,
- "loss": 1.5049,
+ "loss": 1.5002,
  "step": 40
  },
  {
  "epoch": 0.20454545454545456,
- "grad_norm": 0.9609375,
+ "grad_norm": 0.85546875,
  "learning_rate": 0.00019341478602651069,
- "loss": 1.4867,
+ "loss": 1.4868,
  "step": 45
  },
  {
  "epoch": 0.22727272727272727,
- "grad_norm": 0.83203125,
+ "grad_norm": 0.86328125,
  "learning_rate": 0.00019029265382866214,
- "loss": 1.4711,
+ "loss": 1.47,
  "step": 50
  },
  {
  "epoch": 0.25,
- "grad_norm": 0.9453125,
+ "grad_norm": 0.83203125,
  "learning_rate": 0.00018660254037844388,
- "loss": 1.4442,
+ "loss": 1.4441,
  "step": 55
  },
  {
  "epoch": 0.2727272727272727,
- "grad_norm": 0.95703125,
+ "grad_norm": 0.88671875,
  "learning_rate": 0.0001823676581429833,
- "loss": 1.4328,
+ "loss": 1.4332,
  "step": 60
  },
  {
  "epoch": 0.29545454545454547,
- "grad_norm": 0.875,
+ "grad_norm": 0.765625,
  "learning_rate": 0.0001776146464291757,
- "loss": 1.4086,
+ "loss": 1.4061,
  "step": 65
  },
  {
  "epoch": 0.3181818181818182,
- "grad_norm": 1.046875,
+ "grad_norm": 0.85546875,
  "learning_rate": 0.00017237340381050703,
- "loss": 1.3785,
+ "loss": 1.3746,
  "step": 70
  },
  {
  "epoch": 0.3409090909090909,
- "grad_norm": 0.90625,
+ "grad_norm": 0.98828125,
  "learning_rate": 0.00016667690005162916,
- "loss": 1.3806,
+ "loss": 1.3776,
  "step": 75
  },
  {
  "epoch": 0.36363636363636365,
- "grad_norm": 0.7734375,
+ "grad_norm": 0.72265625,
  "learning_rate": 0.00016056096871376667,
- "loss": 1.3681,
+ "loss": 1.3633,
  "step": 80
  },
  {
  "epoch": 0.38636363636363635,
- "grad_norm": 0.66015625,
+ "grad_norm": 0.61328125,
  "learning_rate": 0.00015406408174555976,
- "loss": 1.3576,
+ "loss": 1.3569,
  "step": 85
  },
  {
  "epoch": 0.4090909090909091,
- "grad_norm": 0.6328125,
+ "grad_norm": 0.73046875,
  "learning_rate": 0.0001472271074772683,
- "loss": 1.3267,
+ "loss": 1.3216,
  "step": 90
  },
  {
  "epoch": 0.4318181818181818,
- "grad_norm": 1.015625,
+ "grad_norm": 0.8046875,
  "learning_rate": 0.00014009305354066137,
- "loss": 1.3442,
+ "loss": 1.3382,
  "step": 95
  },
  {
  "epoch": 0.45454545454545453,
- "grad_norm": 0.8984375,
+ "grad_norm": 0.91796875,
  "learning_rate": 0.00013270679633174218,
- "loss": 1.3321,
+ "loss": 1.3274,
  "step": 100
  },
  {
  "epoch": 0.4772727272727273,
- "grad_norm": 0.9765625,
+ "grad_norm": 0.7890625,
  "learning_rate": 0.0001251147987181079,
- "loss": 1.3126,
+ "loss": 1.307,
  "step": 105
  },
  {
  "epoch": 0.5,
- "grad_norm": 0.84765625,
+ "grad_norm": 1.0234375,
  "learning_rate": 0.00011736481776669306,
- "loss": 1.3108,
+ "loss": 1.3087,
  "step": 110
  },
  {
  "epoch": 0.5227272727272727,
- "grad_norm": 0.6640625,
+ "grad_norm": 0.59765625,
  "learning_rate": 0.00010950560433041826,
- "loss": 1.2999,
+ "loss": 1.2977,
  "step": 115
  },
  {
  "epoch": 0.5454545454545454,
- "grad_norm": 0.52734375,
+ "grad_norm": 0.66796875,
  "learning_rate": 0.00010158659638348081,
- "loss": 1.3105,
+ "loss": 1.3071,
  "step": 120
  },
  {
  "epoch": 0.5681818181818182,
- "grad_norm": 0.69140625,
+ "grad_norm": 0.58203125,
  "learning_rate": 9.365760803434355e-05,
- "loss": 1.3079,
+ "loss": 1.3046,
  "step": 125
  },
  {
  "epoch": 0.5909090909090909,
- "grad_norm": 0.58984375,
+ "grad_norm": 0.5390625,
  "learning_rate": 8.57685161726715e-05,
- "loss": 1.2927,
+ "loss": 1.2901,
  "step": 130
  },
  {
  "epoch": 0.6136363636363636,
- "grad_norm": 0.51953125,
+ "grad_norm": 0.515625,
  "learning_rate": 7.796894672134594e-05,
- "loss": 1.2819,
+ "loss": 1.2811,
  "step": 135
  },
  {
  "epoch": 0.6363636363636364,
- "grad_norm": 0.76171875,
+ "grad_norm": 0.6484375,
  "learning_rate": 7.030796246717255e-05,
- "loss": 1.2973,
+ "loss": 1.2961,
  "step": 140
  },
  {
  "epoch": 0.6590909090909091,
- "grad_norm": 0.64453125,
+ "grad_norm": 0.65234375,
  "learning_rate": 6.283375443396726e-05,
- "loss": 1.2926,
+ "loss": 1.2884,
  "step": 145
  },
  {
  "epoch": 0.6818181818181818,
- "grad_norm": 0.462890625,
+ "grad_norm": 0.59375,
  "learning_rate": 5.559333873942259e-05,
- "loss": 1.2886,
+ "loss": 1.2864,
  "step": 150
  },
  {
  "epoch": 0.7045454545454546,
- "grad_norm": 0.671875,
+ "grad_norm": 0.72265625,
  "learning_rate": 4.8632260842659393e-05,
- "loss": 1.2927,
+ "loss": 1.2918,
  "step": 155
  },
  {
  "epoch": 0.7272727272727273,
- "grad_norm": 0.51953125,
+ "grad_norm": 0.5,
  "learning_rate": 4.19943090428802e-05,
- "loss": 1.2802,
+ "loss": 1.2773,
  "step": 160
  },
  {
  "epoch": 0.75,
- "grad_norm": 0.447265625,
+ "grad_norm": 0.453125,
  "learning_rate": 3.5721239031346066e-05,
- "loss": 1.2582,
+ "loss": 1.2545,
  "step": 165
  },
  {
  "epoch": 0.7727272727272727,
- "grad_norm": 0.5625,
+ "grad_norm": 0.51171875,
  "learning_rate": 2.9852511229367865e-05,
- "loss": 1.2829,
+ "loss": 1.2819,
  "step": 170
  },
  {
  "epoch": 0.7954545454545454,
- "grad_norm": 0.41796875,
+ "grad_norm": 0.4453125,
  "learning_rate": 2.4425042564574184e-05,
- "loss": 1.2812,
+ "loss": 1.2799,
  "step": 175
  },
  {
  "epoch": 0.8181818181818182,
- "grad_norm": 0.4375,
+ "grad_norm": 0.458984375,
  "learning_rate": 1.947297424689414e-05,
- "loss": 1.2832,
+ "loss": 1.2809,
  "step": 180
  },
  {
  "epoch": 0.8409090909090909,
- "grad_norm": 0.4609375,
+ "grad_norm": 0.447265625,
  "learning_rate": 1.5027457005048573e-05,
- "loss": 1.277,
+ "loss": 1.2754,
  "step": 185
  },
  {
  "epoch": 0.8636363636363636,
- "grad_norm": 0.5234375,
+ "grad_norm": 0.671875,
  "learning_rate": 1.1116455134507664e-05,
- "loss": 1.2788,
+ "loss": 1.2745,
  "step": 190
  },
  {
  "epoch": 0.8863636363636364,
- "grad_norm": 0.53125,
+ "grad_norm": 0.447265625,
  "learning_rate": 7.764570589541875e-06,
- "loss": 1.2764,
+ "loss": 1.2754,
  "step": 195
  },
  {
  "epoch": 0.9090909090909091,
- "grad_norm": 0.5078125,
+ "grad_norm": 0.47265625,
  "learning_rate": 4.992888225905468e-06,
- "loss": 1.2836,
+ "loss": 1.2819,
  "step": 200
  },
  {
  "epoch": 0.9318181818181818,
- "grad_norm": 0.65625,
+ "grad_norm": 0.54296875,
  "learning_rate": 2.818843167645835e-06,
- "loss": 1.2792,
+ "loss": 1.2782,
  "step": 205
  },
  {
  "epoch": 0.9545454545454546,
- "grad_norm": 0.5546875,
+ "grad_norm": 0.470703125,
  "learning_rate": 1.2561111323605712e-06,
- "loss": 1.2774,
+ "loss": 1.2758,
  "step": 210
  },
  {
  "epoch": 0.9772727272727273,
- "grad_norm": 0.47265625,
+ "grad_norm": 0.453125,
  "learning_rate": 3.145224048057727e-07,
- "loss": 1.2793,
+ "loss": 1.2759,
  "step": 215
  },
  {
  "epoch": 1.0,
- "grad_norm": 0.51171875,
+ "grad_norm": 0.470703125,
  "learning_rate": 0.0,
- "loss": 1.2731,
+ "loss": 1.2702,
  "step": 220
  },
  {
  "epoch": 1.0,
  "step": 220,
  "total_flos": 7357983621120000.0,
- "train_loss": 0.11611378409645774,
- "train_runtime": 16.2542,
- "train_samples_per_second": 864.147,
- "train_steps_per_second": 13.535
+ "train_loss": 1.3832195509563794,
+ "train_runtime": 51.7657,
+ "train_samples_per_second": 271.338,
+ "train_steps_per_second": 4.25
  }
  ],
  "logging_steps": 5,