Training in progress, step 13748, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +521 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 737582948
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9429983e59f652175f71152fba6eaf3af3a03dcccaed4b1c0446ada02b2b54e6
|
3 |
size 737582948
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1475256250
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e8f963dd44ad4b5a4ff6a887f814dc448e58639e52eefe4e323265991e6b18d
|
3 |
size 1475256250
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75615b5e6cc125bb94988b3c50b73a5f8c3305643e30a3d5b2f3189a2032ba16
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2e8e6885d573427d2de37a77bf587fa112946ff22d3ea4df32210439a557a5b
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cc6d8d5bb2a96a1ebfb5cf92fac012f69410a414ce89ccd7c5ae11f14e596fa
|
3 |
+
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b1ad1f84976b61e4cfaae51278742d669e0df2692aced4131064ecd61c1edf2
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35fafd6395e4cb387bb75fb28a0482502f9e17f6c3b0e3e256daf180373b3f0b
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48c89bb33c92eb59bfef32b9537aa0cfa50296c7262cfdb9eb91256dc5b5e9f3
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32dfdc872866fda5b64b7229bac1e43cf4fe2356a4c82d10a2502643547790ec
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a48787eaef9585df14b508d1097c445291248a545d320eeaf26f46b061d496a
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2813db161368db76429d904a036e1161875e895320a4ce21cc6fa1fdd51aa271
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -9107,6 +9107,524 @@
|
|
9107 |
"learning_rate": 4.981531808618395e-05,
|
9108 |
"loss": 0.6818,
|
9109 |
"step": 13000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9110 |
}
|
9111 |
],
|
9112 |
"logging_steps": 10,
|
@@ -9121,7 +9639,7 @@
|
|
9121 |
"should_evaluate": false,
|
9122 |
"should_log": false,
|
9123 |
"should_save": true,
|
9124 |
-
"should_training_stop":
|
9125 |
},
|
9126 |
"attributes": {}
|
9127 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.99692914763958,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 13748,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
9107 |
"learning_rate": 4.981531808618395e-05,
|
9108 |
"loss": 0.6818,
|
9109 |
"step": 13000
|
9110 |
+
},
|
9111 |
+
{
|
9112 |
+
"epoch": 6.621171379019555,
|
9113 |
+
"grad_norm": 0.13289377093315125,
|
9114 |
+
"learning_rate": 4.981517602317332e-05,
|
9115 |
+
"loss": 0.6811,
|
9116 |
+
"step": 13010
|
9117 |
+
},
|
9118 |
+
{
|
9119 |
+
"epoch": 6.626262947700043,
|
9120 |
+
"grad_norm": 0.18308168649673462,
|
9121 |
+
"learning_rate": 4.9815033960162695e-05,
|
9122 |
+
"loss": 0.678,
|
9123 |
+
"step": 13020
|
9124 |
+
},
|
9125 |
+
{
|
9126 |
+
"epoch": 6.631354516380531,
|
9127 |
+
"grad_norm": 0.12425180524587631,
|
9128 |
+
"learning_rate": 4.981489189715207e-05,
|
9129 |
+
"loss": 0.6816,
|
9130 |
+
"step": 13030
|
9131 |
+
},
|
9132 |
+
{
|
9133 |
+
"epoch": 6.636446085061019,
|
9134 |
+
"grad_norm": 0.13754673302173615,
|
9135 |
+
"learning_rate": 4.981474983414144e-05,
|
9136 |
+
"loss": 0.6773,
|
9137 |
+
"step": 13040
|
9138 |
+
},
|
9139 |
+
{
|
9140 |
+
"epoch": 6.641537653741508,
|
9141 |
+
"grad_norm": 0.15316608548164368,
|
9142 |
+
"learning_rate": 4.9814607771130814e-05,
|
9143 |
+
"loss": 0.6765,
|
9144 |
+
"step": 13050
|
9145 |
+
},
|
9146 |
+
{
|
9147 |
+
"epoch": 6.646629222421995,
|
9148 |
+
"grad_norm": 0.136078342795372,
|
9149 |
+
"learning_rate": 4.981446570812018e-05,
|
9150 |
+
"loss": 0.6767,
|
9151 |
+
"step": 13060
|
9152 |
+
},
|
9153 |
+
{
|
9154 |
+
"epoch": 6.651720791102484,
|
9155 |
+
"grad_norm": 0.12898576259613037,
|
9156 |
+
"learning_rate": 4.9814323645109554e-05,
|
9157 |
+
"loss": 0.6786,
|
9158 |
+
"step": 13070
|
9159 |
+
},
|
9160 |
+
{
|
9161 |
+
"epoch": 6.656812359782972,
|
9162 |
+
"grad_norm": 0.11854422837495804,
|
9163 |
+
"learning_rate": 4.981418158209893e-05,
|
9164 |
+
"loss": 0.6806,
|
9165 |
+
"step": 13080
|
9166 |
+
},
|
9167 |
+
{
|
9168 |
+
"epoch": 6.66190392846346,
|
9169 |
+
"grad_norm": 0.1517888456583023,
|
9170 |
+
"learning_rate": 4.98140395190883e-05,
|
9171 |
+
"loss": 0.6829,
|
9172 |
+
"step": 13090
|
9173 |
+
},
|
9174 |
+
{
|
9175 |
+
"epoch": 6.666995497143948,
|
9176 |
+
"grad_norm": 0.1091533899307251,
|
9177 |
+
"learning_rate": 4.9813897456077666e-05,
|
9178 |
+
"loss": 0.6774,
|
9179 |
+
"step": 13100
|
9180 |
+
},
|
9181 |
+
{
|
9182 |
+
"epoch": 6.672087065824436,
|
9183 |
+
"grad_norm": 0.13526228070259094,
|
9184 |
+
"learning_rate": 4.981375539306704e-05,
|
9185 |
+
"loss": 0.6747,
|
9186 |
+
"step": 13110
|
9187 |
+
},
|
9188 |
+
{
|
9189 |
+
"epoch": 6.677178634504925,
|
9190 |
+
"grad_norm": 0.144491046667099,
|
9191 |
+
"learning_rate": 4.981361333005641e-05,
|
9192 |
+
"loss": 0.6787,
|
9193 |
+
"step": 13120
|
9194 |
+
},
|
9195 |
+
{
|
9196 |
+
"epoch": 6.682270203185412,
|
9197 |
+
"grad_norm": 0.16958777606487274,
|
9198 |
+
"learning_rate": 4.9813471267045786e-05,
|
9199 |
+
"loss": 0.6744,
|
9200 |
+
"step": 13130
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 6.687361771865901,
|
9204 |
+
"grad_norm": 0.14115367829799652,
|
9205 |
+
"learning_rate": 4.981332920403516e-05,
|
9206 |
+
"loss": 0.6791,
|
9207 |
+
"step": 13140
|
9208 |
+
},
|
9209 |
+
{
|
9210 |
+
"epoch": 6.692453340546389,
|
9211 |
+
"grad_norm": 0.11081673204898834,
|
9212 |
+
"learning_rate": 4.981318714102453e-05,
|
9213 |
+
"loss": 0.6795,
|
9214 |
+
"step": 13150
|
9215 |
+
},
|
9216 |
+
{
|
9217 |
+
"epoch": 6.6975449092268775,
|
9218 |
+
"grad_norm": 0.14843027293682098,
|
9219 |
+
"learning_rate": 4.9813045078013905e-05,
|
9220 |
+
"loss": 0.6807,
|
9221 |
+
"step": 13160
|
9222 |
+
},
|
9223 |
+
{
|
9224 |
+
"epoch": 6.702636477907365,
|
9225 |
+
"grad_norm": 0.12543180584907532,
|
9226 |
+
"learning_rate": 4.981290301500328e-05,
|
9227 |
+
"loss": 0.6778,
|
9228 |
+
"step": 13170
|
9229 |
+
},
|
9230 |
+
{
|
9231 |
+
"epoch": 6.707728046587853,
|
9232 |
+
"grad_norm": 0.13169404864311218,
|
9233 |
+
"learning_rate": 4.981276095199265e-05,
|
9234 |
+
"loss": 0.675,
|
9235 |
+
"step": 13180
|
9236 |
+
},
|
9237 |
+
{
|
9238 |
+
"epoch": 6.712819615268342,
|
9239 |
+
"grad_norm": 0.15343239903450012,
|
9240 |
+
"learning_rate": 4.9812618888982024e-05,
|
9241 |
+
"loss": 0.6819,
|
9242 |
+
"step": 13190
|
9243 |
+
},
|
9244 |
+
{
|
9245 |
+
"epoch": 6.7179111839488295,
|
9246 |
+
"grad_norm": 0.13029424846172333,
|
9247 |
+
"learning_rate": 4.981247682597139e-05,
|
9248 |
+
"loss": 0.6778,
|
9249 |
+
"step": 13200
|
9250 |
+
},
|
9251 |
+
{
|
9252 |
+
"epoch": 6.723002752629318,
|
9253 |
+
"grad_norm": 0.11084284633398056,
|
9254 |
+
"learning_rate": 4.9812334762960764e-05,
|
9255 |
+
"loss": 0.6824,
|
9256 |
+
"step": 13210
|
9257 |
+
},
|
9258 |
+
{
|
9259 |
+
"epoch": 6.728094321309806,
|
9260 |
+
"grad_norm": 0.11253423988819122,
|
9261 |
+
"learning_rate": 4.981219269995014e-05,
|
9262 |
+
"loss": 0.6798,
|
9263 |
+
"step": 13220
|
9264 |
+
},
|
9265 |
+
{
|
9266 |
+
"epoch": 6.7331858899902945,
|
9267 |
+
"grad_norm": 0.1311793029308319,
|
9268 |
+
"learning_rate": 4.981205063693951e-05,
|
9269 |
+
"loss": 0.6814,
|
9270 |
+
"step": 13230
|
9271 |
+
},
|
9272 |
+
{
|
9273 |
+
"epoch": 6.738277458670782,
|
9274 |
+
"grad_norm": 0.12919209897518158,
|
9275 |
+
"learning_rate": 4.981190857392888e-05,
|
9276 |
+
"loss": 0.6768,
|
9277 |
+
"step": 13240
|
9278 |
+
},
|
9279 |
+
{
|
9280 |
+
"epoch": 6.743369027351271,
|
9281 |
+
"grad_norm": 0.12355062365531921,
|
9282 |
+
"learning_rate": 4.9811766510918256e-05,
|
9283 |
+
"loss": 0.6799,
|
9284 |
+
"step": 13250
|
9285 |
+
},
|
9286 |
+
{
|
9287 |
+
"epoch": 6.748460596031759,
|
9288 |
+
"grad_norm": 0.1338970810174942,
|
9289 |
+
"learning_rate": 4.981162444790763e-05,
|
9290 |
+
"loss": 0.6771,
|
9291 |
+
"step": 13260
|
9292 |
+
},
|
9293 |
+
{
|
9294 |
+
"epoch": 6.7535521647122465,
|
9295 |
+
"grad_norm": 0.14117179811000824,
|
9296 |
+
"learning_rate": 4.9811482384897e-05,
|
9297 |
+
"loss": 0.6799,
|
9298 |
+
"step": 13270
|
9299 |
+
},
|
9300 |
+
{
|
9301 |
+
"epoch": 6.758643733392735,
|
9302 |
+
"grad_norm": 0.1848529875278473,
|
9303 |
+
"learning_rate": 4.9811340321886375e-05,
|
9304 |
+
"loss": 0.6755,
|
9305 |
+
"step": 13280
|
9306 |
+
},
|
9307 |
+
{
|
9308 |
+
"epoch": 6.763735302073223,
|
9309 |
+
"grad_norm": 0.1720336526632309,
|
9310 |
+
"learning_rate": 4.981119825887575e-05,
|
9311 |
+
"loss": 0.67,
|
9312 |
+
"step": 13290
|
9313 |
+
},
|
9314 |
+
{
|
9315 |
+
"epoch": 6.768826870753712,
|
9316 |
+
"grad_norm": 0.1607787162065506,
|
9317 |
+
"learning_rate": 4.981105619586512e-05,
|
9318 |
+
"loss": 0.6827,
|
9319 |
+
"step": 13300
|
9320 |
+
},
|
9321 |
+
{
|
9322 |
+
"epoch": 6.773918439434199,
|
9323 |
+
"grad_norm": 0.14998158812522888,
|
9324 |
+
"learning_rate": 4.981091413285449e-05,
|
9325 |
+
"loss": 0.6759,
|
9326 |
+
"step": 13310
|
9327 |
+
},
|
9328 |
+
{
|
9329 |
+
"epoch": 6.779010008114687,
|
9330 |
+
"grad_norm": 0.11763730645179749,
|
9331 |
+
"learning_rate": 4.981077206984386e-05,
|
9332 |
+
"loss": 0.6747,
|
9333 |
+
"step": 13320
|
9334 |
+
},
|
9335 |
+
{
|
9336 |
+
"epoch": 6.784101576795176,
|
9337 |
+
"grad_norm": 0.12859204411506653,
|
9338 |
+
"learning_rate": 4.9810630006833234e-05,
|
9339 |
+
"loss": 0.6785,
|
9340 |
+
"step": 13330
|
9341 |
+
},
|
9342 |
+
{
|
9343 |
+
"epoch": 6.7891931454756635,
|
9344 |
+
"grad_norm": 0.12227821350097656,
|
9345 |
+
"learning_rate": 4.98104879438226e-05,
|
9346 |
+
"loss": 0.6794,
|
9347 |
+
"step": 13340
|
9348 |
+
},
|
9349 |
+
{
|
9350 |
+
"epoch": 6.794284714156152,
|
9351 |
+
"grad_norm": 0.11308576911687851,
|
9352 |
+
"learning_rate": 4.9810345880811974e-05,
|
9353 |
+
"loss": 0.6777,
|
9354 |
+
"step": 13350
|
9355 |
+
},
|
9356 |
+
{
|
9357 |
+
"epoch": 6.79937628283664,
|
9358 |
+
"grad_norm": 0.12252433598041534,
|
9359 |
+
"learning_rate": 4.981020381780135e-05,
|
9360 |
+
"loss": 0.6778,
|
9361 |
+
"step": 13360
|
9362 |
+
},
|
9363 |
+
{
|
9364 |
+
"epoch": 6.804467851517129,
|
9365 |
+
"grad_norm": 0.11951456218957901,
|
9366 |
+
"learning_rate": 4.981006175479072e-05,
|
9367 |
+
"loss": 0.6778,
|
9368 |
+
"step": 13370
|
9369 |
+
},
|
9370 |
+
{
|
9371 |
+
"epoch": 6.809559420197616,
|
9372 |
+
"grad_norm": 0.13758736848831177,
|
9373 |
+
"learning_rate": 4.980991969178009e-05,
|
9374 |
+
"loss": 0.6757,
|
9375 |
+
"step": 13380
|
9376 |
+
},
|
9377 |
+
{
|
9378 |
+
"epoch": 6.814650988878105,
|
9379 |
+
"grad_norm": 0.15930655598640442,
|
9380 |
+
"learning_rate": 4.9809777628769466e-05,
|
9381 |
+
"loss": 0.675,
|
9382 |
+
"step": 13390
|
9383 |
+
},
|
9384 |
+
{
|
9385 |
+
"epoch": 6.819742557558593,
|
9386 |
+
"grad_norm": 0.16790159046649933,
|
9387 |
+
"learning_rate": 4.980963556575884e-05,
|
9388 |
+
"loss": 0.6685,
|
9389 |
+
"step": 13400
|
9390 |
+
},
|
9391 |
+
{
|
9392 |
+
"epoch": 6.824834126239081,
|
9393 |
+
"grad_norm": 0.1681044101715088,
|
9394 |
+
"learning_rate": 4.980949350274821e-05,
|
9395 |
+
"loss": 0.683,
|
9396 |
+
"step": 13410
|
9397 |
+
},
|
9398 |
+
{
|
9399 |
+
"epoch": 6.829925694919569,
|
9400 |
+
"grad_norm": 0.1336173415184021,
|
9401 |
+
"learning_rate": 4.9809351439737585e-05,
|
9402 |
+
"loss": 0.6746,
|
9403 |
+
"step": 13420
|
9404 |
+
},
|
9405 |
+
{
|
9406 |
+
"epoch": 6.835017263600057,
|
9407 |
+
"grad_norm": 0.11793011426925659,
|
9408 |
+
"learning_rate": 4.980920937672696e-05,
|
9409 |
+
"loss": 0.6789,
|
9410 |
+
"step": 13430
|
9411 |
+
},
|
9412 |
+
{
|
9413 |
+
"epoch": 6.840108832280546,
|
9414 |
+
"grad_norm": 0.14056985080242157,
|
9415 |
+
"learning_rate": 4.980906731371633e-05,
|
9416 |
+
"loss": 0.6797,
|
9417 |
+
"step": 13440
|
9418 |
+
},
|
9419 |
+
{
|
9420 |
+
"epoch": 6.845200400961033,
|
9421 |
+
"grad_norm": 0.11312086880207062,
|
9422 |
+
"learning_rate": 4.9808925250705705e-05,
|
9423 |
+
"loss": 0.6777,
|
9424 |
+
"step": 13450
|
9425 |
+
},
|
9426 |
+
{
|
9427 |
+
"epoch": 6.850291969641522,
|
9428 |
+
"grad_norm": 0.14550986886024475,
|
9429 |
+
"learning_rate": 4.980878318769507e-05,
|
9430 |
+
"loss": 0.6792,
|
9431 |
+
"step": 13460
|
9432 |
+
},
|
9433 |
+
{
|
9434 |
+
"epoch": 6.85538353832201,
|
9435 |
+
"grad_norm": 0.13276565074920654,
|
9436 |
+
"learning_rate": 4.9808641124684444e-05,
|
9437 |
+
"loss": 0.6797,
|
9438 |
+
"step": 13470
|
9439 |
+
},
|
9440 |
+
{
|
9441 |
+
"epoch": 6.8604751070024985,
|
9442 |
+
"grad_norm": 0.1404767632484436,
|
9443 |
+
"learning_rate": 4.980849906167382e-05,
|
9444 |
+
"loss": 0.6767,
|
9445 |
+
"step": 13480
|
9446 |
+
},
|
9447 |
+
{
|
9448 |
+
"epoch": 6.865566675682986,
|
9449 |
+
"grad_norm": 0.11344119906425476,
|
9450 |
+
"learning_rate": 4.980835699866319e-05,
|
9451 |
+
"loss": 0.6779,
|
9452 |
+
"step": 13490
|
9453 |
+
},
|
9454 |
+
{
|
9455 |
+
"epoch": 6.870658244363474,
|
9456 |
+
"grad_norm": 0.18248707056045532,
|
9457 |
+
"learning_rate": 4.9808214935652563e-05,
|
9458 |
+
"loss": 0.6819,
|
9459 |
+
"step": 13500
|
9460 |
+
},
|
9461 |
+
{
|
9462 |
+
"epoch": 6.875749813043963,
|
9463 |
+
"grad_norm": 0.13696008920669556,
|
9464 |
+
"learning_rate": 4.9808072872641937e-05,
|
9465 |
+
"loss": 0.6789,
|
9466 |
+
"step": 13510
|
9467 |
+
},
|
9468 |
+
{
|
9469 |
+
"epoch": 6.8808413817244505,
|
9470 |
+
"grad_norm": 0.1089053824543953,
|
9471 |
+
"learning_rate": 4.98079308096313e-05,
|
9472 |
+
"loss": 0.6833,
|
9473 |
+
"step": 13520
|
9474 |
+
},
|
9475 |
+
{
|
9476 |
+
"epoch": 6.885932950404939,
|
9477 |
+
"grad_norm": 0.13730046153068542,
|
9478 |
+
"learning_rate": 4.9807788746620676e-05,
|
9479 |
+
"loss": 0.685,
|
9480 |
+
"step": 13530
|
9481 |
+
},
|
9482 |
+
{
|
9483 |
+
"epoch": 6.891024519085427,
|
9484 |
+
"grad_norm": 0.11708593368530273,
|
9485 |
+
"learning_rate": 4.980764668361005e-05,
|
9486 |
+
"loss": 0.6797,
|
9487 |
+
"step": 13540
|
9488 |
+
},
|
9489 |
+
{
|
9490 |
+
"epoch": 6.896116087765915,
|
9491 |
+
"grad_norm": 0.14479976892471313,
|
9492 |
+
"learning_rate": 4.980750462059942e-05,
|
9493 |
+
"loss": 0.6779,
|
9494 |
+
"step": 13550
|
9495 |
+
},
|
9496 |
+
{
|
9497 |
+
"epoch": 6.901207656446403,
|
9498 |
+
"grad_norm": 0.13402192294597626,
|
9499 |
+
"learning_rate": 4.9807362557588795e-05,
|
9500 |
+
"loss": 0.6775,
|
9501 |
+
"step": 13560
|
9502 |
+
},
|
9503 |
+
{
|
9504 |
+
"epoch": 6.906299225126891,
|
9505 |
+
"grad_norm": 0.1378648430109024,
|
9506 |
+
"learning_rate": 4.980722049457817e-05,
|
9507 |
+
"loss": 0.6799,
|
9508 |
+
"step": 13570
|
9509 |
+
},
|
9510 |
+
{
|
9511 |
+
"epoch": 6.91139079380738,
|
9512 |
+
"grad_norm": 0.1424325555562973,
|
9513 |
+
"learning_rate": 4.980707843156754e-05,
|
9514 |
+
"loss": 0.6777,
|
9515 |
+
"step": 13580
|
9516 |
+
},
|
9517 |
+
{
|
9518 |
+
"epoch": 6.9164823624878675,
|
9519 |
+
"grad_norm": 0.12795968353748322,
|
9520 |
+
"learning_rate": 4.9806936368556915e-05,
|
9521 |
+
"loss": 0.6756,
|
9522 |
+
"step": 13590
|
9523 |
+
},
|
9524 |
+
{
|
9525 |
+
"epoch": 6.921573931168356,
|
9526 |
+
"grad_norm": 0.16961532831192017,
|
9527 |
+
"learning_rate": 4.980679430554628e-05,
|
9528 |
+
"loss": 0.6762,
|
9529 |
+
"step": 13600
|
9530 |
+
},
|
9531 |
+
{
|
9532 |
+
"epoch": 6.926665499848844,
|
9533 |
+
"grad_norm": 0.16084560751914978,
|
9534 |
+
"learning_rate": 4.9806652242535654e-05,
|
9535 |
+
"loss": 0.6783,
|
9536 |
+
"step": 13610
|
9537 |
+
},
|
9538 |
+
{
|
9539 |
+
"epoch": 6.931757068529333,
|
9540 |
+
"grad_norm": 0.1510113775730133,
|
9541 |
+
"learning_rate": 4.980651017952503e-05,
|
9542 |
+
"loss": 0.676,
|
9543 |
+
"step": 13620
|
9544 |
+
},
|
9545 |
+
{
|
9546 |
+
"epoch": 6.93684863720982,
|
9547 |
+
"grad_norm": 0.1436864286661148,
|
9548 |
+
"learning_rate": 4.98063681165144e-05,
|
9549 |
+
"loss": 0.6769,
|
9550 |
+
"step": 13630
|
9551 |
+
},
|
9552 |
+
{
|
9553 |
+
"epoch": 6.941940205890308,
|
9554 |
+
"grad_norm": 0.14651361107826233,
|
9555 |
+
"learning_rate": 4.980622605350377e-05,
|
9556 |
+
"loss": 0.6786,
|
9557 |
+
"step": 13640
|
9558 |
+
},
|
9559 |
+
{
|
9560 |
+
"epoch": 6.947031774570797,
|
9561 |
+
"grad_norm": 0.12080514430999756,
|
9562 |
+
"learning_rate": 4.9806083990493146e-05,
|
9563 |
+
"loss": 0.6719,
|
9564 |
+
"step": 13650
|
9565 |
+
},
|
9566 |
+
{
|
9567 |
+
"epoch": 6.952123343251285,
|
9568 |
+
"grad_norm": 0.18036852777004242,
|
9569 |
+
"learning_rate": 4.980594192748252e-05,
|
9570 |
+
"loss": 0.6776,
|
9571 |
+
"step": 13660
|
9572 |
+
},
|
9573 |
+
{
|
9574 |
+
"epoch": 6.957214911931773,
|
9575 |
+
"grad_norm": 0.15538708865642548,
|
9576 |
+
"learning_rate": 4.980579986447189e-05,
|
9577 |
+
"loss": 0.677,
|
9578 |
+
"step": 13670
|
9579 |
+
},
|
9580 |
+
{
|
9581 |
+
"epoch": 6.962306480612261,
|
9582 |
+
"grad_norm": 0.14524763822555542,
|
9583 |
+
"learning_rate": 4.9805657801461266e-05,
|
9584 |
+
"loss": 0.6725,
|
9585 |
+
"step": 13680
|
9586 |
+
},
|
9587 |
+
{
|
9588 |
+
"epoch": 6.96739804929275,
|
9589 |
+
"grad_norm": 0.13171471655368805,
|
9590 |
+
"learning_rate": 4.980551573845064e-05,
|
9591 |
+
"loss": 0.6814,
|
9592 |
+
"step": 13690
|
9593 |
+
},
|
9594 |
+
{
|
9595 |
+
"epoch": 6.972489617973237,
|
9596 |
+
"grad_norm": 0.14730645716190338,
|
9597 |
+
"learning_rate": 4.980537367544001e-05,
|
9598 |
+
"loss": 0.6828,
|
9599 |
+
"step": 13700
|
9600 |
+
},
|
9601 |
+
{
|
9602 |
+
"epoch": 6.977581186653726,
|
9603 |
+
"grad_norm": 0.1142466589808464,
|
9604 |
+
"learning_rate": 4.980523161242938e-05,
|
9605 |
+
"loss": 0.677,
|
9606 |
+
"step": 13710
|
9607 |
+
},
|
9608 |
+
{
|
9609 |
+
"epoch": 6.982672755334214,
|
9610 |
+
"grad_norm": 0.11980883777141571,
|
9611 |
+
"learning_rate": 4.980508954941875e-05,
|
9612 |
+
"loss": 0.6847,
|
9613 |
+
"step": 13720
|
9614 |
+
},
|
9615 |
+
{
|
9616 |
+
"epoch": 6.987764324014702,
|
9617 |
+
"grad_norm": 0.10882198065519333,
|
9618 |
+
"learning_rate": 4.9804947486408125e-05,
|
9619 |
+
"loss": 0.6749,
|
9620 |
+
"step": 13730
|
9621 |
+
},
|
9622 |
+
{
|
9623 |
+
"epoch": 6.99285589269519,
|
9624 |
+
"grad_norm": 0.1418180912733078,
|
9625 |
+
"learning_rate": 4.980480542339749e-05,
|
9626 |
+
"loss": 0.675,
|
9627 |
+
"step": 13740
|
9628 |
}
|
9629 |
],
|
9630 |
"logging_steps": 10,
|
|
|
9639 |
"should_evaluate": false,
|
9640 |
"should_log": false,
|
9641 |
"should_save": true,
|
9642 |
+
"should_training_stop": true
|
9643 |
},
|
9644 |
"attributes": {}
|
9645 |
}
|