SystemAdmin123 committed on
Commit
9fd4ab4
·
verified ·
1 Parent(s): fa3fa38

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9e33d86b7643ca1dcb0ff586538eca0faa1c5ceb646f55ab91b4c3f84b9791f
3
  size 4943178720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe6bcaf9f616919347ffeee8a71be2fb613c9f71c6a3338c5b732ffb1e8b2d6
3
  size 4943178720
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09671ff7f4231f95a58974cee08101fdbd61e0e44a2da6afaa3f08be535bc56c
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2236db48c00b99ae5863de2038fabe22d15c5054f2785ede9e8fbb3d1c8613ac
3
  size 4999819336
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:867e8292011b488643369bc98aece5a8d9682c4d0825739096988e70f10694e1
3
  size 4540532728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ce91b33fc26f5c6807cbf06b0ccf52b9de474b4f39f114b6b15e2346cfeab5
3
  size 4540532728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea5954872831a1199bf6baadd994d97241ff683269e1fa383981e05c79f3d256
3
  size 14710155092
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd02223f419f1c86c6fe67014fc5228d9155b553579c6923475086c41a1c388
3
  size 14710155092
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d608751b30705d9fdab765f269290cd17ed21a1697e9fcb49bf7ebeaac38aebb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f60241cb6cb86cf9966e8cfe2248be00bce643b5808e2c3b78c9cb618eea253
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e84ec92ba6fcaca80f594e6a478d1a67e74e4c4df966b365126aa5fced1503ad
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e76feebe960d60536ad1ed0bcaee2e12a3f8432f33b7ee3b0cae559b12130c0
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff2e96f7570e4637dff53935f12387cbc820714a50eb737472244c44d20994b4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5988c600823ef881ed3900c9909420e69870efab70abf3dca0673a3c88b057
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67a7f9fde521312c299d8fef03e73900bbba79d87446cea5f97a33fc79bebea8
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7090a3c6759641db81e3ee589636615551bb1b7ce0948f2fd4ab7d7beb35de9c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b5bd1b09d58ee3e65553e6f9772dc7c5ca98238b1cade33cccd500df2328864
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d429d3d3635edcf38935f000b0d1f4e5db465042c289fb4623c33dce588231ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.47619047619047616,
5
  "eval_steps": 100,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -50,6 +50,49 @@
50
  "learning_rate": 0.00019939306773179497,
51
  "loss": 8.555,
52
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  ],
55
  "logging_steps": 10,
@@ -69,7 +112,7 @@
69
  "attributes": {}
70
  }
71
  },
72
- "total_flos": 3.495035542700032e+16,
73
  "train_batch_size": 2,
74
  "trial_name": null,
75
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9523809523809523,
5
  "eval_steps": 100,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
50
  "learning_rate": 0.00019939306773179497,
51
  "loss": 8.555,
52
  "step": 50
53
+ },
54
+ {
55
+ "epoch": 0.5714285714285714,
56
+ "grad_norm": 54.75,
57
+ "learning_rate": 0.00019863613034027224,
58
+ "loss": 9.2629,
59
+ "step": 60
60
+ },
61
+ {
62
+ "epoch": 0.6666666666666666,
63
+ "grad_norm": 38.0,
64
+ "learning_rate": 0.00019757963826274357,
65
+ "loss": 16.9373,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 0.7619047619047619,
70
+ "grad_norm": 346.0,
71
+ "learning_rate": 0.00019622680003092503,
72
+ "loss": 13.7525,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 0.8571428571428571,
77
+ "grad_norm": 18.875,
78
+ "learning_rate": 0.00019458172417006347,
79
+ "loss": 10.6593,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.9523809523809523,
84
+ "grad_norm": 17.625,
85
+ "learning_rate": 0.00019264940672148018,
86
+ "loss": 10.0689,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 0.9523809523809523,
91
+ "eval_loss": 10.551448822021484,
92
+ "eval_runtime": 26.9779,
93
+ "eval_samples_per_second": 55.638,
94
+ "eval_steps_per_second": 6.969,
95
+ "step": 100
96
  }
97
  ],
98
  "logging_steps": 10,
 
112
  "attributes": {}
113
  }
114
  },
115
+ "total_flos": 6.990071085400064e+16,
116
  "train_batch_size": 2,
117
  "trial_name": null,
118
  "trial_params": null