SystemAdmin123 commited on
Commit
7fafe3b
·
verified ·
1 Parent(s): accb7ad

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76e466c580fe14588089dcc7d286c83cd652f7c0a24eccc5457f63a2c1a6744b
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ab72b4709d6ffbeb35af35f341004d363c90fd1eda36628e6d84de407c80bf
3
  size 4976698672
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46673c0f6373d0edea145cf1c76d151e7e4002b0f392305989819179f47d88d2
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ab6f792622c0c35fd0799cc8a9d4255063a06471fff7aa9c635e67635f7286f
3
  size 4999802720
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2181f67f278f731239c7b02e50926de87b1f74983952474350708269c5dd6a24
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:567bbc43b31b395b0d27e12f1dc5930e0dafe7f7b99b4e60685919b90587ae4c
3
  size 4915916176
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a31a8df2e382dbfa72c7ca55ade38083f4b79558c330f104b4e7917d67b1c3e
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75e3a8fbbb21272764f003e8b4f7a98ed04d0de1487f3c39a257ca07988b0161
3
  size 1168138808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3233a68936feec1fb7b28adf511d152994f3fd4a66f95f806271d01bbb201c2
3
  size 16311821124
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b5fa7e25560e0e69c7447925bbecf06781cddc7ad3a142309b7ea1097dbfd80
3
  size 16311821124
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69a04a1208f7a0d6f51f37a136b5c2e55bf3f53b3d0fd57164c5b83ca47a2645
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f60241cb6cb86cf9966e8cfe2248be00bce643b5808e2c3b78c9cb618eea253
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:080a7e72d6be938a9418e60003db90412af8a61e6434f9e9f1b598cca861dbcd
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e76feebe960d60536ad1ed0bcaee2e12a3f8432f33b7ee3b0cae559b12130c0
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3d114a75d37be476b865187eb2b3d29d9343b131614a08f42be0014f110ce6f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5988c600823ef881ed3900c9909420e69870efab70abf3dca0673a3c88b057
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fc5a0f78838743362c5d5378dff81ea2f7d0039da53a423f1759e861bc6b233
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7090a3c6759641db81e3ee589636615551bb1b7ce0948f2fd4ab7d7beb35de9c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86aa1c590799d718328ad7b7198db3fa4678198705c85eb25b7f257d9e38e2cd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a562280f18454416a11d2a32a1435679f44d10b11637c5297815c06d499163
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.127659574468085,
5
- "eval_steps": 200,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,165 +11,39 @@
11
  {
12
  "epoch": 0.010638297872340425,
13
  "eval_loss": 2.7702033519744873,
14
- "eval_runtime": 30.6604,
15
- "eval_samples_per_second": 48.956,
16
- "eval_steps_per_second": 6.132,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.10638297872340426,
21
- "grad_norm": 5.40625,
22
  "learning_rate": 8e-05,
23
- "loss": 2.0559,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.2127659574468085,
28
- "grad_norm": 5.25,
29
  "learning_rate": 0.00016,
30
- "loss": 2.0743,
31
  "step": 20
32
  },
33
  {
34
- "epoch": 0.3191489361702128,
35
- "grad_norm": 5.75,
36
- "learning_rate": 0.00019994532573409262,
37
- "loss": 2.4587,
38
- "step": 30
39
- },
40
- {
41
- "epoch": 0.425531914893617,
42
- "grad_norm": 6.03125,
43
- "learning_rate": 0.00019950829025450114,
44
- "loss": 2.6661,
45
- "step": 40
46
- },
47
- {
48
- "epoch": 0.5319148936170213,
49
- "grad_norm": 6.75,
50
- "learning_rate": 0.00019863613034027224,
51
- "loss": 2.8731,
52
- "step": 50
53
- },
54
- {
55
- "epoch": 0.6382978723404256,
56
- "grad_norm": 4.0625,
57
- "learning_rate": 0.0001973326597248006,
58
- "loss": 2.8111,
59
- "step": 60
60
- },
61
- {
62
- "epoch": 0.7446808510638298,
63
- "grad_norm": 5.59375,
64
- "learning_rate": 0.00019560357815343577,
65
- "loss": 2.8978,
66
- "step": 70
67
- },
68
- {
69
- "epoch": 0.851063829787234,
70
- "grad_norm": 7.09375,
71
- "learning_rate": 0.0001934564464599461,
72
- "loss": 2.9503,
73
- "step": 80
74
- },
75
- {
76
- "epoch": 0.9574468085106383,
77
- "grad_norm": 10.125,
78
- "learning_rate": 0.00019090065350491626,
79
- "loss": 2.9875,
80
- "step": 90
81
- },
82
- {
83
- "epoch": 1.0638297872340425,
84
- "grad_norm": 5.34375,
85
- "learning_rate": 0.0001879473751206489,
86
- "loss": 2.1433,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 1.1702127659574468,
91
- "grad_norm": 5.1875,
92
- "learning_rate": 0.00018460952524209355,
93
- "loss": 1.4928,
94
- "step": 110
95
- },
96
- {
97
- "epoch": 1.2765957446808511,
98
- "grad_norm": 4.90625,
99
- "learning_rate": 0.00018090169943749476,
100
- "loss": 1.6588,
101
- "step": 120
102
- },
103
- {
104
- "epoch": 1.3829787234042552,
105
- "grad_norm": 4.59375,
106
- "learning_rate": 0.00017684011108568592,
107
- "loss": 1.5762,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 1.4893617021276595,
112
- "grad_norm": 4.46875,
113
- "learning_rate": 0.00017244252047910892,
114
- "loss": 1.6862,
115
- "step": 140
116
- },
117
- {
118
- "epoch": 1.5957446808510638,
119
- "grad_norm": 4.59375,
120
- "learning_rate": 0.00016772815716257412,
121
- "loss": 1.6834,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 1.702127659574468,
126
- "grad_norm": 4.46875,
127
- "learning_rate": 0.0001627176358473537,
128
- "loss": 1.6762,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 1.8085106382978724,
133
- "grad_norm": 4.5625,
134
- "learning_rate": 0.00015743286626829437,
135
- "loss": 1.7259,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 1.9148936170212765,
140
- "grad_norm": 4.84375,
141
- "learning_rate": 0.00015189695737812152,
142
- "loss": 1.8411,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 2.021276595744681,
147
- "grad_norm": 3.53125,
148
- "learning_rate": 0.0001461341162978688,
149
- "loss": 1.5208,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 2.127659574468085,
154
- "grad_norm": 2.75,
155
- "learning_rate": 0.00014016954246529696,
156
- "loss": 0.6512,
157
- "step": 200
158
- },
159
- {
160
- "epoch": 2.127659574468085,
161
- "eval_loss": 3.396796941757202,
162
- "eval_runtime": 28.9515,
163
- "eval_samples_per_second": 51.845,
164
- "eval_steps_per_second": 6.494,
165
- "step": 200
166
  }
167
  ],
168
  "logging_steps": 10,
169
  "max_steps": 500,
170
  "num_input_tokens_seen": 0,
171
  "num_train_epochs": 6,
172
- "save_steps": 200,
173
  "stateful_callbacks": {
174
  "TrainerControl": {
175
  "args": {
@@ -182,7 +56,7 @@
182
  "attributes": {}
183
  }
184
  },
185
- "total_flos": 1.4755282835996672e+17,
186
  "train_batch_size": 2,
187
  "trial_name": null,
188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2127659574468085,
5
+ "eval_steps": 20,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.010638297872340425,
13
  "eval_loss": 2.7702033519744873,
14
+ "eval_runtime": 32.7713,
15
+ "eval_samples_per_second": 45.802,
16
+ "eval_steps_per_second": 5.737,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.10638297872340426,
21
+ "grad_norm": 5.4375,
22
  "learning_rate": 8e-05,
23
+ "loss": 2.0564,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.2127659574468085,
28
+ "grad_norm": 5.375,
29
  "learning_rate": 0.00016,
30
+ "loss": 2.073,
31
  "step": 20
32
  },
33
  {
34
+ "epoch": 0.2127659574468085,
35
+ "eval_loss": 2.1077733039855957,
36
+ "eval_runtime": 29.718,
37
+ "eval_samples_per_second": 50.508,
38
+ "eval_steps_per_second": 6.326,
39
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  ],
42
  "logging_steps": 10,
43
  "max_steps": 500,
44
  "num_input_tokens_seen": 0,
45
  "num_train_epochs": 6,
46
+ "save_steps": 20,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
49
  "args": {
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 1.4755282835996672e+16,
60
  "train_batch_size": 2,
61
  "trial_name": null,
62
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dd3a0af1f706fbf33a681f8d5ee9dd18fc80aa1558af309107d63117c714c75
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c7728cf6d1151c0535e539428181ba72626c4c7cb50ebf1d0cd8c8ffd1ed8d
3
  size 7032