Training in progress, epoch 4
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +50 -0
- training_args.bin +2 -2
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8996c0f0d071e83c9c6e195642babf19c328f88f396b109debadc0b8860c2f1a
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0f4f02a7b24ac19add6c021517f824c8e42a8e8b023bad8e224ed25e5cb19e1
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:931948dd89438a11620172ae4d11d1e7c3ebadfef8cc9874a5e6792052a53e8d
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7986c2d0b49f1a6cfd837ce48944f615dbe369b85db246c7f1a24a1ece8e9e1
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -186,3 +186,53 @@
|
|
186 |
{"current_steps": 181, "total_steps": 225, "loss": 0.3564, "lr": 9.005715715884409e-06, "epoch": 3.972565157750343, "percentage": 80.44, "elapsed_time": "5:24:31", "remaining_time": "1:18:53"}
|
187 |
{"current_steps": 182, "total_steps": 225, "loss": 0.3561, "lr": 8.616229390145361e-06, "epoch": 3.9945130315500688, "percentage": 80.89, "elapsed_time": "5:26:12", "remaining_time": "1:17:04"}
|
188 |
{"current_steps": 183, "total_steps": 225, "loss": 0.3455, "lr": 8.23433396620986e-06, "epoch": 4.016460905349795, "percentage": 81.33, "elapsed_time": "5:29:41", "remaining_time": "1:15:39"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
{"current_steps": 181, "total_steps": 225, "loss": 0.3564, "lr": 9.005715715884409e-06, "epoch": 3.972565157750343, "percentage": 80.44, "elapsed_time": "5:24:31", "remaining_time": "1:18:53"}
|
187 |
{"current_steps": 182, "total_steps": 225, "loss": 0.3561, "lr": 8.616229390145361e-06, "epoch": 3.9945130315500688, "percentage": 80.89, "elapsed_time": "5:26:12", "remaining_time": "1:17:04"}
|
188 |
{"current_steps": 183, "total_steps": 225, "loss": 0.3455, "lr": 8.23433396620986e-06, "epoch": 4.016460905349795, "percentage": 81.33, "elapsed_time": "5:29:41", "remaining_time": "1:15:39"}
|
189 |
+
{"current_steps": 184, "total_steps": 225, "loss": 0.3384, "lr": 7.86012181445297e-06, "epoch": 4.03840877914952, "percentage": 81.78, "elapsed_time": "5:31:25", "remaining_time": "1:13:51"}
|
190 |
+
{"current_steps": 185, "total_steps": 225, "loss": 0.3375, "lr": 7.4936834468699945e-06, "epoch": 4.060356652949245, "percentage": 82.22, "elapsed_time": "5:33:10", "remaining_time": "1:12:02"}
|
191 |
+
{"current_steps": 186, "total_steps": 225, "loss": 0.3423, "lr": 7.135107495183975e-06, "epoch": 4.082304526748971, "percentage": 82.67, "elapsed_time": "5:34:55", "remaining_time": "1:10:13"}
|
192 |
+
{"current_steps": 187, "total_steps": 225, "loss": 0.3375, "lr": 6.784480689408099e-06, "epoch": 4.104252400548697, "percentage": 83.11, "elapsed_time": "5:36:40", "remaining_time": "1:08:24"}
|
193 |
+
{"current_steps": 188, "total_steps": 225, "loss": 0.3354, "lr": 6.441887836867962e-06, "epoch": 4.126200274348422, "percentage": 83.56, "elapsed_time": "5:38:25", "remaining_time": "1:06:36"}
|
194 |
+
{"current_steps": 189, "total_steps": 225, "loss": 0.3307, "lr": 6.107411801688905e-06, "epoch": 4.148148148148148, "percentage": 84.0, "elapsed_time": "5:40:12", "remaining_time": "1:04:48"}
|
195 |
+
{"current_steps": 183, "total_steps": 225, "loss": 0.341, "lr": 8.23433396620986e-06, "epoch": 4.065843621399177, "percentage": 81.33, "elapsed_time": "0:01:52", "remaining_time": "0:00:25"}
|
196 |
+
{"current_steps": 184, "total_steps": 225, "loss": 0.3427, "lr": 7.86012181445297e-06, "epoch": 4.0877914951989025, "percentage": 81.78, "elapsed_time": "0:03:32", "remaining_time": "0:00:47"}
|
197 |
+
{"current_steps": 185, "total_steps": 225, "loss": 0.337, "lr": 7.4936834468699945e-06, "epoch": 4.109739368998628, "percentage": 82.22, "elapsed_time": "0:05:11", "remaining_time": "0:01:07"}
|
198 |
+
{"current_steps": 186, "total_steps": 225, "loss": 0.3345, "lr": 7.135107495183975e-06, "epoch": 4.131687242798354, "percentage": 82.67, "elapsed_time": "0:06:50", "remaining_time": "0:01:26"}
|
199 |
+
{"current_steps": 187, "total_steps": 225, "loss": 0.3297, "lr": 6.784480689408099e-06, "epoch": 4.153635116598079, "percentage": 83.11, "elapsed_time": "0:08:30", "remaining_time": "0:01:43"}
|
200 |
+
{"current_steps": 188, "total_steps": 225, "loss": 0.3414, "lr": 6.441887836867962e-06, "epoch": 4.175582990397805, "percentage": 83.56, "elapsed_time": "0:10:09", "remaining_time": "0:02:00"}
|
201 |
+
{"current_steps": 189, "total_steps": 225, "loss": 0.3387, "lr": 6.107411801688905e-06, "epoch": 4.197530864197531, "percentage": 84.0, "elapsed_time": "0:11:49", "remaining_time": "0:02:15"}
|
202 |
+
{"current_steps": 190, "total_steps": 225, "loss": 0.3386, "lr": 5.781133484753451e-06, "epoch": 4.219478737997257, "percentage": 84.44, "elapsed_time": "0:13:28", "remaining_time": "0:02:29"}
|
203 |
+
{"current_steps": 191, "total_steps": 225, "loss": 0.3392, "lr": 5.463131804133461e-06, "epoch": 4.2414266117969825, "percentage": 84.89, "elapsed_time": "0:15:09", "remaining_time": "0:02:41"}
|
204 |
+
{"current_steps": 192, "total_steps": 225, "loss": 0.3406, "lr": 5.1534836760019824e-06, "epoch": 4.2633744855967075, "percentage": 85.33, "elapsed_time": "0:16:48", "remaining_time": "0:02:53"}
|
205 |
+
{"current_steps": 193, "total_steps": 225, "loss": 0.3347, "lr": 4.852263996029259e-06, "epoch": 4.285322359396433, "percentage": 85.78, "elapsed_time": "0:18:28", "remaining_time": "0:03:03"}
|
206 |
+
{"current_steps": 194, "total_steps": 225, "loss": 0.3316, "lr": 4.559545621267414e-06, "epoch": 4.307270233196159, "percentage": 86.22, "elapsed_time": "0:20:07", "remaining_time": "0:03:13"}
|
207 |
+
{"current_steps": 195, "total_steps": 225, "loss": 0.3382, "lr": 4.275399352528342e-06, "epoch": 4.329218106995885, "percentage": 86.67, "elapsed_time": "0:21:47", "remaining_time": "0:03:21"}
|
208 |
+
{"current_steps": 196, "total_steps": 225, "loss": 0.3421, "lr": 3.999893917258799e-06, "epoch": 4.35116598079561, "percentage": 87.11, "elapsed_time": "0:23:26", "remaining_time": "0:03:28"}
|
209 |
+
{"current_steps": 197, "total_steps": 225, "loss": 0.3359, "lr": 3.733095952917101e-06, "epoch": 4.373113854595336, "percentage": 87.56, "elapsed_time": "0:25:05", "remaining_time": "0:03:34"}
|
210 |
+
{"current_steps": 198, "total_steps": 225, "loss": 0.3348, "lr": 3.4750699908552464e-06, "epoch": 4.395061728395062, "percentage": 88.0, "elapsed_time": "0:26:45", "remaining_time": "0:03:38"}
|
211 |
+
{"current_steps": 199, "total_steps": 225, "loss": 0.338, "lr": 3.225878440710544e-06, "epoch": 4.4170096021947876, "percentage": 88.44, "elapsed_time": "0:28:24", "remaining_time": "0:03:42"}
|
212 |
+
{"current_steps": 200, "total_steps": 225, "loss": 0.3408, "lr": 2.9855815753103436e-06, "epoch": 4.438957475994513, "percentage": 88.89, "elapsed_time": "0:30:04", "remaining_time": "0:03:45"}
|
213 |
+
{"current_steps": 201, "total_steps": 225, "loss": 0.3336, "lr": 2.754237516093623e-06, "epoch": 4.460905349794238, "percentage": 89.33, "elapsed_time": "0:31:43", "remaining_time": "0:03:47"}
|
214 |
+
{"current_steps": 202, "total_steps": 225, "loss": 0.3347, "lr": 2.5319022190529997e-06, "epoch": 4.482853223593964, "percentage": 89.78, "elapsed_time": "0:33:22", "remaining_time": "0:03:48"}
|
215 |
+
{"current_steps": 203, "total_steps": 225, "loss": 0.3359, "lr": 2.3186294612004365e-06, "epoch": 4.50480109739369, "percentage": 90.22, "elapsed_time": "0:35:02", "remaining_time": "0:03:47"}
|
216 |
+
{"current_steps": 204, "total_steps": 225, "loss": 0.3411, "lr": 2.1144708275599955e-06, "epoch": 4.526748971193416, "percentage": 90.67, "elapsed_time": "0:36:41", "remaining_time": "0:03:46"}
|
217 |
+
{"current_steps": 205, "total_steps": 225, "loss": 0.3351, "lr": 1.9194756986908025e-06, "epoch": 4.548696844993142, "percentage": 91.11, "elapsed_time": "0:38:21", "remaining_time": "0:03:44"}
|
218 |
+
{"current_steps": 206, "total_steps": 225, "loss": 0.3329, "lr": 1.7336912387432115e-06, "epoch": 4.570644718792867, "percentage": 91.56, "elapsed_time": "0:40:01", "remaining_time": "0:03:41"}
|
219 |
+
{"current_steps": 207, "total_steps": 225, "loss": 0.3344, "lr": 1.5571623840510185e-06, "epoch": 4.592592592592593, "percentage": 92.0, "elapsed_time": "0:41:40", "remaining_time": "0:03:37"}
|
220 |
+
{"current_steps": 208, "total_steps": 225, "loss": 0.3353, "lr": 1.3899318322625744e-06, "epoch": 4.614540466392318, "percentage": 92.44, "elapsed_time": "0:43:20", "remaining_time": "0:03:32"}
|
221 |
+
{"current_steps": 209, "total_steps": 225, "loss": 0.3351, "lr": 1.2320400320133551e-06, "epoch": 4.636488340192044, "percentage": 92.89, "elapsed_time": "0:44:59", "remaining_time": "0:03:26"}
|
222 |
+
{"current_steps": 210, "total_steps": 225, "loss": 0.3389, "lr": 1.0835251731425013e-06, "epoch": 4.658436213991769, "percentage": 93.33, "elapsed_time": "0:46:38", "remaining_time": "0:03:19"}
|
223 |
+
{"current_steps": 211, "total_steps": 225, "loss": 0.3407, "lr": 9.444231774557199e-07, "epoch": 4.680384087791495, "percentage": 93.78, "elapsed_time": "0:48:18", "remaining_time": "0:03:12"}
|
224 |
+
{"current_steps": 212, "total_steps": 225, "loss": 0.3412, "lr": 8.147676900367308e-07, "epoch": 4.702331961591221, "percentage": 94.22, "elapsed_time": "0:49:57", "remaining_time": "0:03:03"}
|
225 |
+
{"current_steps": 213, "total_steps": 225, "loss": 0.3308, "lr": 6.945900711094534e-07, "epoch": 4.724279835390947, "percentage": 94.67, "elapsed_time": "0:51:36", "remaining_time": "0:02:54"}
|
226 |
+
{"current_steps": 214, "total_steps": 225, "loss": 0.3326, "lr": 5.839193884527472e-07, "epoch": 4.746227709190672, "percentage": 95.11, "elapsed_time": "0:53:16", "remaining_time": "0:02:44"}
|
227 |
+
{"current_steps": 215, "total_steps": 225, "loss": 0.3312, "lr": 4.827824103697332e-07, "epoch": 4.768175582990398, "percentage": 95.56, "elapsed_time": "0:54:55", "remaining_time": "0:02:33"}
|
228 |
+
{"current_steps": 216, "total_steps": 225, "loss": 0.3396, "lr": 3.912035992132257e-07, "epoch": 4.790123456790123, "percentage": 96.0, "elapsed_time": "0:56:34", "remaining_time": "0:02:21"}
|
229 |
+
{"current_steps": 217, "total_steps": 225, "loss": 0.3279, "lr": 3.0920510546894156e-07, "epoch": 4.812071330589849, "percentage": 96.44, "elapsed_time": "0:58:14", "remaining_time": "0:02:08"}
|
230 |
+
{"current_steps": 218, "total_steps": 225, "loss": 0.3373, "lr": 2.3680676239789647e-07, "epoch": 4.834019204389575, "percentage": 96.89, "elapsed_time": "0:59:53", "remaining_time": "0:01:55"}
|
231 |
+
{"current_steps": 219, "total_steps": 225, "loss": 0.3377, "lr": 1.740260812392558e-07, "epoch": 4.8559670781893, "percentage": 97.33, "elapsed_time": "1:01:32", "remaining_time": "0:01:41"}
|
232 |
+
{"current_steps": 220, "total_steps": 225, "loss": 0.3375, "lr": 1.208782469748293e-07, "epoch": 4.877914951989026, "percentage": 97.78, "elapsed_time": "1:03:12", "remaining_time": "0:01:26"}
|
233 |
+
{"current_steps": 221, "total_steps": 225, "loss": 0.3314, "lr": 7.737611465622686e-08, "epoch": 4.899862825788752, "percentage": 98.22, "elapsed_time": "1:04:51", "remaining_time": "0:01:10"}
|
234 |
+
{"current_steps": 222, "total_steps": 225, "loss": 0.334, "lr": 4.353020629556781e-08, "epoch": 4.921810699588478, "percentage": 98.67, "elapsed_time": "1:06:31", "remaining_time": "0:00:53"}
|
235 |
+
{"current_steps": 223, "total_steps": 225, "loss": 0.3358, "lr": 1.934870832047686e-08, "epoch": 4.9437585733882035, "percentage": 99.11, "elapsed_time": "1:08:11", "remaining_time": "0:00:36"}
|
236 |
+
{"current_steps": 224, "total_steps": 225, "loss": 0.3436, "lr": 4.837469594018984e-09, "epoch": 4.965706447187928, "percentage": 99.56, "elapsed_time": "1:09:50", "remaining_time": "0:00:18"}
|
237 |
+
{"current_steps": 225, "total_steps": 225, "loss": 0.337, "lr": 0.0, "epoch": 4.987654320987654, "percentage": 100.0, "elapsed_time": "1:11:30", "remaining_time": "0:00:00"}
|
238 |
+
{"current_steps": 225, "total_steps": 225, "epoch": 4.987654320987654, "percentage": 100.0, "elapsed_time": "1:14:29", "remaining_time": "0:00:00"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4bfc0a48d881783e49963de2b1c5b3b6798e34c695babffc01fa68b53b14ac7
|
3 |
+
size 7416
|