Training in progress, epoch 0
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +88 -91
- training_args.bin +2 -2
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43ca864894953f716ae60186c8580f93fd8c235d11e0b7a97f5add7ff6a3180c
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6adbd0c2f5a5d6bf087cbec5bf89c28651e512fb5944a3faa614cdd8f157eb3d
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:020c87e9290b68a7bdf4b30f9137210ba648cb55ffc92804ddf3d8231c323357
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71df631e8ebd2ccb1644080217cddbde8d60ca3bc0b47691e275fcbffb04456c
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -1,91 +1,88 @@
|
|
1 |
-
{"current_steps": 1, "total_steps": 364, "loss": 1.1132, "lr": 5.405405405405406e-07, "epoch": 0.01910828025477707, "percentage": 0.27, "elapsed_time": "0:00:
|
2 |
-
{"current_steps": 2, "total_steps": 364, "loss": 1.1203, "lr": 1.0810810810810812e-06, "epoch": 0.03821656050955414, "percentage": 0.55, "elapsed_time": "0:00:
|
3 |
-
{"current_steps": 3, "total_steps": 364, "loss": 1.
|
4 |
-
{"current_steps": 4, "total_steps": 364, "loss": 1.0972, "lr": 2.1621621621621623e-06, "epoch": 0.07643312101910828, "percentage": 1.1, "elapsed_time": "0:00:
|
5 |
-
{"current_steps": 5, "total_steps": 364, "loss": 1.
|
6 |
-
{"current_steps": 6, "total_steps": 364, "loss": 0.
|
7 |
-
{"current_steps": 7, "total_steps": 364, "loss": 0.
|
8 |
-
{"current_steps": 8, "total_steps": 364, "loss": 0.
|
9 |
-
{"current_steps": 9, "total_steps": 364, "loss": 0.
|
10 |
-
{"current_steps": 10, "total_steps": 364, "loss": 0.
|
11 |
-
{"current_steps": 11, "total_steps": 364, "loss": 0.
|
12 |
-
{"current_steps": 12, "total_steps": 364, "loss": 0.
|
13 |
-
{"current_steps": 13, "total_steps": 364, "loss": 0.
|
14 |
-
{"current_steps": 14, "total_steps": 364, "loss": 0.5898, "lr": 7.567567567567569e-06, "epoch": 0.267515923566879, "percentage": 3.85, "elapsed_time": "0:01:
|
15 |
-
{"current_steps": 15, "total_steps": 364, "loss": 0.
|
16 |
-
{"current_steps": 16, "total_steps": 364, "loss": 0.
|
17 |
-
{"current_steps": 17, "total_steps": 364, "loss": 0.
|
18 |
-
{"current_steps": 18, "total_steps": 364, "loss": 0.5146, "lr": 9.729729729729732e-06, "epoch": 0.34394904458598724, "percentage": 4.95, "elapsed_time": "0:01:
|
19 |
-
{"current_steps": 19, "total_steps": 364, "loss": 0.
|
20 |
-
{"current_steps": 20, "total_steps": 364, "loss": 0.
|
21 |
-
{"current_steps": 21, "total_steps": 364, "loss": 0.
|
22 |
-
{"current_steps": 22, "total_steps": 364, "loss": 0.483, "lr": 1.1891891891891894e-05, "epoch": 0.42038216560509556, "percentage": 6.04, "elapsed_time": "0:01:
|
23 |
-
{"current_steps": 23, "total_steps": 364, "loss": 0.
|
24 |
-
{"current_steps": 24, "total_steps": 364, "loss": 0.
|
25 |
-
{"current_steps": 25, "total_steps": 364, "loss": 0.
|
26 |
-
{"current_steps": 26, "total_steps": 364, "loss": 0.
|
27 |
-
{"current_steps": 27, "total_steps": 364, "loss": 0.
|
28 |
-
{"current_steps": 28, "total_steps": 364, "loss": 0.
|
29 |
-
{"current_steps": 29, "total_steps": 364, "loss": 0.
|
30 |
-
{"current_steps": 30, "total_steps": 364, "loss": 0.
|
31 |
-
{"current_steps": 31, "total_steps": 364, "loss": 0.
|
32 |
-
{"current_steps": 32, "total_steps": 364, "loss": 0.
|
33 |
-
{"current_steps": 33, "total_steps": 364, "loss": 0.
|
34 |
-
{"current_steps": 34, "total_steps": 364, "loss": 0.
|
35 |
-
{"current_steps": 35, "total_steps": 364, "loss": 0.
|
36 |
-
{"current_steps": 36, "total_steps": 364, "loss": 0.
|
37 |
-
{"current_steps": 37, "total_steps": 364, "loss": 0.
|
38 |
-
{"current_steps": 38, "total_steps": 364, "loss": 0.
|
39 |
-
{"current_steps": 39, "total_steps": 364, "loss": 0.
|
40 |
-
{"current_steps": 40, "total_steps": 364, "loss": 0.
|
41 |
-
{"current_steps": 41, "total_steps": 364, "loss": 0.
|
42 |
-
{"current_steps": 42, "total_steps": 364, "loss": 0.
|
43 |
-
{"current_steps": 43, "total_steps": 364, "loss": 0.
|
44 |
-
{"current_steps": 44, "total_steps": 364, "loss": 0.
|
45 |
-
{"current_steps": 45, "total_steps": 364, "loss": 0.
|
46 |
-
{"current_steps": 46, "total_steps": 364, "loss": 0.
|
47 |
-
{"current_steps": 47, "total_steps": 364, "loss": 0.
|
48 |
-
{"current_steps": 48, "total_steps": 364, "loss": 0.
|
49 |
-
{"current_steps": 49, "total_steps": 364, "loss": 0.
|
50 |
-
{"current_steps": 50, "total_steps": 364, "loss": 0.4186, "lr": 1.99221073756057e-05, "epoch": 0.9554140127388535, "percentage": 13.74, "elapsed_time": "0:03:
|
51 |
-
{"current_steps": 51, "total_steps": 364, "loss": 0.
|
52 |
-
{"current_steps": 52, "total_steps": 364, "loss": 0.
|
53 |
-
{"current_steps": 53, "total_steps": 364, "loss": 0.
|
54 |
-
{"current_steps": 54, "total_steps": 364, "loss": 0.332, "lr": 1.9866921935064907e-05, "epoch": 1.0318471337579618, "percentage": 14.84, "elapsed_time": "0:
|
55 |
-
{"current_steps": 55, "total_steps": 364, "loss": 0.
|
56 |
-
{"current_steps": 56, "total_steps": 364, "loss": 0.
|
57 |
-
{"current_steps": 57, "total_steps": 364, "loss": 0.
|
58 |
-
{"current_steps": 58, "total_steps": 364, "loss": 0.
|
59 |
-
{"current_steps": 59, "total_steps": 364, "loss": 0.
|
60 |
-
{"current_steps": 60, "total_steps": 364, "loss": 0.
|
61 |
-
{"current_steps": 61, "total_steps": 364, "loss": 0.
|
62 |
-
{"current_steps": 62, "total_steps": 364, "loss": 0.
|
63 |
-
{"current_steps": 63, "total_steps": 364, "loss": 0.
|
64 |
-
{"current_steps": 64, "total_steps": 364, "loss": 0.
|
65 |
-
{"current_steps": 65, "total_steps": 364, "loss": 0.
|
66 |
-
{"current_steps": 66, "total_steps": 364, "loss": 0.
|
67 |
-
{"current_steps": 67, "total_steps": 364, "loss": 0.
|
68 |
-
{"current_steps": 68, "total_steps": 364, "loss": 0.
|
69 |
-
{"current_steps": 69, "total_steps": 364, "loss": 0.
|
70 |
-
{"current_steps": 70, "total_steps": 364, "loss": 0.
|
71 |
-
{"current_steps": 71, "total_steps": 364, "loss": 0.
|
72 |
-
{"current_steps": 72, "total_steps": 364, "loss": 0.
|
73 |
-
{"current_steps": 73, "total_steps": 364, "loss": 0.
|
74 |
-
{"current_steps": 74, "total_steps": 364, "loss": 0.
|
75 |
-
{"current_steps": 75, "total_steps": 364, "loss": 0.
|
76 |
-
{"current_steps": 76, "total_steps": 364, "loss": 0.
|
77 |
-
{"current_steps": 77, "total_steps": 364, "loss": 0.
|
78 |
-
{"current_steps": 78, "total_steps": 364, "loss": 0.
|
79 |
-
{"current_steps": 79, "total_steps": 364, "loss": 0.3211, "lr": 1.9196895195764363e-05, "epoch": 1.5095541401273884, "percentage": 21.7, "elapsed_time": "0:07:
|
80 |
-
{"current_steps": 80, "total_steps": 364, "loss": 0.
|
81 |
-
{"current_steps": 81, "total_steps": 364, "loss": 0.
|
82 |
-
{"current_steps": 82, "total_steps": 364, "loss": 0.
|
83 |
-
{"current_steps": 83, "total_steps": 364, "loss": 0.
|
84 |
-
{"current_steps": 84, "total_steps": 364, "loss": 0.
|
85 |
-
{"current_steps": 85, "total_steps": 364, "loss": 0.
|
86 |
-
{"current_steps": 86, "total_steps": 364, "loss": 0.
|
87 |
-
{"current_steps": 87, "total_steps": 364, "loss": 0.
|
88 |
-
{"current_steps": 88, "total_steps": 364, "loss": 0.
|
89 |
-
{"current_steps": 89, "total_steps": 364, "loss": 0.3314, "lr": 1.8777836117563894e-05, "epoch": 1.700636942675159, "percentage": 24.45, "elapsed_time": "0:08:09", "remaining_time": "0:25:13"}
|
90 |
-
{"current_steps": 90, "total_steps": 364, "loss": 0.3278, "lr": 1.873140717240899e-05, "epoch": 1.7197452229299364, "percentage": 24.73, "elapsed_time": "0:08:15", "remaining_time": "0:25:08"}
|
91 |
-
{"current_steps": 91, "total_steps": 364, "loss": 0.318, "lr": 1.8684172319859258e-05, "epoch": 1.7388535031847132, "percentage": 25.0, "elapsed_time": "0:08:19", "remaining_time": "0:24:59"}
|
|
|
1 |
+
{"current_steps": 1, "total_steps": 364, "loss": 1.1132, "lr": 5.405405405405406e-07, "epoch": 0.01910828025477707, "percentage": 0.27, "elapsed_time": "0:00:09", "remaining_time": "0:54:36"}
|
2 |
+
{"current_steps": 2, "total_steps": 364, "loss": 1.1203, "lr": 1.0810810810810812e-06, "epoch": 0.03821656050955414, "percentage": 0.55, "elapsed_time": "0:00:14", "remaining_time": "0:43:38"}
|
3 |
+
{"current_steps": 3, "total_steps": 364, "loss": 1.072, "lr": 1.6216216216216219e-06, "epoch": 0.05732484076433121, "percentage": 0.82, "elapsed_time": "0:00:19", "remaining_time": "0:38:08"}
|
4 |
+
{"current_steps": 4, "total_steps": 364, "loss": 1.0972, "lr": 2.1621621621621623e-06, "epoch": 0.07643312101910828, "percentage": 1.1, "elapsed_time": "0:00:22", "remaining_time": "0:34:13"}
|
5 |
+
{"current_steps": 5, "total_steps": 364, "loss": 1.0656, "lr": 2.702702702702703e-06, "epoch": 0.09554140127388536, "percentage": 1.37, "elapsed_time": "0:00:26", "remaining_time": "0:31:58"}
|
6 |
+
{"current_steps": 6, "total_steps": 364, "loss": 0.9882, "lr": 3.2432432432432437e-06, "epoch": 0.11464968152866242, "percentage": 1.65, "elapsed_time": "0:00:31", "remaining_time": "0:30:55"}
|
7 |
+
{"current_steps": 7, "total_steps": 364, "loss": 0.8802, "lr": 3.7837837837837844e-06, "epoch": 0.1337579617834395, "percentage": 1.92, "elapsed_time": "0:00:35", "remaining_time": "0:30:34"}
|
8 |
+
{"current_steps": 8, "total_steps": 364, "loss": 0.837, "lr": 4.324324324324325e-06, "epoch": 0.15286624203821655, "percentage": 2.2, "elapsed_time": "0:00:40", "remaining_time": "0:29:57"}
|
9 |
+
{"current_steps": 9, "total_steps": 364, "loss": 0.8174, "lr": 4.864864864864866e-06, "epoch": 0.17197452229299362, "percentage": 2.47, "elapsed_time": "0:00:44", "remaining_time": "0:29:07"}
|
10 |
+
{"current_steps": 10, "total_steps": 364, "loss": 0.7624, "lr": 5.405405405405406e-06, "epoch": 0.1910828025477707, "percentage": 2.75, "elapsed_time": "0:00:48", "remaining_time": "0:28:51"}
|
11 |
+
{"current_steps": 11, "total_steps": 364, "loss": 0.7111, "lr": 5.945945945945947e-06, "epoch": 0.21019108280254778, "percentage": 3.02, "elapsed_time": "0:00:53", "remaining_time": "0:28:20"}
|
12 |
+
{"current_steps": 12, "total_steps": 364, "loss": 0.6858, "lr": 6.486486486486487e-06, "epoch": 0.22929936305732485, "percentage": 3.3, "elapsed_time": "0:00:57", "remaining_time": "0:28:01"}
|
13 |
+
{"current_steps": 13, "total_steps": 364, "loss": 0.61, "lr": 7.027027027027028e-06, "epoch": 0.2484076433121019, "percentage": 3.57, "elapsed_time": "0:01:03", "remaining_time": "0:28:36"}
|
14 |
+
{"current_steps": 14, "total_steps": 364, "loss": 0.5898, "lr": 7.567567567567569e-06, "epoch": 0.267515923566879, "percentage": 3.85, "elapsed_time": "0:01:07", "remaining_time": "0:28:08"}
|
15 |
+
{"current_steps": 15, "total_steps": 364, "loss": 0.5504, "lr": 8.108108108108109e-06, "epoch": 0.28662420382165604, "percentage": 4.12, "elapsed_time": "0:01:11", "remaining_time": "0:27:36"}
|
16 |
+
{"current_steps": 16, "total_steps": 364, "loss": 0.541, "lr": 8.64864864864865e-06, "epoch": 0.3057324840764331, "percentage": 4.4, "elapsed_time": "0:01:15", "remaining_time": "0:27:26"}
|
17 |
+
{"current_steps": 17, "total_steps": 364, "loss": 0.5369, "lr": 9.189189189189191e-06, "epoch": 0.3248407643312102, "percentage": 4.67, "elapsed_time": "0:01:21", "remaining_time": "0:27:40"}
|
18 |
+
{"current_steps": 18, "total_steps": 364, "loss": 0.5146, "lr": 9.729729729729732e-06, "epoch": 0.34394904458598724, "percentage": 4.95, "elapsed_time": "0:01:25", "remaining_time": "0:27:24"}
|
19 |
+
{"current_steps": 19, "total_steps": 364, "loss": 0.5047, "lr": 1.027027027027027e-05, "epoch": 0.3630573248407643, "percentage": 5.22, "elapsed_time": "0:01:29", "remaining_time": "0:27:04"}
|
20 |
+
{"current_steps": 20, "total_steps": 364, "loss": 0.5, "lr": 1.0810810810810812e-05, "epoch": 0.3821656050955414, "percentage": 5.49, "elapsed_time": "0:01:33", "remaining_time": "0:26:45"}
|
21 |
+
{"current_steps": 21, "total_steps": 364, "loss": 0.5057, "lr": 1.1351351351351352e-05, "epoch": 0.4012738853503185, "percentage": 5.77, "elapsed_time": "0:01:39", "remaining_time": "0:27:03"}
|
22 |
+
{"current_steps": 22, "total_steps": 364, "loss": 0.483, "lr": 1.1891891891891894e-05, "epoch": 0.42038216560509556, "percentage": 6.04, "elapsed_time": "0:01:44", "remaining_time": "0:27:07"}
|
23 |
+
{"current_steps": 23, "total_steps": 364, "loss": 0.4743, "lr": 1.2432432432432433e-05, "epoch": 0.4394904458598726, "percentage": 6.32, "elapsed_time": "0:01:49", "remaining_time": "0:26:58"}
|
24 |
+
{"current_steps": 24, "total_steps": 364, "loss": 0.4617, "lr": 1.2972972972972975e-05, "epoch": 0.4585987261146497, "percentage": 6.59, "elapsed_time": "0:01:53", "remaining_time": "0:26:43"}
|
25 |
+
{"current_steps": 25, "total_steps": 364, "loss": 0.4761, "lr": 1.3513513513513515e-05, "epoch": 0.47770700636942676, "percentage": 6.87, "elapsed_time": "0:01:57", "remaining_time": "0:26:34"}
|
26 |
+
{"current_steps": 26, "total_steps": 364, "loss": 0.4266, "lr": 1.4054054054054055e-05, "epoch": 0.4968152866242038, "percentage": 7.14, "elapsed_time": "0:02:01", "remaining_time": "0:26:24"}
|
27 |
+
{"current_steps": 27, "total_steps": 364, "loss": 0.4683, "lr": 1.4594594594594596e-05, "epoch": 0.5159235668789809, "percentage": 7.42, "elapsed_time": "0:02:06", "remaining_time": "0:26:15"}
|
28 |
+
{"current_steps": 28, "total_steps": 364, "loss": 0.4009, "lr": 1.5135135135135138e-05, "epoch": 0.535031847133758, "percentage": 7.69, "elapsed_time": "0:02:10", "remaining_time": "0:26:09"}
|
29 |
+
{"current_steps": 29, "total_steps": 364, "loss": 0.4609, "lr": 1.5675675675675676e-05, "epoch": 0.554140127388535, "percentage": 7.97, "elapsed_time": "0:02:15", "remaining_time": "0:26:05"}
|
30 |
+
{"current_steps": 30, "total_steps": 364, "loss": 0.4173, "lr": 1.6216216216216218e-05, "epoch": 0.5732484076433121, "percentage": 8.24, "elapsed_time": "0:02:20", "remaining_time": "0:25:58"}
|
31 |
+
{"current_steps": 31, "total_steps": 364, "loss": 0.4311, "lr": 1.6756756756756757e-05, "epoch": 0.5923566878980892, "percentage": 8.52, "elapsed_time": "0:02:24", "remaining_time": "0:25:57"}
|
32 |
+
{"current_steps": 32, "total_steps": 364, "loss": 0.4295, "lr": 1.72972972972973e-05, "epoch": 0.6114649681528662, "percentage": 8.79, "elapsed_time": "0:02:28", "remaining_time": "0:25:45"}
|
33 |
+
{"current_steps": 33, "total_steps": 364, "loss": 0.4211, "lr": 1.783783783783784e-05, "epoch": 0.6305732484076433, "percentage": 9.07, "elapsed_time": "0:02:33", "remaining_time": "0:25:37"}
|
34 |
+
{"current_steps": 34, "total_steps": 364, "loss": 0.4391, "lr": 1.8378378378378383e-05, "epoch": 0.6496815286624203, "percentage": 9.34, "elapsed_time": "0:02:37", "remaining_time": "0:25:27"}
|
35 |
+
{"current_steps": 35, "total_steps": 364, "loss": 0.4556, "lr": 1.891891891891892e-05, "epoch": 0.6687898089171974, "percentage": 9.62, "elapsed_time": "0:02:41", "remaining_time": "0:25:21"}
|
36 |
+
{"current_steps": 36, "total_steps": 364, "loss": 0.4296, "lr": 1.9459459459459463e-05, "epoch": 0.6878980891719745, "percentage": 9.89, "elapsed_time": "0:02:45", "remaining_time": "0:25:08"}
|
37 |
+
{"current_steps": 37, "total_steps": 364, "loss": 0.4189, "lr": 2e-05, "epoch": 0.7070063694267515, "percentage": 10.16, "elapsed_time": "0:02:50", "remaining_time": "0:25:10"}
|
38 |
+
{"current_steps": 38, "total_steps": 364, "loss": 0.4141, "lr": 1.9999538500851633e-05, "epoch": 0.7261146496815286, "percentage": 10.44, "elapsed_time": "0:02:55", "remaining_time": "0:25:05"}
|
39 |
+
{"current_steps": 39, "total_steps": 364, "loss": 0.4248, "lr": 1.9998154046002822e-05, "epoch": 0.7452229299363057, "percentage": 10.71, "elapsed_time": "0:03:03", "remaining_time": "0:25:31"}
|
40 |
+
{"current_steps": 40, "total_steps": 364, "loss": 0.4054, "lr": 1.9995846763238514e-05, "epoch": 0.7643312101910829, "percentage": 10.99, "elapsed_time": "0:03:08", "remaining_time": "0:25:24"}
|
41 |
+
{"current_steps": 41, "total_steps": 364, "loss": 0.3879, "lr": 1.9992616865520515e-05, "epoch": 0.7834394904458599, "percentage": 11.26, "elapsed_time": "0:03:12", "remaining_time": "0:25:18"}
|
42 |
+
{"current_steps": 42, "total_steps": 364, "loss": 0.4109, "lr": 1.9988464650967834e-05, "epoch": 0.802547770700637, "percentage": 11.54, "elapsed_time": "0:03:16", "remaining_time": "0:25:09"}
|
43 |
+
{"current_steps": 43, "total_steps": 364, "loss": 0.3974, "lr": 1.9983390502829168e-05, "epoch": 0.821656050955414, "percentage": 11.81, "elapsed_time": "0:03:20", "remaining_time": "0:24:56"}
|
44 |
+
{"current_steps": 44, "total_steps": 364, "loss": 0.3866, "lr": 1.9977394889447526e-05, "epoch": 0.8407643312101911, "percentage": 12.09, "elapsed_time": "0:03:24", "remaining_time": "0:24:49"}
|
45 |
+
{"current_steps": 45, "total_steps": 364, "loss": 0.3965, "lr": 1.9970478364217e-05, "epoch": 0.8598726114649682, "percentage": 12.36, "elapsed_time": "0:03:29", "remaining_time": "0:24:45"}
|
46 |
+
{"current_steps": 46, "total_steps": 364, "loss": 0.3955, "lr": 1.9962641565531694e-05, "epoch": 0.8789808917197452, "percentage": 12.64, "elapsed_time": "0:03:33", "remaining_time": "0:24:37"}
|
47 |
+
{"current_steps": 47, "total_steps": 364, "loss": 0.4011, "lr": 1.9953885216726788e-05, "epoch": 0.8980891719745223, "percentage": 12.91, "elapsed_time": "0:03:39", "remaining_time": "0:24:41"}
|
48 |
+
{"current_steps": 48, "total_steps": 364, "loss": 0.3994, "lr": 1.994421012601179e-05, "epoch": 0.9171974522292994, "percentage": 13.19, "elapsed_time": "0:03:43", "remaining_time": "0:24:32"}
|
49 |
+
{"current_steps": 49, "total_steps": 364, "loss": 0.3816, "lr": 1.9933617186395917e-05, "epoch": 0.9363057324840764, "percentage": 13.46, "elapsed_time": "0:03:48", "remaining_time": "0:24:26"}
|
50 |
+
{"current_steps": 50, "total_steps": 364, "loss": 0.4186, "lr": 1.99221073756057e-05, "epoch": 0.9554140127388535, "percentage": 13.74, "elapsed_time": "0:03:52", "remaining_time": "0:24:23"}
|
51 |
+
{"current_steps": 51, "total_steps": 364, "loss": 0.4176, "lr": 1.990968175599471e-05, "epoch": 0.9745222929936306, "percentage": 14.01, "elapsed_time": "0:03:58", "remaining_time": "0:24:23"}
|
52 |
+
{"current_steps": 52, "total_steps": 364, "loss": 0.3976, "lr": 1.9896341474445526e-05, "epoch": 0.9936305732484076, "percentage": 14.29, "elapsed_time": "0:04:02", "remaining_time": "0:24:17"}
|
53 |
+
{"current_steps": 53, "total_steps": 364, "loss": 0.3684, "lr": 1.9882087762263857e-05, "epoch": 1.0127388535031847, "percentage": 14.56, "elapsed_time": "0:05:57", "remaining_time": "0:34:55"}
|
54 |
+
{"current_steps": 54, "total_steps": 364, "loss": 0.332, "lr": 1.9866921935064907e-05, "epoch": 1.0318471337579618, "percentage": 14.84, "elapsed_time": "0:06:01", "remaining_time": "0:34:34"}
|
55 |
+
{"current_steps": 55, "total_steps": 364, "loss": 0.3523, "lr": 1.985084539265195e-05, "epoch": 1.0509554140127388, "percentage": 15.11, "elapsed_time": "0:06:06", "remaining_time": "0:34:19"}
|
56 |
+
{"current_steps": 56, "total_steps": 364, "loss": 0.3353, "lr": 1.983385961888711e-05, "epoch": 1.070063694267516, "percentage": 15.38, "elapsed_time": "0:06:11", "remaining_time": "0:34:02"}
|
57 |
+
{"current_steps": 57, "total_steps": 364, "loss": 0.3593, "lr": 1.9815966181554412e-05, "epoch": 1.089171974522293, "percentage": 15.66, "elapsed_time": "0:06:16", "remaining_time": "0:33:45"}
|
58 |
+
{"current_steps": 58, "total_steps": 364, "loss": 0.3042, "lr": 1.9797166732215078e-05, "epoch": 1.10828025477707, "percentage": 15.93, "elapsed_time": "0:06:20", "remaining_time": "0:33:25"}
|
59 |
+
{"current_steps": 59, "total_steps": 364, "loss": 0.3179, "lr": 1.977746300605507e-05, "epoch": 1.127388535031847, "percentage": 16.21, "elapsed_time": "0:06:24", "remaining_time": "0:33:07"}
|
60 |
+
{"current_steps": 60, "total_steps": 364, "loss": 0.3195, "lr": 1.975685682172497e-05, "epoch": 1.1464968152866242, "percentage": 16.48, "elapsed_time": "0:06:28", "remaining_time": "0:32:50"}
|
61 |
+
{"current_steps": 61, "total_steps": 364, "loss": 0.3361, "lr": 1.973535008117207e-05, "epoch": 1.1656050955414012, "percentage": 16.76, "elapsed_time": "0:06:34", "remaining_time": "0:32:39"}
|
62 |
+
{"current_steps": 62, "total_steps": 364, "loss": 0.3151, "lr": 1.9712944769464864e-05, "epoch": 1.1847133757961783, "percentage": 17.03, "elapsed_time": "0:06:38", "remaining_time": "0:32:18"}
|
63 |
+
{"current_steps": 63, "total_steps": 364, "loss": 0.3288, "lr": 1.9689642954609808e-05, "epoch": 1.2038216560509554, "percentage": 17.31, "elapsed_time": "0:06:42", "remaining_time": "0:32:03"}
|
64 |
+
{"current_steps": 64, "total_steps": 364, "loss": 0.3403, "lr": 1.9665446787360444e-05, "epoch": 1.2229299363057324, "percentage": 17.58, "elapsed_time": "0:06:46", "remaining_time": "0:31:46"}
|
65 |
+
{"current_steps": 65, "total_steps": 364, "loss": 0.3093, "lr": 1.9640358501018885e-05, "epoch": 1.2420382165605095, "percentage": 17.86, "elapsed_time": "0:06:51", "remaining_time": "0:31:32"}
|
66 |
+
{"current_steps": 66, "total_steps": 364, "loss": 0.3077, "lr": 1.9614380411229693e-05, "epoch": 1.2611464968152866, "percentage": 18.13, "elapsed_time": "0:06:55", "remaining_time": "0:31:13"}
|
67 |
+
{"current_steps": 67, "total_steps": 364, "loss": 0.3222, "lr": 1.9587514915766124e-05, "epoch": 1.2802547770700636, "percentage": 18.41, "elapsed_time": "0:06:59", "remaining_time": "0:30:59"}
|
68 |
+
{"current_steps": 68, "total_steps": 364, "loss": 0.304, "lr": 1.9559764494308838e-05, "epoch": 1.2993630573248407, "percentage": 18.68, "elapsed_time": "0:07:03", "remaining_time": "0:30:42"}
|
69 |
+
{"current_steps": 69, "total_steps": 364, "loss": 0.3404, "lr": 1.9531131708217005e-05, "epoch": 1.3184713375796178, "percentage": 18.96, "elapsed_time": "0:07:09", "remaining_time": "0:30:34"}
|
70 |
+
{"current_steps": 70, "total_steps": 364, "loss": 0.3178, "lr": 1.950161920029191e-05, "epoch": 1.3375796178343948, "percentage": 19.23, "elapsed_time": "0:07:13", "remaining_time": "0:30:18"}
|
71 |
+
{"current_steps": 71, "total_steps": 364, "loss": 0.3179, "lr": 1.9471229694533003e-05, "epoch": 1.356687898089172, "percentage": 19.51, "elapsed_time": "0:07:17", "remaining_time": "0:30:04"}
|
72 |
+
{"current_steps": 72, "total_steps": 364, "loss": 0.3393, "lr": 1.943996599588649e-05, "epoch": 1.3757961783439492, "percentage": 19.78, "elapsed_time": "0:07:22", "remaining_time": "0:29:56"}
|
73 |
+
{"current_steps": 73, "total_steps": 364, "loss": 0.3292, "lr": 1.940783098998643e-05, "epoch": 1.394904458598726, "percentage": 20.05, "elapsed_time": "0:07:27", "remaining_time": "0:29:42"}
|
74 |
+
{"current_steps": 74, "total_steps": 364, "loss": 0.304, "lr": 1.93748276428884e-05, "epoch": 1.4140127388535033, "percentage": 20.33, "elapsed_time": "0:07:30", "remaining_time": "0:29:26"}
|
75 |
+
{"current_steps": 75, "total_steps": 364, "loss": 0.2991, "lr": 1.9340959000795707e-05, "epoch": 1.4331210191082802, "percentage": 20.6, "elapsed_time": "0:07:35", "remaining_time": "0:29:13"}
|
76 |
+
{"current_steps": 76, "total_steps": 364, "loss": 0.3389, "lr": 1.9306228189778255e-05, "epoch": 1.4522292993630574, "percentage": 20.88, "elapsed_time": "0:07:40", "remaining_time": "0:29:03"}
|
77 |
+
{"current_steps": 77, "total_steps": 364, "loss": 0.3287, "lr": 1.927063841548398e-05, "epoch": 1.4713375796178343, "percentage": 21.15, "elapsed_time": "0:07:44", "remaining_time": "0:28:50"}
|
78 |
+
{"current_steps": 78, "total_steps": 364, "loss": 0.3171, "lr": 1.9234192962842996e-05, "epoch": 1.4904458598726116, "percentage": 21.43, "elapsed_time": "0:07:48", "remaining_time": "0:28:38"}
|
79 |
+
{"current_steps": 79, "total_steps": 364, "loss": 0.3211, "lr": 1.9196895195764363e-05, "epoch": 1.5095541401273884, "percentage": 21.7, "elapsed_time": "0:07:53", "remaining_time": "0:28:28"}
|
80 |
+
{"current_steps": 80, "total_steps": 364, "loss": 0.3178, "lr": 1.9158748556825637e-05, "epoch": 1.5286624203821657, "percentage": 21.98, "elapsed_time": "0:07:57", "remaining_time": "0:28:16"}
|
81 |
+
{"current_steps": 81, "total_steps": 364, "loss": 0.3292, "lr": 1.9119756566955092e-05, "epoch": 1.5477707006369426, "percentage": 22.25, "elapsed_time": "0:08:03", "remaining_time": "0:28:08"}
|
82 |
+
{"current_steps": 82, "total_steps": 364, "loss": 0.3301, "lr": 1.907992282510675e-05, "epoch": 1.5668789808917198, "percentage": 22.53, "elapsed_time": "0:08:07", "remaining_time": "0:27:57"}
|
83 |
+
{"current_steps": 83, "total_steps": 364, "loss": 0.3269, "lr": 1.90392510079282e-05, "epoch": 1.5859872611464967, "percentage": 22.8, "elapsed_time": "0:08:11", "remaining_time": "0:27:44"}
|
84 |
+
{"current_steps": 84, "total_steps": 364, "loss": 0.3106, "lr": 1.8997744869421248e-05, "epoch": 1.605095541401274, "percentage": 23.08, "elapsed_time": "0:08:15", "remaining_time": "0:27:32"}
|
85 |
+
{"current_steps": 85, "total_steps": 364, "loss": 0.2997, "lr": 1.8955408240595396e-05, "epoch": 1.6242038216560508, "percentage": 23.35, "elapsed_time": "0:08:19", "remaining_time": "0:27:18"}
|
86 |
+
{"current_steps": 86, "total_steps": 364, "loss": 0.3, "lr": 1.891224502911428e-05, "epoch": 1.643312101910828, "percentage": 23.63, "elapsed_time": "0:08:22", "remaining_time": "0:27:05"}
|
87 |
+
{"current_steps": 87, "total_steps": 364, "loss": 0.3277, "lr": 1.886825921893497e-05, "epoch": 1.662420382165605, "percentage": 23.9, "elapsed_time": "0:08:28", "remaining_time": "0:27:00"}
|
88 |
+
{"current_steps": 88, "total_steps": 364, "loss": 0.3143, "lr": 1.8823454869940243e-05, "epoch": 1.6815286624203822, "percentage": 24.18, "elapsed_time": "0:08:33", "remaining_time": "0:26:51"}
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdc6fd08039548fd46002289d4c319bd04c39b84522db1b381c10bbed7383d90
|
3 |
+
size 7288
|