Training in progress, epoch 1
Browse files
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad1cdf7e9dc78d77105d4a044605ada46a001bb8a5c8ce6f7559768ac14dc5b0
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a391ae7acf5756a185d555f7231f34689a51ea6aea483f392df911b6d5b4ce9c
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:797876af006a2e0bdb12218fabdee91e9855d790680335d82936625e1bd63f11
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1a76c9b2c5717f2a40be49322e1e071e3cfa6cd9029bd3a5bec89a32dca7f50
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -76,3 +76,79 @@
|
|
76 |
{"current_steps": 76, "total_steps": 380, "loss": 0.6123, "lr": 7.758770483143634e-05, "epoch": 0.9878147847278635, "percentage": 20.0, "elapsed_time": "2:09:34", "remaining_time": "8:38:18"}
|
77 |
{"current_steps": 77, "total_steps": 380, "loss": 0.615, "lr": 7.74604496478822e-05, "epoch": 1.0024370430544274, "percentage": 20.26, "elapsed_time": "2:13:04", "remaining_time": "8:43:40"}
|
78 |
{"current_steps": 78, "total_steps": 380, "loss": 0.5888, "lr": 7.733003351549829e-05, "epoch": 1.0154346060113728, "percentage": 20.53, "elapsed_time": "2:14:46", "remaining_time": "8:41:49"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
{"current_steps": 76, "total_steps": 380, "loss": 0.6123, "lr": 7.758770483143634e-05, "epoch": 0.9878147847278635, "percentage": 20.0, "elapsed_time": "2:09:34", "remaining_time": "8:38:18"}
|
77 |
{"current_steps": 77, "total_steps": 380, "loss": 0.615, "lr": 7.74604496478822e-05, "epoch": 1.0024370430544274, "percentage": 20.26, "elapsed_time": "2:13:04", "remaining_time": "8:43:40"}
|
78 |
{"current_steps": 78, "total_steps": 380, "loss": 0.5888, "lr": 7.733003351549829e-05, "epoch": 1.0154346060113728, "percentage": 20.53, "elapsed_time": "2:14:46", "remaining_time": "8:41:49"}
|
79 |
+
{"current_steps": 79, "total_steps": 380, "loss": 0.5932, "lr": 7.719646743892352e-05, "epoch": 1.0284321689683185, "percentage": 20.79, "elapsed_time": "2:16:26", "remaining_time": "8:39:51"}
|
80 |
+
{"current_steps": 80, "total_steps": 380, "loss": 0.5934, "lr": 7.705976268859207e-05, "epoch": 1.0414297319252641, "percentage": 21.05, "elapsed_time": "2:18:07", "remaining_time": "8:37:57"}
|
81 |
+
{"current_steps": 81, "total_steps": 380, "loss": 0.5815, "lr": 7.691993079978252e-05, "epoch": 1.0544272948822095, "percentage": 21.32, "elapsed_time": "2:19:49", "remaining_time": "8:36:06"}
|
82 |
+
{"current_steps": 82, "total_steps": 380, "loss": 0.5752, "lr": 7.677698357164431e-05, "epoch": 1.0674248578391552, "percentage": 21.58, "elapsed_time": "2:21:31", "remaining_time": "8:34:20"}
|
83 |
+
{"current_steps": 83, "total_steps": 380, "loss": 0.5792, "lr": 7.663093306620231e-05, "epoch": 1.0804224207961006, "percentage": 21.84, "elapsed_time": "2:23:12", "remaining_time": "8:32:26"}
|
84 |
+
{"current_steps": 84, "total_steps": 380, "loss": 0.5753, "lr": 7.648179160733883e-05, "epoch": 1.0934199837530463, "percentage": 22.11, "elapsed_time": "2:24:53", "remaining_time": "8:30:33"}
|
85 |
+
{"current_steps": 85, "total_steps": 380, "loss": 0.5777, "lr": 7.632957177975387e-05, "epoch": 1.106417546709992, "percentage": 22.37, "elapsed_time": "2:26:35", "remaining_time": "8:28:46"}
|
86 |
+
{"current_steps": 86, "total_steps": 380, "loss": 0.5834, "lr": 7.61742864279031e-05, "epoch": 1.1194151096669374, "percentage": 22.63, "elapsed_time": "2:28:18", "remaining_time": "8:27:00"}
|
87 |
+
{"current_steps": 87, "total_steps": 380, "loss": 0.5742, "lr": 7.601594865491414e-05, "epoch": 1.132412672623883, "percentage": 22.89, "elapsed_time": "2:29:58", "remaining_time": "8:25:06"}
|
88 |
+
{"current_steps": 88, "total_steps": 380, "loss": 0.5784, "lr": 7.585457182148081e-05, "epoch": 1.1454102355808287, "percentage": 23.16, "elapsed_time": "2:31:39", "remaining_time": "8:23:14"}
|
89 |
+
{"current_steps": 89, "total_steps": 380, "loss": 0.5826, "lr": 7.569016954473577e-05, "epoch": 1.158407798537774, "percentage": 23.42, "elapsed_time": "2:33:21", "remaining_time": "8:21:25"}
|
90 |
+
{"current_steps": 90, "total_steps": 380, "loss": 0.5852, "lr": 7.552275569710152e-05, "epoch": 1.1714053614947197, "percentage": 23.68, "elapsed_time": "2:35:04", "remaining_time": "8:19:41"}
|
91 |
+
{"current_steps": 91, "total_steps": 380, "loss": 0.5763, "lr": 7.535234440511979e-05, "epoch": 1.1844029244516654, "percentage": 23.95, "elapsed_time": "2:36:44", "remaining_time": "8:17:47"}
|
92 |
+
{"current_steps": 92, "total_steps": 380, "loss": 0.5787, "lr": 7.517895004825956e-05, "epoch": 1.1974004874086108, "percentage": 24.21, "elapsed_time": "2:38:25", "remaining_time": "8:15:56"}
|
93 |
+
{"current_steps": 93, "total_steps": 380, "loss": 0.5746, "lr": 7.500258725770375e-05, "epoch": 1.2103980503655565, "percentage": 24.47, "elapsed_time": "2:40:06", "remaining_time": "8:14:06"}
|
94 |
+
{"current_steps": 94, "total_steps": 380, "loss": 0.5703, "lr": 7.48232709151145e-05, "epoch": 1.2233956133225021, "percentage": 24.74, "elapsed_time": "2:41:49", "remaining_time": "8:12:22"}
|
95 |
+
{"current_steps": 95, "total_steps": 380, "loss": 0.579, "lr": 7.464101615137756e-05, "epoch": 1.2363931762794476, "percentage": 25.0, "elapsed_time": "2:43:30", "remaining_time": "8:10:30"}
|
96 |
+
{"current_steps": 96, "total_steps": 380, "loss": 0.5637, "lr": 7.445583834532546e-05, "epoch": 1.2493907392363932, "percentage": 25.26, "elapsed_time": "2:45:10", "remaining_time": "8:08:39"}
|
97 |
+
{"current_steps": 97, "total_steps": 380, "loss": 0.5756, "lr": 7.426775312243986e-05, "epoch": 1.2623883021933389, "percentage": 25.53, "elapsed_time": "2:46:52", "remaining_time": "8:06:52"}
|
98 |
+
{"current_steps": 98, "total_steps": 380, "loss": 0.5734, "lr": 7.407677635353308e-05, "epoch": 1.2753858651502843, "percentage": 25.79, "elapsed_time": "2:48:34", "remaining_time": "8:05:06"}
|
99 |
+
{"current_steps": 99, "total_steps": 380, "loss": 0.5682, "lr": 7.388292415340888e-05, "epoch": 1.28838342810723, "percentage": 26.05, "elapsed_time": "2:50:15", "remaining_time": "8:03:16"}
|
100 |
+
{"current_steps": 100, "total_steps": 380, "loss": 0.5643, "lr": 7.368621287950264e-05, "epoch": 1.3013809910641756, "percentage": 26.32, "elapsed_time": "2:51:56", "remaining_time": "8:01:26"}
|
101 |
+
{"current_steps": 101, "total_steps": 380, "loss": 0.5602, "lr": 7.348665913050115e-05, "epoch": 1.314378554021121, "percentage": 26.58, "elapsed_time": "2:53:38", "remaining_time": "7:59:39"}
|
102 |
+
{"current_steps": 102, "total_steps": 380, "loss": 0.5672, "lr": 7.328427974494201e-05, "epoch": 1.3273761169780667, "percentage": 26.84, "elapsed_time": "2:55:21", "remaining_time": "7:57:54"}
|
103 |
+
{"current_steps": 103, "total_steps": 380, "loss": 0.5648, "lr": 7.307909179979274e-05, "epoch": 1.340373679935012, "percentage": 27.11, "elapsed_time": "2:57:00", "remaining_time": "7:56:02"}
|
104 |
+
{"current_steps": 104, "total_steps": 380, "loss": 0.566, "lr": 7.28711126090098e-05, "epoch": 1.3533712428919578, "percentage": 27.37, "elapsed_time": "2:58:42", "remaining_time": "7:54:16"}
|
105 |
+
{"current_steps": 105, "total_steps": 380, "loss": 0.5571, "lr": 7.266035972207773e-05, "epoch": 1.3663688058489034, "percentage": 27.63, "elapsed_time": "3:00:24", "remaining_time": "7:52:29"}
|
106 |
+
{"current_steps": 106, "total_steps": 380, "loss": 0.5693, "lr": 7.24468509225281e-05, "epoch": 1.3793663688058488, "percentage": 27.89, "elapsed_time": "3:02:06", "remaining_time": "7:50:43"}
|
107 |
+
{"current_steps": 107, "total_steps": 380, "loss": 0.5686, "lr": 7.223060422643914e-05, "epoch": 1.3923639317627945, "percentage": 28.16, "elapsed_time": "3:03:46", "remaining_time": "7:48:52"}
|
108 |
+
{"current_steps": 108, "total_steps": 380, "loss": 0.5678, "lr": 7.201163788091536e-05, "epoch": 1.40536149471974, "percentage": 28.42, "elapsed_time": "3:05:28", "remaining_time": "7:47:06"}
|
109 |
+
{"current_steps": 109, "total_steps": 380, "loss": 0.5683, "lr": 7.178997036254799e-05, "epoch": 1.4183590576766856, "percentage": 28.68, "elapsed_time": "3:07:11", "remaining_time": "7:45:23"}
|
110 |
+
{"current_steps": 110, "total_steps": 380, "loss": 0.5665, "lr": 7.156562037585576e-05, "epoch": 1.4313566206336312, "percentage": 28.95, "elapsed_time": "3:08:53", "remaining_time": "7:43:37"}
|
111 |
+
{"current_steps": 111, "total_steps": 380, "loss": 0.5745, "lr": 7.133860685170665e-05, "epoch": 1.4443541835905767, "percentage": 29.21, "elapsed_time": "3:10:34", "remaining_time": "7:41:51"}
|
112 |
+
{"current_steps": 112, "total_steps": 380, "loss": 0.5645, "lr": 7.110894894572056e-05, "epoch": 1.4573517465475223, "percentage": 29.47, "elapsed_time": "3:12:16", "remaining_time": "7:40:05"}
|
113 |
+
{"current_steps": 113, "total_steps": 380, "loss": 0.5606, "lr": 7.087666603665284e-05, "epoch": 1.470349309504468, "percentage": 29.74, "elapsed_time": "3:14:00", "remaining_time": "7:38:24"}
|
114 |
+
{"current_steps": 114, "total_steps": 380, "loss": 0.5674, "lr": 7.064177772475912e-05, "epoch": 1.4833468724614134, "percentage": 30.0, "elapsed_time": "3:15:40", "remaining_time": "7:36:34"}
|
115 |
+
{"current_steps": 115, "total_steps": 380, "loss": 0.5655, "lr": 7.040430383014146e-05, "epoch": 1.496344435418359, "percentage": 30.26, "elapsed_time": "3:17:21", "remaining_time": "7:34:46"}
|
116 |
+
{"current_steps": 116, "total_steps": 380, "loss": 0.5653, "lr": 7.016426439107586e-05, "epoch": 1.5093419983753047, "percentage": 30.53, "elapsed_time": "3:19:04", "remaining_time": "7:33:03"}
|
117 |
+
{"current_steps": 117, "total_steps": 380, "loss": 0.5567, "lr": 6.992167966232143e-05, "epoch": 1.5223395613322501, "percentage": 30.79, "elapsed_time": "3:20:47", "remaining_time": "7:31:21"}
|
118 |
+
{"current_steps": 118, "total_steps": 380, "loss": 0.575, "lr": 6.967657011341126e-05, "epoch": 1.5353371242891958, "percentage": 31.05, "elapsed_time": "3:22:28", "remaining_time": "7:29:33"}
|
119 |
+
{"current_steps": 119, "total_steps": 380, "loss": 0.5624, "lr": 6.942895642692527e-05, "epoch": 1.5483346872461414, "percentage": 31.32, "elapsed_time": "3:24:09", "remaining_time": "7:27:47"}
|
120 |
+
{"current_steps": 120, "total_steps": 380, "loss": 0.5667, "lr": 6.917885949674483e-05, "epoch": 1.5613322502030869, "percentage": 31.58, "elapsed_time": "3:25:52", "remaining_time": "7:26:03"}
|
121 |
+
{"current_steps": 121, "total_steps": 380, "loss": 0.557, "lr": 6.892630042628988e-05, "epoch": 1.5743298131600325, "percentage": 31.84, "elapsed_time": "3:27:36", "remaining_time": "7:24:22"}
|
122 |
+
{"current_steps": 122, "total_steps": 380, "loss": 0.5638, "lr": 6.867130052673806e-05, "epoch": 1.5873273761169782, "percentage": 32.11, "elapsed_time": "3:29:17", "remaining_time": "7:22:36"}
|
123 |
+
{"current_steps": 123, "total_steps": 380, "loss": 0.5626, "lr": 6.841388131522656e-05, "epoch": 1.6003249390739236, "percentage": 32.37, "elapsed_time": "3:31:03", "remaining_time": "7:20:58"}
|
124 |
+
{"current_steps": 124, "total_steps": 380, "loss": 0.5632, "lr": 6.815406451303647e-05, "epoch": 1.6133225020308692, "percentage": 32.63, "elapsed_time": "3:32:48", "remaining_time": "7:19:21"}
|
125 |
+
{"current_steps": 125, "total_steps": 380, "loss": 0.5643, "lr": 6.789187204375981e-05, "epoch": 1.626320064987815, "percentage": 32.89, "elapsed_time": "3:34:32", "remaining_time": "7:17:39"}
|
126 |
+
{"current_steps": 126, "total_steps": 380, "loss": 0.5611, "lr": 6.762732603144978e-05, "epoch": 1.6393176279447603, "percentage": 33.16, "elapsed_time": "3:36:13", "remaining_time": "7:15:51"}
|
127 |
+
{"current_steps": 127, "total_steps": 380, "loss": 0.5548, "lr": 6.736044879875373e-05, "epoch": 1.652315190901706, "percentage": 33.42, "elapsed_time": "3:37:55", "remaining_time": "7:14:07"}
|
128 |
+
{"current_steps": 128, "total_steps": 380, "loss": 0.5634, "lr": 6.709126286502965e-05, "epoch": 1.6653127538586516, "percentage": 33.68, "elapsed_time": "3:39:38", "remaining_time": "7:12:25"}
|
129 |
+
{"current_steps": 129, "total_steps": 380, "loss": 0.558, "lr": 6.681979094444596e-05, "epoch": 1.678310316815597, "percentage": 33.95, "elapsed_time": "3:41:21", "remaining_time": "7:10:42"}
|
130 |
+
{"current_steps": 130, "total_steps": 380, "loss": 0.5584, "lr": 6.654605594406486e-05, "epoch": 1.6913078797725425, "percentage": 34.21, "elapsed_time": "3:43:02", "remaining_time": "7:08:56"}
|
131 |
+
{"current_steps": 131, "total_steps": 380, "loss": 0.5581, "lr": 6.627008096190938e-05, "epoch": 1.7043054427294884, "percentage": 34.47, "elapsed_time": "3:44:46", "remaining_time": "7:07:13"}
|
132 |
+
{"current_steps": 132, "total_steps": 380, "loss": 0.5604, "lr": 6.59918892850144e-05, "epoch": 1.7173030056864338, "percentage": 34.74, "elapsed_time": "3:46:31", "remaining_time": "7:05:35"}
|
133 |
+
{"current_steps": 133, "total_steps": 380, "loss": 0.5578, "lr": 6.571150438746157e-05, "epoch": 1.7303005686433792, "percentage": 35.0, "elapsed_time": "3:48:15", "remaining_time": "7:03:53"}
|
134 |
+
{"current_steps": 134, "total_steps": 380, "loss": 0.5521, "lr": 6.542894992839873e-05, "epoch": 1.743298131600325, "percentage": 35.26, "elapsed_time": "3:49:57", "remaining_time": "7:02:09"}
|
135 |
+
{"current_steps": 135, "total_steps": 380, "loss": 0.5559, "lr": 6.514424975004329e-05, "epoch": 1.7562956945572705, "percentage": 35.53, "elapsed_time": "3:51:40", "remaining_time": "7:00:27"}
|
136 |
+
{"current_steps": 136, "total_steps": 380, "loss": 0.556, "lr": 6.48574278756706e-05, "epoch": 1.769293257514216, "percentage": 35.79, "elapsed_time": "3:53:25", "remaining_time": "6:58:48"}
|
137 |
+
{"current_steps": 137, "total_steps": 380, "loss": 0.5511, "lr": 6.456850850758673e-05, "epoch": 1.7822908204711616, "percentage": 36.05, "elapsed_time": "3:55:06", "remaining_time": "6:57:00"}
|
138 |
+
{"current_steps": 138, "total_steps": 380, "loss": 0.5517, "lr": 6.427751602508628e-05, "epoch": 1.7952883834281073, "percentage": 36.32, "elapsed_time": "3:56:48", "remaining_time": "6:55:15"}
|
139 |
+
{"current_steps": 139, "total_steps": 380, "loss": 0.5603, "lr": 6.398447498239527e-05, "epoch": 1.8082859463850527, "percentage": 36.58, "elapsed_time": "3:58:33", "remaining_time": "6:53:37"}
|
140 |
+
{"current_steps": 140, "total_steps": 380, "loss": 0.5583, "lr": 6.368941010659921e-05, "epoch": 1.8212835093419983, "percentage": 36.84, "elapsed_time": "4:00:19", "remaining_time": "6:51:58"}
|
141 |
+
{"current_steps": 141, "total_steps": 380, "loss": 0.5566, "lr": 6.339234629555655e-05, "epoch": 1.834281072298944, "percentage": 37.11, "elapsed_time": "4:02:00", "remaining_time": "6:50:12"}
|
142 |
+
{"current_steps": 142, "total_steps": 380, "loss": 0.5592, "lr": 6.309330861579786e-05, "epoch": 1.8472786352558894, "percentage": 37.37, "elapsed_time": "4:03:43", "remaining_time": "6:48:29"}
|
143 |
+
{"current_steps": 143, "total_steps": 380, "loss": 0.5544, "lr": 6.279232230041065e-05, "epoch": 1.860276198212835, "percentage": 37.63, "elapsed_time": "4:05:27", "remaining_time": "6:46:49"}
|
144 |
+
{"current_steps": 144, "total_steps": 380, "loss": 0.5519, "lr": 6.248941274691017e-05, "epoch": 1.8732737611697807, "percentage": 37.89, "elapsed_time": "4:07:12", "remaining_time": "6:45:08"}
|
145 |
+
{"current_steps": 145, "total_steps": 380, "loss": 0.5471, "lr": 6.218460551509636e-05, "epoch": 1.8862713241267262, "percentage": 38.16, "elapsed_time": "4:08:53", "remaining_time": "6:43:22"}
|
146 |
+
{"current_steps": 146, "total_steps": 380, "loss": 0.5551, "lr": 6.18779263248971e-05, "epoch": 1.8992688870836718, "percentage": 38.42, "elapsed_time": "4:10:36", "remaining_time": "6:41:40"}
|
147 |
+
{"current_steps": 147, "total_steps": 380, "loss": 0.5491, "lr": 6.156940105419785e-05, "epoch": 1.9122664500406175, "percentage": 38.68, "elapsed_time": "4:12:21", "remaining_time": "6:40:00"}
|
148 |
+
{"current_steps": 148, "total_steps": 380, "loss": 0.5515, "lr": 6.125905573665824e-05, "epoch": 1.925264012997563, "percentage": 38.95, "elapsed_time": "4:14:04", "remaining_time": "6:38:16"}
|
149 |
+
{"current_steps": 149, "total_steps": 380, "loss": 0.554, "lr": 6.094691655951512e-05, "epoch": 1.9382615759545085, "percentage": 39.21, "elapsed_time": "4:15:45", "remaining_time": "6:36:29"}
|
150 |
+
{"current_steps": 150, "total_steps": 380, "loss": 0.5515, "lr": 6.063300986137297e-05, "epoch": 1.9512591389114542, "percentage": 39.47, "elapsed_time": "4:17:28", "remaining_time": "6:34:47"}
|
151 |
+
{"current_steps": 151, "total_steps": 380, "loss": 0.5514, "lr": 6.0317362129981375e-05, "epoch": 1.9642567018683996, "percentage": 39.74, "elapsed_time": "4:19:15", "remaining_time": "6:33:10"}
|
152 |
+
{"current_steps": 152, "total_steps": 380, "loss": 0.5557, "lr": 6.000000000000001e-05, "epoch": 1.9772542648253453, "percentage": 40.0, "elapsed_time": "4:20:56", "remaining_time": "6:31:25"}
|
153 |
+
{"current_steps": 153, "total_steps": 380, "loss": 0.5581, "lr": 5.968095025075114e-05, "epoch": 1.990251827782291, "percentage": 40.26, "elapsed_time": "4:22:38", "remaining_time": "6:29:40"}
|
154 |
+
{"current_steps": 154, "total_steps": 380, "loss": 0.5411, "lr": 5.936023980395997e-05, "epoch": 2.0048740861088548, "percentage": 40.53, "elapsed_time": "4:26:06", "remaining_time": "6:30:31"}
|