ryanmarten commited on
Commit
8d5a4f6
·
verified ·
1 Parent(s): de5e324

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ef25b8d50247314dd2fad6a771c5fa561a3ea3cbf09b29b544a3327b441fd0a
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6521756723865d68279ad46e3e41141e652835361af2d1b50a6d185850e0423c
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8330298acdc969ea3ef4d9a34fb6f16ab5b3ca8c4b866a53f592856f75aaf978
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8268138485e746461cd0990d8930fdd855eb2425a2cc0486aba2aafe68efbe90
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0173c29b901b0cd117cb97528ce8712490c8a7c0762d953c6f3c4789a582b8d
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96b99acdcec2eada76fe72de8f8cb44b6ab6d178620c5f787593a4860a7bdc6e
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ecfa8091bdec7c8d0811e0b1fd9ee32b2146f13e171449ca32587b5277a1341
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d28ff01245b8b25f338e5f8862cac67b4ff57ac9ac9097810fb66a8e67a44b
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -77,3 +77,81 @@
77
  {"current_steps": 77, "total_steps": 390, "loss": 0.6211, "lr": 3.885432320339167e-05, "epoch": 0.9856, "percentage": 19.74, "elapsed_time": "3:18:10", "remaining_time": "13:25:34"}
78
  {"current_steps": 78, "total_steps": 390, "loss": 0.6224, "lr": 3.879385241571817e-05, "epoch": 0.9984, "percentage": 20.0, "elapsed_time": "3:20:47", "remaining_time": "13:23:10"}
79
  {"current_steps": 79, "total_steps": 390, "loss": 0.574, "lr": 3.873187606680543e-05, "epoch": 1.0112, "percentage": 20.26, "elapsed_time": "3:23:55", "remaining_time": "13:22:48"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  {"current_steps": 77, "total_steps": 390, "loss": 0.6211, "lr": 3.885432320339167e-05, "epoch": 0.9856, "percentage": 19.74, "elapsed_time": "3:18:10", "remaining_time": "13:25:34"}
78
  {"current_steps": 78, "total_steps": 390, "loss": 0.6224, "lr": 3.879385241571817e-05, "epoch": 0.9984, "percentage": 20.0, "elapsed_time": "3:20:47", "remaining_time": "13:23:10"}
79
  {"current_steps": 79, "total_steps": 390, "loss": 0.574, "lr": 3.873187606680543e-05, "epoch": 1.0112, "percentage": 20.26, "elapsed_time": "3:23:55", "remaining_time": "13:22:48"}
80
+ {"current_steps": 80, "total_steps": 390, "loss": 0.5846, "lr": 3.866839912153168e-05, "epoch": 1.024, "percentage": 20.51, "elapsed_time": "3:26:29", "remaining_time": "13:20:11"}
81
+ {"current_steps": 81, "total_steps": 390, "loss": 0.5665, "lr": 3.860342666498677e-05, "epoch": 1.0368, "percentage": 20.77, "elapsed_time": "3:29:05", "remaining_time": "13:17:37"}
82
+ {"current_steps": 82, "total_steps": 390, "loss": 0.5702, "lr": 3.853696390206484e-05, "epoch": 1.0496, "percentage": 21.03, "elapsed_time": "3:31:32", "remaining_time": "13:14:35"}
83
+ {"current_steps": 83, "total_steps": 390, "loss": 0.5804, "lr": 3.846901615704734e-05, "epoch": 1.0624, "percentage": 21.28, "elapsed_time": "3:34:04", "remaining_time": "13:11:50"}
84
+ {"current_steps": 84, "total_steps": 390, "loss": 0.5816, "lr": 3.839958887317649e-05, "epoch": 1.0752, "percentage": 21.54, "elapsed_time": "3:36:40", "remaining_time": "13:09:17"}
85
+ {"current_steps": 85, "total_steps": 390, "loss": 0.5882, "lr": 3.832868761221926e-05, "epoch": 1.088, "percentage": 21.79, "elapsed_time": "3:39:19", "remaining_time": "13:06:57"}
86
+ {"current_steps": 86, "total_steps": 390, "loss": 0.585, "lr": 3.825631805402182e-05, "epoch": 1.1008, "percentage": 22.05, "elapsed_time": "3:41:58", "remaining_time": "13:04:39"}
87
+ {"current_steps": 87, "total_steps": 390, "loss": 0.6037, "lr": 3.818248599605448e-05, "epoch": 1.1136, "percentage": 22.31, "elapsed_time": "3:44:22", "remaining_time": "13:01:28"}
88
+ {"current_steps": 88, "total_steps": 390, "loss": 0.584, "lr": 3.810719735294731e-05, "epoch": 1.1264, "percentage": 22.56, "elapsed_time": "3:47:05", "remaining_time": "12:59:18"}
89
+ {"current_steps": 89, "total_steps": 390, "loss": 0.5797, "lr": 3.8030458156016326e-05, "epoch": 1.1392, "percentage": 22.82, "elapsed_time": "3:49:48", "remaining_time": "12:57:14"}
90
+ {"current_steps": 90, "total_steps": 390, "loss": 0.587, "lr": 3.795227455278029e-05, "epoch": 1.152, "percentage": 23.08, "elapsed_time": "3:52:25", "remaining_time": "12:54:45"}
91
+ {"current_steps": 91, "total_steps": 390, "loss": 0.5882, "lr": 3.787265280646825e-05, "epoch": 1.1648, "percentage": 23.33, "elapsed_time": "3:55:04", "remaining_time": "12:52:22"}
92
+ {"current_steps": 92, "total_steps": 390, "loss": 0.5722, "lr": 3.7791599295517825e-05, "epoch": 1.1776, "percentage": 23.59, "elapsed_time": "3:57:39", "remaining_time": "12:49:47"}
93
+ {"current_steps": 93, "total_steps": 390, "loss": 0.572, "lr": 3.7709120513064196e-05, "epoch": 1.1904, "percentage": 23.85, "elapsed_time": "4:00:02", "remaining_time": "12:46:35"}
94
+ {"current_steps": 94, "total_steps": 390, "loss": 0.5777, "lr": 3.762522306641998e-05, "epoch": 1.2032, "percentage": 24.1, "elapsed_time": "4:02:32", "remaining_time": "12:43:45"}
95
+ {"current_steps": 95, "total_steps": 390, "loss": 0.5794, "lr": 3.7539913676545874e-05, "epoch": 1.216, "percentage": 24.36, "elapsed_time": "4:05:05", "remaining_time": "12:41:04"}
96
+ {"current_steps": 96, "total_steps": 390, "loss": 0.581, "lr": 3.745319917751229e-05, "epoch": 1.2288000000000001, "percentage": 24.62, "elapsed_time": "4:07:45", "remaining_time": "12:38:44"}
97
+ {"current_steps": 97, "total_steps": 390, "loss": 0.5808, "lr": 3.736508651595188e-05, "epoch": 1.2416, "percentage": 24.87, "elapsed_time": "4:10:27", "remaining_time": "12:36:31"}
98
+ {"current_steps": 98, "total_steps": 390, "loss": 0.5694, "lr": 3.727558275050301e-05, "epoch": 1.2544, "percentage": 25.13, "elapsed_time": "4:13:06", "remaining_time": "12:34:10"}
99
+ {"current_steps": 99, "total_steps": 390, "loss": 0.5714, "lr": 3.718469505124434e-05, "epoch": 1.2671999999999999, "percentage": 25.38, "elapsed_time": "4:15:32", "remaining_time": "12:31:09"}
100
+ {"current_steps": 100, "total_steps": 390, "loss": 0.5592, "lr": 3.709243069912041e-05, "epoch": 1.28, "percentage": 25.64, "elapsed_time": "4:17:50", "remaining_time": "12:27:44"}
101
+ {"current_steps": 101, "total_steps": 390, "loss": 0.5738, "lr": 3.699879708535838e-05, "epoch": 1.2928, "percentage": 25.9, "elapsed_time": "4:20:16", "remaining_time": "12:24:44"}
102
+ {"current_steps": 102, "total_steps": 390, "loss": 0.5656, "lr": 3.69038017108759e-05, "epoch": 1.3056, "percentage": 26.15, "elapsed_time": "4:22:40", "remaining_time": "12:21:41"}
103
+ {"current_steps": 103, "total_steps": 390, "loss": 0.5754, "lr": 3.680745218568026e-05, "epoch": 1.3184, "percentage": 26.41, "elapsed_time": "4:25:16", "remaining_time": "12:19:10"}
104
+ {"current_steps": 104, "total_steps": 390, "loss": 0.5695, "lr": 3.6709756228258735e-05, "epoch": 1.3312, "percentage": 26.67, "elapsed_time": "4:27:47", "remaining_time": "12:16:26"}
105
+ {"current_steps": 105, "total_steps": 390, "loss": 0.5773, "lr": 3.6610721664960236e-05, "epoch": 1.3439999999999999, "percentage": 26.92, "elapsed_time": "4:30:24", "remaining_time": "12:13:58"}
106
+ {"current_steps": 106, "total_steps": 390, "loss": 0.5745, "lr": 3.65103564293684e-05, "epoch": 1.3568, "percentage": 27.18, "elapsed_time": "4:32:53", "remaining_time": "12:11:08"}
107
+ {"current_steps": 107, "total_steps": 390, "loss": 0.5651, "lr": 3.640866856166601e-05, "epoch": 1.3696, "percentage": 27.44, "elapsed_time": "4:35:30", "remaining_time": "12:08:41"}
108
+ {"current_steps": 108, "total_steps": 390, "loss": 0.5798, "lr": 3.6305666207990886e-05, "epoch": 1.3824, "percentage": 27.69, "elapsed_time": "4:38:07", "remaining_time": "12:06:13"}
109
+ {"current_steps": 109, "total_steps": 390, "loss": 0.5784, "lr": 3.6201357619783336e-05, "epoch": 1.3952, "percentage": 27.95, "elapsed_time": "4:40:51", "remaining_time": "12:04:01"}
110
+ {"current_steps": 110, "total_steps": 390, "loss": 0.5687, "lr": 3.609575115312511e-05, "epoch": 1.408, "percentage": 28.21, "elapsed_time": "4:43:17", "remaining_time": "12:01:07"}
111
+ {"current_steps": 111, "total_steps": 390, "loss": 0.5706, "lr": 3.598885526807003e-05, "epoch": 1.4208, "percentage": 28.46, "elapsed_time": "4:45:43", "remaining_time": "11:58:11"}
112
+ {"current_steps": 112, "total_steps": 390, "loss": 0.5647, "lr": 3.5880678527966224e-05, "epoch": 1.4336, "percentage": 28.72, "elapsed_time": "4:48:18", "remaining_time": "11:55:37"}
113
+ {"current_steps": 113, "total_steps": 390, "loss": 0.5641, "lr": 3.577122959877017e-05, "epoch": 1.4464000000000001, "percentage": 28.97, "elapsed_time": "4:50:57", "remaining_time": "11:53:13"}
114
+ {"current_steps": 114, "total_steps": 390, "loss": 0.562, "lr": 3.566051724835245e-05, "epoch": 1.4592, "percentage": 29.23, "elapsed_time": "4:53:38", "remaining_time": "11:50:54"}
115
+ {"current_steps": 115, "total_steps": 390, "loss": 0.5585, "lr": 3.554855034579532e-05, "epoch": 1.472, "percentage": 29.49, "elapsed_time": "4:56:19", "remaining_time": "11:48:35"}
116
+ {"current_steps": 116, "total_steps": 390, "loss": 0.5698, "lr": 3.5435337860682304e-05, "epoch": 1.4848, "percentage": 29.74, "elapsed_time": "4:58:51", "remaining_time": "11:45:55"}
117
+ {"current_steps": 117, "total_steps": 390, "loss": 0.5663, "lr": 3.532088886237956e-05, "epoch": 1.4976, "percentage": 30.0, "elapsed_time": "5:01:20", "remaining_time": "11:43:08"}
118
+ {"current_steps": 118, "total_steps": 390, "loss": 0.5713, "lr": 3.520521251930941e-05, "epoch": 1.5104, "percentage": 30.26, "elapsed_time": "5:03:46", "remaining_time": "11:40:13"}
119
+ {"current_steps": 119, "total_steps": 390, "loss": 0.5688, "lr": 3.5088318098215805e-05, "epoch": 1.5232, "percentage": 30.51, "elapsed_time": "5:06:33", "remaining_time": "11:38:07"}
120
+ {"current_steps": 120, "total_steps": 390, "loss": 0.5759, "lr": 3.497021496342203e-05, "epoch": 1.536, "percentage": 30.77, "elapsed_time": "5:09:06", "remaining_time": "11:35:30"}
121
+ {"current_steps": 121, "total_steps": 390, "loss": 0.5549, "lr": 3.485091257608047e-05, "epoch": 1.5488, "percentage": 31.03, "elapsed_time": "5:11:41", "remaining_time": "11:32:57"}
122
+ {"current_steps": 122, "total_steps": 390, "loss": 0.5706, "lr": 3.473042049341474e-05, "epoch": 1.5615999999999999, "percentage": 31.28, "elapsed_time": "5:14:17", "remaining_time": "11:30:24"}
123
+ {"current_steps": 123, "total_steps": 390, "loss": 0.6136, "lr": 3.4608748367954064e-05, "epoch": 1.5744, "percentage": 31.54, "elapsed_time": "5:16:56", "remaining_time": "11:28:00"}
124
+ {"current_steps": 124, "total_steps": 390, "loss": 0.5636, "lr": 3.4485905946759965e-05, "epoch": 1.5872000000000002, "percentage": 31.79, "elapsed_time": "5:19:23", "remaining_time": "11:25:08"}
125
+ {"current_steps": 125, "total_steps": 390, "loss": 0.5745, "lr": 3.4361903070645484e-05, "epoch": 1.6, "percentage": 32.05, "elapsed_time": "5:22:01", "remaining_time": "11:22:41"}
126
+ {"current_steps": 126, "total_steps": 390, "loss": 0.5755, "lr": 3.423674967338681e-05, "epoch": 1.6128, "percentage": 32.31, "elapsed_time": "5:24:29", "remaining_time": "11:19:53"}
127
+ {"current_steps": 127, "total_steps": 390, "loss": 0.5688, "lr": 3.411045578092754e-05, "epoch": 1.6256, "percentage": 32.56, "elapsed_time": "5:27:07", "remaining_time": "11:17:25"}
128
+ {"current_steps": 128, "total_steps": 390, "loss": 0.5655, "lr": 3.398303151057543e-05, "epoch": 1.6383999999999999, "percentage": 32.82, "elapsed_time": "5:29:32", "remaining_time": "11:14:31"}
129
+ {"current_steps": 129, "total_steps": 390, "loss": 0.5666, "lr": 3.385448707019199e-05, "epoch": 1.6512, "percentage": 33.08, "elapsed_time": "5:32:10", "remaining_time": "11:12:03"}
130
+ {"current_steps": 130, "total_steps": 390, "loss": 0.5763, "lr": 3.372483275737468e-05, "epoch": 1.6640000000000001, "percentage": 33.33, "elapsed_time": "5:34:51", "remaining_time": "11:09:42"}
131
+ {"current_steps": 131, "total_steps": 390, "loss": 0.556, "lr": 3.359407895863199e-05, "epoch": 1.6768, "percentage": 33.59, "elapsed_time": "5:37:24", "remaining_time": "11:07:06"}
132
+ {"current_steps": 132, "total_steps": 390, "loss": 0.5522, "lr": 3.34622361485514e-05, "epoch": 1.6896, "percentage": 33.85, "elapsed_time": "5:39:57", "remaining_time": "11:04:27"}
133
+ {"current_steps": 133, "total_steps": 390, "loss": 0.5615, "lr": 3.332931488896029e-05, "epoch": 1.7024, "percentage": 34.1, "elapsed_time": "5:42:41", "remaining_time": "11:02:10"}
134
+ {"current_steps": 134, "total_steps": 390, "loss": 0.5711, "lr": 3.319532582807977e-05, "epoch": 1.7151999999999998, "percentage": 34.36, "elapsed_time": "5:45:11", "remaining_time": "10:59:28"}
135
+ {"current_steps": 135, "total_steps": 390, "loss": 0.5622, "lr": 3.30602796996717e-05, "epoch": 1.728, "percentage": 34.62, "elapsed_time": "5:47:50", "remaining_time": "10:57:02"}
136
+ {"current_steps": 136, "total_steps": 390, "loss": 0.5765, "lr": 3.2924187322178865e-05, "epoch": 1.7408000000000001, "percentage": 34.87, "elapsed_time": "5:50:36", "remaining_time": "10:54:48"}
137
+ {"current_steps": 137, "total_steps": 390, "loss": 0.5747, "lr": 3.278705959785821e-05, "epoch": 1.7536, "percentage": 35.13, "elapsed_time": "5:53:09", "remaining_time": "10:52:10"}
138
+ {"current_steps": 138, "total_steps": 390, "loss": 0.5515, "lr": 3.2648907511907544e-05, "epoch": 1.7664, "percentage": 35.38, "elapsed_time": "5:55:34", "remaining_time": "10:49:18"}
139
+ {"current_steps": 139, "total_steps": 390, "loss": 0.5558, "lr": 3.250974213158555e-05, "epoch": 1.7792, "percentage": 35.64, "elapsed_time": "5:57:48", "remaining_time": "10:46:07"}
140
+ {"current_steps": 140, "total_steps": 390, "loss": 0.5607, "lr": 3.23695746053251e-05, "epoch": 1.792, "percentage": 35.9, "elapsed_time": "6:00:06", "remaining_time": "10:43:02"}
141
+ {"current_steps": 141, "total_steps": 390, "loss": 0.5705, "lr": 3.222841616184025e-05, "epoch": 1.8048, "percentage": 36.15, "elapsed_time": "6:02:57", "remaining_time": "10:40:57"}
142
+ {"current_steps": 142, "total_steps": 390, "loss": 0.5558, "lr": 3.208627810922665e-05, "epoch": 1.8176, "percentage": 36.41, "elapsed_time": "6:05:30", "remaining_time": "10:38:21"}
143
+ {"current_steps": 143, "total_steps": 390, "loss": 0.5571, "lr": 3.194317183405573e-05, "epoch": 1.8304, "percentage": 36.67, "elapsed_time": "6:07:52", "remaining_time": "10:35:24"}
144
+ {"current_steps": 144, "total_steps": 390, "loss": 0.5566, "lr": 3.1799108800462466e-05, "epoch": 1.8432, "percentage": 36.92, "elapsed_time": "6:10:29", "remaining_time": "10:32:55"}
145
+ {"current_steps": 145, "total_steps": 390, "loss": 0.5789, "lr": 3.1654100549227024e-05, "epoch": 1.8559999999999999, "percentage": 37.18, "elapsed_time": "6:13:03", "remaining_time": "10:30:20"}
146
+ {"current_steps": 146, "total_steps": 390, "loss": 0.5678, "lr": 3.1508158696850275e-05, "epoch": 1.8688, "percentage": 37.44, "elapsed_time": "6:15:41", "remaining_time": "10:27:52"}
147
+ {"current_steps": 147, "total_steps": 390, "loss": 0.5599, "lr": 3.136129493462312e-05, "epoch": 1.8816000000000002, "percentage": 37.69, "elapsed_time": "6:18:09", "remaining_time": "10:25:06"}
148
+ {"current_steps": 148, "total_steps": 390, "loss": 0.5754, "lr": 3.121352102768998e-05, "epoch": 1.8944, "percentage": 37.95, "elapsed_time": "6:20:34", "remaining_time": "10:22:17"}
149
+ {"current_steps": 149, "total_steps": 390, "loss": 0.5569, "lr": 3.106484881410628e-05, "epoch": 1.9072, "percentage": 38.21, "elapsed_time": "6:23:08", "remaining_time": "10:19:42"}
150
+ {"current_steps": 150, "total_steps": 390, "loss": 0.5863, "lr": 3.091529020389009e-05, "epoch": 1.92, "percentage": 38.46, "elapsed_time": "6:25:51", "remaining_time": "10:17:21"}
151
+ {"current_steps": 151, "total_steps": 390, "loss": 0.5616, "lr": 3.076485717806808e-05, "epoch": 1.9327999999999999, "percentage": 38.72, "elapsed_time": "6:28:32", "remaining_time": "10:14:58"}
152
+ {"current_steps": 152, "total_steps": 390, "loss": 0.5606, "lr": 3.061356178771564e-05, "epoch": 1.9456, "percentage": 38.97, "elapsed_time": "6:31:14", "remaining_time": "10:12:36"}
153
+ {"current_steps": 153, "total_steps": 390, "loss": 0.5601, "lr": 3.0461416152991555e-05, "epoch": 1.9584000000000001, "percentage": 39.23, "elapsed_time": "6:33:54", "remaining_time": "10:10:10"}
154
+ {"current_steps": 154, "total_steps": 390, "loss": 0.5654, "lr": 3.0308432462167045e-05, "epoch": 1.9712, "percentage": 39.49, "elapsed_time": "6:36:32", "remaining_time": "10:07:41"}
155
+ {"current_steps": 155, "total_steps": 390, "loss": 0.5589, "lr": 3.015462297064936e-05, "epoch": 1.984, "percentage": 39.74, "elapsed_time": "6:38:58", "remaining_time": "10:04:53"}
156
+ {"current_steps": 156, "total_steps": 390, "loss": 0.5563, "lr": 3.0000000000000004e-05, "epoch": 1.9968, "percentage": 40.0, "elapsed_time": "6:41:34", "remaining_time": "10:02:22"}
157
+ {"current_steps": 157, "total_steps": 390, "loss": 0.5318, "lr": 2.98445759369477e-05, "epoch": 2.0096, "percentage": 40.26, "elapsed_time": "6:45:02", "remaining_time": "10:01:07"}