neginr commited on
Commit
551c1ff
·
verified ·
1 Parent(s): 4106ca7

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2cf01c907a3d53ea4c9e515eaaa53661224fe42199871d30f34466eb1f1b3c8
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931b63db8161802c7e074616f596d62d6b19c7082f7195c49697202d944ab611
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05fb4a48fcaf234a718c857ae5063f9c3f43a97fb6fe4fc4afc6a9ec1acd9bc9
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ecd4a61daf0d011cc89d9512bc13fae358a06766d98781e1c766eb21c23d00b
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e185a690ebe070253c43ff88c54396423b376b3150fcd372613c3dd86ed2b1be
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2bf3599fe963ac3d19145291d3c3db499a707fd8a4198e34a2ff97e8c74a3b7
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42641ce1aa455410fa6644e0a7e1e1b937fba1d8faf81a5b33e201ffa972dfea
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a5a43d8d04f9ceb10976b33b3c3649c8e5175e5087dcd2bf0d25a8a64164146
3
  size 1089994880
trainer_log.jsonl CHANGED
@@ -84,3 +84,81 @@
84
  {"current_steps": 84, "total_steps": 390, "loss": 0.5498, "lr": 3.839958887317649e-05, "epoch": 1.073482428115016, "percentage": 21.54, "elapsed_time": "0:34:23", "remaining_time": "2:05:15"}
85
  {"current_steps": 85, "total_steps": 390, "loss": 0.5283, "lr": 3.832868761221926e-05, "epoch": 1.0862619808306708, "percentage": 21.79, "elapsed_time": "0:34:46", "remaining_time": "2:04:46"}
86
  {"current_steps": 86, "total_steps": 390, "loss": 0.5429, "lr": 3.825631805402182e-05, "epoch": 1.099041533546326, "percentage": 22.05, "elapsed_time": "0:35:09", "remaining_time": "2:04:15"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  {"current_steps": 84, "total_steps": 390, "loss": 0.5498, "lr": 3.839958887317649e-05, "epoch": 1.073482428115016, "percentage": 21.54, "elapsed_time": "0:34:23", "remaining_time": "2:05:15"}
85
  {"current_steps": 85, "total_steps": 390, "loss": 0.5283, "lr": 3.832868761221926e-05, "epoch": 1.0862619808306708, "percentage": 21.79, "elapsed_time": "0:34:46", "remaining_time": "2:04:46"}
86
  {"current_steps": 86, "total_steps": 390, "loss": 0.5429, "lr": 3.825631805402182e-05, "epoch": 1.099041533546326, "percentage": 22.05, "elapsed_time": "0:35:09", "remaining_time": "2:04:15"}
87
+ {"current_steps": 87, "total_steps": 390, "loss": 0.5411, "lr": 3.818248599605448e-05, "epoch": 1.1118210862619808, "percentage": 22.31, "elapsed_time": "0:35:32", "remaining_time": "2:03:46"}
88
+ {"current_steps": 88, "total_steps": 390, "loss": 0.5397, "lr": 3.810719735294731e-05, "epoch": 1.1246006389776357, "percentage": 22.56, "elapsed_time": "0:35:55", "remaining_time": "2:03:15"}
89
+ {"current_steps": 89, "total_steps": 390, "loss": 0.5453, "lr": 3.8030458156016326e-05, "epoch": 1.1373801916932909, "percentage": 22.82, "elapsed_time": "0:36:17", "remaining_time": "2:02:45"}
90
+ {"current_steps": 90, "total_steps": 390, "loss": 0.538, "lr": 3.795227455278029e-05, "epoch": 1.1501597444089458, "percentage": 23.08, "elapsed_time": "0:36:41", "remaining_time": "2:02:17"}
91
+ {"current_steps": 91, "total_steps": 390, "loss": 0.5414, "lr": 3.787265280646825e-05, "epoch": 1.1629392971246006, "percentage": 23.33, "elapsed_time": "0:37:04", "remaining_time": "2:01:50"}
92
+ {"current_steps": 92, "total_steps": 390, "loss": 0.5325, "lr": 3.7791599295517825e-05, "epoch": 1.1757188498402555, "percentage": 23.59, "elapsed_time": "0:37:28", "remaining_time": "2:01:24"}
93
+ {"current_steps": 93, "total_steps": 390, "loss": 0.5323, "lr": 3.7709120513064196e-05, "epoch": 1.1884984025559104, "percentage": 23.85, "elapsed_time": "0:37:51", "remaining_time": "2:00:54"}
94
+ {"current_steps": 94, "total_steps": 390, "loss": 0.5405, "lr": 3.762522306641998e-05, "epoch": 1.2012779552715656, "percentage": 24.1, "elapsed_time": "0:38:14", "remaining_time": "2:00:26"}
95
+ {"current_steps": 95, "total_steps": 390, "loss": 0.544, "lr": 3.7539913676545874e-05, "epoch": 1.2140575079872205, "percentage": 24.36, "elapsed_time": "0:38:38", "remaining_time": "2:00:00"}
96
+ {"current_steps": 96, "total_steps": 390, "loss": 0.5339, "lr": 3.745319917751229e-05, "epoch": 1.2268370607028753, "percentage": 24.62, "elapsed_time": "0:39:01", "remaining_time": "1:59:31"}
97
+ {"current_steps": 97, "total_steps": 390, "loss": 0.5367, "lr": 3.736508651595188e-05, "epoch": 1.2396166134185305, "percentage": 24.87, "elapsed_time": "0:39:24", "remaining_time": "1:59:02"}
98
+ {"current_steps": 98, "total_steps": 390, "loss": 0.5322, "lr": 3.727558275050301e-05, "epoch": 1.2523961661341854, "percentage": 25.13, "elapsed_time": "0:39:47", "remaining_time": "1:58:33"}
99
+ {"current_steps": 99, "total_steps": 390, "loss": 0.5347, "lr": 3.718469505124434e-05, "epoch": 1.2651757188498403, "percentage": 25.38, "elapsed_time": "0:40:09", "remaining_time": "1:58:01"}
100
+ {"current_steps": 100, "total_steps": 390, "loss": 0.5487, "lr": 3.709243069912041e-05, "epoch": 1.2779552715654952, "percentage": 25.64, "elapsed_time": "0:40:31", "remaining_time": "1:57:32"}
101
+ {"current_steps": 101, "total_steps": 390, "loss": 0.558, "lr": 3.699879708535838e-05, "epoch": 1.29073482428115, "percentage": 25.9, "elapsed_time": "0:40:54", "remaining_time": "1:57:04"}
102
+ {"current_steps": 102, "total_steps": 390, "loss": 0.5461, "lr": 3.69038017108759e-05, "epoch": 1.3035143769968052, "percentage": 26.15, "elapsed_time": "0:41:18", "remaining_time": "1:56:38"}
103
+ {"current_steps": 103, "total_steps": 390, "loss": 0.5292, "lr": 3.680745218568026e-05, "epoch": 1.31629392971246, "percentage": 26.41, "elapsed_time": "0:41:41", "remaining_time": "1:56:10"}
104
+ {"current_steps": 104, "total_steps": 390, "loss": 0.5596, "lr": 3.6709756228258735e-05, "epoch": 1.329073482428115, "percentage": 26.67, "elapsed_time": "0:42:04", "remaining_time": "1:55:43"}
105
+ {"current_steps": 105, "total_steps": 390, "loss": 0.5538, "lr": 3.6610721664960236e-05, "epoch": 1.34185303514377, "percentage": 26.92, "elapsed_time": "0:42:28", "remaining_time": "1:55:16"}
106
+ {"current_steps": 106, "total_steps": 390, "loss": 0.5255, "lr": 3.65103564293684e-05, "epoch": 1.354632587859425, "percentage": 27.18, "elapsed_time": "0:42:51", "remaining_time": "1:54:49"}
107
+ {"current_steps": 107, "total_steps": 390, "loss": 0.5295, "lr": 3.640866856166601e-05, "epoch": 1.3674121405750799, "percentage": 27.44, "elapsed_time": "0:43:14", "remaining_time": "1:54:22"}
108
+ {"current_steps": 108, "total_steps": 390, "loss": 0.5374, "lr": 3.6305666207990886e-05, "epoch": 1.3801916932907348, "percentage": 27.69, "elapsed_time": "0:43:37", "remaining_time": "1:53:54"}
109
+ {"current_steps": 109, "total_steps": 390, "loss": 0.5323, "lr": 3.6201357619783336e-05, "epoch": 1.3929712460063897, "percentage": 27.95, "elapsed_time": "0:44:00", "remaining_time": "1:53:27"}
110
+ {"current_steps": 110, "total_steps": 390, "loss": 0.5312, "lr": 3.609575115312511e-05, "epoch": 1.4057507987220448, "percentage": 28.21, "elapsed_time": "0:44:23", "remaining_time": "1:52:59"}
111
+ {"current_steps": 111, "total_steps": 390, "loss": 0.5577, "lr": 3.598885526807003e-05, "epoch": 1.4185303514376997, "percentage": 28.46, "elapsed_time": "0:44:46", "remaining_time": "1:52:33"}
112
+ {"current_steps": 112, "total_steps": 390, "loss": 0.5459, "lr": 3.5880678527966224e-05, "epoch": 1.4313099041533546, "percentage": 28.72, "elapsed_time": "0:45:10", "remaining_time": "1:52:07"}
113
+ {"current_steps": 113, "total_steps": 390, "loss": 0.5313, "lr": 3.577122959877017e-05, "epoch": 1.4440894568690097, "percentage": 28.97, "elapsed_time": "0:45:33", "remaining_time": "1:51:40"}
114
+ {"current_steps": 114, "total_steps": 390, "loss": 0.5459, "lr": 3.566051724835245e-05, "epoch": 1.4568690095846646, "percentage": 29.23, "elapsed_time": "0:45:57", "remaining_time": "1:51:14"}
115
+ {"current_steps": 115, "total_steps": 390, "loss": 0.533, "lr": 3.554855034579532e-05, "epoch": 1.4696485623003195, "percentage": 29.49, "elapsed_time": "0:46:19", "remaining_time": "1:50:45"}
116
+ {"current_steps": 116, "total_steps": 390, "loss": 0.5364, "lr": 3.5435337860682304e-05, "epoch": 1.4824281150159744, "percentage": 29.74, "elapsed_time": "0:46:42", "remaining_time": "1:50:20"}
117
+ {"current_steps": 117, "total_steps": 390, "loss": 0.5438, "lr": 3.532088886237956e-05, "epoch": 1.4952076677316293, "percentage": 30.0, "elapsed_time": "0:47:05", "remaining_time": "1:49:53"}
118
+ {"current_steps": 118, "total_steps": 390, "loss": 0.5404, "lr": 3.520521251930941e-05, "epoch": 1.5079872204472844, "percentage": 30.26, "elapsed_time": "0:47:29", "remaining_time": "1:49:28"}
119
+ {"current_steps": 119, "total_steps": 390, "loss": 0.5703, "lr": 3.5088318098215805e-05, "epoch": 1.5207667731629393, "percentage": 30.51, "elapsed_time": "0:47:50", "remaining_time": "1:48:56"}
120
+ {"current_steps": 120, "total_steps": 390, "loss": 0.5609, "lr": 3.497021496342203e-05, "epoch": 1.5335463258785942, "percentage": 30.77, "elapsed_time": "0:48:12", "remaining_time": "1:48:27"}
121
+ {"current_steps": 121, "total_steps": 390, "loss": 0.5405, "lr": 3.485091257608047e-05, "epoch": 1.5463258785942493, "percentage": 31.03, "elapsed_time": "0:48:35", "remaining_time": "1:48:01"}
122
+ {"current_steps": 122, "total_steps": 390, "loss": 0.5389, "lr": 3.473042049341474e-05, "epoch": 1.5591054313099042, "percentage": 31.28, "elapsed_time": "0:48:58", "remaining_time": "1:47:35"}
123
+ {"current_steps": 123, "total_steps": 390, "loss": 0.539, "lr": 3.4608748367954064e-05, "epoch": 1.571884984025559, "percentage": 31.54, "elapsed_time": "0:49:21", "remaining_time": "1:47:08"}
124
+ {"current_steps": 124, "total_steps": 390, "loss": 0.5391, "lr": 3.4485905946759965e-05, "epoch": 1.5846645367412142, "percentage": 31.79, "elapsed_time": "0:49:44", "remaining_time": "1:46:41"}
125
+ {"current_steps": 125, "total_steps": 390, "loss": 0.5412, "lr": 3.4361903070645484e-05, "epoch": 1.5974440894568689, "percentage": 32.05, "elapsed_time": "0:50:07", "remaining_time": "1:46:16"}
126
+ {"current_steps": 126, "total_steps": 390, "loss": 0.5339, "lr": 3.423674967338681e-05, "epoch": 1.610223642172524, "percentage": 32.31, "elapsed_time": "0:50:30", "remaining_time": "1:45:48"}
127
+ {"current_steps": 127, "total_steps": 390, "loss": 0.5507, "lr": 3.411045578092754e-05, "epoch": 1.623003194888179, "percentage": 32.56, "elapsed_time": "0:50:51", "remaining_time": "1:45:19"}
128
+ {"current_steps": 128, "total_steps": 390, "loss": 0.5352, "lr": 3.398303151057543e-05, "epoch": 1.6357827476038338, "percentage": 32.82, "elapsed_time": "0:51:14", "remaining_time": "1:44:53"}
129
+ {"current_steps": 129, "total_steps": 390, "loss": 0.542, "lr": 3.385448707019199e-05, "epoch": 1.648562300319489, "percentage": 33.08, "elapsed_time": "0:51:38", "remaining_time": "1:44:28"}
130
+ {"current_steps": 130, "total_steps": 390, "loss": 0.5447, "lr": 3.372483275737468e-05, "epoch": 1.6613418530351438, "percentage": 33.33, "elapsed_time": "0:52:01", "remaining_time": "1:44:03"}
131
+ {"current_steps": 131, "total_steps": 390, "loss": 0.5401, "lr": 3.359407895863199e-05, "epoch": 1.6741214057507987, "percentage": 33.59, "elapsed_time": "0:52:25", "remaining_time": "1:43:39"}
132
+ {"current_steps": 132, "total_steps": 390, "loss": 0.5502, "lr": 3.34622361485514e-05, "epoch": 1.6869009584664538, "percentage": 33.85, "elapsed_time": "0:52:49", "remaining_time": "1:43:15"}
133
+ {"current_steps": 133, "total_steps": 390, "loss": 0.526, "lr": 3.332931488896029e-05, "epoch": 1.6996805111821085, "percentage": 34.1, "elapsed_time": "0:53:12", "remaining_time": "1:42:49"}
134
+ {"current_steps": 134, "total_steps": 390, "loss": 0.5243, "lr": 3.319532582807977e-05, "epoch": 1.7124600638977636, "percentage": 34.36, "elapsed_time": "0:53:35", "remaining_time": "1:42:23"}
135
+ {"current_steps": 135, "total_steps": 390, "loss": 0.5333, "lr": 3.30602796996717e-05, "epoch": 1.7252396166134185, "percentage": 34.62, "elapsed_time": "0:53:58", "remaining_time": "1:41:56"}
136
+ {"current_steps": 136, "total_steps": 390, "loss": 0.5317, "lr": 3.2924187322178865e-05, "epoch": 1.7380191693290734, "percentage": 34.87, "elapsed_time": "0:54:20", "remaining_time": "1:41:29"}
137
+ {"current_steps": 137, "total_steps": 390, "loss": 0.5379, "lr": 3.278705959785821e-05, "epoch": 1.7507987220447285, "percentage": 35.13, "elapsed_time": "0:54:43", "remaining_time": "1:41:03"}
138
+ {"current_steps": 138, "total_steps": 390, "loss": 0.5419, "lr": 3.2648907511907544e-05, "epoch": 1.7635782747603834, "percentage": 35.38, "elapsed_time": "0:55:06", "remaining_time": "1:40:38"}
139
+ {"current_steps": 139, "total_steps": 390, "loss": 0.516, "lr": 3.250974213158555e-05, "epoch": 1.7763578274760383, "percentage": 35.64, "elapsed_time": "0:55:29", "remaining_time": "1:40:12"}
140
+ {"current_steps": 140, "total_steps": 390, "loss": 0.5373, "lr": 3.23695746053251e-05, "epoch": 1.7891373801916934, "percentage": 35.9, "elapsed_time": "0:55:52", "remaining_time": "1:39:47"}
141
+ {"current_steps": 141, "total_steps": 390, "loss": 0.5318, "lr": 3.222841616184025e-05, "epoch": 1.8019169329073481, "percentage": 36.15, "elapsed_time": "0:56:16", "remaining_time": "1:39:23"}
142
+ {"current_steps": 142, "total_steps": 390, "loss": 0.5226, "lr": 3.208627810922665e-05, "epoch": 1.8146964856230032, "percentage": 36.41, "elapsed_time": "0:56:40", "remaining_time": "1:38:58"}
143
+ {"current_steps": 143, "total_steps": 390, "loss": 0.5382, "lr": 3.194317183405573e-05, "epoch": 1.8274760383386581, "percentage": 36.67, "elapsed_time": "0:57:02", "remaining_time": "1:38:31"}
144
+ {"current_steps": 144, "total_steps": 390, "loss": 0.5392, "lr": 3.1799108800462466e-05, "epoch": 1.840255591054313, "percentage": 36.92, "elapsed_time": "0:57:25", "remaining_time": "1:38:06"}
145
+ {"current_steps": 145, "total_steps": 390, "loss": 0.5274, "lr": 3.1654100549227024e-05, "epoch": 1.8530351437699681, "percentage": 37.18, "elapsed_time": "0:57:48", "remaining_time": "1:37:40"}
146
+ {"current_steps": 146, "total_steps": 390, "loss": 0.5358, "lr": 3.1508158696850275e-05, "epoch": 1.865814696485623, "percentage": 37.44, "elapsed_time": "0:58:12", "remaining_time": "1:37:16"}
147
+ {"current_steps": 147, "total_steps": 390, "loss": 0.5415, "lr": 3.136129493462312e-05, "epoch": 1.878594249201278, "percentage": 37.69, "elapsed_time": "0:58:35", "remaining_time": "1:36:51"}
148
+ {"current_steps": 148, "total_steps": 390, "loss": 0.5167, "lr": 3.121352102768998e-05, "epoch": 1.891373801916933, "percentage": 37.95, "elapsed_time": "0:58:58", "remaining_time": "1:36:25"}
149
+ {"current_steps": 149, "total_steps": 390, "loss": 0.5405, "lr": 3.106484881410628e-05, "epoch": 1.9041533546325877, "percentage": 38.21, "elapsed_time": "0:59:19", "remaining_time": "1:35:58"}
150
+ {"current_steps": 150, "total_steps": 390, "loss": 0.5225, "lr": 3.091529020389009e-05, "epoch": 1.9169329073482428, "percentage": 38.46, "elapsed_time": "0:59:43", "remaining_time": "1:35:32"}
151
+ {"current_steps": 151, "total_steps": 390, "loss": 0.5268, "lr": 3.076485717806808e-05, "epoch": 1.9297124600638977, "percentage": 38.72, "elapsed_time": "1:00:06", "remaining_time": "1:35:07"}
152
+ {"current_steps": 152, "total_steps": 390, "loss": 0.522, "lr": 3.061356178771564e-05, "epoch": 1.9424920127795526, "percentage": 38.97, "elapsed_time": "1:00:29", "remaining_time": "1:34:42"}
153
+ {"current_steps": 153, "total_steps": 390, "loss": 0.5328, "lr": 3.0461416152991555e-05, "epoch": 1.9552715654952078, "percentage": 39.23, "elapsed_time": "1:00:51", "remaining_time": "1:34:16"}
154
+ {"current_steps": 154, "total_steps": 390, "loss": 0.544, "lr": 3.0308432462167045e-05, "epoch": 1.9680511182108626, "percentage": 39.49, "elapsed_time": "1:01:11", "remaining_time": "1:33:47"}
155
+ {"current_steps": 155, "total_steps": 390, "loss": 0.5444, "lr": 3.015462297064936e-05, "epoch": 1.9808306709265175, "percentage": 39.74, "elapsed_time": "1:01:33", "remaining_time": "1:33:19"}
156
+ {"current_steps": 156, "total_steps": 390, "loss": 0.5312, "lr": 3.0000000000000004e-05, "epoch": 1.9936102236421727, "percentage": 40.0, "elapsed_time": "1:01:56", "remaining_time": "1:32:54"}
157
+ {"current_steps": 157, "total_steps": 390, "loss": 0.4861, "lr": 2.98445759369477e-05, "epoch": 2.0063897763578273, "percentage": 40.26, "elapsed_time": "1:04:09", "remaining_time": "1:35:13"}
158
+ {"current_steps": 158, "total_steps": 390, "loss": 0.4469, "lr": 2.9688363232396056e-05, "epoch": 2.0191693290734825, "percentage": 40.51, "elapsed_time": "1:04:33", "remaining_time": "1:34:47"}
159
+ {"current_steps": 159, "total_steps": 390, "loss": 0.4686, "lr": 2.9531374400426158e-05, "epoch": 2.0319488817891376, "percentage": 40.77, "elapsed_time": "1:04:56", "remaining_time": "1:34:20"}
160
+ {"current_steps": 160, "total_steps": 390, "loss": 0.4571, "lr": 2.9373622017294075e-05, "epoch": 2.0447284345047922, "percentage": 41.03, "elapsed_time": "1:05:19", "remaining_time": "1:33:54"}
161
+ {"current_steps": 161, "total_steps": 390, "loss": 0.4624, "lr": 2.9215118720423375e-05, "epoch": 2.0575079872204474, "percentage": 41.28, "elapsed_time": "1:05:42", "remaining_time": "1:33:27"}
162
+ {"current_steps": 162, "total_steps": 390, "loss": 0.4606, "lr": 2.9055877207392752e-05, "epoch": 2.070287539936102, "percentage": 41.54, "elapsed_time": "1:06:04", "remaining_time": "1:33:00"}
163
+ {"current_steps": 163, "total_steps": 390, "loss": 0.4515, "lr": 2.8895910234918828e-05, "epoch": 2.083067092651757, "percentage": 41.79, "elapsed_time": "1:06:28", "remaining_time": "1:32:34"}
164
+ {"current_steps": 164, "total_steps": 390, "loss": 0.4727, "lr": 2.873523061783426e-05, "epoch": 2.0958466453674123, "percentage": 42.05, "elapsed_time": "1:06:51", "remaining_time": "1:32:08"}