OpenCoder-7B-Instruct-query_nlx / trainer_log.jsonl
k1h0's picture
Upload folder using huggingface_hub
7bc029d verified
{"current_steps": 1, "total_steps": 55, "loss": 0.8371, "lr": 4.995922759815339e-05, "epoch": 0.017937219730941704, "percentage": 1.82, "elapsed_time": "0:02:58", "remaining_time": "2:40:59", "throughput": 11723.96, "total_tokens": 2097152}
{"current_steps": 2, "total_steps": 55, "loss": 0.7804, "lr": 4.9837043383713753e-05, "epoch": 0.03587443946188341, "percentage": 3.64, "elapsed_time": "0:05:45", "remaining_time": "2:32:35", "throughput": 12140.47, "total_tokens": 4194304}
{"current_steps": 3, "total_steps": 55, "loss": 0.7695, "lr": 4.963384589619233e-05, "epoch": 0.053811659192825115, "percentage": 5.45, "elapsed_time": "0:08:32", "remaining_time": "2:27:59", "throughput": 12280.9, "total_tokens": 6291456}
{"current_steps": 4, "total_steps": 55, "loss": 0.7419, "lr": 4.935029792355834e-05, "epoch": 0.07174887892376682, "percentage": 7.27, "elapsed_time": "0:11:18", "remaining_time": "2:24:10", "throughput": 12363.46, "total_tokens": 8388608}
{"current_steps": 5, "total_steps": 55, "loss": 0.7166, "lr": 4.898732434036244e-05, "epoch": 0.08968609865470852, "percentage": 9.09, "elapsed_time": "0:14:04", "remaining_time": "2:20:47", "throughput": 12412.88, "total_tokens": 10485760}
{"current_steps": 6, "total_steps": 55, "loss": 0.7194, "lr": 4.854610909098812e-05, "epoch": 0.10762331838565023, "percentage": 10.91, "elapsed_time": "0:16:51", "remaining_time": "2:17:39", "throughput": 12441.93, "total_tokens": 12582912}
{"current_steps": 7, "total_steps": 55, "loss": 0.6975, "lr": 4.802809132787125e-05, "epoch": 0.12556053811659193, "percentage": 12.73, "elapsed_time": "0:19:37", "remaining_time": "2:14:33", "throughput": 12468.67, "total_tokens": 14680064}
{"current_steps": 8, "total_steps": 55, "loss": 0.7168, "lr": 4.743496071728396e-05, "epoch": 0.14349775784753363, "percentage": 14.55, "elapsed_time": "0:22:23", "remaining_time": "2:11:33", "throughput": 12487.26, "total_tokens": 16777216}
{"current_steps": 9, "total_steps": 55, "loss": 0.6707, "lr": 4.6768651927994434e-05, "epoch": 0.16143497757847533, "percentage": 16.36, "elapsed_time": "0:25:10", "remaining_time": "2:08:38", "throughput": 12498.39, "total_tokens": 18874368}
{"current_steps": 10, "total_steps": 55, "loss": 0.6769, "lr": 4.6031338320779534e-05, "epoch": 0.17937219730941703, "percentage": 18.18, "elapsed_time": "0:27:56", "remaining_time": "2:05:43", "throughput": 12510.8, "total_tokens": 20971520}
{"current_steps": 11, "total_steps": 55, "loss": 0.6873, "lr": 4.522542485937369e-05, "epoch": 0.19730941704035873, "percentage": 20.0, "elapsed_time": "0:30:42", "remaining_time": "2:02:49", "throughput": 12521.66, "total_tokens": 23068672}
{"current_steps": 12, "total_steps": 55, "loss": 0.6643, "lr": 4.4353540265977064e-05, "epoch": 0.21524663677130046, "percentage": 21.82, "elapsed_time": "0:33:28", "remaining_time": "1:59:55", "throughput": 12532.39, "total_tokens": 25165824}
{"current_steps": 13, "total_steps": 55, "loss": 0.6849, "lr": 4.341852844691012e-05, "epoch": 0.23318385650224216, "percentage": 23.64, "elapsed_time": "0:36:14", "remaining_time": "1:57:05", "throughput": 12536.36, "total_tokens": 27262976}
{"current_steps": 14, "total_steps": 55, "loss": 0.6461, "lr": 4.242343921638234e-05, "epoch": 0.25112107623318386, "percentage": 25.45, "elapsed_time": "0:39:02", "remaining_time": "1:54:18", "throughput": 12536.26, "total_tokens": 29360128}
{"current_steps": 15, "total_steps": 55, "loss": 0.6623, "lr": 4.137151834863213e-05, "epoch": 0.26905829596412556, "percentage": 27.27, "elapsed_time": "0:41:50", "remaining_time": "1:51:33", "throughput": 12532.13, "total_tokens": 31457280}
{"current_steps": 16, "total_steps": 55, "loss": 0.6751, "lr": 4.0266196990885955e-05, "epoch": 0.28699551569506726, "percentage": 29.09, "elapsed_time": "0:44:37", "remaining_time": "1:48:47", "throughput": 12530.23, "total_tokens": 33554432}
{"current_steps": 17, "total_steps": 55, "loss": 0.6472, "lr": 3.911108047166924e-05, "epoch": 0.30493273542600896, "percentage": 30.91, "elapsed_time": "0:47:24", "remaining_time": "1:45:58", "throughput": 12533.91, "total_tokens": 35651584}
{"current_steps": 18, "total_steps": 55, "loss": 0.6728, "lr": 3.790993654097405e-05, "epoch": 0.32286995515695066, "percentage": 32.73, "elapsed_time": "0:50:10", "remaining_time": "1:43:08", "throughput": 12538.59, "total_tokens": 37748736}
{"current_steps": 19, "total_steps": 55, "loss": 0.7017, "lr": 3.6666683080641846e-05, "epoch": 0.34080717488789236, "percentage": 34.55, "elapsed_time": "0:52:56", "remaining_time": "1:40:19", "throughput": 12542.68, "total_tokens": 39845888}
{"current_steps": 20, "total_steps": 55, "loss": 0.6502, "lr": 3.5385375325047166e-05, "epoch": 0.35874439461883406, "percentage": 36.36, "elapsed_time": "0:55:43", "remaining_time": "1:37:30", "throughput": 12545.22, "total_tokens": 41943040}
{"current_steps": 21, "total_steps": 55, "loss": 0.6476, "lr": 3.4070192633766025e-05, "epoch": 0.37668161434977576, "percentage": 38.18, "elapsed_time": "0:58:29", "remaining_time": "1:34:42", "throughput": 12548.26, "total_tokens": 44040192}
{"current_steps": 22, "total_steps": 55, "loss": 0.6411, "lr": 3.272542485937369e-05, "epoch": 0.39461883408071746, "percentage": 40.0, "elapsed_time": "1:01:16", "remaining_time": "1:31:54", "throughput": 12550.81, "total_tokens": 46137344}
{"current_steps": 23, "total_steps": 55, "loss": 0.6428, "lr": 3.135545835483718e-05, "epoch": 0.4125560538116592, "percentage": 41.82, "elapsed_time": "1:04:02", "remaining_time": "1:29:05", "throughput": 12553.59, "total_tokens": 48234496}
{"current_steps": 24, "total_steps": 55, "loss": 0.6661, "lr": 2.996476166614364e-05, "epoch": 0.4304932735426009, "percentage": 43.64, "elapsed_time": "1:06:49", "remaining_time": "1:26:18", "throughput": 12553.66, "total_tokens": 50331648}
{"current_steps": 25, "total_steps": 55, "loss": 0.6378, "lr": 2.8557870956832132e-05, "epoch": 0.4484304932735426, "percentage": 45.45, "elapsed_time": "1:09:35", "remaining_time": "1:23:30", "throughput": 12556.55, "total_tokens": 52428800}
{"current_steps": 26, "total_steps": 55, "loss": 0.6532, "lr": 2.7139375211970996e-05, "epoch": 0.4663677130044843, "percentage": 47.27, "elapsed_time": "1:12:21", "remaining_time": "1:20:42", "throughput": 12559.06, "total_tokens": 54525952}
{"current_steps": 27, "total_steps": 55, "loss": 0.6403, "lr": 2.5713901269842404e-05, "epoch": 0.484304932735426, "percentage": 49.09, "elapsed_time": "1:15:08", "remaining_time": "1:17:55", "throughput": 12558.03, "total_tokens": 56623104}
{"current_steps": 28, "total_steps": 55, "loss": 0.6248, "lr": 2.42860987301576e-05, "epoch": 0.5022421524663677, "percentage": 50.91, "elapsed_time": "1:17:56", "remaining_time": "1:15:09", "throughput": 12555.75, "total_tokens": 58720256}
{"current_steps": 29, "total_steps": 55, "loss": 0.6583, "lr": 2.2860624788029013e-05, "epoch": 0.5201793721973094, "percentage": 52.73, "elapsed_time": "1:20:43", "remaining_time": "1:12:22", "throughput": 12557.32, "total_tokens": 60817408}
{"current_steps": 30, "total_steps": 55, "loss": 0.6579, "lr": 2.1442129043167874e-05, "epoch": 0.5381165919282511, "percentage": 54.55, "elapsed_time": "1:23:29", "remaining_time": "1:09:34", "throughput": 12558.75, "total_tokens": 62914560}
{"current_steps": 31, "total_steps": 55, "loss": 0.6659, "lr": 2.003523833385637e-05, "epoch": 0.5560538116591929, "percentage": 56.36, "elapsed_time": "1:26:16", "remaining_time": "1:06:47", "throughput": 12560.18, "total_tokens": 65011712}
{"current_steps": 32, "total_steps": 55, "loss": 0.6423, "lr": 1.8644541645162834e-05, "epoch": 0.5739910313901345, "percentage": 58.18, "elapsed_time": "1:29:02", "remaining_time": "1:04:00", "throughput": 12560.33, "total_tokens": 67108864}
{"current_steps": 33, "total_steps": 55, "loss": 0.6509, "lr": 1.7274575140626318e-05, "epoch": 0.5919282511210763, "percentage": 60.0, "elapsed_time": "1:31:49", "remaining_time": "1:01:13", "throughput": 12560.22, "total_tokens": 69206016}
{"current_steps": 34, "total_steps": 55, "loss": 0.6551, "lr": 1.5929807366233977e-05, "epoch": 0.6098654708520179, "percentage": 61.82, "elapsed_time": "1:34:37", "remaining_time": "0:58:26", "throughput": 12559.4, "total_tokens": 71303168}
{"current_steps": 35, "total_steps": 55, "loss": 0.6232, "lr": 1.4614624674952842e-05, "epoch": 0.6278026905829597, "percentage": 63.64, "elapsed_time": "1:37:23", "remaining_time": "0:55:39", "throughput": 12560.48, "total_tokens": 73400320}
{"current_steps": 36, "total_steps": 55, "loss": 0.6137, "lr": 1.3333316919358157e-05, "epoch": 0.6457399103139013, "percentage": 65.45, "elapsed_time": "1:40:10", "remaining_time": "0:52:52", "throughput": 12560.55, "total_tokens": 75497472}
{"current_steps": 37, "total_steps": 55, "loss": 0.6426, "lr": 1.2090063459025955e-05, "epoch": 0.6636771300448431, "percentage": 67.27, "elapsed_time": "1:42:57", "remaining_time": "0:50:05", "throughput": 12560.88, "total_tokens": 77594624}
{"current_steps": 38, "total_steps": 55, "loss": 0.6512, "lr": 1.0888919528330777e-05, "epoch": 0.6816143497757847, "percentage": 69.09, "elapsed_time": "1:45:44", "remaining_time": "0:47:18", "throughput": 12560.79, "total_tokens": 79691776}
{"current_steps": 39, "total_steps": 55, "loss": 0.6269, "lr": 9.733803009114045e-06, "epoch": 0.6995515695067265, "percentage": 70.91, "elapsed_time": "1:48:31", "remaining_time": "0:44:31", "throughput": 12561.62, "total_tokens": 81788928}
{"current_steps": 40, "total_steps": 55, "loss": 0.6201, "lr": 8.628481651367876e-06, "epoch": 0.7174887892376681, "percentage": 72.73, "elapsed_time": "1:51:17", "remaining_time": "0:41:43", "throughput": 12563.27, "total_tokens": 83886080}
{"current_steps": 41, "total_steps": 55, "loss": 0.642, "lr": 7.576560783617668e-06, "epoch": 0.7354260089686099, "percentage": 74.55, "elapsed_time": "1:54:03", "remaining_time": "0:38:56", "throughput": 12563.83, "total_tokens": 85983232}
{"current_steps": 42, "total_steps": 55, "loss": 0.648, "lr": 6.5814715530898745e-06, "epoch": 0.7533632286995515, "percentage": 76.36, "elapsed_time": "1:56:50", "remaining_time": "0:36:09", "throughput": 12564.32, "total_tokens": 88080384}
{"current_steps": 43, "total_steps": 55, "loss": 0.6442, "lr": 5.646459734022938e-06, "epoch": 0.7713004484304933, "percentage": 78.18, "elapsed_time": "1:59:37", "remaining_time": "0:33:22", "throughput": 12564.19, "total_tokens": 90177536}
{"current_steps": 44, "total_steps": 55, "loss": 0.6488, "lr": 4.7745751406263165e-06, "epoch": 0.7892376681614349, "percentage": 80.0, "elapsed_time": "2:02:24", "remaining_time": "0:30:36", "throughput": 12563.71, "total_tokens": 92274688}
{"current_steps": 45, "total_steps": 55, "loss": 0.65, "lr": 3.968661679220468e-06, "epoch": 0.8071748878923767, "percentage": 81.82, "elapsed_time": "2:05:11", "remaining_time": "0:27:49", "throughput": 12563.43, "total_tokens": 94371840}
{"current_steps": 46, "total_steps": 55, "loss": 0.6584, "lr": 3.2313480720055745e-06, "epoch": 0.8251121076233184, "percentage": 83.64, "elapsed_time": "2:07:58", "remaining_time": "0:25:02", "throughput": 12563.12, "total_tokens": 96468992}
{"current_steps": 47, "total_steps": 55, "loss": 0.6392, "lr": 2.565039282716045e-06, "epoch": 0.8430493273542601, "percentage": 85.45, "elapsed_time": "2:10:45", "remaining_time": "0:22:15", "throughput": 12562.87, "total_tokens": 98566144}
{"current_steps": 48, "total_steps": 55, "loss": 0.6524, "lr": 1.97190867212875e-06, "epoch": 0.8609865470852018, "percentage": 87.27, "elapsed_time": "2:13:32", "remaining_time": "0:19:28", "throughput": 12562.77, "total_tokens": 100663296}
{"current_steps": 49, "total_steps": 55, "loss": 0.6276, "lr": 1.4538909090118846e-06, "epoch": 0.8789237668161435, "percentage": 89.09, "elapsed_time": "2:16:19", "remaining_time": "0:16:41", "throughput": 12562.47, "total_tokens": 102760448}
{"current_steps": 50, "total_steps": 55, "loss": 0.6282, "lr": 1.0126756596375686e-06, "epoch": 0.8968609865470852, "percentage": 90.91, "elapsed_time": "2:19:07", "remaining_time": "0:13:54", "throughput": 12562.14, "total_tokens": 104857600}
{"current_steps": 51, "total_steps": 55, "loss": 0.6344, "lr": 6.497020764416633e-07, "epoch": 0.9147982062780269, "percentage": 92.73, "elapsed_time": "2:21:54", "remaining_time": "0:11:07", "throughput": 12561.89, "total_tokens": 106954752}
{"current_steps": 52, "total_steps": 55, "loss": 0.6464, "lr": 3.6615410380767544e-07, "epoch": 0.9327354260089686, "percentage": 94.55, "elapsed_time": "2:24:41", "remaining_time": "0:08:20", "throughput": 12561.72, "total_tokens": 109051904}
{"current_steps": 53, "total_steps": 55, "loss": 0.6253, "lr": 1.6295661628624447e-07, "epoch": 0.9506726457399103, "percentage": 96.36, "elapsed_time": "2:27:28", "remaining_time": "0:05:33", "throughput": 12561.53, "total_tokens": 111149056}
{"current_steps": 54, "total_steps": 55, "loss": 0.6375, "lr": 4.07724018466088e-08, "epoch": 0.968609865470852, "percentage": 98.18, "elapsed_time": "2:30:15", "remaining_time": "0:02:46", "throughput": 12561.27, "total_tokens": 113246208}
{"current_steps": 55, "total_steps": 55, "loss": 0.6419, "lr": 0.0, "epoch": 0.9865470852017937, "percentage": 100.0, "elapsed_time": "2:33:02", "remaining_time": "0:00:00", "throughput": 12561.25, "total_tokens": 115343360}
{"current_steps": 55, "total_steps": 55, "epoch": 0.9865470852017937, "percentage": 100.0, "elapsed_time": "2:33:27", "remaining_time": "0:00:00", "throughput": 12527.75, "total_tokens": 115343360}