{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5859, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017067759003242875, "grad_norm": 4.927246022675701, "learning_rate": 4e-05, "loss": 1.0624, "num_tokens": 106946.0, "step": 1 }, { "epoch": 0.0003413551800648575, "grad_norm": 3.048352235606045, "learning_rate": 3.9993172896398704e-05, "loss": 1.0364, "num_tokens": 195642.0, "step": 2 }, { "epoch": 0.0005120327700972862, "grad_norm": 2.234228813740128, "learning_rate": 3.998634579279741e-05, "loss": 0.9699, "num_tokens": 296243.0, "step": 3 }, { "epoch": 0.000682710360129715, "grad_norm": 1.3037893496300843, "learning_rate": 3.997951868919611e-05, "loss": 0.9454, "num_tokens": 405415.0, "step": 4 }, { "epoch": 0.0008533879501621437, "grad_norm": 1.1651162875802463, "learning_rate": 3.997269158559482e-05, "loss": 0.8487, "num_tokens": 495347.0, "step": 5 }, { "epoch": 0.0010240655401945725, "grad_norm": 1.1147829929966337, "learning_rate": 3.996586448199352e-05, "loss": 0.9132, "num_tokens": 588334.0, "step": 6 }, { "epoch": 0.0011947431302270011, "grad_norm": 1.0673615638167613, "learning_rate": 3.9959037378392226e-05, "loss": 0.8355, "num_tokens": 689719.0, "step": 7 }, { "epoch": 0.00136542072025943, "grad_norm": 1.060713406138517, "learning_rate": 3.995221027479092e-05, "loss": 0.8826, "num_tokens": 776675.0, "step": 8 }, { "epoch": 0.0015360983102918587, "grad_norm": 1.0379411159025862, "learning_rate": 3.9945383171189627e-05, "loss": 0.8632, "num_tokens": 852690.0, "step": 9 }, { "epoch": 0.0017067759003242873, "grad_norm": 1.06322243417711, "learning_rate": 3.993855606758833e-05, "loss": 0.9177, "num_tokens": 941448.0, "step": 10 }, { "epoch": 0.0018774534903567162, "grad_norm": 0.9303315782513246, "learning_rate": 3.9931728963987034e-05, "loss": 0.8984, "num_tokens": 1041055.0, "step": 11 }, { "epoch": 0.002048131080389145, "grad_norm": 0.9982996896210765, "learning_rate": 3.9924901860385734e-05, "loss": 0.9014, "num_tokens": 1157013.0, "step": 12 }, { "epoch": 0.002218808670421574, "grad_norm": 0.797165502082853, "learning_rate": 3.9918074756784435e-05, "loss": 0.8393, "num_tokens": 1273374.0, "step": 13 }, { "epoch": 0.0023894862604540022, "grad_norm": 0.826217184562919, "learning_rate": 3.991124765318314e-05, "loss": 0.9264, "num_tokens": 1394287.0, "step": 14 }, { "epoch": 0.002560163850486431, "grad_norm": 0.9091360844201513, "learning_rate": 3.990442054958184e-05, "loss": 0.8342, "num_tokens": 1469440.0, "step": 15 }, { "epoch": 0.00273084144051886, "grad_norm": 0.8403549260768445, "learning_rate": 3.989759344598055e-05, "loss": 0.8535, "num_tokens": 1566286.0, "step": 16 }, { "epoch": 0.0029015190305512885, "grad_norm": 0.7596256216386689, "learning_rate": 3.989076634237925e-05, "loss": 0.8317, "num_tokens": 1659927.0, "step": 17 }, { "epoch": 0.0030721966205837174, "grad_norm": 0.7813454419126862, "learning_rate": 3.988393923877795e-05, "loss": 0.7804, "num_tokens": 1745869.0, "step": 18 }, { "epoch": 0.0032428742106161462, "grad_norm": 0.8137842002351539, "learning_rate": 3.987711213517666e-05, "loss": 0.775, "num_tokens": 1835846.0, "step": 19 }, { "epoch": 0.0034135518006485747, "grad_norm": 0.8056565641218325, "learning_rate": 3.987028503157536e-05, "loss": 0.7516, "num_tokens": 1912401.0, "step": 20 }, { "epoch": 0.0035842293906810036, "grad_norm": 0.9069662951755969, "learning_rate": 3.986345792797406e-05, "loss": 0.785, "num_tokens": 2003446.0, "step": 21 }, { "epoch": 0.0037549069807134325, "grad_norm": 0.7653799056401251, "learning_rate": 3.985663082437276e-05, "loss": 0.8327, "num_tokens": 2110659.0, "step": 22 }, { "epoch": 0.003925584570745861, "grad_norm": 0.756816022288689, "learning_rate": 3.9849803720771466e-05, "loss": 0.7609, "num_tokens": 2184697.0, "step": 23 }, { "epoch": 0.00409626216077829, "grad_norm": 0.745738850707005, "learning_rate": 3.9842976617170166e-05, "loss": 0.8217, "num_tokens": 2286020.0, "step": 24 }, { "epoch": 0.004266939750810718, "grad_norm": 0.7727404391184417, "learning_rate": 3.983614951356887e-05, "loss": 0.7643, "num_tokens": 2372368.0, "step": 25 }, { "epoch": 0.004437617340843148, "grad_norm": 0.9247724452839717, "learning_rate": 3.9829322409967574e-05, "loss": 0.8605, "num_tokens": 2438364.0, "step": 26 }, { "epoch": 0.004608294930875576, "grad_norm": 0.7257810538456507, "learning_rate": 3.982249530636628e-05, "loss": 0.7053, "num_tokens": 2511279.0, "step": 27 }, { "epoch": 0.0047789725209080045, "grad_norm": 0.8044674783173333, "learning_rate": 3.981566820276498e-05, "loss": 0.7294, "num_tokens": 2601295.0, "step": 28 }, { "epoch": 0.004949650110940434, "grad_norm": 0.6872460019225002, "learning_rate": 3.980884109916368e-05, "loss": 0.7687, "num_tokens": 2692058.0, "step": 29 }, { "epoch": 0.005120327700972862, "grad_norm": 0.7555484216601004, "learning_rate": 3.980201399556239e-05, "loss": 0.7828, "num_tokens": 2771157.0, "step": 30 }, { "epoch": 0.005291005291005291, "grad_norm": 0.7640368483439702, "learning_rate": 3.979518689196109e-05, "loss": 0.6756, "num_tokens": 2844059.0, "step": 31 }, { "epoch": 0.00546168288103772, "grad_norm": 0.7715086618293171, "learning_rate": 3.978835978835979e-05, "loss": 0.8123, "num_tokens": 2923493.0, "step": 32 }, { "epoch": 0.0056323604710701485, "grad_norm": 0.761091174042492, "learning_rate": 3.978153268475849e-05, "loss": 0.7255, "num_tokens": 2995263.0, "step": 33 }, { "epoch": 0.005803038061102577, "grad_norm": 0.7837044890766441, "learning_rate": 3.97747055811572e-05, "loss": 0.8633, "num_tokens": 3098116.0, "step": 34 }, { "epoch": 0.005973715651135006, "grad_norm": 0.7374929404358518, "learning_rate": 3.97678784775559e-05, "loss": 0.7765, "num_tokens": 3200843.0, "step": 35 }, { "epoch": 0.006144393241167435, "grad_norm": 0.7118141241703863, "learning_rate": 3.9761051373954604e-05, "loss": 0.8119, "num_tokens": 3300176.0, "step": 36 }, { "epoch": 0.006315070831199863, "grad_norm": 0.8088010123025907, "learning_rate": 3.9754224270353305e-05, "loss": 0.8129, "num_tokens": 3400458.0, "step": 37 }, { "epoch": 0.0064857484212322925, "grad_norm": 0.7055686129637639, "learning_rate": 3.974739716675201e-05, "loss": 0.7729, "num_tokens": 3518889.0, "step": 38 }, { "epoch": 0.006656426011264721, "grad_norm": 0.6931394597489553, "learning_rate": 3.974057006315071e-05, "loss": 0.693, "num_tokens": 3598339.0, "step": 39 }, { "epoch": 0.006827103601297149, "grad_norm": 0.6779583132071767, "learning_rate": 3.973374295954941e-05, "loss": 0.7554, "num_tokens": 3702099.0, "step": 40 }, { "epoch": 0.006997781191329579, "grad_norm": 0.7203237143857879, "learning_rate": 3.972691585594812e-05, "loss": 0.6692, "num_tokens": 3792354.0, "step": 41 }, { "epoch": 0.007168458781362007, "grad_norm": 0.8007313963972836, "learning_rate": 3.972008875234682e-05, "loss": 0.7137, "num_tokens": 3853390.0, "step": 42 }, { "epoch": 0.007339136371394436, "grad_norm": 0.7375648687071122, "learning_rate": 3.971326164874553e-05, "loss": 0.7329, "num_tokens": 3949208.0, "step": 43 }, { "epoch": 0.007509813961426865, "grad_norm": 0.7396283530838207, "learning_rate": 3.970643454514423e-05, "loss": 0.7885, "num_tokens": 4058020.0, "step": 44 }, { "epoch": 0.007680491551459293, "grad_norm": 0.8321220234691021, "learning_rate": 3.969960744154293e-05, "loss": 0.8469, "num_tokens": 4141321.0, "step": 45 }, { "epoch": 0.007851169141491723, "grad_norm": 0.7350589135804083, "learning_rate": 3.969278033794163e-05, "loss": 0.8921, "num_tokens": 4250607.0, "step": 46 }, { "epoch": 0.008021846731524151, "grad_norm": 0.7201791502605341, "learning_rate": 3.9685953234340336e-05, "loss": 0.7519, "num_tokens": 4358693.0, "step": 47 }, { "epoch": 0.00819252432155658, "grad_norm": 0.6822472222413762, "learning_rate": 3.9679126130739036e-05, "loss": 0.8139, "num_tokens": 4472404.0, "step": 48 }, { "epoch": 0.008363201911589008, "grad_norm": 0.7205918708852296, "learning_rate": 3.9672299027137736e-05, "loss": 0.6795, "num_tokens": 4540941.0, "step": 49 }, { "epoch": 0.008533879501621437, "grad_norm": 0.689769600573838, "learning_rate": 3.9665471923536444e-05, "loss": 0.7055, "num_tokens": 4654813.0, "step": 50 }, { "epoch": 0.008704557091653867, "grad_norm": 0.8397726567442072, "learning_rate": 3.9658644819935144e-05, "loss": 0.9061, "num_tokens": 4748387.0, "step": 51 }, { "epoch": 0.008875234681686295, "grad_norm": 0.7299234289093371, "learning_rate": 3.965181771633385e-05, "loss": 0.7132, "num_tokens": 4828488.0, "step": 52 }, { "epoch": 0.009045912271718724, "grad_norm": 0.7971945861085944, "learning_rate": 3.964499061273255e-05, "loss": 0.812, "num_tokens": 4908943.0, "step": 53 }, { "epoch": 0.009216589861751152, "grad_norm": 0.7470664732538292, "learning_rate": 3.963816350913126e-05, "loss": 0.7451, "num_tokens": 4991700.0, "step": 54 }, { "epoch": 0.00938726745178358, "grad_norm": 0.7267148612906924, "learning_rate": 3.963133640552996e-05, "loss": 0.8426, "num_tokens": 5090238.0, "step": 55 }, { "epoch": 0.009557945041816009, "grad_norm": 0.7060878197200826, "learning_rate": 3.962450930192866e-05, "loss": 0.8536, "num_tokens": 5184502.0, "step": 56 }, { "epoch": 0.00972862263184844, "grad_norm": 0.7164828805072689, "learning_rate": 3.961768219832736e-05, "loss": 0.7879, "num_tokens": 5305502.0, "step": 57 }, { "epoch": 0.009899300221880868, "grad_norm": 0.7336781645910154, "learning_rate": 3.961085509472607e-05, "loss": 0.7847, "num_tokens": 5380570.0, "step": 58 }, { "epoch": 0.010069977811913296, "grad_norm": 0.6848179383612278, "learning_rate": 3.960402799112477e-05, "loss": 0.7052, "num_tokens": 5462973.0, "step": 59 }, { "epoch": 0.010240655401945725, "grad_norm": 0.7378105624521154, "learning_rate": 3.959720088752347e-05, "loss": 0.8381, "num_tokens": 5567876.0, "step": 60 }, { "epoch": 0.010411332991978153, "grad_norm": 0.7275572362419059, "learning_rate": 3.9590373783922175e-05, "loss": 0.7242, "num_tokens": 5663667.0, "step": 61 }, { "epoch": 0.010582010582010581, "grad_norm": 0.6738102894577289, "learning_rate": 3.9583546680320875e-05, "loss": 0.8847, "num_tokens": 5789458.0, "step": 62 }, { "epoch": 0.010752688172043012, "grad_norm": 0.7007636635184095, "learning_rate": 3.957671957671958e-05, "loss": 0.7467, "num_tokens": 5872500.0, "step": 63 }, { "epoch": 0.01092336576207544, "grad_norm": 0.6551726547438986, "learning_rate": 3.956989247311828e-05, "loss": 0.7711, "num_tokens": 5982089.0, "step": 64 }, { "epoch": 0.011094043352107869, "grad_norm": 0.6885511285462077, "learning_rate": 3.956306536951698e-05, "loss": 0.688, "num_tokens": 6072778.0, "step": 65 }, { "epoch": 0.011264720942140297, "grad_norm": 0.7097694668409246, "learning_rate": 3.955623826591569e-05, "loss": 0.8276, "num_tokens": 6210764.0, "step": 66 }, { "epoch": 0.011435398532172725, "grad_norm": 0.7143955035794974, "learning_rate": 3.954941116231439e-05, "loss": 0.7693, "num_tokens": 6306727.0, "step": 67 }, { "epoch": 0.011606076122205154, "grad_norm": 0.6695535596327769, "learning_rate": 3.95425840587131e-05, "loss": 0.7821, "num_tokens": 6398381.0, "step": 68 }, { "epoch": 0.011776753712237584, "grad_norm": 0.7686511455049567, "learning_rate": 3.95357569551118e-05, "loss": 0.6947, "num_tokens": 6464689.0, "step": 69 }, { "epoch": 0.011947431302270013, "grad_norm": 0.6799563772555682, "learning_rate": 3.95289298515105e-05, "loss": 0.816, "num_tokens": 6592934.0, "step": 70 }, { "epoch": 0.012118108892302441, "grad_norm": 0.6534270630652051, "learning_rate": 3.95221027479092e-05, "loss": 0.7495, "num_tokens": 6705546.0, "step": 71 }, { "epoch": 0.01228878648233487, "grad_norm": 0.7663017891810427, "learning_rate": 3.9515275644307906e-05, "loss": 0.8666, "num_tokens": 6809773.0, "step": 72 }, { "epoch": 0.012459464072367298, "grad_norm": 0.6589745695367404, "learning_rate": 3.9508448540706606e-05, "loss": 0.7142, "num_tokens": 6916368.0, "step": 73 }, { "epoch": 0.012630141662399726, "grad_norm": 0.6518812664326954, "learning_rate": 3.9501621437105314e-05, "loss": 0.7112, "num_tokens": 7009718.0, "step": 74 }, { "epoch": 0.012800819252432157, "grad_norm": 0.7168443925954251, "learning_rate": 3.9494794333504014e-05, "loss": 0.7763, "num_tokens": 7093304.0, "step": 75 }, { "epoch": 0.012971496842464585, "grad_norm": 0.8518409513538764, "learning_rate": 3.9487967229902714e-05, "loss": 0.7652, "num_tokens": 7172731.0, "step": 76 }, { "epoch": 0.013142174432497013, "grad_norm": 0.6764496851467736, "learning_rate": 3.948114012630142e-05, "loss": 0.7308, "num_tokens": 7269718.0, "step": 77 }, { "epoch": 0.013312852022529442, "grad_norm": 0.6741632570449712, "learning_rate": 3.947431302270012e-05, "loss": 0.8394, "num_tokens": 7384158.0, "step": 78 }, { "epoch": 0.01348352961256187, "grad_norm": 0.6543456516854816, "learning_rate": 3.946748591909883e-05, "loss": 0.6941, "num_tokens": 7474574.0, "step": 79 }, { "epoch": 0.013654207202594299, "grad_norm": 0.6303454367812126, "learning_rate": 3.946065881549753e-05, "loss": 0.7465, "num_tokens": 7577425.0, "step": 80 }, { "epoch": 0.013824884792626729, "grad_norm": 0.7107294639214967, "learning_rate": 3.9453831711896237e-05, "loss": 0.743, "num_tokens": 7649837.0, "step": 81 }, { "epoch": 0.013995562382659157, "grad_norm": 0.7020757619747062, "learning_rate": 3.944700460829493e-05, "loss": 0.7375, "num_tokens": 7730824.0, "step": 82 }, { "epoch": 0.014166239972691586, "grad_norm": 0.7308074943683739, "learning_rate": 3.944017750469364e-05, "loss": 0.8706, "num_tokens": 7834039.0, "step": 83 }, { "epoch": 0.014336917562724014, "grad_norm": 0.7018958240322349, "learning_rate": 3.943335040109234e-05, "loss": 0.6853, "num_tokens": 7934944.0, "step": 84 }, { "epoch": 0.014507595152756443, "grad_norm": 0.697643161098838, "learning_rate": 3.9426523297491045e-05, "loss": 0.7309, "num_tokens": 8035166.0, "step": 85 }, { "epoch": 0.014678272742788871, "grad_norm": 0.6468565003915663, "learning_rate": 3.9419696193889745e-05, "loss": 0.734, "num_tokens": 8139451.0, "step": 86 }, { "epoch": 0.014848950332821301, "grad_norm": 0.7296842711049382, "learning_rate": 3.9412869090288446e-05, "loss": 0.6937, "num_tokens": 8214176.0, "step": 87 }, { "epoch": 0.01501962792285373, "grad_norm": 0.7757639961466596, "learning_rate": 3.940604198668715e-05, "loss": 0.7869, "num_tokens": 8283789.0, "step": 88 }, { "epoch": 0.015190305512886158, "grad_norm": 0.7054345909222848, "learning_rate": 3.939921488308585e-05, "loss": 0.7599, "num_tokens": 8374370.0, "step": 89 }, { "epoch": 0.015360983102918587, "grad_norm": 0.712552810457742, "learning_rate": 3.939238777948456e-05, "loss": 0.6566, "num_tokens": 8457534.0, "step": 90 }, { "epoch": 0.015531660692951015, "grad_norm": 0.7226337413539574, "learning_rate": 3.938556067588326e-05, "loss": 0.8167, "num_tokens": 8560410.0, "step": 91 }, { "epoch": 0.015702338282983445, "grad_norm": 0.6961355791629177, "learning_rate": 3.937873357228196e-05, "loss": 0.7731, "num_tokens": 8657358.0, "step": 92 }, { "epoch": 0.015873015873015872, "grad_norm": 0.7188650431735463, "learning_rate": 3.937190646868067e-05, "loss": 0.7436, "num_tokens": 8758804.0, "step": 93 }, { "epoch": 0.016043693463048302, "grad_norm": 0.7198155622289866, "learning_rate": 3.936507936507937e-05, "loss": 0.7015, "num_tokens": 8830660.0, "step": 94 }, { "epoch": 0.01621437105308073, "grad_norm": 0.7283468599367112, "learning_rate": 3.935825226147807e-05, "loss": 0.8131, "num_tokens": 8926170.0, "step": 95 }, { "epoch": 0.01638504864311316, "grad_norm": 1.7094760002588176, "learning_rate": 3.935142515787677e-05, "loss": 0.7444, "num_tokens": 9025990.0, "step": 96 }, { "epoch": 0.01655572623314559, "grad_norm": 0.710438774268186, "learning_rate": 3.9344598054275476e-05, "loss": 0.7075, "num_tokens": 9124019.0, "step": 97 }, { "epoch": 0.016726403823178016, "grad_norm": 0.6950972504166626, "learning_rate": 3.933777095067418e-05, "loss": 0.7482, "num_tokens": 9211588.0, "step": 98 }, { "epoch": 0.016897081413210446, "grad_norm": 0.7197346812206425, "learning_rate": 3.9330943847072884e-05, "loss": 0.7874, "num_tokens": 9300925.0, "step": 99 }, { "epoch": 0.017067759003242873, "grad_norm": 0.789788312202588, "learning_rate": 3.9324116743471584e-05, "loss": 0.7629, "num_tokens": 9399015.0, "step": 100 }, { "epoch": 0.017238436593275303, "grad_norm": 0.7342924974604631, "learning_rate": 3.931728963987029e-05, "loss": 0.7348, "num_tokens": 9485097.0, "step": 101 }, { "epoch": 0.017409114183307733, "grad_norm": 0.7166646732289269, "learning_rate": 3.931046253626899e-05, "loss": 0.8149, "num_tokens": 9575701.0, "step": 102 }, { "epoch": 0.01757979177334016, "grad_norm": 0.7437774544414028, "learning_rate": 3.930363543266769e-05, "loss": 0.7368, "num_tokens": 9662488.0, "step": 103 }, { "epoch": 0.01775046936337259, "grad_norm": 0.6808010328404168, "learning_rate": 3.92968083290664e-05, "loss": 0.7039, "num_tokens": 9761350.0, "step": 104 }, { "epoch": 0.017921146953405017, "grad_norm": 0.7169724509315818, "learning_rate": 3.92899812254651e-05, "loss": 0.7202, "num_tokens": 9838430.0, "step": 105 }, { "epoch": 0.018091824543437447, "grad_norm": 0.7126977172379187, "learning_rate": 3.928315412186381e-05, "loss": 0.7707, "num_tokens": 9921053.0, "step": 106 }, { "epoch": 0.018262502133469874, "grad_norm": 0.6432027470949758, "learning_rate": 3.92763270182625e-05, "loss": 0.7867, "num_tokens": 10030160.0, "step": 107 }, { "epoch": 0.018433179723502304, "grad_norm": 0.726049307681308, "learning_rate": 3.926949991466121e-05, "loss": 0.712, "num_tokens": 10115182.0, "step": 108 }, { "epoch": 0.018603857313534734, "grad_norm": 0.6402177577648095, "learning_rate": 3.926267281105991e-05, "loss": 0.6827, "num_tokens": 10211924.0, "step": 109 }, { "epoch": 0.01877453490356716, "grad_norm": 0.7152309853940433, "learning_rate": 3.9255845707458615e-05, "loss": 0.6939, "num_tokens": 10290113.0, "step": 110 }, { "epoch": 0.01894521249359959, "grad_norm": 0.7822974515684203, "learning_rate": 3.9249018603857316e-05, "loss": 0.7529, "num_tokens": 10352671.0, "step": 111 }, { "epoch": 0.019115890083632018, "grad_norm": 0.6626356348746878, "learning_rate": 3.924219150025602e-05, "loss": 0.7616, "num_tokens": 10440263.0, "step": 112 }, { "epoch": 0.019286567673664448, "grad_norm": 0.6888370834474341, "learning_rate": 3.923536439665472e-05, "loss": 0.7527, "num_tokens": 10533923.0, "step": 113 }, { "epoch": 0.01945724526369688, "grad_norm": 0.6833325132325303, "learning_rate": 3.9228537293053423e-05, "loss": 0.7188, "num_tokens": 10639389.0, "step": 114 }, { "epoch": 0.019627922853729305, "grad_norm": 0.6415478788112731, "learning_rate": 3.922171018945213e-05, "loss": 0.7446, "num_tokens": 10738893.0, "step": 115 }, { "epoch": 0.019798600443761735, "grad_norm": 0.6272733771819627, "learning_rate": 3.921488308585083e-05, "loss": 0.6737, "num_tokens": 10839793.0, "step": 116 }, { "epoch": 0.019969278033794162, "grad_norm": 0.6729755805869422, "learning_rate": 3.920805598224954e-05, "loss": 0.6776, "num_tokens": 10965549.0, "step": 117 }, { "epoch": 0.020139955623826592, "grad_norm": 0.6715717110894642, "learning_rate": 3.920122887864824e-05, "loss": 0.6907, "num_tokens": 11057779.0, "step": 118 }, { "epoch": 0.02031063321385902, "grad_norm": 0.6743663355343439, "learning_rate": 3.919440177504694e-05, "loss": 0.677, "num_tokens": 11142241.0, "step": 119 }, { "epoch": 0.02048131080389145, "grad_norm": 0.7636216756077997, "learning_rate": 3.918757467144564e-05, "loss": 0.7891, "num_tokens": 11227376.0, "step": 120 }, { "epoch": 0.02065198839392388, "grad_norm": 0.7994741365750552, "learning_rate": 3.9180747567844346e-05, "loss": 0.7041, "num_tokens": 11290240.0, "step": 121 }, { "epoch": 0.020822665983956306, "grad_norm": 0.6883108576026933, "learning_rate": 3.917392046424305e-05, "loss": 0.6549, "num_tokens": 11380542.0, "step": 122 }, { "epoch": 0.020993343573988736, "grad_norm": 0.6697062213556211, "learning_rate": 3.916709336064175e-05, "loss": 0.8257, "num_tokens": 11476991.0, "step": 123 }, { "epoch": 0.021164021164021163, "grad_norm": 0.6889690539813721, "learning_rate": 3.9160266257040454e-05, "loss": 0.7905, "num_tokens": 11554220.0, "step": 124 }, { "epoch": 0.021334698754053593, "grad_norm": 0.6998892555161134, "learning_rate": 3.9153439153439155e-05, "loss": 0.8183, "num_tokens": 11671609.0, "step": 125 }, { "epoch": 0.021505376344086023, "grad_norm": 0.7057675693723754, "learning_rate": 3.914661204983786e-05, "loss": 0.6897, "num_tokens": 11736584.0, "step": 126 }, { "epoch": 0.02167605393411845, "grad_norm": 0.7983389150672903, "learning_rate": 3.913978494623656e-05, "loss": 0.7056, "num_tokens": 11794801.0, "step": 127 }, { "epoch": 0.02184673152415088, "grad_norm": 0.7015293342096629, "learning_rate": 3.913295784263527e-05, "loss": 0.6693, "num_tokens": 11860247.0, "step": 128 }, { "epoch": 0.022017409114183307, "grad_norm": 0.659835217469634, "learning_rate": 3.912613073903397e-05, "loss": 0.7694, "num_tokens": 11948286.0, "step": 129 }, { "epoch": 0.022188086704215737, "grad_norm": 0.7021900827469132, "learning_rate": 3.911930363543267e-05, "loss": 0.7816, "num_tokens": 12045966.0, "step": 130 }, { "epoch": 0.022358764294248164, "grad_norm": 0.597681021149141, "learning_rate": 3.911247653183138e-05, "loss": 0.7412, "num_tokens": 12154680.0, "step": 131 }, { "epoch": 0.022529441884280594, "grad_norm": 0.6689515069328568, "learning_rate": 3.910564942823008e-05, "loss": 0.6829, "num_tokens": 12240611.0, "step": 132 }, { "epoch": 0.022700119474313024, "grad_norm": 0.6427117507301527, "learning_rate": 3.909882232462878e-05, "loss": 0.736, "num_tokens": 12341226.0, "step": 133 }, { "epoch": 0.02287079706434545, "grad_norm": 0.641298842454867, "learning_rate": 3.909199522102748e-05, "loss": 0.7794, "num_tokens": 12470035.0, "step": 134 }, { "epoch": 0.02304147465437788, "grad_norm": 0.6559941578850332, "learning_rate": 3.9085168117426186e-05, "loss": 0.7824, "num_tokens": 12569182.0, "step": 135 }, { "epoch": 0.023212152244410308, "grad_norm": 0.5797879584252981, "learning_rate": 3.9078341013824886e-05, "loss": 0.7486, "num_tokens": 12689260.0, "step": 136 }, { "epoch": 0.023382829834442738, "grad_norm": 0.5785520978942137, "learning_rate": 3.907151391022359e-05, "loss": 0.7169, "num_tokens": 12799814.0, "step": 137 }, { "epoch": 0.023553507424475168, "grad_norm": 0.63145340884011, "learning_rate": 3.9064686806622293e-05, "loss": 0.7159, "num_tokens": 12895531.0, "step": 138 }, { "epoch": 0.023724185014507595, "grad_norm": 0.6718641945416584, "learning_rate": 3.9057859703020994e-05, "loss": 0.7595, "num_tokens": 12970095.0, "step": 139 }, { "epoch": 0.023894862604540025, "grad_norm": 0.6688154953483435, "learning_rate": 3.90510325994197e-05, "loss": 0.6489, "num_tokens": 13047947.0, "step": 140 }, { "epoch": 0.024065540194572452, "grad_norm": 0.6119465046315931, "learning_rate": 3.90442054958184e-05, "loss": 0.6582, "num_tokens": 13131231.0, "step": 141 }, { "epoch": 0.024236217784604882, "grad_norm": 0.6071349966689983, "learning_rate": 3.903737839221711e-05, "loss": 0.6955, "num_tokens": 13226746.0, "step": 142 }, { "epoch": 0.02440689537463731, "grad_norm": 0.707819543546065, "learning_rate": 3.903055128861581e-05, "loss": 0.785, "num_tokens": 13326100.0, "step": 143 }, { "epoch": 0.02457757296466974, "grad_norm": 0.7101140252787286, "learning_rate": 3.902372418501451e-05, "loss": 0.7377, "num_tokens": 13408131.0, "step": 144 }, { "epoch": 0.02474825055470217, "grad_norm": 0.7137763420622705, "learning_rate": 3.901689708141321e-05, "loss": 0.7411, "num_tokens": 13492002.0, "step": 145 }, { "epoch": 0.024918928144734596, "grad_norm": 0.6302117820569665, "learning_rate": 3.901006997781192e-05, "loss": 0.749, "num_tokens": 13608989.0, "step": 146 }, { "epoch": 0.025089605734767026, "grad_norm": 0.6520119814156172, "learning_rate": 3.900324287421062e-05, "loss": 0.7218, "num_tokens": 13693079.0, "step": 147 }, { "epoch": 0.025260283324799453, "grad_norm": 0.721973686477052, "learning_rate": 3.8996415770609324e-05, "loss": 0.7097, "num_tokens": 13759793.0, "step": 148 }, { "epoch": 0.025430960914831883, "grad_norm": 0.6444815617055734, "learning_rate": 3.8989588667008025e-05, "loss": 0.6814, "num_tokens": 13853566.0, "step": 149 }, { "epoch": 0.025601638504864313, "grad_norm": 0.6470144981350449, "learning_rate": 3.8982761563406725e-05, "loss": 0.7195, "num_tokens": 13940952.0, "step": 150 }, { "epoch": 0.02577231609489674, "grad_norm": 0.6482708654305512, "learning_rate": 3.897593445980543e-05, "loss": 0.7509, "num_tokens": 14046620.0, "step": 151 }, { "epoch": 0.02594299368492917, "grad_norm": 0.6709976747086389, "learning_rate": 3.896910735620413e-05, "loss": 0.63, "num_tokens": 14138480.0, "step": 152 }, { "epoch": 0.026113671274961597, "grad_norm": 0.6223641618582216, "learning_rate": 3.896228025260284e-05, "loss": 0.702, "num_tokens": 14245728.0, "step": 153 }, { "epoch": 0.026284348864994027, "grad_norm": 0.6702374688661782, "learning_rate": 3.895545314900154e-05, "loss": 0.6915, "num_tokens": 14368951.0, "step": 154 }, { "epoch": 0.026455026455026454, "grad_norm": 0.7064029455559451, "learning_rate": 3.894862604540025e-05, "loss": 0.8063, "num_tokens": 14447989.0, "step": 155 }, { "epoch": 0.026625704045058884, "grad_norm": 0.6958089994646517, "learning_rate": 3.894179894179894e-05, "loss": 0.7468, "num_tokens": 14531991.0, "step": 156 }, { "epoch": 0.026796381635091314, "grad_norm": 0.660823616166228, "learning_rate": 3.893497183819765e-05, "loss": 0.6994, "num_tokens": 14633378.0, "step": 157 }, { "epoch": 0.02696705922512374, "grad_norm": 0.6632760553080884, "learning_rate": 3.892814473459635e-05, "loss": 0.8085, "num_tokens": 14742735.0, "step": 158 }, { "epoch": 0.02713773681515617, "grad_norm": 0.6707076778792253, "learning_rate": 3.8921317630995056e-05, "loss": 0.704, "num_tokens": 14824038.0, "step": 159 }, { "epoch": 0.027308414405188598, "grad_norm": 0.6780371496938489, "learning_rate": 3.8914490527393756e-05, "loss": 0.7246, "num_tokens": 14912709.0, "step": 160 }, { "epoch": 0.027479091995221028, "grad_norm": 0.6502275180697508, "learning_rate": 3.8907663423792456e-05, "loss": 0.7429, "num_tokens": 15007000.0, "step": 161 }, { "epoch": 0.027649769585253458, "grad_norm": 0.730209499791411, "learning_rate": 3.8900836320191163e-05, "loss": 0.6582, "num_tokens": 15063963.0, "step": 162 }, { "epoch": 0.027820447175285885, "grad_norm": 0.7114305063785422, "learning_rate": 3.8894009216589864e-05, "loss": 0.7225, "num_tokens": 15136962.0, "step": 163 }, { "epoch": 0.027991124765318315, "grad_norm": 0.6053172228852393, "learning_rate": 3.888718211298857e-05, "loss": 0.7763, "num_tokens": 15253506.0, "step": 164 }, { "epoch": 0.02816180235535074, "grad_norm": 0.6351742686250045, "learning_rate": 3.888035500938727e-05, "loss": 0.8324, "num_tokens": 15370189.0, "step": 165 }, { "epoch": 0.028332479945383172, "grad_norm": 0.648084921078821, "learning_rate": 3.887352790578597e-05, "loss": 0.7172, "num_tokens": 15462760.0, "step": 166 }, { "epoch": 0.0285031575354156, "grad_norm": 0.6197071544972387, "learning_rate": 3.886670080218468e-05, "loss": 0.772, "num_tokens": 15566974.0, "step": 167 }, { "epoch": 0.02867383512544803, "grad_norm": 0.637730639108508, "learning_rate": 3.885987369858338e-05, "loss": 0.7895, "num_tokens": 15683081.0, "step": 168 }, { "epoch": 0.02884451271548046, "grad_norm": 0.6378121371213399, "learning_rate": 3.885304659498208e-05, "loss": 0.7316, "num_tokens": 15786689.0, "step": 169 }, { "epoch": 0.029015190305512886, "grad_norm": 0.8736094176914838, "learning_rate": 3.884621949138078e-05, "loss": 0.7443, "num_tokens": 15890450.0, "step": 170 }, { "epoch": 0.029185867895545316, "grad_norm": 0.6316372649344915, "learning_rate": 3.883939238777949e-05, "loss": 0.6948, "num_tokens": 15983760.0, "step": 171 }, { "epoch": 0.029356545485577742, "grad_norm": 0.6362300091662586, "learning_rate": 3.883256528417819e-05, "loss": 0.7181, "num_tokens": 16079744.0, "step": 172 }, { "epoch": 0.029527223075610173, "grad_norm": 0.6445033196461297, "learning_rate": 3.8825738180576895e-05, "loss": 0.7482, "num_tokens": 16180459.0, "step": 173 }, { "epoch": 0.029697900665642603, "grad_norm": 0.6744404233553876, "learning_rate": 3.8818911076975595e-05, "loss": 0.8001, "num_tokens": 16279669.0, "step": 174 }, { "epoch": 0.02986857825567503, "grad_norm": 0.6763245820930079, "learning_rate": 3.88120839733743e-05, "loss": 0.802, "num_tokens": 16388120.0, "step": 175 }, { "epoch": 0.03003925584570746, "grad_norm": 0.6266783074188829, "learning_rate": 3.8805256869773e-05, "loss": 0.6685, "num_tokens": 16472724.0, "step": 176 }, { "epoch": 0.030209933435739886, "grad_norm": 0.6634402306452151, "learning_rate": 3.87984297661717e-05, "loss": 0.6102, "num_tokens": 16544904.0, "step": 177 }, { "epoch": 0.030380611025772317, "grad_norm": 0.6605043556586717, "learning_rate": 3.879160266257041e-05, "loss": 0.7208, "num_tokens": 16626077.0, "step": 178 }, { "epoch": 0.030551288615804743, "grad_norm": 0.6315022793756768, "learning_rate": 3.878477555896911e-05, "loss": 0.6938, "num_tokens": 16703787.0, "step": 179 }, { "epoch": 0.030721966205837174, "grad_norm": 0.6207006201392815, "learning_rate": 3.877794845536782e-05, "loss": 0.7035, "num_tokens": 16799190.0, "step": 180 }, { "epoch": 0.030892643795869604, "grad_norm": 0.6875204836935707, "learning_rate": 3.877112135176651e-05, "loss": 0.7634, "num_tokens": 16876553.0, "step": 181 }, { "epoch": 0.03106332138590203, "grad_norm": 0.7156268844019292, "learning_rate": 3.876429424816522e-05, "loss": 0.7638, "num_tokens": 16944538.0, "step": 182 }, { "epoch": 0.03123399897593446, "grad_norm": 0.5877463398085545, "learning_rate": 3.875746714456392e-05, "loss": 0.7017, "num_tokens": 17037853.0, "step": 183 }, { "epoch": 0.03140467656596689, "grad_norm": 0.6238952839009698, "learning_rate": 3.8750640040962626e-05, "loss": 0.7264, "num_tokens": 17152355.0, "step": 184 }, { "epoch": 0.03157535415599932, "grad_norm": 0.6858243373374306, "learning_rate": 3.8743812937361326e-05, "loss": 0.7088, "num_tokens": 17234595.0, "step": 185 }, { "epoch": 0.031746031746031744, "grad_norm": 0.6572960239702703, "learning_rate": 3.873698583376003e-05, "loss": 0.7093, "num_tokens": 17317415.0, "step": 186 }, { "epoch": 0.03191670933606418, "grad_norm": 0.6328308842872361, "learning_rate": 3.8730158730158734e-05, "loss": 0.7003, "num_tokens": 17413091.0, "step": 187 }, { "epoch": 0.032087386926096605, "grad_norm": 0.5921868197175639, "learning_rate": 3.8723331626557434e-05, "loss": 0.7636, "num_tokens": 17520220.0, "step": 188 }, { "epoch": 0.03225806451612903, "grad_norm": 0.6042507252815604, "learning_rate": 3.871650452295614e-05, "loss": 0.6863, "num_tokens": 17613686.0, "step": 189 }, { "epoch": 0.03242874210616146, "grad_norm": 0.6533037514095091, "learning_rate": 3.870967741935484e-05, "loss": 0.744, "num_tokens": 17717456.0, "step": 190 }, { "epoch": 0.03259941969619389, "grad_norm": 0.726955578017353, "learning_rate": 3.870285031575355e-05, "loss": 0.6877, "num_tokens": 17783454.0, "step": 191 }, { "epoch": 0.03277009728622632, "grad_norm": 0.7614930718095211, "learning_rate": 3.869602321215225e-05, "loss": 0.8384, "num_tokens": 17913168.0, "step": 192 }, { "epoch": 0.032940774876258745, "grad_norm": 0.6862381946151612, "learning_rate": 3.868919610855095e-05, "loss": 0.695, "num_tokens": 17983070.0, "step": 193 }, { "epoch": 0.03311145246629118, "grad_norm": 0.6842782212356128, "learning_rate": 3.868236900494965e-05, "loss": 0.779, "num_tokens": 18081234.0, "step": 194 }, { "epoch": 0.033282130056323606, "grad_norm": 0.689032641168344, "learning_rate": 3.867554190134836e-05, "loss": 0.6845, "num_tokens": 18185061.0, "step": 195 }, { "epoch": 0.03345280764635603, "grad_norm": 0.578829188316049, "learning_rate": 3.866871479774706e-05, "loss": 0.6813, "num_tokens": 18302410.0, "step": 196 }, { "epoch": 0.03362348523638846, "grad_norm": 0.7154253172582636, "learning_rate": 3.866188769414576e-05, "loss": 0.7297, "num_tokens": 18406166.0, "step": 197 }, { "epoch": 0.03379416282642089, "grad_norm": 0.6878518398805505, "learning_rate": 3.8655060590544465e-05, "loss": 0.7107, "num_tokens": 18479968.0, "step": 198 }, { "epoch": 0.03396484041645332, "grad_norm": 0.6513611133794195, "learning_rate": 3.8648233486943165e-05, "loss": 0.6678, "num_tokens": 18566343.0, "step": 199 }, { "epoch": 0.034135518006485746, "grad_norm": 0.6795852143155388, "learning_rate": 3.864140638334187e-05, "loss": 0.607, "num_tokens": 18633535.0, "step": 200 }, { "epoch": 0.03430619559651818, "grad_norm": 0.6588749909750174, "learning_rate": 3.863457927974057e-05, "loss": 0.7402, "num_tokens": 18721880.0, "step": 201 }, { "epoch": 0.034476873186550606, "grad_norm": 0.6577964593126544, "learning_rate": 3.862775217613928e-05, "loss": 0.7538, "num_tokens": 18813666.0, "step": 202 }, { "epoch": 0.03464755077658303, "grad_norm": 0.6450724558535014, "learning_rate": 3.862092507253798e-05, "loss": 0.6576, "num_tokens": 18919744.0, "step": 203 }, { "epoch": 0.03481822836661547, "grad_norm": 0.5807406133798961, "learning_rate": 3.861409796893668e-05, "loss": 0.6417, "num_tokens": 19038949.0, "step": 204 }, { "epoch": 0.034988905956647894, "grad_norm": 0.6984421285960188, "learning_rate": 3.860727086533539e-05, "loss": 0.8612, "num_tokens": 19136460.0, "step": 205 }, { "epoch": 0.03515958354668032, "grad_norm": 0.7131514515999404, "learning_rate": 3.860044376173409e-05, "loss": 0.7844, "num_tokens": 19226334.0, "step": 206 }, { "epoch": 0.03533026113671275, "grad_norm": 0.6570968467641974, "learning_rate": 3.859361665813279e-05, "loss": 0.658, "num_tokens": 19307207.0, "step": 207 }, { "epoch": 0.03550093872674518, "grad_norm": 0.7083973990928504, "learning_rate": 3.858678955453149e-05, "loss": 0.7186, "num_tokens": 19377238.0, "step": 208 }, { "epoch": 0.03567161631677761, "grad_norm": 0.6121794210125173, "learning_rate": 3.8579962450930196e-05, "loss": 0.7109, "num_tokens": 19486090.0, "step": 209 }, { "epoch": 0.035842293906810034, "grad_norm": 0.5895451855219302, "learning_rate": 3.8573135347328897e-05, "loss": 0.6954, "num_tokens": 19606784.0, "step": 210 }, { "epoch": 0.03601297149684247, "grad_norm": 0.622315316048051, "learning_rate": 3.8566308243727604e-05, "loss": 0.7422, "num_tokens": 19703675.0, "step": 211 }, { "epoch": 0.036183649086874894, "grad_norm": 0.714850231197574, "learning_rate": 3.8559481140126304e-05, "loss": 0.7167, "num_tokens": 19792382.0, "step": 212 }, { "epoch": 0.03635432667690732, "grad_norm": 0.6372197127312111, "learning_rate": 3.855265403652501e-05, "loss": 0.7109, "num_tokens": 19887823.0, "step": 213 }, { "epoch": 0.03652500426693975, "grad_norm": 0.6648650930455394, "learning_rate": 3.854582693292371e-05, "loss": 0.6563, "num_tokens": 19960808.0, "step": 214 }, { "epoch": 0.03669568185697218, "grad_norm": 0.5822412068870859, "learning_rate": 3.853899982932241e-05, "loss": 0.6669, "num_tokens": 20074750.0, "step": 215 }, { "epoch": 0.03686635944700461, "grad_norm": 0.6449844810308072, "learning_rate": 3.853217272572112e-05, "loss": 0.6678, "num_tokens": 20158166.0, "step": 216 }, { "epoch": 0.037037037037037035, "grad_norm": 0.7442869270444779, "learning_rate": 3.852534562211982e-05, "loss": 0.8017, "num_tokens": 20247917.0, "step": 217 }, { "epoch": 0.03720771462706947, "grad_norm": 0.6215022534031864, "learning_rate": 3.851851851851852e-05, "loss": 0.6049, "num_tokens": 20332387.0, "step": 218 }, { "epoch": 0.037378392217101895, "grad_norm": 0.6734722286985496, "learning_rate": 3.851169141491722e-05, "loss": 0.6136, "num_tokens": 20401060.0, "step": 219 }, { "epoch": 0.03754906980713432, "grad_norm": 0.6240104562725489, "learning_rate": 3.850486431131593e-05, "loss": 0.6873, "num_tokens": 20493176.0, "step": 220 }, { "epoch": 0.03771974739716675, "grad_norm": 0.6222008441982153, "learning_rate": 3.849803720771463e-05, "loss": 0.8038, "num_tokens": 20600517.0, "step": 221 }, { "epoch": 0.03789042498719918, "grad_norm": 0.8500778189172165, "learning_rate": 3.8491210104113335e-05, "loss": 0.7052, "num_tokens": 20664185.0, "step": 222 }, { "epoch": 0.03806110257723161, "grad_norm": 0.590603893198743, "learning_rate": 3.8484383000512035e-05, "loss": 0.6399, "num_tokens": 20762512.0, "step": 223 }, { "epoch": 0.038231780167264036, "grad_norm": 0.5892461323309548, "learning_rate": 3.8477555896910736e-05, "loss": 0.7103, "num_tokens": 20858697.0, "step": 224 }, { "epoch": 0.03840245775729647, "grad_norm": 0.6487089789063719, "learning_rate": 3.847072879330944e-05, "loss": 0.7493, "num_tokens": 20946163.0, "step": 225 }, { "epoch": 0.038573135347328896, "grad_norm": 0.6969749327857867, "learning_rate": 3.846390168970814e-05, "loss": 0.7399, "num_tokens": 21022050.0, "step": 226 }, { "epoch": 0.03874381293736132, "grad_norm": 0.6755166218497488, "learning_rate": 3.845707458610685e-05, "loss": 0.7194, "num_tokens": 21088689.0, "step": 227 }, { "epoch": 0.03891449052739376, "grad_norm": 0.6609915257851098, "learning_rate": 3.845024748250555e-05, "loss": 0.7361, "num_tokens": 21179593.0, "step": 228 }, { "epoch": 0.03908516811742618, "grad_norm": 0.6387367390293444, "learning_rate": 3.844342037890426e-05, "loss": 0.7075, "num_tokens": 21283981.0, "step": 229 }, { "epoch": 0.03925584570745861, "grad_norm": 0.6331325617527113, "learning_rate": 3.843659327530296e-05, "loss": 0.7143, "num_tokens": 21384169.0, "step": 230 }, { "epoch": 0.03942652329749104, "grad_norm": 0.6210236461894455, "learning_rate": 3.842976617170166e-05, "loss": 0.6492, "num_tokens": 21476891.0, "step": 231 }, { "epoch": 0.03959720088752347, "grad_norm": 0.6114145772564389, "learning_rate": 3.842293906810036e-05, "loss": 0.7085, "num_tokens": 21576805.0, "step": 232 }, { "epoch": 0.0397678784775559, "grad_norm": 0.7269263405346674, "learning_rate": 3.8416111964499066e-05, "loss": 0.761, "num_tokens": 21657085.0, "step": 233 }, { "epoch": 0.039938556067588324, "grad_norm": 0.6820360944752177, "learning_rate": 3.8409284860897767e-05, "loss": 0.8129, "num_tokens": 21759879.0, "step": 234 }, { "epoch": 0.04010923365762076, "grad_norm": 0.6088167202958221, "learning_rate": 3.840245775729647e-05, "loss": 0.7167, "num_tokens": 21854206.0, "step": 235 }, { "epoch": 0.040279911247653184, "grad_norm": 0.715618163345676, "learning_rate": 3.8395630653695174e-05, "loss": 0.8192, "num_tokens": 21943842.0, "step": 236 }, { "epoch": 0.04045058883768561, "grad_norm": 0.5716060122844864, "learning_rate": 3.8388803550093874e-05, "loss": 0.6185, "num_tokens": 22031117.0, "step": 237 }, { "epoch": 0.04062126642771804, "grad_norm": 0.5886556672114311, "learning_rate": 3.838197644649258e-05, "loss": 0.6897, "num_tokens": 22136506.0, "step": 238 }, { "epoch": 0.04079194401775047, "grad_norm": 0.620802577880916, "learning_rate": 3.837514934289128e-05, "loss": 0.6853, "num_tokens": 22223545.0, "step": 239 }, { "epoch": 0.0409626216077829, "grad_norm": 0.6952570223287441, "learning_rate": 3.836832223928998e-05, "loss": 0.7166, "num_tokens": 22302727.0, "step": 240 }, { "epoch": 0.041133299197815325, "grad_norm": 0.5964784433761416, "learning_rate": 3.836149513568869e-05, "loss": 0.756, "num_tokens": 22410338.0, "step": 241 }, { "epoch": 0.04130397678784776, "grad_norm": 0.5745001434209539, "learning_rate": 3.835466803208739e-05, "loss": 0.6975, "num_tokens": 22520736.0, "step": 242 }, { "epoch": 0.041474654377880185, "grad_norm": 0.684094979118212, "learning_rate": 3.834784092848609e-05, "loss": 0.7132, "num_tokens": 22623372.0, "step": 243 }, { "epoch": 0.04164533196791261, "grad_norm": 0.6525461038791344, "learning_rate": 3.83410138248848e-05, "loss": 0.6438, "num_tokens": 22717206.0, "step": 244 }, { "epoch": 0.04181600955794504, "grad_norm": 0.6349344841255985, "learning_rate": 3.83341867212835e-05, "loss": 0.7239, "num_tokens": 22795800.0, "step": 245 }, { "epoch": 0.04198668714797747, "grad_norm": 0.6760001681040225, "learning_rate": 3.83273596176822e-05, "loss": 0.7082, "num_tokens": 22889168.0, "step": 246 }, { "epoch": 0.0421573647380099, "grad_norm": 0.6525881334988204, "learning_rate": 3.8320532514080905e-05, "loss": 0.6191, "num_tokens": 22958499.0, "step": 247 }, { "epoch": 0.042328042328042326, "grad_norm": 0.6995991558863518, "learning_rate": 3.8313705410479606e-05, "loss": 0.7662, "num_tokens": 23032072.0, "step": 248 }, { "epoch": 0.04249871991807476, "grad_norm": 0.6222321578658456, "learning_rate": 3.830687830687831e-05, "loss": 0.7402, "num_tokens": 23150064.0, "step": 249 }, { "epoch": 0.042669397508107186, "grad_norm": 0.7883703436400517, "learning_rate": 3.830005120327701e-05, "loss": 0.7649, "num_tokens": 23238540.0, "step": 250 }, { "epoch": 0.04284007509813961, "grad_norm": 0.6108075570979011, "learning_rate": 3.8293224099675714e-05, "loss": 0.7755, "num_tokens": 23349352.0, "step": 251 }, { "epoch": 0.043010752688172046, "grad_norm": 0.5629583837106155, "learning_rate": 3.828639699607442e-05, "loss": 0.7611, "num_tokens": 23487435.0, "step": 252 }, { "epoch": 0.04318143027820447, "grad_norm": 0.7766316251312405, "learning_rate": 3.827956989247312e-05, "loss": 0.7377, "num_tokens": 23555548.0, "step": 253 }, { "epoch": 0.0433521078682369, "grad_norm": 0.688981917011655, "learning_rate": 3.827274278887183e-05, "loss": 0.8426, "num_tokens": 23651358.0, "step": 254 }, { "epoch": 0.04352278545826933, "grad_norm": 0.6003815064076454, "learning_rate": 3.826591568527052e-05, "loss": 0.6793, "num_tokens": 23751596.0, "step": 255 }, { "epoch": 0.04369346304830176, "grad_norm": 0.6082933288781598, "learning_rate": 3.825908858166923e-05, "loss": 0.7241, "num_tokens": 23864315.0, "step": 256 }, { "epoch": 0.04386414063833419, "grad_norm": 0.6035536001130845, "learning_rate": 3.825226147806793e-05, "loss": 0.7836, "num_tokens": 23983063.0, "step": 257 }, { "epoch": 0.044034818228366614, "grad_norm": 0.6964374777788611, "learning_rate": 3.8245434374466637e-05, "loss": 0.7099, "num_tokens": 24074922.0, "step": 258 }, { "epoch": 0.04420549581839905, "grad_norm": 0.6194280084542265, "learning_rate": 3.823860727086534e-05, "loss": 0.7005, "num_tokens": 24178724.0, "step": 259 }, { "epoch": 0.044376173408431474, "grad_norm": 0.6941184117633363, "learning_rate": 3.8231780167264044e-05, "loss": 0.6211, "num_tokens": 24230818.0, "step": 260 }, { "epoch": 0.0445468509984639, "grad_norm": 0.636335070564167, "learning_rate": 3.8224953063662744e-05, "loss": 0.7341, "num_tokens": 24335908.0, "step": 261 }, { "epoch": 0.04471752858849633, "grad_norm": 0.5880543296634169, "learning_rate": 3.8218125960061445e-05, "loss": 0.674, "num_tokens": 24437899.0, "step": 262 }, { "epoch": 0.04488820617852876, "grad_norm": 0.6455051656522064, "learning_rate": 3.821129885646015e-05, "loss": 0.7503, "num_tokens": 24524874.0, "step": 263 }, { "epoch": 0.04505888376856119, "grad_norm": 0.6307888036065953, "learning_rate": 3.820447175285885e-05, "loss": 0.6867, "num_tokens": 24624753.0, "step": 264 }, { "epoch": 0.045229561358593615, "grad_norm": 0.5626945875143103, "learning_rate": 3.819764464925756e-05, "loss": 0.7063, "num_tokens": 24744237.0, "step": 265 }, { "epoch": 0.04540023894862605, "grad_norm": 0.6297842303859197, "learning_rate": 3.819081754565626e-05, "loss": 0.6838, "num_tokens": 24827242.0, "step": 266 }, { "epoch": 0.045570916538658475, "grad_norm": 0.57803160300511, "learning_rate": 3.818399044205496e-05, "loss": 0.6907, "num_tokens": 24923264.0, "step": 267 }, { "epoch": 0.0457415941286909, "grad_norm": 0.57561982584915, "learning_rate": 3.817716333845366e-05, "loss": 0.6745, "num_tokens": 25016783.0, "step": 268 }, { "epoch": 0.04591227171872333, "grad_norm": 0.5898647618667103, "learning_rate": 3.817033623485237e-05, "loss": 0.7535, "num_tokens": 25125599.0, "step": 269 }, { "epoch": 0.04608294930875576, "grad_norm": 0.5617337295246779, "learning_rate": 3.816350913125107e-05, "loss": 0.722, "num_tokens": 25233826.0, "step": 270 }, { "epoch": 0.04625362689878819, "grad_norm": 0.6567101200269726, "learning_rate": 3.815668202764977e-05, "loss": 0.6623, "num_tokens": 25311285.0, "step": 271 }, { "epoch": 0.046424304488820615, "grad_norm": 0.5736134523614869, "learning_rate": 3.8149854924048476e-05, "loss": 0.7753, "num_tokens": 25439181.0, "step": 272 }, { "epoch": 0.04659498207885305, "grad_norm": 0.5735245682460768, "learning_rate": 3.8143027820447176e-05, "loss": 0.7254, "num_tokens": 25555197.0, "step": 273 }, { "epoch": 0.046765659668885476, "grad_norm": 0.5937705901912523, "learning_rate": 3.813620071684588e-05, "loss": 0.7542, "num_tokens": 25684504.0, "step": 274 }, { "epoch": 0.0469363372589179, "grad_norm": 0.610793352589693, "learning_rate": 3.8129373613244584e-05, "loss": 0.6858, "num_tokens": 25773466.0, "step": 275 }, { "epoch": 0.047107014848950336, "grad_norm": 0.6209349900879957, "learning_rate": 3.812254650964329e-05, "loss": 0.6717, "num_tokens": 25859770.0, "step": 276 }, { "epoch": 0.04727769243898276, "grad_norm": 0.618560937061168, "learning_rate": 3.811571940604199e-05, "loss": 0.6747, "num_tokens": 25945581.0, "step": 277 }, { "epoch": 0.04744837002901519, "grad_norm": 0.5893492194310449, "learning_rate": 3.810889230244069e-05, "loss": 0.7323, "num_tokens": 26052467.0, "step": 278 }, { "epoch": 0.047619047619047616, "grad_norm": 0.6103763777874639, "learning_rate": 3.81020651988394e-05, "loss": 0.6923, "num_tokens": 26159719.0, "step": 279 }, { "epoch": 0.04778972520908005, "grad_norm": 0.7591735604873373, "learning_rate": 3.80952380952381e-05, "loss": 0.7395, "num_tokens": 26237527.0, "step": 280 }, { "epoch": 0.04796040279911248, "grad_norm": 0.6214375798546801, "learning_rate": 3.80884109916368e-05, "loss": 0.6459, "num_tokens": 26318069.0, "step": 281 }, { "epoch": 0.048131080389144903, "grad_norm": 0.6347159899200633, "learning_rate": 3.80815838880355e-05, "loss": 0.6436, "num_tokens": 26385961.0, "step": 282 }, { "epoch": 0.04830175797917734, "grad_norm": 0.6059597111647619, "learning_rate": 3.807475678443421e-05, "loss": 0.676, "num_tokens": 26480913.0, "step": 283 }, { "epoch": 0.048472435569209764, "grad_norm": 0.5379958560062462, "learning_rate": 3.806792968083291e-05, "loss": 0.655, "num_tokens": 26590390.0, "step": 284 }, { "epoch": 0.04864311315924219, "grad_norm": 0.6063138258324356, "learning_rate": 3.8061102577231614e-05, "loss": 0.6646, "num_tokens": 26684363.0, "step": 285 }, { "epoch": 0.04881379074927462, "grad_norm": 0.6113104295984738, "learning_rate": 3.8054275473630315e-05, "loss": 0.6162, "num_tokens": 26762027.0, "step": 286 }, { "epoch": 0.04898446833930705, "grad_norm": 0.590332168720213, "learning_rate": 3.804744837002902e-05, "loss": 0.6839, "num_tokens": 26848967.0, "step": 287 }, { "epoch": 0.04915514592933948, "grad_norm": 0.6497749731867216, "learning_rate": 3.804062126642772e-05, "loss": 0.7286, "num_tokens": 26932063.0, "step": 288 }, { "epoch": 0.049325823519371904, "grad_norm": 0.6389317629167284, "learning_rate": 3.803379416282642e-05, "loss": 0.6919, "num_tokens": 27020798.0, "step": 289 }, { "epoch": 0.04949650110940434, "grad_norm": 0.6849937248453838, "learning_rate": 3.802696705922513e-05, "loss": 0.7606, "num_tokens": 27098494.0, "step": 290 }, { "epoch": 0.049667178699436765, "grad_norm": 0.5898908982039505, "learning_rate": 3.802013995562383e-05, "loss": 0.6489, "num_tokens": 27208445.0, "step": 291 }, { "epoch": 0.04983785628946919, "grad_norm": 0.6912780578303257, "learning_rate": 3.801331285202254e-05, "loss": 0.6651, "num_tokens": 27284892.0, "step": 292 }, { "epoch": 0.05000853387950162, "grad_norm": 0.6263191348893974, "learning_rate": 3.800648574842123e-05, "loss": 0.6532, "num_tokens": 27380275.0, "step": 293 }, { "epoch": 0.05017921146953405, "grad_norm": 0.6175922576658898, "learning_rate": 3.799965864481994e-05, "loss": 0.6085, "num_tokens": 27467130.0, "step": 294 }, { "epoch": 0.05034988905956648, "grad_norm": 0.628784904960062, "learning_rate": 3.799283154121864e-05, "loss": 0.6951, "num_tokens": 27564913.0, "step": 295 }, { "epoch": 0.050520566649598905, "grad_norm": 0.7112834864156032, "learning_rate": 3.7986004437617346e-05, "loss": 0.708, "num_tokens": 27641317.0, "step": 296 }, { "epoch": 0.05069124423963134, "grad_norm": 0.6080603299975411, "learning_rate": 3.7979177334016046e-05, "loss": 0.7182, "num_tokens": 27749631.0, "step": 297 }, { "epoch": 0.050861921829663766, "grad_norm": 0.6359672161568899, "learning_rate": 3.7972350230414746e-05, "loss": 0.7225, "num_tokens": 27851888.0, "step": 298 }, { "epoch": 0.05103259941969619, "grad_norm": 0.6243897079554416, "learning_rate": 3.7965523126813454e-05, "loss": 0.6366, "num_tokens": 27939513.0, "step": 299 }, { "epoch": 0.051203277009728626, "grad_norm": 0.6336824410583523, "learning_rate": 3.7958696023212154e-05, "loss": 0.6492, "num_tokens": 28014087.0, "step": 300 }, { "epoch": 0.05137395459976105, "grad_norm": 0.6836687269582991, "learning_rate": 3.795186891961086e-05, "loss": 0.7505, "num_tokens": 28106455.0, "step": 301 }, { "epoch": 0.05154463218979348, "grad_norm": 0.58897601103485, "learning_rate": 3.794504181600956e-05, "loss": 0.6842, "num_tokens": 28228968.0, "step": 302 }, { "epoch": 0.051715309779825906, "grad_norm": 0.5686816880536397, "learning_rate": 3.793821471240827e-05, "loss": 0.6842, "num_tokens": 28336086.0, "step": 303 }, { "epoch": 0.05188598736985834, "grad_norm": 0.6351971025260795, "learning_rate": 3.793138760880697e-05, "loss": 0.6816, "num_tokens": 28418632.0, "step": 304 }, { "epoch": 0.05205666495989077, "grad_norm": 0.6692197003414279, "learning_rate": 3.792456050520567e-05, "loss": 0.7205, "num_tokens": 28525830.0, "step": 305 }, { "epoch": 0.05222734254992319, "grad_norm": 0.5915651352988014, "learning_rate": 3.791773340160437e-05, "loss": 0.6356, "num_tokens": 28632684.0, "step": 306 }, { "epoch": 0.05239802013995563, "grad_norm": 0.6430012226562288, "learning_rate": 3.791090629800308e-05, "loss": 0.6208, "num_tokens": 28722531.0, "step": 307 }, { "epoch": 0.052568697729988054, "grad_norm": 0.6148858177377022, "learning_rate": 3.790407919440178e-05, "loss": 0.7157, "num_tokens": 28816925.0, "step": 308 }, { "epoch": 0.05273937532002048, "grad_norm": 0.5532109935484513, "learning_rate": 3.789725209080048e-05, "loss": 0.6494, "num_tokens": 28939697.0, "step": 309 }, { "epoch": 0.05291005291005291, "grad_norm": 0.6252304951482661, "learning_rate": 3.7890424987199185e-05, "loss": 0.8028, "num_tokens": 29043697.0, "step": 310 }, { "epoch": 0.05308073050008534, "grad_norm": 0.6396855593769076, "learning_rate": 3.7883597883597885e-05, "loss": 0.6816, "num_tokens": 29133084.0, "step": 311 }, { "epoch": 0.05325140809011777, "grad_norm": 0.5486214048846672, "learning_rate": 3.787677077999659e-05, "loss": 0.6813, "num_tokens": 29241495.0, "step": 312 }, { "epoch": 0.053422085680150194, "grad_norm": 0.6346625740127403, "learning_rate": 3.786994367639529e-05, "loss": 0.6512, "num_tokens": 29327619.0, "step": 313 }, { "epoch": 0.05359276327018263, "grad_norm": 0.6518780783411734, "learning_rate": 3.786311657279399e-05, "loss": 0.8498, "num_tokens": 29443552.0, "step": 314 }, { "epoch": 0.053763440860215055, "grad_norm": 0.6771060215630701, "learning_rate": 3.78562894691927e-05, "loss": 0.6308, "num_tokens": 29517036.0, "step": 315 }, { "epoch": 0.05393411845024748, "grad_norm": 0.655692053255562, "learning_rate": 3.78494623655914e-05, "loss": 0.7358, "num_tokens": 29591582.0, "step": 316 }, { "epoch": 0.05410479604027991, "grad_norm": 0.6529142599954677, "learning_rate": 3.78426352619901e-05, "loss": 0.7736, "num_tokens": 29702915.0, "step": 317 }, { "epoch": 0.05427547363031234, "grad_norm": 0.5849937177045179, "learning_rate": 3.783580815838881e-05, "loss": 0.6948, "num_tokens": 29803767.0, "step": 318 }, { "epoch": 0.05444615122034477, "grad_norm": 0.6470684234957662, "learning_rate": 3.782898105478751e-05, "loss": 0.6585, "num_tokens": 29880489.0, "step": 319 }, { "epoch": 0.054616828810377195, "grad_norm": 0.5871947799779641, "learning_rate": 3.782215395118621e-05, "loss": 0.6613, "num_tokens": 29966807.0, "step": 320 }, { "epoch": 0.05478750640040963, "grad_norm": 0.6748519426257876, "learning_rate": 3.7815326847584916e-05, "loss": 0.6879, "num_tokens": 30034718.0, "step": 321 }, { "epoch": 0.054958183990442055, "grad_norm": 0.5726725282393017, "learning_rate": 3.7808499743983616e-05, "loss": 0.7544, "num_tokens": 30160195.0, "step": 322 }, { "epoch": 0.05512886158047448, "grad_norm": 0.6397946190359531, "learning_rate": 3.7801672640382324e-05, "loss": 0.7291, "num_tokens": 30262680.0, "step": 323 }, { "epoch": 0.055299539170506916, "grad_norm": 0.6130145787681108, "learning_rate": 3.7794845536781024e-05, "loss": 0.6148, "num_tokens": 30354916.0, "step": 324 }, { "epoch": 0.05547021676053934, "grad_norm": 0.5974031396073681, "learning_rate": 3.7788018433179724e-05, "loss": 0.6979, "num_tokens": 30448414.0, "step": 325 }, { "epoch": 0.05564089435057177, "grad_norm": 0.5976981697259844, "learning_rate": 3.778119132957843e-05, "loss": 0.6524, "num_tokens": 30526628.0, "step": 326 }, { "epoch": 0.055811571940604196, "grad_norm": 0.6304826988991508, "learning_rate": 3.777436422597713e-05, "loss": 0.6256, "num_tokens": 30624545.0, "step": 327 }, { "epoch": 0.05598224953063663, "grad_norm": 0.6813711287723402, "learning_rate": 3.776753712237584e-05, "loss": 0.7305, "num_tokens": 30716230.0, "step": 328 }, { "epoch": 0.056152927120669056, "grad_norm": 0.563907626488399, "learning_rate": 3.776071001877454e-05, "loss": 0.642, "num_tokens": 30816378.0, "step": 329 }, { "epoch": 0.05632360471070148, "grad_norm": 0.6031698672329959, "learning_rate": 3.775388291517324e-05, "loss": 0.6443, "num_tokens": 30906312.0, "step": 330 }, { "epoch": 0.05649428230073392, "grad_norm": 0.5755762702383291, "learning_rate": 3.774705581157194e-05, "loss": 0.6443, "num_tokens": 31014384.0, "step": 331 }, { "epoch": 0.056664959890766343, "grad_norm": 0.6595818766113125, "learning_rate": 3.774022870797065e-05, "loss": 0.5974, "num_tokens": 31081340.0, "step": 332 }, { "epoch": 0.05683563748079877, "grad_norm": 0.5897464323445675, "learning_rate": 3.773340160436935e-05, "loss": 0.7197, "num_tokens": 31194102.0, "step": 333 }, { "epoch": 0.0570063150708312, "grad_norm": 0.7092730698637518, "learning_rate": 3.7726574500768055e-05, "loss": 0.7674, "num_tokens": 31273069.0, "step": 334 }, { "epoch": 0.05717699266086363, "grad_norm": 0.5633646092664663, "learning_rate": 3.7719747397166755e-05, "loss": 0.7509, "num_tokens": 31395089.0, "step": 335 }, { "epoch": 0.05734767025089606, "grad_norm": 0.6596642203984346, "learning_rate": 3.7712920293565456e-05, "loss": 0.7441, "num_tokens": 31471097.0, "step": 336 }, { "epoch": 0.057518347840928484, "grad_norm": 0.618376222573911, "learning_rate": 3.770609318996416e-05, "loss": 0.7575, "num_tokens": 31578871.0, "step": 337 }, { "epoch": 0.05768902543096092, "grad_norm": 0.6655296451467967, "learning_rate": 3.769926608636286e-05, "loss": 0.7395, "num_tokens": 31663154.0, "step": 338 }, { "epoch": 0.057859703020993344, "grad_norm": 0.5654197679659851, "learning_rate": 3.769243898276157e-05, "loss": 0.6801, "num_tokens": 31752713.0, "step": 339 }, { "epoch": 0.05803038061102577, "grad_norm": 0.557726729457756, "learning_rate": 3.768561187916027e-05, "loss": 0.7357, "num_tokens": 31870373.0, "step": 340 }, { "epoch": 0.0582010582010582, "grad_norm": 0.6511869748115202, "learning_rate": 3.767878477555897e-05, "loss": 0.6266, "num_tokens": 31942321.0, "step": 341 }, { "epoch": 0.05837173579109063, "grad_norm": 0.6126139390912213, "learning_rate": 3.767195767195767e-05, "loss": 0.6416, "num_tokens": 32020304.0, "step": 342 }, { "epoch": 0.05854241338112306, "grad_norm": 0.6207906938465051, "learning_rate": 3.766513056835638e-05, "loss": 0.8902, "num_tokens": 32147859.0, "step": 343 }, { "epoch": 0.058713090971155485, "grad_norm": 0.5839115586986341, "learning_rate": 3.765830346475508e-05, "loss": 0.6709, "num_tokens": 32255454.0, "step": 344 }, { "epoch": 0.05888376856118792, "grad_norm": 0.6117749194430162, "learning_rate": 3.765147636115378e-05, "loss": 0.6951, "num_tokens": 32347959.0, "step": 345 }, { "epoch": 0.059054446151220345, "grad_norm": 0.7899599700946952, "learning_rate": 3.7644649257552486e-05, "loss": 0.7121, "num_tokens": 32434993.0, "step": 346 }, { "epoch": 0.05922512374125277, "grad_norm": 0.696110797268787, "learning_rate": 3.763782215395119e-05, "loss": 0.7693, "num_tokens": 32515984.0, "step": 347 }, { "epoch": 0.059395801331285206, "grad_norm": 0.6020602800551419, "learning_rate": 3.7630995050349894e-05, "loss": 0.6211, "num_tokens": 32601492.0, "step": 348 }, { "epoch": 0.05956647892131763, "grad_norm": 0.6354827029858855, "learning_rate": 3.7624167946748594e-05, "loss": 0.6755, "num_tokens": 32679240.0, "step": 349 }, { "epoch": 0.05973715651135006, "grad_norm": 0.630934904500074, "learning_rate": 3.76173408431473e-05, "loss": 0.646, "num_tokens": 32761575.0, "step": 350 }, { "epoch": 0.059907834101382486, "grad_norm": 0.6980156017972984, "learning_rate": 3.7610513739546e-05, "loss": 0.8192, "num_tokens": 32869168.0, "step": 351 }, { "epoch": 0.06007851169141492, "grad_norm": 0.5432163540470442, "learning_rate": 3.76036866359447e-05, "loss": 0.6837, "num_tokens": 32995115.0, "step": 352 }, { "epoch": 0.060249189281447346, "grad_norm": 0.6253340539995649, "learning_rate": 3.759685953234341e-05, "loss": 0.6686, "num_tokens": 33071548.0, "step": 353 }, { "epoch": 0.06041986687147977, "grad_norm": 0.5661238104760647, "learning_rate": 3.759003242874211e-05, "loss": 0.6937, "num_tokens": 33171193.0, "step": 354 }, { "epoch": 0.06059054446151221, "grad_norm": 0.7015252786346565, "learning_rate": 3.758320532514081e-05, "loss": 0.7226, "num_tokens": 33262750.0, "step": 355 }, { "epoch": 0.06076122205154463, "grad_norm": 0.6352343880154788, "learning_rate": 3.757637822153951e-05, "loss": 0.7279, "num_tokens": 33364584.0, "step": 356 }, { "epoch": 0.06093189964157706, "grad_norm": 0.5681604111780297, "learning_rate": 3.756955111793822e-05, "loss": 0.648, "num_tokens": 33458233.0, "step": 357 }, { "epoch": 0.06110257723160949, "grad_norm": 0.6941505043963863, "learning_rate": 3.756272401433692e-05, "loss": 0.6391, "num_tokens": 33517862.0, "step": 358 }, { "epoch": 0.06127325482164192, "grad_norm": 0.593842649567541, "learning_rate": 3.7555896910735625e-05, "loss": 0.6306, "num_tokens": 33601436.0, "step": 359 }, { "epoch": 0.06144393241167435, "grad_norm": 0.6513768917133473, "learning_rate": 3.7549069807134325e-05, "loss": 0.7172, "num_tokens": 33721288.0, "step": 360 }, { "epoch": 0.061614610001706774, "grad_norm": 0.6448241351864141, "learning_rate": 3.754224270353303e-05, "loss": 0.6901, "num_tokens": 33808335.0, "step": 361 }, { "epoch": 0.06178528759173921, "grad_norm": 0.6314749265241472, "learning_rate": 3.753541559993173e-05, "loss": 0.729, "num_tokens": 33896170.0, "step": 362 }, { "epoch": 0.061955965181771634, "grad_norm": 0.6249680159870818, "learning_rate": 3.7528588496330433e-05, "loss": 0.6657, "num_tokens": 33990712.0, "step": 363 }, { "epoch": 0.06212664277180406, "grad_norm": 0.7819666474285782, "learning_rate": 3.752176139272914e-05, "loss": 0.7161, "num_tokens": 34084129.0, "step": 364 }, { "epoch": 0.06229732036183649, "grad_norm": 0.623136573424545, "learning_rate": 3.751493428912784e-05, "loss": 0.7713, "num_tokens": 34182650.0, "step": 365 }, { "epoch": 0.06246799795186892, "grad_norm": 0.6212610483902984, "learning_rate": 3.750810718552655e-05, "loss": 0.7185, "num_tokens": 34273681.0, "step": 366 }, { "epoch": 0.06263867554190135, "grad_norm": 0.6213927455182943, "learning_rate": 3.750128008192524e-05, "loss": 0.6249, "num_tokens": 34372247.0, "step": 367 }, { "epoch": 0.06280935313193378, "grad_norm": 0.6930180718157347, "learning_rate": 3.749445297832395e-05, "loss": 0.6806, "num_tokens": 34436953.0, "step": 368 }, { "epoch": 0.0629800307219662, "grad_norm": 0.596709588123384, "learning_rate": 3.748762587472265e-05, "loss": 0.7123, "num_tokens": 34534992.0, "step": 369 }, { "epoch": 0.06315070831199864, "grad_norm": 0.5996450257006977, "learning_rate": 3.7480798771121356e-05, "loss": 0.6974, "num_tokens": 34636515.0, "step": 370 }, { "epoch": 0.06332138590203107, "grad_norm": 0.6658216095479699, "learning_rate": 3.747397166752006e-05, "loss": 0.6464, "num_tokens": 34724459.0, "step": 371 }, { "epoch": 0.06349206349206349, "grad_norm": 0.5763381602786025, "learning_rate": 3.746714456391876e-05, "loss": 0.6996, "num_tokens": 34819163.0, "step": 372 }, { "epoch": 0.06366274108209592, "grad_norm": 0.6051052836228817, "learning_rate": 3.7460317460317464e-05, "loss": 0.7139, "num_tokens": 34908285.0, "step": 373 }, { "epoch": 0.06383341867212836, "grad_norm": 0.6835901793665402, "learning_rate": 3.7453490356716165e-05, "loss": 0.6645, "num_tokens": 34969884.0, "step": 374 }, { "epoch": 0.06400409626216078, "grad_norm": 0.6229655940402707, "learning_rate": 3.744666325311487e-05, "loss": 0.709, "num_tokens": 35060653.0, "step": 375 }, { "epoch": 0.06417477385219321, "grad_norm": 0.6953826673855853, "learning_rate": 3.743983614951357e-05, "loss": 0.7187, "num_tokens": 35158562.0, "step": 376 }, { "epoch": 0.06434545144222563, "grad_norm": 0.6494139911621649, "learning_rate": 3.743300904591228e-05, "loss": 0.6973, "num_tokens": 35262656.0, "step": 377 }, { "epoch": 0.06451612903225806, "grad_norm": 0.8114792870367081, "learning_rate": 3.742618194231098e-05, "loss": 0.7079, "num_tokens": 35347985.0, "step": 378 }, { "epoch": 0.0646868066222905, "grad_norm": 0.6485297541376812, "learning_rate": 3.741935483870968e-05, "loss": 0.6747, "num_tokens": 35456474.0, "step": 379 }, { "epoch": 0.06485748421232292, "grad_norm": 0.8323778201312184, "learning_rate": 3.741252773510838e-05, "loss": 0.6492, "num_tokens": 35525737.0, "step": 380 }, { "epoch": 0.06502816180235535, "grad_norm": 0.6501435260781027, "learning_rate": 3.740570063150709e-05, "loss": 0.7383, "num_tokens": 35614137.0, "step": 381 }, { "epoch": 0.06519883939238778, "grad_norm": 0.6287688700486896, "learning_rate": 3.739887352790579e-05, "loss": 0.6872, "num_tokens": 35704315.0, "step": 382 }, { "epoch": 0.0653695169824202, "grad_norm": 0.6275446535269974, "learning_rate": 3.739204642430449e-05, "loss": 0.7269, "num_tokens": 35805797.0, "step": 383 }, { "epoch": 0.06554019457245264, "grad_norm": 0.6502394930113807, "learning_rate": 3.7385219320703195e-05, "loss": 0.7353, "num_tokens": 35881059.0, "step": 384 }, { "epoch": 0.06571087216248507, "grad_norm": 0.6145683864620181, "learning_rate": 3.7378392217101896e-05, "loss": 0.6867, "num_tokens": 35978961.0, "step": 385 }, { "epoch": 0.06588154975251749, "grad_norm": 0.6445877564143657, "learning_rate": 3.73715651135006e-05, "loss": 0.7501, "num_tokens": 36089043.0, "step": 386 }, { "epoch": 0.06605222734254992, "grad_norm": 0.6063159022792604, "learning_rate": 3.73647380098993e-05, "loss": 0.7085, "num_tokens": 36176176.0, "step": 387 }, { "epoch": 0.06622290493258236, "grad_norm": 0.6727770573549958, "learning_rate": 3.735791090629801e-05, "loss": 0.8711, "num_tokens": 36277250.0, "step": 388 }, { "epoch": 0.06639358252261478, "grad_norm": 0.623310534898411, "learning_rate": 3.735108380269671e-05, "loss": 0.7457, "num_tokens": 36373957.0, "step": 389 }, { "epoch": 0.06656426011264721, "grad_norm": 0.5806434325474403, "learning_rate": 3.734425669909541e-05, "loss": 0.6898, "num_tokens": 36490637.0, "step": 390 }, { "epoch": 0.06673493770267964, "grad_norm": 0.6405041237951864, "learning_rate": 3.733742959549412e-05, "loss": 0.7375, "num_tokens": 36580948.0, "step": 391 }, { "epoch": 0.06690561529271206, "grad_norm": 0.5741425983530408, "learning_rate": 3.733060249189282e-05, "loss": 0.6373, "num_tokens": 36675286.0, "step": 392 }, { "epoch": 0.0670762928827445, "grad_norm": 0.594371912247555, "learning_rate": 3.732377538829152e-05, "loss": 0.6641, "num_tokens": 36761001.0, "step": 393 }, { "epoch": 0.06724697047277692, "grad_norm": 0.5391937711030764, "learning_rate": 3.731694828469022e-05, "loss": 0.6588, "num_tokens": 36866705.0, "step": 394 }, { "epoch": 0.06741764806280935, "grad_norm": 0.5911466604120026, "learning_rate": 3.731012118108893e-05, "loss": 0.6659, "num_tokens": 36955975.0, "step": 395 }, { "epoch": 0.06758832565284179, "grad_norm": 0.6671953460804453, "learning_rate": 3.730329407748763e-05, "loss": 0.6408, "num_tokens": 37018213.0, "step": 396 }, { "epoch": 0.0677590032428742, "grad_norm": 0.6097411852759803, "learning_rate": 3.7296466973886334e-05, "loss": 0.677, "num_tokens": 37102646.0, "step": 397 }, { "epoch": 0.06792968083290664, "grad_norm": 0.6489379649230457, "learning_rate": 3.7289639870285035e-05, "loss": 0.7303, "num_tokens": 37188453.0, "step": 398 }, { "epoch": 0.06810035842293907, "grad_norm": 0.6608131972519038, "learning_rate": 3.7282812766683735e-05, "loss": 0.733, "num_tokens": 37270546.0, "step": 399 }, { "epoch": 0.06827103601297149, "grad_norm": 0.529758043822333, "learning_rate": 3.727598566308244e-05, "loss": 0.6306, "num_tokens": 37384124.0, "step": 400 }, { "epoch": 0.06844171360300393, "grad_norm": 0.6109197802484728, "learning_rate": 3.726915855948114e-05, "loss": 0.6559, "num_tokens": 37458924.0, "step": 401 }, { "epoch": 0.06861239119303636, "grad_norm": 0.5896082109101846, "learning_rate": 3.726233145587985e-05, "loss": 0.7135, "num_tokens": 37550494.0, "step": 402 }, { "epoch": 0.06878306878306878, "grad_norm": 0.5910216988125868, "learning_rate": 3.725550435227855e-05, "loss": 0.6942, "num_tokens": 37654010.0, "step": 403 }, { "epoch": 0.06895374637310121, "grad_norm": 0.567421516260058, "learning_rate": 3.724867724867725e-05, "loss": 0.7834, "num_tokens": 37772866.0, "step": 404 }, { "epoch": 0.06912442396313365, "grad_norm": 0.5818669821307247, "learning_rate": 3.724185014507595e-05, "loss": 0.716, "num_tokens": 37862071.0, "step": 405 }, { "epoch": 0.06929510155316607, "grad_norm": 0.7090259986050121, "learning_rate": 3.723502304147466e-05, "loss": 0.7839, "num_tokens": 37955278.0, "step": 406 }, { "epoch": 0.0694657791431985, "grad_norm": 0.6272499259135247, "learning_rate": 3.722819593787336e-05, "loss": 0.7571, "num_tokens": 38041561.0, "step": 407 }, { "epoch": 0.06963645673323093, "grad_norm": 0.5797182356151931, "learning_rate": 3.7221368834272065e-05, "loss": 0.6863, "num_tokens": 38144188.0, "step": 408 }, { "epoch": 0.06980713432326335, "grad_norm": 0.5637511954820482, "learning_rate": 3.7214541730670766e-05, "loss": 0.6038, "num_tokens": 38227874.0, "step": 409 }, { "epoch": 0.06997781191329579, "grad_norm": 0.577565541492488, "learning_rate": 3.7207714627069466e-05, "loss": 0.6494, "num_tokens": 38324140.0, "step": 410 }, { "epoch": 0.0701484895033282, "grad_norm": 0.6986802549714193, "learning_rate": 3.720088752346817e-05, "loss": 0.7747, "num_tokens": 38387616.0, "step": 411 }, { "epoch": 0.07031916709336064, "grad_norm": 0.6095864465584332, "learning_rate": 3.7194060419866874e-05, "loss": 0.6442, "num_tokens": 38481855.0, "step": 412 }, { "epoch": 0.07048984468339307, "grad_norm": 0.5755819955879264, "learning_rate": 3.718723331626558e-05, "loss": 0.7263, "num_tokens": 38582235.0, "step": 413 }, { "epoch": 0.0706605222734255, "grad_norm": 0.6223497068927212, "learning_rate": 3.718040621266428e-05, "loss": 0.6992, "num_tokens": 38670560.0, "step": 414 }, { "epoch": 0.07083119986345793, "grad_norm": 0.6233453697396772, "learning_rate": 3.717357910906298e-05, "loss": 0.6405, "num_tokens": 38754610.0, "step": 415 }, { "epoch": 0.07100187745349036, "grad_norm": 0.6934015078700733, "learning_rate": 3.716675200546168e-05, "loss": 0.6963, "num_tokens": 38821997.0, "step": 416 }, { "epoch": 0.07117255504352278, "grad_norm": 0.6209536790566113, "learning_rate": 3.715992490186039e-05, "loss": 0.7221, "num_tokens": 38908817.0, "step": 417 }, { "epoch": 0.07134323263355521, "grad_norm": 0.5524276254441424, "learning_rate": 3.715309779825909e-05, "loss": 0.7409, "num_tokens": 39008987.0, "step": 418 }, { "epoch": 0.07151391022358765, "grad_norm": 0.5617908344440492, "learning_rate": 3.71462706946578e-05, "loss": 0.7365, "num_tokens": 39139396.0, "step": 419 }, { "epoch": 0.07168458781362007, "grad_norm": 0.6392847987935839, "learning_rate": 3.71394435910565e-05, "loss": 0.6621, "num_tokens": 39235560.0, "step": 420 }, { "epoch": 0.0718552654036525, "grad_norm": 0.5398960958143693, "learning_rate": 3.71326164874552e-05, "loss": 0.6687, "num_tokens": 39337106.0, "step": 421 }, { "epoch": 0.07202594299368494, "grad_norm": 0.6077077542844802, "learning_rate": 3.7125789383853905e-05, "loss": 0.7235, "num_tokens": 39429462.0, "step": 422 }, { "epoch": 0.07219662058371736, "grad_norm": 0.642831709156473, "learning_rate": 3.7118962280252605e-05, "loss": 0.7353, "num_tokens": 39510402.0, "step": 423 }, { "epoch": 0.07236729817374979, "grad_norm": 0.6132545983772113, "learning_rate": 3.711213517665131e-05, "loss": 0.6588, "num_tokens": 39612590.0, "step": 424 }, { "epoch": 0.07253797576378221, "grad_norm": 0.5916884491996948, "learning_rate": 3.710530807305001e-05, "loss": 0.6029, "num_tokens": 39696752.0, "step": 425 }, { "epoch": 0.07270865335381464, "grad_norm": 0.5669085638432203, "learning_rate": 3.709848096944871e-05, "loss": 0.6751, "num_tokens": 39809029.0, "step": 426 }, { "epoch": 0.07287933094384708, "grad_norm": 0.6276322795085529, "learning_rate": 3.709165386584742e-05, "loss": 0.7275, "num_tokens": 39902041.0, "step": 427 }, { "epoch": 0.0730500085338795, "grad_norm": 0.7684142821181216, "learning_rate": 3.708482676224612e-05, "loss": 0.6753, "num_tokens": 39986122.0, "step": 428 }, { "epoch": 0.07322068612391193, "grad_norm": 0.6416831587743044, "learning_rate": 3.707799965864482e-05, "loss": 0.677, "num_tokens": 40082636.0, "step": 429 }, { "epoch": 0.07339136371394436, "grad_norm": 0.6019502529843771, "learning_rate": 3.707117255504352e-05, "loss": 0.7416, "num_tokens": 40181734.0, "step": 430 }, { "epoch": 0.07356204130397678, "grad_norm": 0.6534992919623296, "learning_rate": 3.706434545144223e-05, "loss": 0.6258, "num_tokens": 40254613.0, "step": 431 }, { "epoch": 0.07373271889400922, "grad_norm": 0.6128214905076879, "learning_rate": 3.705751834784093e-05, "loss": 0.7237, "num_tokens": 40345002.0, "step": 432 }, { "epoch": 0.07390339648404165, "grad_norm": 0.654110835969018, "learning_rate": 3.7050691244239636e-05, "loss": 0.6948, "num_tokens": 40419207.0, "step": 433 }, { "epoch": 0.07407407407407407, "grad_norm": 0.587687684508063, "learning_rate": 3.7043864140638336e-05, "loss": 0.7702, "num_tokens": 40527451.0, "step": 434 }, { "epoch": 0.0742447516641065, "grad_norm": 0.5962117540564699, "learning_rate": 3.703703703703704e-05, "loss": 0.66, "num_tokens": 40614978.0, "step": 435 }, { "epoch": 0.07441542925413894, "grad_norm": 0.5857044251877943, "learning_rate": 3.7030209933435744e-05, "loss": 0.7406, "num_tokens": 40723224.0, "step": 436 }, { "epoch": 0.07458610684417136, "grad_norm": 0.5813161379638386, "learning_rate": 3.7023382829834444e-05, "loss": 0.7878, "num_tokens": 40843058.0, "step": 437 }, { "epoch": 0.07475678443420379, "grad_norm": 0.6404650823857034, "learning_rate": 3.701655572623315e-05, "loss": 0.6928, "num_tokens": 40942724.0, "step": 438 }, { "epoch": 0.07492746202423622, "grad_norm": 0.5302747980206566, "learning_rate": 3.700972862263185e-05, "loss": 0.671, "num_tokens": 41065701.0, "step": 439 }, { "epoch": 0.07509813961426864, "grad_norm": 0.5717265825404524, "learning_rate": 3.700290151903056e-05, "loss": 0.7167, "num_tokens": 41188117.0, "step": 440 }, { "epoch": 0.07526881720430108, "grad_norm": 0.6321329034107778, "learning_rate": 3.699607441542925e-05, "loss": 0.6929, "num_tokens": 41292957.0, "step": 441 }, { "epoch": 0.0754394947943335, "grad_norm": 0.6559715204135413, "learning_rate": 3.698924731182796e-05, "loss": 0.6586, "num_tokens": 41354391.0, "step": 442 }, { "epoch": 0.07561017238436593, "grad_norm": 0.5906856574426936, "learning_rate": 3.698242020822666e-05, "loss": 0.7089, "num_tokens": 41464498.0, "step": 443 }, { "epoch": 0.07578084997439836, "grad_norm": 0.5557546275010231, "learning_rate": 3.697559310462537e-05, "loss": 0.6359, "num_tokens": 41557700.0, "step": 444 }, { "epoch": 0.07595152756443078, "grad_norm": 0.6222419947426912, "learning_rate": 3.696876600102407e-05, "loss": 0.6926, "num_tokens": 41631938.0, "step": 445 }, { "epoch": 0.07612220515446322, "grad_norm": 0.6253324108919642, "learning_rate": 3.696193889742277e-05, "loss": 0.7157, "num_tokens": 41714289.0, "step": 446 }, { "epoch": 0.07629288274449565, "grad_norm": 0.5581348306417503, "learning_rate": 3.6955111793821475e-05, "loss": 0.5908, "num_tokens": 41796100.0, "step": 447 }, { "epoch": 0.07646356033452807, "grad_norm": 0.6262323408886254, "learning_rate": 3.6948284690220175e-05, "loss": 0.711, "num_tokens": 41892218.0, "step": 448 }, { "epoch": 0.0766342379245605, "grad_norm": 0.5608123760180548, "learning_rate": 3.694145758661888e-05, "loss": 0.6624, "num_tokens": 41994455.0, "step": 449 }, { "epoch": 0.07680491551459294, "grad_norm": 0.5716962388896752, "learning_rate": 3.693463048301758e-05, "loss": 0.6826, "num_tokens": 42083429.0, "step": 450 }, { "epoch": 0.07697559310462536, "grad_norm": 0.700045660427373, "learning_rate": 3.692780337941629e-05, "loss": 0.5999, "num_tokens": 42141344.0, "step": 451 }, { "epoch": 0.07714627069465779, "grad_norm": 0.6418561966355081, "learning_rate": 3.692097627581499e-05, "loss": 0.807, "num_tokens": 42244546.0, "step": 452 }, { "epoch": 0.07731694828469023, "grad_norm": 0.5726563609384213, "learning_rate": 3.691414917221369e-05, "loss": 0.718, "num_tokens": 42353015.0, "step": 453 }, { "epoch": 0.07748762587472265, "grad_norm": 0.6131113670666067, "learning_rate": 3.690732206861239e-05, "loss": 0.6432, "num_tokens": 42434951.0, "step": 454 }, { "epoch": 0.07765830346475508, "grad_norm": 0.5978744216928057, "learning_rate": 3.69004949650111e-05, "loss": 0.6366, "num_tokens": 42542422.0, "step": 455 }, { "epoch": 0.07782898105478751, "grad_norm": 0.5935002375210909, "learning_rate": 3.68936678614098e-05, "loss": 0.6419, "num_tokens": 42625018.0, "step": 456 }, { "epoch": 0.07799965864481993, "grad_norm": 0.6694130666769859, "learning_rate": 3.68868407578085e-05, "loss": 0.7177, "num_tokens": 42724024.0, "step": 457 }, { "epoch": 0.07817033623485237, "grad_norm": 0.8939360181519956, "learning_rate": 3.6880013654207206e-05, "loss": 0.7087, "num_tokens": 42816602.0, "step": 458 }, { "epoch": 0.07834101382488479, "grad_norm": 0.603140795800803, "learning_rate": 3.6873186550605907e-05, "loss": 0.7126, "num_tokens": 42908318.0, "step": 459 }, { "epoch": 0.07851169141491722, "grad_norm": 0.6343385109275218, "learning_rate": 3.6866359447004614e-05, "loss": 0.8598, "num_tokens": 43010891.0, "step": 460 }, { "epoch": 0.07868236900494965, "grad_norm": 0.5514810574769844, "learning_rate": 3.6859532343403314e-05, "loss": 0.6591, "num_tokens": 43101520.0, "step": 461 }, { "epoch": 0.07885304659498207, "grad_norm": 0.6466220317601231, "learning_rate": 3.685270523980202e-05, "loss": 0.7448, "num_tokens": 43184538.0, "step": 462 }, { "epoch": 0.07902372418501451, "grad_norm": 0.6171968336720612, "learning_rate": 3.684587813620072e-05, "loss": 0.6687, "num_tokens": 43255678.0, "step": 463 }, { "epoch": 0.07919440177504694, "grad_norm": 0.9893091065104817, "learning_rate": 3.683905103259942e-05, "loss": 0.7077, "num_tokens": 43341549.0, "step": 464 }, { "epoch": 0.07936507936507936, "grad_norm": 0.6240041305699835, "learning_rate": 3.683222392899813e-05, "loss": 0.7945, "num_tokens": 43449877.0, "step": 465 }, { "epoch": 0.0795357569551118, "grad_norm": 0.5888619822231417, "learning_rate": 3.682539682539683e-05, "loss": 0.6639, "num_tokens": 43557378.0, "step": 466 }, { "epoch": 0.07970643454514423, "grad_norm": 0.5817277037957258, "learning_rate": 3.681856972179553e-05, "loss": 0.6526, "num_tokens": 43651808.0, "step": 467 }, { "epoch": 0.07987711213517665, "grad_norm": 0.5692245257127788, "learning_rate": 3.681174261819423e-05, "loss": 0.6627, "num_tokens": 43757716.0, "step": 468 }, { "epoch": 0.08004778972520908, "grad_norm": 0.6186624934267938, "learning_rate": 3.680491551459294e-05, "loss": 0.6637, "num_tokens": 43835657.0, "step": 469 }, { "epoch": 0.08021846731524152, "grad_norm": 0.5560640781277002, "learning_rate": 3.679808841099164e-05, "loss": 0.6141, "num_tokens": 43925242.0, "step": 470 }, { "epoch": 0.08038914490527393, "grad_norm": 0.6124977850905327, "learning_rate": 3.6791261307390345e-05, "loss": 0.6069, "num_tokens": 43993838.0, "step": 471 }, { "epoch": 0.08055982249530637, "grad_norm": 0.5696982475095156, "learning_rate": 3.6784434203789045e-05, "loss": 0.7307, "num_tokens": 44088914.0, "step": 472 }, { "epoch": 0.08073050008533879, "grad_norm": 0.5465347641317355, "learning_rate": 3.6777607100187746e-05, "loss": 0.6209, "num_tokens": 44201381.0, "step": 473 }, { "epoch": 0.08090117767537122, "grad_norm": 0.5663520542617837, "learning_rate": 3.677077999658645e-05, "loss": 0.5714, "num_tokens": 44283784.0, "step": 474 }, { "epoch": 0.08107185526540366, "grad_norm": 0.5651685933116898, "learning_rate": 3.676395289298515e-05, "loss": 0.6867, "num_tokens": 44375593.0, "step": 475 }, { "epoch": 0.08124253285543608, "grad_norm": 0.5421169567693844, "learning_rate": 3.675712578938386e-05, "loss": 0.5945, "num_tokens": 44473760.0, "step": 476 }, { "epoch": 0.08141321044546851, "grad_norm": 0.6171344694912092, "learning_rate": 3.675029868578256e-05, "loss": 0.7259, "num_tokens": 44571663.0, "step": 477 }, { "epoch": 0.08158388803550094, "grad_norm": 0.6193533591680713, "learning_rate": 3.674347158218126e-05, "loss": 0.7452, "num_tokens": 44667768.0, "step": 478 }, { "epoch": 0.08175456562553336, "grad_norm": 0.6117427566981729, "learning_rate": 3.673664447857996e-05, "loss": 0.7464, "num_tokens": 44750774.0, "step": 479 }, { "epoch": 0.0819252432155658, "grad_norm": 0.5799960616420552, "learning_rate": 3.672981737497867e-05, "loss": 0.7032, "num_tokens": 44861644.0, "step": 480 }, { "epoch": 0.08209592080559823, "grad_norm": 0.6361081087572035, "learning_rate": 3.672299027137737e-05, "loss": 0.6766, "num_tokens": 44933113.0, "step": 481 }, { "epoch": 0.08226659839563065, "grad_norm": 0.588205544901898, "learning_rate": 3.6716163167776076e-05, "loss": 0.7455, "num_tokens": 45046569.0, "step": 482 }, { "epoch": 0.08243727598566308, "grad_norm": 0.7752783986307354, "learning_rate": 3.6709336064174777e-05, "loss": 0.8238, "num_tokens": 45107247.0, "step": 483 }, { "epoch": 0.08260795357569552, "grad_norm": 0.6074951436464685, "learning_rate": 3.670250896057348e-05, "loss": 0.654, "num_tokens": 45191412.0, "step": 484 }, { "epoch": 0.08277863116572794, "grad_norm": 0.5849451194018884, "learning_rate": 3.6695681856972184e-05, "loss": 0.7134, "num_tokens": 45290166.0, "step": 485 }, { "epoch": 0.08294930875576037, "grad_norm": 0.5572397811694471, "learning_rate": 3.6688854753370884e-05, "loss": 0.633, "num_tokens": 45379898.0, "step": 486 }, { "epoch": 0.0831199863457928, "grad_norm": 0.6310970194616543, "learning_rate": 3.668202764976959e-05, "loss": 0.6827, "num_tokens": 45452606.0, "step": 487 }, { "epoch": 0.08329066393582522, "grad_norm": 0.5456901113610884, "learning_rate": 3.667520054616829e-05, "loss": 0.6189, "num_tokens": 45555304.0, "step": 488 }, { "epoch": 0.08346134152585766, "grad_norm": 0.6094337320529141, "learning_rate": 3.6668373442567e-05, "loss": 0.7914, "num_tokens": 45651032.0, "step": 489 }, { "epoch": 0.08363201911589008, "grad_norm": 0.5640544607770678, "learning_rate": 3.66615463389657e-05, "loss": 0.6418, "num_tokens": 45735812.0, "step": 490 }, { "epoch": 0.08380269670592251, "grad_norm": 0.5418123506293233, "learning_rate": 3.66547192353644e-05, "loss": 0.6468, "num_tokens": 45834101.0, "step": 491 }, { "epoch": 0.08397337429595494, "grad_norm": 0.5548156202834212, "learning_rate": 3.66478921317631e-05, "loss": 0.6736, "num_tokens": 45934369.0, "step": 492 }, { "epoch": 0.08414405188598736, "grad_norm": 0.58122308325762, "learning_rate": 3.664106502816181e-05, "loss": 0.7176, "num_tokens": 46022911.0, "step": 493 }, { "epoch": 0.0843147294760198, "grad_norm": 0.5832625645337377, "learning_rate": 3.663423792456051e-05, "loss": 0.7067, "num_tokens": 46121503.0, "step": 494 }, { "epoch": 0.08448540706605223, "grad_norm": 0.5136226933255437, "learning_rate": 3.662741082095921e-05, "loss": 0.6491, "num_tokens": 46244053.0, "step": 495 }, { "epoch": 0.08465608465608465, "grad_norm": 0.5366138938521392, "learning_rate": 3.6620583717357915e-05, "loss": 0.6466, "num_tokens": 46353856.0, "step": 496 }, { "epoch": 0.08482676224611709, "grad_norm": 0.5655264199164406, "learning_rate": 3.6613756613756616e-05, "loss": 0.7721, "num_tokens": 46461446.0, "step": 497 }, { "epoch": 0.08499743983614952, "grad_norm": 0.52875408467019, "learning_rate": 3.660692951015532e-05, "loss": 0.6596, "num_tokens": 46577296.0, "step": 498 }, { "epoch": 0.08516811742618194, "grad_norm": 0.5779672335867836, "learning_rate": 3.660010240655402e-05, "loss": 0.7262, "num_tokens": 46666140.0, "step": 499 }, { "epoch": 0.08533879501621437, "grad_norm": 0.55134507768759, "learning_rate": 3.6593275302952724e-05, "loss": 0.6877, "num_tokens": 46761310.0, "step": 500 }, { "epoch": 0.0855094726062468, "grad_norm": 0.5466686932508596, "learning_rate": 3.658644819935143e-05, "loss": 0.6517, "num_tokens": 46861703.0, "step": 501 }, { "epoch": 0.08568015019627923, "grad_norm": 0.5517273584736176, "learning_rate": 3.657962109575013e-05, "loss": 0.7141, "num_tokens": 46966366.0, "step": 502 }, { "epoch": 0.08585082778631166, "grad_norm": 0.5800037362559861, "learning_rate": 3.657279399214883e-05, "loss": 0.6958, "num_tokens": 47046507.0, "step": 503 }, { "epoch": 0.08602150537634409, "grad_norm": 0.5617836602155099, "learning_rate": 3.656596688854753e-05, "loss": 0.7618, "num_tokens": 47149623.0, "step": 504 }, { "epoch": 0.08619218296637651, "grad_norm": 0.5502705173909379, "learning_rate": 3.655913978494624e-05, "loss": 0.5706, "num_tokens": 47231903.0, "step": 505 }, { "epoch": 0.08636286055640895, "grad_norm": 0.666081316515715, "learning_rate": 3.655231268134494e-05, "loss": 0.739, "num_tokens": 47331758.0, "step": 506 }, { "epoch": 0.08653353814644137, "grad_norm": 0.5797301542556041, "learning_rate": 3.6545485577743646e-05, "loss": 0.5743, "num_tokens": 47403309.0, "step": 507 }, { "epoch": 0.0867042157364738, "grad_norm": 0.5904541896089521, "learning_rate": 3.653865847414235e-05, "loss": 0.7626, "num_tokens": 47509820.0, "step": 508 }, { "epoch": 0.08687489332650623, "grad_norm": 0.5981561246130371, "learning_rate": 3.6531831370541054e-05, "loss": 0.72, "num_tokens": 47607368.0, "step": 509 }, { "epoch": 0.08704557091653865, "grad_norm": 0.5965878657853652, "learning_rate": 3.6525004266939754e-05, "loss": 0.6892, "num_tokens": 47702654.0, "step": 510 }, { "epoch": 0.08721624850657109, "grad_norm": 0.6111497189704105, "learning_rate": 3.6518177163338455e-05, "loss": 0.6577, "num_tokens": 47766700.0, "step": 511 }, { "epoch": 0.08738692609660352, "grad_norm": 0.5791148451603378, "learning_rate": 3.651135005973716e-05, "loss": 0.7108, "num_tokens": 47885720.0, "step": 512 }, { "epoch": 0.08755760368663594, "grad_norm": 0.5725552727080536, "learning_rate": 3.650452295613586e-05, "loss": 0.7149, "num_tokens": 47980372.0, "step": 513 }, { "epoch": 0.08772828127666837, "grad_norm": 0.5651828738015333, "learning_rate": 3.649769585253457e-05, "loss": 0.6782, "num_tokens": 48067383.0, "step": 514 }, { "epoch": 0.08789895886670081, "grad_norm": 0.5444762935137033, "learning_rate": 3.649086874893327e-05, "loss": 0.6467, "num_tokens": 48161663.0, "step": 515 }, { "epoch": 0.08806963645673323, "grad_norm": 0.5900012700843957, "learning_rate": 3.648404164533197e-05, "loss": 0.5933, "num_tokens": 48243234.0, "step": 516 }, { "epoch": 0.08824031404676566, "grad_norm": 0.5773627576470945, "learning_rate": 3.647721454173067e-05, "loss": 0.6966, "num_tokens": 48345296.0, "step": 517 }, { "epoch": 0.0884109916367981, "grad_norm": 0.5508070391292066, "learning_rate": 3.647038743812938e-05, "loss": 0.6688, "num_tokens": 48441283.0, "step": 518 }, { "epoch": 0.08858166922683051, "grad_norm": 0.5935608672750368, "learning_rate": 3.646356033452808e-05, "loss": 0.719, "num_tokens": 48534521.0, "step": 519 }, { "epoch": 0.08875234681686295, "grad_norm": 0.5645208811010013, "learning_rate": 3.645673323092678e-05, "loss": 0.7341, "num_tokens": 48645176.0, "step": 520 }, { "epoch": 0.08892302440689537, "grad_norm": 0.6608966069365919, "learning_rate": 3.6449906127325486e-05, "loss": 0.8173, "num_tokens": 48749390.0, "step": 521 }, { "epoch": 0.0890937019969278, "grad_norm": 0.5271268839277463, "learning_rate": 3.6443079023724186e-05, "loss": 0.6352, "num_tokens": 48857015.0, "step": 522 }, { "epoch": 0.08926437958696024, "grad_norm": 0.5925351919095255, "learning_rate": 3.643625192012289e-05, "loss": 0.6904, "num_tokens": 48940361.0, "step": 523 }, { "epoch": 0.08943505717699266, "grad_norm": 0.5991121224215802, "learning_rate": 3.6429424816521594e-05, "loss": 0.7506, "num_tokens": 49028322.0, "step": 524 }, { "epoch": 0.08960573476702509, "grad_norm": 0.5989576559906101, "learning_rate": 3.64225977129203e-05, "loss": 0.6579, "num_tokens": 49096177.0, "step": 525 }, { "epoch": 0.08977641235705752, "grad_norm": 0.5426754113879474, "learning_rate": 3.6415770609319e-05, "loss": 0.6627, "num_tokens": 49187665.0, "step": 526 }, { "epoch": 0.08994708994708994, "grad_norm": 0.5602305100542257, "learning_rate": 3.64089435057177e-05, "loss": 0.5757, "num_tokens": 49276433.0, "step": 527 }, { "epoch": 0.09011776753712238, "grad_norm": 0.6304599194999569, "learning_rate": 3.64021164021164e-05, "loss": 0.6369, "num_tokens": 49371701.0, "step": 528 }, { "epoch": 0.09028844512715481, "grad_norm": 0.5719837438744131, "learning_rate": 3.639528929851511e-05, "loss": 0.6689, "num_tokens": 49470912.0, "step": 529 }, { "epoch": 0.09045912271718723, "grad_norm": 0.5545437279124883, "learning_rate": 3.638846219491381e-05, "loss": 0.6597, "num_tokens": 49562256.0, "step": 530 }, { "epoch": 0.09062980030721966, "grad_norm": 0.5171433323911107, "learning_rate": 3.638163509131251e-05, "loss": 0.5871, "num_tokens": 49663638.0, "step": 531 }, { "epoch": 0.0908004778972521, "grad_norm": 0.6665112915660268, "learning_rate": 3.637480798771122e-05, "loss": 0.7018, "num_tokens": 49736857.0, "step": 532 }, { "epoch": 0.09097115548728452, "grad_norm": 0.6021405988117964, "learning_rate": 3.636798088410992e-05, "loss": 0.6464, "num_tokens": 49816289.0, "step": 533 }, { "epoch": 0.09114183307731695, "grad_norm": 0.6222466420135279, "learning_rate": 3.6361153780508624e-05, "loss": 0.6123, "num_tokens": 49897108.0, "step": 534 }, { "epoch": 0.09131251066734938, "grad_norm": 0.5842448933340897, "learning_rate": 3.6354326676907325e-05, "loss": 0.6646, "num_tokens": 49989367.0, "step": 535 }, { "epoch": 0.0914831882573818, "grad_norm": 0.6357341901564604, "learning_rate": 3.634749957330603e-05, "loss": 0.7505, "num_tokens": 50076209.0, "step": 536 }, { "epoch": 0.09165386584741424, "grad_norm": 0.6056788322800208, "learning_rate": 3.634067246970473e-05, "loss": 0.7382, "num_tokens": 50183685.0, "step": 537 }, { "epoch": 0.09182454343744666, "grad_norm": 0.5614211588277851, "learning_rate": 3.633384536610343e-05, "loss": 0.6925, "num_tokens": 50283125.0, "step": 538 }, { "epoch": 0.09199522102747909, "grad_norm": 0.5334627053830949, "learning_rate": 3.632701826250214e-05, "loss": 0.5919, "num_tokens": 50378292.0, "step": 539 }, { "epoch": 0.09216589861751152, "grad_norm": 0.6869647579312297, "learning_rate": 3.632019115890084e-05, "loss": 0.7839, "num_tokens": 50452949.0, "step": 540 }, { "epoch": 0.09233657620754394, "grad_norm": 0.5558184357195769, "learning_rate": 3.631336405529954e-05, "loss": 0.6433, "num_tokens": 50547915.0, "step": 541 }, { "epoch": 0.09250725379757638, "grad_norm": 0.5842349893222868, "learning_rate": 3.630653695169824e-05, "loss": 0.6653, "num_tokens": 50653440.0, "step": 542 }, { "epoch": 0.09267793138760881, "grad_norm": 0.6685350636151807, "learning_rate": 3.629970984809695e-05, "loss": 0.6845, "num_tokens": 50713541.0, "step": 543 }, { "epoch": 0.09284860897764123, "grad_norm": 0.5512215679137163, "learning_rate": 3.629288274449565e-05, "loss": 0.7367, "num_tokens": 50848756.0, "step": 544 }, { "epoch": 0.09301928656767366, "grad_norm": 0.598199161021648, "learning_rate": 3.6286055640894356e-05, "loss": 0.6751, "num_tokens": 50931087.0, "step": 545 }, { "epoch": 0.0931899641577061, "grad_norm": 0.5468294611130413, "learning_rate": 3.6279228537293056e-05, "loss": 0.6635, "num_tokens": 51042095.0, "step": 546 }, { "epoch": 0.09336064174773852, "grad_norm": 0.5849218216045272, "learning_rate": 3.6272401433691756e-05, "loss": 0.6247, "num_tokens": 51131588.0, "step": 547 }, { "epoch": 0.09353131933777095, "grad_norm": 0.5798367401027197, "learning_rate": 3.6265574330090464e-05, "loss": 0.6618, "num_tokens": 51216211.0, "step": 548 }, { "epoch": 0.09370199692780339, "grad_norm": 0.6132243264188201, "learning_rate": 3.6258747226489164e-05, "loss": 0.7182, "num_tokens": 51294920.0, "step": 549 }, { "epoch": 0.0938726745178358, "grad_norm": 0.5942224486250892, "learning_rate": 3.625192012288787e-05, "loss": 0.6176, "num_tokens": 51374099.0, "step": 550 }, { "epoch": 0.09404335210786824, "grad_norm": 0.6196569300399327, "learning_rate": 3.624509301928657e-05, "loss": 0.6019, "num_tokens": 51447836.0, "step": 551 }, { "epoch": 0.09421402969790067, "grad_norm": 0.6465822489696604, "learning_rate": 3.623826591568528e-05, "loss": 0.7073, "num_tokens": 51526598.0, "step": 552 }, { "epoch": 0.09438470728793309, "grad_norm": 0.6522141897092714, "learning_rate": 3.623143881208397e-05, "loss": 0.7982, "num_tokens": 51615753.0, "step": 553 }, { "epoch": 0.09455538487796553, "grad_norm": 0.5776636817634605, "learning_rate": 3.622461170848268e-05, "loss": 0.6442, "num_tokens": 51703857.0, "step": 554 }, { "epoch": 0.09472606246799795, "grad_norm": 0.5896416832310668, "learning_rate": 3.621778460488138e-05, "loss": 0.7461, "num_tokens": 51805181.0, "step": 555 }, { "epoch": 0.09489674005803038, "grad_norm": 0.5998681509325083, "learning_rate": 3.621095750128009e-05, "loss": 0.6354, "num_tokens": 51879924.0, "step": 556 }, { "epoch": 0.09506741764806281, "grad_norm": 0.5941086950073793, "learning_rate": 3.620413039767879e-05, "loss": 0.7689, "num_tokens": 51990472.0, "step": 557 }, { "epoch": 0.09523809523809523, "grad_norm": 0.7768918607888808, "learning_rate": 3.619730329407749e-05, "loss": 0.7049, "num_tokens": 52085459.0, "step": 558 }, { "epoch": 0.09540877282812767, "grad_norm": 0.6195553119074377, "learning_rate": 3.6190476190476195e-05, "loss": 0.6888, "num_tokens": 52158776.0, "step": 559 }, { "epoch": 0.0955794504181601, "grad_norm": 0.570006828896315, "learning_rate": 3.6183649086874895e-05, "loss": 0.6311, "num_tokens": 52242465.0, "step": 560 }, { "epoch": 0.09575012800819252, "grad_norm": 0.5323105926739331, "learning_rate": 3.61768219832736e-05, "loss": 0.6609, "num_tokens": 52363273.0, "step": 561 }, { "epoch": 0.09592080559822495, "grad_norm": 0.6027235861329099, "learning_rate": 3.61699948796723e-05, "loss": 0.6049, "num_tokens": 52443139.0, "step": 562 }, { "epoch": 0.09609148318825739, "grad_norm": 0.5246191328850529, "learning_rate": 3.616316777607101e-05, "loss": 0.6259, "num_tokens": 52540951.0, "step": 563 }, { "epoch": 0.09626216077828981, "grad_norm": 0.5920691370350928, "learning_rate": 3.615634067246971e-05, "loss": 0.666, "num_tokens": 52619939.0, "step": 564 }, { "epoch": 0.09643283836832224, "grad_norm": 0.5176477082552898, "learning_rate": 3.614951356886841e-05, "loss": 0.656, "num_tokens": 52730835.0, "step": 565 }, { "epoch": 0.09660351595835467, "grad_norm": 0.6985077638203099, "learning_rate": 3.614268646526711e-05, "loss": 0.7695, "num_tokens": 52839984.0, "step": 566 }, { "epoch": 0.0967741935483871, "grad_norm": 0.642586488676836, "learning_rate": 3.613585936166582e-05, "loss": 0.6719, "num_tokens": 52913484.0, "step": 567 }, { "epoch": 0.09694487113841953, "grad_norm": 0.5612241710274725, "learning_rate": 3.612903225806452e-05, "loss": 0.668, "num_tokens": 52999344.0, "step": 568 }, { "epoch": 0.09711554872845195, "grad_norm": 0.5472889069565046, "learning_rate": 3.612220515446322e-05, "loss": 0.7276, "num_tokens": 53115112.0, "step": 569 }, { "epoch": 0.09728622631848438, "grad_norm": 0.5233292586874325, "learning_rate": 3.6115378050861926e-05, "loss": 0.7095, "num_tokens": 53234593.0, "step": 570 }, { "epoch": 0.09745690390851681, "grad_norm": 0.6349032007720222, "learning_rate": 3.6108550947260626e-05, "loss": 0.6504, "num_tokens": 53327280.0, "step": 571 }, { "epoch": 0.09762758149854923, "grad_norm": 0.7107056808749788, "learning_rate": 3.6101723843659333e-05, "loss": 0.6554, "num_tokens": 53413514.0, "step": 572 }, { "epoch": 0.09779825908858167, "grad_norm": 0.5668050992936222, "learning_rate": 3.6094896740058034e-05, "loss": 0.6485, "num_tokens": 53497875.0, "step": 573 }, { "epoch": 0.0979689366786141, "grad_norm": 0.5728710367155241, "learning_rate": 3.6088069636456734e-05, "loss": 0.7061, "num_tokens": 53593628.0, "step": 574 }, { "epoch": 0.09813961426864652, "grad_norm": 0.6054900627449904, "learning_rate": 3.608124253285544e-05, "loss": 0.6771, "num_tokens": 53663897.0, "step": 575 }, { "epoch": 0.09831029185867896, "grad_norm": 0.5765057772033648, "learning_rate": 3.607441542925414e-05, "loss": 0.6809, "num_tokens": 53753017.0, "step": 576 }, { "epoch": 0.09848096944871139, "grad_norm": 0.5410896133828244, "learning_rate": 3.606758832565284e-05, "loss": 0.6848, "num_tokens": 53858131.0, "step": 577 }, { "epoch": 0.09865164703874381, "grad_norm": 0.5168766457987327, "learning_rate": 3.606076122205154e-05, "loss": 0.6287, "num_tokens": 53970378.0, "step": 578 }, { "epoch": 0.09882232462877624, "grad_norm": 0.5645138970719202, "learning_rate": 3.605393411845025e-05, "loss": 0.6984, "num_tokens": 54061336.0, "step": 579 }, { "epoch": 0.09899300221880868, "grad_norm": 0.6940957963644416, "learning_rate": 3.604710701484895e-05, "loss": 0.734, "num_tokens": 54125510.0, "step": 580 }, { "epoch": 0.0991636798088411, "grad_norm": 0.5512734524299959, "learning_rate": 3.604027991124766e-05, "loss": 0.5677, "num_tokens": 54223533.0, "step": 581 }, { "epoch": 0.09933435739887353, "grad_norm": 0.5859317266862567, "learning_rate": 3.603345280764636e-05, "loss": 0.6877, "num_tokens": 54312651.0, "step": 582 }, { "epoch": 0.09950503498890596, "grad_norm": 0.5179646033318259, "learning_rate": 3.6026625704045065e-05, "loss": 0.6673, "num_tokens": 54415435.0, "step": 583 }, { "epoch": 0.09967571257893838, "grad_norm": 0.5393411074453566, "learning_rate": 3.6019798600443765e-05, "loss": 0.6343, "num_tokens": 54507459.0, "step": 584 }, { "epoch": 0.09984639016897082, "grad_norm": 0.6429763528593041, "learning_rate": 3.6012971496842465e-05, "loss": 0.7146, "num_tokens": 54575172.0, "step": 585 }, { "epoch": 0.10001706775900324, "grad_norm": 0.5471737327436172, "learning_rate": 3.600614439324117e-05, "loss": 0.7036, "num_tokens": 54679441.0, "step": 586 }, { "epoch": 0.10018774534903567, "grad_norm": 0.5602914550960879, "learning_rate": 3.599931728963987e-05, "loss": 0.7858, "num_tokens": 54774827.0, "step": 587 }, { "epoch": 0.1003584229390681, "grad_norm": 0.5784132946075377, "learning_rate": 3.599249018603858e-05, "loss": 0.669, "num_tokens": 54865214.0, "step": 588 }, { "epoch": 0.10052910052910052, "grad_norm": 0.5567092662138868, "learning_rate": 3.598566308243728e-05, "loss": 0.708, "num_tokens": 54982153.0, "step": 589 }, { "epoch": 0.10069977811913296, "grad_norm": 0.541798675123736, "learning_rate": 3.597883597883598e-05, "loss": 0.6358, "num_tokens": 55080347.0, "step": 590 }, { "epoch": 0.10087045570916539, "grad_norm": 0.5987736762667357, "learning_rate": 3.597200887523468e-05, "loss": 0.7506, "num_tokens": 55174425.0, "step": 591 }, { "epoch": 0.10104113329919781, "grad_norm": 0.6188916045231516, "learning_rate": 3.596518177163339e-05, "loss": 0.7655, "num_tokens": 55263893.0, "step": 592 }, { "epoch": 0.10121181088923024, "grad_norm": 0.5457704484251621, "learning_rate": 3.595835466803209e-05, "loss": 0.693, "num_tokens": 55377530.0, "step": 593 }, { "epoch": 0.10138248847926268, "grad_norm": 0.5187006763402402, "learning_rate": 3.5951527564430796e-05, "loss": 0.7028, "num_tokens": 55503291.0, "step": 594 }, { "epoch": 0.1015531660692951, "grad_norm": 0.6567193334889375, "learning_rate": 3.5944700460829496e-05, "loss": 0.6959, "num_tokens": 55588297.0, "step": 595 }, { "epoch": 0.10172384365932753, "grad_norm": 0.5043553870412054, "learning_rate": 3.59378733572282e-05, "loss": 0.6399, "num_tokens": 55690232.0, "step": 596 }, { "epoch": 0.10189452124935997, "grad_norm": 0.5124965307494519, "learning_rate": 3.5931046253626904e-05, "loss": 0.6659, "num_tokens": 55800734.0, "step": 597 }, { "epoch": 0.10206519883939238, "grad_norm": 0.6025276071585761, "learning_rate": 3.5924219150025604e-05, "loss": 0.7468, "num_tokens": 55893347.0, "step": 598 }, { "epoch": 0.10223587642942482, "grad_norm": 0.5565139206444926, "learning_rate": 3.591739204642431e-05, "loss": 0.6324, "num_tokens": 55972711.0, "step": 599 }, { "epoch": 0.10240655401945725, "grad_norm": 0.5548616025790604, "learning_rate": 3.591056494282301e-05, "loss": 0.7894, "num_tokens": 56084147.0, "step": 600 }, { "epoch": 0.10257723160948967, "grad_norm": 0.5674907666339948, "learning_rate": 3.590373783922171e-05, "loss": 0.6687, "num_tokens": 56193905.0, "step": 601 }, { "epoch": 0.1027479091995221, "grad_norm": 0.6635213859265576, "learning_rate": 3.589691073562041e-05, "loss": 0.5992, "num_tokens": 56268847.0, "step": 602 }, { "epoch": 0.10291858678955453, "grad_norm": 0.5340676972917092, "learning_rate": 3.589008363201912e-05, "loss": 0.5837, "num_tokens": 56360504.0, "step": 603 }, { "epoch": 0.10308926437958696, "grad_norm": 0.5784960380177508, "learning_rate": 3.588325652841782e-05, "loss": 0.7168, "num_tokens": 56450728.0, "step": 604 }, { "epoch": 0.10325994196961939, "grad_norm": 0.5709450831352832, "learning_rate": 3.587642942481652e-05, "loss": 0.6832, "num_tokens": 56534826.0, "step": 605 }, { "epoch": 0.10343061955965181, "grad_norm": 0.5176013164041113, "learning_rate": 3.586960232121523e-05, "loss": 0.5745, "num_tokens": 56639326.0, "step": 606 }, { "epoch": 0.10360129714968425, "grad_norm": 0.5303786403010953, "learning_rate": 3.586277521761393e-05, "loss": 0.598, "num_tokens": 56736623.0, "step": 607 }, { "epoch": 0.10377197473971668, "grad_norm": 0.6308980351562006, "learning_rate": 3.5855948114012635e-05, "loss": 0.7189, "num_tokens": 56815939.0, "step": 608 }, { "epoch": 0.1039426523297491, "grad_norm": 0.524600649587053, "learning_rate": 3.5849121010411335e-05, "loss": 0.6463, "num_tokens": 56925354.0, "step": 609 }, { "epoch": 0.10411332991978153, "grad_norm": 0.5671445271456128, "learning_rate": 3.584229390681004e-05, "loss": 0.6202, "num_tokens": 57008908.0, "step": 610 }, { "epoch": 0.10428400750981397, "grad_norm": 0.5523335187034276, "learning_rate": 3.583546680320874e-05, "loss": 0.7338, "num_tokens": 57117058.0, "step": 611 }, { "epoch": 0.10445468509984639, "grad_norm": 0.57395364668904, "learning_rate": 3.582863969960744e-05, "loss": 0.6703, "num_tokens": 57212519.0, "step": 612 }, { "epoch": 0.10462536268987882, "grad_norm": 0.5791998327318868, "learning_rate": 3.582181259600615e-05, "loss": 0.5932, "num_tokens": 57281204.0, "step": 613 }, { "epoch": 0.10479604027991125, "grad_norm": 0.5440792842892852, "learning_rate": 3.581498549240485e-05, "loss": 0.6911, "num_tokens": 57394089.0, "step": 614 }, { "epoch": 0.10496671786994367, "grad_norm": 0.5740976238414511, "learning_rate": 3.580815838880355e-05, "loss": 0.6748, "num_tokens": 57487956.0, "step": 615 }, { "epoch": 0.10513739545997611, "grad_norm": 0.5919186305228085, "learning_rate": 3.580133128520225e-05, "loss": 0.6746, "num_tokens": 57587245.0, "step": 616 }, { "epoch": 0.10530807305000853, "grad_norm": 0.6284892141454418, "learning_rate": 3.579450418160096e-05, "loss": 0.6109, "num_tokens": 57651510.0, "step": 617 }, { "epoch": 0.10547875064004096, "grad_norm": 0.5929288494812709, "learning_rate": 3.578767707799966e-05, "loss": 0.6615, "num_tokens": 57731553.0, "step": 618 }, { "epoch": 0.1056494282300734, "grad_norm": 0.6204020244407263, "learning_rate": 3.5780849974398366e-05, "loss": 0.7288, "num_tokens": 57816256.0, "step": 619 }, { "epoch": 0.10582010582010581, "grad_norm": 0.6179173674902416, "learning_rate": 3.577402287079707e-05, "loss": 0.7287, "num_tokens": 57903548.0, "step": 620 }, { "epoch": 0.10599078341013825, "grad_norm": 0.6024255415203956, "learning_rate": 3.576719576719577e-05, "loss": 0.6828, "num_tokens": 57992322.0, "step": 621 }, { "epoch": 0.10616146100017068, "grad_norm": 0.6043488816920723, "learning_rate": 3.5760368663594474e-05, "loss": 0.7417, "num_tokens": 58086476.0, "step": 622 }, { "epoch": 0.1063321385902031, "grad_norm": 0.5677628466049353, "learning_rate": 3.5753541559993175e-05, "loss": 0.6786, "num_tokens": 58186341.0, "step": 623 }, { "epoch": 0.10650281618023553, "grad_norm": 0.5391761473766171, "learning_rate": 3.574671445639188e-05, "loss": 0.6106, "num_tokens": 58280540.0, "step": 624 }, { "epoch": 0.10667349377026797, "grad_norm": 0.5605435390881831, "learning_rate": 3.573988735279058e-05, "loss": 0.6509, "num_tokens": 58379218.0, "step": 625 }, { "epoch": 0.10684417136030039, "grad_norm": 0.584790193623198, "learning_rate": 3.573306024918929e-05, "loss": 0.6214, "num_tokens": 58449303.0, "step": 626 }, { "epoch": 0.10701484895033282, "grad_norm": 0.5484038897455566, "learning_rate": 3.572623314558798e-05, "loss": 0.6293, "num_tokens": 58545522.0, "step": 627 }, { "epoch": 0.10718552654036526, "grad_norm": 0.6224808704701998, "learning_rate": 3.571940604198669e-05, "loss": 0.6457, "num_tokens": 58625187.0, "step": 628 }, { "epoch": 0.10735620413039768, "grad_norm": 0.5882332570612269, "learning_rate": 3.571257893838539e-05, "loss": 0.6809, "num_tokens": 58704207.0, "step": 629 }, { "epoch": 0.10752688172043011, "grad_norm": 0.5305310610331889, "learning_rate": 3.57057518347841e-05, "loss": 0.646, "num_tokens": 58800823.0, "step": 630 }, { "epoch": 0.10769755931046254, "grad_norm": 0.5463129173035042, "learning_rate": 3.56989247311828e-05, "loss": 0.6448, "num_tokens": 58900776.0, "step": 631 }, { "epoch": 0.10786823690049496, "grad_norm": 0.5325082747052348, "learning_rate": 3.56920976275815e-05, "loss": 0.6471, "num_tokens": 59013978.0, "step": 632 }, { "epoch": 0.1080389144905274, "grad_norm": 0.570206467126212, "learning_rate": 3.5685270523980205e-05, "loss": 0.7536, "num_tokens": 59118360.0, "step": 633 }, { "epoch": 0.10820959208055982, "grad_norm": 0.5281426220588592, "learning_rate": 3.5678443420378906e-05, "loss": 0.6556, "num_tokens": 59211982.0, "step": 634 }, { "epoch": 0.10838026967059225, "grad_norm": 0.5424375843540711, "learning_rate": 3.567161631677761e-05, "loss": 0.7324, "num_tokens": 59323498.0, "step": 635 }, { "epoch": 0.10855094726062468, "grad_norm": 0.5750254991434355, "learning_rate": 3.566478921317631e-05, "loss": 0.6894, "num_tokens": 59407340.0, "step": 636 }, { "epoch": 0.1087216248506571, "grad_norm": 0.5696354034917398, "learning_rate": 3.565796210957502e-05, "loss": 0.6585, "num_tokens": 59486116.0, "step": 637 }, { "epoch": 0.10889230244068954, "grad_norm": 0.6049668250651179, "learning_rate": 3.565113500597372e-05, "loss": 0.6957, "num_tokens": 59568759.0, "step": 638 }, { "epoch": 0.10906298003072197, "grad_norm": 0.5367882923292222, "learning_rate": 3.564430790237242e-05, "loss": 0.6147, "num_tokens": 59671512.0, "step": 639 }, { "epoch": 0.10923365762075439, "grad_norm": 0.5550579418220778, "learning_rate": 3.563748079877112e-05, "loss": 0.6829, "num_tokens": 59766116.0, "step": 640 }, { "epoch": 0.10940433521078682, "grad_norm": 0.5789495866225279, "learning_rate": 3.563065369516983e-05, "loss": 0.717, "num_tokens": 59860915.0, "step": 641 }, { "epoch": 0.10957501280081926, "grad_norm": 0.5728288884353353, "learning_rate": 3.562382659156853e-05, "loss": 0.6002, "num_tokens": 59938770.0, "step": 642 }, { "epoch": 0.10974569039085168, "grad_norm": 0.5531591242819155, "learning_rate": 3.561699948796723e-05, "loss": 0.5673, "num_tokens": 60030471.0, "step": 643 }, { "epoch": 0.10991636798088411, "grad_norm": 0.6408390111413331, "learning_rate": 3.561017238436594e-05, "loss": 0.6907, "num_tokens": 60108832.0, "step": 644 }, { "epoch": 0.11008704557091654, "grad_norm": 0.5664650239959266, "learning_rate": 3.560334528076464e-05, "loss": 0.6183, "num_tokens": 60200166.0, "step": 645 }, { "epoch": 0.11025772316094896, "grad_norm": 0.5832864925019746, "learning_rate": 3.5596518177163344e-05, "loss": 0.6968, "num_tokens": 60308933.0, "step": 646 }, { "epoch": 0.1104284007509814, "grad_norm": 0.5494724765265943, "learning_rate": 3.5589691073562045e-05, "loss": 0.7025, "num_tokens": 60426437.0, "step": 647 }, { "epoch": 0.11059907834101383, "grad_norm": 0.5427701104136853, "learning_rate": 3.5582863969960745e-05, "loss": 0.6467, "num_tokens": 60515961.0, "step": 648 }, { "epoch": 0.11076975593104625, "grad_norm": 0.5435782818124965, "learning_rate": 3.557603686635945e-05, "loss": 0.6633, "num_tokens": 60616322.0, "step": 649 }, { "epoch": 0.11094043352107869, "grad_norm": 0.547105039704954, "learning_rate": 3.556920976275815e-05, "loss": 0.6909, "num_tokens": 60725345.0, "step": 650 }, { "epoch": 0.1111111111111111, "grad_norm": 0.5356518086169897, "learning_rate": 3.556238265915686e-05, "loss": 0.5577, "num_tokens": 60808324.0, "step": 651 }, { "epoch": 0.11128178870114354, "grad_norm": 0.5252960388426867, "learning_rate": 3.555555555555555e-05, "loss": 0.6546, "num_tokens": 60919355.0, "step": 652 }, { "epoch": 0.11145246629117597, "grad_norm": 0.5508017842633086, "learning_rate": 3.554872845195426e-05, "loss": 0.6561, "num_tokens": 61016724.0, "step": 653 }, { "epoch": 0.11162314388120839, "grad_norm": 0.569740907002769, "learning_rate": 3.554190134835296e-05, "loss": 0.6289, "num_tokens": 61104022.0, "step": 654 }, { "epoch": 0.11179382147124083, "grad_norm": 0.556758332365874, "learning_rate": 3.553507424475167e-05, "loss": 0.6981, "num_tokens": 61228132.0, "step": 655 }, { "epoch": 0.11196449906127326, "grad_norm": 0.5109855853599103, "learning_rate": 3.552824714115037e-05, "loss": 0.6899, "num_tokens": 61355190.0, "step": 656 }, { "epoch": 0.11213517665130568, "grad_norm": 0.6196811965495391, "learning_rate": 3.5521420037549075e-05, "loss": 0.6481, "num_tokens": 61426473.0, "step": 657 }, { "epoch": 0.11230585424133811, "grad_norm": 0.5442808006458479, "learning_rate": 3.5514592933947776e-05, "loss": 0.6244, "num_tokens": 61514462.0, "step": 658 }, { "epoch": 0.11247653183137055, "grad_norm": 0.5684489205744652, "learning_rate": 3.5507765830346476e-05, "loss": 0.6995, "num_tokens": 61611963.0, "step": 659 }, { "epoch": 0.11264720942140297, "grad_norm": 0.512020249952361, "learning_rate": 3.550093872674518e-05, "loss": 0.614, "num_tokens": 61734085.0, "step": 660 }, { "epoch": 0.1128178870114354, "grad_norm": 0.565846878298372, "learning_rate": 3.5494111623143884e-05, "loss": 0.645, "num_tokens": 61824640.0, "step": 661 }, { "epoch": 0.11298856460146783, "grad_norm": 0.5557365717593293, "learning_rate": 3.548728451954259e-05, "loss": 0.7263, "num_tokens": 61948367.0, "step": 662 }, { "epoch": 0.11315924219150025, "grad_norm": 0.5596420160579111, "learning_rate": 3.548045741594129e-05, "loss": 0.6531, "num_tokens": 62043396.0, "step": 663 }, { "epoch": 0.11332991978153269, "grad_norm": 0.5434475155983988, "learning_rate": 3.547363031233999e-05, "loss": 0.6726, "num_tokens": 62154658.0, "step": 664 }, { "epoch": 0.1135005973715651, "grad_norm": 0.5914132570803373, "learning_rate": 3.546680320873869e-05, "loss": 0.5936, "num_tokens": 62224176.0, "step": 665 }, { "epoch": 0.11367127496159754, "grad_norm": 0.5616039117313804, "learning_rate": 3.54599761051374e-05, "loss": 0.7573, "num_tokens": 62339263.0, "step": 666 }, { "epoch": 0.11384195255162997, "grad_norm": 0.6084718007476989, "learning_rate": 3.54531490015361e-05, "loss": 0.655, "num_tokens": 62430588.0, "step": 667 }, { "epoch": 0.1140126301416624, "grad_norm": 0.5851313234960157, "learning_rate": 3.544632189793481e-05, "loss": 0.6904, "num_tokens": 62523970.0, "step": 668 }, { "epoch": 0.11418330773169483, "grad_norm": 0.5566229959844345, "learning_rate": 3.543949479433351e-05, "loss": 0.6185, "num_tokens": 62609796.0, "step": 669 }, { "epoch": 0.11435398532172726, "grad_norm": 0.637078533330038, "learning_rate": 3.543266769073221e-05, "loss": 0.7284, "num_tokens": 62700680.0, "step": 670 }, { "epoch": 0.11452466291175968, "grad_norm": 0.5962917615407175, "learning_rate": 3.5425840587130915e-05, "loss": 0.54, "num_tokens": 62768845.0, "step": 671 }, { "epoch": 0.11469534050179211, "grad_norm": 0.5153351563085283, "learning_rate": 3.5419013483529615e-05, "loss": 0.6321, "num_tokens": 62874436.0, "step": 672 }, { "epoch": 0.11486601809182455, "grad_norm": 0.5981474491872827, "learning_rate": 3.541218637992832e-05, "loss": 0.7183, "num_tokens": 62968898.0, "step": 673 }, { "epoch": 0.11503669568185697, "grad_norm": 0.6000383143515502, "learning_rate": 3.540535927632702e-05, "loss": 0.685, "num_tokens": 63066824.0, "step": 674 }, { "epoch": 0.1152073732718894, "grad_norm": 0.5485883936716764, "learning_rate": 3.539853217272572e-05, "loss": 0.5885, "num_tokens": 63158648.0, "step": 675 }, { "epoch": 0.11537805086192184, "grad_norm": 0.5848365966341512, "learning_rate": 3.539170506912443e-05, "loss": 0.6895, "num_tokens": 63250445.0, "step": 676 }, { "epoch": 0.11554872845195426, "grad_norm": 0.5340453223733347, "learning_rate": 3.538487796552313e-05, "loss": 0.6515, "num_tokens": 63350508.0, "step": 677 }, { "epoch": 0.11571940604198669, "grad_norm": 0.6548762006997945, "learning_rate": 3.537805086192183e-05, "loss": 0.6536, "num_tokens": 63424406.0, "step": 678 }, { "epoch": 0.11589008363201912, "grad_norm": 0.5963683280322891, "learning_rate": 3.537122375832053e-05, "loss": 0.6713, "num_tokens": 63518661.0, "step": 679 }, { "epoch": 0.11606076122205154, "grad_norm": 0.6097308802062946, "learning_rate": 3.536439665471924e-05, "loss": 0.756, "num_tokens": 63605994.0, "step": 680 }, { "epoch": 0.11623143881208398, "grad_norm": 0.5461210887813226, "learning_rate": 3.535756955111794e-05, "loss": 0.6066, "num_tokens": 63695613.0, "step": 681 }, { "epoch": 0.1164021164021164, "grad_norm": 0.562178535912725, "learning_rate": 3.5350742447516646e-05, "loss": 0.607, "num_tokens": 63794856.0, "step": 682 }, { "epoch": 0.11657279399214883, "grad_norm": 0.5564281434986919, "learning_rate": 3.5343915343915346e-05, "loss": 0.6629, "num_tokens": 63880747.0, "step": 683 }, { "epoch": 0.11674347158218126, "grad_norm": 0.5828767144844905, "learning_rate": 3.533708824031405e-05, "loss": 0.7848, "num_tokens": 63989013.0, "step": 684 }, { "epoch": 0.11691414917221368, "grad_norm": 0.5445410125586091, "learning_rate": 3.5330261136712754e-05, "loss": 0.7009, "num_tokens": 64091836.0, "step": 685 }, { "epoch": 0.11708482676224612, "grad_norm": 0.5544837034260781, "learning_rate": 3.5323434033111454e-05, "loss": 0.6395, "num_tokens": 64176998.0, "step": 686 }, { "epoch": 0.11725550435227855, "grad_norm": 0.589313043913957, "learning_rate": 3.531660692951016e-05, "loss": 0.6402, "num_tokens": 64257964.0, "step": 687 }, { "epoch": 0.11742618194231097, "grad_norm": 0.5211055606935268, "learning_rate": 3.530977982590886e-05, "loss": 0.6722, "num_tokens": 64380839.0, "step": 688 }, { "epoch": 0.1175968595323434, "grad_norm": 0.6140494933032988, "learning_rate": 3.530295272230756e-05, "loss": 0.751, "num_tokens": 64471371.0, "step": 689 }, { "epoch": 0.11776753712237584, "grad_norm": 0.5361264491268078, "learning_rate": 3.529612561870626e-05, "loss": 0.6478, "num_tokens": 64586076.0, "step": 690 }, { "epoch": 0.11793821471240826, "grad_norm": 0.6055415250454533, "learning_rate": 3.528929851510497e-05, "loss": 0.7217, "num_tokens": 64673598.0, "step": 691 }, { "epoch": 0.11810889230244069, "grad_norm": 0.5871158410748388, "learning_rate": 3.528247141150367e-05, "loss": 0.594, "num_tokens": 64746648.0, "step": 692 }, { "epoch": 0.11827956989247312, "grad_norm": 0.7097608469064496, "learning_rate": 3.527564430790238e-05, "loss": 0.7103, "num_tokens": 64848272.0, "step": 693 }, { "epoch": 0.11845024748250554, "grad_norm": 0.617877239023509, "learning_rate": 3.526881720430108e-05, "loss": 0.6739, "num_tokens": 64914062.0, "step": 694 }, { "epoch": 0.11862092507253798, "grad_norm": 0.5486207440829552, "learning_rate": 3.5261990100699785e-05, "loss": 0.6708, "num_tokens": 65008863.0, "step": 695 }, { "epoch": 0.11879160266257041, "grad_norm": 0.5343670034139644, "learning_rate": 3.5255162997098485e-05, "loss": 0.6499, "num_tokens": 65108628.0, "step": 696 }, { "epoch": 0.11896228025260283, "grad_norm": 0.6036679497099515, "learning_rate": 3.5248335893497185e-05, "loss": 0.6476, "num_tokens": 65182974.0, "step": 697 }, { "epoch": 0.11913295784263526, "grad_norm": 0.5652892928121769, "learning_rate": 3.524150878989589e-05, "loss": 0.6871, "num_tokens": 65275897.0, "step": 698 }, { "epoch": 0.11930363543266768, "grad_norm": 0.5533258245574573, "learning_rate": 3.523468168629459e-05, "loss": 0.6732, "num_tokens": 65383087.0, "step": 699 }, { "epoch": 0.11947431302270012, "grad_norm": 0.5127905131870493, "learning_rate": 3.52278545826933e-05, "loss": 0.6221, "num_tokens": 65495591.0, "step": 700 }, { "epoch": 0.11964499061273255, "grad_norm": 0.6579466175029122, "learning_rate": 3.5221027479091994e-05, "loss": 0.7038, "num_tokens": 65561850.0, "step": 701 }, { "epoch": 0.11981566820276497, "grad_norm": 0.5355251394301593, "learning_rate": 3.52142003754907e-05, "loss": 0.6298, "num_tokens": 65656789.0, "step": 702 }, { "epoch": 0.1199863457927974, "grad_norm": 0.5865426242938332, "learning_rate": 3.52073732718894e-05, "loss": 0.6783, "num_tokens": 65734510.0, "step": 703 }, { "epoch": 0.12015702338282984, "grad_norm": 0.5530618058332301, "learning_rate": 3.520054616828811e-05, "loss": 0.7228, "num_tokens": 65847469.0, "step": 704 }, { "epoch": 0.12032770097286226, "grad_norm": 0.5796062311041674, "learning_rate": 3.519371906468681e-05, "loss": 0.7232, "num_tokens": 65960147.0, "step": 705 }, { "epoch": 0.12049837856289469, "grad_norm": 0.6494905916710584, "learning_rate": 3.518689196108551e-05, "loss": 0.7221, "num_tokens": 66037222.0, "step": 706 }, { "epoch": 0.12066905615292713, "grad_norm": 0.5554472609733303, "learning_rate": 3.5180064857484216e-05, "loss": 0.6513, "num_tokens": 66152988.0, "step": 707 }, { "epoch": 0.12083973374295955, "grad_norm": 0.6163880793106599, "learning_rate": 3.5173237753882916e-05, "loss": 0.7339, "num_tokens": 66273209.0, "step": 708 }, { "epoch": 0.12101041133299198, "grad_norm": 0.5402528152200645, "learning_rate": 3.5166410650281624e-05, "loss": 0.7121, "num_tokens": 66381159.0, "step": 709 }, { "epoch": 0.12118108892302441, "grad_norm": 0.5780526857918897, "learning_rate": 3.5159583546680324e-05, "loss": 0.7175, "num_tokens": 66467766.0, "step": 710 }, { "epoch": 0.12135176651305683, "grad_norm": 0.5698608862680794, "learning_rate": 3.515275644307903e-05, "loss": 0.6803, "num_tokens": 66570668.0, "step": 711 }, { "epoch": 0.12152244410308927, "grad_norm": 0.5775972964319694, "learning_rate": 3.514592933947773e-05, "loss": 0.64, "num_tokens": 66657917.0, "step": 712 }, { "epoch": 0.12169312169312169, "grad_norm": 0.5669815284631368, "learning_rate": 3.513910223587643e-05, "loss": 0.7067, "num_tokens": 66772634.0, "step": 713 }, { "epoch": 0.12186379928315412, "grad_norm": 0.5952144004827717, "learning_rate": 3.513227513227513e-05, "loss": 0.6136, "num_tokens": 66847211.0, "step": 714 }, { "epoch": 0.12203447687318655, "grad_norm": 0.5344734300408679, "learning_rate": 3.512544802867384e-05, "loss": 0.6589, "num_tokens": 66949210.0, "step": 715 }, { "epoch": 0.12220515446321897, "grad_norm": 0.5763565265778556, "learning_rate": 3.511862092507254e-05, "loss": 0.7708, "num_tokens": 67049501.0, "step": 716 }, { "epoch": 0.12237583205325141, "grad_norm": 0.5728648055688432, "learning_rate": 3.511179382147124e-05, "loss": 0.6328, "num_tokens": 67136396.0, "step": 717 }, { "epoch": 0.12254650964328384, "grad_norm": 0.5875223769966985, "learning_rate": 3.510496671786995e-05, "loss": 0.6405, "num_tokens": 67231166.0, "step": 718 }, { "epoch": 0.12271718723331626, "grad_norm": 0.56243640761712, "learning_rate": 3.509813961426865e-05, "loss": 0.613, "num_tokens": 67306346.0, "step": 719 }, { "epoch": 0.1228878648233487, "grad_norm": 0.5749550090728258, "learning_rate": 3.5091312510667355e-05, "loss": 0.7, "num_tokens": 67414673.0, "step": 720 }, { "epoch": 0.12305854241338113, "grad_norm": 0.5838955395160954, "learning_rate": 3.5084485407066055e-05, "loss": 0.7189, "num_tokens": 67514267.0, "step": 721 }, { "epoch": 0.12322922000341355, "grad_norm": 0.6146460634155827, "learning_rate": 3.5077658303464756e-05, "loss": 0.7205, "num_tokens": 67620580.0, "step": 722 }, { "epoch": 0.12339989759344598, "grad_norm": 0.5366200106455599, "learning_rate": 3.507083119986346e-05, "loss": 0.6447, "num_tokens": 67714421.0, "step": 723 }, { "epoch": 0.12357057518347841, "grad_norm": 0.672742690916885, "learning_rate": 3.506400409626216e-05, "loss": 0.7684, "num_tokens": 67820977.0, "step": 724 }, { "epoch": 0.12374125277351083, "grad_norm": 0.5389895675043395, "learning_rate": 3.505717699266087e-05, "loss": 0.6622, "num_tokens": 67913285.0, "step": 725 }, { "epoch": 0.12391193036354327, "grad_norm": 0.5403535634584391, "learning_rate": 3.5050349889059564e-05, "loss": 0.6472, "num_tokens": 68012589.0, "step": 726 }, { "epoch": 0.1240826079535757, "grad_norm": 0.5314325130864882, "learning_rate": 3.504352278545827e-05, "loss": 0.5942, "num_tokens": 68106011.0, "step": 727 }, { "epoch": 0.12425328554360812, "grad_norm": 0.5061729440467431, "learning_rate": 3.503669568185697e-05, "loss": 0.5794, "num_tokens": 68203229.0, "step": 728 }, { "epoch": 0.12442396313364056, "grad_norm": 0.542021096599923, "learning_rate": 3.502986857825568e-05, "loss": 0.7294, "num_tokens": 68336419.0, "step": 729 }, { "epoch": 0.12459464072367298, "grad_norm": 0.5421642292628917, "learning_rate": 3.502304147465438e-05, "loss": 0.5978, "num_tokens": 68419797.0, "step": 730 }, { "epoch": 0.12476531831370541, "grad_norm": 0.8101015240994291, "learning_rate": 3.5016214371053086e-05, "loss": 0.7748, "num_tokens": 68509292.0, "step": 731 }, { "epoch": 0.12493599590373784, "grad_norm": 0.563472349438742, "learning_rate": 3.5009387267451786e-05, "loss": 0.624, "num_tokens": 68596728.0, "step": 732 }, { "epoch": 0.12510667349377028, "grad_norm": 0.6377601048791521, "learning_rate": 3.500256016385049e-05, "loss": 0.7113, "num_tokens": 68665695.0, "step": 733 }, { "epoch": 0.1252773510838027, "grad_norm": 0.507629656843143, "learning_rate": 3.4995733060249194e-05, "loss": 0.6667, "num_tokens": 68793323.0, "step": 734 }, { "epoch": 0.12544802867383512, "grad_norm": 0.586540038158281, "learning_rate": 3.4988905956647894e-05, "loss": 0.6848, "num_tokens": 68882375.0, "step": 735 }, { "epoch": 0.12561870626386756, "grad_norm": 0.5668186299499408, "learning_rate": 3.49820788530466e-05, "loss": 0.6058, "num_tokens": 68960664.0, "step": 736 }, { "epoch": 0.12578938385389998, "grad_norm": 0.6118298570756933, "learning_rate": 3.49752517494453e-05, "loss": 0.7433, "num_tokens": 69049374.0, "step": 737 }, { "epoch": 0.1259600614439324, "grad_norm": 0.6237656062498739, "learning_rate": 3.496842464584401e-05, "loss": 0.7339, "num_tokens": 69125588.0, "step": 738 }, { "epoch": 0.12613073903396485, "grad_norm": 0.625323638151093, "learning_rate": 3.49615975422427e-05, "loss": 0.6889, "num_tokens": 69199534.0, "step": 739 }, { "epoch": 0.12630141662399727, "grad_norm": 0.5637020598943938, "learning_rate": 3.495477043864141e-05, "loss": 0.613, "num_tokens": 69289122.0, "step": 740 }, { "epoch": 0.1264720942140297, "grad_norm": 0.6252921034857423, "learning_rate": 3.494794333504011e-05, "loss": 0.6571, "num_tokens": 69364679.0, "step": 741 }, { "epoch": 0.12664277180406214, "grad_norm": 0.5857076398829067, "learning_rate": 3.494111623143882e-05, "loss": 0.686, "num_tokens": 69470747.0, "step": 742 }, { "epoch": 0.12681344939409456, "grad_norm": 0.5618516312358169, "learning_rate": 3.493428912783752e-05, "loss": 0.7222, "num_tokens": 69585105.0, "step": 743 }, { "epoch": 0.12698412698412698, "grad_norm": 0.5494599159570872, "learning_rate": 3.492746202423622e-05, "loss": 0.6635, "num_tokens": 69680042.0, "step": 744 }, { "epoch": 0.12715480457415942, "grad_norm": 0.6521494860329216, "learning_rate": 3.4920634920634925e-05, "loss": 0.7177, "num_tokens": 69757519.0, "step": 745 }, { "epoch": 0.12732548216419184, "grad_norm": 0.5543884711444046, "learning_rate": 3.4913807817033626e-05, "loss": 0.6137, "num_tokens": 69849480.0, "step": 746 }, { "epoch": 0.12749615975422426, "grad_norm": 0.5841184316184238, "learning_rate": 3.490698071343233e-05, "loss": 0.7003, "num_tokens": 69947716.0, "step": 747 }, { "epoch": 0.1276668373442567, "grad_norm": 0.589610969252541, "learning_rate": 3.490015360983103e-05, "loss": 0.7601, "num_tokens": 70047569.0, "step": 748 }, { "epoch": 0.12783751493428913, "grad_norm": 0.5756092704200104, "learning_rate": 3.4893326506229733e-05, "loss": 0.6678, "num_tokens": 70138720.0, "step": 749 }, { "epoch": 0.12800819252432155, "grad_norm": 0.5936404714254403, "learning_rate": 3.488649940262844e-05, "loss": 0.7536, "num_tokens": 70235724.0, "step": 750 }, { "epoch": 0.128178870114354, "grad_norm": 0.5859397442508912, "learning_rate": 3.487967229902714e-05, "loss": 0.6052, "num_tokens": 70304151.0, "step": 751 }, { "epoch": 0.12834954770438642, "grad_norm": 0.5578055958186685, "learning_rate": 3.487284519542584e-05, "loss": 0.75, "num_tokens": 70408079.0, "step": 752 }, { "epoch": 0.12852022529441884, "grad_norm": 0.6116526629020942, "learning_rate": 3.486601809182454e-05, "loss": 0.6866, "num_tokens": 70491101.0, "step": 753 }, { "epoch": 0.12869090288445126, "grad_norm": 0.5786351625569407, "learning_rate": 3.485919098822325e-05, "loss": 0.6195, "num_tokens": 70566259.0, "step": 754 }, { "epoch": 0.1288615804744837, "grad_norm": 0.5682226533594387, "learning_rate": 3.485236388462195e-05, "loss": 0.7104, "num_tokens": 70684528.0, "step": 755 }, { "epoch": 0.12903225806451613, "grad_norm": 0.5151472636579965, "learning_rate": 3.4845536781020656e-05, "loss": 0.6093, "num_tokens": 70792835.0, "step": 756 }, { "epoch": 0.12920293565454855, "grad_norm": 0.5206369316928973, "learning_rate": 3.483870967741936e-05, "loss": 0.6388, "num_tokens": 70890168.0, "step": 757 }, { "epoch": 0.129373613244581, "grad_norm": 0.5288438452975153, "learning_rate": 3.4831882573818064e-05, "loss": 0.6096, "num_tokens": 70991953.0, "step": 758 }, { "epoch": 0.1295442908346134, "grad_norm": 0.5481352587915014, "learning_rate": 3.4825055470216764e-05, "loss": 0.6252, "num_tokens": 71080611.0, "step": 759 }, { "epoch": 0.12971496842464583, "grad_norm": 0.5168432193753926, "learning_rate": 3.4818228366615465e-05, "loss": 0.6383, "num_tokens": 71175942.0, "step": 760 }, { "epoch": 0.12988564601467828, "grad_norm": 0.5454879782623707, "learning_rate": 3.481140126301417e-05, "loss": 0.65, "num_tokens": 71266429.0, "step": 761 }, { "epoch": 0.1300563236047107, "grad_norm": 0.5115899853285042, "learning_rate": 3.480457415941287e-05, "loss": 0.6653, "num_tokens": 71385461.0, "step": 762 }, { "epoch": 0.13022700119474312, "grad_norm": 0.4881318719071649, "learning_rate": 3.479774705581157e-05, "loss": 0.6248, "num_tokens": 71495454.0, "step": 763 }, { "epoch": 0.13039767878477557, "grad_norm": 0.5972276050395449, "learning_rate": 3.479091995221027e-05, "loss": 0.5875, "num_tokens": 71576057.0, "step": 764 }, { "epoch": 0.130568356374808, "grad_norm": 0.6002882317192966, "learning_rate": 3.478409284860898e-05, "loss": 0.6583, "num_tokens": 71682346.0, "step": 765 }, { "epoch": 0.1307390339648404, "grad_norm": 0.6008191414622202, "learning_rate": 3.477726574500768e-05, "loss": 0.6007, "num_tokens": 71746013.0, "step": 766 }, { "epoch": 0.13090971155487285, "grad_norm": 0.5577644206648226, "learning_rate": 3.477043864140639e-05, "loss": 0.6554, "num_tokens": 71842149.0, "step": 767 }, { "epoch": 0.13108038914490527, "grad_norm": 0.5412804598382006, "learning_rate": 3.476361153780509e-05, "loss": 0.6106, "num_tokens": 71936603.0, "step": 768 }, { "epoch": 0.1312510667349377, "grad_norm": 0.5859571792002924, "learning_rate": 3.4756784434203795e-05, "loss": 0.6571, "num_tokens": 72031016.0, "step": 769 }, { "epoch": 0.13142174432497014, "grad_norm": 0.5291821636709566, "learning_rate": 3.4749957330602496e-05, "loss": 0.6756, "num_tokens": 72137968.0, "step": 770 }, { "epoch": 0.13159242191500256, "grad_norm": 0.6394837166161085, "learning_rate": 3.4743130227001196e-05, "loss": 0.7097, "num_tokens": 72210425.0, "step": 771 }, { "epoch": 0.13176309950503498, "grad_norm": 0.5113193186003069, "learning_rate": 3.47363031233999e-05, "loss": 0.7028, "num_tokens": 72337173.0, "step": 772 }, { "epoch": 0.13193377709506743, "grad_norm": 0.5007580188937161, "learning_rate": 3.4729476019798603e-05, "loss": 0.6865, "num_tokens": 72468609.0, "step": 773 }, { "epoch": 0.13210445468509985, "grad_norm": 0.551752277013151, "learning_rate": 3.472264891619731e-05, "loss": 0.7417, "num_tokens": 72583376.0, "step": 774 }, { "epoch": 0.13227513227513227, "grad_norm": 0.665338391117444, "learning_rate": 3.471582181259601e-05, "loss": 0.6669, "num_tokens": 72670603.0, "step": 775 }, { "epoch": 0.13244580986516472, "grad_norm": 0.5235473644075423, "learning_rate": 3.470899470899471e-05, "loss": 0.5748, "num_tokens": 72766288.0, "step": 776 }, { "epoch": 0.13261648745519714, "grad_norm": 0.5751657854044766, "learning_rate": 3.470216760539341e-05, "loss": 0.7027, "num_tokens": 72857506.0, "step": 777 }, { "epoch": 0.13278716504522955, "grad_norm": 0.5711890665392755, "learning_rate": 3.469534050179212e-05, "loss": 0.6223, "num_tokens": 72930470.0, "step": 778 }, { "epoch": 0.132957842635262, "grad_norm": 0.5912051950645791, "learning_rate": 3.468851339819082e-05, "loss": 0.6185, "num_tokens": 73015730.0, "step": 779 }, { "epoch": 0.13312852022529442, "grad_norm": 0.554912499365756, "learning_rate": 3.468168629458952e-05, "loss": 0.5899, "num_tokens": 73094330.0, "step": 780 }, { "epoch": 0.13329919781532684, "grad_norm": 0.6044784058877728, "learning_rate": 3.467485919098823e-05, "loss": 0.7074, "num_tokens": 73183653.0, "step": 781 }, { "epoch": 0.1334698754053593, "grad_norm": 0.5527665736175474, "learning_rate": 3.466803208738693e-05, "loss": 0.634, "num_tokens": 73296893.0, "step": 782 }, { "epoch": 0.1336405529953917, "grad_norm": 0.5453087465186685, "learning_rate": 3.4661204983785634e-05, "loss": 0.655, "num_tokens": 73394330.0, "step": 783 }, { "epoch": 0.13381123058542413, "grad_norm": 0.521488374016347, "learning_rate": 3.4654377880184335e-05, "loss": 0.6173, "num_tokens": 73498208.0, "step": 784 }, { "epoch": 0.13398190817545655, "grad_norm": 0.536150553918683, "learning_rate": 3.464755077658304e-05, "loss": 0.6168, "num_tokens": 73615196.0, "step": 785 }, { "epoch": 0.134152585765489, "grad_norm": 0.5626934903443068, "learning_rate": 3.464072367298174e-05, "loss": 0.6755, "num_tokens": 73698091.0, "step": 786 }, { "epoch": 0.13432326335552142, "grad_norm": 0.5619197317110837, "learning_rate": 3.463389656938044e-05, "loss": 0.6618, "num_tokens": 73786974.0, "step": 787 }, { "epoch": 0.13449394094555384, "grad_norm": 0.5832545919952337, "learning_rate": 3.462706946577914e-05, "loss": 0.696, "num_tokens": 73890501.0, "step": 788 }, { "epoch": 0.13466461853558628, "grad_norm": 0.5605809110775734, "learning_rate": 3.462024236217785e-05, "loss": 0.6822, "num_tokens": 73985951.0, "step": 789 }, { "epoch": 0.1348352961256187, "grad_norm": 0.5742836037689109, "learning_rate": 3.461341525857655e-05, "loss": 0.685, "num_tokens": 74071218.0, "step": 790 }, { "epoch": 0.13500597371565112, "grad_norm": 0.5535374871589267, "learning_rate": 3.460658815497525e-05, "loss": 0.6409, "num_tokens": 74165058.0, "step": 791 }, { "epoch": 0.13517665130568357, "grad_norm": 0.786375103297193, "learning_rate": 3.459976105137396e-05, "loss": 0.7335, "num_tokens": 74256953.0, "step": 792 }, { "epoch": 0.135347328895716, "grad_norm": 0.6497376955322123, "learning_rate": 3.459293394777266e-05, "loss": 0.7739, "num_tokens": 74362732.0, "step": 793 }, { "epoch": 0.1355180064857484, "grad_norm": 0.5903316681542662, "learning_rate": 3.4586106844171366e-05, "loss": 0.6672, "num_tokens": 74442391.0, "step": 794 }, { "epoch": 0.13568868407578086, "grad_norm": 0.5518166236051448, "learning_rate": 3.4579279740570066e-05, "loss": 0.6349, "num_tokens": 74544319.0, "step": 795 }, { "epoch": 0.13585936166581328, "grad_norm": 0.5286737623324989, "learning_rate": 3.4572452636968766e-05, "loss": 0.6443, "num_tokens": 74640019.0, "step": 796 }, { "epoch": 0.1360300392558457, "grad_norm": 0.5439447586337827, "learning_rate": 3.4565625533367473e-05, "loss": 0.5757, "num_tokens": 74715454.0, "step": 797 }, { "epoch": 0.13620071684587814, "grad_norm": 0.5229571661421332, "learning_rate": 3.4558798429766174e-05, "loss": 0.6223, "num_tokens": 74833448.0, "step": 798 }, { "epoch": 0.13637139443591056, "grad_norm": 0.6118548998313607, "learning_rate": 3.455197132616488e-05, "loss": 0.7364, "num_tokens": 74925608.0, "step": 799 }, { "epoch": 0.13654207202594298, "grad_norm": 0.5511322691072621, "learning_rate": 3.454514422256358e-05, "loss": 0.6369, "num_tokens": 75016990.0, "step": 800 }, { "epoch": 0.13671274961597543, "grad_norm": 0.6395241835029489, "learning_rate": 3.453831711896228e-05, "loss": 0.6844, "num_tokens": 75135001.0, "step": 801 }, { "epoch": 0.13688342720600785, "grad_norm": 0.5970974833662913, "learning_rate": 3.453149001536098e-05, "loss": 0.6814, "num_tokens": 75232395.0, "step": 802 }, { "epoch": 0.13705410479604027, "grad_norm": 0.5655053991514898, "learning_rate": 3.452466291175969e-05, "loss": 0.6301, "num_tokens": 75335921.0, "step": 803 }, { "epoch": 0.13722478238607272, "grad_norm": 0.5429561610612147, "learning_rate": 3.451783580815839e-05, "loss": 0.5757, "num_tokens": 75417992.0, "step": 804 }, { "epoch": 0.13739545997610514, "grad_norm": 0.5435944660585238, "learning_rate": 3.45110087045571e-05, "loss": 0.6158, "num_tokens": 75513624.0, "step": 805 }, { "epoch": 0.13756613756613756, "grad_norm": 0.6006257552284013, "learning_rate": 3.45041816009558e-05, "loss": 0.7562, "num_tokens": 75601479.0, "step": 806 }, { "epoch": 0.13773681515617, "grad_norm": 0.508677824412768, "learning_rate": 3.44973544973545e-05, "loss": 0.6309, "num_tokens": 75710409.0, "step": 807 }, { "epoch": 0.13790749274620243, "grad_norm": 0.6324385676279917, "learning_rate": 3.4490527393753205e-05, "loss": 0.6818, "num_tokens": 75789107.0, "step": 808 }, { "epoch": 0.13807817033623485, "grad_norm": 0.5307546403786317, "learning_rate": 3.4483700290151905e-05, "loss": 0.6585, "num_tokens": 75886992.0, "step": 809 }, { "epoch": 0.1382488479262673, "grad_norm": 0.5816164019257226, "learning_rate": 3.447687318655061e-05, "loss": 0.6589, "num_tokens": 75968412.0, "step": 810 }, { "epoch": 0.1384195255162997, "grad_norm": 0.5261459375267986, "learning_rate": 3.447004608294931e-05, "loss": 0.6534, "num_tokens": 76081735.0, "step": 811 }, { "epoch": 0.13859020310633213, "grad_norm": 0.6270712091047304, "learning_rate": 3.446321897934802e-05, "loss": 0.6313, "num_tokens": 76157256.0, "step": 812 }, { "epoch": 0.13876088069636458, "grad_norm": 0.6688330334406163, "learning_rate": 3.445639187574671e-05, "loss": 0.643, "num_tokens": 76255793.0, "step": 813 }, { "epoch": 0.138931558286397, "grad_norm": 0.6032989792870185, "learning_rate": 3.444956477214542e-05, "loss": 0.7044, "num_tokens": 76364978.0, "step": 814 }, { "epoch": 0.13910223587642942, "grad_norm": 0.5539320088220586, "learning_rate": 3.444273766854412e-05, "loss": 0.6889, "num_tokens": 76456007.0, "step": 815 }, { "epoch": 0.13927291346646187, "grad_norm": 0.5659243053274413, "learning_rate": 3.443591056494283e-05, "loss": 0.7047, "num_tokens": 76557543.0, "step": 816 }, { "epoch": 0.1394435910564943, "grad_norm": 0.5053253232661202, "learning_rate": 3.442908346134153e-05, "loss": 0.6555, "num_tokens": 76668233.0, "step": 817 }, { "epoch": 0.1396142686465267, "grad_norm": 0.4863387968322268, "learning_rate": 3.442225635774023e-05, "loss": 0.6214, "num_tokens": 76792295.0, "step": 818 }, { "epoch": 0.13978494623655913, "grad_norm": 0.5379595070099465, "learning_rate": 3.4415429254138936e-05, "loss": 0.609, "num_tokens": 76895764.0, "step": 819 }, { "epoch": 0.13995562382659157, "grad_norm": 0.575718187139048, "learning_rate": 3.4408602150537636e-05, "loss": 0.7021, "num_tokens": 77005295.0, "step": 820 }, { "epoch": 0.140126301416624, "grad_norm": 0.5489611891045184, "learning_rate": 3.4401775046936343e-05, "loss": 0.6776, "num_tokens": 77102339.0, "step": 821 }, { "epoch": 0.1402969790066564, "grad_norm": 0.5877043861200004, "learning_rate": 3.4394947943335044e-05, "loss": 0.7584, "num_tokens": 77205921.0, "step": 822 }, { "epoch": 0.14046765659668886, "grad_norm": 0.5832019635546548, "learning_rate": 3.4388120839733744e-05, "loss": 0.7181, "num_tokens": 77310363.0, "step": 823 }, { "epoch": 0.14063833418672128, "grad_norm": 0.6068096383875914, "learning_rate": 3.438129373613245e-05, "loss": 0.6248, "num_tokens": 77395536.0, "step": 824 }, { "epoch": 0.1408090117767537, "grad_norm": 0.57974612144161, "learning_rate": 3.437446663253115e-05, "loss": 0.6875, "num_tokens": 77492077.0, "step": 825 }, { "epoch": 0.14097968936678615, "grad_norm": 0.5245884440317214, "learning_rate": 3.436763952892985e-05, "loss": 0.6929, "num_tokens": 77602561.0, "step": 826 }, { "epoch": 0.14115036695681857, "grad_norm": 0.5358480100176388, "learning_rate": 3.436081242532855e-05, "loss": 0.6355, "num_tokens": 77690875.0, "step": 827 }, { "epoch": 0.141321044546851, "grad_norm": 0.6403265344529828, "learning_rate": 3.435398532172726e-05, "loss": 0.6713, "num_tokens": 77773084.0, "step": 828 }, { "epoch": 0.14149172213688344, "grad_norm": 0.567706182590931, "learning_rate": 3.434715821812596e-05, "loss": 0.6534, "num_tokens": 77860651.0, "step": 829 }, { "epoch": 0.14166239972691586, "grad_norm": 0.5641943417796886, "learning_rate": 3.434033111452467e-05, "loss": 0.6828, "num_tokens": 77970597.0, "step": 830 }, { "epoch": 0.14183307731694828, "grad_norm": 0.5222471859748632, "learning_rate": 3.433350401092337e-05, "loss": 0.6294, "num_tokens": 78065720.0, "step": 831 }, { "epoch": 0.14200375490698072, "grad_norm": 0.6909340103703439, "learning_rate": 3.4326676907322075e-05, "loss": 0.6444, "num_tokens": 78175834.0, "step": 832 }, { "epoch": 0.14217443249701314, "grad_norm": 0.5585964256520968, "learning_rate": 3.4319849803720775e-05, "loss": 0.6043, "num_tokens": 78270197.0, "step": 833 }, { "epoch": 0.14234511008704556, "grad_norm": 0.5638396544966598, "learning_rate": 3.4313022700119475e-05, "loss": 0.63, "num_tokens": 78351477.0, "step": 834 }, { "epoch": 0.142515787677078, "grad_norm": 0.5391729190316967, "learning_rate": 3.430619559651818e-05, "loss": 0.6291, "num_tokens": 78450853.0, "step": 835 }, { "epoch": 0.14268646526711043, "grad_norm": 0.5556800449408063, "learning_rate": 3.429936849291688e-05, "loss": 0.7382, "num_tokens": 78565133.0, "step": 836 }, { "epoch": 0.14285714285714285, "grad_norm": 0.6596159826600803, "learning_rate": 3.429254138931559e-05, "loss": 0.5799, "num_tokens": 78616681.0, "step": 837 }, { "epoch": 0.1430278204471753, "grad_norm": 0.675667039133319, "learning_rate": 3.4285714285714284e-05, "loss": 0.6883, "num_tokens": 78704537.0, "step": 838 }, { "epoch": 0.14319849803720772, "grad_norm": 0.5080138701337171, "learning_rate": 3.427888718211299e-05, "loss": 0.6402, "num_tokens": 78818860.0, "step": 839 }, { "epoch": 0.14336917562724014, "grad_norm": 0.5353719038325939, "learning_rate": 3.427206007851169e-05, "loss": 0.7461, "num_tokens": 78958394.0, "step": 840 }, { "epoch": 0.14353985321727258, "grad_norm": 0.5293009890712081, "learning_rate": 3.42652329749104e-05, "loss": 0.565, "num_tokens": 79045209.0, "step": 841 }, { "epoch": 0.143710530807305, "grad_norm": 0.6021835330874902, "learning_rate": 3.42584058713091e-05, "loss": 0.6585, "num_tokens": 79124440.0, "step": 842 }, { "epoch": 0.14388120839733742, "grad_norm": 0.5669519889442475, "learning_rate": 3.4251578767707806e-05, "loss": 0.6451, "num_tokens": 79216839.0, "step": 843 }, { "epoch": 0.14405188598736987, "grad_norm": 0.5492342432203641, "learning_rate": 3.4244751664106506e-05, "loss": 0.6523, "num_tokens": 79309475.0, "step": 844 }, { "epoch": 0.1442225635774023, "grad_norm": 0.548250402472044, "learning_rate": 3.423792456050521e-05, "loss": 0.6598, "num_tokens": 79401919.0, "step": 845 }, { "epoch": 0.1443932411674347, "grad_norm": 0.5714021943175022, "learning_rate": 3.4231097456903914e-05, "loss": 0.6676, "num_tokens": 79501645.0, "step": 846 }, { "epoch": 0.14456391875746716, "grad_norm": 0.5132700535269695, "learning_rate": 3.4224270353302614e-05, "loss": 0.6232, "num_tokens": 79597228.0, "step": 847 }, { "epoch": 0.14473459634749958, "grad_norm": 0.5333870837297789, "learning_rate": 3.421744324970132e-05, "loss": 0.6073, "num_tokens": 79687981.0, "step": 848 }, { "epoch": 0.144905273937532, "grad_norm": 0.533021694273642, "learning_rate": 3.421061614610002e-05, "loss": 0.6459, "num_tokens": 79797310.0, "step": 849 }, { "epoch": 0.14507595152756442, "grad_norm": 0.5391687470578578, "learning_rate": 3.420378904249872e-05, "loss": 0.7281, "num_tokens": 79905408.0, "step": 850 }, { "epoch": 0.14524662911759686, "grad_norm": 0.5678254899186858, "learning_rate": 3.419696193889742e-05, "loss": 0.6234, "num_tokens": 79981050.0, "step": 851 }, { "epoch": 0.14541730670762928, "grad_norm": 0.5390661289818485, "learning_rate": 3.419013483529613e-05, "loss": 0.7044, "num_tokens": 80074947.0, "step": 852 }, { "epoch": 0.1455879842976617, "grad_norm": 0.6133116211006021, "learning_rate": 3.418330773169483e-05, "loss": 0.6831, "num_tokens": 80162928.0, "step": 853 }, { "epoch": 0.14575866188769415, "grad_norm": 0.5965568366288224, "learning_rate": 3.417648062809353e-05, "loss": 0.6656, "num_tokens": 80249837.0, "step": 854 }, { "epoch": 0.14592933947772657, "grad_norm": 0.568753341765387, "learning_rate": 3.416965352449224e-05, "loss": 0.6465, "num_tokens": 80325714.0, "step": 855 }, { "epoch": 0.146100017067759, "grad_norm": 0.5928803176909075, "learning_rate": 3.416282642089094e-05, "loss": 0.6234, "num_tokens": 80406323.0, "step": 856 }, { "epoch": 0.14627069465779144, "grad_norm": 0.6824749566651956, "learning_rate": 3.4155999317289645e-05, "loss": 0.6935, "num_tokens": 80488217.0, "step": 857 }, { "epoch": 0.14644137224782386, "grad_norm": 0.529640773078604, "learning_rate": 3.4149172213688345e-05, "loss": 0.6257, "num_tokens": 80581710.0, "step": 858 }, { "epoch": 0.14661204983785628, "grad_norm": 0.5518700798906355, "learning_rate": 3.414234511008705e-05, "loss": 0.7169, "num_tokens": 80679433.0, "step": 859 }, { "epoch": 0.14678272742788873, "grad_norm": 0.632374717551825, "learning_rate": 3.413551800648575e-05, "loss": 0.7082, "num_tokens": 80754948.0, "step": 860 }, { "epoch": 0.14695340501792115, "grad_norm": 0.5295219006000464, "learning_rate": 3.412869090288445e-05, "loss": 0.6333, "num_tokens": 80863384.0, "step": 861 }, { "epoch": 0.14712408260795357, "grad_norm": 0.6249389252472077, "learning_rate": 3.4121863799283154e-05, "loss": 0.733, "num_tokens": 80941178.0, "step": 862 }, { "epoch": 0.147294760197986, "grad_norm": 0.5075852102986409, "learning_rate": 3.411503669568186e-05, "loss": 0.6142, "num_tokens": 81044253.0, "step": 863 }, { "epoch": 0.14746543778801843, "grad_norm": 0.5699480628911868, "learning_rate": 3.410820959208056e-05, "loss": 0.7635, "num_tokens": 81137905.0, "step": 864 }, { "epoch": 0.14763611537805085, "grad_norm": 0.5332469757878634, "learning_rate": 3.410138248847926e-05, "loss": 0.6804, "num_tokens": 81247257.0, "step": 865 }, { "epoch": 0.1478067929680833, "grad_norm": 0.571505320531762, "learning_rate": 3.409455538487797e-05, "loss": 0.6447, "num_tokens": 81324173.0, "step": 866 }, { "epoch": 0.14797747055811572, "grad_norm": 0.5365533755167058, "learning_rate": 3.408772828127667e-05, "loss": 0.711, "num_tokens": 81439400.0, "step": 867 }, { "epoch": 0.14814814814814814, "grad_norm": 0.5504323635661605, "learning_rate": 3.4080901177675376e-05, "loss": 0.7185, "num_tokens": 81551660.0, "step": 868 }, { "epoch": 0.1483188257381806, "grad_norm": 0.5684316127600232, "learning_rate": 3.4074074074074077e-05, "loss": 0.6817, "num_tokens": 81636139.0, "step": 869 }, { "epoch": 0.148489503328213, "grad_norm": 0.4839870733512009, "learning_rate": 3.4067246970472784e-05, "loss": 0.5708, "num_tokens": 81749314.0, "step": 870 }, { "epoch": 0.14866018091824543, "grad_norm": 0.5912521547957299, "learning_rate": 3.4060419866871484e-05, "loss": 0.6969, "num_tokens": 81831996.0, "step": 871 }, { "epoch": 0.14883085850827787, "grad_norm": 0.5610188521504091, "learning_rate": 3.4053592763270185e-05, "loss": 0.6546, "num_tokens": 81919024.0, "step": 872 }, { "epoch": 0.1490015360983103, "grad_norm": 0.6110290208548839, "learning_rate": 3.404676565966889e-05, "loss": 0.6678, "num_tokens": 81997921.0, "step": 873 }, { "epoch": 0.14917221368834271, "grad_norm": 0.5560626084124021, "learning_rate": 3.403993855606759e-05, "loss": 0.7044, "num_tokens": 82101933.0, "step": 874 }, { "epoch": 0.14934289127837516, "grad_norm": 0.5376945524080493, "learning_rate": 3.403311145246629e-05, "loss": 0.6731, "num_tokens": 82208881.0, "step": 875 }, { "epoch": 0.14951356886840758, "grad_norm": 0.578938916421303, "learning_rate": 3.402628434886499e-05, "loss": 0.6998, "num_tokens": 82302841.0, "step": 876 }, { "epoch": 0.14968424645844, "grad_norm": 0.5375517284684109, "learning_rate": 3.40194572452637e-05, "loss": 0.6529, "num_tokens": 82410544.0, "step": 877 }, { "epoch": 0.14985492404847245, "grad_norm": 0.5287233760971404, "learning_rate": 3.40126301416624e-05, "loss": 0.6033, "num_tokens": 82502850.0, "step": 878 }, { "epoch": 0.15002560163850487, "grad_norm": 0.5879944050158549, "learning_rate": 3.400580303806111e-05, "loss": 0.612, "num_tokens": 82571366.0, "step": 879 }, { "epoch": 0.1501962792285373, "grad_norm": 0.5903316935120404, "learning_rate": 3.399897593445981e-05, "loss": 0.6653, "num_tokens": 82654603.0, "step": 880 }, { "epoch": 0.1503669568185697, "grad_norm": 0.7520579633509681, "learning_rate": 3.399214883085851e-05, "loss": 0.6168, "num_tokens": 82743789.0, "step": 881 }, { "epoch": 0.15053763440860216, "grad_norm": 0.5577305120035513, "learning_rate": 3.3985321727257215e-05, "loss": 0.693, "num_tokens": 82828601.0, "step": 882 }, { "epoch": 0.15070831199863458, "grad_norm": 0.5253273734816027, "learning_rate": 3.3978494623655916e-05, "loss": 0.5735, "num_tokens": 82913110.0, "step": 883 }, { "epoch": 0.150878989588667, "grad_norm": 0.6063959061247644, "learning_rate": 3.397166752005462e-05, "loss": 0.6041, "num_tokens": 82988483.0, "step": 884 }, { "epoch": 0.15104966717869944, "grad_norm": 0.5813380468100351, "learning_rate": 3.396484041645332e-05, "loss": 0.633, "num_tokens": 83066310.0, "step": 885 }, { "epoch": 0.15122034476873186, "grad_norm": 0.7052725031928047, "learning_rate": 3.395801331285203e-05, "loss": 0.7641, "num_tokens": 83149633.0, "step": 886 }, { "epoch": 0.15139102235876428, "grad_norm": 0.5936942731748192, "learning_rate": 3.3951186209250724e-05, "loss": 0.6059, "num_tokens": 83218628.0, "step": 887 }, { "epoch": 0.15156169994879673, "grad_norm": 0.6308273661680083, "learning_rate": 3.394435910564943e-05, "loss": 0.6729, "num_tokens": 83299432.0, "step": 888 }, { "epoch": 0.15173237753882915, "grad_norm": 0.6326456256089366, "learning_rate": 3.393753200204813e-05, "loss": 0.7423, "num_tokens": 83378244.0, "step": 889 }, { "epoch": 0.15190305512886157, "grad_norm": 0.5144625408900313, "learning_rate": 3.393070489844684e-05, "loss": 0.5509, "num_tokens": 83477260.0, "step": 890 }, { "epoch": 0.15207373271889402, "grad_norm": 0.5452452963018138, "learning_rate": 3.392387779484554e-05, "loss": 0.6092, "num_tokens": 83574957.0, "step": 891 }, { "epoch": 0.15224441030892644, "grad_norm": 0.5276207114347228, "learning_rate": 3.391705069124424e-05, "loss": 0.604, "num_tokens": 83669714.0, "step": 892 }, { "epoch": 0.15241508789895886, "grad_norm": 0.5620109076926356, "learning_rate": 3.3910223587642947e-05, "loss": 0.7082, "num_tokens": 83778720.0, "step": 893 }, { "epoch": 0.1525857654889913, "grad_norm": 0.5386623429541259, "learning_rate": 3.390339648404165e-05, "loss": 0.5727, "num_tokens": 83868735.0, "step": 894 }, { "epoch": 0.15275644307902372, "grad_norm": 0.5579105060993788, "learning_rate": 3.3896569380440354e-05, "loss": 0.6937, "num_tokens": 83959529.0, "step": 895 }, { "epoch": 0.15292712066905614, "grad_norm": 0.5781161004041685, "learning_rate": 3.3889742276839054e-05, "loss": 0.6259, "num_tokens": 84061115.0, "step": 896 }, { "epoch": 0.1530977982590886, "grad_norm": 0.6995594579127282, "learning_rate": 3.3882915173237755e-05, "loss": 0.7324, "num_tokens": 84149028.0, "step": 897 }, { "epoch": 0.153268475849121, "grad_norm": 0.566523654505438, "learning_rate": 3.387608806963646e-05, "loss": 0.5986, "num_tokens": 84232104.0, "step": 898 }, { "epoch": 0.15343915343915343, "grad_norm": 0.5469873349230053, "learning_rate": 3.386926096603516e-05, "loss": 0.6071, "num_tokens": 84310852.0, "step": 899 }, { "epoch": 0.15360983102918588, "grad_norm": 0.5115799836946827, "learning_rate": 3.386243386243386e-05, "loss": 0.5759, "num_tokens": 84406035.0, "step": 900 }, { "epoch": 0.1537805086192183, "grad_norm": 0.5895049281351106, "learning_rate": 3.385560675883257e-05, "loss": 0.6369, "num_tokens": 84481023.0, "step": 901 }, { "epoch": 0.15395118620925072, "grad_norm": 0.5386692301014158, "learning_rate": 3.384877965523127e-05, "loss": 0.6636, "num_tokens": 84585993.0, "step": 902 }, { "epoch": 0.15412186379928317, "grad_norm": 0.5576610714590958, "learning_rate": 3.384195255162997e-05, "loss": 0.7089, "num_tokens": 84698808.0, "step": 903 }, { "epoch": 0.15429254138931559, "grad_norm": 0.5367721066434097, "learning_rate": 3.383512544802868e-05, "loss": 0.7303, "num_tokens": 84809730.0, "step": 904 }, { "epoch": 0.154463218979348, "grad_norm": 0.6094351136428775, "learning_rate": 3.382829834442738e-05, "loss": 0.7967, "num_tokens": 84912253.0, "step": 905 }, { "epoch": 0.15463389656938045, "grad_norm": 0.5150450037609449, "learning_rate": 3.3821471240826085e-05, "loss": 0.6675, "num_tokens": 85025209.0, "step": 906 }, { "epoch": 0.15480457415941287, "grad_norm": 0.5804230665517117, "learning_rate": 3.3814644137224786e-05, "loss": 0.6983, "num_tokens": 85128510.0, "step": 907 }, { "epoch": 0.1549752517494453, "grad_norm": 0.5894012750820004, "learning_rate": 3.3807817033623486e-05, "loss": 0.6956, "num_tokens": 85208648.0, "step": 908 }, { "epoch": 0.15514592933947774, "grad_norm": 0.5140369261794331, "learning_rate": 3.380098993002219e-05, "loss": 0.6213, "num_tokens": 85322037.0, "step": 909 }, { "epoch": 0.15531660692951016, "grad_norm": 0.5244381664227199, "learning_rate": 3.3794162826420894e-05, "loss": 0.7328, "num_tokens": 85440709.0, "step": 910 }, { "epoch": 0.15548728451954258, "grad_norm": 0.5412290336834022, "learning_rate": 3.37873357228196e-05, "loss": 0.6704, "num_tokens": 85535325.0, "step": 911 }, { "epoch": 0.15565796210957503, "grad_norm": 0.6453882029157079, "learning_rate": 3.3780508619218294e-05, "loss": 0.7361, "num_tokens": 85610257.0, "step": 912 }, { "epoch": 0.15582863969960745, "grad_norm": 0.6136194163551394, "learning_rate": 3.3773681515617e-05, "loss": 0.7455, "num_tokens": 85697387.0, "step": 913 }, { "epoch": 0.15599931728963987, "grad_norm": 0.5655453757271677, "learning_rate": 3.37668544120157e-05, "loss": 0.6505, "num_tokens": 85773056.0, "step": 914 }, { "epoch": 0.15616999487967229, "grad_norm": 0.522637440104471, "learning_rate": 3.376002730841441e-05, "loss": 0.6776, "num_tokens": 85883745.0, "step": 915 }, { "epoch": 0.15634067246970473, "grad_norm": 0.6053805701485918, "learning_rate": 3.375320020481311e-05, "loss": 0.6248, "num_tokens": 85954207.0, "step": 916 }, { "epoch": 0.15651135005973715, "grad_norm": 0.5119620412505483, "learning_rate": 3.3746373101211817e-05, "loss": 0.6101, "num_tokens": 86065915.0, "step": 917 }, { "epoch": 0.15668202764976957, "grad_norm": 0.5233423587345852, "learning_rate": 3.373954599761052e-05, "loss": 0.6848, "num_tokens": 86181134.0, "step": 918 }, { "epoch": 0.15685270523980202, "grad_norm": 0.5324111482438306, "learning_rate": 3.373271889400922e-05, "loss": 0.6852, "num_tokens": 86291990.0, "step": 919 }, { "epoch": 0.15702338282983444, "grad_norm": 0.5392927331046415, "learning_rate": 3.3725891790407924e-05, "loss": 0.6738, "num_tokens": 86397077.0, "step": 920 }, { "epoch": 0.15719406041986686, "grad_norm": 0.50717024982269, "learning_rate": 3.3719064686806625e-05, "loss": 0.6185, "num_tokens": 86490892.0, "step": 921 }, { "epoch": 0.1573647380098993, "grad_norm": 0.5014889517801839, "learning_rate": 3.371223758320533e-05, "loss": 0.6326, "num_tokens": 86595437.0, "step": 922 }, { "epoch": 0.15753541559993173, "grad_norm": 0.6118482390908685, "learning_rate": 3.370541047960403e-05, "loss": 0.6787, "num_tokens": 86679623.0, "step": 923 }, { "epoch": 0.15770609318996415, "grad_norm": 0.6101416634989698, "learning_rate": 3.369858337600273e-05, "loss": 0.6299, "num_tokens": 86745158.0, "step": 924 }, { "epoch": 0.1578767707799966, "grad_norm": 0.5401460441470145, "learning_rate": 3.369175627240143e-05, "loss": 0.6318, "num_tokens": 86841547.0, "step": 925 }, { "epoch": 0.15804744837002901, "grad_norm": 0.5725386950595799, "learning_rate": 3.368492916880014e-05, "loss": 0.6515, "num_tokens": 86923256.0, "step": 926 }, { "epoch": 0.15821812596006143, "grad_norm": 0.5385837980711494, "learning_rate": 3.367810206519884e-05, "loss": 0.6305, "num_tokens": 87015641.0, "step": 927 }, { "epoch": 0.15838880355009388, "grad_norm": 0.5662574601274443, "learning_rate": 3.367127496159754e-05, "loss": 0.711, "num_tokens": 87119060.0, "step": 928 }, { "epoch": 0.1585594811401263, "grad_norm": 0.5509696204996667, "learning_rate": 3.366444785799625e-05, "loss": 0.6108, "num_tokens": 87196661.0, "step": 929 }, { "epoch": 0.15873015873015872, "grad_norm": 0.4888470744093665, "learning_rate": 3.365762075439495e-05, "loss": 0.6653, "num_tokens": 87327572.0, "step": 930 }, { "epoch": 0.15890083632019117, "grad_norm": 0.5696327711183752, "learning_rate": 3.3650793650793656e-05, "loss": 0.6556, "num_tokens": 87403614.0, "step": 931 }, { "epoch": 0.1590715139102236, "grad_norm": 0.5018097701442006, "learning_rate": 3.3643966547192356e-05, "loss": 0.6776, "num_tokens": 87540156.0, "step": 932 }, { "epoch": 0.159242191500256, "grad_norm": 0.596709295696763, "learning_rate": 3.363713944359106e-05, "loss": 0.6506, "num_tokens": 87616234.0, "step": 933 }, { "epoch": 0.15941286909028846, "grad_norm": 0.537564012056407, "learning_rate": 3.3630312339989764e-05, "loss": 0.614, "num_tokens": 87710679.0, "step": 934 }, { "epoch": 0.15958354668032088, "grad_norm": 0.5247536754955109, "learning_rate": 3.3623485236388464e-05, "loss": 0.5977, "num_tokens": 87813388.0, "step": 935 }, { "epoch": 0.1597542242703533, "grad_norm": 0.5157772608378306, "learning_rate": 3.361665813278717e-05, "loss": 0.5988, "num_tokens": 87905931.0, "step": 936 }, { "epoch": 0.15992490186038574, "grad_norm": 0.6212252384968989, "learning_rate": 3.360983102918587e-05, "loss": 0.683, "num_tokens": 87980996.0, "step": 937 }, { "epoch": 0.16009557945041816, "grad_norm": 0.5684460242884022, "learning_rate": 3.360300392558457e-05, "loss": 0.6387, "num_tokens": 88068179.0, "step": 938 }, { "epoch": 0.16026625704045058, "grad_norm": 0.5906123563953137, "learning_rate": 3.359617682198327e-05, "loss": 0.7555, "num_tokens": 88156022.0, "step": 939 }, { "epoch": 0.16043693463048303, "grad_norm": 0.5711459515553676, "learning_rate": 3.358934971838198e-05, "loss": 0.6565, "num_tokens": 88251435.0, "step": 940 }, { "epoch": 0.16060761222051545, "grad_norm": 0.5087385774415003, "learning_rate": 3.358252261478068e-05, "loss": 0.6698, "num_tokens": 88368677.0, "step": 941 }, { "epoch": 0.16077828981054787, "grad_norm": 0.6005839238852286, "learning_rate": 3.357569551117939e-05, "loss": 0.6596, "num_tokens": 88460673.0, "step": 942 }, { "epoch": 0.16094896740058032, "grad_norm": 0.6132494820395977, "learning_rate": 3.356886840757809e-05, "loss": 0.7569, "num_tokens": 88560554.0, "step": 943 }, { "epoch": 0.16111964499061274, "grad_norm": 0.5855934545784317, "learning_rate": 3.3562041303976794e-05, "loss": 0.5969, "num_tokens": 88637620.0, "step": 944 }, { "epoch": 0.16129032258064516, "grad_norm": 0.5544399279290957, "learning_rate": 3.3555214200375495e-05, "loss": 0.6316, "num_tokens": 88723861.0, "step": 945 }, { "epoch": 0.16146100017067758, "grad_norm": 0.5618989884003969, "learning_rate": 3.3548387096774195e-05, "loss": 0.6688, "num_tokens": 88827754.0, "step": 946 }, { "epoch": 0.16163167776071002, "grad_norm": 0.5307284892972446, "learning_rate": 3.35415599931729e-05, "loss": 0.591, "num_tokens": 88925247.0, "step": 947 }, { "epoch": 0.16180235535074244, "grad_norm": 0.5438388180778976, "learning_rate": 3.35347328895716e-05, "loss": 0.6895, "num_tokens": 89037888.0, "step": 948 }, { "epoch": 0.16197303294077486, "grad_norm": 0.5042504159395739, "learning_rate": 3.35279057859703e-05, "loss": 0.5396, "num_tokens": 89136267.0, "step": 949 }, { "epoch": 0.1621437105308073, "grad_norm": 0.5560336613164998, "learning_rate": 3.3521078682369003e-05, "loss": 0.6597, "num_tokens": 89231922.0, "step": 950 }, { "epoch": 0.16231438812083973, "grad_norm": 0.6000062700459601, "learning_rate": 3.351425157876771e-05, "loss": 0.5726, "num_tokens": 89304872.0, "step": 951 }, { "epoch": 0.16248506571087215, "grad_norm": 0.5617838527534526, "learning_rate": 3.350742447516641e-05, "loss": 0.6477, "num_tokens": 89389995.0, "step": 952 }, { "epoch": 0.1626557433009046, "grad_norm": 0.5544101163738012, "learning_rate": 3.350059737156512e-05, "loss": 0.6598, "num_tokens": 89490503.0, "step": 953 }, { "epoch": 0.16282642089093702, "grad_norm": 0.5771115770679669, "learning_rate": 3.349377026796382e-05, "loss": 0.7467, "num_tokens": 89587181.0, "step": 954 }, { "epoch": 0.16299709848096944, "grad_norm": 0.5381769375950448, "learning_rate": 3.348694316436252e-05, "loss": 0.6717, "num_tokens": 89686524.0, "step": 955 }, { "epoch": 0.16316777607100189, "grad_norm": 0.5268824851396524, "learning_rate": 3.3480116060761226e-05, "loss": 0.6475, "num_tokens": 89796447.0, "step": 956 }, { "epoch": 0.1633384536610343, "grad_norm": 0.564657557146007, "learning_rate": 3.3473288957159926e-05, "loss": 0.6913, "num_tokens": 89908407.0, "step": 957 }, { "epoch": 0.16350913125106672, "grad_norm": 0.525067481978308, "learning_rate": 3.3466461853558634e-05, "loss": 0.6545, "num_tokens": 90007604.0, "step": 958 }, { "epoch": 0.16367980884109917, "grad_norm": 0.5520817391694264, "learning_rate": 3.3459634749957334e-05, "loss": 0.6492, "num_tokens": 90102323.0, "step": 959 }, { "epoch": 0.1638504864311316, "grad_norm": 0.5230336953839675, "learning_rate": 3.345280764635604e-05, "loss": 0.5668, "num_tokens": 90180654.0, "step": 960 }, { "epoch": 0.164021164021164, "grad_norm": 0.5679019776850219, "learning_rate": 3.3445980542754735e-05, "loss": 0.6084, "num_tokens": 90259316.0, "step": 961 }, { "epoch": 0.16419184161119646, "grad_norm": 0.6600682913525433, "learning_rate": 3.343915343915344e-05, "loss": 0.6441, "num_tokens": 90371191.0, "step": 962 }, { "epoch": 0.16436251920122888, "grad_norm": 0.5692665177939955, "learning_rate": 3.343232633555214e-05, "loss": 0.6638, "num_tokens": 90477961.0, "step": 963 }, { "epoch": 0.1645331967912613, "grad_norm": 0.5664405221890251, "learning_rate": 3.342549923195085e-05, "loss": 0.6386, "num_tokens": 90574624.0, "step": 964 }, { "epoch": 0.16470387438129375, "grad_norm": 0.5929796956519587, "learning_rate": 3.341867212834955e-05, "loss": 0.601, "num_tokens": 90643245.0, "step": 965 }, { "epoch": 0.16487455197132617, "grad_norm": 0.6080988494772495, "learning_rate": 3.341184502474825e-05, "loss": 0.7273, "num_tokens": 90729054.0, "step": 966 }, { "epoch": 0.1650452295613586, "grad_norm": 0.5602185318921884, "learning_rate": 3.340501792114696e-05, "loss": 0.6423, "num_tokens": 90813937.0, "step": 967 }, { "epoch": 0.16521590715139103, "grad_norm": 0.5080185115236072, "learning_rate": 3.339819081754566e-05, "loss": 0.6018, "num_tokens": 90917385.0, "step": 968 }, { "epoch": 0.16538658474142345, "grad_norm": 0.572149736504798, "learning_rate": 3.3391363713944365e-05, "loss": 0.6441, "num_tokens": 91016641.0, "step": 969 }, { "epoch": 0.16555726233145587, "grad_norm": 0.5271122635046286, "learning_rate": 3.3384536610343065e-05, "loss": 0.7383, "num_tokens": 91141154.0, "step": 970 }, { "epoch": 0.16572793992148832, "grad_norm": 0.5256873891421264, "learning_rate": 3.3377709506741766e-05, "loss": 0.7044, "num_tokens": 91254287.0, "step": 971 }, { "epoch": 0.16589861751152074, "grad_norm": 0.5087632236592299, "learning_rate": 3.337088240314047e-05, "loss": 0.686, "num_tokens": 91370648.0, "step": 972 }, { "epoch": 0.16606929510155316, "grad_norm": 0.5707176086482143, "learning_rate": 3.336405529953917e-05, "loss": 0.7016, "num_tokens": 91459373.0, "step": 973 }, { "epoch": 0.1662399726915856, "grad_norm": 0.5078473761790777, "learning_rate": 3.3357228195937873e-05, "loss": 0.5858, "num_tokens": 91555569.0, "step": 974 }, { "epoch": 0.16641065028161803, "grad_norm": 0.5115535191677693, "learning_rate": 3.335040109233658e-05, "loss": 0.5867, "num_tokens": 91660545.0, "step": 975 }, { "epoch": 0.16658132787165045, "grad_norm": 0.5398910125710993, "learning_rate": 3.334357398873528e-05, "loss": 0.7007, "num_tokens": 91757161.0, "step": 976 }, { "epoch": 0.16675200546168287, "grad_norm": 0.5628928232737316, "learning_rate": 3.333674688513398e-05, "loss": 0.6087, "num_tokens": 91834322.0, "step": 977 }, { "epoch": 0.16692268305171531, "grad_norm": 0.5640094547448221, "learning_rate": 3.332991978153269e-05, "loss": 0.638, "num_tokens": 91918212.0, "step": 978 }, { "epoch": 0.16709336064174773, "grad_norm": 0.5521705909359437, "learning_rate": 3.332309267793139e-05, "loss": 0.6765, "num_tokens": 92020926.0, "step": 979 }, { "epoch": 0.16726403823178015, "grad_norm": 0.5212643391856508, "learning_rate": 3.3316265574330096e-05, "loss": 0.581, "num_tokens": 92114569.0, "step": 980 }, { "epoch": 0.1674347158218126, "grad_norm": 0.5363505715396621, "learning_rate": 3.3309438470728796e-05, "loss": 0.6397, "num_tokens": 92204567.0, "step": 981 }, { "epoch": 0.16760539341184502, "grad_norm": 0.5109377152235322, "learning_rate": 3.33026113671275e-05, "loss": 0.6629, "num_tokens": 92319421.0, "step": 982 }, { "epoch": 0.16777607100187744, "grad_norm": 0.5689780830519742, "learning_rate": 3.3295784263526204e-05, "loss": 0.6073, "num_tokens": 92395771.0, "step": 983 }, { "epoch": 0.1679467485919099, "grad_norm": 0.48892928067357494, "learning_rate": 3.3288957159924904e-05, "loss": 0.6402, "num_tokens": 92502786.0, "step": 984 }, { "epoch": 0.1681174261819423, "grad_norm": 0.5144695534470568, "learning_rate": 3.328213005632361e-05, "loss": 0.6171, "num_tokens": 92593123.0, "step": 985 }, { "epoch": 0.16828810377197473, "grad_norm": 0.5335187557280862, "learning_rate": 3.3275302952722305e-05, "loss": 0.6093, "num_tokens": 92689308.0, "step": 986 }, { "epoch": 0.16845878136200718, "grad_norm": 0.49972809867810575, "learning_rate": 3.326847584912101e-05, "loss": 0.7224, "num_tokens": 92807877.0, "step": 987 }, { "epoch": 0.1686294589520396, "grad_norm": 0.5675878316167894, "learning_rate": 3.326164874551971e-05, "loss": 0.7169, "num_tokens": 92886274.0, "step": 988 }, { "epoch": 0.16880013654207202, "grad_norm": 0.6081032682890283, "learning_rate": 3.325482164191842e-05, "loss": 0.7695, "num_tokens": 92972551.0, "step": 989 }, { "epoch": 0.16897081413210446, "grad_norm": 0.6048355601539958, "learning_rate": 3.324799453831712e-05, "loss": 0.7242, "num_tokens": 93085625.0, "step": 990 }, { "epoch": 0.16914149172213688, "grad_norm": 0.5096053077435836, "learning_rate": 3.324116743471583e-05, "loss": 0.6634, "num_tokens": 93189829.0, "step": 991 }, { "epoch": 0.1693121693121693, "grad_norm": 0.5677482548327386, "learning_rate": 3.323434033111453e-05, "loss": 0.5687, "num_tokens": 93254334.0, "step": 992 }, { "epoch": 0.16948284690220175, "grad_norm": 0.5017507355452938, "learning_rate": 3.322751322751323e-05, "loss": 0.578, "num_tokens": 93352547.0, "step": 993 }, { "epoch": 0.16965352449223417, "grad_norm": 0.5330566889094631, "learning_rate": 3.3220686123911935e-05, "loss": 0.6496, "num_tokens": 93458295.0, "step": 994 }, { "epoch": 0.1698242020822666, "grad_norm": 0.5700787268990278, "learning_rate": 3.3213859020310636e-05, "loss": 0.775, "num_tokens": 93561313.0, "step": 995 }, { "epoch": 0.16999487967229904, "grad_norm": 0.4916298915541908, "learning_rate": 3.320703191670934e-05, "loss": 0.568, "num_tokens": 93661031.0, "step": 996 }, { "epoch": 0.17016555726233146, "grad_norm": 0.5705816218462098, "learning_rate": 3.320020481310804e-05, "loss": 0.661, "num_tokens": 93750979.0, "step": 997 }, { "epoch": 0.17033623485236388, "grad_norm": 0.5732547380092387, "learning_rate": 3.3193377709506743e-05, "loss": 0.614, "num_tokens": 93826929.0, "step": 998 }, { "epoch": 0.17050691244239632, "grad_norm": 0.5893235762576946, "learning_rate": 3.3186550605905444e-05, "loss": 0.6944, "num_tokens": 93920199.0, "step": 999 }, { "epoch": 0.17067759003242874, "grad_norm": 0.5076384084987617, "learning_rate": 3.317972350230415e-05, "loss": 0.7353, "num_tokens": 94041831.0, "step": 1000 }, { "epoch": 0.17084826762246116, "grad_norm": 0.5372763661025041, "learning_rate": 3.317289639870285e-05, "loss": 0.6186, "num_tokens": 94129732.0, "step": 1001 }, { "epoch": 0.1710189452124936, "grad_norm": 0.5884394712052557, "learning_rate": 3.316606929510155e-05, "loss": 0.6221, "num_tokens": 94201750.0, "step": 1002 }, { "epoch": 0.17118962280252603, "grad_norm": 0.5678552599723303, "learning_rate": 3.315924219150026e-05, "loss": 0.5957, "num_tokens": 94277552.0, "step": 1003 }, { "epoch": 0.17136030039255845, "grad_norm": 0.5315261466164247, "learning_rate": 3.315241508789896e-05, "loss": 0.6889, "num_tokens": 94376764.0, "step": 1004 }, { "epoch": 0.1715309779825909, "grad_norm": 0.5905329852165664, "learning_rate": 3.3145587984297666e-05, "loss": 0.6871, "num_tokens": 94455167.0, "step": 1005 }, { "epoch": 0.17170165557262332, "grad_norm": 0.5570139304350328, "learning_rate": 3.313876088069637e-05, "loss": 0.586, "num_tokens": 94542107.0, "step": 1006 }, { "epoch": 0.17187233316265574, "grad_norm": 0.6019041874096699, "learning_rate": 3.3131933777095074e-05, "loss": 0.6133, "num_tokens": 94617257.0, "step": 1007 }, { "epoch": 0.17204301075268819, "grad_norm": 0.5370897078121935, "learning_rate": 3.3125106673493774e-05, "loss": 0.6372, "num_tokens": 94719139.0, "step": 1008 }, { "epoch": 0.1722136883427206, "grad_norm": 0.5448237519917914, "learning_rate": 3.3118279569892475e-05, "loss": 0.6826, "num_tokens": 94814676.0, "step": 1009 }, { "epoch": 0.17238436593275303, "grad_norm": 0.5653097312568957, "learning_rate": 3.311145246629118e-05, "loss": 0.764, "num_tokens": 94911780.0, "step": 1010 }, { "epoch": 0.17255504352278545, "grad_norm": 0.5918963709788372, "learning_rate": 3.310462536268988e-05, "loss": 0.6819, "num_tokens": 95016750.0, "step": 1011 }, { "epoch": 0.1727257211128179, "grad_norm": 0.5512379188149057, "learning_rate": 3.309779825908858e-05, "loss": 0.6109, "num_tokens": 95095351.0, "step": 1012 }, { "epoch": 0.1728963987028503, "grad_norm": 0.5494245451851791, "learning_rate": 3.309097115548728e-05, "loss": 0.647, "num_tokens": 95178310.0, "step": 1013 }, { "epoch": 0.17306707629288273, "grad_norm": 0.5553821764129004, "learning_rate": 3.308414405188599e-05, "loss": 0.7341, "num_tokens": 95270267.0, "step": 1014 }, { "epoch": 0.17323775388291518, "grad_norm": 0.5423383397745963, "learning_rate": 3.307731694828469e-05, "loss": 0.6005, "num_tokens": 95355531.0, "step": 1015 }, { "epoch": 0.1734084314729476, "grad_norm": 0.5804827520020734, "learning_rate": 3.30704898446834e-05, "loss": 0.766, "num_tokens": 95470834.0, "step": 1016 }, { "epoch": 0.17357910906298002, "grad_norm": 0.5604912890454723, "learning_rate": 3.30636627410821e-05, "loss": 0.694, "num_tokens": 95594240.0, "step": 1017 }, { "epoch": 0.17374978665301247, "grad_norm": 0.5386607336879292, "learning_rate": 3.3056835637480805e-05, "loss": 0.6089, "num_tokens": 95689900.0, "step": 1018 }, { "epoch": 0.1739204642430449, "grad_norm": 0.5327327196205804, "learning_rate": 3.3050008533879506e-05, "loss": 0.5876, "num_tokens": 95783833.0, "step": 1019 }, { "epoch": 0.1740911418330773, "grad_norm": 0.5585758229537341, "learning_rate": 3.3043181430278206e-05, "loss": 0.6383, "num_tokens": 95884355.0, "step": 1020 }, { "epoch": 0.17426181942310975, "grad_norm": 0.5100032411701333, "learning_rate": 3.303635432667691e-05, "loss": 0.6435, "num_tokens": 96012522.0, "step": 1021 }, { "epoch": 0.17443249701314217, "grad_norm": 0.5079707830850448, "learning_rate": 3.3029527223075613e-05, "loss": 0.6638, "num_tokens": 96124094.0, "step": 1022 }, { "epoch": 0.1746031746031746, "grad_norm": 0.5014631304180031, "learning_rate": 3.3022700119474314e-05, "loss": 0.5505, "num_tokens": 96217511.0, "step": 1023 }, { "epoch": 0.17477385219320704, "grad_norm": 0.573539139444039, "learning_rate": 3.3015873015873014e-05, "loss": 0.7568, "num_tokens": 96333654.0, "step": 1024 }, { "epoch": 0.17494452978323946, "grad_norm": 0.6098169419382015, "learning_rate": 3.300904591227172e-05, "loss": 0.7474, "num_tokens": 96414352.0, "step": 1025 }, { "epoch": 0.17511520737327188, "grad_norm": 0.5385515482086622, "learning_rate": 3.300221880867042e-05, "loss": 0.7492, "num_tokens": 96528981.0, "step": 1026 }, { "epoch": 0.17528588496330433, "grad_norm": 0.48718731497330936, "learning_rate": 3.299539170506913e-05, "loss": 0.6593, "num_tokens": 96653422.0, "step": 1027 }, { "epoch": 0.17545656255333675, "grad_norm": 0.5494473496232791, "learning_rate": 3.298856460146783e-05, "loss": 0.6081, "num_tokens": 96736563.0, "step": 1028 }, { "epoch": 0.17562724014336917, "grad_norm": 0.5690493942851514, "learning_rate": 3.298173749786653e-05, "loss": 0.6718, "num_tokens": 96818360.0, "step": 1029 }, { "epoch": 0.17579791773340162, "grad_norm": 0.5023918120088063, "learning_rate": 3.297491039426524e-05, "loss": 0.6211, "num_tokens": 96926495.0, "step": 1030 }, { "epoch": 0.17596859532343403, "grad_norm": 0.5311648937282897, "learning_rate": 3.296808329066394e-05, "loss": 0.6109, "num_tokens": 97007574.0, "step": 1031 }, { "epoch": 0.17613927291346645, "grad_norm": 0.5564817837314057, "learning_rate": 3.2961256187062644e-05, "loss": 0.6542, "num_tokens": 97097027.0, "step": 1032 }, { "epoch": 0.1763099505034989, "grad_norm": 0.504286089368628, "learning_rate": 3.2954429083461345e-05, "loss": 0.5959, "num_tokens": 97195554.0, "step": 1033 }, { "epoch": 0.17648062809353132, "grad_norm": 0.5113749281139018, "learning_rate": 3.294760197986005e-05, "loss": 0.5622, "num_tokens": 97299934.0, "step": 1034 }, { "epoch": 0.17665130568356374, "grad_norm": 0.6556929834039832, "learning_rate": 3.294077487625875e-05, "loss": 0.7035, "num_tokens": 97390188.0, "step": 1035 }, { "epoch": 0.1768219832735962, "grad_norm": 0.4709716661530288, "learning_rate": 3.293394777265745e-05, "loss": 0.5602, "num_tokens": 97507186.0, "step": 1036 }, { "epoch": 0.1769926608636286, "grad_norm": 0.4732807271980688, "learning_rate": 3.292712066905615e-05, "loss": 0.6962, "num_tokens": 97647976.0, "step": 1037 }, { "epoch": 0.17716333845366103, "grad_norm": 0.4959150405341842, "learning_rate": 3.292029356545486e-05, "loss": 0.6196, "num_tokens": 97749518.0, "step": 1038 }, { "epoch": 0.17733401604369348, "grad_norm": 0.5184711549397046, "learning_rate": 3.291346646185356e-05, "loss": 0.6441, "num_tokens": 97849984.0, "step": 1039 }, { "epoch": 0.1775046936337259, "grad_norm": 0.5545388565474023, "learning_rate": 3.290663935825226e-05, "loss": 0.6686, "num_tokens": 97939752.0, "step": 1040 }, { "epoch": 0.17767537122375832, "grad_norm": 0.5404986268524806, "learning_rate": 3.289981225465097e-05, "loss": 0.7049, "num_tokens": 98039597.0, "step": 1041 }, { "epoch": 0.17784604881379074, "grad_norm": 0.5942106861764868, "learning_rate": 3.289298515104967e-05, "loss": 0.7959, "num_tokens": 98143825.0, "step": 1042 }, { "epoch": 0.17801672640382318, "grad_norm": 0.5218674721373961, "learning_rate": 3.2886158047448375e-05, "loss": 0.6272, "num_tokens": 98229972.0, "step": 1043 }, { "epoch": 0.1781874039938556, "grad_norm": 0.5083313347998115, "learning_rate": 3.2879330943847076e-05, "loss": 0.6193, "num_tokens": 98325135.0, "step": 1044 }, { "epoch": 0.17835808158388802, "grad_norm": 0.5322947242586562, "learning_rate": 3.287250384024578e-05, "loss": 0.6541, "num_tokens": 98419693.0, "step": 1045 }, { "epoch": 0.17852875917392047, "grad_norm": 0.5114651870932921, "learning_rate": 3.2865676736644483e-05, "loss": 0.6395, "num_tokens": 98530164.0, "step": 1046 }, { "epoch": 0.1786994367639529, "grad_norm": 0.5250944059131747, "learning_rate": 3.2858849633043184e-05, "loss": 0.6687, "num_tokens": 98623677.0, "step": 1047 }, { "epoch": 0.1788701143539853, "grad_norm": 0.5549065838463765, "learning_rate": 3.2852022529441884e-05, "loss": 0.6395, "num_tokens": 98712074.0, "step": 1048 }, { "epoch": 0.17904079194401776, "grad_norm": 0.49051174338270936, "learning_rate": 3.284519542584059e-05, "loss": 0.6365, "num_tokens": 98828421.0, "step": 1049 }, { "epoch": 0.17921146953405018, "grad_norm": 0.5544320064390491, "learning_rate": 3.283836832223929e-05, "loss": 0.7162, "num_tokens": 98939646.0, "step": 1050 }, { "epoch": 0.1793821471240826, "grad_norm": 0.5112416115462997, "learning_rate": 3.283154121863799e-05, "loss": 0.6498, "num_tokens": 99047578.0, "step": 1051 }, { "epoch": 0.17955282471411504, "grad_norm": 0.5138905369972275, "learning_rate": 3.28247141150367e-05, "loss": 0.6544, "num_tokens": 99153311.0, "step": 1052 }, { "epoch": 0.17972350230414746, "grad_norm": 0.4971419593723526, "learning_rate": 3.28178870114354e-05, "loss": 0.5947, "num_tokens": 99251817.0, "step": 1053 }, { "epoch": 0.17989417989417988, "grad_norm": 0.8454443649508292, "learning_rate": 3.281105990783411e-05, "loss": 0.6998, "num_tokens": 99354051.0, "step": 1054 }, { "epoch": 0.18006485748421233, "grad_norm": 0.5485911571468621, "learning_rate": 3.280423280423281e-05, "loss": 0.5816, "num_tokens": 99424473.0, "step": 1055 }, { "epoch": 0.18023553507424475, "grad_norm": 0.5607732993723046, "learning_rate": 3.279740570063151e-05, "loss": 0.5992, "num_tokens": 99502483.0, "step": 1056 }, { "epoch": 0.18040621266427717, "grad_norm": 0.542588942197577, "learning_rate": 3.2790578597030215e-05, "loss": 0.6663, "num_tokens": 99592999.0, "step": 1057 }, { "epoch": 0.18057689025430962, "grad_norm": 0.5326090429984599, "learning_rate": 3.2783751493428915e-05, "loss": 0.6086, "num_tokens": 99684311.0, "step": 1058 }, { "epoch": 0.18074756784434204, "grad_norm": 0.5466597851546151, "learning_rate": 3.277692438982762e-05, "loss": 0.6688, "num_tokens": 99780203.0, "step": 1059 }, { "epoch": 0.18091824543437446, "grad_norm": 0.5263014501764363, "learning_rate": 3.277009728622632e-05, "loss": 0.6587, "num_tokens": 99881625.0, "step": 1060 }, { "epoch": 0.1810889230244069, "grad_norm": 0.5100129755144327, "learning_rate": 3.276327018262502e-05, "loss": 0.7083, "num_tokens": 99994639.0, "step": 1061 }, { "epoch": 0.18125960061443933, "grad_norm": 0.559290417245474, "learning_rate": 3.275644307902372e-05, "loss": 0.6071, "num_tokens": 100074500.0, "step": 1062 }, { "epoch": 0.18143027820447175, "grad_norm": 0.5568330258691305, "learning_rate": 3.274961597542243e-05, "loss": 0.6638, "num_tokens": 100166121.0, "step": 1063 }, { "epoch": 0.1816009557945042, "grad_norm": 0.4916206257729899, "learning_rate": 3.274278887182113e-05, "loss": 0.5968, "num_tokens": 100269068.0, "step": 1064 }, { "epoch": 0.1817716333845366, "grad_norm": 0.5640768881512479, "learning_rate": 3.273596176821984e-05, "loss": 0.6893, "num_tokens": 100368625.0, "step": 1065 }, { "epoch": 0.18194231097456903, "grad_norm": 0.5054984045064248, "learning_rate": 3.272913466461854e-05, "loss": 0.5573, "num_tokens": 100457850.0, "step": 1066 }, { "epoch": 0.18211298856460148, "grad_norm": 0.5339726695306779, "learning_rate": 3.272230756101724e-05, "loss": 0.6282, "num_tokens": 100543187.0, "step": 1067 }, { "epoch": 0.1822836661546339, "grad_norm": 0.5526661016089206, "learning_rate": 3.2715480457415946e-05, "loss": 0.7, "num_tokens": 100638844.0, "step": 1068 }, { "epoch": 0.18245434374466632, "grad_norm": 0.5817235797564136, "learning_rate": 3.2708653353814646e-05, "loss": 0.759, "num_tokens": 100728471.0, "step": 1069 }, { "epoch": 0.18262502133469877, "grad_norm": 0.5082540313932651, "learning_rate": 3.270182625021335e-05, "loss": 0.5713, "num_tokens": 100825921.0, "step": 1070 }, { "epoch": 0.1827956989247312, "grad_norm": 0.5351922159364358, "learning_rate": 3.2694999146612054e-05, "loss": 0.5924, "num_tokens": 100910293.0, "step": 1071 }, { "epoch": 0.1829663765147636, "grad_norm": 0.5630524388980729, "learning_rate": 3.2688172043010754e-05, "loss": 0.6728, "num_tokens": 101010646.0, "step": 1072 }, { "epoch": 0.18313705410479603, "grad_norm": 0.5481652878447458, "learning_rate": 3.2681344939409454e-05, "loss": 0.6167, "num_tokens": 101110030.0, "step": 1073 }, { "epoch": 0.18330773169482847, "grad_norm": 0.5595702418640467, "learning_rate": 3.267451783580816e-05, "loss": 0.6012, "num_tokens": 101193384.0, "step": 1074 }, { "epoch": 0.1834784092848609, "grad_norm": 0.48923386837257415, "learning_rate": 3.266769073220686e-05, "loss": 0.6201, "num_tokens": 101297583.0, "step": 1075 }, { "epoch": 0.1836490868748933, "grad_norm": 0.5643403237782005, "learning_rate": 3.266086362860557e-05, "loss": 0.6301, "num_tokens": 101385692.0, "step": 1076 }, { "epoch": 0.18381976446492576, "grad_norm": 0.5713528989538723, "learning_rate": 3.265403652500427e-05, "loss": 0.583, "num_tokens": 101465736.0, "step": 1077 }, { "epoch": 0.18399044205495818, "grad_norm": 0.582717857443111, "learning_rate": 3.264720942140297e-05, "loss": 0.6838, "num_tokens": 101562087.0, "step": 1078 }, { "epoch": 0.1841611196449906, "grad_norm": 0.6111038840317133, "learning_rate": 3.264038231780168e-05, "loss": 0.763, "num_tokens": 101640927.0, "step": 1079 }, { "epoch": 0.18433179723502305, "grad_norm": 0.6366982716146388, "learning_rate": 3.263355521420038e-05, "loss": 0.6473, "num_tokens": 101716415.0, "step": 1080 }, { "epoch": 0.18450247482505547, "grad_norm": 0.5608760171765451, "learning_rate": 3.2626728110599085e-05, "loss": 0.7037, "num_tokens": 101812887.0, "step": 1081 }, { "epoch": 0.1846731524150879, "grad_norm": 0.5198839245260992, "learning_rate": 3.2619901006997785e-05, "loss": 0.5937, "num_tokens": 101908704.0, "step": 1082 }, { "epoch": 0.18484383000512034, "grad_norm": 0.5601209048308777, "learning_rate": 3.2613073903396485e-05, "loss": 0.6666, "num_tokens": 101989305.0, "step": 1083 }, { "epoch": 0.18501450759515276, "grad_norm": 0.6055982527404735, "learning_rate": 3.260624679979519e-05, "loss": 0.803, "num_tokens": 102069943.0, "step": 1084 }, { "epoch": 0.18518518518518517, "grad_norm": 0.5107881762218199, "learning_rate": 3.259941969619389e-05, "loss": 0.6232, "num_tokens": 102177168.0, "step": 1085 }, { "epoch": 0.18535586277521762, "grad_norm": 0.4915864234454951, "learning_rate": 3.259259259259259e-05, "loss": 0.6661, "num_tokens": 102291163.0, "step": 1086 }, { "epoch": 0.18552654036525004, "grad_norm": 0.5122698837114523, "learning_rate": 3.2585765488991294e-05, "loss": 0.6154, "num_tokens": 102391844.0, "step": 1087 }, { "epoch": 0.18569721795528246, "grad_norm": 0.6201310717087297, "learning_rate": 3.257893838539e-05, "loss": 0.6881, "num_tokens": 102458025.0, "step": 1088 }, { "epoch": 0.1858678955453149, "grad_norm": 0.5870012509393363, "learning_rate": 3.25721112817887e-05, "loss": 0.592, "num_tokens": 102521212.0, "step": 1089 }, { "epoch": 0.18603857313534733, "grad_norm": 0.5706288198451396, "learning_rate": 3.256528417818741e-05, "loss": 0.7398, "num_tokens": 102627007.0, "step": 1090 }, { "epoch": 0.18620925072537975, "grad_norm": 0.6452831352495688, "learning_rate": 3.255845707458611e-05, "loss": 0.664, "num_tokens": 102683494.0, "step": 1091 }, { "epoch": 0.1863799283154122, "grad_norm": 0.5851078464776941, "learning_rate": 3.2551629970984816e-05, "loss": 0.665, "num_tokens": 102796964.0, "step": 1092 }, { "epoch": 0.18655060590544462, "grad_norm": 0.5185275827759959, "learning_rate": 3.2544802867383516e-05, "loss": 0.6472, "num_tokens": 102894422.0, "step": 1093 }, { "epoch": 0.18672128349547704, "grad_norm": 0.5545725065968523, "learning_rate": 3.2537975763782217e-05, "loss": 0.6604, "num_tokens": 102980586.0, "step": 1094 }, { "epoch": 0.18689196108550948, "grad_norm": 0.5554932364177928, "learning_rate": 3.2531148660180924e-05, "loss": 0.6521, "num_tokens": 103066341.0, "step": 1095 }, { "epoch": 0.1870626386755419, "grad_norm": 0.6262049191929879, "learning_rate": 3.2524321556579624e-05, "loss": 0.719, "num_tokens": 103151568.0, "step": 1096 }, { "epoch": 0.18723331626557432, "grad_norm": 0.5370813994031527, "learning_rate": 3.251749445297833e-05, "loss": 0.6343, "num_tokens": 103234610.0, "step": 1097 }, { "epoch": 0.18740399385560677, "grad_norm": 0.5026264533014893, "learning_rate": 3.2510667349377025e-05, "loss": 0.5625, "num_tokens": 103330483.0, "step": 1098 }, { "epoch": 0.1875746714456392, "grad_norm": 0.5370105848611278, "learning_rate": 3.250384024577573e-05, "loss": 0.6659, "num_tokens": 103427811.0, "step": 1099 }, { "epoch": 0.1877453490356716, "grad_norm": 0.5474789394609674, "learning_rate": 3.249701314217443e-05, "loss": 0.6211, "num_tokens": 103513579.0, "step": 1100 }, { "epoch": 0.18791602662570406, "grad_norm": 0.5729257506000883, "learning_rate": 3.249018603857314e-05, "loss": 0.711, "num_tokens": 103596537.0, "step": 1101 }, { "epoch": 0.18808670421573648, "grad_norm": 0.5576936814737763, "learning_rate": 3.248335893497184e-05, "loss": 0.7033, "num_tokens": 103692831.0, "step": 1102 }, { "epoch": 0.1882573818057689, "grad_norm": 0.5911318339809046, "learning_rate": 3.247653183137054e-05, "loss": 0.7309, "num_tokens": 103776767.0, "step": 1103 }, { "epoch": 0.18842805939580134, "grad_norm": 0.5278330098262008, "learning_rate": 3.246970472776925e-05, "loss": 0.6419, "num_tokens": 103865563.0, "step": 1104 }, { "epoch": 0.18859873698583376, "grad_norm": 0.5656291534696276, "learning_rate": 3.246287762416795e-05, "loss": 0.68, "num_tokens": 103947893.0, "step": 1105 }, { "epoch": 0.18876941457586618, "grad_norm": 0.571301167092783, "learning_rate": 3.2456050520566655e-05, "loss": 0.6352, "num_tokens": 104031836.0, "step": 1106 }, { "epoch": 0.1889400921658986, "grad_norm": 0.5869834634857306, "learning_rate": 3.2449223416965355e-05, "loss": 0.7461, "num_tokens": 104131353.0, "step": 1107 }, { "epoch": 0.18911076975593105, "grad_norm": 0.5568604812570788, "learning_rate": 3.244239631336406e-05, "loss": 0.5101, "num_tokens": 104194395.0, "step": 1108 }, { "epoch": 0.18928144734596347, "grad_norm": 0.5488974276307095, "learning_rate": 3.243556920976276e-05, "loss": 0.5461, "num_tokens": 104266218.0, "step": 1109 }, { "epoch": 0.1894521249359959, "grad_norm": 0.5911375918844386, "learning_rate": 3.242874210616146e-05, "loss": 0.7039, "num_tokens": 104359938.0, "step": 1110 }, { "epoch": 0.18962280252602834, "grad_norm": 0.586628923600477, "learning_rate": 3.2421915002560164e-05, "loss": 0.7038, "num_tokens": 104447477.0, "step": 1111 }, { "epoch": 0.18979348011606076, "grad_norm": 0.49682435622433574, "learning_rate": 3.241508789895887e-05, "loss": 0.6111, "num_tokens": 104547191.0, "step": 1112 }, { "epoch": 0.18996415770609318, "grad_norm": 0.5631442395035774, "learning_rate": 3.240826079535757e-05, "loss": 0.6986, "num_tokens": 104655299.0, "step": 1113 }, { "epoch": 0.19013483529612563, "grad_norm": 0.5498842309099883, "learning_rate": 3.240143369175627e-05, "loss": 0.6319, "num_tokens": 104737789.0, "step": 1114 }, { "epoch": 0.19030551288615805, "grad_norm": 0.5479643191141993, "learning_rate": 3.239460658815498e-05, "loss": 0.6228, "num_tokens": 104828410.0, "step": 1115 }, { "epoch": 0.19047619047619047, "grad_norm": 0.48133777335755307, "learning_rate": 3.238777948455368e-05, "loss": 0.6286, "num_tokens": 104944506.0, "step": 1116 }, { "epoch": 0.1906468680662229, "grad_norm": 0.5597883222557116, "learning_rate": 3.2380952380952386e-05, "loss": 0.7684, "num_tokens": 105058534.0, "step": 1117 }, { "epoch": 0.19081754565625533, "grad_norm": 0.5345610141127837, "learning_rate": 3.2374125277351087e-05, "loss": 0.7473, "num_tokens": 105165763.0, "step": 1118 }, { "epoch": 0.19098822324628775, "grad_norm": 0.6101094637750812, "learning_rate": 3.2367298173749794e-05, "loss": 0.6369, "num_tokens": 105239622.0, "step": 1119 }, { "epoch": 0.1911589008363202, "grad_norm": 0.541343466939214, "learning_rate": 3.2360471070148494e-05, "loss": 0.6896, "num_tokens": 105344262.0, "step": 1120 }, { "epoch": 0.19132957842635262, "grad_norm": 0.5039821701295021, "learning_rate": 3.2353643966547194e-05, "loss": 0.6215, "num_tokens": 105456847.0, "step": 1121 }, { "epoch": 0.19150025601638504, "grad_norm": 0.5021140546384327, "learning_rate": 3.23468168629459e-05, "loss": 0.5711, "num_tokens": 105552030.0, "step": 1122 }, { "epoch": 0.1916709336064175, "grad_norm": 0.5164271283580785, "learning_rate": 3.23399897593446e-05, "loss": 0.5541, "num_tokens": 105638556.0, "step": 1123 }, { "epoch": 0.1918416111964499, "grad_norm": 0.548664168761183, "learning_rate": 3.23331626557433e-05, "loss": 0.6119, "num_tokens": 105714405.0, "step": 1124 }, { "epoch": 0.19201228878648233, "grad_norm": 0.513142264188867, "learning_rate": 3.2326335552142e-05, "loss": 0.6251, "num_tokens": 105815274.0, "step": 1125 }, { "epoch": 0.19218296637651477, "grad_norm": 0.5681297877892892, "learning_rate": 3.231950844854071e-05, "loss": 0.6082, "num_tokens": 105887672.0, "step": 1126 }, { "epoch": 0.1923536439665472, "grad_norm": 0.5053382579900002, "learning_rate": 3.231268134493941e-05, "loss": 0.6346, "num_tokens": 105986033.0, "step": 1127 }, { "epoch": 0.19252432155657961, "grad_norm": 0.5772468086037074, "learning_rate": 3.230585424133812e-05, "loss": 0.5979, "num_tokens": 106051766.0, "step": 1128 }, { "epoch": 0.19269499914661206, "grad_norm": 0.5122724235561483, "learning_rate": 3.229902713773682e-05, "loss": 0.6438, "num_tokens": 106158971.0, "step": 1129 }, { "epoch": 0.19286567673664448, "grad_norm": 0.5085038865401955, "learning_rate": 3.229220003413552e-05, "loss": 0.5763, "num_tokens": 106256629.0, "step": 1130 }, { "epoch": 0.1930363543266769, "grad_norm": 0.54281220928071, "learning_rate": 3.2285372930534225e-05, "loss": 0.5855, "num_tokens": 106339077.0, "step": 1131 }, { "epoch": 0.19320703191670935, "grad_norm": 0.5261581795764751, "learning_rate": 3.2278545826932926e-05, "loss": 0.6462, "num_tokens": 106439785.0, "step": 1132 }, { "epoch": 0.19337770950674177, "grad_norm": 0.5148012673126799, "learning_rate": 3.227171872333163e-05, "loss": 0.6345, "num_tokens": 106530994.0, "step": 1133 }, { "epoch": 0.1935483870967742, "grad_norm": 0.5402074780243247, "learning_rate": 3.226489161973033e-05, "loss": 0.6763, "num_tokens": 106636742.0, "step": 1134 }, { "epoch": 0.19371906468680664, "grad_norm": 0.5603725284096069, "learning_rate": 3.2258064516129034e-05, "loss": 0.7461, "num_tokens": 106743423.0, "step": 1135 }, { "epoch": 0.19388974227683906, "grad_norm": 0.5508794878723339, "learning_rate": 3.2251237412527734e-05, "loss": 0.5919, "num_tokens": 106819245.0, "step": 1136 }, { "epoch": 0.19406041986687148, "grad_norm": 0.5356098131533387, "learning_rate": 3.224441030892644e-05, "loss": 0.6631, "num_tokens": 106916175.0, "step": 1137 }, { "epoch": 0.1942310974569039, "grad_norm": 0.48016732780647425, "learning_rate": 3.223758320532514e-05, "loss": 0.5783, "num_tokens": 107019118.0, "step": 1138 }, { "epoch": 0.19440177504693634, "grad_norm": 0.5031484153937038, "learning_rate": 3.223075610172385e-05, "loss": 0.5668, "num_tokens": 107107651.0, "step": 1139 }, { "epoch": 0.19457245263696876, "grad_norm": 0.5256326313692414, "learning_rate": 3.222392899812255e-05, "loss": 0.6508, "num_tokens": 107207676.0, "step": 1140 }, { "epoch": 0.19474313022700118, "grad_norm": 0.4911639325029448, "learning_rate": 3.221710189452125e-05, "loss": 0.6563, "num_tokens": 107336884.0, "step": 1141 }, { "epoch": 0.19491380781703363, "grad_norm": 0.5302807238634104, "learning_rate": 3.2210274790919957e-05, "loss": 0.7657, "num_tokens": 107446751.0, "step": 1142 }, { "epoch": 0.19508448540706605, "grad_norm": 0.49781215019971115, "learning_rate": 3.220344768731866e-05, "loss": 0.6265, "num_tokens": 107550760.0, "step": 1143 }, { "epoch": 0.19525516299709847, "grad_norm": 0.5053788189858137, "learning_rate": 3.2196620583717364e-05, "loss": 0.5882, "num_tokens": 107640716.0, "step": 1144 }, { "epoch": 0.19542584058713092, "grad_norm": 0.49393683153341494, "learning_rate": 3.2189793480116064e-05, "loss": 0.6, "num_tokens": 107743473.0, "step": 1145 }, { "epoch": 0.19559651817716334, "grad_norm": 0.5208763932077316, "learning_rate": 3.2182966376514765e-05, "loss": 0.5937, "num_tokens": 107832800.0, "step": 1146 }, { "epoch": 0.19576719576719576, "grad_norm": 0.5292656593605197, "learning_rate": 3.2176139272913465e-05, "loss": 0.6884, "num_tokens": 107944386.0, "step": 1147 }, { "epoch": 0.1959378733572282, "grad_norm": 0.5446768984319162, "learning_rate": 3.216931216931217e-05, "loss": 0.6519, "num_tokens": 108038297.0, "step": 1148 }, { "epoch": 0.19610855094726062, "grad_norm": 0.4887527013954778, "learning_rate": 3.216248506571087e-05, "loss": 0.5768, "num_tokens": 108133689.0, "step": 1149 }, { "epoch": 0.19627922853729304, "grad_norm": 0.5432612700896221, "learning_rate": 3.215565796210958e-05, "loss": 0.6427, "num_tokens": 108222717.0, "step": 1150 }, { "epoch": 0.1964499061273255, "grad_norm": 0.5202153373150797, "learning_rate": 3.214883085850828e-05, "loss": 0.6335, "num_tokens": 108322894.0, "step": 1151 }, { "epoch": 0.1966205837173579, "grad_norm": 0.557108573379649, "learning_rate": 3.214200375490698e-05, "loss": 0.6718, "num_tokens": 108412341.0, "step": 1152 }, { "epoch": 0.19679126130739033, "grad_norm": 0.4922600483102208, "learning_rate": 3.213517665130569e-05, "loss": 0.6421, "num_tokens": 108514089.0, "step": 1153 }, { "epoch": 0.19696193889742278, "grad_norm": 0.5073566943525165, "learning_rate": 3.212834954770439e-05, "loss": 0.6546, "num_tokens": 108612899.0, "step": 1154 }, { "epoch": 0.1971326164874552, "grad_norm": 0.6009183821473304, "learning_rate": 3.2121522444103095e-05, "loss": 0.7235, "num_tokens": 108707226.0, "step": 1155 }, { "epoch": 0.19730329407748762, "grad_norm": 0.5548843708551608, "learning_rate": 3.2114695340501796e-05, "loss": 0.6479, "num_tokens": 108789048.0, "step": 1156 }, { "epoch": 0.19747397166752007, "grad_norm": 0.5425894762001847, "learning_rate": 3.2107868236900496e-05, "loss": 0.6941, "num_tokens": 108910065.0, "step": 1157 }, { "epoch": 0.19764464925755248, "grad_norm": 0.5005868754631077, "learning_rate": 3.21010411332992e-05, "loss": 0.607, "num_tokens": 109013359.0, "step": 1158 }, { "epoch": 0.1978153268475849, "grad_norm": 0.5041495072246857, "learning_rate": 3.2094214029697904e-05, "loss": 0.6131, "num_tokens": 109119889.0, "step": 1159 }, { "epoch": 0.19798600443761735, "grad_norm": 0.5435513972300262, "learning_rate": 3.2087386926096604e-05, "loss": 0.6658, "num_tokens": 109216118.0, "step": 1160 }, { "epoch": 0.19815668202764977, "grad_norm": 0.5649783472557393, "learning_rate": 3.2080559822495304e-05, "loss": 0.7216, "num_tokens": 109302462.0, "step": 1161 }, { "epoch": 0.1983273596176822, "grad_norm": 0.5346780329708679, "learning_rate": 3.207373271889401e-05, "loss": 0.5438, "num_tokens": 109378520.0, "step": 1162 }, { "epoch": 0.19849803720771464, "grad_norm": 0.5580479633017513, "learning_rate": 3.206690561529271e-05, "loss": 0.5833, "num_tokens": 109456659.0, "step": 1163 }, { "epoch": 0.19866871479774706, "grad_norm": 0.5074333066417308, "learning_rate": 3.206007851169142e-05, "loss": 0.5869, "num_tokens": 109543962.0, "step": 1164 }, { "epoch": 0.19883939238777948, "grad_norm": 0.534418029734114, "learning_rate": 3.205325140809012e-05, "loss": 0.6113, "num_tokens": 109622518.0, "step": 1165 }, { "epoch": 0.19901006997781193, "grad_norm": 0.5678302954002895, "learning_rate": 3.2046424304488827e-05, "loss": 0.7108, "num_tokens": 109721877.0, "step": 1166 }, { "epoch": 0.19918074756784435, "grad_norm": 0.5574099816344207, "learning_rate": 3.203959720088753e-05, "loss": 0.6791, "num_tokens": 109818902.0, "step": 1167 }, { "epoch": 0.19935142515787677, "grad_norm": 0.4853571734349343, "learning_rate": 3.203277009728623e-05, "loss": 0.6847, "num_tokens": 109930834.0, "step": 1168 }, { "epoch": 0.19952210274790919, "grad_norm": 0.5806211728395615, "learning_rate": 3.2025942993684934e-05, "loss": 0.7187, "num_tokens": 110018093.0, "step": 1169 }, { "epoch": 0.19969278033794163, "grad_norm": 0.5215439719902569, "learning_rate": 3.2019115890083635e-05, "loss": 0.6969, "num_tokens": 110130828.0, "step": 1170 }, { "epoch": 0.19986345792797405, "grad_norm": 0.5000324385282668, "learning_rate": 3.201228878648234e-05, "loss": 0.6326, "num_tokens": 110243388.0, "step": 1171 }, { "epoch": 0.20003413551800647, "grad_norm": 0.5194375568293453, "learning_rate": 3.2005461682881036e-05, "loss": 0.5373, "num_tokens": 110328721.0, "step": 1172 }, { "epoch": 0.20020481310803892, "grad_norm": 0.5654553749664818, "learning_rate": 3.199863457927974e-05, "loss": 0.5911, "num_tokens": 110393966.0, "step": 1173 }, { "epoch": 0.20037549069807134, "grad_norm": 0.5945101509351507, "learning_rate": 3.199180747567844e-05, "loss": 0.6653, "num_tokens": 110481065.0, "step": 1174 }, { "epoch": 0.20054616828810376, "grad_norm": 0.5214837651290356, "learning_rate": 3.198498037207715e-05, "loss": 0.658, "num_tokens": 110575153.0, "step": 1175 }, { "epoch": 0.2007168458781362, "grad_norm": 0.5543542031231169, "learning_rate": 3.197815326847585e-05, "loss": 0.6772, "num_tokens": 110660771.0, "step": 1176 }, { "epoch": 0.20088752346816863, "grad_norm": 0.49039191995376574, "learning_rate": 3.197132616487455e-05, "loss": 0.6007, "num_tokens": 110758654.0, "step": 1177 }, { "epoch": 0.20105820105820105, "grad_norm": 0.5294079119762495, "learning_rate": 3.196449906127326e-05, "loss": 0.6643, "num_tokens": 110855119.0, "step": 1178 }, { "epoch": 0.2012288786482335, "grad_norm": 0.5662756836137125, "learning_rate": 3.195767195767196e-05, "loss": 0.7346, "num_tokens": 110957561.0, "step": 1179 }, { "epoch": 0.20139955623826591, "grad_norm": 0.49087761417807313, "learning_rate": 3.1950844854070666e-05, "loss": 0.6387, "num_tokens": 111059159.0, "step": 1180 }, { "epoch": 0.20157023382829833, "grad_norm": 0.5762812285546797, "learning_rate": 3.1944017750469366e-05, "loss": 0.7367, "num_tokens": 111157079.0, "step": 1181 }, { "epoch": 0.20174091141833078, "grad_norm": 0.4970947828644773, "learning_rate": 3.193719064686807e-05, "loss": 0.6478, "num_tokens": 111264090.0, "step": 1182 }, { "epoch": 0.2019115890083632, "grad_norm": 0.5945906816054343, "learning_rate": 3.1930363543266774e-05, "loss": 0.6572, "num_tokens": 111333536.0, "step": 1183 }, { "epoch": 0.20208226659839562, "grad_norm": 0.5629439022611719, "learning_rate": 3.1923536439665474e-05, "loss": 0.6658, "num_tokens": 111428728.0, "step": 1184 }, { "epoch": 0.20225294418842807, "grad_norm": 0.5415091052882329, "learning_rate": 3.1916709336064174e-05, "loss": 0.605, "num_tokens": 111511506.0, "step": 1185 }, { "epoch": 0.2024236217784605, "grad_norm": 0.5133376921347124, "learning_rate": 3.190988223246288e-05, "loss": 0.7027, "num_tokens": 111625389.0, "step": 1186 }, { "epoch": 0.2025942993684929, "grad_norm": 0.5413191664490349, "learning_rate": 3.190305512886158e-05, "loss": 0.5584, "num_tokens": 111705376.0, "step": 1187 }, { "epoch": 0.20276497695852536, "grad_norm": 0.5594397807955257, "learning_rate": 3.189622802526028e-05, "loss": 0.6565, "num_tokens": 111784552.0, "step": 1188 }, { "epoch": 0.20293565454855778, "grad_norm": 0.5168194206491402, "learning_rate": 3.188940092165899e-05, "loss": 0.5915, "num_tokens": 111877352.0, "step": 1189 }, { "epoch": 0.2031063321385902, "grad_norm": 0.5403510971648063, "learning_rate": 3.188257381805769e-05, "loss": 0.6026, "num_tokens": 111964240.0, "step": 1190 }, { "epoch": 0.20327700972862264, "grad_norm": 0.4957823174778685, "learning_rate": 3.18757467144564e-05, "loss": 0.6956, "num_tokens": 112078514.0, "step": 1191 }, { "epoch": 0.20344768731865506, "grad_norm": 0.5641963180064267, "learning_rate": 3.18689196108551e-05, "loss": 0.654, "num_tokens": 112163096.0, "step": 1192 }, { "epoch": 0.20361836490868748, "grad_norm": 0.5753269806900153, "learning_rate": 3.1862092507253804e-05, "loss": 0.593, "num_tokens": 112241135.0, "step": 1193 }, { "epoch": 0.20378904249871993, "grad_norm": 0.5345104319266557, "learning_rate": 3.1855265403652505e-05, "loss": 0.6506, "num_tokens": 112342334.0, "step": 1194 }, { "epoch": 0.20395972008875235, "grad_norm": 0.6258082947474475, "learning_rate": 3.1848438300051205e-05, "loss": 0.6784, "num_tokens": 112417200.0, "step": 1195 }, { "epoch": 0.20413039767878477, "grad_norm": 0.4790928286809921, "learning_rate": 3.184161119644991e-05, "loss": 0.5896, "num_tokens": 112527627.0, "step": 1196 }, { "epoch": 0.20430107526881722, "grad_norm": 0.6126519126918921, "learning_rate": 3.183478409284861e-05, "loss": 0.6608, "num_tokens": 112594455.0, "step": 1197 }, { "epoch": 0.20447175285884964, "grad_norm": 0.6237329224514461, "learning_rate": 3.182795698924731e-05, "loss": 0.7256, "num_tokens": 112679144.0, "step": 1198 }, { "epoch": 0.20464243044888206, "grad_norm": 0.5764426538288566, "learning_rate": 3.1821129885646013e-05, "loss": 0.6013, "num_tokens": 112754030.0, "step": 1199 }, { "epoch": 0.2048131080389145, "grad_norm": 0.5703048594151886, "learning_rate": 3.181430278204472e-05, "loss": 0.678, "num_tokens": 112839477.0, "step": 1200 }, { "epoch": 0.20498378562894692, "grad_norm": 0.5347004271125881, "learning_rate": 3.180747567844342e-05, "loss": 0.6478, "num_tokens": 112944672.0, "step": 1201 }, { "epoch": 0.20515446321897934, "grad_norm": 0.5619218222314204, "learning_rate": 3.180064857484213e-05, "loss": 0.6854, "num_tokens": 113036654.0, "step": 1202 }, { "epoch": 0.20532514080901176, "grad_norm": 0.5291634222269491, "learning_rate": 3.179382147124083e-05, "loss": 0.7076, "num_tokens": 113148056.0, "step": 1203 }, { "epoch": 0.2054958183990442, "grad_norm": 0.5609713633971667, "learning_rate": 3.178699436763953e-05, "loss": 0.6273, "num_tokens": 113245588.0, "step": 1204 }, { "epoch": 0.20566649598907663, "grad_norm": 0.5529606138094956, "learning_rate": 3.1780167264038236e-05, "loss": 0.6053, "num_tokens": 113340484.0, "step": 1205 }, { "epoch": 0.20583717357910905, "grad_norm": 0.6097823513189452, "learning_rate": 3.1773340160436936e-05, "loss": 0.651, "num_tokens": 113417891.0, "step": 1206 }, { "epoch": 0.2060078511691415, "grad_norm": 0.5522006009775958, "learning_rate": 3.1766513056835644e-05, "loss": 0.6357, "num_tokens": 113500397.0, "step": 1207 }, { "epoch": 0.20617852875917392, "grad_norm": 0.5536082952019296, "learning_rate": 3.1759685953234344e-05, "loss": 0.6665, "num_tokens": 113589448.0, "step": 1208 }, { "epoch": 0.20634920634920634, "grad_norm": 0.5046931773773035, "learning_rate": 3.1752858849633044e-05, "loss": 0.611, "num_tokens": 113687162.0, "step": 1209 }, { "epoch": 0.20651988393923879, "grad_norm": 0.5985336891710991, "learning_rate": 3.1746031746031745e-05, "loss": 0.6572, "num_tokens": 113761596.0, "step": 1210 }, { "epoch": 0.2066905615292712, "grad_norm": 0.5322990012012707, "learning_rate": 3.173920464243045e-05, "loss": 0.5766, "num_tokens": 113846153.0, "step": 1211 }, { "epoch": 0.20686123911930362, "grad_norm": 0.5368472584282229, "learning_rate": 3.173237753882915e-05, "loss": 0.6438, "num_tokens": 113935361.0, "step": 1212 }, { "epoch": 0.20703191670933607, "grad_norm": 0.5186546839971135, "learning_rate": 3.172555043522786e-05, "loss": 0.5708, "num_tokens": 114021032.0, "step": 1213 }, { "epoch": 0.2072025942993685, "grad_norm": 0.5477781359792933, "learning_rate": 3.171872333162656e-05, "loss": 0.6184, "num_tokens": 114137486.0, "step": 1214 }, { "epoch": 0.2073732718894009, "grad_norm": 0.542155697476986, "learning_rate": 3.171189622802526e-05, "loss": 0.5812, "num_tokens": 114230341.0, "step": 1215 }, { "epoch": 0.20754394947943336, "grad_norm": 0.5349297611024131, "learning_rate": 3.170506912442397e-05, "loss": 0.7017, "num_tokens": 114339722.0, "step": 1216 }, { "epoch": 0.20771462706946578, "grad_norm": 0.5236244210234993, "learning_rate": 3.169824202082267e-05, "loss": 0.6941, "num_tokens": 114445866.0, "step": 1217 }, { "epoch": 0.2078853046594982, "grad_norm": 0.5173004880775959, "learning_rate": 3.1691414917221375e-05, "loss": 0.5812, "num_tokens": 114532325.0, "step": 1218 }, { "epoch": 0.20805598224953065, "grad_norm": 0.5128247496683166, "learning_rate": 3.1684587813620075e-05, "loss": 0.5755, "num_tokens": 114621593.0, "step": 1219 }, { "epoch": 0.20822665983956307, "grad_norm": 0.4991163150905879, "learning_rate": 3.167776071001878e-05, "loss": 0.5972, "num_tokens": 114734962.0, "step": 1220 }, { "epoch": 0.2083973374295955, "grad_norm": 0.5498123984433351, "learning_rate": 3.167093360641748e-05, "loss": 0.6906, "num_tokens": 114831124.0, "step": 1221 }, { "epoch": 0.20856801501962793, "grad_norm": 0.5441310937127648, "learning_rate": 3.166410650281618e-05, "loss": 0.6631, "num_tokens": 114925801.0, "step": 1222 }, { "epoch": 0.20873869260966035, "grad_norm": 0.5498247424056939, "learning_rate": 3.1657279399214883e-05, "loss": 0.6332, "num_tokens": 115003451.0, "step": 1223 }, { "epoch": 0.20890937019969277, "grad_norm": 0.6331994291604266, "learning_rate": 3.165045229561359e-05, "loss": 0.573, "num_tokens": 115107865.0, "step": 1224 }, { "epoch": 0.20908004778972522, "grad_norm": 0.5453299785647201, "learning_rate": 3.164362519201229e-05, "loss": 0.6311, "num_tokens": 115200366.0, "step": 1225 }, { "epoch": 0.20925072537975764, "grad_norm": 0.6223780866393828, "learning_rate": 3.163679808841099e-05, "loss": 0.6405, "num_tokens": 115260802.0, "step": 1226 }, { "epoch": 0.20942140296979006, "grad_norm": 0.5740723518410151, "learning_rate": 3.16299709848097e-05, "loss": 0.7275, "num_tokens": 115355658.0, "step": 1227 }, { "epoch": 0.2095920805598225, "grad_norm": 0.5245484977786767, "learning_rate": 3.16231438812084e-05, "loss": 0.5905, "num_tokens": 115452460.0, "step": 1228 }, { "epoch": 0.20976275814985493, "grad_norm": 0.5683687881764138, "learning_rate": 3.1616316777607106e-05, "loss": 0.6309, "num_tokens": 115571698.0, "step": 1229 }, { "epoch": 0.20993343573988735, "grad_norm": 0.5307401254636978, "learning_rate": 3.1609489674005806e-05, "loss": 0.5892, "num_tokens": 115666418.0, "step": 1230 }, { "epoch": 0.2101041133299198, "grad_norm": 0.565734627882508, "learning_rate": 3.160266257040451e-05, "loss": 0.6696, "num_tokens": 115768408.0, "step": 1231 }, { "epoch": 0.21027479091995221, "grad_norm": 0.5729960832818445, "learning_rate": 3.1595835466803214e-05, "loss": 0.7195, "num_tokens": 115862117.0, "step": 1232 }, { "epoch": 0.21044546850998463, "grad_norm": 0.4843978401101547, "learning_rate": 3.1589008363201914e-05, "loss": 0.5948, "num_tokens": 115973402.0, "step": 1233 }, { "epoch": 0.21061614610001705, "grad_norm": 0.4948623988961894, "learning_rate": 3.1582181259600615e-05, "loss": 0.5926, "num_tokens": 116075702.0, "step": 1234 }, { "epoch": 0.2107868236900495, "grad_norm": 0.4769482260274982, "learning_rate": 3.1575354155999315e-05, "loss": 0.6842, "num_tokens": 116221521.0, "step": 1235 }, { "epoch": 0.21095750128008192, "grad_norm": 0.5233544410930263, "learning_rate": 3.156852705239802e-05, "loss": 0.6473, "num_tokens": 116325459.0, "step": 1236 }, { "epoch": 0.21112817887011434, "grad_norm": 0.47978276562972266, "learning_rate": 3.156169994879672e-05, "loss": 0.5443, "num_tokens": 116415110.0, "step": 1237 }, { "epoch": 0.2112988564601468, "grad_norm": 0.5003696070289188, "learning_rate": 3.155487284519543e-05, "loss": 0.5821, "num_tokens": 116503684.0, "step": 1238 }, { "epoch": 0.2114695340501792, "grad_norm": 0.49538542629099286, "learning_rate": 3.154804574159413e-05, "loss": 0.6478, "num_tokens": 116617822.0, "step": 1239 }, { "epoch": 0.21164021164021163, "grad_norm": 0.5672177984293683, "learning_rate": 3.154121863799284e-05, "loss": 0.6593, "num_tokens": 116695938.0, "step": 1240 }, { "epoch": 0.21181088923024408, "grad_norm": 0.5130558609397715, "learning_rate": 3.153439153439154e-05, "loss": 0.6222, "num_tokens": 116795457.0, "step": 1241 }, { "epoch": 0.2119815668202765, "grad_norm": 0.5119341795162801, "learning_rate": 3.152756443079024e-05, "loss": 0.5919, "num_tokens": 116889602.0, "step": 1242 }, { "epoch": 0.21215224441030892, "grad_norm": 0.5173500262513236, "learning_rate": 3.1520737327188945e-05, "loss": 0.6173, "num_tokens": 116980366.0, "step": 1243 }, { "epoch": 0.21232292200034136, "grad_norm": 0.5613048185328698, "learning_rate": 3.1513910223587645e-05, "loss": 0.7647, "num_tokens": 117102312.0, "step": 1244 }, { "epoch": 0.21249359959037378, "grad_norm": 0.5273003954455242, "learning_rate": 3.150708311998635e-05, "loss": 0.7458, "num_tokens": 117208847.0, "step": 1245 }, { "epoch": 0.2126642771804062, "grad_norm": 0.5770437430742941, "learning_rate": 3.1500256016385046e-05, "loss": 0.6892, "num_tokens": 117284530.0, "step": 1246 }, { "epoch": 0.21283495477043865, "grad_norm": 0.5839081360173941, "learning_rate": 3.149342891278375e-05, "loss": 0.6039, "num_tokens": 117363528.0, "step": 1247 }, { "epoch": 0.21300563236047107, "grad_norm": 0.5669818974123074, "learning_rate": 3.1486601809182454e-05, "loss": 0.5832, "num_tokens": 117436570.0, "step": 1248 }, { "epoch": 0.2131763099505035, "grad_norm": 0.5379855647756979, "learning_rate": 3.147977470558116e-05, "loss": 0.621, "num_tokens": 117513214.0, "step": 1249 }, { "epoch": 0.21334698754053594, "grad_norm": 0.5513990518624673, "learning_rate": 3.147294760197986e-05, "loss": 0.6013, "num_tokens": 117591079.0, "step": 1250 }, { "epoch": 0.21351766513056836, "grad_norm": 0.5551520326386693, "learning_rate": 3.146612049837857e-05, "loss": 0.6568, "num_tokens": 117671224.0, "step": 1251 }, { "epoch": 0.21368834272060078, "grad_norm": 0.5445912747580625, "learning_rate": 3.145929339477727e-05, "loss": 0.5659, "num_tokens": 117746220.0, "step": 1252 }, { "epoch": 0.21385902031063322, "grad_norm": 0.5234794583809146, "learning_rate": 3.145246629117597e-05, "loss": 0.6164, "num_tokens": 117840980.0, "step": 1253 }, { "epoch": 0.21402969790066564, "grad_norm": 0.49693692336591405, "learning_rate": 3.1445639187574676e-05, "loss": 0.6505, "num_tokens": 117942460.0, "step": 1254 }, { "epoch": 0.21420037549069806, "grad_norm": 0.5134943161102437, "learning_rate": 3.143881208397338e-05, "loss": 0.5543, "num_tokens": 118044928.0, "step": 1255 }, { "epoch": 0.2143710530807305, "grad_norm": 0.5183263056183482, "learning_rate": 3.1431984980372084e-05, "loss": 0.5879, "num_tokens": 118128078.0, "step": 1256 }, { "epoch": 0.21454173067076293, "grad_norm": 0.5154068562538553, "learning_rate": 3.1425157876770784e-05, "loss": 0.6568, "num_tokens": 118222261.0, "step": 1257 }, { "epoch": 0.21471240826079535, "grad_norm": 0.5260147692226151, "learning_rate": 3.1418330773169485e-05, "loss": 0.7319, "num_tokens": 118335657.0, "step": 1258 }, { "epoch": 0.2148830858508278, "grad_norm": 0.5152113677500072, "learning_rate": 3.1411503669568185e-05, "loss": 0.625, "num_tokens": 118432427.0, "step": 1259 }, { "epoch": 0.21505376344086022, "grad_norm": 0.5575494752441983, "learning_rate": 3.140467656596689e-05, "loss": 0.7142, "num_tokens": 118527223.0, "step": 1260 }, { "epoch": 0.21522444103089264, "grad_norm": 0.6291522595811296, "learning_rate": 3.139784946236559e-05, "loss": 0.6841, "num_tokens": 118606716.0, "step": 1261 }, { "epoch": 0.21539511862092509, "grad_norm": 0.4824992417597077, "learning_rate": 3.139102235876429e-05, "loss": 0.7177, "num_tokens": 118755823.0, "step": 1262 }, { "epoch": 0.2155657962109575, "grad_norm": 0.5518755713786561, "learning_rate": 3.1384195255163e-05, "loss": 0.6114, "num_tokens": 118836442.0, "step": 1263 }, { "epoch": 0.21573647380098993, "grad_norm": 0.5741911423299354, "learning_rate": 3.13773681515617e-05, "loss": 0.6782, "num_tokens": 118941398.0, "step": 1264 }, { "epoch": 0.21590715139102234, "grad_norm": 0.5633030350424183, "learning_rate": 3.137054104796041e-05, "loss": 0.7179, "num_tokens": 119041387.0, "step": 1265 }, { "epoch": 0.2160778289810548, "grad_norm": 0.5200680387264794, "learning_rate": 3.136371394435911e-05, "loss": 0.6356, "num_tokens": 119137004.0, "step": 1266 }, { "epoch": 0.2162485065710872, "grad_norm": 0.5660484564296533, "learning_rate": 3.1356886840757815e-05, "loss": 0.664, "num_tokens": 119217999.0, "step": 1267 }, { "epoch": 0.21641918416111963, "grad_norm": 0.5546447747144146, "learning_rate": 3.1350059737156515e-05, "loss": 0.6585, "num_tokens": 119303808.0, "step": 1268 }, { "epoch": 0.21658986175115208, "grad_norm": 0.4761753572864742, "learning_rate": 3.1343232633555216e-05, "loss": 0.6305, "num_tokens": 119416053.0, "step": 1269 }, { "epoch": 0.2167605393411845, "grad_norm": 0.4777789784823375, "learning_rate": 3.133640552995392e-05, "loss": 0.6262, "num_tokens": 119527410.0, "step": 1270 }, { "epoch": 0.21693121693121692, "grad_norm": 0.5179031741404582, "learning_rate": 3.132957842635262e-05, "loss": 0.6936, "num_tokens": 119635084.0, "step": 1271 }, { "epoch": 0.21710189452124937, "grad_norm": 0.45758807736388535, "learning_rate": 3.1322751322751324e-05, "loss": 0.5837, "num_tokens": 119767281.0, "step": 1272 }, { "epoch": 0.2172725721112818, "grad_norm": 0.5146491826768651, "learning_rate": 3.1315924219150024e-05, "loss": 0.6197, "num_tokens": 119869259.0, "step": 1273 }, { "epoch": 0.2174432497013142, "grad_norm": 0.48202232164069053, "learning_rate": 3.130909711554873e-05, "loss": 0.5868, "num_tokens": 119964352.0, "step": 1274 }, { "epoch": 0.21761392729134665, "grad_norm": 0.5544959261478811, "learning_rate": 3.130227001194743e-05, "loss": 0.6359, "num_tokens": 120052485.0, "step": 1275 }, { "epoch": 0.21778460488137907, "grad_norm": 0.594636136150917, "learning_rate": 3.129544290834614e-05, "loss": 0.6015, "num_tokens": 120127301.0, "step": 1276 }, { "epoch": 0.2179552824714115, "grad_norm": 0.5235087633329406, "learning_rate": 3.128861580474484e-05, "loss": 0.6517, "num_tokens": 120231508.0, "step": 1277 }, { "epoch": 0.21812596006144394, "grad_norm": 0.5410400563979285, "learning_rate": 3.128178870114354e-05, "loss": 0.6908, "num_tokens": 120324441.0, "step": 1278 }, { "epoch": 0.21829663765147636, "grad_norm": 0.5022069869264864, "learning_rate": 3.127496159754225e-05, "loss": 0.5828, "num_tokens": 120437090.0, "step": 1279 }, { "epoch": 0.21846731524150878, "grad_norm": 0.5721654539778839, "learning_rate": 3.126813449394095e-05, "loss": 0.628, "num_tokens": 120510980.0, "step": 1280 }, { "epoch": 0.21863799283154123, "grad_norm": 0.5061966069305591, "learning_rate": 3.1261307390339654e-05, "loss": 0.6091, "num_tokens": 120602417.0, "step": 1281 }, { "epoch": 0.21880867042157365, "grad_norm": 0.5853981615013306, "learning_rate": 3.1254480286738355e-05, "loss": 0.6632, "num_tokens": 120690982.0, "step": 1282 }, { "epoch": 0.21897934801160607, "grad_norm": 0.49649426661222085, "learning_rate": 3.124765318313706e-05, "loss": 0.6801, "num_tokens": 120807785.0, "step": 1283 }, { "epoch": 0.21915002560163852, "grad_norm": 0.5554993984773283, "learning_rate": 3.1240826079535755e-05, "loss": 0.7424, "num_tokens": 120894739.0, "step": 1284 }, { "epoch": 0.21932070319167093, "grad_norm": 0.5008720564652658, "learning_rate": 3.123399897593446e-05, "loss": 0.524, "num_tokens": 120974440.0, "step": 1285 }, { "epoch": 0.21949138078170335, "grad_norm": 0.5122543402641064, "learning_rate": 3.122717187233316e-05, "loss": 0.6278, "num_tokens": 121066247.0, "step": 1286 }, { "epoch": 0.2196620583717358, "grad_norm": 0.4840263928462038, "learning_rate": 3.122034476873187e-05, "loss": 0.6172, "num_tokens": 121171603.0, "step": 1287 }, { "epoch": 0.21983273596176822, "grad_norm": 0.5924820957646824, "learning_rate": 3.121351766513057e-05, "loss": 0.6743, "num_tokens": 121243932.0, "step": 1288 }, { "epoch": 0.22000341355180064, "grad_norm": 0.5204749135425585, "learning_rate": 3.120669056152927e-05, "loss": 0.6694, "num_tokens": 121348566.0, "step": 1289 }, { "epoch": 0.2201740911418331, "grad_norm": 0.5292652775647423, "learning_rate": 3.119986345792798e-05, "loss": 0.5834, "num_tokens": 121438787.0, "step": 1290 }, { "epoch": 0.2203447687318655, "grad_norm": 0.5061649487699258, "learning_rate": 3.119303635432668e-05, "loss": 0.6399, "num_tokens": 121550565.0, "step": 1291 }, { "epoch": 0.22051544632189793, "grad_norm": 0.5637121935073286, "learning_rate": 3.1186209250725385e-05, "loss": 0.6501, "num_tokens": 121633554.0, "step": 1292 }, { "epoch": 0.22068612391193038, "grad_norm": 0.5338703407322376, "learning_rate": 3.1179382147124086e-05, "loss": 0.6767, "num_tokens": 121719543.0, "step": 1293 }, { "epoch": 0.2208568015019628, "grad_norm": 0.529766493317216, "learning_rate": 3.117255504352279e-05, "loss": 0.6482, "num_tokens": 121811864.0, "step": 1294 }, { "epoch": 0.22102747909199522, "grad_norm": 0.527641882341383, "learning_rate": 3.116572793992149e-05, "loss": 0.6458, "num_tokens": 121897716.0, "step": 1295 }, { "epoch": 0.22119815668202766, "grad_norm": 0.5590862162114275, "learning_rate": 3.1158900836320194e-05, "loss": 0.6397, "num_tokens": 121985184.0, "step": 1296 }, { "epoch": 0.22136883427206008, "grad_norm": 0.5416493845992049, "learning_rate": 3.1152073732718894e-05, "loss": 0.6447, "num_tokens": 122073494.0, "step": 1297 }, { "epoch": 0.2215395118620925, "grad_norm": 0.5400498090162702, "learning_rate": 3.11452466291176e-05, "loss": 0.5836, "num_tokens": 122155152.0, "step": 1298 }, { "epoch": 0.22171018945212492, "grad_norm": 0.5118083289354799, "learning_rate": 3.11384195255163e-05, "loss": 0.6868, "num_tokens": 122284729.0, "step": 1299 }, { "epoch": 0.22188086704215737, "grad_norm": 0.5062459409330697, "learning_rate": 3.1131592421915e-05, "loss": 0.7157, "num_tokens": 122410649.0, "step": 1300 }, { "epoch": 0.2220515446321898, "grad_norm": 0.5252333494656604, "learning_rate": 3.112476531831371e-05, "loss": 0.596, "num_tokens": 122489628.0, "step": 1301 }, { "epoch": 0.2222222222222222, "grad_norm": 0.550696395801145, "learning_rate": 3.111793821471241e-05, "loss": 0.6555, "num_tokens": 122568600.0, "step": 1302 }, { "epoch": 0.22239289981225466, "grad_norm": 0.5209231106681205, "learning_rate": 3.111111111111112e-05, "loss": 0.6526, "num_tokens": 122656549.0, "step": 1303 }, { "epoch": 0.22256357740228708, "grad_norm": 0.7455578243253266, "learning_rate": 3.110428400750982e-05, "loss": 0.7653, "num_tokens": 122738906.0, "step": 1304 }, { "epoch": 0.2227342549923195, "grad_norm": 0.5236233959774621, "learning_rate": 3.109745690390852e-05, "loss": 0.6987, "num_tokens": 122837428.0, "step": 1305 }, { "epoch": 0.22290493258235194, "grad_norm": 0.5301093616548719, "learning_rate": 3.1090629800307225e-05, "loss": 0.6624, "num_tokens": 122933751.0, "step": 1306 }, { "epoch": 0.22307561017238436, "grad_norm": 0.5314253145963397, "learning_rate": 3.1083802696705925e-05, "loss": 0.621, "num_tokens": 123032798.0, "step": 1307 }, { "epoch": 0.22324628776241678, "grad_norm": 0.49901828025909467, "learning_rate": 3.1076975593104625e-05, "loss": 0.5859, "num_tokens": 123134693.0, "step": 1308 }, { "epoch": 0.22341696535244923, "grad_norm": 0.5058207035646075, "learning_rate": 3.1070148489503326e-05, "loss": 0.6623, "num_tokens": 123243226.0, "step": 1309 }, { "epoch": 0.22358764294248165, "grad_norm": 0.5788853745198406, "learning_rate": 3.106332138590203e-05, "loss": 0.628, "num_tokens": 123313885.0, "step": 1310 }, { "epoch": 0.22375832053251407, "grad_norm": 0.5570194632970338, "learning_rate": 3.105649428230073e-05, "loss": 0.6805, "num_tokens": 123398174.0, "step": 1311 }, { "epoch": 0.22392899812254652, "grad_norm": 0.5404540967594337, "learning_rate": 3.104966717869944e-05, "loss": 0.6568, "num_tokens": 123493383.0, "step": 1312 }, { "epoch": 0.22409967571257894, "grad_norm": 0.5226154995881893, "learning_rate": 3.104284007509814e-05, "loss": 0.6394, "num_tokens": 123580513.0, "step": 1313 }, { "epoch": 0.22427035330261136, "grad_norm": 0.538532128324198, "learning_rate": 3.103601297149685e-05, "loss": 0.5581, "num_tokens": 123662885.0, "step": 1314 }, { "epoch": 0.2244410308926438, "grad_norm": 0.5118192073086769, "learning_rate": 3.102918586789555e-05, "loss": 0.6074, "num_tokens": 123754655.0, "step": 1315 }, { "epoch": 0.22461170848267623, "grad_norm": 0.5275613232079875, "learning_rate": 3.102235876429425e-05, "loss": 0.7001, "num_tokens": 123847024.0, "step": 1316 }, { "epoch": 0.22478238607270865, "grad_norm": 0.5138498486398182, "learning_rate": 3.1015531660692956e-05, "loss": 0.6063, "num_tokens": 123943318.0, "step": 1317 }, { "epoch": 0.2249530636627411, "grad_norm": 0.5813332012820843, "learning_rate": 3.1008704557091656e-05, "loss": 0.7098, "num_tokens": 124036114.0, "step": 1318 }, { "epoch": 0.2251237412527735, "grad_norm": 0.48992493418432864, "learning_rate": 3.100187745349036e-05, "loss": 0.5663, "num_tokens": 124144373.0, "step": 1319 }, { "epoch": 0.22529441884280593, "grad_norm": 0.5277472344845527, "learning_rate": 3.0995050349889064e-05, "loss": 0.6055, "num_tokens": 124230269.0, "step": 1320 }, { "epoch": 0.22546509643283838, "grad_norm": 0.48760476000896436, "learning_rate": 3.0988223246287764e-05, "loss": 0.6243, "num_tokens": 124341038.0, "step": 1321 }, { "epoch": 0.2256357740228708, "grad_norm": 0.5320189614642145, "learning_rate": 3.0981396142686464e-05, "loss": 0.6857, "num_tokens": 124432730.0, "step": 1322 }, { "epoch": 0.22580645161290322, "grad_norm": 0.5422975348313949, "learning_rate": 3.097456903908517e-05, "loss": 0.5948, "num_tokens": 124505320.0, "step": 1323 }, { "epoch": 0.22597712920293567, "grad_norm": 0.5487965795405434, "learning_rate": 3.096774193548387e-05, "loss": 0.6711, "num_tokens": 124598252.0, "step": 1324 }, { "epoch": 0.2261478067929681, "grad_norm": 0.46048152737975895, "learning_rate": 3.096091483188258e-05, "loss": 0.5676, "num_tokens": 124712349.0, "step": 1325 }, { "epoch": 0.2263184843830005, "grad_norm": 0.5365308787572153, "learning_rate": 3.095408772828128e-05, "loss": 0.6876, "num_tokens": 124823495.0, "step": 1326 }, { "epoch": 0.22648916197303295, "grad_norm": 0.4572302619795821, "learning_rate": 3.094726062467998e-05, "loss": 0.5318, "num_tokens": 124923331.0, "step": 1327 }, { "epoch": 0.22665983956306537, "grad_norm": 0.5084389269930601, "learning_rate": 3.094043352107869e-05, "loss": 0.5132, "num_tokens": 125020189.0, "step": 1328 }, { "epoch": 0.2268305171530978, "grad_norm": 0.5231638894140928, "learning_rate": 3.093360641747739e-05, "loss": 0.6739, "num_tokens": 125116920.0, "step": 1329 }, { "epoch": 0.2270011947431302, "grad_norm": 0.4889906609430922, "learning_rate": 3.0926779313876095e-05, "loss": 0.6272, "num_tokens": 125233557.0, "step": 1330 }, { "epoch": 0.22717187233316266, "grad_norm": 0.5288945597525497, "learning_rate": 3.0919952210274795e-05, "loss": 0.6712, "num_tokens": 125341875.0, "step": 1331 }, { "epoch": 0.22734254992319508, "grad_norm": 0.5233123467930906, "learning_rate": 3.0913125106673495e-05, "loss": 0.5799, "num_tokens": 125426633.0, "step": 1332 }, { "epoch": 0.2275132275132275, "grad_norm": 0.5026944226909204, "learning_rate": 3.0906298003072196e-05, "loss": 0.6092, "num_tokens": 125536749.0, "step": 1333 }, { "epoch": 0.22768390510325995, "grad_norm": 0.5020655667074998, "learning_rate": 3.08994708994709e-05, "loss": 0.6413, "num_tokens": 125627021.0, "step": 1334 }, { "epoch": 0.22785458269329237, "grad_norm": 0.5369408916890366, "learning_rate": 3.08926437958696e-05, "loss": 0.6198, "num_tokens": 125709899.0, "step": 1335 }, { "epoch": 0.2280252602833248, "grad_norm": 0.49936836298814274, "learning_rate": 3.0885816692268304e-05, "loss": 0.621, "num_tokens": 125819503.0, "step": 1336 }, { "epoch": 0.22819593787335724, "grad_norm": 0.5830525163249084, "learning_rate": 3.087898958866701e-05, "loss": 0.6714, "num_tokens": 125902692.0, "step": 1337 }, { "epoch": 0.22836661546338966, "grad_norm": 0.5085374201600399, "learning_rate": 3.087216248506571e-05, "loss": 0.6382, "num_tokens": 125998340.0, "step": 1338 }, { "epoch": 0.22853729305342207, "grad_norm": 0.5158694398817232, "learning_rate": 3.086533538146442e-05, "loss": 0.6488, "num_tokens": 126088456.0, "step": 1339 }, { "epoch": 0.22870797064345452, "grad_norm": 0.6796306680345512, "learning_rate": 3.085850827786312e-05, "loss": 0.5784, "num_tokens": 126180867.0, "step": 1340 }, { "epoch": 0.22887864823348694, "grad_norm": 0.5354239726248454, "learning_rate": 3.0851681174261826e-05, "loss": 0.6374, "num_tokens": 126272983.0, "step": 1341 }, { "epoch": 0.22904932582351936, "grad_norm": 0.635998331687358, "learning_rate": 3.0844854070660526e-05, "loss": 0.6281, "num_tokens": 126339449.0, "step": 1342 }, { "epoch": 0.2292200034135518, "grad_norm": 0.49852917261294133, "learning_rate": 3.0838026967059227e-05, "loss": 0.6205, "num_tokens": 126440401.0, "step": 1343 }, { "epoch": 0.22939068100358423, "grad_norm": 0.5354176342893149, "learning_rate": 3.0831199863457934e-05, "loss": 0.7033, "num_tokens": 126538743.0, "step": 1344 }, { "epoch": 0.22956135859361665, "grad_norm": 0.5293416212419898, "learning_rate": 3.0824372759856634e-05, "loss": 0.6014, "num_tokens": 126631433.0, "step": 1345 }, { "epoch": 0.2297320361836491, "grad_norm": 0.5400589990098501, "learning_rate": 3.0817545656255334e-05, "loss": 0.599, "num_tokens": 126725605.0, "step": 1346 }, { "epoch": 0.22990271377368152, "grad_norm": 0.5784148263448391, "learning_rate": 3.0810718552654035e-05, "loss": 0.6421, "num_tokens": 126793635.0, "step": 1347 }, { "epoch": 0.23007339136371394, "grad_norm": 0.5232414247774434, "learning_rate": 3.080389144905274e-05, "loss": 0.6219, "num_tokens": 126892672.0, "step": 1348 }, { "epoch": 0.23024406895374638, "grad_norm": 0.4591713707736781, "learning_rate": 3.079706434545144e-05, "loss": 0.6109, "num_tokens": 127006470.0, "step": 1349 }, { "epoch": 0.2304147465437788, "grad_norm": 0.4474923368059129, "learning_rate": 3.079023724185015e-05, "loss": 0.6806, "num_tokens": 127148405.0, "step": 1350 }, { "epoch": 0.23058542413381122, "grad_norm": 0.47335854074438316, "learning_rate": 3.078341013824885e-05, "loss": 0.5751, "num_tokens": 127258311.0, "step": 1351 }, { "epoch": 0.23075610172384367, "grad_norm": 0.4914682074521922, "learning_rate": 3.077658303464755e-05, "loss": 0.7056, "num_tokens": 127375688.0, "step": 1352 }, { "epoch": 0.2309267793138761, "grad_norm": 0.5100803364563288, "learning_rate": 3.076975593104626e-05, "loss": 0.6796, "num_tokens": 127472719.0, "step": 1353 }, { "epoch": 0.2310974569039085, "grad_norm": 0.7190577578227213, "learning_rate": 3.076292882744496e-05, "loss": 0.6917, "num_tokens": 127551021.0, "step": 1354 }, { "epoch": 0.23126813449394096, "grad_norm": 0.589448165272954, "learning_rate": 3.0756101723843665e-05, "loss": 0.6715, "num_tokens": 127635942.0, "step": 1355 }, { "epoch": 0.23143881208397338, "grad_norm": 0.586582032772375, "learning_rate": 3.0749274620242365e-05, "loss": 0.7546, "num_tokens": 127717832.0, "step": 1356 }, { "epoch": 0.2316094896740058, "grad_norm": 0.5231884912491942, "learning_rate": 3.074244751664107e-05, "loss": 0.6418, "num_tokens": 127822525.0, "step": 1357 }, { "epoch": 0.23178016726403824, "grad_norm": 0.6200089333667927, "learning_rate": 3.0735620413039766e-05, "loss": 0.7015, "num_tokens": 127913385.0, "step": 1358 }, { "epoch": 0.23195084485407066, "grad_norm": 0.5364204964116007, "learning_rate": 3.072879330943847e-05, "loss": 0.5791, "num_tokens": 127989122.0, "step": 1359 }, { "epoch": 0.23212152244410308, "grad_norm": 0.5618887639884832, "learning_rate": 3.0721966205837174e-05, "loss": 0.6074, "num_tokens": 128061353.0, "step": 1360 }, { "epoch": 0.2322922000341355, "grad_norm": 0.5349320134033911, "learning_rate": 3.071513910223588e-05, "loss": 0.675, "num_tokens": 128165253.0, "step": 1361 }, { "epoch": 0.23246287762416795, "grad_norm": 0.5150730024192413, "learning_rate": 3.070831199863458e-05, "loss": 0.6151, "num_tokens": 128250503.0, "step": 1362 }, { "epoch": 0.23263355521420037, "grad_norm": 0.5543520272100232, "learning_rate": 3.070148489503328e-05, "loss": 0.6396, "num_tokens": 128352845.0, "step": 1363 }, { "epoch": 0.2328042328042328, "grad_norm": 0.5207715634138996, "learning_rate": 3.069465779143199e-05, "loss": 0.6198, "num_tokens": 128450421.0, "step": 1364 }, { "epoch": 0.23297491039426524, "grad_norm": 0.515682394566172, "learning_rate": 3.068783068783069e-05, "loss": 0.645, "num_tokens": 128541865.0, "step": 1365 }, { "epoch": 0.23314558798429766, "grad_norm": 0.5667396795942035, "learning_rate": 3.0681003584229396e-05, "loss": 0.686, "num_tokens": 128636273.0, "step": 1366 }, { "epoch": 0.23331626557433008, "grad_norm": 0.49709660908414344, "learning_rate": 3.0674176480628097e-05, "loss": 0.6001, "num_tokens": 128722836.0, "step": 1367 }, { "epoch": 0.23348694316436253, "grad_norm": 0.6256207419702865, "learning_rate": 3.0667349377026804e-05, "loss": 0.561, "num_tokens": 128784457.0, "step": 1368 }, { "epoch": 0.23365762075439495, "grad_norm": 0.5055766287518001, "learning_rate": 3.0660522273425504e-05, "loss": 0.6251, "num_tokens": 128880993.0, "step": 1369 }, { "epoch": 0.23382829834442737, "grad_norm": 0.5526813224569053, "learning_rate": 3.0653695169824204e-05, "loss": 0.6662, "num_tokens": 128972307.0, "step": 1370 }, { "epoch": 0.2339989759344598, "grad_norm": 0.5456343117132848, "learning_rate": 3.0646868066222905e-05, "loss": 0.7043, "num_tokens": 129058065.0, "step": 1371 }, { "epoch": 0.23416965352449223, "grad_norm": 0.5283988508782279, "learning_rate": 3.064004096262161e-05, "loss": 0.6409, "num_tokens": 129149780.0, "step": 1372 }, { "epoch": 0.23434033111452465, "grad_norm": 0.5125272199029696, "learning_rate": 3.063321385902031e-05, "loss": 0.6915, "num_tokens": 129247527.0, "step": 1373 }, { "epoch": 0.2345110087045571, "grad_norm": 0.5954929590919974, "learning_rate": 3.062638675541901e-05, "loss": 0.5977, "num_tokens": 129315593.0, "step": 1374 }, { "epoch": 0.23468168629458952, "grad_norm": 0.4850966661934879, "learning_rate": 3.061955965181772e-05, "loss": 0.6736, "num_tokens": 129424816.0, "step": 1375 }, { "epoch": 0.23485236388462194, "grad_norm": 0.5279503184160949, "learning_rate": 3.061273254821642e-05, "loss": 0.5783, "num_tokens": 129508707.0, "step": 1376 }, { "epoch": 0.2350230414746544, "grad_norm": 0.5344811732567887, "learning_rate": 3.060590544461513e-05, "loss": 0.6034, "num_tokens": 129592049.0, "step": 1377 }, { "epoch": 0.2351937190646868, "grad_norm": 0.556847563815085, "learning_rate": 3.059907834101383e-05, "loss": 0.7988, "num_tokens": 129703006.0, "step": 1378 }, { "epoch": 0.23536439665471923, "grad_norm": 0.5074626747522675, "learning_rate": 3.059225123741253e-05, "loss": 0.6395, "num_tokens": 129822172.0, "step": 1379 }, { "epoch": 0.23553507424475167, "grad_norm": 0.5730720136870313, "learning_rate": 3.0585424133811235e-05, "loss": 0.7556, "num_tokens": 129918265.0, "step": 1380 }, { "epoch": 0.2357057518347841, "grad_norm": 0.5190070165286906, "learning_rate": 3.0578597030209936e-05, "loss": 0.6719, "num_tokens": 130016003.0, "step": 1381 }, { "epoch": 0.2358764294248165, "grad_norm": 0.5812502794074597, "learning_rate": 3.057176992660864e-05, "loss": 0.664, "num_tokens": 130106851.0, "step": 1382 }, { "epoch": 0.23604710701484896, "grad_norm": 0.6765207666970194, "learning_rate": 3.0564942823007336e-05, "loss": 0.7287, "num_tokens": 130203068.0, "step": 1383 }, { "epoch": 0.23621778460488138, "grad_norm": 0.49947603410063446, "learning_rate": 3.0558115719406044e-05, "loss": 0.6347, "num_tokens": 130297302.0, "step": 1384 }, { "epoch": 0.2363884621949138, "grad_norm": 0.6336592616392603, "learning_rate": 3.0551288615804744e-05, "loss": 0.6892, "num_tokens": 130435661.0, "step": 1385 }, { "epoch": 0.23655913978494625, "grad_norm": 0.5067524771399632, "learning_rate": 3.054446151220345e-05, "loss": 0.6696, "num_tokens": 130558865.0, "step": 1386 }, { "epoch": 0.23672981737497867, "grad_norm": 0.5465546560326978, "learning_rate": 3.053763440860215e-05, "loss": 0.5636, "num_tokens": 130670069.0, "step": 1387 }, { "epoch": 0.2369004949650111, "grad_norm": 0.5430544087985929, "learning_rate": 3.053080730500086e-05, "loss": 0.5042, "num_tokens": 130739100.0, "step": 1388 }, { "epoch": 0.23707117255504354, "grad_norm": 0.5810494941696542, "learning_rate": 3.052398020139956e-05, "loss": 0.7156, "num_tokens": 130814249.0, "step": 1389 }, { "epoch": 0.23724185014507596, "grad_norm": 0.5506033751706608, "learning_rate": 3.0517153097798263e-05, "loss": 0.6876, "num_tokens": 130911956.0, "step": 1390 }, { "epoch": 0.23741252773510838, "grad_norm": 0.524734952522944, "learning_rate": 3.0510325994196966e-05, "loss": 0.679, "num_tokens": 131022083.0, "step": 1391 }, { "epoch": 0.23758320532514082, "grad_norm": 0.506059831101849, "learning_rate": 3.050349889059567e-05, "loss": 0.588, "num_tokens": 131108627.0, "step": 1392 }, { "epoch": 0.23775388291517324, "grad_norm": 0.56985570051146, "learning_rate": 3.049667178699437e-05, "loss": 0.6345, "num_tokens": 131191129.0, "step": 1393 }, { "epoch": 0.23792456050520566, "grad_norm": 0.5228954656831781, "learning_rate": 3.0489844683393074e-05, "loss": 0.6084, "num_tokens": 131290128.0, "step": 1394 }, { "epoch": 0.23809523809523808, "grad_norm": 0.5385817121588715, "learning_rate": 3.0483017579791775e-05, "loss": 0.6579, "num_tokens": 131374383.0, "step": 1395 }, { "epoch": 0.23826591568527053, "grad_norm": 0.5211749217575233, "learning_rate": 3.047619047619048e-05, "loss": 0.6712, "num_tokens": 131478474.0, "step": 1396 }, { "epoch": 0.23843659327530295, "grad_norm": 0.5748494848870591, "learning_rate": 3.046936337258918e-05, "loss": 0.7061, "num_tokens": 131567451.0, "step": 1397 }, { "epoch": 0.23860727086533537, "grad_norm": 0.5484056857840369, "learning_rate": 3.0462536268987883e-05, "loss": 0.6912, "num_tokens": 131650422.0, "step": 1398 }, { "epoch": 0.23877794845536782, "grad_norm": 0.5519249519350571, "learning_rate": 3.0455709165386586e-05, "loss": 0.624, "num_tokens": 131725740.0, "step": 1399 }, { "epoch": 0.23894862604540024, "grad_norm": 0.53809424433346, "learning_rate": 3.044888206178529e-05, "loss": 0.6654, "num_tokens": 131822127.0, "step": 1400 }, { "epoch": 0.23911930363543266, "grad_norm": 0.6233333456978932, "learning_rate": 3.0442054958183994e-05, "loss": 0.5506, "num_tokens": 131880372.0, "step": 1401 }, { "epoch": 0.2392899812254651, "grad_norm": 0.5283909674199262, "learning_rate": 3.0435227854582698e-05, "loss": 0.5969, "num_tokens": 131976476.0, "step": 1402 }, { "epoch": 0.23946065881549752, "grad_norm": 0.5921157552724325, "learning_rate": 3.0428400750981398e-05, "loss": 0.6781, "num_tokens": 132052977.0, "step": 1403 }, { "epoch": 0.23963133640552994, "grad_norm": 0.579250178609413, "learning_rate": 3.0421573647380102e-05, "loss": 0.6212, "num_tokens": 132137316.0, "step": 1404 }, { "epoch": 0.2398020139955624, "grad_norm": 0.49869281371283847, "learning_rate": 3.0414746543778806e-05, "loss": 0.631, "num_tokens": 132234257.0, "step": 1405 }, { "epoch": 0.2399726915855948, "grad_norm": 0.5272317540658565, "learning_rate": 3.040791944017751e-05, "loss": 0.6064, "num_tokens": 132330861.0, "step": 1406 }, { "epoch": 0.24014336917562723, "grad_norm": 0.5504209384489408, "learning_rate": 3.0401092336576206e-05, "loss": 0.6195, "num_tokens": 132417050.0, "step": 1407 }, { "epoch": 0.24031404676565968, "grad_norm": 0.5671488204833297, "learning_rate": 3.039426523297491e-05, "loss": 0.6905, "num_tokens": 132506316.0, "step": 1408 }, { "epoch": 0.2404847243556921, "grad_norm": 0.5101348836256138, "learning_rate": 3.0387438129373614e-05, "loss": 0.6053, "num_tokens": 132602324.0, "step": 1409 }, { "epoch": 0.24065540194572452, "grad_norm": 0.5661575277074523, "learning_rate": 3.0380611025772318e-05, "loss": 0.6529, "num_tokens": 132681552.0, "step": 1410 }, { "epoch": 0.24082607953575697, "grad_norm": 0.5637159314128432, "learning_rate": 3.037378392217102e-05, "loss": 0.6133, "num_tokens": 132773747.0, "step": 1411 }, { "epoch": 0.24099675712578938, "grad_norm": 0.5947030680547191, "learning_rate": 3.0366956818569725e-05, "loss": 0.626, "num_tokens": 132847563.0, "step": 1412 }, { "epoch": 0.2411674347158218, "grad_norm": 0.55575080933755, "learning_rate": 3.0360129714968426e-05, "loss": 0.6466, "num_tokens": 132924515.0, "step": 1413 }, { "epoch": 0.24133811230585425, "grad_norm": 0.4896266312261726, "learning_rate": 3.035330261136713e-05, "loss": 0.5947, "num_tokens": 133040539.0, "step": 1414 }, { "epoch": 0.24150878989588667, "grad_norm": 0.5372747104229078, "learning_rate": 3.0346475507765833e-05, "loss": 0.6771, "num_tokens": 133132068.0, "step": 1415 }, { "epoch": 0.2416794674859191, "grad_norm": 0.6568590129003236, "learning_rate": 3.0339648404164537e-05, "loss": 0.6828, "num_tokens": 133213198.0, "step": 1416 }, { "epoch": 0.24185014507595154, "grad_norm": 0.5496227499025967, "learning_rate": 3.033282130056324e-05, "loss": 0.6239, "num_tokens": 133296925.0, "step": 1417 }, { "epoch": 0.24202082266598396, "grad_norm": 0.5194967748960357, "learning_rate": 3.0325994196961944e-05, "loss": 0.6366, "num_tokens": 133401006.0, "step": 1418 }, { "epoch": 0.24219150025601638, "grad_norm": 0.5566946253149542, "learning_rate": 3.0319167093360645e-05, "loss": 0.6503, "num_tokens": 133481188.0, "step": 1419 }, { "epoch": 0.24236217784604883, "grad_norm": 0.5338227644578518, "learning_rate": 3.0312339989759345e-05, "loss": 0.623, "num_tokens": 133561189.0, "step": 1420 }, { "epoch": 0.24253285543608125, "grad_norm": 0.5180882479544722, "learning_rate": 3.030551288615805e-05, "loss": 0.52, "num_tokens": 133651897.0, "step": 1421 }, { "epoch": 0.24270353302611367, "grad_norm": 0.4931744842356081, "learning_rate": 3.0298685782556753e-05, "loss": 0.6046, "num_tokens": 133749741.0, "step": 1422 }, { "epoch": 0.2428742106161461, "grad_norm": 0.540365450613624, "learning_rate": 3.0291858678955456e-05, "loss": 0.6039, "num_tokens": 133835537.0, "step": 1423 }, { "epoch": 0.24304488820617853, "grad_norm": 0.6057705776430942, "learning_rate": 3.0285031575354157e-05, "loss": 0.752, "num_tokens": 133916104.0, "step": 1424 }, { "epoch": 0.24321556579621095, "grad_norm": 0.5414105943084075, "learning_rate": 3.027820447175286e-05, "loss": 0.5898, "num_tokens": 133998897.0, "step": 1425 }, { "epoch": 0.24338624338624337, "grad_norm": 0.5724644699432798, "learning_rate": 3.0271377368151564e-05, "loss": 0.6612, "num_tokens": 134075965.0, "step": 1426 }, { "epoch": 0.24355692097627582, "grad_norm": 0.5092635806793304, "learning_rate": 3.0264550264550268e-05, "loss": 0.6758, "num_tokens": 134181854.0, "step": 1427 }, { "epoch": 0.24372759856630824, "grad_norm": 0.47224962245264396, "learning_rate": 3.0257723160948972e-05, "loss": 0.6521, "num_tokens": 134315075.0, "step": 1428 }, { "epoch": 0.24389827615634066, "grad_norm": 0.49708555840002566, "learning_rate": 3.0250896057347676e-05, "loss": 0.5792, "num_tokens": 134413861.0, "step": 1429 }, { "epoch": 0.2440689537463731, "grad_norm": 0.5198995465483546, "learning_rate": 3.0244068953746376e-05, "loss": 0.6367, "num_tokens": 134518347.0, "step": 1430 }, { "epoch": 0.24423963133640553, "grad_norm": 0.4500627773403896, "learning_rate": 3.023724185014508e-05, "loss": 0.5546, "num_tokens": 134637515.0, "step": 1431 }, { "epoch": 0.24441030892643795, "grad_norm": 0.4857823323461901, "learning_rate": 3.023041474654378e-05, "loss": 0.5564, "num_tokens": 134743996.0, "step": 1432 }, { "epoch": 0.2445809865164704, "grad_norm": 0.5150518953201861, "learning_rate": 3.0223587642942484e-05, "loss": 0.6997, "num_tokens": 134852154.0, "step": 1433 }, { "epoch": 0.24475166410650281, "grad_norm": 0.5777695595037731, "learning_rate": 3.0216760539341184e-05, "loss": 0.6683, "num_tokens": 134956239.0, "step": 1434 }, { "epoch": 0.24492234169653523, "grad_norm": 0.519644823221684, "learning_rate": 3.0209933435739888e-05, "loss": 0.6095, "num_tokens": 135043357.0, "step": 1435 }, { "epoch": 0.24509301928656768, "grad_norm": 0.5479927110839381, "learning_rate": 3.0203106332138592e-05, "loss": 0.6592, "num_tokens": 135124476.0, "step": 1436 }, { "epoch": 0.2452636968766001, "grad_norm": 0.5890631093974773, "learning_rate": 3.0196279228537296e-05, "loss": 0.7234, "num_tokens": 135235166.0, "step": 1437 }, { "epoch": 0.24543437446663252, "grad_norm": 0.5003995989136789, "learning_rate": 3.0189452124936e-05, "loss": 0.7069, "num_tokens": 135364067.0, "step": 1438 }, { "epoch": 0.24560505205666497, "grad_norm": 0.5719995897117487, "learning_rate": 3.0182625021334703e-05, "loss": 0.5319, "num_tokens": 135431557.0, "step": 1439 }, { "epoch": 0.2457757296466974, "grad_norm": 0.5431888169755601, "learning_rate": 3.0175797917733403e-05, "loss": 0.6824, "num_tokens": 135526866.0, "step": 1440 }, { "epoch": 0.2459464072367298, "grad_norm": 0.5038567551986254, "learning_rate": 3.0168970814132107e-05, "loss": 0.6059, "num_tokens": 135620295.0, "step": 1441 }, { "epoch": 0.24611708482676226, "grad_norm": 0.5708391021082982, "learning_rate": 3.016214371053081e-05, "loss": 0.6251, "num_tokens": 135689636.0, "step": 1442 }, { "epoch": 0.24628776241679468, "grad_norm": 0.4981041167882797, "learning_rate": 3.0155316606929515e-05, "loss": 0.6608, "num_tokens": 135804899.0, "step": 1443 }, { "epoch": 0.2464584400068271, "grad_norm": 0.5394244853646124, "learning_rate": 3.014848950332822e-05, "loss": 0.6428, "num_tokens": 135887679.0, "step": 1444 }, { "epoch": 0.24662911759685954, "grad_norm": 0.44377890515202334, "learning_rate": 3.0141662399726915e-05, "loss": 0.6267, "num_tokens": 136020787.0, "step": 1445 }, { "epoch": 0.24679979518689196, "grad_norm": 0.5492992299824374, "learning_rate": 3.013483529612562e-05, "loss": 0.6447, "num_tokens": 136103484.0, "step": 1446 }, { "epoch": 0.24697047277692438, "grad_norm": 0.5101190344579398, "learning_rate": 3.0128008192524323e-05, "loss": 0.5725, "num_tokens": 136184075.0, "step": 1447 }, { "epoch": 0.24714115036695683, "grad_norm": 0.5810213296390501, "learning_rate": 3.0121181088923027e-05, "loss": 0.6121, "num_tokens": 136257902.0, "step": 1448 }, { "epoch": 0.24731182795698925, "grad_norm": 0.6841692914658968, "learning_rate": 3.011435398532173e-05, "loss": 0.7045, "num_tokens": 136352675.0, "step": 1449 }, { "epoch": 0.24748250554702167, "grad_norm": 0.5339948812279803, "learning_rate": 3.010752688172043e-05, "loss": 0.6558, "num_tokens": 136439978.0, "step": 1450 }, { "epoch": 0.24765318313705412, "grad_norm": 0.5379426458410761, "learning_rate": 3.0100699778119135e-05, "loss": 0.6052, "num_tokens": 136511577.0, "step": 1451 }, { "epoch": 0.24782386072708654, "grad_norm": 0.46775428335752056, "learning_rate": 3.009387267451784e-05, "loss": 0.5971, "num_tokens": 136611136.0, "step": 1452 }, { "epoch": 0.24799453831711896, "grad_norm": 0.5078766030838417, "learning_rate": 3.0087045570916542e-05, "loss": 0.6128, "num_tokens": 136709378.0, "step": 1453 }, { "epoch": 0.2481652159071514, "grad_norm": 0.5062203453170451, "learning_rate": 3.0080218467315246e-05, "loss": 0.5787, "num_tokens": 136800260.0, "step": 1454 }, { "epoch": 0.24833589349718382, "grad_norm": 0.4770989452241791, "learning_rate": 3.007339136371395e-05, "loss": 0.585, "num_tokens": 136905678.0, "step": 1455 }, { "epoch": 0.24850657108721624, "grad_norm": 0.531780153379372, "learning_rate": 3.006656426011265e-05, "loss": 0.589, "num_tokens": 136987891.0, "step": 1456 }, { "epoch": 0.24867724867724866, "grad_norm": 0.48479787849425293, "learning_rate": 3.005973715651135e-05, "loss": 0.6014, "num_tokens": 137092227.0, "step": 1457 }, { "epoch": 0.2488479262672811, "grad_norm": 0.4800305975876206, "learning_rate": 3.0052910052910054e-05, "loss": 0.5759, "num_tokens": 137191163.0, "step": 1458 }, { "epoch": 0.24901860385731353, "grad_norm": 0.5049393254941668, "learning_rate": 3.0046082949308758e-05, "loss": 0.63, "num_tokens": 137299283.0, "step": 1459 }, { "epoch": 0.24918928144734595, "grad_norm": 0.5289090285507199, "learning_rate": 3.0039255845707462e-05, "loss": 0.6543, "num_tokens": 137391055.0, "step": 1460 }, { "epoch": 0.2493599590373784, "grad_norm": 0.5356699481681063, "learning_rate": 3.0032428742106162e-05, "loss": 0.6295, "num_tokens": 137473618.0, "step": 1461 }, { "epoch": 0.24953063662741082, "grad_norm": 0.5175860090191465, "learning_rate": 3.0025601638504866e-05, "loss": 0.5944, "num_tokens": 137557934.0, "step": 1462 }, { "epoch": 0.24970131421744324, "grad_norm": 0.5049279104519903, "learning_rate": 3.001877453490357e-05, "loss": 0.6355, "num_tokens": 137664052.0, "step": 1463 }, { "epoch": 0.24987199180747569, "grad_norm": 0.4932894487570249, "learning_rate": 3.0011947431302273e-05, "loss": 0.6831, "num_tokens": 137771633.0, "step": 1464 }, { "epoch": 0.25004266939750813, "grad_norm": 0.5848552107663836, "learning_rate": 3.0005120327700977e-05, "loss": 0.6816, "num_tokens": 137853045.0, "step": 1465 }, { "epoch": 0.25021334698754055, "grad_norm": 0.518811311739012, "learning_rate": 2.999829322409968e-05, "loss": 0.5387, "num_tokens": 137933230.0, "step": 1466 }, { "epoch": 0.250384024577573, "grad_norm": 0.5132400973436496, "learning_rate": 2.999146612049838e-05, "loss": 0.6825, "num_tokens": 138026819.0, "step": 1467 }, { "epoch": 0.2505547021676054, "grad_norm": 0.5173734325022021, "learning_rate": 2.9984639016897085e-05, "loss": 0.5782, "num_tokens": 138102793.0, "step": 1468 }, { "epoch": 0.2507253797576378, "grad_norm": 0.5559103218411179, "learning_rate": 2.9977811913295785e-05, "loss": 0.6151, "num_tokens": 138175069.0, "step": 1469 }, { "epoch": 0.25089605734767023, "grad_norm": 0.5727172644222017, "learning_rate": 2.997098480969449e-05, "loss": 0.673, "num_tokens": 138249766.0, "step": 1470 }, { "epoch": 0.2510667349377027, "grad_norm": 0.503680922931298, "learning_rate": 2.996415770609319e-05, "loss": 0.5711, "num_tokens": 138337209.0, "step": 1471 }, { "epoch": 0.2512374125277351, "grad_norm": 0.534112527296179, "learning_rate": 2.9957330602491893e-05, "loss": 0.5777, "num_tokens": 138412909.0, "step": 1472 }, { "epoch": 0.25140809011776755, "grad_norm": 0.4748094170455023, "learning_rate": 2.9950503498890597e-05, "loss": 0.6563, "num_tokens": 138531265.0, "step": 1473 }, { "epoch": 0.25157876770779997, "grad_norm": 0.564087841189348, "learning_rate": 2.99436763952893e-05, "loss": 0.6354, "num_tokens": 138614697.0, "step": 1474 }, { "epoch": 0.2517494452978324, "grad_norm": 0.5029388860974616, "learning_rate": 2.9936849291688005e-05, "loss": 0.5813, "num_tokens": 138706475.0, "step": 1475 }, { "epoch": 0.2519201228878648, "grad_norm": 0.47714325409009445, "learning_rate": 2.993002218808671e-05, "loss": 0.6239, "num_tokens": 138820127.0, "step": 1476 }, { "epoch": 0.2520908004778972, "grad_norm": 0.6320909145211067, "learning_rate": 2.992319508448541e-05, "loss": 0.6576, "num_tokens": 138884189.0, "step": 1477 }, { "epoch": 0.2522614780679297, "grad_norm": 0.5890892367067465, "learning_rate": 2.9916367980884113e-05, "loss": 0.5982, "num_tokens": 138948780.0, "step": 1478 }, { "epoch": 0.2524321556579621, "grad_norm": 0.5315193409361323, "learning_rate": 2.9909540877282816e-05, "loss": 0.6815, "num_tokens": 139063119.0, "step": 1479 }, { "epoch": 0.25260283324799454, "grad_norm": 0.5617235238053899, "learning_rate": 2.990271377368152e-05, "loss": 0.6347, "num_tokens": 139139361.0, "step": 1480 }, { "epoch": 0.25277351083802696, "grad_norm": 0.5401930075759667, "learning_rate": 2.9895886670080224e-05, "loss": 0.4976, "num_tokens": 139211150.0, "step": 1481 }, { "epoch": 0.2529441884280594, "grad_norm": 0.46868075695582584, "learning_rate": 2.988905956647892e-05, "loss": 0.7158, "num_tokens": 139361496.0, "step": 1482 }, { "epoch": 0.2531148660180918, "grad_norm": 0.43582117870858883, "learning_rate": 2.9882232462877625e-05, "loss": 0.6029, "num_tokens": 139500411.0, "step": 1483 }, { "epoch": 0.2532855436081243, "grad_norm": 0.4930277717448138, "learning_rate": 2.987540535927633e-05, "loss": 0.6046, "num_tokens": 139606564.0, "step": 1484 }, { "epoch": 0.2534562211981567, "grad_norm": 0.5084470901240925, "learning_rate": 2.9868578255675032e-05, "loss": 0.5726, "num_tokens": 139719725.0, "step": 1485 }, { "epoch": 0.2536268987881891, "grad_norm": 0.48011145164763613, "learning_rate": 2.9861751152073736e-05, "loss": 0.6344, "num_tokens": 139824776.0, "step": 1486 }, { "epoch": 0.25379757637822153, "grad_norm": 0.4815992656785346, "learning_rate": 2.9854924048472436e-05, "loss": 0.5855, "num_tokens": 139928172.0, "step": 1487 }, { "epoch": 0.25396825396825395, "grad_norm": 0.5069969158999137, "learning_rate": 2.984809694487114e-05, "loss": 0.5625, "num_tokens": 140011821.0, "step": 1488 }, { "epoch": 0.2541389315582864, "grad_norm": 0.5152385983240086, "learning_rate": 2.9841269841269844e-05, "loss": 0.7513, "num_tokens": 140133252.0, "step": 1489 }, { "epoch": 0.25430960914831885, "grad_norm": 0.5766370174929791, "learning_rate": 2.9834442737668548e-05, "loss": 0.6693, "num_tokens": 140225355.0, "step": 1490 }, { "epoch": 0.25448028673835127, "grad_norm": 0.5463644226462849, "learning_rate": 2.982761563406725e-05, "loss": 0.7806, "num_tokens": 140317996.0, "step": 1491 }, { "epoch": 0.2546509643283837, "grad_norm": 0.5105852462959829, "learning_rate": 2.9820788530465955e-05, "loss": 0.6763, "num_tokens": 140431939.0, "step": 1492 }, { "epoch": 0.2548216419184161, "grad_norm": 0.5372631823244021, "learning_rate": 2.9813961426864655e-05, "loss": 0.6438, "num_tokens": 140518173.0, "step": 1493 }, { "epoch": 0.25499231950844853, "grad_norm": 0.5534768553444708, "learning_rate": 2.9807134323263356e-05, "loss": 0.7666, "num_tokens": 140617973.0, "step": 1494 }, { "epoch": 0.25516299709848095, "grad_norm": 0.4690692711003919, "learning_rate": 2.980030721966206e-05, "loss": 0.5878, "num_tokens": 140729558.0, "step": 1495 }, { "epoch": 0.2553336746885134, "grad_norm": 0.5519344538013113, "learning_rate": 2.9793480116060763e-05, "loss": 0.6622, "num_tokens": 140815301.0, "step": 1496 }, { "epoch": 0.25550435227854584, "grad_norm": 0.7214930033997207, "learning_rate": 2.9786653012459467e-05, "loss": 0.6531, "num_tokens": 140919638.0, "step": 1497 }, { "epoch": 0.25567502986857826, "grad_norm": 0.5853761900600749, "learning_rate": 2.9779825908858167e-05, "loss": 0.6237, "num_tokens": 141014876.0, "step": 1498 }, { "epoch": 0.2558457074586107, "grad_norm": 0.6263849408393748, "learning_rate": 2.977299880525687e-05, "loss": 0.7401, "num_tokens": 141106796.0, "step": 1499 }, { "epoch": 0.2560163850486431, "grad_norm": 0.5365435575890392, "learning_rate": 2.9766171701655575e-05, "loss": 0.6765, "num_tokens": 141199769.0, "step": 1500 }, { "epoch": 0.2561870626386755, "grad_norm": 0.5924826250981401, "learning_rate": 2.975934459805428e-05, "loss": 0.6891, "num_tokens": 141287523.0, "step": 1501 }, { "epoch": 0.256357740228708, "grad_norm": 0.5231091512943465, "learning_rate": 2.9752517494452983e-05, "loss": 0.6446, "num_tokens": 141381797.0, "step": 1502 }, { "epoch": 0.2565284178187404, "grad_norm": 0.8789586278375571, "learning_rate": 2.9745690390851686e-05, "loss": 0.6554, "num_tokens": 141485024.0, "step": 1503 }, { "epoch": 0.25669909540877284, "grad_norm": 0.524529035543788, "learning_rate": 2.9738863287250387e-05, "loss": 0.6575, "num_tokens": 141575204.0, "step": 1504 }, { "epoch": 0.25686977299880526, "grad_norm": 0.5617828009333108, "learning_rate": 2.973203618364909e-05, "loss": 0.6628, "num_tokens": 141675952.0, "step": 1505 }, { "epoch": 0.2570404505888377, "grad_norm": 0.5321845639672593, "learning_rate": 2.9725209080047794e-05, "loss": 0.6111, "num_tokens": 141768017.0, "step": 1506 }, { "epoch": 0.2572111281788701, "grad_norm": 0.4816956613082202, "learning_rate": 2.9718381976446495e-05, "loss": 0.5241, "num_tokens": 141868134.0, "step": 1507 }, { "epoch": 0.2573818057689025, "grad_norm": 0.5402964820566525, "learning_rate": 2.9711554872845195e-05, "loss": 0.5369, "num_tokens": 141943108.0, "step": 1508 }, { "epoch": 0.257552483358935, "grad_norm": 0.5619439883493221, "learning_rate": 2.97047277692439e-05, "loss": 0.6031, "num_tokens": 142017650.0, "step": 1509 }, { "epoch": 0.2577231609489674, "grad_norm": 0.5433155794270991, "learning_rate": 2.9697900665642602e-05, "loss": 0.6377, "num_tokens": 142116197.0, "step": 1510 }, { "epoch": 0.25789383853899983, "grad_norm": 0.5223656497393874, "learning_rate": 2.9691073562041306e-05, "loss": 0.642, "num_tokens": 142207277.0, "step": 1511 }, { "epoch": 0.25806451612903225, "grad_norm": 0.5176784238002053, "learning_rate": 2.968424645844001e-05, "loss": 0.737, "num_tokens": 142328430.0, "step": 1512 }, { "epoch": 0.25823519371906467, "grad_norm": 0.6071290315898796, "learning_rate": 2.9677419354838714e-05, "loss": 0.6037, "num_tokens": 142391489.0, "step": 1513 }, { "epoch": 0.2584058713090971, "grad_norm": 0.5655736272189225, "learning_rate": 2.9670592251237414e-05, "loss": 0.5664, "num_tokens": 142455328.0, "step": 1514 }, { "epoch": 0.25857654889912957, "grad_norm": 0.5182064486394659, "learning_rate": 2.9663765147636118e-05, "loss": 0.572, "num_tokens": 142540202.0, "step": 1515 }, { "epoch": 0.258747226489162, "grad_norm": 0.5504339113645859, "learning_rate": 2.965693804403482e-05, "loss": 0.5649, "num_tokens": 142617592.0, "step": 1516 }, { "epoch": 0.2589179040791944, "grad_norm": 0.5439145067927225, "learning_rate": 2.9650110940433525e-05, "loss": 0.7104, "num_tokens": 142725462.0, "step": 1517 }, { "epoch": 0.2590885816692268, "grad_norm": 0.5272139752010805, "learning_rate": 2.964328383683223e-05, "loss": 0.6386, "num_tokens": 142816608.0, "step": 1518 }, { "epoch": 0.25925925925925924, "grad_norm": 0.5024787201354364, "learning_rate": 2.9636456733230926e-05, "loss": 0.6259, "num_tokens": 142928382.0, "step": 1519 }, { "epoch": 0.25942993684929166, "grad_norm": 0.5299193403971175, "learning_rate": 2.962962962962963e-05, "loss": 0.6721, "num_tokens": 143029084.0, "step": 1520 }, { "epoch": 0.25960061443932414, "grad_norm": 0.5499690134059552, "learning_rate": 2.9622802526028334e-05, "loss": 0.6364, "num_tokens": 143106807.0, "step": 1521 }, { "epoch": 0.25977129202935656, "grad_norm": 0.49391904043146334, "learning_rate": 2.9615975422427037e-05, "loss": 0.5594, "num_tokens": 143204105.0, "step": 1522 }, { "epoch": 0.259941969619389, "grad_norm": 0.4955415042346165, "learning_rate": 2.960914831882574e-05, "loss": 0.5762, "num_tokens": 143318252.0, "step": 1523 }, { "epoch": 0.2601126472094214, "grad_norm": 0.5784310482788532, "learning_rate": 2.960232121522444e-05, "loss": 0.6551, "num_tokens": 143396350.0, "step": 1524 }, { "epoch": 0.2602833247994538, "grad_norm": 0.5357940290505442, "learning_rate": 2.9595494111623145e-05, "loss": 0.6191, "num_tokens": 143488749.0, "step": 1525 }, { "epoch": 0.26045400238948624, "grad_norm": 0.47657025406515896, "learning_rate": 2.958866700802185e-05, "loss": 0.643, "num_tokens": 143609040.0, "step": 1526 }, { "epoch": 0.2606246799795187, "grad_norm": 0.534966755733081, "learning_rate": 2.9581839904420553e-05, "loss": 0.5886, "num_tokens": 143686369.0, "step": 1527 }, { "epoch": 0.26079535756955113, "grad_norm": 0.5482248006816225, "learning_rate": 2.9575012800819257e-05, "loss": 0.6829, "num_tokens": 143779401.0, "step": 1528 }, { "epoch": 0.26096603515958355, "grad_norm": 0.462882752711681, "learning_rate": 2.956818569721796e-05, "loss": 0.6091, "num_tokens": 143886778.0, "step": 1529 }, { "epoch": 0.261136712749616, "grad_norm": 0.519357143908201, "learning_rate": 2.9561358593616664e-05, "loss": 0.6604, "num_tokens": 143971964.0, "step": 1530 }, { "epoch": 0.2613073903396484, "grad_norm": 0.6191592034850876, "learning_rate": 2.955453149001536e-05, "loss": 0.7069, "num_tokens": 144091301.0, "step": 1531 }, { "epoch": 0.2614780679296808, "grad_norm": 0.5112825178494692, "learning_rate": 2.9547704386414065e-05, "loss": 0.6098, "num_tokens": 144194027.0, "step": 1532 }, { "epoch": 0.2616487455197133, "grad_norm": 0.5045064120002258, "learning_rate": 2.954087728281277e-05, "loss": 0.6679, "num_tokens": 144304814.0, "step": 1533 }, { "epoch": 0.2618194231097457, "grad_norm": 0.5283253619728645, "learning_rate": 2.9534050179211472e-05, "loss": 0.5436, "num_tokens": 144382924.0, "step": 1534 }, { "epoch": 0.26199010069977813, "grad_norm": 0.5280618535734183, "learning_rate": 2.9527223075610173e-05, "loss": 0.5668, "num_tokens": 144464507.0, "step": 1535 }, { "epoch": 0.26216077828981055, "grad_norm": 0.5645763596542448, "learning_rate": 2.9520395972008877e-05, "loss": 0.6084, "num_tokens": 144542962.0, "step": 1536 }, { "epoch": 0.26233145587984297, "grad_norm": 0.5597915680962701, "learning_rate": 2.951356886840758e-05, "loss": 0.5585, "num_tokens": 144608128.0, "step": 1537 }, { "epoch": 0.2625021334698754, "grad_norm": 0.535778393663071, "learning_rate": 2.9506741764806284e-05, "loss": 0.6028, "num_tokens": 144688473.0, "step": 1538 }, { "epoch": 0.2626728110599078, "grad_norm": 0.5184764074921624, "learning_rate": 2.9499914661204988e-05, "loss": 0.6178, "num_tokens": 144778701.0, "step": 1539 }, { "epoch": 0.2628434886499403, "grad_norm": 0.5536001085036046, "learning_rate": 2.949308755760369e-05, "loss": 0.6388, "num_tokens": 144857983.0, "step": 1540 }, { "epoch": 0.2630141662399727, "grad_norm": 0.4995363070553543, "learning_rate": 2.9486260454002392e-05, "loss": 0.6089, "num_tokens": 144949649.0, "step": 1541 }, { "epoch": 0.2631848438300051, "grad_norm": 0.5726964066510651, "learning_rate": 2.9479433350401096e-05, "loss": 0.6093, "num_tokens": 145037056.0, "step": 1542 }, { "epoch": 0.26335552142003754, "grad_norm": 0.5349989121027221, "learning_rate": 2.94726062467998e-05, "loss": 0.5906, "num_tokens": 145115312.0, "step": 1543 }, { "epoch": 0.26352619901006996, "grad_norm": 0.5133903824577276, "learning_rate": 2.94657791431985e-05, "loss": 0.6846, "num_tokens": 145219903.0, "step": 1544 }, { "epoch": 0.2636968766001024, "grad_norm": 0.48934548669521827, "learning_rate": 2.94589520395972e-05, "loss": 0.6251, "num_tokens": 145318911.0, "step": 1545 }, { "epoch": 0.26386755419013486, "grad_norm": 0.5157008691365432, "learning_rate": 2.9452124935995904e-05, "loss": 0.6322, "num_tokens": 145400457.0, "step": 1546 }, { "epoch": 0.2640382317801673, "grad_norm": 0.4351696695983458, "learning_rate": 2.9445297832394608e-05, "loss": 0.5656, "num_tokens": 145516565.0, "step": 1547 }, { "epoch": 0.2642089093701997, "grad_norm": 0.52396412338884, "learning_rate": 2.943847072879331e-05, "loss": 0.6264, "num_tokens": 145600986.0, "step": 1548 }, { "epoch": 0.2643795869602321, "grad_norm": 0.5245396073408771, "learning_rate": 2.9431643625192015e-05, "loss": 0.6371, "num_tokens": 145705379.0, "step": 1549 }, { "epoch": 0.26455026455026454, "grad_norm": 0.5052271763599925, "learning_rate": 2.942481652159072e-05, "loss": 0.5799, "num_tokens": 145797393.0, "step": 1550 }, { "epoch": 0.26472094214029696, "grad_norm": 0.678321577737671, "learning_rate": 2.941798941798942e-05, "loss": 0.683, "num_tokens": 145884000.0, "step": 1551 }, { "epoch": 0.26489161973032943, "grad_norm": 0.5385645518113303, "learning_rate": 2.9411162314388123e-05, "loss": 0.6482, "num_tokens": 145968971.0, "step": 1552 }, { "epoch": 0.26506229732036185, "grad_norm": 0.4814562000917823, "learning_rate": 2.9404335210786827e-05, "loss": 0.6478, "num_tokens": 146088393.0, "step": 1553 }, { "epoch": 0.26523297491039427, "grad_norm": 0.5176513302065541, "learning_rate": 2.939750810718553e-05, "loss": 0.6146, "num_tokens": 146185422.0, "step": 1554 }, { "epoch": 0.2654036525004267, "grad_norm": 0.48039549607992427, "learning_rate": 2.9390681003584235e-05, "loss": 0.6648, "num_tokens": 146311067.0, "step": 1555 }, { "epoch": 0.2655743300904591, "grad_norm": 0.5086001135682076, "learning_rate": 2.938385389998293e-05, "loss": 0.589, "num_tokens": 146407137.0, "step": 1556 }, { "epoch": 0.26574500768049153, "grad_norm": 0.5342821299350323, "learning_rate": 2.9377026796381635e-05, "loss": 0.6506, "num_tokens": 146490715.0, "step": 1557 }, { "epoch": 0.265915685270524, "grad_norm": 0.5524565657044019, "learning_rate": 2.937019969278034e-05, "loss": 0.5685, "num_tokens": 146563083.0, "step": 1558 }, { "epoch": 0.2660863628605564, "grad_norm": 0.5823713005217535, "learning_rate": 2.9363372589179043e-05, "loss": 0.7462, "num_tokens": 146650872.0, "step": 1559 }, { "epoch": 0.26625704045058884, "grad_norm": 0.4928357675712604, "learning_rate": 2.9356545485577747e-05, "loss": 0.6557, "num_tokens": 146759839.0, "step": 1560 }, { "epoch": 0.26642771804062126, "grad_norm": 0.5307293776661819, "learning_rate": 2.934971838197645e-05, "loss": 0.6778, "num_tokens": 146858971.0, "step": 1561 }, { "epoch": 0.2665983956306537, "grad_norm": 0.5071972904843993, "learning_rate": 2.934289127837515e-05, "loss": 0.639, "num_tokens": 146958459.0, "step": 1562 }, { "epoch": 0.2667690732206861, "grad_norm": 0.5023480526581776, "learning_rate": 2.9336064174773854e-05, "loss": 0.687, "num_tokens": 147073982.0, "step": 1563 }, { "epoch": 0.2669397508107186, "grad_norm": 0.5520075992719014, "learning_rate": 2.9329237071172558e-05, "loss": 0.6476, "num_tokens": 147168638.0, "step": 1564 }, { "epoch": 0.267110428400751, "grad_norm": 0.500779511060749, "learning_rate": 2.9322409967571262e-05, "loss": 0.6647, "num_tokens": 147270442.0, "step": 1565 }, { "epoch": 0.2672811059907834, "grad_norm": 0.5093050911645667, "learning_rate": 2.9315582863969966e-05, "loss": 0.5698, "num_tokens": 147358911.0, "step": 1566 }, { "epoch": 0.26745178358081584, "grad_norm": 0.5120519755410098, "learning_rate": 2.930875576036867e-05, "loss": 0.647, "num_tokens": 147457901.0, "step": 1567 }, { "epoch": 0.26762246117084826, "grad_norm": 0.4848193489376691, "learning_rate": 2.9301928656767366e-05, "loss": 0.6118, "num_tokens": 147565286.0, "step": 1568 }, { "epoch": 0.2677931387608807, "grad_norm": 0.4984239405036281, "learning_rate": 2.929510155316607e-05, "loss": 0.631, "num_tokens": 147683777.0, "step": 1569 }, { "epoch": 0.2679638163509131, "grad_norm": 0.46443558506729576, "learning_rate": 2.9288274449564774e-05, "loss": 0.5916, "num_tokens": 147794162.0, "step": 1570 }, { "epoch": 0.2681344939409456, "grad_norm": 0.46908629472825814, "learning_rate": 2.9281447345963478e-05, "loss": 0.5933, "num_tokens": 147909760.0, "step": 1571 }, { "epoch": 0.268305171530978, "grad_norm": 0.5330693395031496, "learning_rate": 2.9274620242362178e-05, "loss": 0.6386, "num_tokens": 147999056.0, "step": 1572 }, { "epoch": 0.2684758491210104, "grad_norm": 0.5626345771938205, "learning_rate": 2.9267793138760882e-05, "loss": 0.6123, "num_tokens": 148084130.0, "step": 1573 }, { "epoch": 0.26864652671104283, "grad_norm": 0.46367283243322116, "learning_rate": 2.9260966035159586e-05, "loss": 0.6665, "num_tokens": 148204228.0, "step": 1574 }, { "epoch": 0.26881720430107525, "grad_norm": 0.5326705177851854, "learning_rate": 2.925413893155829e-05, "loss": 0.6379, "num_tokens": 148281145.0, "step": 1575 }, { "epoch": 0.26898788189110767, "grad_norm": 0.554595946493135, "learning_rate": 2.9247311827956993e-05, "loss": 0.6012, "num_tokens": 148357719.0, "step": 1576 }, { "epoch": 0.26915855948114015, "grad_norm": 0.5372268066791315, "learning_rate": 2.9240484724355697e-05, "loss": 0.6081, "num_tokens": 148441023.0, "step": 1577 }, { "epoch": 0.26932923707117257, "grad_norm": 0.6260486504727868, "learning_rate": 2.9233657620754397e-05, "loss": 0.606, "num_tokens": 148503699.0, "step": 1578 }, { "epoch": 0.269499914661205, "grad_norm": 0.5002745319355029, "learning_rate": 2.92268305171531e-05, "loss": 0.6704, "num_tokens": 148610884.0, "step": 1579 }, { "epoch": 0.2696705922512374, "grad_norm": 0.5170220802860002, "learning_rate": 2.9220003413551805e-05, "loss": 0.6601, "num_tokens": 148720787.0, "step": 1580 }, { "epoch": 0.2698412698412698, "grad_norm": 0.5384104724262995, "learning_rate": 2.9213176309950505e-05, "loss": 0.5771, "num_tokens": 148796197.0, "step": 1581 }, { "epoch": 0.27001194743130225, "grad_norm": 0.5058126860428649, "learning_rate": 2.9206349206349206e-05, "loss": 0.6212, "num_tokens": 148894658.0, "step": 1582 }, { "epoch": 0.2701826250213347, "grad_norm": 0.5568169329044079, "learning_rate": 2.919952210274791e-05, "loss": 0.639, "num_tokens": 148972172.0, "step": 1583 }, { "epoch": 0.27035330261136714, "grad_norm": 0.5700097961123177, "learning_rate": 2.9192694999146613e-05, "loss": 0.5667, "num_tokens": 149059528.0, "step": 1584 }, { "epoch": 0.27052398020139956, "grad_norm": 0.5274858102015078, "learning_rate": 2.9185867895545317e-05, "loss": 0.6488, "num_tokens": 149152427.0, "step": 1585 }, { "epoch": 0.270694657791432, "grad_norm": 0.5038832631003527, "learning_rate": 2.917904079194402e-05, "loss": 0.6656, "num_tokens": 149261747.0, "step": 1586 }, { "epoch": 0.2708653353814644, "grad_norm": 0.5826917571856942, "learning_rate": 2.9172213688342724e-05, "loss": 0.6818, "num_tokens": 149366533.0, "step": 1587 }, { "epoch": 0.2710360129714968, "grad_norm": 0.555409232448178, "learning_rate": 2.9165386584741425e-05, "loss": 0.7236, "num_tokens": 149457793.0, "step": 1588 }, { "epoch": 0.2712066905615293, "grad_norm": 0.4961785589691689, "learning_rate": 2.915855948114013e-05, "loss": 0.5858, "num_tokens": 149552905.0, "step": 1589 }, { "epoch": 0.2713773681515617, "grad_norm": 0.5706697997553639, "learning_rate": 2.9151732377538832e-05, "loss": 0.6508, "num_tokens": 149623652.0, "step": 1590 }, { "epoch": 0.27154804574159414, "grad_norm": 0.5556306327323185, "learning_rate": 2.9144905273937536e-05, "loss": 0.6392, "num_tokens": 149713979.0, "step": 1591 }, { "epoch": 0.27171872333162655, "grad_norm": 0.49730750102407684, "learning_rate": 2.913807817033624e-05, "loss": 0.6115, "num_tokens": 149828848.0, "step": 1592 }, { "epoch": 0.271889400921659, "grad_norm": 0.524169788304647, "learning_rate": 2.9131251066734937e-05, "loss": 0.6742, "num_tokens": 149943653.0, "step": 1593 }, { "epoch": 0.2720600785116914, "grad_norm": 0.501759367567607, "learning_rate": 2.912442396313364e-05, "loss": 0.556, "num_tokens": 150034896.0, "step": 1594 }, { "epoch": 0.27223075610172387, "grad_norm": 0.5119541631604985, "learning_rate": 2.9117596859532344e-05, "loss": 0.6759, "num_tokens": 150141508.0, "step": 1595 }, { "epoch": 0.2724014336917563, "grad_norm": 0.5340123503829572, "learning_rate": 2.9110769755931048e-05, "loss": 0.5938, "num_tokens": 150238664.0, "step": 1596 }, { "epoch": 0.2725721112817887, "grad_norm": 0.5188588504786178, "learning_rate": 2.9103942652329752e-05, "loss": 0.6742, "num_tokens": 150343343.0, "step": 1597 }, { "epoch": 0.27274278887182113, "grad_norm": 0.5354140245683103, "learning_rate": 2.9097115548728456e-05, "loss": 0.5912, "num_tokens": 150434772.0, "step": 1598 }, { "epoch": 0.27291346646185355, "grad_norm": 0.5672294241462507, "learning_rate": 2.9090288445127156e-05, "loss": 0.6247, "num_tokens": 150513939.0, "step": 1599 }, { "epoch": 0.27308414405188597, "grad_norm": 0.554496137292418, "learning_rate": 2.908346134152586e-05, "loss": 0.595, "num_tokens": 150582432.0, "step": 1600 }, { "epoch": 0.2732548216419184, "grad_norm": 0.4824454080424507, "learning_rate": 2.9076634237924564e-05, "loss": 0.6615, "num_tokens": 150711343.0, "step": 1601 }, { "epoch": 0.27342549923195086, "grad_norm": 0.4955043522938973, "learning_rate": 2.9069807134323267e-05, "loss": 0.607, "num_tokens": 150812637.0, "step": 1602 }, { "epoch": 0.2735961768219833, "grad_norm": 0.5123726943091212, "learning_rate": 2.906298003072197e-05, "loss": 0.5589, "num_tokens": 150897417.0, "step": 1603 }, { "epoch": 0.2737668544120157, "grad_norm": 0.527004506857155, "learning_rate": 2.9056152927120675e-05, "loss": 0.5154, "num_tokens": 150965289.0, "step": 1604 }, { "epoch": 0.2739375320020481, "grad_norm": 0.5413316770921324, "learning_rate": 2.9049325823519375e-05, "loss": 0.6539, "num_tokens": 151046520.0, "step": 1605 }, { "epoch": 0.27410820959208054, "grad_norm": 0.5109708538472376, "learning_rate": 2.9042498719918076e-05, "loss": 0.5681, "num_tokens": 151133312.0, "step": 1606 }, { "epoch": 0.27427888718211296, "grad_norm": 0.48138993842613137, "learning_rate": 2.903567161631678e-05, "loss": 0.5813, "num_tokens": 151243634.0, "step": 1607 }, { "epoch": 0.27444956477214544, "grad_norm": 0.5571396512911816, "learning_rate": 2.9028844512715483e-05, "loss": 0.6247, "num_tokens": 151322382.0, "step": 1608 }, { "epoch": 0.27462024236217786, "grad_norm": 0.6267887557000068, "learning_rate": 2.9022017409114183e-05, "loss": 0.6875, "num_tokens": 151401989.0, "step": 1609 }, { "epoch": 0.2747909199522103, "grad_norm": 0.5086875696311032, "learning_rate": 2.9015190305512887e-05, "loss": 0.5903, "num_tokens": 151489934.0, "step": 1610 }, { "epoch": 0.2749615975422427, "grad_norm": 0.4915241710507783, "learning_rate": 2.900836320191159e-05, "loss": 0.6196, "num_tokens": 151590212.0, "step": 1611 }, { "epoch": 0.2751322751322751, "grad_norm": 0.5243876527333179, "learning_rate": 2.9001536098310295e-05, "loss": 0.6387, "num_tokens": 151695998.0, "step": 1612 }, { "epoch": 0.27530295272230754, "grad_norm": 0.5187938875728016, "learning_rate": 2.8994708994709e-05, "loss": 0.6415, "num_tokens": 151791260.0, "step": 1613 }, { "epoch": 0.27547363031234, "grad_norm": 0.48631853824570576, "learning_rate": 2.8987881891107702e-05, "loss": 0.5941, "num_tokens": 151898212.0, "step": 1614 }, { "epoch": 0.27564430790237243, "grad_norm": 0.5641643980677884, "learning_rate": 2.8981054787506403e-05, "loss": 0.5975, "num_tokens": 151971952.0, "step": 1615 }, { "epoch": 0.27581498549240485, "grad_norm": 0.5841254208484151, "learning_rate": 2.8974227683905106e-05, "loss": 0.6195, "num_tokens": 152045126.0, "step": 1616 }, { "epoch": 0.27598566308243727, "grad_norm": 0.5330335610342765, "learning_rate": 2.896740058030381e-05, "loss": 0.6466, "num_tokens": 152140892.0, "step": 1617 }, { "epoch": 0.2761563406724697, "grad_norm": 0.501418615177315, "learning_rate": 2.896057347670251e-05, "loss": 0.6262, "num_tokens": 152244288.0, "step": 1618 }, { "epoch": 0.2763270182625021, "grad_norm": 0.4930359068405374, "learning_rate": 2.895374637310121e-05, "loss": 0.601, "num_tokens": 152355518.0, "step": 1619 }, { "epoch": 0.2764976958525346, "grad_norm": 0.7749953913051381, "learning_rate": 2.8946919269499915e-05, "loss": 0.6357, "num_tokens": 152449688.0, "step": 1620 }, { "epoch": 0.276668373442567, "grad_norm": 0.4354898483238495, "learning_rate": 2.894009216589862e-05, "loss": 0.6168, "num_tokens": 152590298.0, "step": 1621 }, { "epoch": 0.2768390510325994, "grad_norm": 0.5512742082300046, "learning_rate": 2.8933265062297322e-05, "loss": 0.6614, "num_tokens": 152670584.0, "step": 1622 }, { "epoch": 0.27700972862263185, "grad_norm": 0.5833277577715714, "learning_rate": 2.8926437958696026e-05, "loss": 0.7341, "num_tokens": 152764278.0, "step": 1623 }, { "epoch": 0.27718040621266427, "grad_norm": 0.5175480257914119, "learning_rate": 2.891961085509473e-05, "loss": 0.775, "num_tokens": 152880824.0, "step": 1624 }, { "epoch": 0.2773510838026967, "grad_norm": 0.5277410009278345, "learning_rate": 2.891278375149343e-05, "loss": 0.591, "num_tokens": 152957985.0, "step": 1625 }, { "epoch": 0.27752176139272916, "grad_norm": 0.5029482198829658, "learning_rate": 2.8905956647892134e-05, "loss": 0.596, "num_tokens": 153060255.0, "step": 1626 }, { "epoch": 0.2776924389827616, "grad_norm": 0.5363513224590286, "learning_rate": 2.8899129544290838e-05, "loss": 0.5822, "num_tokens": 153138923.0, "step": 1627 }, { "epoch": 0.277863116572794, "grad_norm": 0.6803750015006881, "learning_rate": 2.889230244068954e-05, "loss": 0.7348, "num_tokens": 153218403.0, "step": 1628 }, { "epoch": 0.2780337941628264, "grad_norm": 0.6129248117956091, "learning_rate": 2.8885475337088245e-05, "loss": 0.7012, "num_tokens": 153311802.0, "step": 1629 }, { "epoch": 0.27820447175285884, "grad_norm": 0.508415673243472, "learning_rate": 2.8878648233486942e-05, "loss": 0.5995, "num_tokens": 153415132.0, "step": 1630 }, { "epoch": 0.27837514934289126, "grad_norm": 0.5345503216174748, "learning_rate": 2.8871821129885646e-05, "loss": 0.631, "num_tokens": 153508670.0, "step": 1631 }, { "epoch": 0.27854582693292373, "grad_norm": 0.5429709552443895, "learning_rate": 2.886499402628435e-05, "loss": 0.5938, "num_tokens": 153589728.0, "step": 1632 }, { "epoch": 0.27871650452295615, "grad_norm": 0.5302578873525465, "learning_rate": 2.8858166922683053e-05, "loss": 0.6868, "num_tokens": 153683480.0, "step": 1633 }, { "epoch": 0.2788871821129886, "grad_norm": 0.5460780570492969, "learning_rate": 2.8851339819081757e-05, "loss": 0.6942, "num_tokens": 153780343.0, "step": 1634 }, { "epoch": 0.279057859703021, "grad_norm": 0.46991157853437066, "learning_rate": 2.884451271548046e-05, "loss": 0.6832, "num_tokens": 153907818.0, "step": 1635 }, { "epoch": 0.2792285372930534, "grad_norm": 0.5443205526752788, "learning_rate": 2.883768561187916e-05, "loss": 0.603, "num_tokens": 153995538.0, "step": 1636 }, { "epoch": 0.27939921488308583, "grad_norm": 0.517683163054388, "learning_rate": 2.8830858508277865e-05, "loss": 0.6431, "num_tokens": 154092004.0, "step": 1637 }, { "epoch": 0.27956989247311825, "grad_norm": 0.48843970974637657, "learning_rate": 2.882403140467657e-05, "loss": 0.6436, "num_tokens": 154201559.0, "step": 1638 }, { "epoch": 0.27974057006315073, "grad_norm": 0.5030594027997682, "learning_rate": 2.8817204301075273e-05, "loss": 0.6193, "num_tokens": 154303723.0, "step": 1639 }, { "epoch": 0.27991124765318315, "grad_norm": 0.5580081897152643, "learning_rate": 2.8810377197473976e-05, "loss": 0.6045, "num_tokens": 154381365.0, "step": 1640 }, { "epoch": 0.28008192524321557, "grad_norm": 0.5709744607317606, "learning_rate": 2.880355009387268e-05, "loss": 0.587, "num_tokens": 154453583.0, "step": 1641 }, { "epoch": 0.280252602833248, "grad_norm": 0.5362278412019073, "learning_rate": 2.879672299027138e-05, "loss": 0.5842, "num_tokens": 154536007.0, "step": 1642 }, { "epoch": 0.2804232804232804, "grad_norm": 0.497498615319572, "learning_rate": 2.878989588667008e-05, "loss": 0.5602, "num_tokens": 154627405.0, "step": 1643 }, { "epoch": 0.2805939580133128, "grad_norm": 0.5369642013038153, "learning_rate": 2.8783068783068785e-05, "loss": 0.7162, "num_tokens": 154726118.0, "step": 1644 }, { "epoch": 0.2807646356033453, "grad_norm": 0.5207523805730631, "learning_rate": 2.877624167946749e-05, "loss": 0.6706, "num_tokens": 154822883.0, "step": 1645 }, { "epoch": 0.2809353131933777, "grad_norm": 0.5631939522324584, "learning_rate": 2.876941457586619e-05, "loss": 0.6165, "num_tokens": 154906962.0, "step": 1646 }, { "epoch": 0.28110599078341014, "grad_norm": 0.8080780598389131, "learning_rate": 2.8762587472264893e-05, "loss": 0.731, "num_tokens": 154986916.0, "step": 1647 }, { "epoch": 0.28127666837344256, "grad_norm": 0.5160576424631989, "learning_rate": 2.8755760368663596e-05, "loss": 0.6873, "num_tokens": 155087532.0, "step": 1648 }, { "epoch": 0.281447345963475, "grad_norm": 0.5181131229758502, "learning_rate": 2.87489332650623e-05, "loss": 0.5582, "num_tokens": 155181714.0, "step": 1649 }, { "epoch": 0.2816180235535074, "grad_norm": 0.5240511077881997, "learning_rate": 2.8742106161461004e-05, "loss": 0.5725, "num_tokens": 155261159.0, "step": 1650 }, { "epoch": 0.2817887011435399, "grad_norm": 0.544777848444035, "learning_rate": 2.8735279057859708e-05, "loss": 0.6623, "num_tokens": 155361701.0, "step": 1651 }, { "epoch": 0.2819593787335723, "grad_norm": 0.5107417479074698, "learning_rate": 2.8728451954258408e-05, "loss": 0.598, "num_tokens": 155445862.0, "step": 1652 }, { "epoch": 0.2821300563236047, "grad_norm": 0.5033343261024319, "learning_rate": 2.8721624850657112e-05, "loss": 0.6627, "num_tokens": 155541762.0, "step": 1653 }, { "epoch": 0.28230073391363714, "grad_norm": 0.5756645373110948, "learning_rate": 2.8714797747055816e-05, "loss": 0.6597, "num_tokens": 155613784.0, "step": 1654 }, { "epoch": 0.28247141150366956, "grad_norm": 0.48027645387004453, "learning_rate": 2.8707970643454516e-05, "loss": 0.5671, "num_tokens": 155722435.0, "step": 1655 }, { "epoch": 0.282642089093702, "grad_norm": 0.5596435407868025, "learning_rate": 2.8701143539853216e-05, "loss": 0.6154, "num_tokens": 155799709.0, "step": 1656 }, { "epoch": 0.28281276668373445, "grad_norm": 0.49442078409359674, "learning_rate": 2.869431643625192e-05, "loss": 0.6084, "num_tokens": 155900850.0, "step": 1657 }, { "epoch": 0.28298344427376687, "grad_norm": 0.5557784292396412, "learning_rate": 2.8687489332650624e-05, "loss": 0.6328, "num_tokens": 155990864.0, "step": 1658 }, { "epoch": 0.2831541218637993, "grad_norm": 0.5240447377676549, "learning_rate": 2.8680662229049328e-05, "loss": 0.6037, "num_tokens": 156075786.0, "step": 1659 }, { "epoch": 0.2833247994538317, "grad_norm": 0.4661683862697689, "learning_rate": 2.867383512544803e-05, "loss": 0.5345, "num_tokens": 156179294.0, "step": 1660 }, { "epoch": 0.28349547704386413, "grad_norm": 0.5150501721970029, "learning_rate": 2.8667008021846735e-05, "loss": 0.5764, "num_tokens": 156260846.0, "step": 1661 }, { "epoch": 0.28366615463389655, "grad_norm": 0.48104035304427045, "learning_rate": 2.8660180918245435e-05, "loss": 0.6085, "num_tokens": 156366999.0, "step": 1662 }, { "epoch": 0.283836832223929, "grad_norm": 0.5068251463953185, "learning_rate": 2.865335381464414e-05, "loss": 0.6772, "num_tokens": 156465835.0, "step": 1663 }, { "epoch": 0.28400750981396145, "grad_norm": 0.5614672302892944, "learning_rate": 2.8646526711042843e-05, "loss": 0.6859, "num_tokens": 156559445.0, "step": 1664 }, { "epoch": 0.28417818740399386, "grad_norm": 0.5367991551876914, "learning_rate": 2.8639699607441547e-05, "loss": 0.6599, "num_tokens": 156655568.0, "step": 1665 }, { "epoch": 0.2843488649940263, "grad_norm": 0.5254668232814469, "learning_rate": 2.863287250384025e-05, "loss": 0.6808, "num_tokens": 156753875.0, "step": 1666 }, { "epoch": 0.2845195425840587, "grad_norm": 0.5191920314414764, "learning_rate": 2.8626045400238954e-05, "loss": 0.6822, "num_tokens": 156858428.0, "step": 1667 }, { "epoch": 0.2846902201740911, "grad_norm": 0.5259457940245835, "learning_rate": 2.861921829663765e-05, "loss": 0.6656, "num_tokens": 156960292.0, "step": 1668 }, { "epoch": 0.28486089776412354, "grad_norm": 0.510430535102296, "learning_rate": 2.8612391193036355e-05, "loss": 0.5978, "num_tokens": 157062927.0, "step": 1669 }, { "epoch": 0.285031575354156, "grad_norm": 0.513103908589769, "learning_rate": 2.860556408943506e-05, "loss": 0.5789, "num_tokens": 157151695.0, "step": 1670 }, { "epoch": 0.28520225294418844, "grad_norm": 0.5703710136604314, "learning_rate": 2.8598736985833763e-05, "loss": 0.603, "num_tokens": 157216206.0, "step": 1671 }, { "epoch": 0.28537293053422086, "grad_norm": 0.5179182189358502, "learning_rate": 2.8591909882232466e-05, "loss": 0.6139, "num_tokens": 157303245.0, "step": 1672 }, { "epoch": 0.2855436081242533, "grad_norm": 0.5195474858227033, "learning_rate": 2.8585082778631167e-05, "loss": 0.619, "num_tokens": 157396679.0, "step": 1673 }, { "epoch": 0.2857142857142857, "grad_norm": 0.5021110766087584, "learning_rate": 2.857825567502987e-05, "loss": 0.6013, "num_tokens": 157495665.0, "step": 1674 }, { "epoch": 0.2858849633043181, "grad_norm": 0.5322598361496493, "learning_rate": 2.8571428571428574e-05, "loss": 0.7452, "num_tokens": 157603297.0, "step": 1675 }, { "epoch": 0.2860556408943506, "grad_norm": 0.5238178421148074, "learning_rate": 2.8564601467827278e-05, "loss": 0.6383, "num_tokens": 157692785.0, "step": 1676 }, { "epoch": 0.286226318484383, "grad_norm": 0.5932445949990176, "learning_rate": 2.8557774364225982e-05, "loss": 0.6085, "num_tokens": 157761519.0, "step": 1677 }, { "epoch": 0.28639699607441543, "grad_norm": 0.4871736548089066, "learning_rate": 2.8550947260624686e-05, "loss": 0.5721, "num_tokens": 157856041.0, "step": 1678 }, { "epoch": 0.28656767366444785, "grad_norm": 0.49812014478642047, "learning_rate": 2.8544120157023386e-05, "loss": 0.7011, "num_tokens": 157962439.0, "step": 1679 }, { "epoch": 0.2867383512544803, "grad_norm": 0.49239755298640253, "learning_rate": 2.8537293053422086e-05, "loss": 0.6361, "num_tokens": 158068439.0, "step": 1680 }, { "epoch": 0.2869090288445127, "grad_norm": 0.5172606236576057, "learning_rate": 2.853046594982079e-05, "loss": 0.6616, "num_tokens": 158161668.0, "step": 1681 }, { "epoch": 0.28707970643454517, "grad_norm": 0.578086344953046, "learning_rate": 2.8523638846219494e-05, "loss": 0.5594, "num_tokens": 158224030.0, "step": 1682 }, { "epoch": 0.2872503840245776, "grad_norm": 0.5476552532679981, "learning_rate": 2.8516811742618194e-05, "loss": 0.605, "num_tokens": 158305611.0, "step": 1683 }, { "epoch": 0.28742106161461, "grad_norm": 0.5877391859469915, "learning_rate": 2.8509984639016898e-05, "loss": 0.6556, "num_tokens": 158384269.0, "step": 1684 }, { "epoch": 0.2875917392046424, "grad_norm": 0.4964676365156057, "learning_rate": 2.8503157535415602e-05, "loss": 0.6009, "num_tokens": 158498931.0, "step": 1685 }, { "epoch": 0.28776241679467485, "grad_norm": 0.6283381844791843, "learning_rate": 2.8496330431814305e-05, "loss": 0.731, "num_tokens": 158587368.0, "step": 1686 }, { "epoch": 0.28793309438470727, "grad_norm": 0.5137682436639205, "learning_rate": 2.848950332821301e-05, "loss": 0.5909, "num_tokens": 158676182.0, "step": 1687 }, { "epoch": 0.28810377197473974, "grad_norm": 0.4933016485529538, "learning_rate": 2.8482676224611713e-05, "loss": 0.6119, "num_tokens": 158787762.0, "step": 1688 }, { "epoch": 0.28827444956477216, "grad_norm": 0.507531991771271, "learning_rate": 2.8475849121010413e-05, "loss": 0.5726, "num_tokens": 158876663.0, "step": 1689 }, { "epoch": 0.2884451271548046, "grad_norm": 0.5510543538142747, "learning_rate": 2.8469022017409117e-05, "loss": 0.639, "num_tokens": 158966122.0, "step": 1690 }, { "epoch": 0.288615804744837, "grad_norm": 0.5158136775843158, "learning_rate": 2.846219491380782e-05, "loss": 0.6815, "num_tokens": 159066120.0, "step": 1691 }, { "epoch": 0.2887864823348694, "grad_norm": 0.4730053850427305, "learning_rate": 2.845536781020652e-05, "loss": 0.5958, "num_tokens": 159177435.0, "step": 1692 }, { "epoch": 0.28895715992490184, "grad_norm": 0.5321961516186255, "learning_rate": 2.844854070660522e-05, "loss": 0.6029, "num_tokens": 159269408.0, "step": 1693 }, { "epoch": 0.2891278375149343, "grad_norm": 0.4954207915356764, "learning_rate": 2.8441713603003925e-05, "loss": 0.6317, "num_tokens": 159380272.0, "step": 1694 }, { "epoch": 0.28929851510496674, "grad_norm": 0.5066299431551413, "learning_rate": 2.843488649940263e-05, "loss": 0.6033, "num_tokens": 159478240.0, "step": 1695 }, { "epoch": 0.28946919269499916, "grad_norm": 0.5071473342845899, "learning_rate": 2.8428059395801333e-05, "loss": 0.6482, "num_tokens": 159571957.0, "step": 1696 }, { "epoch": 0.2896398702850316, "grad_norm": 0.5243815368664668, "learning_rate": 2.8421232292200037e-05, "loss": 0.6234, "num_tokens": 159663756.0, "step": 1697 }, { "epoch": 0.289810547875064, "grad_norm": 0.557916779988723, "learning_rate": 2.841440518859874e-05, "loss": 0.6261, "num_tokens": 159748831.0, "step": 1698 }, { "epoch": 0.2899812254650964, "grad_norm": 0.5932982145420623, "learning_rate": 2.840757808499744e-05, "loss": 0.6271, "num_tokens": 159832680.0, "step": 1699 }, { "epoch": 0.29015190305512883, "grad_norm": 0.5310129591815193, "learning_rate": 2.8400750981396145e-05, "loss": 0.6758, "num_tokens": 159921877.0, "step": 1700 }, { "epoch": 0.2903225806451613, "grad_norm": 0.506480867611394, "learning_rate": 2.839392387779485e-05, "loss": 0.5958, "num_tokens": 160024737.0, "step": 1701 }, { "epoch": 0.29049325823519373, "grad_norm": 0.5780944752025957, "learning_rate": 2.8387096774193552e-05, "loss": 0.678, "num_tokens": 160099760.0, "step": 1702 }, { "epoch": 0.29066393582522615, "grad_norm": 0.5683723309708199, "learning_rate": 2.8380269670592256e-05, "loss": 0.7047, "num_tokens": 160195603.0, "step": 1703 }, { "epoch": 0.29083461341525857, "grad_norm": 0.4827392821323225, "learning_rate": 2.837344256699096e-05, "loss": 0.6839, "num_tokens": 160310590.0, "step": 1704 }, { "epoch": 0.291005291005291, "grad_norm": 0.48061717873652443, "learning_rate": 2.8366615463389657e-05, "loss": 0.5884, "num_tokens": 160421649.0, "step": 1705 }, { "epoch": 0.2911759685953234, "grad_norm": 0.5681248083317467, "learning_rate": 2.835978835978836e-05, "loss": 0.652, "num_tokens": 160502410.0, "step": 1706 }, { "epoch": 0.2913466461853559, "grad_norm": 0.4579952560179596, "learning_rate": 2.8352961256187064e-05, "loss": 0.6339, "num_tokens": 160612592.0, "step": 1707 }, { "epoch": 0.2915173237753883, "grad_norm": 0.5971478164209244, "learning_rate": 2.8346134152585768e-05, "loss": 0.5949, "num_tokens": 160672167.0, "step": 1708 }, { "epoch": 0.2916880013654207, "grad_norm": 0.49110699403490465, "learning_rate": 2.833930704898447e-05, "loss": 0.7011, "num_tokens": 160788663.0, "step": 1709 }, { "epoch": 0.29185867895545314, "grad_norm": 0.5124273592442077, "learning_rate": 2.8332479945383172e-05, "loss": 0.6277, "num_tokens": 160883645.0, "step": 1710 }, { "epoch": 0.29202935654548556, "grad_norm": 0.5421135968999488, "learning_rate": 2.8325652841781876e-05, "loss": 0.7712, "num_tokens": 160995946.0, "step": 1711 }, { "epoch": 0.292200034135518, "grad_norm": 0.5142972450794154, "learning_rate": 2.831882573818058e-05, "loss": 0.5976, "num_tokens": 161091724.0, "step": 1712 }, { "epoch": 0.29237071172555046, "grad_norm": 0.49630007121143865, "learning_rate": 2.8311998634579283e-05, "loss": 0.626, "num_tokens": 161202128.0, "step": 1713 }, { "epoch": 0.2925413893155829, "grad_norm": 0.4705735442102008, "learning_rate": 2.8305171530977987e-05, "loss": 0.5835, "num_tokens": 161311836.0, "step": 1714 }, { "epoch": 0.2927120669056153, "grad_norm": 0.511967959096828, "learning_rate": 2.829834442737669e-05, "loss": 0.6065, "num_tokens": 161405879.0, "step": 1715 }, { "epoch": 0.2928827444956477, "grad_norm": 0.4803112931376538, "learning_rate": 2.829151732377539e-05, "loss": 0.5966, "num_tokens": 161531599.0, "step": 1716 }, { "epoch": 0.29305342208568014, "grad_norm": 0.551101296608276, "learning_rate": 2.828469022017409e-05, "loss": 0.6409, "num_tokens": 161616218.0, "step": 1717 }, { "epoch": 0.29322409967571256, "grad_norm": 0.46614633528149774, "learning_rate": 2.8277863116572795e-05, "loss": 0.6368, "num_tokens": 161734591.0, "step": 1718 }, { "epoch": 0.29339477726574503, "grad_norm": 0.5118990426896141, "learning_rate": 2.82710360129715e-05, "loss": 0.5819, "num_tokens": 161812859.0, "step": 1719 }, { "epoch": 0.29356545485577745, "grad_norm": 0.4843980942700516, "learning_rate": 2.82642089093702e-05, "loss": 0.694, "num_tokens": 161930940.0, "step": 1720 }, { "epoch": 0.29373613244580987, "grad_norm": 0.5395507708966872, "learning_rate": 2.8257381805768903e-05, "loss": 0.5907, "num_tokens": 162018219.0, "step": 1721 }, { "epoch": 0.2939068100358423, "grad_norm": 0.5206632544226754, "learning_rate": 2.8250554702167607e-05, "loss": 0.6226, "num_tokens": 162109290.0, "step": 1722 }, { "epoch": 0.2940774876258747, "grad_norm": 0.4630002062000658, "learning_rate": 2.824372759856631e-05, "loss": 0.5932, "num_tokens": 162228620.0, "step": 1723 }, { "epoch": 0.29424816521590713, "grad_norm": 0.5268639520758155, "learning_rate": 2.8236900494965015e-05, "loss": 0.599, "num_tokens": 162311846.0, "step": 1724 }, { "epoch": 0.2944188428059396, "grad_norm": 0.5867923177223241, "learning_rate": 2.823007339136372e-05, "loss": 0.6144, "num_tokens": 162390419.0, "step": 1725 }, { "epoch": 0.294589520395972, "grad_norm": 0.5484132225371406, "learning_rate": 2.822324628776242e-05, "loss": 0.5794, "num_tokens": 162459637.0, "step": 1726 }, { "epoch": 0.29476019798600445, "grad_norm": 0.5195203589692147, "learning_rate": 2.8216419184161122e-05, "loss": 0.532, "num_tokens": 162533610.0, "step": 1727 }, { "epoch": 0.29493087557603687, "grad_norm": 0.5259597587426807, "learning_rate": 2.8209592080559826e-05, "loss": 0.6817, "num_tokens": 162638222.0, "step": 1728 }, { "epoch": 0.2951015531660693, "grad_norm": 0.5455422681391826, "learning_rate": 2.8202764976958527e-05, "loss": 0.5537, "num_tokens": 162736856.0, "step": 1729 }, { "epoch": 0.2952722307561017, "grad_norm": 0.531586259501683, "learning_rate": 2.8195937873357227e-05, "loss": 0.6022, "num_tokens": 162831312.0, "step": 1730 }, { "epoch": 0.2954429083461341, "grad_norm": 0.5425699455499043, "learning_rate": 2.818911076975593e-05, "loss": 0.5998, "num_tokens": 162919865.0, "step": 1731 }, { "epoch": 0.2956135859361666, "grad_norm": 0.4851087284761314, "learning_rate": 2.8182283666154635e-05, "loss": 0.5698, "num_tokens": 163028135.0, "step": 1732 }, { "epoch": 0.295784263526199, "grad_norm": 0.5273040486282454, "learning_rate": 2.8175456562553338e-05, "loss": 0.6194, "num_tokens": 163122336.0, "step": 1733 }, { "epoch": 0.29595494111623144, "grad_norm": 0.5436608886422389, "learning_rate": 2.8168629458952042e-05, "loss": 0.6251, "num_tokens": 163211168.0, "step": 1734 }, { "epoch": 0.29612561870626386, "grad_norm": 0.539230046357182, "learning_rate": 2.8161802355350746e-05, "loss": 0.6676, "num_tokens": 163310473.0, "step": 1735 }, { "epoch": 0.2962962962962963, "grad_norm": 0.4942881181501553, "learning_rate": 2.815497525174945e-05, "loss": 0.6275, "num_tokens": 163415592.0, "step": 1736 }, { "epoch": 0.2964669738863287, "grad_norm": 0.515105433581793, "learning_rate": 2.814814814814815e-05, "loss": 0.6048, "num_tokens": 163511572.0, "step": 1737 }, { "epoch": 0.2966376514763612, "grad_norm": 0.49012078976524454, "learning_rate": 2.8141321044546854e-05, "loss": 0.5594, "num_tokens": 163598711.0, "step": 1738 }, { "epoch": 0.2968083290663936, "grad_norm": 0.5841856298991178, "learning_rate": 2.8134493940945557e-05, "loss": 0.588, "num_tokens": 163680671.0, "step": 1739 }, { "epoch": 0.296979006656426, "grad_norm": 0.5567756181115766, "learning_rate": 2.812766683734426e-05, "loss": 0.6711, "num_tokens": 163779349.0, "step": 1740 }, { "epoch": 0.29714968424645843, "grad_norm": 0.49025007706922386, "learning_rate": 2.8120839733742965e-05, "loss": 0.5774, "num_tokens": 163887779.0, "step": 1741 }, { "epoch": 0.29732036183649085, "grad_norm": 0.4672875897886219, "learning_rate": 2.8114012630141662e-05, "loss": 0.6276, "num_tokens": 164002007.0, "step": 1742 }, { "epoch": 0.2974910394265233, "grad_norm": 0.47535410824303564, "learning_rate": 2.8107185526540366e-05, "loss": 0.6427, "num_tokens": 164130073.0, "step": 1743 }, { "epoch": 0.29766171701655575, "grad_norm": 0.5357808265527796, "learning_rate": 2.810035842293907e-05, "loss": 0.6159, "num_tokens": 164209745.0, "step": 1744 }, { "epoch": 0.29783239460658817, "grad_norm": 0.563139883766903, "learning_rate": 2.8093531319337773e-05, "loss": 0.6377, "num_tokens": 164285529.0, "step": 1745 }, { "epoch": 0.2980030721966206, "grad_norm": 0.5644131465937604, "learning_rate": 2.8086704215736477e-05, "loss": 0.619, "num_tokens": 164355854.0, "step": 1746 }, { "epoch": 0.298173749786653, "grad_norm": 0.522785443611465, "learning_rate": 2.8079877112135177e-05, "loss": 0.6134, "num_tokens": 164442107.0, "step": 1747 }, { "epoch": 0.29834442737668543, "grad_norm": 0.48210802971826366, "learning_rate": 2.807305000853388e-05, "loss": 0.5715, "num_tokens": 164544873.0, "step": 1748 }, { "epoch": 0.29851510496671785, "grad_norm": 0.4995027462352355, "learning_rate": 2.8066222904932585e-05, "loss": 0.5512, "num_tokens": 164636742.0, "step": 1749 }, { "epoch": 0.2986857825567503, "grad_norm": 0.4429018765768066, "learning_rate": 2.805939580133129e-05, "loss": 0.6401, "num_tokens": 164775013.0, "step": 1750 }, { "epoch": 0.29885646014678274, "grad_norm": 0.505423607327285, "learning_rate": 2.8052568697729992e-05, "loss": 0.5987, "num_tokens": 164868240.0, "step": 1751 }, { "epoch": 0.29902713773681516, "grad_norm": 0.49811876932705607, "learning_rate": 2.8045741594128696e-05, "loss": 0.5721, "num_tokens": 164955406.0, "step": 1752 }, { "epoch": 0.2991978153268476, "grad_norm": 0.511432564671813, "learning_rate": 2.8038914490527397e-05, "loss": 0.7195, "num_tokens": 165066770.0, "step": 1753 }, { "epoch": 0.29936849291688, "grad_norm": 0.6252294370394783, "learning_rate": 2.8032087386926097e-05, "loss": 0.6213, "num_tokens": 165128255.0, "step": 1754 }, { "epoch": 0.2995391705069124, "grad_norm": 0.5416844981134004, "learning_rate": 2.80252602833248e-05, "loss": 0.5556, "num_tokens": 165213571.0, "step": 1755 }, { "epoch": 0.2997098480969449, "grad_norm": 0.49429383458702514, "learning_rate": 2.8018433179723505e-05, "loss": 0.5651, "num_tokens": 165304153.0, "step": 1756 }, { "epoch": 0.2998805256869773, "grad_norm": 0.5334611689704962, "learning_rate": 2.8011606076122205e-05, "loss": 0.6676, "num_tokens": 165405184.0, "step": 1757 }, { "epoch": 0.30005120327700974, "grad_norm": 0.5090027833395734, "learning_rate": 2.800477897252091e-05, "loss": 0.6107, "num_tokens": 165508278.0, "step": 1758 }, { "epoch": 0.30022188086704216, "grad_norm": 0.47394020459295544, "learning_rate": 2.7997951868919612e-05, "loss": 0.6585, "num_tokens": 165639377.0, "step": 1759 }, { "epoch": 0.3003925584570746, "grad_norm": 0.6075424944060056, "learning_rate": 2.7991124765318316e-05, "loss": 0.701, "num_tokens": 165710280.0, "step": 1760 }, { "epoch": 0.300563236047107, "grad_norm": 0.49848372773680205, "learning_rate": 2.798429766171702e-05, "loss": 0.7296, "num_tokens": 165833182.0, "step": 1761 }, { "epoch": 0.3007339136371394, "grad_norm": 0.5334898970042283, "learning_rate": 2.7977470558115724e-05, "loss": 0.623, "num_tokens": 165923095.0, "step": 1762 }, { "epoch": 0.3009045912271719, "grad_norm": 0.4657300758711079, "learning_rate": 2.7970643454514424e-05, "loss": 0.6255, "num_tokens": 166032595.0, "step": 1763 }, { "epoch": 0.3010752688172043, "grad_norm": 0.5022585808641511, "learning_rate": 2.7963816350913128e-05, "loss": 0.6178, "num_tokens": 166122687.0, "step": 1764 }, { "epoch": 0.30124594640723673, "grad_norm": 0.48887779425893924, "learning_rate": 2.795698924731183e-05, "loss": 0.555, "num_tokens": 166211277.0, "step": 1765 }, { "epoch": 0.30141662399726915, "grad_norm": 0.48995370806412936, "learning_rate": 2.7950162143710535e-05, "loss": 0.582, "num_tokens": 166303431.0, "step": 1766 }, { "epoch": 0.30158730158730157, "grad_norm": 0.5055571019418379, "learning_rate": 2.7943335040109232e-05, "loss": 0.6334, "num_tokens": 166414696.0, "step": 1767 }, { "epoch": 0.301757979177334, "grad_norm": 0.508522098259203, "learning_rate": 2.7936507936507936e-05, "loss": 0.6011, "num_tokens": 166504754.0, "step": 1768 }, { "epoch": 0.30192865676736647, "grad_norm": 0.5366746232896207, "learning_rate": 2.792968083290664e-05, "loss": 0.6211, "num_tokens": 166586524.0, "step": 1769 }, { "epoch": 0.3020993343573989, "grad_norm": 0.5243274878034984, "learning_rate": 2.7922853729305344e-05, "loss": 0.6564, "num_tokens": 166690884.0, "step": 1770 }, { "epoch": 0.3022700119474313, "grad_norm": 0.5424351598875291, "learning_rate": 2.7916026625704047e-05, "loss": 0.7154, "num_tokens": 166797631.0, "step": 1771 }, { "epoch": 0.3024406895374637, "grad_norm": 0.4696572777072118, "learning_rate": 2.790919952210275e-05, "loss": 0.5983, "num_tokens": 166910516.0, "step": 1772 }, { "epoch": 0.30261136712749614, "grad_norm": 0.4964059848554195, "learning_rate": 2.7902372418501455e-05, "loss": 0.6756, "num_tokens": 167012673.0, "step": 1773 }, { "epoch": 0.30278204471752856, "grad_norm": 0.5060949723003197, "learning_rate": 2.7895545314900155e-05, "loss": 0.5822, "num_tokens": 167090869.0, "step": 1774 }, { "epoch": 0.30295272230756104, "grad_norm": 0.47902710469794624, "learning_rate": 2.788871821129886e-05, "loss": 0.6682, "num_tokens": 167205260.0, "step": 1775 }, { "epoch": 0.30312339989759346, "grad_norm": 0.5196875042659808, "learning_rate": 2.7881891107697563e-05, "loss": 0.6703, "num_tokens": 167304997.0, "step": 1776 }, { "epoch": 0.3032940774876259, "grad_norm": 0.5074622398663027, "learning_rate": 2.7875064004096267e-05, "loss": 0.6281, "num_tokens": 167404134.0, "step": 1777 }, { "epoch": 0.3034647550776583, "grad_norm": 0.5524582976347004, "learning_rate": 2.786823690049497e-05, "loss": 0.6749, "num_tokens": 167478610.0, "step": 1778 }, { "epoch": 0.3036354326676907, "grad_norm": 0.527367523083142, "learning_rate": 2.7861409796893667e-05, "loss": 0.6039, "num_tokens": 167557600.0, "step": 1779 }, { "epoch": 0.30380611025772314, "grad_norm": 0.49043165250995246, "learning_rate": 2.785458269329237e-05, "loss": 0.5935, "num_tokens": 167650759.0, "step": 1780 }, { "epoch": 0.3039767878477556, "grad_norm": 0.47423846137432085, "learning_rate": 2.7847755589691075e-05, "loss": 0.571, "num_tokens": 167757325.0, "step": 1781 }, { "epoch": 0.30414746543778803, "grad_norm": 0.49128981470440275, "learning_rate": 2.784092848608978e-05, "loss": 0.7346, "num_tokens": 167873590.0, "step": 1782 }, { "epoch": 0.30431814302782045, "grad_norm": 0.5108225845792723, "learning_rate": 2.7834101382488482e-05, "loss": 0.584, "num_tokens": 167961122.0, "step": 1783 }, { "epoch": 0.3044888206178529, "grad_norm": 0.5006221370161872, "learning_rate": 2.7827274278887183e-05, "loss": 0.5814, "num_tokens": 168042945.0, "step": 1784 }, { "epoch": 0.3046594982078853, "grad_norm": 0.49877527306263025, "learning_rate": 2.7820447175285887e-05, "loss": 0.6286, "num_tokens": 168131153.0, "step": 1785 }, { "epoch": 0.3048301757979177, "grad_norm": 0.5504116788321519, "learning_rate": 2.781362007168459e-05, "loss": 0.6304, "num_tokens": 168203131.0, "step": 1786 }, { "epoch": 0.3050008533879502, "grad_norm": 0.5086561188527078, "learning_rate": 2.7806792968083294e-05, "loss": 0.577, "num_tokens": 168290116.0, "step": 1787 }, { "epoch": 0.3051715309779826, "grad_norm": 0.46167745140054206, "learning_rate": 2.7799965864481998e-05, "loss": 0.5725, "num_tokens": 168396860.0, "step": 1788 }, { "epoch": 0.305342208568015, "grad_norm": 0.4852151896015887, "learning_rate": 2.77931387608807e-05, "loss": 0.544, "num_tokens": 168488195.0, "step": 1789 }, { "epoch": 0.30551288615804745, "grad_norm": 0.5100145069417077, "learning_rate": 2.7786311657279402e-05, "loss": 0.5671, "num_tokens": 168568888.0, "step": 1790 }, { "epoch": 0.30568356374807987, "grad_norm": 0.5437459247208325, "learning_rate": 2.7779484553678102e-05, "loss": 0.6318, "num_tokens": 168655732.0, "step": 1791 }, { "epoch": 0.3058542413381123, "grad_norm": 0.47594616249011495, "learning_rate": 2.7772657450076806e-05, "loss": 0.6756, "num_tokens": 168768986.0, "step": 1792 }, { "epoch": 0.3060249189281447, "grad_norm": 0.4838223137868776, "learning_rate": 2.776583034647551e-05, "loss": 0.5809, "num_tokens": 168867051.0, "step": 1793 }, { "epoch": 0.3061955965181772, "grad_norm": 0.43940993800862016, "learning_rate": 2.775900324287421e-05, "loss": 0.5336, "num_tokens": 168987007.0, "step": 1794 }, { "epoch": 0.3063662741082096, "grad_norm": 0.4941808717868668, "learning_rate": 2.7752176139272914e-05, "loss": 0.5371, "num_tokens": 169069278.0, "step": 1795 }, { "epoch": 0.306536951698242, "grad_norm": 0.4735302766967219, "learning_rate": 2.7745349035671618e-05, "loss": 0.5522, "num_tokens": 169170145.0, "step": 1796 }, { "epoch": 0.30670762928827444, "grad_norm": 0.5475719184359104, "learning_rate": 2.773852193207032e-05, "loss": 0.6598, "num_tokens": 169264733.0, "step": 1797 }, { "epoch": 0.30687830687830686, "grad_norm": 0.5457466617490664, "learning_rate": 2.7731694828469025e-05, "loss": 0.5895, "num_tokens": 169349954.0, "step": 1798 }, { "epoch": 0.3070489844683393, "grad_norm": 0.5180067423575152, "learning_rate": 2.772486772486773e-05, "loss": 0.5963, "num_tokens": 169448899.0, "step": 1799 }, { "epoch": 0.30721966205837176, "grad_norm": 0.5286605105348603, "learning_rate": 2.771804062126643e-05, "loss": 0.7339, "num_tokens": 169573490.0, "step": 1800 }, { "epoch": 0.3073903396484042, "grad_norm": 0.4709941108900789, "learning_rate": 2.7711213517665133e-05, "loss": 0.6428, "num_tokens": 169707545.0, "step": 1801 }, { "epoch": 0.3075610172384366, "grad_norm": 0.5077667289457547, "learning_rate": 2.7704386414063837e-05, "loss": 0.546, "num_tokens": 169801017.0, "step": 1802 }, { "epoch": 0.307731694828469, "grad_norm": 0.5481105061776905, "learning_rate": 2.769755931046254e-05, "loss": 0.6649, "num_tokens": 169883887.0, "step": 1803 }, { "epoch": 0.30790237241850144, "grad_norm": 0.5056467747842829, "learning_rate": 2.769073220686124e-05, "loss": 0.6004, "num_tokens": 169971722.0, "step": 1804 }, { "epoch": 0.30807305000853386, "grad_norm": 0.47282580956943737, "learning_rate": 2.768390510325994e-05, "loss": 0.621, "num_tokens": 170082074.0, "step": 1805 }, { "epoch": 0.30824372759856633, "grad_norm": 0.49349678664063634, "learning_rate": 2.7677077999658645e-05, "loss": 0.6435, "num_tokens": 170193314.0, "step": 1806 }, { "epoch": 0.30841440518859875, "grad_norm": 0.45846294702442064, "learning_rate": 2.767025089605735e-05, "loss": 0.5564, "num_tokens": 170293087.0, "step": 1807 }, { "epoch": 0.30858508277863117, "grad_norm": 0.5064441654400016, "learning_rate": 2.7663423792456053e-05, "loss": 0.6201, "num_tokens": 170380367.0, "step": 1808 }, { "epoch": 0.3087557603686636, "grad_norm": 0.48828113454236327, "learning_rate": 2.7656596688854757e-05, "loss": 0.6235, "num_tokens": 170478458.0, "step": 1809 }, { "epoch": 0.308926437958696, "grad_norm": 0.47972064749710336, "learning_rate": 2.764976958525346e-05, "loss": 0.6483, "num_tokens": 170585398.0, "step": 1810 }, { "epoch": 0.30909711554872843, "grad_norm": 0.5682547243955263, "learning_rate": 2.764294248165216e-05, "loss": 0.6849, "num_tokens": 170675909.0, "step": 1811 }, { "epoch": 0.3092677931387609, "grad_norm": 0.5415239075211329, "learning_rate": 2.7636115378050864e-05, "loss": 0.5571, "num_tokens": 170745708.0, "step": 1812 }, { "epoch": 0.3094384707287933, "grad_norm": 0.5254068525871738, "learning_rate": 2.7629288274449568e-05, "loss": 0.5868, "num_tokens": 170826832.0, "step": 1813 }, { "epoch": 0.30960914831882574, "grad_norm": 0.48323020000217637, "learning_rate": 2.7622461170848272e-05, "loss": 0.6276, "num_tokens": 170931437.0, "step": 1814 }, { "epoch": 0.30977982590885816, "grad_norm": 0.4853153131676094, "learning_rate": 2.7615634067246976e-05, "loss": 0.5623, "num_tokens": 171030193.0, "step": 1815 }, { "epoch": 0.3099505034988906, "grad_norm": 0.6064859646426419, "learning_rate": 2.7608806963645673e-05, "loss": 0.7596, "num_tokens": 171124878.0, "step": 1816 }, { "epoch": 0.310121181088923, "grad_norm": 0.5826341418542528, "learning_rate": 2.7601979860044376e-05, "loss": 0.552, "num_tokens": 171190867.0, "step": 1817 }, { "epoch": 0.3102918586789555, "grad_norm": 0.50782550815946, "learning_rate": 2.759515275644308e-05, "loss": 0.6346, "num_tokens": 171293218.0, "step": 1818 }, { "epoch": 0.3104625362689879, "grad_norm": 0.5398105581302006, "learning_rate": 2.7588325652841784e-05, "loss": 0.6782, "num_tokens": 171395575.0, "step": 1819 }, { "epoch": 0.3106332138590203, "grad_norm": 0.5317969320531754, "learning_rate": 2.7581498549240488e-05, "loss": 0.696, "num_tokens": 171484776.0, "step": 1820 }, { "epoch": 0.31080389144905274, "grad_norm": 0.5435586510883168, "learning_rate": 2.7574671445639188e-05, "loss": 0.5437, "num_tokens": 171595611.0, "step": 1821 }, { "epoch": 0.31097456903908516, "grad_norm": 0.5027587193175057, "learning_rate": 2.7567844342037892e-05, "loss": 0.6022, "num_tokens": 171700507.0, "step": 1822 }, { "epoch": 0.3111452466291176, "grad_norm": 0.5312511682260234, "learning_rate": 2.7561017238436596e-05, "loss": 0.6686, "num_tokens": 171789174.0, "step": 1823 }, { "epoch": 0.31131592421915005, "grad_norm": 0.516660746391616, "learning_rate": 2.75541901348353e-05, "loss": 0.6868, "num_tokens": 171884448.0, "step": 1824 }, { "epoch": 0.3114866018091825, "grad_norm": 0.5437733512185642, "learning_rate": 2.7547363031234003e-05, "loss": 0.6057, "num_tokens": 171973042.0, "step": 1825 }, { "epoch": 0.3116572793992149, "grad_norm": 0.48728277841021794, "learning_rate": 2.7540535927632707e-05, "loss": 0.5458, "num_tokens": 172073884.0, "step": 1826 }, { "epoch": 0.3118279569892473, "grad_norm": 0.4768720661962923, "learning_rate": 2.7533708824031407e-05, "loss": 0.6729, "num_tokens": 172188655.0, "step": 1827 }, { "epoch": 0.31199863457927973, "grad_norm": 0.4956906002567987, "learning_rate": 2.752688172043011e-05, "loss": 0.6768, "num_tokens": 172295485.0, "step": 1828 }, { "epoch": 0.31216931216931215, "grad_norm": 0.521469643222855, "learning_rate": 2.752005461682881e-05, "loss": 0.638, "num_tokens": 172385206.0, "step": 1829 }, { "epoch": 0.31233998975934457, "grad_norm": 0.48041061741556823, "learning_rate": 2.7513227513227515e-05, "loss": 0.6653, "num_tokens": 172502539.0, "step": 1830 }, { "epoch": 0.31251066734937705, "grad_norm": 0.45562281426101814, "learning_rate": 2.7506400409626216e-05, "loss": 0.5787, "num_tokens": 172628034.0, "step": 1831 }, { "epoch": 0.31268134493940947, "grad_norm": 0.4949885225637786, "learning_rate": 2.749957330602492e-05, "loss": 0.6161, "num_tokens": 172733349.0, "step": 1832 }, { "epoch": 0.3128520225294419, "grad_norm": 0.4626040482976906, "learning_rate": 2.7492746202423623e-05, "loss": 0.6161, "num_tokens": 172842026.0, "step": 1833 }, { "epoch": 0.3130227001194743, "grad_norm": 0.5175431064341234, "learning_rate": 2.7485919098822327e-05, "loss": 0.5952, "num_tokens": 172926839.0, "step": 1834 }, { "epoch": 0.3131933777095067, "grad_norm": 0.5695573932902815, "learning_rate": 2.747909199522103e-05, "loss": 0.6194, "num_tokens": 173002371.0, "step": 1835 }, { "epoch": 0.31336405529953915, "grad_norm": 0.5275380901542777, "learning_rate": 2.7472264891619734e-05, "loss": 0.5108, "num_tokens": 173069427.0, "step": 1836 }, { "epoch": 0.3135347328895716, "grad_norm": 0.4973606253723709, "learning_rate": 2.7465437788018435e-05, "loss": 0.5247, "num_tokens": 173153429.0, "step": 1837 }, { "epoch": 0.31370541047960404, "grad_norm": 0.4836843289548591, "learning_rate": 2.745861068441714e-05, "loss": 0.6438, "num_tokens": 173265181.0, "step": 1838 }, { "epoch": 0.31387608806963646, "grad_norm": 0.5453619188117896, "learning_rate": 2.7451783580815842e-05, "loss": 0.6606, "num_tokens": 173355839.0, "step": 1839 }, { "epoch": 0.3140467656596689, "grad_norm": 0.5178357122492119, "learning_rate": 2.7444956477214546e-05, "loss": 0.5805, "num_tokens": 173454060.0, "step": 1840 }, { "epoch": 0.3142174432497013, "grad_norm": 0.4716434720455672, "learning_rate": 2.7438129373613246e-05, "loss": 0.6406, "num_tokens": 173583000.0, "step": 1841 }, { "epoch": 0.3143881208397337, "grad_norm": 0.5019995561322425, "learning_rate": 2.7431302270011947e-05, "loss": 0.6606, "num_tokens": 173680801.0, "step": 1842 }, { "epoch": 0.3145587984297662, "grad_norm": 0.522465640323239, "learning_rate": 2.742447516641065e-05, "loss": 0.5697, "num_tokens": 173758679.0, "step": 1843 }, { "epoch": 0.3147294760197986, "grad_norm": 0.48450359517289643, "learning_rate": 2.7417648062809354e-05, "loss": 0.5556, "num_tokens": 173846173.0, "step": 1844 }, { "epoch": 0.31490015360983103, "grad_norm": 0.4903293105816486, "learning_rate": 2.7410820959208058e-05, "loss": 0.6359, "num_tokens": 173956407.0, "step": 1845 }, { "epoch": 0.31507083119986345, "grad_norm": 0.5427494133762597, "learning_rate": 2.7403993855606762e-05, "loss": 0.633, "num_tokens": 174054474.0, "step": 1846 }, { "epoch": 0.3152415087898959, "grad_norm": 0.5820584229016548, "learning_rate": 2.7397166752005466e-05, "loss": 0.5891, "num_tokens": 174164062.0, "step": 1847 }, { "epoch": 0.3154121863799283, "grad_norm": 0.5199327843146129, "learning_rate": 2.7390339648404166e-05, "loss": 0.5825, "num_tokens": 174255757.0, "step": 1848 }, { "epoch": 0.31558286396996077, "grad_norm": 0.5399310739661023, "learning_rate": 2.738351254480287e-05, "loss": 0.5958, "num_tokens": 174333638.0, "step": 1849 }, { "epoch": 0.3157535415599932, "grad_norm": 0.5189435799381489, "learning_rate": 2.7376685441201574e-05, "loss": 0.5622, "num_tokens": 174410523.0, "step": 1850 }, { "epoch": 0.3159242191500256, "grad_norm": 0.4987178819471887, "learning_rate": 2.7369858337600277e-05, "loss": 0.577, "num_tokens": 174512096.0, "step": 1851 }, { "epoch": 0.31609489674005803, "grad_norm": 0.4626250290048233, "learning_rate": 2.736303123399898e-05, "loss": 0.5802, "num_tokens": 174616780.0, "step": 1852 }, { "epoch": 0.31626557433009045, "grad_norm": 0.5193400373930481, "learning_rate": 2.7356204130397678e-05, "loss": 0.5313, "num_tokens": 174693378.0, "step": 1853 }, { "epoch": 0.31643625192012287, "grad_norm": 0.4820741022663225, "learning_rate": 2.7349377026796382e-05, "loss": 0.6021, "num_tokens": 174802364.0, "step": 1854 }, { "epoch": 0.31660692951015534, "grad_norm": 0.4947533438272115, "learning_rate": 2.7342549923195086e-05, "loss": 0.607, "num_tokens": 174906049.0, "step": 1855 }, { "epoch": 0.31677760710018776, "grad_norm": 0.5094831188743407, "learning_rate": 2.733572281959379e-05, "loss": 0.5771, "num_tokens": 175001533.0, "step": 1856 }, { "epoch": 0.3169482846902202, "grad_norm": 0.5468922572893605, "learning_rate": 2.7328895715992493e-05, "loss": 0.6257, "num_tokens": 175071471.0, "step": 1857 }, { "epoch": 0.3171189622802526, "grad_norm": 0.4736833243127142, "learning_rate": 2.7322068612391193e-05, "loss": 0.5623, "num_tokens": 175167340.0, "step": 1858 }, { "epoch": 0.317289639870285, "grad_norm": 0.4738319206684177, "learning_rate": 2.7315241508789897e-05, "loss": 0.5771, "num_tokens": 175270562.0, "step": 1859 }, { "epoch": 0.31746031746031744, "grad_norm": 0.47302938208009443, "learning_rate": 2.73084144051886e-05, "loss": 0.5542, "num_tokens": 175365029.0, "step": 1860 }, { "epoch": 0.31763099505034986, "grad_norm": 0.5233966923337117, "learning_rate": 2.7301587301587305e-05, "loss": 0.6239, "num_tokens": 175457558.0, "step": 1861 }, { "epoch": 0.31780167264038234, "grad_norm": 0.4691322810397915, "learning_rate": 2.729476019798601e-05, "loss": 0.6111, "num_tokens": 175550617.0, "step": 1862 }, { "epoch": 0.31797235023041476, "grad_norm": 0.5566789628767105, "learning_rate": 2.7287933094384712e-05, "loss": 0.6345, "num_tokens": 175630607.0, "step": 1863 }, { "epoch": 0.3181430278204472, "grad_norm": 0.46760884554418203, "learning_rate": 2.7281105990783413e-05, "loss": 0.611, "num_tokens": 175733948.0, "step": 1864 }, { "epoch": 0.3183137054104796, "grad_norm": 0.524677801942664, "learning_rate": 2.7274278887182116e-05, "loss": 0.6564, "num_tokens": 175835431.0, "step": 1865 }, { "epoch": 0.318484383000512, "grad_norm": 0.5321610463982587, "learning_rate": 2.7267451783580817e-05, "loss": 0.6587, "num_tokens": 175923529.0, "step": 1866 }, { "epoch": 0.31865506059054444, "grad_norm": 0.5081755308902504, "learning_rate": 2.726062467997952e-05, "loss": 0.5818, "num_tokens": 176009724.0, "step": 1867 }, { "epoch": 0.3188257381805769, "grad_norm": 0.48858769134510405, "learning_rate": 2.725379757637822e-05, "loss": 0.5474, "num_tokens": 176107059.0, "step": 1868 }, { "epoch": 0.31899641577060933, "grad_norm": 0.5381093309747611, "learning_rate": 2.7246970472776925e-05, "loss": 0.6329, "num_tokens": 176191333.0, "step": 1869 }, { "epoch": 0.31916709336064175, "grad_norm": 0.5392953689705774, "learning_rate": 2.724014336917563e-05, "loss": 0.6996, "num_tokens": 176291374.0, "step": 1870 }, { "epoch": 0.31933777095067417, "grad_norm": 0.5562671188218282, "learning_rate": 2.7233316265574332e-05, "loss": 0.5999, "num_tokens": 176365126.0, "step": 1871 }, { "epoch": 0.3195084485407066, "grad_norm": 0.5576451085408359, "learning_rate": 2.7226489161973036e-05, "loss": 0.5997, "num_tokens": 176438803.0, "step": 1872 }, { "epoch": 0.319679126130739, "grad_norm": 0.5700224174575984, "learning_rate": 2.721966205837174e-05, "loss": 0.7517, "num_tokens": 176524425.0, "step": 1873 }, { "epoch": 0.3198498037207715, "grad_norm": 0.4725734901847189, "learning_rate": 2.721283495477044e-05, "loss": 0.5826, "num_tokens": 176641852.0, "step": 1874 }, { "epoch": 0.3200204813108039, "grad_norm": 0.48938216803461215, "learning_rate": 2.7206007851169144e-05, "loss": 0.656, "num_tokens": 176759562.0, "step": 1875 }, { "epoch": 0.3201911589008363, "grad_norm": 0.5576330861832379, "learning_rate": 2.7199180747567848e-05, "loss": 0.5591, "num_tokens": 176827604.0, "step": 1876 }, { "epoch": 0.32036183649086875, "grad_norm": 0.48741672565527133, "learning_rate": 2.719235364396655e-05, "loss": 0.6414, "num_tokens": 176931017.0, "step": 1877 }, { "epoch": 0.32053251408090117, "grad_norm": 0.494794306828343, "learning_rate": 2.7185526540365252e-05, "loss": 0.6285, "num_tokens": 177030531.0, "step": 1878 }, { "epoch": 0.3207031916709336, "grad_norm": 0.5023381941105307, "learning_rate": 2.7178699436763952e-05, "loss": 0.5827, "num_tokens": 177133933.0, "step": 1879 }, { "epoch": 0.32087386926096606, "grad_norm": 0.5342844412480456, "learning_rate": 2.7171872333162656e-05, "loss": 0.6184, "num_tokens": 177228283.0, "step": 1880 }, { "epoch": 0.3210445468509985, "grad_norm": 0.4858691835206792, "learning_rate": 2.716504522956136e-05, "loss": 0.6008, "num_tokens": 177322386.0, "step": 1881 }, { "epoch": 0.3212152244410309, "grad_norm": 0.5045833000192924, "learning_rate": 2.7158218125960063e-05, "loss": 0.6819, "num_tokens": 177436116.0, "step": 1882 }, { "epoch": 0.3213859020310633, "grad_norm": 0.5069112210976342, "learning_rate": 2.7151391022358767e-05, "loss": 0.647, "num_tokens": 177535594.0, "step": 1883 }, { "epoch": 0.32155657962109574, "grad_norm": 0.46091934005088586, "learning_rate": 2.714456391875747e-05, "loss": 0.6061, "num_tokens": 177644471.0, "step": 1884 }, { "epoch": 0.32172725721112816, "grad_norm": 0.5192742772452592, "learning_rate": 2.713773681515617e-05, "loss": 0.6708, "num_tokens": 177741133.0, "step": 1885 }, { "epoch": 0.32189793480116063, "grad_norm": 0.5579497951834359, "learning_rate": 2.7130909711554875e-05, "loss": 0.6196, "num_tokens": 177814966.0, "step": 1886 }, { "epoch": 0.32206861239119305, "grad_norm": 0.5477769810074912, "learning_rate": 2.712408260795358e-05, "loss": 0.622, "num_tokens": 177908325.0, "step": 1887 }, { "epoch": 0.3222392899812255, "grad_norm": 0.5173005305963398, "learning_rate": 2.7117255504352283e-05, "loss": 0.6135, "num_tokens": 177992322.0, "step": 1888 }, { "epoch": 0.3224099675712579, "grad_norm": 0.45320846150921124, "learning_rate": 2.7110428400750986e-05, "loss": 0.5721, "num_tokens": 178100090.0, "step": 1889 }, { "epoch": 0.3225806451612903, "grad_norm": 0.5794935960018746, "learning_rate": 2.710360129714969e-05, "loss": 0.6199, "num_tokens": 178168170.0, "step": 1890 }, { "epoch": 0.32275132275132273, "grad_norm": 0.543870346153487, "learning_rate": 2.7096774193548387e-05, "loss": 0.6006, "num_tokens": 178242688.0, "step": 1891 }, { "epoch": 0.32292200034135515, "grad_norm": 0.5257829599894944, "learning_rate": 2.708994708994709e-05, "loss": 0.7227, "num_tokens": 178342918.0, "step": 1892 }, { "epoch": 0.32309267793138763, "grad_norm": 0.5215613176433986, "learning_rate": 2.7083119986345795e-05, "loss": 0.6973, "num_tokens": 178453073.0, "step": 1893 }, { "epoch": 0.32326335552142005, "grad_norm": 0.4684466259687379, "learning_rate": 2.70762928827445e-05, "loss": 0.6208, "num_tokens": 178580050.0, "step": 1894 }, { "epoch": 0.32343403311145247, "grad_norm": 0.4861496287039147, "learning_rate": 2.70694657791432e-05, "loss": 0.6621, "num_tokens": 178686014.0, "step": 1895 }, { "epoch": 0.3236047107014849, "grad_norm": 0.5597236775858695, "learning_rate": 2.7062638675541903e-05, "loss": 0.7195, "num_tokens": 178786632.0, "step": 1896 }, { "epoch": 0.3237753882915173, "grad_norm": 0.4629258004409705, "learning_rate": 2.7055811571940606e-05, "loss": 0.5354, "num_tokens": 178886318.0, "step": 1897 }, { "epoch": 0.3239460658815497, "grad_norm": 0.5046581629086911, "learning_rate": 2.704898446833931e-05, "loss": 0.6167, "num_tokens": 178985327.0, "step": 1898 }, { "epoch": 0.3241167434715822, "grad_norm": 0.5242531182441059, "learning_rate": 2.7042157364738014e-05, "loss": 0.5366, "num_tokens": 179061975.0, "step": 1899 }, { "epoch": 0.3242874210616146, "grad_norm": 0.5275215742279357, "learning_rate": 2.7035330261136718e-05, "loss": 0.5734, "num_tokens": 179139224.0, "step": 1900 }, { "epoch": 0.32445809865164704, "grad_norm": 0.5267199728284313, "learning_rate": 2.7028503157535418e-05, "loss": 0.5512, "num_tokens": 179215454.0, "step": 1901 }, { "epoch": 0.32462877624167946, "grad_norm": 0.5924769425670209, "learning_rate": 2.7021676053934122e-05, "loss": 0.5809, "num_tokens": 179310359.0, "step": 1902 }, { "epoch": 0.3247994538317119, "grad_norm": 0.5659247096423088, "learning_rate": 2.7014848950332822e-05, "loss": 0.6915, "num_tokens": 179406546.0, "step": 1903 }, { "epoch": 0.3249701314217443, "grad_norm": 0.47045762969952154, "learning_rate": 2.7008021846731526e-05, "loss": 0.6205, "num_tokens": 179515154.0, "step": 1904 }, { "epoch": 0.3251408090117768, "grad_norm": 0.5124701268666754, "learning_rate": 2.7001194743130226e-05, "loss": 0.5599, "num_tokens": 179593136.0, "step": 1905 }, { "epoch": 0.3253114866018092, "grad_norm": 0.50646867263811, "learning_rate": 2.699436763952893e-05, "loss": 0.6619, "num_tokens": 179682868.0, "step": 1906 }, { "epoch": 0.3254821641918416, "grad_norm": 0.49672186686685693, "learning_rate": 2.6987540535927634e-05, "loss": 0.6202, "num_tokens": 179784221.0, "step": 1907 }, { "epoch": 0.32565284178187404, "grad_norm": 0.5085628876249997, "learning_rate": 2.6980713432326338e-05, "loss": 0.6003, "num_tokens": 179874686.0, "step": 1908 }, { "epoch": 0.32582351937190646, "grad_norm": 0.5160845616846425, "learning_rate": 2.697388632872504e-05, "loss": 0.6629, "num_tokens": 179980906.0, "step": 1909 }, { "epoch": 0.3259941969619389, "grad_norm": 0.5552344749719358, "learning_rate": 2.6967059225123745e-05, "loss": 0.7109, "num_tokens": 180060860.0, "step": 1910 }, { "epoch": 0.32616487455197135, "grad_norm": 0.5244682226944963, "learning_rate": 2.696023212152245e-05, "loss": 0.6016, "num_tokens": 180141540.0, "step": 1911 }, { "epoch": 0.32633555214200377, "grad_norm": 0.5189860297345052, "learning_rate": 2.695340501792115e-05, "loss": 0.6044, "num_tokens": 180227219.0, "step": 1912 }, { "epoch": 0.3265062297320362, "grad_norm": 0.5259039160645965, "learning_rate": 2.6946577914319853e-05, "loss": 0.5874, "num_tokens": 180315633.0, "step": 1913 }, { "epoch": 0.3266769073220686, "grad_norm": 0.5195657921447284, "learning_rate": 2.6939750810718557e-05, "loss": 0.5559, "num_tokens": 180398929.0, "step": 1914 }, { "epoch": 0.32684758491210103, "grad_norm": 0.53289174753913, "learning_rate": 2.6932923707117257e-05, "loss": 0.7015, "num_tokens": 180499400.0, "step": 1915 }, { "epoch": 0.32701826250213345, "grad_norm": 0.5139681003118031, "learning_rate": 2.6926096603515957e-05, "loss": 0.6339, "num_tokens": 180594638.0, "step": 1916 }, { "epoch": 0.3271889400921659, "grad_norm": 0.5204326590827141, "learning_rate": 2.691926949991466e-05, "loss": 0.5818, "num_tokens": 180675512.0, "step": 1917 }, { "epoch": 0.32735961768219835, "grad_norm": 0.5211240059407786, "learning_rate": 2.6912442396313365e-05, "loss": 0.5462, "num_tokens": 180754118.0, "step": 1918 }, { "epoch": 0.32753029527223076, "grad_norm": 0.5182764154274138, "learning_rate": 2.690561529271207e-05, "loss": 0.6542, "num_tokens": 180849545.0, "step": 1919 }, { "epoch": 0.3277009728622632, "grad_norm": 0.4664177390275575, "learning_rate": 2.6898788189110773e-05, "loss": 0.6758, "num_tokens": 180989346.0, "step": 1920 }, { "epoch": 0.3278716504522956, "grad_norm": 0.5489103438718367, "learning_rate": 2.6891961085509476e-05, "loss": 0.63, "num_tokens": 181065958.0, "step": 1921 }, { "epoch": 0.328042328042328, "grad_norm": 0.5101175656176758, "learning_rate": 2.6885133981908177e-05, "loss": 0.6348, "num_tokens": 181160327.0, "step": 1922 }, { "epoch": 0.32821300563236044, "grad_norm": 0.5062551602033847, "learning_rate": 2.687830687830688e-05, "loss": 0.5502, "num_tokens": 181239796.0, "step": 1923 }, { "epoch": 0.3283836832223929, "grad_norm": 0.5332846453865447, "learning_rate": 2.6871479774705584e-05, "loss": 0.6408, "num_tokens": 181324739.0, "step": 1924 }, { "epoch": 0.32855436081242534, "grad_norm": 0.5315342797279429, "learning_rate": 2.6864652671104288e-05, "loss": 0.603, "num_tokens": 181415968.0, "step": 1925 }, { "epoch": 0.32872503840245776, "grad_norm": 0.4902117444361275, "learning_rate": 2.6857825567502992e-05, "loss": 0.6272, "num_tokens": 181524889.0, "step": 1926 }, { "epoch": 0.3288957159924902, "grad_norm": 0.47464486033562714, "learning_rate": 2.6850998463901695e-05, "loss": 0.6142, "num_tokens": 181628459.0, "step": 1927 }, { "epoch": 0.3290663935825226, "grad_norm": 0.5194344933837864, "learning_rate": 2.6844171360300392e-05, "loss": 0.6236, "num_tokens": 181711001.0, "step": 1928 }, { "epoch": 0.329237071172555, "grad_norm": 0.44988794519528585, "learning_rate": 2.6837344256699096e-05, "loss": 0.6125, "num_tokens": 181837907.0, "step": 1929 }, { "epoch": 0.3294077487625875, "grad_norm": 0.4903821401356589, "learning_rate": 2.68305171530978e-05, "loss": 0.5452, "num_tokens": 181935225.0, "step": 1930 }, { "epoch": 0.3295784263526199, "grad_norm": 0.4974956255249516, "learning_rate": 2.6823690049496504e-05, "loss": 0.685, "num_tokens": 182042209.0, "step": 1931 }, { "epoch": 0.32974910394265233, "grad_norm": 0.5178728612715789, "learning_rate": 2.6816862945895204e-05, "loss": 0.6737, "num_tokens": 182138142.0, "step": 1932 }, { "epoch": 0.32991978153268475, "grad_norm": 0.5535101268768201, "learning_rate": 2.6810035842293908e-05, "loss": 0.7063, "num_tokens": 182245085.0, "step": 1933 }, { "epoch": 0.3300904591227172, "grad_norm": 0.5228180010797745, "learning_rate": 2.680320873869261e-05, "loss": 0.5898, "num_tokens": 182341902.0, "step": 1934 }, { "epoch": 0.3302611367127496, "grad_norm": 0.5395768424917943, "learning_rate": 2.6796381635091315e-05, "loss": 0.6432, "num_tokens": 182433906.0, "step": 1935 }, { "epoch": 0.33043181430278207, "grad_norm": 0.532247254114516, "learning_rate": 2.678955453149002e-05, "loss": 0.5869, "num_tokens": 182505842.0, "step": 1936 }, { "epoch": 0.3306024918928145, "grad_norm": 0.508342782279506, "learning_rate": 2.6782727427888723e-05, "loss": 0.648, "num_tokens": 182609549.0, "step": 1937 }, { "epoch": 0.3307731694828469, "grad_norm": 0.49591208372725304, "learning_rate": 2.6775900324287423e-05, "loss": 0.5948, "num_tokens": 182721902.0, "step": 1938 }, { "epoch": 0.3309438470728793, "grad_norm": 0.5813788084963324, "learning_rate": 2.6769073220686127e-05, "loss": 0.7468, "num_tokens": 182803328.0, "step": 1939 }, { "epoch": 0.33111452466291175, "grad_norm": 0.5466027270035207, "learning_rate": 2.6762246117084827e-05, "loss": 0.7371, "num_tokens": 182904076.0, "step": 1940 }, { "epoch": 0.33128520225294417, "grad_norm": 0.5356861681061328, "learning_rate": 2.675541901348353e-05, "loss": 0.6216, "num_tokens": 183006865.0, "step": 1941 }, { "epoch": 0.33145587984297664, "grad_norm": 0.48244871047267024, "learning_rate": 2.6748591909882235e-05, "loss": 0.6706, "num_tokens": 183130404.0, "step": 1942 }, { "epoch": 0.33162655743300906, "grad_norm": 0.45873992818938203, "learning_rate": 2.6741764806280935e-05, "loss": 0.6017, "num_tokens": 183244754.0, "step": 1943 }, { "epoch": 0.3317972350230415, "grad_norm": 0.49143487468288766, "learning_rate": 2.673493770267964e-05, "loss": 0.5742, "num_tokens": 183336947.0, "step": 1944 }, { "epoch": 0.3319679126130739, "grad_norm": 0.5049277735375246, "learning_rate": 2.6728110599078343e-05, "loss": 0.64, "num_tokens": 183440277.0, "step": 1945 }, { "epoch": 0.3321385902031063, "grad_norm": 0.507140575427548, "learning_rate": 2.6721283495477047e-05, "loss": 0.5883, "num_tokens": 183531265.0, "step": 1946 }, { "epoch": 0.33230926779313874, "grad_norm": 0.47359848001669835, "learning_rate": 2.671445639187575e-05, "loss": 0.6307, "num_tokens": 183638488.0, "step": 1947 }, { "epoch": 0.3324799453831712, "grad_norm": 0.46068884496776163, "learning_rate": 2.6707629288274454e-05, "loss": 0.6216, "num_tokens": 183752727.0, "step": 1948 }, { "epoch": 0.33265062297320364, "grad_norm": 0.5724728146515059, "learning_rate": 2.6700802184673155e-05, "loss": 0.6718, "num_tokens": 183822739.0, "step": 1949 }, { "epoch": 0.33282130056323606, "grad_norm": 0.4857913367727272, "learning_rate": 2.669397508107186e-05, "loss": 0.5831, "num_tokens": 183915487.0, "step": 1950 }, { "epoch": 0.3329919781532685, "grad_norm": 0.5081941323526794, "learning_rate": 2.6687147977470562e-05, "loss": 0.6157, "num_tokens": 184023509.0, "step": 1951 }, { "epoch": 0.3331626557433009, "grad_norm": 0.48581796607884437, "learning_rate": 2.6680320873869262e-05, "loss": 0.5588, "num_tokens": 184128920.0, "step": 1952 }, { "epoch": 0.3333333333333333, "grad_norm": 0.5169853295813558, "learning_rate": 2.6673493770267963e-05, "loss": 0.5707, "num_tokens": 184218441.0, "step": 1953 }, { "epoch": 0.33350401092336573, "grad_norm": 0.46853249578058764, "learning_rate": 2.6666666666666667e-05, "loss": 0.5703, "num_tokens": 184328911.0, "step": 1954 }, { "epoch": 0.3336746885133982, "grad_norm": 0.5659009107860529, "learning_rate": 2.665983956306537e-05, "loss": 0.6438, "num_tokens": 184410923.0, "step": 1955 }, { "epoch": 0.33384536610343063, "grad_norm": 0.6941265517804126, "learning_rate": 2.6653012459464074e-05, "loss": 0.7971, "num_tokens": 184521203.0, "step": 1956 }, { "epoch": 0.33401604369346305, "grad_norm": 0.5032712047496162, "learning_rate": 2.6646185355862778e-05, "loss": 0.5654, "num_tokens": 184608090.0, "step": 1957 }, { "epoch": 0.33418672128349547, "grad_norm": 0.4913815613940602, "learning_rate": 2.663935825226148e-05, "loss": 0.6463, "num_tokens": 184719296.0, "step": 1958 }, { "epoch": 0.3343573988735279, "grad_norm": 0.9051111618803426, "learning_rate": 2.6632531148660182e-05, "loss": 0.7064, "num_tokens": 184819483.0, "step": 1959 }, { "epoch": 0.3345280764635603, "grad_norm": 0.6781336605397107, "learning_rate": 2.6625704045058886e-05, "loss": 0.6341, "num_tokens": 184896315.0, "step": 1960 }, { "epoch": 0.3346987540535928, "grad_norm": 0.4842960064807459, "learning_rate": 2.661887694145759e-05, "loss": 0.5971, "num_tokens": 184992573.0, "step": 1961 }, { "epoch": 0.3348694316436252, "grad_norm": 0.48721044708167066, "learning_rate": 2.6612049837856293e-05, "loss": 0.644, "num_tokens": 185093912.0, "step": 1962 }, { "epoch": 0.3350401092336576, "grad_norm": 0.6835529682178801, "learning_rate": 2.6605222734254997e-05, "loss": 0.7048, "num_tokens": 185181848.0, "step": 1963 }, { "epoch": 0.33521078682369004, "grad_norm": 0.4964705801061626, "learning_rate": 2.65983956306537e-05, "loss": 0.572, "num_tokens": 185276775.0, "step": 1964 }, { "epoch": 0.33538146441372246, "grad_norm": 0.5425145020573877, "learning_rate": 2.6591568527052398e-05, "loss": 0.5417, "num_tokens": 185351602.0, "step": 1965 }, { "epoch": 0.3355521420037549, "grad_norm": 0.46657957114185955, "learning_rate": 2.65847414234511e-05, "loss": 0.5982, "num_tokens": 185461091.0, "step": 1966 }, { "epoch": 0.33572281959378736, "grad_norm": 0.5500341453366612, "learning_rate": 2.6577914319849805e-05, "loss": 0.5635, "num_tokens": 185541465.0, "step": 1967 }, { "epoch": 0.3358934971838198, "grad_norm": 0.5598551318744879, "learning_rate": 2.657108721624851e-05, "loss": 0.6274, "num_tokens": 185623328.0, "step": 1968 }, { "epoch": 0.3360641747738522, "grad_norm": 0.49466582101723605, "learning_rate": 2.656426011264721e-05, "loss": 0.5753, "num_tokens": 185719822.0, "step": 1969 }, { "epoch": 0.3362348523638846, "grad_norm": 0.5481429406337164, "learning_rate": 2.6557433009045913e-05, "loss": 0.661, "num_tokens": 185812552.0, "step": 1970 }, { "epoch": 0.33640552995391704, "grad_norm": 0.4769052211906058, "learning_rate": 2.6550605905444617e-05, "loss": 0.608, "num_tokens": 185926554.0, "step": 1971 }, { "epoch": 0.33657620754394946, "grad_norm": 0.47723554284159864, "learning_rate": 2.654377880184332e-05, "loss": 0.6156, "num_tokens": 186053031.0, "step": 1972 }, { "epoch": 0.33674688513398193, "grad_norm": 0.504363311881063, "learning_rate": 2.6536951698242025e-05, "loss": 0.6452, "num_tokens": 186145343.0, "step": 1973 }, { "epoch": 0.33691756272401435, "grad_norm": 0.5381750117533676, "learning_rate": 2.6530124594640728e-05, "loss": 0.5821, "num_tokens": 186230771.0, "step": 1974 }, { "epoch": 0.33708824031404677, "grad_norm": 0.49059624245517675, "learning_rate": 2.652329749103943e-05, "loss": 0.5765, "num_tokens": 186327375.0, "step": 1975 }, { "epoch": 0.3372589179040792, "grad_norm": 0.5903381688615645, "learning_rate": 2.6516470387438132e-05, "loss": 0.6363, "num_tokens": 186410889.0, "step": 1976 }, { "epoch": 0.3374295954941116, "grad_norm": 0.4757470343349251, "learning_rate": 2.6509643283836833e-05, "loss": 0.5926, "num_tokens": 186512937.0, "step": 1977 }, { "epoch": 0.33760027308414403, "grad_norm": 0.5333065890697491, "learning_rate": 2.6502816180235537e-05, "loss": 0.5757, "num_tokens": 186591291.0, "step": 1978 }, { "epoch": 0.3377709506741765, "grad_norm": 0.4974748695315714, "learning_rate": 2.649598907663424e-05, "loss": 0.5865, "num_tokens": 186683029.0, "step": 1979 }, { "epoch": 0.3379416282642089, "grad_norm": 0.5397101787999651, "learning_rate": 2.648916197303294e-05, "loss": 0.6161, "num_tokens": 186770545.0, "step": 1980 }, { "epoch": 0.33811230585424135, "grad_norm": 0.5164728364028098, "learning_rate": 2.6482334869431644e-05, "loss": 0.5855, "num_tokens": 186852884.0, "step": 1981 }, { "epoch": 0.33828298344427377, "grad_norm": 0.5454482483601047, "learning_rate": 2.6475507765830348e-05, "loss": 0.6178, "num_tokens": 186929956.0, "step": 1982 }, { "epoch": 0.3384536610343062, "grad_norm": 0.4863253156492437, "learning_rate": 2.6468680662229052e-05, "loss": 0.5763, "num_tokens": 187033834.0, "step": 1983 }, { "epoch": 0.3386243386243386, "grad_norm": 0.5373455111350864, "learning_rate": 2.6461853558627756e-05, "loss": 0.559, "num_tokens": 187116471.0, "step": 1984 }, { "epoch": 0.3387950162143711, "grad_norm": 0.5073241815216564, "learning_rate": 2.645502645502646e-05, "loss": 0.5781, "num_tokens": 187208305.0, "step": 1985 }, { "epoch": 0.3389656938044035, "grad_norm": 0.595156934476474, "learning_rate": 2.644819935142516e-05, "loss": 0.6902, "num_tokens": 187304436.0, "step": 1986 }, { "epoch": 0.3391363713944359, "grad_norm": 0.6044839081059398, "learning_rate": 2.6441372247823864e-05, "loss": 0.6487, "num_tokens": 187416207.0, "step": 1987 }, { "epoch": 0.33930704898446834, "grad_norm": 0.5669580555126262, "learning_rate": 2.6434545144222567e-05, "loss": 0.6244, "num_tokens": 187488973.0, "step": 1988 }, { "epoch": 0.33947772657450076, "grad_norm": 0.4647826123746471, "learning_rate": 2.642771804062127e-05, "loss": 0.6799, "num_tokens": 187617198.0, "step": 1989 }, { "epoch": 0.3396484041645332, "grad_norm": 0.4558545948620897, "learning_rate": 2.6420890937019968e-05, "loss": 0.5887, "num_tokens": 187742403.0, "step": 1990 }, { "epoch": 0.3398190817545656, "grad_norm": 0.4971554965151551, "learning_rate": 2.6414063833418672e-05, "loss": 0.6157, "num_tokens": 187840661.0, "step": 1991 }, { "epoch": 0.3399897593445981, "grad_norm": 0.432505322727223, "learning_rate": 2.6407236729817376e-05, "loss": 0.5594, "num_tokens": 187953088.0, "step": 1992 }, { "epoch": 0.3401604369346305, "grad_norm": 0.5054505405491881, "learning_rate": 2.640040962621608e-05, "loss": 0.6566, "num_tokens": 188040878.0, "step": 1993 }, { "epoch": 0.3403311145246629, "grad_norm": 0.5786789748459928, "learning_rate": 2.6393582522614783e-05, "loss": 0.6009, "num_tokens": 188132563.0, "step": 1994 }, { "epoch": 0.34050179211469533, "grad_norm": 0.5025080580638932, "learning_rate": 2.6386755419013487e-05, "loss": 0.5753, "num_tokens": 188219974.0, "step": 1995 }, { "epoch": 0.34067246970472775, "grad_norm": 0.5277336572109107, "learning_rate": 2.6379928315412187e-05, "loss": 0.6328, "num_tokens": 188317076.0, "step": 1996 }, { "epoch": 0.3408431472947602, "grad_norm": 0.5035552898121727, "learning_rate": 2.637310121181089e-05, "loss": 0.5713, "num_tokens": 188399604.0, "step": 1997 }, { "epoch": 0.34101382488479265, "grad_norm": 0.4973047450474556, "learning_rate": 2.6366274108209595e-05, "loss": 0.6861, "num_tokens": 188505926.0, "step": 1998 }, { "epoch": 0.34118450247482507, "grad_norm": 0.5747142698065452, "learning_rate": 2.63594470046083e-05, "loss": 0.6086, "num_tokens": 188569751.0, "step": 1999 }, { "epoch": 0.3413551800648575, "grad_norm": 0.48729321236758205, "learning_rate": 2.6352619901007002e-05, "loss": 0.5877, "num_tokens": 188676771.0, "step": 2000 }, { "epoch": 0.3415258576548899, "grad_norm": 0.5332638658246801, "learning_rate": 2.6345792797405706e-05, "loss": 0.5766, "num_tokens": 188751606.0, "step": 2001 }, { "epoch": 0.34169653524492233, "grad_norm": 0.5116934329538998, "learning_rate": 2.6338965693804403e-05, "loss": 0.5408, "num_tokens": 188827257.0, "step": 2002 }, { "epoch": 0.34186721283495475, "grad_norm": 0.4355897628340976, "learning_rate": 2.6332138590203107e-05, "loss": 0.5831, "num_tokens": 188952119.0, "step": 2003 }, { "epoch": 0.3420378904249872, "grad_norm": 0.5087832964828028, "learning_rate": 2.632531148660181e-05, "loss": 0.6656, "num_tokens": 189059696.0, "step": 2004 }, { "epoch": 0.34220856801501964, "grad_norm": 0.4335782651188882, "learning_rate": 2.6318484383000514e-05, "loss": 0.5575, "num_tokens": 189187113.0, "step": 2005 }, { "epoch": 0.34237924560505206, "grad_norm": 0.4963739778559609, "learning_rate": 2.6311657279399215e-05, "loss": 0.6098, "num_tokens": 189278978.0, "step": 2006 }, { "epoch": 0.3425499231950845, "grad_norm": 0.5295843520995083, "learning_rate": 2.630483017579792e-05, "loss": 0.6712, "num_tokens": 189375270.0, "step": 2007 }, { "epoch": 0.3427206007851169, "grad_norm": 0.5173220114287612, "learning_rate": 2.6298003072196622e-05, "loss": 0.5692, "num_tokens": 189461403.0, "step": 2008 }, { "epoch": 0.3428912783751493, "grad_norm": 0.4890394967817501, "learning_rate": 2.6291175968595326e-05, "loss": 0.6561, "num_tokens": 189577391.0, "step": 2009 }, { "epoch": 0.3430619559651818, "grad_norm": 0.5190595554278892, "learning_rate": 2.628434886499403e-05, "loss": 0.533, "num_tokens": 189658723.0, "step": 2010 }, { "epoch": 0.3432326335552142, "grad_norm": 0.4888314885031783, "learning_rate": 2.6277521761392734e-05, "loss": 0.616, "num_tokens": 189773328.0, "step": 2011 }, { "epoch": 0.34340331114524664, "grad_norm": 0.5382879969037266, "learning_rate": 2.6270694657791434e-05, "loss": 0.5813, "num_tokens": 189845493.0, "step": 2012 }, { "epoch": 0.34357398873527906, "grad_norm": 0.5351753726932221, "learning_rate": 2.6263867554190138e-05, "loss": 0.6187, "num_tokens": 189926806.0, "step": 2013 }, { "epoch": 0.3437446663253115, "grad_norm": 0.48363917806378565, "learning_rate": 2.6257040450588838e-05, "loss": 0.5656, "num_tokens": 190017708.0, "step": 2014 }, { "epoch": 0.3439153439153439, "grad_norm": 0.49219373843971426, "learning_rate": 2.6250213346987542e-05, "loss": 0.6524, "num_tokens": 190115902.0, "step": 2015 }, { "epoch": 0.34408602150537637, "grad_norm": 0.5677379293902859, "learning_rate": 2.6243386243386246e-05, "loss": 0.6583, "num_tokens": 190188603.0, "step": 2016 }, { "epoch": 0.3442566990954088, "grad_norm": 0.5184138977011736, "learning_rate": 2.6236559139784946e-05, "loss": 0.6402, "num_tokens": 190271784.0, "step": 2017 }, { "epoch": 0.3444273766854412, "grad_norm": 0.4679885175028407, "learning_rate": 2.622973203618365e-05, "loss": 0.5913, "num_tokens": 190382700.0, "step": 2018 }, { "epoch": 0.34459805427547363, "grad_norm": 0.4974155333572019, "learning_rate": 2.6222904932582354e-05, "loss": 0.6058, "num_tokens": 190481831.0, "step": 2019 }, { "epoch": 0.34476873186550605, "grad_norm": 0.6032273792025741, "learning_rate": 2.6216077828981057e-05, "loss": 0.6234, "num_tokens": 190557662.0, "step": 2020 }, { "epoch": 0.34493940945553847, "grad_norm": 0.5034114171328479, "learning_rate": 2.620925072537976e-05, "loss": 0.6559, "num_tokens": 190660957.0, "step": 2021 }, { "epoch": 0.3451100870455709, "grad_norm": 0.5267131826893524, "learning_rate": 2.6202423621778465e-05, "loss": 0.5699, "num_tokens": 190744050.0, "step": 2022 }, { "epoch": 0.34528076463560337, "grad_norm": 0.508518025631889, "learning_rate": 2.6195596518177165e-05, "loss": 0.6131, "num_tokens": 190844221.0, "step": 2023 }, { "epoch": 0.3454514422256358, "grad_norm": 0.5182029929612848, "learning_rate": 2.618876941457587e-05, "loss": 0.6297, "num_tokens": 190944847.0, "step": 2024 }, { "epoch": 0.3456221198156682, "grad_norm": 0.5044400864456456, "learning_rate": 2.6181942310974573e-05, "loss": 0.6661, "num_tokens": 191040544.0, "step": 2025 }, { "epoch": 0.3457927974057006, "grad_norm": 0.4756422384982896, "learning_rate": 2.6175115207373277e-05, "loss": 0.5745, "num_tokens": 191136159.0, "step": 2026 }, { "epoch": 0.34596347499573304, "grad_norm": 0.5204590320327938, "learning_rate": 2.6168288103771974e-05, "loss": 0.6364, "num_tokens": 191223743.0, "step": 2027 }, { "epoch": 0.34613415258576546, "grad_norm": 0.48702673236325966, "learning_rate": 2.6161461000170677e-05, "loss": 0.614, "num_tokens": 191323076.0, "step": 2028 }, { "epoch": 0.34630483017579794, "grad_norm": 0.5248346225118843, "learning_rate": 2.615463389656938e-05, "loss": 0.5959, "num_tokens": 191402459.0, "step": 2029 }, { "epoch": 0.34647550776583036, "grad_norm": 0.5164412837996571, "learning_rate": 2.6147806792968085e-05, "loss": 0.6432, "num_tokens": 191488264.0, "step": 2030 }, { "epoch": 0.3466461853558628, "grad_norm": 0.4472357824730649, "learning_rate": 2.614097968936679e-05, "loss": 0.5858, "num_tokens": 191605224.0, "step": 2031 }, { "epoch": 0.3468168629458952, "grad_norm": 0.5096566965769875, "learning_rate": 2.6134152585765492e-05, "loss": 0.5795, "num_tokens": 191695844.0, "step": 2032 }, { "epoch": 0.3469875405359276, "grad_norm": 0.5700655464383156, "learning_rate": 2.6127325482164193e-05, "loss": 0.603, "num_tokens": 191776523.0, "step": 2033 }, { "epoch": 0.34715821812596004, "grad_norm": 0.5171411283024171, "learning_rate": 2.6120498378562896e-05, "loss": 0.7249, "num_tokens": 191879820.0, "step": 2034 }, { "epoch": 0.3473288957159925, "grad_norm": 0.5446799236720858, "learning_rate": 2.61136712749616e-05, "loss": 0.6684, "num_tokens": 191975473.0, "step": 2035 }, { "epoch": 0.34749957330602493, "grad_norm": 0.47105729277958236, "learning_rate": 2.6106844171360304e-05, "loss": 0.6223, "num_tokens": 192093475.0, "step": 2036 }, { "epoch": 0.34767025089605735, "grad_norm": 0.5202582850195848, "learning_rate": 2.6100017067759008e-05, "loss": 0.571, "num_tokens": 192173854.0, "step": 2037 }, { "epoch": 0.3478409284860898, "grad_norm": 0.44187254657236363, "learning_rate": 2.609318996415771e-05, "loss": 0.5984, "num_tokens": 192287150.0, "step": 2038 }, { "epoch": 0.3480116060761222, "grad_norm": 0.4513066216354664, "learning_rate": 2.608636286055641e-05, "loss": 0.5683, "num_tokens": 192402683.0, "step": 2039 }, { "epoch": 0.3481822836661546, "grad_norm": 0.5010398892757366, "learning_rate": 2.6079535756955112e-05, "loss": 0.565, "num_tokens": 192483800.0, "step": 2040 }, { "epoch": 0.3483529612561871, "grad_norm": 0.4995997282725358, "learning_rate": 2.6072708653353816e-05, "loss": 0.5754, "num_tokens": 192583662.0, "step": 2041 }, { "epoch": 0.3485236388462195, "grad_norm": 0.4251171402473513, "learning_rate": 2.606588154975252e-05, "loss": 0.5711, "num_tokens": 192711252.0, "step": 2042 }, { "epoch": 0.3486943164362519, "grad_norm": 0.5334860698264265, "learning_rate": 2.605905444615122e-05, "loss": 0.6172, "num_tokens": 192797507.0, "step": 2043 }, { "epoch": 0.34886499402628435, "grad_norm": 0.49395679500962253, "learning_rate": 2.6052227342549924e-05, "loss": 0.619, "num_tokens": 192900312.0, "step": 2044 }, { "epoch": 0.34903567161631677, "grad_norm": 0.5324051302287356, "learning_rate": 2.6045400238948628e-05, "loss": 0.5707, "num_tokens": 192978251.0, "step": 2045 }, { "epoch": 0.3492063492063492, "grad_norm": 0.5279983314437814, "learning_rate": 2.603857313534733e-05, "loss": 0.623, "num_tokens": 193073498.0, "step": 2046 }, { "epoch": 0.34937702679638166, "grad_norm": 0.5386378388811883, "learning_rate": 2.6031746031746035e-05, "loss": 0.6603, "num_tokens": 193151284.0, "step": 2047 }, { "epoch": 0.3495477043864141, "grad_norm": 0.5094145518042561, "learning_rate": 2.602491892814474e-05, "loss": 0.554, "num_tokens": 193237850.0, "step": 2048 }, { "epoch": 0.3497183819764465, "grad_norm": 0.4968825027641457, "learning_rate": 2.6018091824543443e-05, "loss": 0.5731, "num_tokens": 193328809.0, "step": 2049 }, { "epoch": 0.3498890595664789, "grad_norm": 0.49366842702430985, "learning_rate": 2.6011264720942143e-05, "loss": 0.6308, "num_tokens": 193432351.0, "step": 2050 }, { "epoch": 0.35005973715651134, "grad_norm": 0.47598785086562806, "learning_rate": 2.6004437617340847e-05, "loss": 0.5812, "num_tokens": 193528122.0, "step": 2051 }, { "epoch": 0.35023041474654376, "grad_norm": 0.4673227993043067, "learning_rate": 2.5997610513739547e-05, "loss": 0.5027, "num_tokens": 193616135.0, "step": 2052 }, { "epoch": 0.3504010923365762, "grad_norm": 0.4602869017848875, "learning_rate": 2.599078341013825e-05, "loss": 0.5329, "num_tokens": 193710552.0, "step": 2053 }, { "epoch": 0.35057176992660866, "grad_norm": 0.49594503488076164, "learning_rate": 2.598395630653695e-05, "loss": 0.5571, "num_tokens": 193802749.0, "step": 2054 }, { "epoch": 0.3507424475166411, "grad_norm": 0.49780797990806175, "learning_rate": 2.5977129202935655e-05, "loss": 0.6484, "num_tokens": 193895930.0, "step": 2055 }, { "epoch": 0.3509131251066735, "grad_norm": 0.5382288839671874, "learning_rate": 2.597030209933436e-05, "loss": 0.6764, "num_tokens": 193991948.0, "step": 2056 }, { "epoch": 0.3510838026967059, "grad_norm": 0.5776372989010286, "learning_rate": 2.5963474995733063e-05, "loss": 0.612, "num_tokens": 194072739.0, "step": 2057 }, { "epoch": 0.35125448028673834, "grad_norm": 0.4821459507523979, "learning_rate": 2.5956647892131766e-05, "loss": 0.5446, "num_tokens": 194169678.0, "step": 2058 }, { "epoch": 0.35142515787677076, "grad_norm": 0.48391099312412156, "learning_rate": 2.594982078853047e-05, "loss": 0.5613, "num_tokens": 194259510.0, "step": 2059 }, { "epoch": 0.35159583546680323, "grad_norm": 0.5275001849852385, "learning_rate": 2.594299368492917e-05, "loss": 0.6312, "num_tokens": 194345431.0, "step": 2060 }, { "epoch": 0.35176651305683565, "grad_norm": 0.5272837743169848, "learning_rate": 2.5936166581327874e-05, "loss": 0.6422, "num_tokens": 194439896.0, "step": 2061 }, { "epoch": 0.35193719064686807, "grad_norm": 0.5458418385615955, "learning_rate": 2.5929339477726578e-05, "loss": 0.6655, "num_tokens": 194521361.0, "step": 2062 }, { "epoch": 0.3521078682369005, "grad_norm": 0.46306305722170177, "learning_rate": 2.5922512374125282e-05, "loss": 0.6302, "num_tokens": 194649642.0, "step": 2063 }, { "epoch": 0.3522785458269329, "grad_norm": 0.5579964095063006, "learning_rate": 2.591568527052398e-05, "loss": 0.6907, "num_tokens": 194747210.0, "step": 2064 }, { "epoch": 0.35244922341696533, "grad_norm": 0.5248085732463491, "learning_rate": 2.5908858166922683e-05, "loss": 0.6308, "num_tokens": 194843903.0, "step": 2065 }, { "epoch": 0.3526199010069978, "grad_norm": 0.6171067580891004, "learning_rate": 2.5902031063321386e-05, "loss": 0.619, "num_tokens": 194929997.0, "step": 2066 }, { "epoch": 0.3527905785970302, "grad_norm": 0.4945771982424894, "learning_rate": 2.589520395972009e-05, "loss": 0.6516, "num_tokens": 195043040.0, "step": 2067 }, { "epoch": 0.35296125618706264, "grad_norm": 0.5203649599189657, "learning_rate": 2.5888376856118794e-05, "loss": 0.5886, "num_tokens": 195130144.0, "step": 2068 }, { "epoch": 0.35313193377709506, "grad_norm": 0.5045223590169916, "learning_rate": 2.5881549752517498e-05, "loss": 0.5192, "num_tokens": 195202205.0, "step": 2069 }, { "epoch": 0.3533026113671275, "grad_norm": 0.4664162961346976, "learning_rate": 2.5874722648916198e-05, "loss": 0.6197, "num_tokens": 195310219.0, "step": 2070 }, { "epoch": 0.3534732889571599, "grad_norm": 0.5243747444099038, "learning_rate": 2.5867895545314902e-05, "loss": 0.6374, "num_tokens": 195385019.0, "step": 2071 }, { "epoch": 0.3536439665471924, "grad_norm": 0.4513033937985607, "learning_rate": 2.5861068441713606e-05, "loss": 0.5908, "num_tokens": 195497655.0, "step": 2072 }, { "epoch": 0.3538146441372248, "grad_norm": 0.4920970970460804, "learning_rate": 2.585424133811231e-05, "loss": 0.5416, "num_tokens": 195581927.0, "step": 2073 }, { "epoch": 0.3539853217272572, "grad_norm": 0.5124056770023176, "learning_rate": 2.5847414234511013e-05, "loss": 0.5807, "num_tokens": 195672223.0, "step": 2074 }, { "epoch": 0.35415599931728964, "grad_norm": 0.49130454237484433, "learning_rate": 2.5840587130909717e-05, "loss": 0.5455, "num_tokens": 195758725.0, "step": 2075 }, { "epoch": 0.35432667690732206, "grad_norm": 0.5059496397177149, "learning_rate": 2.5833760027308414e-05, "loss": 0.53, "num_tokens": 195854492.0, "step": 2076 }, { "epoch": 0.3544973544973545, "grad_norm": 0.47653442343138347, "learning_rate": 2.5826932923707118e-05, "loss": 0.5724, "num_tokens": 195956226.0, "step": 2077 }, { "epoch": 0.35466803208738695, "grad_norm": 0.4873633398929542, "learning_rate": 2.582010582010582e-05, "loss": 0.6137, "num_tokens": 196059271.0, "step": 2078 }, { "epoch": 0.3548387096774194, "grad_norm": 0.5384631816509435, "learning_rate": 2.5813278716504525e-05, "loss": 0.6209, "num_tokens": 196142986.0, "step": 2079 }, { "epoch": 0.3550093872674518, "grad_norm": 0.4355623256025275, "learning_rate": 2.5806451612903226e-05, "loss": 0.6258, "num_tokens": 196270508.0, "step": 2080 }, { "epoch": 0.3551800648574842, "grad_norm": 0.4939639682709106, "learning_rate": 2.579962450930193e-05, "loss": 0.6453, "num_tokens": 196376758.0, "step": 2081 }, { "epoch": 0.35535074244751663, "grad_norm": 0.4778362025783641, "learning_rate": 2.5792797405700633e-05, "loss": 0.6417, "num_tokens": 196486037.0, "step": 2082 }, { "epoch": 0.35552142003754905, "grad_norm": 0.5770920926268133, "learning_rate": 2.5785970302099337e-05, "loss": 0.5922, "num_tokens": 196546820.0, "step": 2083 }, { "epoch": 0.35569209762758147, "grad_norm": 0.5248187977229507, "learning_rate": 2.577914319849804e-05, "loss": 0.6494, "num_tokens": 196636635.0, "step": 2084 }, { "epoch": 0.35586277521761395, "grad_norm": 0.4459819609627772, "learning_rate": 2.5772316094896744e-05, "loss": 0.586, "num_tokens": 196747571.0, "step": 2085 }, { "epoch": 0.35603345280764637, "grad_norm": 0.43820885082076577, "learning_rate": 2.5765488991295448e-05, "loss": 0.5882, "num_tokens": 196864596.0, "step": 2086 }, { "epoch": 0.3562041303976788, "grad_norm": 0.4713623818923499, "learning_rate": 2.575866188769415e-05, "loss": 0.5515, "num_tokens": 196954773.0, "step": 2087 }, { "epoch": 0.3563748079877112, "grad_norm": 0.538690610357371, "learning_rate": 2.5751834784092852e-05, "loss": 0.5935, "num_tokens": 197036484.0, "step": 2088 }, { "epoch": 0.3565454855777436, "grad_norm": 0.550332713740218, "learning_rate": 2.5745007680491553e-05, "loss": 0.6264, "num_tokens": 197115132.0, "step": 2089 }, { "epoch": 0.35671616316777605, "grad_norm": 0.5842399987064149, "learning_rate": 2.5738180576890256e-05, "loss": 0.683, "num_tokens": 197190318.0, "step": 2090 }, { "epoch": 0.3568868407578085, "grad_norm": 0.5471545067928836, "learning_rate": 2.5731353473288957e-05, "loss": 0.6173, "num_tokens": 197262144.0, "step": 2091 }, { "epoch": 0.35705751834784094, "grad_norm": 0.4887568777611135, "learning_rate": 2.572452636968766e-05, "loss": 0.6212, "num_tokens": 197375976.0, "step": 2092 }, { "epoch": 0.35722819593787336, "grad_norm": 0.47721648484484985, "learning_rate": 2.5717699266086364e-05, "loss": 0.5817, "num_tokens": 197473953.0, "step": 2093 }, { "epoch": 0.3573988735279058, "grad_norm": 0.5143757023725052, "learning_rate": 2.5710872162485068e-05, "loss": 0.7116, "num_tokens": 197575240.0, "step": 2094 }, { "epoch": 0.3575695511179382, "grad_norm": 0.5810588212322416, "learning_rate": 2.5704045058883772e-05, "loss": 0.7454, "num_tokens": 197648166.0, "step": 2095 }, { "epoch": 0.3577402287079706, "grad_norm": 0.6548140475894928, "learning_rate": 2.5697217955282476e-05, "loss": 0.7489, "num_tokens": 197764641.0, "step": 2096 }, { "epoch": 0.3579109062980031, "grad_norm": 0.5340769171192323, "learning_rate": 2.5690390851681176e-05, "loss": 0.5844, "num_tokens": 197845332.0, "step": 2097 }, { "epoch": 0.3580815838880355, "grad_norm": 0.5211830296062894, "learning_rate": 2.568356374807988e-05, "loss": 0.7011, "num_tokens": 197952352.0, "step": 2098 }, { "epoch": 0.35825226147806793, "grad_norm": 0.520787771963465, "learning_rate": 2.5676736644478583e-05, "loss": 0.6507, "num_tokens": 198045180.0, "step": 2099 }, { "epoch": 0.35842293906810035, "grad_norm": 0.535441568885237, "learning_rate": 2.5669909540877287e-05, "loss": 0.6275, "num_tokens": 198123133.0, "step": 2100 }, { "epoch": 0.3585936166581328, "grad_norm": 0.5046026939595865, "learning_rate": 2.5663082437275984e-05, "loss": 0.5878, "num_tokens": 198203964.0, "step": 2101 }, { "epoch": 0.3587642942481652, "grad_norm": 0.5217410625455451, "learning_rate": 2.5656255333674688e-05, "loss": 0.605, "num_tokens": 198286084.0, "step": 2102 }, { "epoch": 0.35893497183819767, "grad_norm": 0.5615094820129063, "learning_rate": 2.5649428230073392e-05, "loss": 0.5722, "num_tokens": 198352796.0, "step": 2103 }, { "epoch": 0.3591056494282301, "grad_norm": 0.4813820650903874, "learning_rate": 2.5642601126472095e-05, "loss": 0.5646, "num_tokens": 198453224.0, "step": 2104 }, { "epoch": 0.3592763270182625, "grad_norm": 0.5156569036408863, "learning_rate": 2.56357740228708e-05, "loss": 0.7282, "num_tokens": 198563193.0, "step": 2105 }, { "epoch": 0.35944700460829493, "grad_norm": 0.4967118068191588, "learning_rate": 2.5628946919269503e-05, "loss": 0.596, "num_tokens": 198658527.0, "step": 2106 }, { "epoch": 0.35961768219832735, "grad_norm": 0.5277302671010591, "learning_rate": 2.5622119815668203e-05, "loss": 0.6656, "num_tokens": 198751358.0, "step": 2107 }, { "epoch": 0.35978835978835977, "grad_norm": 0.5047592306800944, "learning_rate": 2.5615292712066907e-05, "loss": 0.5223, "num_tokens": 198834077.0, "step": 2108 }, { "epoch": 0.35995903737839224, "grad_norm": 0.5703568222144827, "learning_rate": 2.560846560846561e-05, "loss": 0.6013, "num_tokens": 198927003.0, "step": 2109 }, { "epoch": 0.36012971496842466, "grad_norm": 0.5674804410611268, "learning_rate": 2.5601638504864315e-05, "loss": 0.6325, "num_tokens": 199006968.0, "step": 2110 }, { "epoch": 0.3603003925584571, "grad_norm": 0.5308148368292055, "learning_rate": 2.559481140126302e-05, "loss": 0.683, "num_tokens": 199097952.0, "step": 2111 }, { "epoch": 0.3604710701484895, "grad_norm": 0.49863884709423634, "learning_rate": 2.5587984297661722e-05, "loss": 0.6413, "num_tokens": 199196260.0, "step": 2112 }, { "epoch": 0.3606417477385219, "grad_norm": 0.509344150857173, "learning_rate": 2.558115719406042e-05, "loss": 0.6807, "num_tokens": 199298819.0, "step": 2113 }, { "epoch": 0.36081242532855434, "grad_norm": 0.5417904852688515, "learning_rate": 2.5574330090459123e-05, "loss": 0.6023, "num_tokens": 199376311.0, "step": 2114 }, { "epoch": 0.36098310291858676, "grad_norm": 0.4926011859782852, "learning_rate": 2.5567502986857827e-05, "loss": 0.624, "num_tokens": 199474057.0, "step": 2115 }, { "epoch": 0.36115378050861924, "grad_norm": 0.4854151162057932, "learning_rate": 2.556067588325653e-05, "loss": 0.5768, "num_tokens": 199571715.0, "step": 2116 }, { "epoch": 0.36132445809865166, "grad_norm": 0.5637887058069204, "learning_rate": 2.5553848779655234e-05, "loss": 0.5641, "num_tokens": 199633074.0, "step": 2117 }, { "epoch": 0.3614951356886841, "grad_norm": 0.5247202179256627, "learning_rate": 2.5547021676053935e-05, "loss": 0.5356, "num_tokens": 199711531.0, "step": 2118 }, { "epoch": 0.3616658132787165, "grad_norm": 0.5267530410525614, "learning_rate": 2.554019457245264e-05, "loss": 0.5653, "num_tokens": 199794600.0, "step": 2119 }, { "epoch": 0.3618364908687489, "grad_norm": 0.5133251887939788, "learning_rate": 2.5533367468851342e-05, "loss": 0.6371, "num_tokens": 199890508.0, "step": 2120 }, { "epoch": 0.36200716845878134, "grad_norm": 0.5577368866644095, "learning_rate": 2.5526540365250046e-05, "loss": 0.5782, "num_tokens": 199963544.0, "step": 2121 }, { "epoch": 0.3621778460488138, "grad_norm": 0.498379243202252, "learning_rate": 2.551971326164875e-05, "loss": 0.6345, "num_tokens": 200061825.0, "step": 2122 }, { "epoch": 0.36234852363884623, "grad_norm": 0.5440032855011572, "learning_rate": 2.5512886158047453e-05, "loss": 0.5919, "num_tokens": 200128521.0, "step": 2123 }, { "epoch": 0.36251920122887865, "grad_norm": 0.4804495745074138, "learning_rate": 2.5506059054446154e-05, "loss": 0.5532, "num_tokens": 200211553.0, "step": 2124 }, { "epoch": 0.36268987881891107, "grad_norm": 0.5639072892477638, "learning_rate": 2.5499231950844858e-05, "loss": 0.6079, "num_tokens": 200281097.0, "step": 2125 }, { "epoch": 0.3628605564089435, "grad_norm": 0.5562027100584316, "learning_rate": 2.5492404847243558e-05, "loss": 0.5927, "num_tokens": 200348442.0, "step": 2126 }, { "epoch": 0.3630312339989759, "grad_norm": 0.5621631889625764, "learning_rate": 2.5485577743642262e-05, "loss": 0.6259, "num_tokens": 200425632.0, "step": 2127 }, { "epoch": 0.3632019115890084, "grad_norm": 0.47195319488016396, "learning_rate": 2.5478750640040962e-05, "loss": 0.5939, "num_tokens": 200549813.0, "step": 2128 }, { "epoch": 0.3633725891790408, "grad_norm": 0.4626633014973855, "learning_rate": 2.5471923536439666e-05, "loss": 0.586, "num_tokens": 200658496.0, "step": 2129 }, { "epoch": 0.3635432667690732, "grad_norm": 0.5028402082258967, "learning_rate": 2.546509643283837e-05, "loss": 0.6047, "num_tokens": 200748832.0, "step": 2130 }, { "epoch": 0.36371394435910565, "grad_norm": 0.51211947452596, "learning_rate": 2.5458269329237073e-05, "loss": 0.5522, "num_tokens": 200845344.0, "step": 2131 }, { "epoch": 0.36388462194913807, "grad_norm": 0.6261374584016216, "learning_rate": 2.5451442225635777e-05, "loss": 0.6914, "num_tokens": 200917644.0, "step": 2132 }, { "epoch": 0.3640552995391705, "grad_norm": 0.5144567432218283, "learning_rate": 2.544461512203448e-05, "loss": 0.6165, "num_tokens": 201002976.0, "step": 2133 }, { "epoch": 0.36422597712920296, "grad_norm": 0.5072144500371885, "learning_rate": 2.543778801843318e-05, "loss": 0.6516, "num_tokens": 201116059.0, "step": 2134 }, { "epoch": 0.3643966547192354, "grad_norm": 0.4755749250491814, "learning_rate": 2.5430960914831885e-05, "loss": 0.679, "num_tokens": 201222233.0, "step": 2135 }, { "epoch": 0.3645673323092678, "grad_norm": 0.48442210178620376, "learning_rate": 2.542413381123059e-05, "loss": 0.5746, "num_tokens": 201311995.0, "step": 2136 }, { "epoch": 0.3647380098993002, "grad_norm": 0.4473412228864404, "learning_rate": 2.5417306707629293e-05, "loss": 0.603, "num_tokens": 201432133.0, "step": 2137 }, { "epoch": 0.36490868748933264, "grad_norm": 0.4349525985678347, "learning_rate": 2.541047960402799e-05, "loss": 0.5181, "num_tokens": 201545524.0, "step": 2138 }, { "epoch": 0.36507936507936506, "grad_norm": 0.5640896031563569, "learning_rate": 2.5403652500426693e-05, "loss": 0.6612, "num_tokens": 201621075.0, "step": 2139 }, { "epoch": 0.36525004266939753, "grad_norm": 0.4901290522958736, "learning_rate": 2.5396825396825397e-05, "loss": 0.6862, "num_tokens": 201721817.0, "step": 2140 }, { "epoch": 0.36542072025942995, "grad_norm": 0.4555425457008379, "learning_rate": 2.53899982932241e-05, "loss": 0.5476, "num_tokens": 201823744.0, "step": 2141 }, { "epoch": 0.3655913978494624, "grad_norm": 0.5838687959267246, "learning_rate": 2.5383171189622805e-05, "loss": 0.7256, "num_tokens": 201939243.0, "step": 2142 }, { "epoch": 0.3657620754394948, "grad_norm": 0.5520156105447594, "learning_rate": 2.537634408602151e-05, "loss": 0.5483, "num_tokens": 202024618.0, "step": 2143 }, { "epoch": 0.3659327530295272, "grad_norm": 0.5241538928902035, "learning_rate": 2.536951698242021e-05, "loss": 0.6433, "num_tokens": 202120080.0, "step": 2144 }, { "epoch": 0.36610343061955963, "grad_norm": 0.4900672915906874, "learning_rate": 2.5362689878818913e-05, "loss": 0.636, "num_tokens": 202215073.0, "step": 2145 }, { "epoch": 0.36627410820959205, "grad_norm": 0.49630036537384176, "learning_rate": 2.5355862775217616e-05, "loss": 0.578, "num_tokens": 202303559.0, "step": 2146 }, { "epoch": 0.36644478579962453, "grad_norm": 0.4815116668922985, "learning_rate": 2.534903567161632e-05, "loss": 0.5827, "num_tokens": 202402554.0, "step": 2147 }, { "epoch": 0.36661546338965695, "grad_norm": 0.5134878785797574, "learning_rate": 2.5342208568015024e-05, "loss": 0.542, "num_tokens": 202479114.0, "step": 2148 }, { "epoch": 0.36678614097968937, "grad_norm": 0.48594195377218236, "learning_rate": 2.5335381464413728e-05, "loss": 0.6302, "num_tokens": 202569077.0, "step": 2149 }, { "epoch": 0.3669568185697218, "grad_norm": 0.5270678498789436, "learning_rate": 2.5328554360812428e-05, "loss": 0.6992, "num_tokens": 202669222.0, "step": 2150 }, { "epoch": 0.3671274961597542, "grad_norm": 0.48931732152455165, "learning_rate": 2.5321727257211128e-05, "loss": 0.5894, "num_tokens": 202777957.0, "step": 2151 }, { "epoch": 0.3672981737497866, "grad_norm": 0.5036365725366047, "learning_rate": 2.5314900153609832e-05, "loss": 0.552, "num_tokens": 202894023.0, "step": 2152 }, { "epoch": 0.3674688513398191, "grad_norm": 0.4538590119089527, "learning_rate": 2.5308073050008536e-05, "loss": 0.6023, "num_tokens": 203011018.0, "step": 2153 }, { "epoch": 0.3676395289298515, "grad_norm": 0.5275772239323716, "learning_rate": 2.530124594640724e-05, "loss": 0.6383, "num_tokens": 203100479.0, "step": 2154 }, { "epoch": 0.36781020651988394, "grad_norm": 0.4906530155085971, "learning_rate": 2.529441884280594e-05, "loss": 0.613, "num_tokens": 203216841.0, "step": 2155 }, { "epoch": 0.36798088410991636, "grad_norm": 0.5181664998580274, "learning_rate": 2.5287591739204644e-05, "loss": 0.6936, "num_tokens": 203323262.0, "step": 2156 }, { "epoch": 0.3681515616999488, "grad_norm": 0.5450301404654573, "learning_rate": 2.5280764635603347e-05, "loss": 0.5571, "num_tokens": 203392008.0, "step": 2157 }, { "epoch": 0.3683222392899812, "grad_norm": 0.4737497496090665, "learning_rate": 2.527393753200205e-05, "loss": 0.5648, "num_tokens": 203486614.0, "step": 2158 }, { "epoch": 0.3684929168800137, "grad_norm": 0.5416770755560676, "learning_rate": 2.5267110428400755e-05, "loss": 0.5843, "num_tokens": 203564700.0, "step": 2159 }, { "epoch": 0.3686635944700461, "grad_norm": 0.4798038407811237, "learning_rate": 2.526028332479946e-05, "loss": 0.6191, "num_tokens": 203686103.0, "step": 2160 }, { "epoch": 0.3688342720600785, "grad_norm": 0.5142864794975147, "learning_rate": 2.525345622119816e-05, "loss": 0.7211, "num_tokens": 203795249.0, "step": 2161 }, { "epoch": 0.36900494965011094, "grad_norm": 0.4682774859413058, "learning_rate": 2.5246629117596863e-05, "loss": 0.5379, "num_tokens": 203882239.0, "step": 2162 }, { "epoch": 0.36917562724014336, "grad_norm": 0.49730603038880045, "learning_rate": 2.5239802013995563e-05, "loss": 0.5716, "num_tokens": 203969363.0, "step": 2163 }, { "epoch": 0.3693463048301758, "grad_norm": 0.49435968513601425, "learning_rate": 2.5232974910394267e-05, "loss": 0.6496, "num_tokens": 204078603.0, "step": 2164 }, { "epoch": 0.36951698242020825, "grad_norm": 0.48563356419989684, "learning_rate": 2.5226147806792967e-05, "loss": 0.6061, "num_tokens": 204183512.0, "step": 2165 }, { "epoch": 0.36968766001024067, "grad_norm": 0.6743265303722185, "learning_rate": 2.521932070319167e-05, "loss": 0.458, "num_tokens": 204275958.0, "step": 2166 }, { "epoch": 0.3698583376002731, "grad_norm": 0.5015067215785812, "learning_rate": 2.5212493599590375e-05, "loss": 0.5902, "num_tokens": 204374479.0, "step": 2167 }, { "epoch": 0.3700290151903055, "grad_norm": 0.5647506131966961, "learning_rate": 2.520566649598908e-05, "loss": 0.676, "num_tokens": 204459968.0, "step": 2168 }, { "epoch": 0.37019969278033793, "grad_norm": 0.5106620763942856, "learning_rate": 2.5198839392387782e-05, "loss": 0.5921, "num_tokens": 204552216.0, "step": 2169 }, { "epoch": 0.37037037037037035, "grad_norm": 0.5087584064217722, "learning_rate": 2.5192012288786486e-05, "loss": 0.6792, "num_tokens": 204658938.0, "step": 2170 }, { "epoch": 0.3705410479604028, "grad_norm": 0.47053392089079127, "learning_rate": 2.5185185185185187e-05, "loss": 0.6521, "num_tokens": 204764511.0, "step": 2171 }, { "epoch": 0.37071172555043524, "grad_norm": 0.5000622509857107, "learning_rate": 2.517835808158389e-05, "loss": 0.6102, "num_tokens": 204860229.0, "step": 2172 }, { "epoch": 0.37088240314046766, "grad_norm": 0.5201307915686233, "learning_rate": 2.5171530977982594e-05, "loss": 0.6536, "num_tokens": 204952076.0, "step": 2173 }, { "epoch": 0.3710530807305001, "grad_norm": 0.5468825083893679, "learning_rate": 2.5164703874381298e-05, "loss": 0.6051, "num_tokens": 205039522.0, "step": 2174 }, { "epoch": 0.3712237583205325, "grad_norm": 0.47471000894454335, "learning_rate": 2.5157876770779995e-05, "loss": 0.54, "num_tokens": 205148389.0, "step": 2175 }, { "epoch": 0.3713944359105649, "grad_norm": 0.4802903838470032, "learning_rate": 2.51510496671787e-05, "loss": 0.5837, "num_tokens": 205262818.0, "step": 2176 }, { "epoch": 0.3715651135005974, "grad_norm": 0.46943871728196596, "learning_rate": 2.5144222563577402e-05, "loss": 0.6479, "num_tokens": 205396548.0, "step": 2177 }, { "epoch": 0.3717357910906298, "grad_norm": 0.546395910667123, "learning_rate": 2.5137395459976106e-05, "loss": 0.5842, "num_tokens": 205489075.0, "step": 2178 }, { "epoch": 0.37190646868066224, "grad_norm": 0.49919913775057834, "learning_rate": 2.513056835637481e-05, "loss": 0.5835, "num_tokens": 205585046.0, "step": 2179 }, { "epoch": 0.37207714627069466, "grad_norm": 0.45673300474574074, "learning_rate": 2.5123741252773514e-05, "loss": 0.5511, "num_tokens": 205679125.0, "step": 2180 }, { "epoch": 0.3722478238607271, "grad_norm": 0.4895892487875307, "learning_rate": 2.5116914149172214e-05, "loss": 0.58, "num_tokens": 205776196.0, "step": 2181 }, { "epoch": 0.3724185014507595, "grad_norm": 0.49616560675527227, "learning_rate": 2.5110087045570918e-05, "loss": 0.5584, "num_tokens": 205865203.0, "step": 2182 }, { "epoch": 0.3725891790407919, "grad_norm": 0.5315210182724598, "learning_rate": 2.510325994196962e-05, "loss": 0.6228, "num_tokens": 205951589.0, "step": 2183 }, { "epoch": 0.3727598566308244, "grad_norm": 0.5992902133081407, "learning_rate": 2.5096432838368325e-05, "loss": 0.6979, "num_tokens": 206038580.0, "step": 2184 }, { "epoch": 0.3729305342208568, "grad_norm": 0.5200186981777325, "learning_rate": 2.508960573476703e-05, "loss": 0.5951, "num_tokens": 206122574.0, "step": 2185 }, { "epoch": 0.37310121181088923, "grad_norm": 0.5511099710262809, "learning_rate": 2.5082778631165733e-05, "loss": 0.6956, "num_tokens": 206228357.0, "step": 2186 }, { "epoch": 0.37327188940092165, "grad_norm": 0.5786201393299616, "learning_rate": 2.5075951527564433e-05, "loss": 0.555, "num_tokens": 206295160.0, "step": 2187 }, { "epoch": 0.3734425669909541, "grad_norm": 0.5216168565313357, "learning_rate": 2.5069124423963134e-05, "loss": 0.5765, "num_tokens": 206374644.0, "step": 2188 }, { "epoch": 0.3736132445809865, "grad_norm": 0.4846179891763666, "learning_rate": 2.5062297320361837e-05, "loss": 0.5408, "num_tokens": 206461586.0, "step": 2189 }, { "epoch": 0.37378392217101897, "grad_norm": 0.4858060867496365, "learning_rate": 2.505547021676054e-05, "loss": 0.6087, "num_tokens": 206560226.0, "step": 2190 }, { "epoch": 0.3739545997610514, "grad_norm": 0.5108078811471278, "learning_rate": 2.5048643113159245e-05, "loss": 0.6249, "num_tokens": 206650838.0, "step": 2191 }, { "epoch": 0.3741252773510838, "grad_norm": 0.5083680259248301, "learning_rate": 2.5041816009557945e-05, "loss": 0.6415, "num_tokens": 206744997.0, "step": 2192 }, { "epoch": 0.3742959549411162, "grad_norm": 0.5485162771274682, "learning_rate": 2.503498890595665e-05, "loss": 0.5552, "num_tokens": 206813930.0, "step": 2193 }, { "epoch": 0.37446663253114865, "grad_norm": 0.5553339866446881, "learning_rate": 2.5028161802355353e-05, "loss": 0.5641, "num_tokens": 206882845.0, "step": 2194 }, { "epoch": 0.37463731012118107, "grad_norm": 0.5629611265467421, "learning_rate": 2.5021334698754057e-05, "loss": 0.6326, "num_tokens": 206974554.0, "step": 2195 }, { "epoch": 0.37480798771121354, "grad_norm": 0.44596276720850586, "learning_rate": 2.501450759515276e-05, "loss": 0.5492, "num_tokens": 207096066.0, "step": 2196 }, { "epoch": 0.37497866530124596, "grad_norm": 0.5026456149375772, "learning_rate": 2.5007680491551464e-05, "loss": 0.5995, "num_tokens": 207184362.0, "step": 2197 }, { "epoch": 0.3751493428912784, "grad_norm": 0.51234910337486, "learning_rate": 2.5000853387950164e-05, "loss": 0.6794, "num_tokens": 207273794.0, "step": 2198 }, { "epoch": 0.3753200204813108, "grad_norm": 0.47111656426449977, "learning_rate": 2.4994026284348868e-05, "loss": 0.5755, "num_tokens": 207363287.0, "step": 2199 }, { "epoch": 0.3754906980713432, "grad_norm": 0.48774236927144493, "learning_rate": 2.498719918074757e-05, "loss": 0.6086, "num_tokens": 207462334.0, "step": 2200 }, { "epoch": 0.37566137566137564, "grad_norm": 0.5627676232434694, "learning_rate": 2.4980372077146272e-05, "loss": 0.6992, "num_tokens": 207539530.0, "step": 2201 }, { "epoch": 0.3758320532514081, "grad_norm": 0.5076910461948151, "learning_rate": 2.4973544973544973e-05, "loss": 0.5052, "num_tokens": 207614586.0, "step": 2202 }, { "epoch": 0.37600273084144054, "grad_norm": 0.5003767494757384, "learning_rate": 2.4966717869943677e-05, "loss": 0.6437, "num_tokens": 207712239.0, "step": 2203 }, { "epoch": 0.37617340843147296, "grad_norm": 0.47796320845955104, "learning_rate": 2.495989076634238e-05, "loss": 0.556, "num_tokens": 207816586.0, "step": 2204 }, { "epoch": 0.3763440860215054, "grad_norm": 0.5277457582205102, "learning_rate": 2.4953063662741084e-05, "loss": 0.6163, "num_tokens": 207901991.0, "step": 2205 }, { "epoch": 0.3765147636115378, "grad_norm": 0.4981931351067329, "learning_rate": 2.4946236559139788e-05, "loss": 0.7048, "num_tokens": 208012287.0, "step": 2206 }, { "epoch": 0.3766854412015702, "grad_norm": 0.5606426192263063, "learning_rate": 2.493940945553849e-05, "loss": 0.6036, "num_tokens": 208081218.0, "step": 2207 }, { "epoch": 0.3768561187916027, "grad_norm": 0.5776305688872656, "learning_rate": 2.4932582351937192e-05, "loss": 0.5901, "num_tokens": 208150998.0, "step": 2208 }, { "epoch": 0.3770267963816351, "grad_norm": 0.5267997442772258, "learning_rate": 2.4925755248335896e-05, "loss": 0.7056, "num_tokens": 208248761.0, "step": 2209 }, { "epoch": 0.37719747397166753, "grad_norm": 0.4711932376018343, "learning_rate": 2.49189281447346e-05, "loss": 0.5487, "num_tokens": 208348163.0, "step": 2210 }, { "epoch": 0.37736815156169995, "grad_norm": 0.5462963440251482, "learning_rate": 2.4912101041133303e-05, "loss": 0.6103, "num_tokens": 208435734.0, "step": 2211 }, { "epoch": 0.37753882915173237, "grad_norm": 0.49231741481870267, "learning_rate": 2.4905273937532007e-05, "loss": 0.604, "num_tokens": 208533232.0, "step": 2212 }, { "epoch": 0.3777095067417648, "grad_norm": 0.4773116208153891, "learning_rate": 2.4898446833930704e-05, "loss": 0.6063, "num_tokens": 208623347.0, "step": 2213 }, { "epoch": 0.3778801843317972, "grad_norm": 0.4867860894506644, "learning_rate": 2.4891619730329408e-05, "loss": 0.6378, "num_tokens": 208732855.0, "step": 2214 }, { "epoch": 0.3780508619218297, "grad_norm": 0.49511846679846727, "learning_rate": 2.488479262672811e-05, "loss": 0.5793, "num_tokens": 208826739.0, "step": 2215 }, { "epoch": 0.3782215395118621, "grad_norm": 0.5680713110332243, "learning_rate": 2.4877965523126815e-05, "loss": 0.6219, "num_tokens": 208928078.0, "step": 2216 }, { "epoch": 0.3783922171018945, "grad_norm": 0.48627865462517955, "learning_rate": 2.487113841952552e-05, "loss": 0.6072, "num_tokens": 209032999.0, "step": 2217 }, { "epoch": 0.37856289469192694, "grad_norm": 0.5547474902712144, "learning_rate": 2.486431131592422e-05, "loss": 0.6802, "num_tokens": 209111615.0, "step": 2218 }, { "epoch": 0.37873357228195936, "grad_norm": 0.5172616377117005, "learning_rate": 2.4857484212322923e-05, "loss": 0.5023, "num_tokens": 209178427.0, "step": 2219 }, { "epoch": 0.3789042498719918, "grad_norm": 0.46504158875318513, "learning_rate": 2.4850657108721627e-05, "loss": 0.6332, "num_tokens": 209300643.0, "step": 2220 }, { "epoch": 0.37907492746202426, "grad_norm": 0.4477236535634431, "learning_rate": 2.484383000512033e-05, "loss": 0.4957, "num_tokens": 209402228.0, "step": 2221 }, { "epoch": 0.3792456050520567, "grad_norm": 0.4804598856307217, "learning_rate": 2.4837002901519034e-05, "loss": 0.6186, "num_tokens": 209509608.0, "step": 2222 }, { "epoch": 0.3794162826420891, "grad_norm": 0.5511649955259911, "learning_rate": 2.4830175797917738e-05, "loss": 0.6755, "num_tokens": 209598188.0, "step": 2223 }, { "epoch": 0.3795869602321215, "grad_norm": 0.5381740739728642, "learning_rate": 2.4823348694316442e-05, "loss": 0.614, "num_tokens": 209677530.0, "step": 2224 }, { "epoch": 0.37975763782215394, "grad_norm": 0.5318171123349746, "learning_rate": 2.481652159071514e-05, "loss": 0.6261, "num_tokens": 209758366.0, "step": 2225 }, { "epoch": 0.37992831541218636, "grad_norm": 0.5009440889062267, "learning_rate": 2.4809694487113843e-05, "loss": 0.5887, "num_tokens": 209844484.0, "step": 2226 }, { "epoch": 0.38009899300221883, "grad_norm": 0.5146779918174653, "learning_rate": 2.4802867383512547e-05, "loss": 0.5316, "num_tokens": 209932276.0, "step": 2227 }, { "epoch": 0.38026967059225125, "grad_norm": 0.4899092762395234, "learning_rate": 2.479604027991125e-05, "loss": 0.5465, "num_tokens": 210016381.0, "step": 2228 }, { "epoch": 0.38044034818228367, "grad_norm": 0.5146560833671472, "learning_rate": 2.478921317630995e-05, "loss": 0.6461, "num_tokens": 210104991.0, "step": 2229 }, { "epoch": 0.3806110257723161, "grad_norm": 0.4725983314144366, "learning_rate": 2.4782386072708654e-05, "loss": 0.5925, "num_tokens": 210204967.0, "step": 2230 }, { "epoch": 0.3807817033623485, "grad_norm": 0.5192639139389731, "learning_rate": 2.4775558969107358e-05, "loss": 0.5655, "num_tokens": 210276589.0, "step": 2231 }, { "epoch": 0.38095238095238093, "grad_norm": 0.5227151914833778, "learning_rate": 2.4768731865506062e-05, "loss": 0.5982, "num_tokens": 210354878.0, "step": 2232 }, { "epoch": 0.3811230585424134, "grad_norm": 0.5162548977100955, "learning_rate": 2.4761904761904766e-05, "loss": 0.5867, "num_tokens": 210435533.0, "step": 2233 }, { "epoch": 0.3812937361324458, "grad_norm": 0.4886404514584747, "learning_rate": 2.475507765830347e-05, "loss": 0.6311, "num_tokens": 210539613.0, "step": 2234 }, { "epoch": 0.38146441372247825, "grad_norm": 0.5746338244993011, "learning_rate": 2.474825055470217e-05, "loss": 0.6473, "num_tokens": 210642102.0, "step": 2235 }, { "epoch": 0.38163509131251067, "grad_norm": 0.5082037049380634, "learning_rate": 2.4741423451100874e-05, "loss": 0.6297, "num_tokens": 210738390.0, "step": 2236 }, { "epoch": 0.3818057689025431, "grad_norm": 0.5143933765220869, "learning_rate": 2.4734596347499574e-05, "loss": 0.6198, "num_tokens": 210842903.0, "step": 2237 }, { "epoch": 0.3819764464925755, "grad_norm": 0.4704258673813792, "learning_rate": 2.4727769243898278e-05, "loss": 0.6636, "num_tokens": 210954390.0, "step": 2238 }, { "epoch": 0.382147124082608, "grad_norm": 0.5604098404842648, "learning_rate": 2.4720942140296978e-05, "loss": 0.6487, "num_tokens": 211028395.0, "step": 2239 }, { "epoch": 0.3823178016726404, "grad_norm": 0.5811881839111597, "learning_rate": 2.4714115036695682e-05, "loss": 0.5619, "num_tokens": 211090758.0, "step": 2240 }, { "epoch": 0.3824884792626728, "grad_norm": 0.4602173810196816, "learning_rate": 2.4707287933094386e-05, "loss": 0.6254, "num_tokens": 211212080.0, "step": 2241 }, { "epoch": 0.38265915685270524, "grad_norm": 0.454945876096637, "learning_rate": 2.470046082949309e-05, "loss": 0.5127, "num_tokens": 211309346.0, "step": 2242 }, { "epoch": 0.38282983444273766, "grad_norm": 0.4860453344239281, "learning_rate": 2.4693633725891793e-05, "loss": 0.6221, "num_tokens": 211416539.0, "step": 2243 }, { "epoch": 0.3830005120327701, "grad_norm": 0.5025641045607832, "learning_rate": 2.4686806622290497e-05, "loss": 0.5453, "num_tokens": 211496177.0, "step": 2244 }, { "epoch": 0.3831711896228025, "grad_norm": 0.4308234889223253, "learning_rate": 2.4679979518689197e-05, "loss": 0.5855, "num_tokens": 211618522.0, "step": 2245 }, { "epoch": 0.383341867212835, "grad_norm": 0.4837403627892437, "learning_rate": 2.46731524150879e-05, "loss": 0.597, "num_tokens": 211719405.0, "step": 2246 }, { "epoch": 0.3835125448028674, "grad_norm": 0.4966736987089872, "learning_rate": 2.4666325311486605e-05, "loss": 0.6358, "num_tokens": 211815579.0, "step": 2247 }, { "epoch": 0.3836832223928998, "grad_norm": 0.544419689268734, "learning_rate": 2.465949820788531e-05, "loss": 0.667, "num_tokens": 211898107.0, "step": 2248 }, { "epoch": 0.38385389998293223, "grad_norm": 0.5183664714860735, "learning_rate": 2.4652671104284012e-05, "loss": 0.6837, "num_tokens": 211999260.0, "step": 2249 }, { "epoch": 0.38402457757296465, "grad_norm": 0.5386636278331157, "learning_rate": 2.464584400068271e-05, "loss": 0.6722, "num_tokens": 212089774.0, "step": 2250 }, { "epoch": 0.3841952551629971, "grad_norm": 0.5032535945796976, "learning_rate": 2.4639016897081413e-05, "loss": 0.692, "num_tokens": 212192162.0, "step": 2251 }, { "epoch": 0.38436593275302955, "grad_norm": 0.5400079637273572, "learning_rate": 2.4632189793480117e-05, "loss": 0.631, "num_tokens": 212267825.0, "step": 2252 }, { "epoch": 0.38453661034306197, "grad_norm": 0.4732898905362462, "learning_rate": 2.462536268987882e-05, "loss": 0.5764, "num_tokens": 212362700.0, "step": 2253 }, { "epoch": 0.3847072879330944, "grad_norm": 0.5486535296198755, "learning_rate": 2.4618535586277524e-05, "loss": 0.6939, "num_tokens": 212445950.0, "step": 2254 }, { "epoch": 0.3848779655231268, "grad_norm": 0.4756247125567084, "learning_rate": 2.4611708482676228e-05, "loss": 0.5818, "num_tokens": 212548780.0, "step": 2255 }, { "epoch": 0.38504864311315923, "grad_norm": 0.5390679380344604, "learning_rate": 2.460488137907493e-05, "loss": 0.6721, "num_tokens": 212629558.0, "step": 2256 }, { "epoch": 0.38521932070319165, "grad_norm": 0.47647293176486455, "learning_rate": 2.4598054275473632e-05, "loss": 0.5485, "num_tokens": 212719202.0, "step": 2257 }, { "epoch": 0.3853899982932241, "grad_norm": 0.47099850734202336, "learning_rate": 2.4591227171872336e-05, "loss": 0.6231, "num_tokens": 212833253.0, "step": 2258 }, { "epoch": 0.38556067588325654, "grad_norm": 0.46399822192899687, "learning_rate": 2.458440006827104e-05, "loss": 0.6393, "num_tokens": 212953324.0, "step": 2259 }, { "epoch": 0.38573135347328896, "grad_norm": 0.5510509372540577, "learning_rate": 2.4577572964669744e-05, "loss": 0.5974, "num_tokens": 213035779.0, "step": 2260 }, { "epoch": 0.3859020310633214, "grad_norm": 0.5269803875470527, "learning_rate": 2.4570745861068447e-05, "loss": 0.5769, "num_tokens": 213115252.0, "step": 2261 }, { "epoch": 0.3860727086533538, "grad_norm": 0.5174093565386114, "learning_rate": 2.4563918757467144e-05, "loss": 0.6668, "num_tokens": 213213249.0, "step": 2262 }, { "epoch": 0.3862433862433862, "grad_norm": 0.5167648911080228, "learning_rate": 2.4557091653865848e-05, "loss": 0.6932, "num_tokens": 213321298.0, "step": 2263 }, { "epoch": 0.3864140638334187, "grad_norm": 0.6641506584861525, "learning_rate": 2.4550264550264552e-05, "loss": 0.6193, "num_tokens": 213408868.0, "step": 2264 }, { "epoch": 0.3865847414234511, "grad_norm": 0.4634711328227321, "learning_rate": 2.4543437446663256e-05, "loss": 0.5869, "num_tokens": 213516865.0, "step": 2265 }, { "epoch": 0.38675541901348354, "grad_norm": 0.5024339285320417, "learning_rate": 2.4536610343061956e-05, "loss": 0.6476, "num_tokens": 213613533.0, "step": 2266 }, { "epoch": 0.38692609660351596, "grad_norm": 0.8292755299554095, "learning_rate": 2.452978323946066e-05, "loss": 0.6714, "num_tokens": 213694452.0, "step": 2267 }, { "epoch": 0.3870967741935484, "grad_norm": 0.4816080661977116, "learning_rate": 2.4522956135859364e-05, "loss": 0.6258, "num_tokens": 213800420.0, "step": 2268 }, { "epoch": 0.3872674517835808, "grad_norm": 0.49019000391049716, "learning_rate": 2.4516129032258067e-05, "loss": 0.5882, "num_tokens": 213887646.0, "step": 2269 }, { "epoch": 0.38743812937361327, "grad_norm": 0.481881527908397, "learning_rate": 2.450930192865677e-05, "loss": 0.6426, "num_tokens": 213990047.0, "step": 2270 }, { "epoch": 0.3876088069636457, "grad_norm": 0.49932581364452533, "learning_rate": 2.4502474825055475e-05, "loss": 0.5185, "num_tokens": 214073420.0, "step": 2271 }, { "epoch": 0.3877794845536781, "grad_norm": 0.5622899721188643, "learning_rate": 2.4495647721454175e-05, "loss": 0.6481, "num_tokens": 214155199.0, "step": 2272 }, { "epoch": 0.38795016214371053, "grad_norm": 0.5554369807013596, "learning_rate": 2.448882061785288e-05, "loss": 0.6796, "num_tokens": 214255521.0, "step": 2273 }, { "epoch": 0.38812083973374295, "grad_norm": 0.5136688780752265, "learning_rate": 2.448199351425158e-05, "loss": 0.5777, "num_tokens": 214333594.0, "step": 2274 }, { "epoch": 0.38829151732377537, "grad_norm": 0.46503265627801016, "learning_rate": 2.4475166410650283e-05, "loss": 0.5159, "num_tokens": 214423849.0, "step": 2275 }, { "epoch": 0.3884621949138078, "grad_norm": 0.5482201321633803, "learning_rate": 2.4468339307048983e-05, "loss": 0.5612, "num_tokens": 214490397.0, "step": 2276 }, { "epoch": 0.38863287250384027, "grad_norm": 0.5171428493142186, "learning_rate": 2.4461512203447687e-05, "loss": 0.6113, "num_tokens": 214580235.0, "step": 2277 }, { "epoch": 0.3888035500938727, "grad_norm": 0.4588844618726672, "learning_rate": 2.445468509984639e-05, "loss": 0.5962, "num_tokens": 214701281.0, "step": 2278 }, { "epoch": 0.3889742276839051, "grad_norm": 0.5130364264557178, "learning_rate": 2.4447857996245095e-05, "loss": 0.6144, "num_tokens": 214782083.0, "step": 2279 }, { "epoch": 0.3891449052739375, "grad_norm": 0.48789758963631613, "learning_rate": 2.44410308926438e-05, "loss": 0.7143, "num_tokens": 214906548.0, "step": 2280 }, { "epoch": 0.38931558286396994, "grad_norm": 0.5037139082356947, "learning_rate": 2.4434203789042502e-05, "loss": 0.5453, "num_tokens": 215002869.0, "step": 2281 }, { "epoch": 0.38948626045400236, "grad_norm": 0.5141485109322536, "learning_rate": 2.4427376685441203e-05, "loss": 0.7401, "num_tokens": 215096685.0, "step": 2282 }, { "epoch": 0.38965693804403484, "grad_norm": 0.4870337420148902, "learning_rate": 2.4420549581839906e-05, "loss": 0.5737, "num_tokens": 215186777.0, "step": 2283 }, { "epoch": 0.38982761563406726, "grad_norm": 0.4730650815651334, "learning_rate": 2.441372247823861e-05, "loss": 0.5876, "num_tokens": 215284136.0, "step": 2284 }, { "epoch": 0.3899982932240997, "grad_norm": 0.6651115384010468, "learning_rate": 2.4406895374637314e-05, "loss": 0.7528, "num_tokens": 215387777.0, "step": 2285 }, { "epoch": 0.3901689708141321, "grad_norm": 0.48102904555202314, "learning_rate": 2.4400068271036018e-05, "loss": 0.5774, "num_tokens": 215478783.0, "step": 2286 }, { "epoch": 0.3903396484041645, "grad_norm": 0.5114576406349645, "learning_rate": 2.4393241167434715e-05, "loss": 0.5534, "num_tokens": 215587313.0, "step": 2287 }, { "epoch": 0.39051032599419694, "grad_norm": 0.4573035386680115, "learning_rate": 2.438641406383342e-05, "loss": 0.6151, "num_tokens": 215700525.0, "step": 2288 }, { "epoch": 0.3906810035842294, "grad_norm": 0.43218440945781206, "learning_rate": 2.4379586960232122e-05, "loss": 0.5844, "num_tokens": 215820084.0, "step": 2289 }, { "epoch": 0.39085168117426183, "grad_norm": 0.4596394028665521, "learning_rate": 2.4372759856630826e-05, "loss": 0.5876, "num_tokens": 215937993.0, "step": 2290 }, { "epoch": 0.39102235876429425, "grad_norm": 0.5159870834352941, "learning_rate": 2.436593275302953e-05, "loss": 0.5776, "num_tokens": 216014792.0, "step": 2291 }, { "epoch": 0.3911930363543267, "grad_norm": 0.5209175275230461, "learning_rate": 2.4359105649428234e-05, "loss": 0.5665, "num_tokens": 216090978.0, "step": 2292 }, { "epoch": 0.3913637139443591, "grad_norm": 0.467625553479465, "learning_rate": 2.4352278545826934e-05, "loss": 0.6238, "num_tokens": 216202052.0, "step": 2293 }, { "epoch": 0.3915343915343915, "grad_norm": 0.5205690348617306, "learning_rate": 2.4345451442225638e-05, "loss": 0.585, "num_tokens": 216283423.0, "step": 2294 }, { "epoch": 0.391705069124424, "grad_norm": 0.5324064491616337, "learning_rate": 2.433862433862434e-05, "loss": 0.703, "num_tokens": 216384113.0, "step": 2295 }, { "epoch": 0.3918757467144564, "grad_norm": 0.4143767054971347, "learning_rate": 2.4331797235023045e-05, "loss": 0.5389, "num_tokens": 216502345.0, "step": 2296 }, { "epoch": 0.3920464243044888, "grad_norm": 0.4543370106769294, "learning_rate": 2.432497013142175e-05, "loss": 0.5754, "num_tokens": 216604111.0, "step": 2297 }, { "epoch": 0.39221710189452125, "grad_norm": 0.5252777270265523, "learning_rate": 2.4318143027820453e-05, "loss": 0.6452, "num_tokens": 216687688.0, "step": 2298 }, { "epoch": 0.39238777948455367, "grad_norm": 0.6387850887843657, "learning_rate": 2.431131592421915e-05, "loss": 0.6915, "num_tokens": 216749142.0, "step": 2299 }, { "epoch": 0.3925584570745861, "grad_norm": 0.5115380550218598, "learning_rate": 2.4304488820617853e-05, "loss": 0.6005, "num_tokens": 216839291.0, "step": 2300 }, { "epoch": 0.39272913466461856, "grad_norm": 0.4745447822057751, "learning_rate": 2.4297661717016557e-05, "loss": 0.5944, "num_tokens": 216930324.0, "step": 2301 }, { "epoch": 0.392899812254651, "grad_norm": 0.4972397891076115, "learning_rate": 2.429083461341526e-05, "loss": 0.6244, "num_tokens": 217033691.0, "step": 2302 }, { "epoch": 0.3930704898446834, "grad_norm": 0.4842003981105669, "learning_rate": 2.428400750981396e-05, "loss": 0.5336, "num_tokens": 217131729.0, "step": 2303 }, { "epoch": 0.3932411674347158, "grad_norm": 0.49144941846692886, "learning_rate": 2.4277180406212665e-05, "loss": 0.6132, "num_tokens": 217224553.0, "step": 2304 }, { "epoch": 0.39341184502474824, "grad_norm": 0.4397083396401116, "learning_rate": 2.427035330261137e-05, "loss": 0.5359, "num_tokens": 217323036.0, "step": 2305 }, { "epoch": 0.39358252261478066, "grad_norm": 0.5585310814597336, "learning_rate": 2.4263526199010073e-05, "loss": 0.6494, "num_tokens": 217415709.0, "step": 2306 }, { "epoch": 0.3937532002048131, "grad_norm": 0.5579155676702687, "learning_rate": 2.4256699095408776e-05, "loss": 0.5672, "num_tokens": 217479464.0, "step": 2307 }, { "epoch": 0.39392387779484556, "grad_norm": 0.4974477847418061, "learning_rate": 2.424987199180748e-05, "loss": 0.6113, "num_tokens": 217591733.0, "step": 2308 }, { "epoch": 0.394094555384878, "grad_norm": 0.4558657497302916, "learning_rate": 2.424304488820618e-05, "loss": 0.5437, "num_tokens": 217688759.0, "step": 2309 }, { "epoch": 0.3942652329749104, "grad_norm": 0.4694586579607243, "learning_rate": 2.4236217784604884e-05, "loss": 0.7115, "num_tokens": 217814534.0, "step": 2310 }, { "epoch": 0.3944359105649428, "grad_norm": 0.46086899942149856, "learning_rate": 2.4229390681003588e-05, "loss": 0.5685, "num_tokens": 217923522.0, "step": 2311 }, { "epoch": 0.39460658815497524, "grad_norm": 0.46478969650002044, "learning_rate": 2.422256357740229e-05, "loss": 0.6192, "num_tokens": 218031935.0, "step": 2312 }, { "epoch": 0.39477726574500765, "grad_norm": 0.5319914130393121, "learning_rate": 2.421573647380099e-05, "loss": 0.6238, "num_tokens": 218114026.0, "step": 2313 }, { "epoch": 0.39494794333504013, "grad_norm": 0.5779723480440926, "learning_rate": 2.4208909370199693e-05, "loss": 0.7279, "num_tokens": 218212284.0, "step": 2314 }, { "epoch": 0.39511862092507255, "grad_norm": 0.5762672420114009, "learning_rate": 2.4202082266598396e-05, "loss": 0.6496, "num_tokens": 218278560.0, "step": 2315 }, { "epoch": 0.39528929851510497, "grad_norm": 0.48883042399349996, "learning_rate": 2.41952551629971e-05, "loss": 0.7073, "num_tokens": 218413619.0, "step": 2316 }, { "epoch": 0.3954599761051374, "grad_norm": 0.5586017176607562, "learning_rate": 2.4188428059395804e-05, "loss": 0.6013, "num_tokens": 218482865.0, "step": 2317 }, { "epoch": 0.3956306536951698, "grad_norm": 0.5512211886992747, "learning_rate": 2.4181600955794508e-05, "loss": 0.6403, "num_tokens": 218557773.0, "step": 2318 }, { "epoch": 0.39580133128520223, "grad_norm": 0.4852230122561447, "learning_rate": 2.4174773852193208e-05, "loss": 0.5583, "num_tokens": 218658094.0, "step": 2319 }, { "epoch": 0.3959720088752347, "grad_norm": 0.4754562661645349, "learning_rate": 2.4167946748591912e-05, "loss": 0.5772, "num_tokens": 218754065.0, "step": 2320 }, { "epoch": 0.3961426864652671, "grad_norm": 0.5080498344507179, "learning_rate": 2.4161119644990616e-05, "loss": 0.6103, "num_tokens": 218850353.0, "step": 2321 }, { "epoch": 0.39631336405529954, "grad_norm": 0.4908565468542026, "learning_rate": 2.415429254138932e-05, "loss": 0.6196, "num_tokens": 218958224.0, "step": 2322 }, { "epoch": 0.39648404164533196, "grad_norm": 0.6296764515878164, "learning_rate": 2.4147465437788023e-05, "loss": 0.511, "num_tokens": 219025568.0, "step": 2323 }, { "epoch": 0.3966547192353644, "grad_norm": 0.49499959905163043, "learning_rate": 2.414063833418672e-05, "loss": 0.6086, "num_tokens": 219122449.0, "step": 2324 }, { "epoch": 0.3968253968253968, "grad_norm": 0.48240540296678136, "learning_rate": 2.4133811230585424e-05, "loss": 0.7033, "num_tokens": 219242171.0, "step": 2325 }, { "epoch": 0.3969960744154293, "grad_norm": 0.5098217900130855, "learning_rate": 2.4126984126984128e-05, "loss": 0.5922, "num_tokens": 219326718.0, "step": 2326 }, { "epoch": 0.3971667520054617, "grad_norm": 0.5108669573717571, "learning_rate": 2.412015702338283e-05, "loss": 0.6173, "num_tokens": 219424305.0, "step": 2327 }, { "epoch": 0.3973374295954941, "grad_norm": 0.4517055864832452, "learning_rate": 2.4113329919781535e-05, "loss": 0.5856, "num_tokens": 219530723.0, "step": 2328 }, { "epoch": 0.39750810718552654, "grad_norm": 0.5061113587740391, "learning_rate": 2.410650281618024e-05, "loss": 0.6616, "num_tokens": 219629474.0, "step": 2329 }, { "epoch": 0.39767878477555896, "grad_norm": 0.552531277615828, "learning_rate": 2.409967571257894e-05, "loss": 0.6024, "num_tokens": 219703628.0, "step": 2330 }, { "epoch": 0.3978494623655914, "grad_norm": 0.5264662392873181, "learning_rate": 2.4092848608977643e-05, "loss": 0.6131, "num_tokens": 219776280.0, "step": 2331 }, { "epoch": 0.39802013995562385, "grad_norm": 0.4996383012740999, "learning_rate": 2.4086021505376347e-05, "loss": 0.5061, "num_tokens": 219850184.0, "step": 2332 }, { "epoch": 0.3981908175456563, "grad_norm": 0.5304735562715119, "learning_rate": 2.407919440177505e-05, "loss": 0.611, "num_tokens": 219954568.0, "step": 2333 }, { "epoch": 0.3983614951356887, "grad_norm": 0.5741372854651873, "learning_rate": 2.4072367298173754e-05, "loss": 0.6273, "num_tokens": 220031153.0, "step": 2334 }, { "epoch": 0.3985321727257211, "grad_norm": 0.5139152640884838, "learning_rate": 2.4065540194572458e-05, "loss": 0.5774, "num_tokens": 220122702.0, "step": 2335 }, { "epoch": 0.39870285031575353, "grad_norm": 0.5070263410316677, "learning_rate": 2.4058713090971155e-05, "loss": 0.5575, "num_tokens": 220208307.0, "step": 2336 }, { "epoch": 0.39887352790578595, "grad_norm": 0.4766658312740394, "learning_rate": 2.405188598736986e-05, "loss": 0.5889, "num_tokens": 220309717.0, "step": 2337 }, { "epoch": 0.39904420549581837, "grad_norm": 0.5243311512465705, "learning_rate": 2.4045058883768563e-05, "loss": 0.5755, "num_tokens": 220385919.0, "step": 2338 }, { "epoch": 0.39921488308585085, "grad_norm": 0.4815921624764374, "learning_rate": 2.4038231780167266e-05, "loss": 0.5515, "num_tokens": 220485031.0, "step": 2339 }, { "epoch": 0.39938556067588327, "grad_norm": 0.5035206049681762, "learning_rate": 2.4031404676565967e-05, "loss": 0.523, "num_tokens": 220563323.0, "step": 2340 }, { "epoch": 0.3995562382659157, "grad_norm": 0.5869658867071449, "learning_rate": 2.402457757296467e-05, "loss": 0.6709, "num_tokens": 220645640.0, "step": 2341 }, { "epoch": 0.3997269158559481, "grad_norm": 0.5171815049901933, "learning_rate": 2.4017750469363374e-05, "loss": 0.6008, "num_tokens": 220738449.0, "step": 2342 }, { "epoch": 0.3998975934459805, "grad_norm": 0.4827910357112666, "learning_rate": 2.4010923365762078e-05, "loss": 0.7094, "num_tokens": 220855292.0, "step": 2343 }, { "epoch": 0.40006827103601295, "grad_norm": 0.5103880938337038, "learning_rate": 2.4004096262160782e-05, "loss": 0.6057, "num_tokens": 220950820.0, "step": 2344 }, { "epoch": 0.4002389486260454, "grad_norm": 0.4683107310434939, "learning_rate": 2.3997269158559486e-05, "loss": 0.6392, "num_tokens": 221065939.0, "step": 2345 }, { "epoch": 0.40040962621607784, "grad_norm": 0.5139949475253363, "learning_rate": 2.3990442054958186e-05, "loss": 0.5611, "num_tokens": 221164794.0, "step": 2346 }, { "epoch": 0.40058030380611026, "grad_norm": 0.48697691987566954, "learning_rate": 2.398361495135689e-05, "loss": 0.5973, "num_tokens": 221254043.0, "step": 2347 }, { "epoch": 0.4007509813961427, "grad_norm": 0.527218526229706, "learning_rate": 2.3976787847755593e-05, "loss": 0.5691, "num_tokens": 221330027.0, "step": 2348 }, { "epoch": 0.4009216589861751, "grad_norm": 0.5466100689281735, "learning_rate": 2.3969960744154294e-05, "loss": 0.5681, "num_tokens": 221401400.0, "step": 2349 }, { "epoch": 0.4010923365762075, "grad_norm": 0.4855558782707177, "learning_rate": 2.3963133640552994e-05, "loss": 0.5431, "num_tokens": 221498615.0, "step": 2350 }, { "epoch": 0.40126301416624, "grad_norm": 0.46768484952001627, "learning_rate": 2.3956306536951698e-05, "loss": 0.5398, "num_tokens": 221595473.0, "step": 2351 }, { "epoch": 0.4014336917562724, "grad_norm": 0.5048374618658158, "learning_rate": 2.39494794333504e-05, "loss": 0.6133, "num_tokens": 221692338.0, "step": 2352 }, { "epoch": 0.40160436934630483, "grad_norm": 0.5461956022975948, "learning_rate": 2.3942652329749105e-05, "loss": 0.6327, "num_tokens": 221785415.0, "step": 2353 }, { "epoch": 0.40177504693633725, "grad_norm": 0.5101241871893103, "learning_rate": 2.393582522614781e-05, "loss": 0.5569, "num_tokens": 221869726.0, "step": 2354 }, { "epoch": 0.4019457245263697, "grad_norm": 0.527911395728902, "learning_rate": 2.3928998122546513e-05, "loss": 0.644, "num_tokens": 221971363.0, "step": 2355 }, { "epoch": 0.4021164021164021, "grad_norm": 0.4447118524192706, "learning_rate": 2.3922171018945213e-05, "loss": 0.6409, "num_tokens": 222093086.0, "step": 2356 }, { "epoch": 0.40228707970643457, "grad_norm": 0.4647292105272507, "learning_rate": 2.3915343915343917e-05, "loss": 0.5174, "num_tokens": 222188448.0, "step": 2357 }, { "epoch": 0.402457757296467, "grad_norm": 0.551055848953137, "learning_rate": 2.390851681174262e-05, "loss": 0.5717, "num_tokens": 222274483.0, "step": 2358 }, { "epoch": 0.4026284348864994, "grad_norm": 0.4826773806394218, "learning_rate": 2.3901689708141325e-05, "loss": 0.6481, "num_tokens": 222382240.0, "step": 2359 }, { "epoch": 0.40279911247653183, "grad_norm": 0.5594223577606974, "learning_rate": 2.389486260454003e-05, "loss": 0.6243, "num_tokens": 222480795.0, "step": 2360 }, { "epoch": 0.40296979006656425, "grad_norm": 0.47913285272583367, "learning_rate": 2.3888035500938725e-05, "loss": 0.5492, "num_tokens": 222563886.0, "step": 2361 }, { "epoch": 0.40314046765659667, "grad_norm": 0.4780122212461984, "learning_rate": 2.388120839733743e-05, "loss": 0.5641, "num_tokens": 222657442.0, "step": 2362 }, { "epoch": 0.40331114524662914, "grad_norm": 0.5011742054445733, "learning_rate": 2.3874381293736133e-05, "loss": 0.5736, "num_tokens": 222736945.0, "step": 2363 }, { "epoch": 0.40348182283666156, "grad_norm": 0.5973194347165425, "learning_rate": 2.3867554190134837e-05, "loss": 0.5867, "num_tokens": 222795727.0, "step": 2364 }, { "epoch": 0.403652500426694, "grad_norm": 0.5093639915969538, "learning_rate": 2.386072708653354e-05, "loss": 0.5714, "num_tokens": 222878082.0, "step": 2365 }, { "epoch": 0.4038231780167264, "grad_norm": 0.45997697147042527, "learning_rate": 2.3853899982932244e-05, "loss": 0.5406, "num_tokens": 222978612.0, "step": 2366 }, { "epoch": 0.4039938556067588, "grad_norm": 0.49460956930756567, "learning_rate": 2.3847072879330945e-05, "loss": 0.6495, "num_tokens": 223075037.0, "step": 2367 }, { "epoch": 0.40416453319679124, "grad_norm": 0.4866215290968026, "learning_rate": 2.384024577572965e-05, "loss": 0.5108, "num_tokens": 223160217.0, "step": 2368 }, { "epoch": 0.4043352107868237, "grad_norm": 0.5300784892204703, "learning_rate": 2.3833418672128352e-05, "loss": 0.5694, "num_tokens": 223247134.0, "step": 2369 }, { "epoch": 0.40450588837685614, "grad_norm": 0.5479867847596525, "learning_rate": 2.3826591568527056e-05, "loss": 0.644, "num_tokens": 223353843.0, "step": 2370 }, { "epoch": 0.40467656596688856, "grad_norm": 0.5578073075743806, "learning_rate": 2.381976446492576e-05, "loss": 0.6847, "num_tokens": 223458141.0, "step": 2371 }, { "epoch": 0.404847243556921, "grad_norm": 0.47679874306291337, "learning_rate": 2.3812937361324463e-05, "loss": 0.6988, "num_tokens": 223573339.0, "step": 2372 }, { "epoch": 0.4050179211469534, "grad_norm": 0.5002483964172377, "learning_rate": 2.3806110257723164e-05, "loss": 0.58, "num_tokens": 223674483.0, "step": 2373 }, { "epoch": 0.4051885987369858, "grad_norm": 0.4697762806125055, "learning_rate": 2.3799283154121864e-05, "loss": 0.6308, "num_tokens": 223779389.0, "step": 2374 }, { "epoch": 0.40535927632701824, "grad_norm": 0.4761483736158134, "learning_rate": 2.3792456050520568e-05, "loss": 0.6346, "num_tokens": 223883440.0, "step": 2375 }, { "epoch": 0.4055299539170507, "grad_norm": 0.5493140696656084, "learning_rate": 2.378562894691927e-05, "loss": 0.565, "num_tokens": 223947934.0, "step": 2376 }, { "epoch": 0.40570063150708313, "grad_norm": 0.46575341319162544, "learning_rate": 2.3778801843317972e-05, "loss": 0.5905, "num_tokens": 224055726.0, "step": 2377 }, { "epoch": 0.40587130909711555, "grad_norm": 0.4596584934796199, "learning_rate": 2.3771974739716676e-05, "loss": 0.6416, "num_tokens": 224173270.0, "step": 2378 }, { "epoch": 0.40604198668714797, "grad_norm": 0.4967935085637729, "learning_rate": 2.376514763611538e-05, "loss": 0.5524, "num_tokens": 224257734.0, "step": 2379 }, { "epoch": 0.4062126642771804, "grad_norm": 0.4532189791400893, "learning_rate": 2.3758320532514083e-05, "loss": 0.5851, "num_tokens": 224375030.0, "step": 2380 }, { "epoch": 0.4063833418672128, "grad_norm": 0.4852826430586424, "learning_rate": 2.3751493428912787e-05, "loss": 0.5075, "num_tokens": 224458659.0, "step": 2381 }, { "epoch": 0.4065540194572453, "grad_norm": 0.477406283139159, "learning_rate": 2.374466632531149e-05, "loss": 0.5827, "num_tokens": 224552861.0, "step": 2382 }, { "epoch": 0.4067246970472777, "grad_norm": 0.512610816453536, "learning_rate": 2.373783922171019e-05, "loss": 0.6092, "num_tokens": 224638266.0, "step": 2383 }, { "epoch": 0.4068953746373101, "grad_norm": 0.5087510852432886, "learning_rate": 2.3731012118108895e-05, "loss": 0.5901, "num_tokens": 224714313.0, "step": 2384 }, { "epoch": 0.40706605222734255, "grad_norm": 0.4497426540487177, "learning_rate": 2.37241850145076e-05, "loss": 0.5658, "num_tokens": 224837848.0, "step": 2385 }, { "epoch": 0.40723672981737497, "grad_norm": 0.4882065882062325, "learning_rate": 2.37173579109063e-05, "loss": 0.5611, "num_tokens": 224942198.0, "step": 2386 }, { "epoch": 0.4074074074074074, "grad_norm": 0.48647759132024576, "learning_rate": 2.3710530807305e-05, "loss": 0.5719, "num_tokens": 225035689.0, "step": 2387 }, { "epoch": 0.40757808499743986, "grad_norm": 0.5479717528493968, "learning_rate": 2.3703703703703703e-05, "loss": 0.6187, "num_tokens": 225116549.0, "step": 2388 }, { "epoch": 0.4077487625874723, "grad_norm": 0.4744205595420556, "learning_rate": 2.3696876600102407e-05, "loss": 0.6759, "num_tokens": 225243669.0, "step": 2389 }, { "epoch": 0.4079194401775047, "grad_norm": 0.4988533550625587, "learning_rate": 2.369004949650111e-05, "loss": 0.5915, "num_tokens": 225348734.0, "step": 2390 }, { "epoch": 0.4080901177675371, "grad_norm": 0.4616925068734663, "learning_rate": 2.3683222392899815e-05, "loss": 0.5725, "num_tokens": 225455621.0, "step": 2391 }, { "epoch": 0.40826079535756954, "grad_norm": 0.46070930620842343, "learning_rate": 2.367639528929852e-05, "loss": 0.6043, "num_tokens": 225561049.0, "step": 2392 }, { "epoch": 0.40843147294760196, "grad_norm": 0.4561966977048284, "learning_rate": 2.366956818569722e-05, "loss": 0.6125, "num_tokens": 225676137.0, "step": 2393 }, { "epoch": 0.40860215053763443, "grad_norm": 0.4596587784533439, "learning_rate": 2.3662741082095922e-05, "loss": 0.6295, "num_tokens": 225795570.0, "step": 2394 }, { "epoch": 0.40877282812766685, "grad_norm": 0.5532546603083288, "learning_rate": 2.3655913978494626e-05, "loss": 0.5336, "num_tokens": 225856180.0, "step": 2395 }, { "epoch": 0.4089435057176993, "grad_norm": 0.48748599234187384, "learning_rate": 2.364908687489333e-05, "loss": 0.6001, "num_tokens": 225945184.0, "step": 2396 }, { "epoch": 0.4091141833077317, "grad_norm": 0.6030997464875462, "learning_rate": 2.3642259771292034e-05, "loss": 0.5947, "num_tokens": 226003358.0, "step": 2397 }, { "epoch": 0.4092848608977641, "grad_norm": 0.5200076901066409, "learning_rate": 2.363543266769073e-05, "loss": 0.6329, "num_tokens": 226108602.0, "step": 2398 }, { "epoch": 0.40945553848779653, "grad_norm": 0.49315943696077624, "learning_rate": 2.3628605564089434e-05, "loss": 0.5787, "num_tokens": 226209665.0, "step": 2399 }, { "epoch": 0.409626216077829, "grad_norm": 0.4940524731943455, "learning_rate": 2.3621778460488138e-05, "loss": 0.6455, "num_tokens": 226318743.0, "step": 2400 }, { "epoch": 0.40979689366786143, "grad_norm": 0.5164475516520829, "learning_rate": 2.3614951356886842e-05, "loss": 0.6021, "num_tokens": 226399645.0, "step": 2401 }, { "epoch": 0.40996757125789385, "grad_norm": 0.48030433159358443, "learning_rate": 2.3608124253285546e-05, "loss": 0.5736, "num_tokens": 226495384.0, "step": 2402 }, { "epoch": 0.41013824884792627, "grad_norm": 0.4607578997878919, "learning_rate": 2.360129714968425e-05, "loss": 0.5804, "num_tokens": 226597902.0, "step": 2403 }, { "epoch": 0.4103089264379587, "grad_norm": 0.4979041692378095, "learning_rate": 2.359447004608295e-05, "loss": 0.5749, "num_tokens": 226691994.0, "step": 2404 }, { "epoch": 0.4104796040279911, "grad_norm": 0.5392265036230711, "learning_rate": 2.3587642942481654e-05, "loss": 0.6306, "num_tokens": 226789312.0, "step": 2405 }, { "epoch": 0.4106502816180235, "grad_norm": 0.5316926644587151, "learning_rate": 2.3580815838880357e-05, "loss": 0.611, "num_tokens": 226898554.0, "step": 2406 }, { "epoch": 0.410820959208056, "grad_norm": 0.4516054681639296, "learning_rate": 2.357398873527906e-05, "loss": 0.5827, "num_tokens": 227018560.0, "step": 2407 }, { "epoch": 0.4109916367980884, "grad_norm": 0.5192620291469952, "learning_rate": 2.3567161631677765e-05, "loss": 0.5778, "num_tokens": 227103628.0, "step": 2408 }, { "epoch": 0.41116231438812084, "grad_norm": 0.5081503526354306, "learning_rate": 2.356033452807647e-05, "loss": 0.6432, "num_tokens": 227201185.0, "step": 2409 }, { "epoch": 0.41133299197815326, "grad_norm": 0.5030838762359839, "learning_rate": 2.355350742447517e-05, "loss": 0.6387, "num_tokens": 227289777.0, "step": 2410 }, { "epoch": 0.4115036695681857, "grad_norm": 0.5060705334257647, "learning_rate": 2.354668032087387e-05, "loss": 0.6217, "num_tokens": 227385309.0, "step": 2411 }, { "epoch": 0.4116743471582181, "grad_norm": 0.5148647024976987, "learning_rate": 2.3539853217272573e-05, "loss": 0.5266, "num_tokens": 227460742.0, "step": 2412 }, { "epoch": 0.4118450247482506, "grad_norm": 0.545406157889297, "learning_rate": 2.3533026113671277e-05, "loss": 0.5739, "num_tokens": 227541305.0, "step": 2413 }, { "epoch": 0.412015702338283, "grad_norm": 0.5090444406615731, "learning_rate": 2.3526199010069977e-05, "loss": 0.7265, "num_tokens": 227659283.0, "step": 2414 }, { "epoch": 0.4121863799283154, "grad_norm": 0.521322156575517, "learning_rate": 2.351937190646868e-05, "loss": 0.6251, "num_tokens": 227745044.0, "step": 2415 }, { "epoch": 0.41235705751834784, "grad_norm": 0.5783647950196269, "learning_rate": 2.3512544802867385e-05, "loss": 0.5987, "num_tokens": 227824318.0, "step": 2416 }, { "epoch": 0.41252773510838026, "grad_norm": 0.5772601369451659, "learning_rate": 2.350571769926609e-05, "loss": 0.6937, "num_tokens": 227910868.0, "step": 2417 }, { "epoch": 0.4126984126984127, "grad_norm": 0.4516412607910109, "learning_rate": 2.3498890595664792e-05, "loss": 0.6127, "num_tokens": 228034685.0, "step": 2418 }, { "epoch": 0.41286909028844515, "grad_norm": 0.46061570607257624, "learning_rate": 2.3492063492063496e-05, "loss": 0.5247, "num_tokens": 228140044.0, "step": 2419 }, { "epoch": 0.41303976787847757, "grad_norm": 0.46889340598196716, "learning_rate": 2.3485236388462197e-05, "loss": 0.6185, "num_tokens": 228252518.0, "step": 2420 }, { "epoch": 0.41321044546851, "grad_norm": 0.5636269323798917, "learning_rate": 2.34784092848609e-05, "loss": 0.6032, "num_tokens": 228317673.0, "step": 2421 }, { "epoch": 0.4133811230585424, "grad_norm": 0.5056278638901767, "learning_rate": 2.3471582181259604e-05, "loss": 0.6943, "num_tokens": 228426921.0, "step": 2422 }, { "epoch": 0.41355180064857483, "grad_norm": 0.4848544027612261, "learning_rate": 2.3464755077658304e-05, "loss": 0.5607, "num_tokens": 228517763.0, "step": 2423 }, { "epoch": 0.41372247823860725, "grad_norm": 0.5394153485261212, "learning_rate": 2.3457927974057005e-05, "loss": 0.601, "num_tokens": 228602347.0, "step": 2424 }, { "epoch": 0.4138931558286397, "grad_norm": 0.4597146703871082, "learning_rate": 2.345110087045571e-05, "loss": 0.6142, "num_tokens": 228716552.0, "step": 2425 }, { "epoch": 0.41406383341867214, "grad_norm": 0.4778422274346972, "learning_rate": 2.3444273766854412e-05, "loss": 0.6795, "num_tokens": 228838043.0, "step": 2426 }, { "epoch": 0.41423451100870456, "grad_norm": 0.46777090321775516, "learning_rate": 2.3437446663253116e-05, "loss": 0.6089, "num_tokens": 228940764.0, "step": 2427 }, { "epoch": 0.414405188598737, "grad_norm": 0.468592919993177, "learning_rate": 2.343061955965182e-05, "loss": 0.5406, "num_tokens": 229041832.0, "step": 2428 }, { "epoch": 0.4145758661887694, "grad_norm": 0.5210872537256965, "learning_rate": 2.3423792456050524e-05, "loss": 0.6356, "num_tokens": 229127531.0, "step": 2429 }, { "epoch": 0.4147465437788018, "grad_norm": 0.48649362788437384, "learning_rate": 2.3416965352449227e-05, "loss": 0.5794, "num_tokens": 229228282.0, "step": 2430 }, { "epoch": 0.4149172213688343, "grad_norm": 0.5476611757647712, "learning_rate": 2.3410138248847928e-05, "loss": 0.6485, "num_tokens": 229307799.0, "step": 2431 }, { "epoch": 0.4150878989588667, "grad_norm": 0.5485819060755103, "learning_rate": 2.340331114524663e-05, "loss": 0.6721, "num_tokens": 229388122.0, "step": 2432 }, { "epoch": 0.41525857654889914, "grad_norm": 0.5316969641454928, "learning_rate": 2.3396484041645335e-05, "loss": 0.6486, "num_tokens": 229482232.0, "step": 2433 }, { "epoch": 0.41542925413893156, "grad_norm": 0.49366758642462444, "learning_rate": 2.338965693804404e-05, "loss": 0.6202, "num_tokens": 229587515.0, "step": 2434 }, { "epoch": 0.415599931728964, "grad_norm": 0.5129202706098873, "learning_rate": 2.3382829834442743e-05, "loss": 0.6907, "num_tokens": 229678038.0, "step": 2435 }, { "epoch": 0.4157706093189964, "grad_norm": 0.520720213228936, "learning_rate": 2.337600273084144e-05, "loss": 0.6808, "num_tokens": 229781869.0, "step": 2436 }, { "epoch": 0.4159412869090288, "grad_norm": 0.46760161887191715, "learning_rate": 2.3369175627240144e-05, "loss": 0.6047, "num_tokens": 229889667.0, "step": 2437 }, { "epoch": 0.4161119644990613, "grad_norm": 0.5956009578607092, "learning_rate": 2.3362348523638847e-05, "loss": 0.6337, "num_tokens": 229959265.0, "step": 2438 }, { "epoch": 0.4162826420890937, "grad_norm": 0.4718005198326276, "learning_rate": 2.335552142003755e-05, "loss": 0.6128, "num_tokens": 230067762.0, "step": 2439 }, { "epoch": 0.41645331967912613, "grad_norm": 0.48236929535202605, "learning_rate": 2.3348694316436255e-05, "loss": 0.5846, "num_tokens": 230161868.0, "step": 2440 }, { "epoch": 0.41662399726915855, "grad_norm": 0.5400069729479058, "learning_rate": 2.3341867212834955e-05, "loss": 0.6538, "num_tokens": 230249885.0, "step": 2441 }, { "epoch": 0.416794674859191, "grad_norm": 0.4549652815730992, "learning_rate": 2.333504010923366e-05, "loss": 0.5093, "num_tokens": 230348472.0, "step": 2442 }, { "epoch": 0.4169653524492234, "grad_norm": 0.4795533703251996, "learning_rate": 2.3328213005632363e-05, "loss": 0.6035, "num_tokens": 230457574.0, "step": 2443 }, { "epoch": 0.41713603003925587, "grad_norm": 0.526187457336726, "learning_rate": 2.3321385902031067e-05, "loss": 0.5956, "num_tokens": 230539249.0, "step": 2444 }, { "epoch": 0.4173067076292883, "grad_norm": 0.4658642902949089, "learning_rate": 2.331455879842977e-05, "loss": 0.64, "num_tokens": 230645202.0, "step": 2445 }, { "epoch": 0.4174773852193207, "grad_norm": 0.5381344051767857, "learning_rate": 2.3307731694828474e-05, "loss": 0.6082, "num_tokens": 230716906.0, "step": 2446 }, { "epoch": 0.4176480628093531, "grad_norm": 0.5041649765101441, "learning_rate": 2.3300904591227174e-05, "loss": 0.6082, "num_tokens": 230810481.0, "step": 2447 }, { "epoch": 0.41781874039938555, "grad_norm": 0.4800391425260171, "learning_rate": 2.3294077487625875e-05, "loss": 0.5557, "num_tokens": 230905938.0, "step": 2448 }, { "epoch": 0.41798941798941797, "grad_norm": 0.6810146112254023, "learning_rate": 2.328725038402458e-05, "loss": 0.6736, "num_tokens": 231004769.0, "step": 2449 }, { "epoch": 0.41816009557945044, "grad_norm": 0.5585616555317745, "learning_rate": 2.3280423280423282e-05, "loss": 0.6563, "num_tokens": 231083298.0, "step": 2450 }, { "epoch": 0.41833077316948286, "grad_norm": 0.4946678806979562, "learning_rate": 2.3273596176821983e-05, "loss": 0.6006, "num_tokens": 231178480.0, "step": 2451 }, { "epoch": 0.4185014507595153, "grad_norm": 0.5231648478404225, "learning_rate": 2.3266769073220686e-05, "loss": 0.6382, "num_tokens": 231274576.0, "step": 2452 }, { "epoch": 0.4186721283495477, "grad_norm": 0.5336800825751757, "learning_rate": 2.325994196961939e-05, "loss": 0.6712, "num_tokens": 231362586.0, "step": 2453 }, { "epoch": 0.4188428059395801, "grad_norm": 0.48778946265858764, "learning_rate": 2.3253114866018094e-05, "loss": 0.5932, "num_tokens": 231455955.0, "step": 2454 }, { "epoch": 0.41901348352961254, "grad_norm": 0.5206615824413576, "learning_rate": 2.3246287762416798e-05, "loss": 0.671, "num_tokens": 231552790.0, "step": 2455 }, { "epoch": 0.419184161119645, "grad_norm": 0.5420527326218986, "learning_rate": 2.32394606588155e-05, "loss": 0.5903, "num_tokens": 231637537.0, "step": 2456 }, { "epoch": 0.41935483870967744, "grad_norm": 0.4942111123907891, "learning_rate": 2.3232633555214202e-05, "loss": 0.5574, "num_tokens": 231723131.0, "step": 2457 }, { "epoch": 0.41952551629970986, "grad_norm": 0.49141636576124403, "learning_rate": 2.3225806451612906e-05, "loss": 0.602, "num_tokens": 231821859.0, "step": 2458 }, { "epoch": 0.4196961938897423, "grad_norm": 0.5694458384248355, "learning_rate": 2.321897934801161e-05, "loss": 0.6384, "num_tokens": 231889356.0, "step": 2459 }, { "epoch": 0.4198668714797747, "grad_norm": 0.48563768685635605, "learning_rate": 2.321215224441031e-05, "loss": 0.5495, "num_tokens": 231979313.0, "step": 2460 }, { "epoch": 0.4200375490698071, "grad_norm": 0.5888112077119407, "learning_rate": 2.3205325140809014e-05, "loss": 0.6057, "num_tokens": 232068070.0, "step": 2461 }, { "epoch": 0.4202082266598396, "grad_norm": 0.5287289873795853, "learning_rate": 2.3198498037207714e-05, "loss": 0.6132, "num_tokens": 232153413.0, "step": 2462 }, { "epoch": 0.420378904249872, "grad_norm": 0.5273263731937167, "learning_rate": 2.3191670933606418e-05, "loss": 0.5684, "num_tokens": 232225925.0, "step": 2463 }, { "epoch": 0.42054958183990443, "grad_norm": 0.48314870230664186, "learning_rate": 2.318484383000512e-05, "loss": 0.6199, "num_tokens": 232339882.0, "step": 2464 }, { "epoch": 0.42072025942993685, "grad_norm": 0.5051384802987633, "learning_rate": 2.3178016726403825e-05, "loss": 0.6596, "num_tokens": 232442942.0, "step": 2465 }, { "epoch": 0.42089093701996927, "grad_norm": 0.4960526251571781, "learning_rate": 2.317118962280253e-05, "loss": 0.6749, "num_tokens": 232549835.0, "step": 2466 }, { "epoch": 0.4210616146100017, "grad_norm": 0.48984079711799244, "learning_rate": 2.3164362519201233e-05, "loss": 0.5682, "num_tokens": 232640063.0, "step": 2467 }, { "epoch": 0.4212322922000341, "grad_norm": 0.45729403752942915, "learning_rate": 2.3157535415599933e-05, "loss": 0.6049, "num_tokens": 232750426.0, "step": 2468 }, { "epoch": 0.4214029697900666, "grad_norm": 0.5517104808036845, "learning_rate": 2.3150708311998637e-05, "loss": 0.6752, "num_tokens": 232834365.0, "step": 2469 }, { "epoch": 0.421573647380099, "grad_norm": 0.5189941321409404, "learning_rate": 2.314388120839734e-05, "loss": 0.6505, "num_tokens": 232930010.0, "step": 2470 }, { "epoch": 0.4217443249701314, "grad_norm": 0.4522457187025338, "learning_rate": 2.3137054104796044e-05, "loss": 0.5736, "num_tokens": 233050542.0, "step": 2471 }, { "epoch": 0.42191500256016384, "grad_norm": 0.4744985319533887, "learning_rate": 2.3130227001194748e-05, "loss": 0.5721, "num_tokens": 233151744.0, "step": 2472 }, { "epoch": 0.42208568015019626, "grad_norm": 0.5143808051188117, "learning_rate": 2.3123399897593445e-05, "loss": 0.5339, "num_tokens": 233223876.0, "step": 2473 }, { "epoch": 0.4222563577402287, "grad_norm": 0.45284402369904325, "learning_rate": 2.311657279399215e-05, "loss": 0.6714, "num_tokens": 233352950.0, "step": 2474 }, { "epoch": 0.42242703533026116, "grad_norm": 0.4620876462403817, "learning_rate": 2.3109745690390853e-05, "loss": 0.6313, "num_tokens": 233464419.0, "step": 2475 }, { "epoch": 0.4225977129202936, "grad_norm": 0.5709251833202815, "learning_rate": 2.3102918586789556e-05, "loss": 0.596, "num_tokens": 233525369.0, "step": 2476 }, { "epoch": 0.422768390510326, "grad_norm": 0.46181121378830037, "learning_rate": 2.309609148318826e-05, "loss": 0.5706, "num_tokens": 233620549.0, "step": 2477 }, { "epoch": 0.4229390681003584, "grad_norm": 0.5219546298010829, "learning_rate": 2.308926437958696e-05, "loss": 0.6404, "num_tokens": 233701882.0, "step": 2478 }, { "epoch": 0.42310974569039084, "grad_norm": 0.6040177866618024, "learning_rate": 2.3082437275985664e-05, "loss": 0.7101, "num_tokens": 233780016.0, "step": 2479 }, { "epoch": 0.42328042328042326, "grad_norm": 0.48536466536905054, "learning_rate": 2.3075610172384368e-05, "loss": 0.6166, "num_tokens": 233879642.0, "step": 2480 }, { "epoch": 0.42345110087045573, "grad_norm": 0.5340276693024203, "learning_rate": 2.3068783068783072e-05, "loss": 0.5618, "num_tokens": 233953978.0, "step": 2481 }, { "epoch": 0.42362177846048815, "grad_norm": 0.4848365626571597, "learning_rate": 2.3061955965181776e-05, "loss": 0.5971, "num_tokens": 234051855.0, "step": 2482 }, { "epoch": 0.42379245605052057, "grad_norm": 0.521589634841622, "learning_rate": 2.305512886158048e-05, "loss": 0.6128, "num_tokens": 234140974.0, "step": 2483 }, { "epoch": 0.423963133640553, "grad_norm": 0.48215216891259505, "learning_rate": 2.304830175797918e-05, "loss": 0.5427, "num_tokens": 234234149.0, "step": 2484 }, { "epoch": 0.4241338112305854, "grad_norm": 0.5165289585602113, "learning_rate": 2.304147465437788e-05, "loss": 0.6776, "num_tokens": 234348281.0, "step": 2485 }, { "epoch": 0.42430448882061783, "grad_norm": 0.45347305931865717, "learning_rate": 2.3034647550776584e-05, "loss": 0.6131, "num_tokens": 234473929.0, "step": 2486 }, { "epoch": 0.4244751664106503, "grad_norm": 0.4824675015184745, "learning_rate": 2.3027820447175288e-05, "loss": 0.5758, "num_tokens": 234566419.0, "step": 2487 }, { "epoch": 0.4246458440006827, "grad_norm": 0.4869940415932061, "learning_rate": 2.3020993343573988e-05, "loss": 0.5198, "num_tokens": 234647900.0, "step": 2488 }, { "epoch": 0.42481652159071515, "grad_norm": 0.49459886685091925, "learning_rate": 2.3014166239972692e-05, "loss": 0.6043, "num_tokens": 234735283.0, "step": 2489 }, { "epoch": 0.42498719918074757, "grad_norm": 0.5226338468829149, "learning_rate": 2.3007339136371396e-05, "loss": 0.5799, "num_tokens": 234811504.0, "step": 2490 }, { "epoch": 0.42515787677078, "grad_norm": 0.5070939356920735, "learning_rate": 2.30005120327701e-05, "loss": 0.6901, "num_tokens": 234903747.0, "step": 2491 }, { "epoch": 0.4253285543608124, "grad_norm": 0.48947200297657145, "learning_rate": 2.2993684929168803e-05, "loss": 0.6145, "num_tokens": 235011656.0, "step": 2492 }, { "epoch": 0.4254992319508449, "grad_norm": 0.5239127944219475, "learning_rate": 2.2986857825567507e-05, "loss": 0.5847, "num_tokens": 235088624.0, "step": 2493 }, { "epoch": 0.4256699095408773, "grad_norm": 0.4792615995571878, "learning_rate": 2.2980030721966207e-05, "loss": 0.6434, "num_tokens": 235194608.0, "step": 2494 }, { "epoch": 0.4258405871309097, "grad_norm": 0.49173237234621275, "learning_rate": 2.297320361836491e-05, "loss": 0.6473, "num_tokens": 235294274.0, "step": 2495 }, { "epoch": 0.42601126472094214, "grad_norm": 0.5279406622433027, "learning_rate": 2.2966376514763615e-05, "loss": 0.651, "num_tokens": 235378974.0, "step": 2496 }, { "epoch": 0.42618194231097456, "grad_norm": 0.4826879677900294, "learning_rate": 2.2959549411162315e-05, "loss": 0.6371, "num_tokens": 235484888.0, "step": 2497 }, { "epoch": 0.426352619901007, "grad_norm": 0.479038634180759, "learning_rate": 2.295272230756102e-05, "loss": 0.5998, "num_tokens": 235581422.0, "step": 2498 }, { "epoch": 0.4265232974910394, "grad_norm": 0.5582766412730716, "learning_rate": 2.294589520395972e-05, "loss": 0.5587, "num_tokens": 235654021.0, "step": 2499 }, { "epoch": 0.4266939750810719, "grad_norm": 0.5496135746353961, "learning_rate": 2.2939068100358423e-05, "loss": 0.6148, "num_tokens": 235722472.0, "step": 2500 }, { "epoch": 0.4268646526711043, "grad_norm": 0.4498748705837028, "learning_rate": 2.2932240996757127e-05, "loss": 0.5618, "num_tokens": 235818256.0, "step": 2501 }, { "epoch": 0.4270353302611367, "grad_norm": 0.48511428247190175, "learning_rate": 2.292541389315583e-05, "loss": 0.6027, "num_tokens": 235916641.0, "step": 2502 }, { "epoch": 0.42720600785116913, "grad_norm": 0.4709496942005982, "learning_rate": 2.2918586789554534e-05, "loss": 0.5514, "num_tokens": 236014457.0, "step": 2503 }, { "epoch": 0.42737668544120155, "grad_norm": 0.5766047185812241, "learning_rate": 2.2911759685953238e-05, "loss": 0.6906, "num_tokens": 236108106.0, "step": 2504 }, { "epoch": 0.427547363031234, "grad_norm": 0.46978157140525084, "learning_rate": 2.290493258235194e-05, "loss": 0.5879, "num_tokens": 236215864.0, "step": 2505 }, { "epoch": 0.42771804062126645, "grad_norm": 0.5228237232266124, "learning_rate": 2.2898105478750642e-05, "loss": 0.6844, "num_tokens": 236316254.0, "step": 2506 }, { "epoch": 0.42788871821129887, "grad_norm": 0.4870277931848846, "learning_rate": 2.2891278375149346e-05, "loss": 0.6723, "num_tokens": 236417712.0, "step": 2507 }, { "epoch": 0.4280593958013313, "grad_norm": 0.5331310700142968, "learning_rate": 2.288445127154805e-05, "loss": 0.5773, "num_tokens": 236490547.0, "step": 2508 }, { "epoch": 0.4282300733913637, "grad_norm": 0.4846161002170908, "learning_rate": 2.2877624167946754e-05, "loss": 0.5691, "num_tokens": 236577425.0, "step": 2509 }, { "epoch": 0.42840075098139613, "grad_norm": 0.4934705576897008, "learning_rate": 2.287079706434545e-05, "loss": 0.5546, "num_tokens": 236669099.0, "step": 2510 }, { "epoch": 0.42857142857142855, "grad_norm": 0.4730443927304277, "learning_rate": 2.2863969960744154e-05, "loss": 0.5586, "num_tokens": 236758688.0, "step": 2511 }, { "epoch": 0.428742106161461, "grad_norm": 0.523490264049964, "learning_rate": 2.2857142857142858e-05, "loss": 0.6435, "num_tokens": 236850280.0, "step": 2512 }, { "epoch": 0.42891278375149344, "grad_norm": 0.5629196344335939, "learning_rate": 2.2850315753541562e-05, "loss": 0.6088, "num_tokens": 236937253.0, "step": 2513 }, { "epoch": 0.42908346134152586, "grad_norm": 0.5224168737289571, "learning_rate": 2.2843488649940266e-05, "loss": 0.6908, "num_tokens": 237027715.0, "step": 2514 }, { "epoch": 0.4292541389315583, "grad_norm": 0.49040087288691053, "learning_rate": 2.2836661546338966e-05, "loss": 0.6032, "num_tokens": 237120576.0, "step": 2515 }, { "epoch": 0.4294248165215907, "grad_norm": 0.5202106349458903, "learning_rate": 2.282983444273767e-05, "loss": 0.5611, "num_tokens": 237196200.0, "step": 2516 }, { "epoch": 0.4295954941116231, "grad_norm": 0.49781323849789383, "learning_rate": 2.2823007339136373e-05, "loss": 0.572, "num_tokens": 237283968.0, "step": 2517 }, { "epoch": 0.4297661717016556, "grad_norm": 0.5112627399640833, "learning_rate": 2.2816180235535077e-05, "loss": 0.6533, "num_tokens": 237376024.0, "step": 2518 }, { "epoch": 0.429936849291688, "grad_norm": 0.5408163641323811, "learning_rate": 2.280935313193378e-05, "loss": 0.6143, "num_tokens": 237450672.0, "step": 2519 }, { "epoch": 0.43010752688172044, "grad_norm": 0.5196509086797625, "learning_rate": 2.2802526028332485e-05, "loss": 0.6255, "num_tokens": 237538598.0, "step": 2520 }, { "epoch": 0.43027820447175286, "grad_norm": 0.5341305528755987, "learning_rate": 2.2795698924731185e-05, "loss": 0.6018, "num_tokens": 237636195.0, "step": 2521 }, { "epoch": 0.4304488820617853, "grad_norm": 0.5460944241858391, "learning_rate": 2.2788871821129886e-05, "loss": 0.711, "num_tokens": 237726539.0, "step": 2522 }, { "epoch": 0.4306195596518177, "grad_norm": 0.5128657575994936, "learning_rate": 2.278204471752859e-05, "loss": 0.5686, "num_tokens": 237809062.0, "step": 2523 }, { "epoch": 0.43079023724185017, "grad_norm": 0.5186122599417522, "learning_rate": 2.2775217613927293e-05, "loss": 0.5845, "num_tokens": 237891952.0, "step": 2524 }, { "epoch": 0.4309609148318826, "grad_norm": 0.5029195570056039, "learning_rate": 2.2768390510325993e-05, "loss": 0.6409, "num_tokens": 238000342.0, "step": 2525 }, { "epoch": 0.431131592421915, "grad_norm": 0.4918946078050662, "learning_rate": 2.2761563406724697e-05, "loss": 0.6017, "num_tokens": 238093585.0, "step": 2526 }, { "epoch": 0.43130227001194743, "grad_norm": 0.48213839953670873, "learning_rate": 2.27547363031234e-05, "loss": 0.6904, "num_tokens": 238208459.0, "step": 2527 }, { "epoch": 0.43147294760197985, "grad_norm": 0.582103789167394, "learning_rate": 2.2747909199522105e-05, "loss": 0.6578, "num_tokens": 238277853.0, "step": 2528 }, { "epoch": 0.43164362519201227, "grad_norm": 0.5198258482005479, "learning_rate": 2.274108209592081e-05, "loss": 0.593, "num_tokens": 238355832.0, "step": 2529 }, { "epoch": 0.4318143027820447, "grad_norm": 0.5615283760894826, "learning_rate": 2.2734254992319512e-05, "loss": 0.6266, "num_tokens": 238442937.0, "step": 2530 }, { "epoch": 0.43198498037207717, "grad_norm": 0.5201473238667882, "learning_rate": 2.2727427888718213e-05, "loss": 0.6471, "num_tokens": 238532721.0, "step": 2531 }, { "epoch": 0.4321556579621096, "grad_norm": 0.44750227083814326, "learning_rate": 2.2720600785116916e-05, "loss": 0.5914, "num_tokens": 238649020.0, "step": 2532 }, { "epoch": 0.432326335552142, "grad_norm": 0.5471151883861114, "learning_rate": 2.271377368151562e-05, "loss": 0.5935, "num_tokens": 238730208.0, "step": 2533 }, { "epoch": 0.4324970131421744, "grad_norm": 0.496678497446435, "learning_rate": 2.2706946577914324e-05, "loss": 0.688, "num_tokens": 238842108.0, "step": 2534 }, { "epoch": 0.43266769073220684, "grad_norm": 0.7760185601836864, "learning_rate": 2.2700119474313024e-05, "loss": 0.629, "num_tokens": 238938784.0, "step": 2535 }, { "epoch": 0.43283836832223926, "grad_norm": 0.541915315942646, "learning_rate": 2.2693292370711725e-05, "loss": 0.707, "num_tokens": 239024439.0, "step": 2536 }, { "epoch": 0.43300904591227174, "grad_norm": 0.4916292661919091, "learning_rate": 2.268646526711043e-05, "loss": 0.5862, "num_tokens": 239135100.0, "step": 2537 }, { "epoch": 0.43317972350230416, "grad_norm": 0.5141653496301187, "learning_rate": 2.2679638163509132e-05, "loss": 0.548, "num_tokens": 239224207.0, "step": 2538 }, { "epoch": 0.4333504010923366, "grad_norm": 0.4820296772588601, "learning_rate": 2.2672811059907836e-05, "loss": 0.5072, "num_tokens": 239304370.0, "step": 2539 }, { "epoch": 0.433521078682369, "grad_norm": 0.5766977634631069, "learning_rate": 2.266598395630654e-05, "loss": 0.6845, "num_tokens": 239390188.0, "step": 2540 }, { "epoch": 0.4336917562724014, "grad_norm": 0.5284240515795154, "learning_rate": 2.2659156852705243e-05, "loss": 0.6475, "num_tokens": 239476011.0, "step": 2541 }, { "epoch": 0.43386243386243384, "grad_norm": 0.49447827270605055, "learning_rate": 2.2652329749103944e-05, "loss": 0.6855, "num_tokens": 239608156.0, "step": 2542 }, { "epoch": 0.4340331114524663, "grad_norm": 0.4589708700826861, "learning_rate": 2.2645502645502648e-05, "loss": 0.6203, "num_tokens": 239727351.0, "step": 2543 }, { "epoch": 0.43420378904249873, "grad_norm": 0.52053326723325, "learning_rate": 2.263867554190135e-05, "loss": 0.6016, "num_tokens": 239813462.0, "step": 2544 }, { "epoch": 0.43437446663253115, "grad_norm": 0.5113305967564903, "learning_rate": 2.2631848438300055e-05, "loss": 0.6591, "num_tokens": 239903036.0, "step": 2545 }, { "epoch": 0.4345451442225636, "grad_norm": 0.5251654475285746, "learning_rate": 2.262502133469876e-05, "loss": 0.6186, "num_tokens": 239992282.0, "step": 2546 }, { "epoch": 0.434715821812596, "grad_norm": 0.4902941961995629, "learning_rate": 2.2618194231097456e-05, "loss": 0.6007, "num_tokens": 240098339.0, "step": 2547 }, { "epoch": 0.4348864994026284, "grad_norm": 0.5442764419217503, "learning_rate": 2.261136712749616e-05, "loss": 0.5821, "num_tokens": 240176305.0, "step": 2548 }, { "epoch": 0.4350571769926609, "grad_norm": 0.4762878067200765, "learning_rate": 2.2604540023894863e-05, "loss": 0.6384, "num_tokens": 240302197.0, "step": 2549 }, { "epoch": 0.4352278545826933, "grad_norm": 0.7472243901535014, "learning_rate": 2.2597712920293567e-05, "loss": 0.5898, "num_tokens": 240391107.0, "step": 2550 }, { "epoch": 0.4353985321727257, "grad_norm": 0.5241849598962637, "learning_rate": 2.259088581669227e-05, "loss": 0.6005, "num_tokens": 240468115.0, "step": 2551 }, { "epoch": 0.43556920976275815, "grad_norm": 0.48292660462830933, "learning_rate": 2.258405871309097e-05, "loss": 0.5806, "num_tokens": 240565732.0, "step": 2552 }, { "epoch": 0.43573988735279057, "grad_norm": 0.516881008260058, "learning_rate": 2.2577231609489675e-05, "loss": 0.5778, "num_tokens": 240648147.0, "step": 2553 }, { "epoch": 0.435910564942823, "grad_norm": 0.4895792453705771, "learning_rate": 2.257040450588838e-05, "loss": 0.5437, "num_tokens": 240728460.0, "step": 2554 }, { "epoch": 0.43608124253285546, "grad_norm": 0.4758815736941176, "learning_rate": 2.2563577402287083e-05, "loss": 0.5684, "num_tokens": 240824100.0, "step": 2555 }, { "epoch": 0.4362519201228879, "grad_norm": 0.46943526819896236, "learning_rate": 2.2556750298685786e-05, "loss": 0.5979, "num_tokens": 240929737.0, "step": 2556 }, { "epoch": 0.4364225977129203, "grad_norm": 0.4946878756345388, "learning_rate": 2.254992319508449e-05, "loss": 0.6106, "num_tokens": 241021393.0, "step": 2557 }, { "epoch": 0.4365932753029527, "grad_norm": 0.4537656216802752, "learning_rate": 2.254309609148319e-05, "loss": 0.5839, "num_tokens": 241128045.0, "step": 2558 }, { "epoch": 0.43676395289298514, "grad_norm": 0.4387663055529923, "learning_rate": 2.253626898788189e-05, "loss": 0.5455, "num_tokens": 241243825.0, "step": 2559 }, { "epoch": 0.43693463048301756, "grad_norm": 0.5113537246380078, "learning_rate": 2.2529441884280595e-05, "loss": 0.5152, "num_tokens": 241327034.0, "step": 2560 }, { "epoch": 0.43710530807305004, "grad_norm": 0.4494886032275182, "learning_rate": 2.25226147806793e-05, "loss": 0.6752, "num_tokens": 241462151.0, "step": 2561 }, { "epoch": 0.43727598566308246, "grad_norm": 0.46342746330674767, "learning_rate": 2.2515787677078e-05, "loss": 0.5888, "num_tokens": 241577934.0, "step": 2562 }, { "epoch": 0.4374466632531149, "grad_norm": 0.5397426112038425, "learning_rate": 2.2508960573476703e-05, "loss": 0.6329, "num_tokens": 241657174.0, "step": 2563 }, { "epoch": 0.4376173408431473, "grad_norm": 0.49675335161778333, "learning_rate": 2.2502133469875406e-05, "loss": 0.6338, "num_tokens": 241754481.0, "step": 2564 }, { "epoch": 0.4377880184331797, "grad_norm": 0.46642326950866575, "learning_rate": 2.249530636627411e-05, "loss": 0.5577, "num_tokens": 241882118.0, "step": 2565 }, { "epoch": 0.43795869602321214, "grad_norm": 0.4646964692773092, "learning_rate": 2.2488479262672814e-05, "loss": 0.5837, "num_tokens": 241990885.0, "step": 2566 }, { "epoch": 0.43812937361324455, "grad_norm": 0.5281412133226361, "learning_rate": 2.2481652159071518e-05, "loss": 0.6268, "num_tokens": 242079258.0, "step": 2567 }, { "epoch": 0.43830005120327703, "grad_norm": 0.5111772076144409, "learning_rate": 2.247482505547022e-05, "loss": 0.6914, "num_tokens": 242177265.0, "step": 2568 }, { "epoch": 0.43847072879330945, "grad_norm": 0.4912385753566334, "learning_rate": 2.2467997951868922e-05, "loss": 0.6028, "num_tokens": 242278310.0, "step": 2569 }, { "epoch": 0.43864140638334187, "grad_norm": 0.4317212900158851, "learning_rate": 2.2461170848267625e-05, "loss": 0.5222, "num_tokens": 242389825.0, "step": 2570 }, { "epoch": 0.4388120839733743, "grad_norm": 0.5243356039464475, "learning_rate": 2.245434374466633e-05, "loss": 0.6291, "num_tokens": 242482664.0, "step": 2571 }, { "epoch": 0.4389827615634067, "grad_norm": 0.552420354029794, "learning_rate": 2.244751664106503e-05, "loss": 0.6183, "num_tokens": 242551180.0, "step": 2572 }, { "epoch": 0.43915343915343913, "grad_norm": 0.6364496903674042, "learning_rate": 2.244068953746373e-05, "loss": 0.7041, "num_tokens": 242629729.0, "step": 2573 }, { "epoch": 0.4393241167434716, "grad_norm": 0.45926330539581645, "learning_rate": 2.2433862433862434e-05, "loss": 0.6444, "num_tokens": 242745446.0, "step": 2574 }, { "epoch": 0.439494794333504, "grad_norm": 0.4732218506688462, "learning_rate": 2.2427035330261138e-05, "loss": 0.7685, "num_tokens": 242890885.0, "step": 2575 }, { "epoch": 0.43966547192353644, "grad_norm": 0.5692676424228504, "learning_rate": 2.242020822665984e-05, "loss": 0.6476, "num_tokens": 242963528.0, "step": 2576 }, { "epoch": 0.43983614951356886, "grad_norm": 0.5206638962334924, "learning_rate": 2.2413381123058545e-05, "loss": 0.5993, "num_tokens": 243046368.0, "step": 2577 }, { "epoch": 0.4400068271036013, "grad_norm": 0.552263682973172, "learning_rate": 2.240655401945725e-05, "loss": 0.6275, "num_tokens": 243125148.0, "step": 2578 }, { "epoch": 0.4401775046936337, "grad_norm": 0.45422858037733566, "learning_rate": 2.239972691585595e-05, "loss": 0.6028, "num_tokens": 243232313.0, "step": 2579 }, { "epoch": 0.4403481822836662, "grad_norm": 0.5349189504649133, "learning_rate": 2.2392899812254653e-05, "loss": 0.6405, "num_tokens": 243319617.0, "step": 2580 }, { "epoch": 0.4405188598736986, "grad_norm": 0.5266341657557501, "learning_rate": 2.2386072708653357e-05, "loss": 0.61, "num_tokens": 243398057.0, "step": 2581 }, { "epoch": 0.440689537463731, "grad_norm": 0.44233297941610783, "learning_rate": 2.237924560505206e-05, "loss": 0.5949, "num_tokens": 243509749.0, "step": 2582 }, { "epoch": 0.44086021505376344, "grad_norm": 0.4968970326965827, "learning_rate": 2.2372418501450764e-05, "loss": 0.5884, "num_tokens": 243594011.0, "step": 2583 }, { "epoch": 0.44103089264379586, "grad_norm": 0.48844097107824186, "learning_rate": 2.236559139784946e-05, "loss": 0.6413, "num_tokens": 243695029.0, "step": 2584 }, { "epoch": 0.4412015702338283, "grad_norm": 0.4888926129475806, "learning_rate": 2.2358764294248165e-05, "loss": 0.5904, "num_tokens": 243799129.0, "step": 2585 }, { "epoch": 0.44137224782386075, "grad_norm": 0.52000822457509, "learning_rate": 2.235193719064687e-05, "loss": 0.5118, "num_tokens": 243867683.0, "step": 2586 }, { "epoch": 0.4415429254138932, "grad_norm": 0.4528842311944032, "learning_rate": 2.2345110087045572e-05, "loss": 0.5863, "num_tokens": 243973334.0, "step": 2587 }, { "epoch": 0.4417136030039256, "grad_norm": 0.49125821302736017, "learning_rate": 2.2338282983444276e-05, "loss": 0.597, "num_tokens": 244065410.0, "step": 2588 }, { "epoch": 0.441884280593958, "grad_norm": 0.6186733275304095, "learning_rate": 2.2331455879842977e-05, "loss": 0.5989, "num_tokens": 244131195.0, "step": 2589 }, { "epoch": 0.44205495818399043, "grad_norm": 0.5729987207979114, "learning_rate": 2.232462877624168e-05, "loss": 0.5443, "num_tokens": 244196766.0, "step": 2590 }, { "epoch": 0.44222563577402285, "grad_norm": 0.5142090022939979, "learning_rate": 2.2317801672640384e-05, "loss": 0.6175, "num_tokens": 244276240.0, "step": 2591 }, { "epoch": 0.4423963133640553, "grad_norm": 0.4987865264073442, "learning_rate": 2.2310974569039088e-05, "loss": 0.5613, "num_tokens": 244355625.0, "step": 2592 }, { "epoch": 0.44256699095408775, "grad_norm": 0.5009183737494584, "learning_rate": 2.230414746543779e-05, "loss": 0.6436, "num_tokens": 244452544.0, "step": 2593 }, { "epoch": 0.44273766854412017, "grad_norm": 0.530079104388083, "learning_rate": 2.2297320361836495e-05, "loss": 0.6562, "num_tokens": 244545761.0, "step": 2594 }, { "epoch": 0.4429083461341526, "grad_norm": 0.44332239451849786, "learning_rate": 2.2290493258235196e-05, "loss": 0.5028, "num_tokens": 244653411.0, "step": 2595 }, { "epoch": 0.443079023724185, "grad_norm": 0.5366336966522953, "learning_rate": 2.22836661546339e-05, "loss": 0.5255, "num_tokens": 244724951.0, "step": 2596 }, { "epoch": 0.4432497013142174, "grad_norm": 0.5166689418663938, "learning_rate": 2.22768390510326e-05, "loss": 0.5879, "num_tokens": 244808471.0, "step": 2597 }, { "epoch": 0.44342037890424985, "grad_norm": 0.5717143998873989, "learning_rate": 2.2270011947431304e-05, "loss": 0.6585, "num_tokens": 244882043.0, "step": 2598 }, { "epoch": 0.4435910564942823, "grad_norm": 0.5366321214555893, "learning_rate": 2.2263184843830004e-05, "loss": 0.6412, "num_tokens": 244967121.0, "step": 2599 }, { "epoch": 0.44376173408431474, "grad_norm": 0.48118848368492245, "learning_rate": 2.2256357740228708e-05, "loss": 0.6207, "num_tokens": 245064507.0, "step": 2600 }, { "epoch": 0.44393241167434716, "grad_norm": 0.4489875101767695, "learning_rate": 2.224953063662741e-05, "loss": 0.6079, "num_tokens": 245182602.0, "step": 2601 }, { "epoch": 0.4441030892643796, "grad_norm": 0.5047008683575259, "learning_rate": 2.2242703533026115e-05, "loss": 0.5571, "num_tokens": 245264495.0, "step": 2602 }, { "epoch": 0.444273766854412, "grad_norm": 0.5761905172076729, "learning_rate": 2.223587642942482e-05, "loss": 0.61, "num_tokens": 245340290.0, "step": 2603 }, { "epoch": 0.4444444444444444, "grad_norm": 0.4473611792552422, "learning_rate": 2.2229049325823523e-05, "loss": 0.5845, "num_tokens": 245455185.0, "step": 2604 }, { "epoch": 0.4446151220344769, "grad_norm": 0.43914434042460493, "learning_rate": 2.2222222222222227e-05, "loss": 0.5744, "num_tokens": 245564029.0, "step": 2605 }, { "epoch": 0.4447857996245093, "grad_norm": 0.4870096486350764, "learning_rate": 2.2215395118620927e-05, "loss": 0.6041, "num_tokens": 245662664.0, "step": 2606 }, { "epoch": 0.44495647721454173, "grad_norm": 0.5109084552308337, "learning_rate": 2.220856801501963e-05, "loss": 0.6697, "num_tokens": 245763917.0, "step": 2607 }, { "epoch": 0.44512715480457415, "grad_norm": 0.5426651367902541, "learning_rate": 2.2201740911418335e-05, "loss": 0.5781, "num_tokens": 245844796.0, "step": 2608 }, { "epoch": 0.4452978323946066, "grad_norm": 0.546278051504539, "learning_rate": 2.2194913807817035e-05, "loss": 0.6757, "num_tokens": 245931569.0, "step": 2609 }, { "epoch": 0.445468509984639, "grad_norm": 0.5620117455950127, "learning_rate": 2.2188086704215735e-05, "loss": 0.5552, "num_tokens": 245999461.0, "step": 2610 }, { "epoch": 0.44563918757467147, "grad_norm": 0.5028485883604119, "learning_rate": 2.218125960061444e-05, "loss": 0.6174, "num_tokens": 246094720.0, "step": 2611 }, { "epoch": 0.4458098651647039, "grad_norm": 0.5023630423979022, "learning_rate": 2.2174432497013143e-05, "loss": 0.6232, "num_tokens": 246184114.0, "step": 2612 }, { "epoch": 0.4459805427547363, "grad_norm": 0.5359197112078263, "learning_rate": 2.2167605393411847e-05, "loss": 0.6819, "num_tokens": 246278010.0, "step": 2613 }, { "epoch": 0.44615122034476873, "grad_norm": 0.46660335018241733, "learning_rate": 2.216077828981055e-05, "loss": 0.5823, "num_tokens": 246378332.0, "step": 2614 }, { "epoch": 0.44632189793480115, "grad_norm": 0.4502390614138913, "learning_rate": 2.2153951186209254e-05, "loss": 0.5737, "num_tokens": 246491698.0, "step": 2615 }, { "epoch": 0.44649257552483357, "grad_norm": 0.45962104716658414, "learning_rate": 2.2147124082607955e-05, "loss": 0.5303, "num_tokens": 246592932.0, "step": 2616 }, { "epoch": 0.44666325311486604, "grad_norm": 0.49897038605380406, "learning_rate": 2.2140296979006658e-05, "loss": 0.5645, "num_tokens": 246689124.0, "step": 2617 }, { "epoch": 0.44683393070489846, "grad_norm": 0.5572183525256971, "learning_rate": 2.2133469875405362e-05, "loss": 0.6093, "num_tokens": 246775559.0, "step": 2618 }, { "epoch": 0.4470046082949309, "grad_norm": 0.5320918682040001, "learning_rate": 2.2126642771804066e-05, "loss": 0.6907, "num_tokens": 246875798.0, "step": 2619 }, { "epoch": 0.4471752858849633, "grad_norm": 0.49902347269483965, "learning_rate": 2.211981566820277e-05, "loss": 0.5803, "num_tokens": 246967965.0, "step": 2620 }, { "epoch": 0.4473459634749957, "grad_norm": 0.4697614620946911, "learning_rate": 2.2112988564601467e-05, "loss": 0.5624, "num_tokens": 247065684.0, "step": 2621 }, { "epoch": 0.44751664106502814, "grad_norm": 0.5098309958901, "learning_rate": 2.210616146100017e-05, "loss": 0.5588, "num_tokens": 247143650.0, "step": 2622 }, { "epoch": 0.4476873186550606, "grad_norm": 0.45301152421528595, "learning_rate": 2.2099334357398874e-05, "loss": 0.5317, "num_tokens": 247241394.0, "step": 2623 }, { "epoch": 0.44785799624509304, "grad_norm": 0.5791304347845814, "learning_rate": 2.2092507253797578e-05, "loss": 0.7056, "num_tokens": 247317827.0, "step": 2624 }, { "epoch": 0.44802867383512546, "grad_norm": 0.510602467356276, "learning_rate": 2.208568015019628e-05, "loss": 0.621, "num_tokens": 247398968.0, "step": 2625 }, { "epoch": 0.4481993514251579, "grad_norm": 0.5520524240197466, "learning_rate": 2.2078853046594982e-05, "loss": 0.6341, "num_tokens": 247467696.0, "step": 2626 }, { "epoch": 0.4483700290151903, "grad_norm": 0.440156804912293, "learning_rate": 2.2072025942993686e-05, "loss": 0.6334, "num_tokens": 247589560.0, "step": 2627 }, { "epoch": 0.4485407066052227, "grad_norm": 0.4814301688892049, "learning_rate": 2.206519883939239e-05, "loss": 0.6645, "num_tokens": 247700874.0, "step": 2628 }, { "epoch": 0.44871138419525514, "grad_norm": 0.5120669456164516, "learning_rate": 2.2058371735791093e-05, "loss": 0.577, "num_tokens": 247789275.0, "step": 2629 }, { "epoch": 0.4488820617852876, "grad_norm": 0.4851087413702751, "learning_rate": 2.2051544632189797e-05, "loss": 0.5764, "num_tokens": 247873523.0, "step": 2630 }, { "epoch": 0.44905273937532003, "grad_norm": 0.4813969553208375, "learning_rate": 2.20447175285885e-05, "loss": 0.6201, "num_tokens": 247981152.0, "step": 2631 }, { "epoch": 0.44922341696535245, "grad_norm": 0.4671701413468477, "learning_rate": 2.20378904249872e-05, "loss": 0.5984, "num_tokens": 248091723.0, "step": 2632 }, { "epoch": 0.44939409455538487, "grad_norm": 0.479262607556036, "learning_rate": 2.2031063321385905e-05, "loss": 0.5625, "num_tokens": 248185121.0, "step": 2633 }, { "epoch": 0.4495647721454173, "grad_norm": 0.5029360008865684, "learning_rate": 2.2024236217784605e-05, "loss": 0.5833, "num_tokens": 248270213.0, "step": 2634 }, { "epoch": 0.4497354497354497, "grad_norm": 0.47440513991610844, "learning_rate": 2.201740911418331e-05, "loss": 0.5571, "num_tokens": 248364828.0, "step": 2635 }, { "epoch": 0.4499061273254822, "grad_norm": 0.45780678804342867, "learning_rate": 2.2010582010582013e-05, "loss": 0.5074, "num_tokens": 248462625.0, "step": 2636 }, { "epoch": 0.4500768049155146, "grad_norm": 0.43583090453738677, "learning_rate": 2.2003754906980713e-05, "loss": 0.5985, "num_tokens": 248587365.0, "step": 2637 }, { "epoch": 0.450247482505547, "grad_norm": 0.4751326898654281, "learning_rate": 2.1996927803379417e-05, "loss": 0.549, "num_tokens": 248675408.0, "step": 2638 }, { "epoch": 0.45041816009557945, "grad_norm": 0.4897931279866619, "learning_rate": 2.199010069977812e-05, "loss": 0.5767, "num_tokens": 248772781.0, "step": 2639 }, { "epoch": 0.45058883768561186, "grad_norm": 0.464968438204167, "learning_rate": 2.1983273596176824e-05, "loss": 0.5985, "num_tokens": 248875389.0, "step": 2640 }, { "epoch": 0.4507595152756443, "grad_norm": 0.47934141604342356, "learning_rate": 2.1976446492575528e-05, "loss": 0.6473, "num_tokens": 248978847.0, "step": 2641 }, { "epoch": 0.45093019286567676, "grad_norm": 0.46202929448705005, "learning_rate": 2.1969619388974232e-05, "loss": 0.5557, "num_tokens": 249077387.0, "step": 2642 }, { "epoch": 0.4511008704557092, "grad_norm": 0.4669031593904029, "learning_rate": 2.1962792285372932e-05, "loss": 0.6377, "num_tokens": 249186186.0, "step": 2643 }, { "epoch": 0.4512715480457416, "grad_norm": 0.4818446992141536, "learning_rate": 2.1955965181771636e-05, "loss": 0.6142, "num_tokens": 249276322.0, "step": 2644 }, { "epoch": 0.451442225635774, "grad_norm": 0.5206873682325155, "learning_rate": 2.194913807817034e-05, "loss": 0.5931, "num_tokens": 249356032.0, "step": 2645 }, { "epoch": 0.45161290322580644, "grad_norm": 0.4904311948887937, "learning_rate": 2.194231097456904e-05, "loss": 0.6456, "num_tokens": 249447783.0, "step": 2646 }, { "epoch": 0.45178358081583886, "grad_norm": 0.5051070625167915, "learning_rate": 2.193548387096774e-05, "loss": 0.653, "num_tokens": 249544918.0, "step": 2647 }, { "epoch": 0.45195425840587133, "grad_norm": 0.48394695161020934, "learning_rate": 2.1928656767366444e-05, "loss": 0.6731, "num_tokens": 249666134.0, "step": 2648 }, { "epoch": 0.45212493599590375, "grad_norm": 0.6236400763954874, "learning_rate": 2.1921829663765148e-05, "loss": 0.6376, "num_tokens": 249730722.0, "step": 2649 }, { "epoch": 0.4522956135859362, "grad_norm": 0.5296528795195605, "learning_rate": 2.1915002560163852e-05, "loss": 0.6439, "num_tokens": 249824393.0, "step": 2650 }, { "epoch": 0.4524662911759686, "grad_norm": 0.5402670088569885, "learning_rate": 2.1908175456562556e-05, "loss": 0.6271, "num_tokens": 249899777.0, "step": 2651 }, { "epoch": 0.452636968766001, "grad_norm": 0.5214666083029563, "learning_rate": 2.190134835296126e-05, "loss": 0.6329, "num_tokens": 249983658.0, "step": 2652 }, { "epoch": 0.45280764635603343, "grad_norm": 0.5155060497767013, "learning_rate": 2.189452124935996e-05, "loss": 0.5468, "num_tokens": 250056972.0, "step": 2653 }, { "epoch": 0.4529783239460659, "grad_norm": 0.5123576252338988, "learning_rate": 2.1887694145758664e-05, "loss": 0.6084, "num_tokens": 250146056.0, "step": 2654 }, { "epoch": 0.45314900153609833, "grad_norm": 0.4573813913309695, "learning_rate": 2.1880867042157367e-05, "loss": 0.5401, "num_tokens": 250239996.0, "step": 2655 }, { "epoch": 0.45331967912613075, "grad_norm": 0.4900878244985309, "learning_rate": 2.187403993855607e-05, "loss": 0.5688, "num_tokens": 250323790.0, "step": 2656 }, { "epoch": 0.45349035671616317, "grad_norm": 0.4149883874484175, "learning_rate": 2.1867212834954775e-05, "loss": 0.5082, "num_tokens": 250441476.0, "step": 2657 }, { "epoch": 0.4536610343061956, "grad_norm": 0.47656844710860563, "learning_rate": 2.1860385731353472e-05, "loss": 0.6266, "num_tokens": 250544290.0, "step": 2658 }, { "epoch": 0.453831711896228, "grad_norm": 0.4845560280103682, "learning_rate": 2.1853558627752176e-05, "loss": 0.605, "num_tokens": 250635137.0, "step": 2659 }, { "epoch": 0.4540023894862604, "grad_norm": 0.5501062747650491, "learning_rate": 2.184673152415088e-05, "loss": 0.6716, "num_tokens": 250723603.0, "step": 2660 }, { "epoch": 0.4541730670762929, "grad_norm": 0.5060656645449053, "learning_rate": 2.1839904420549583e-05, "loss": 0.6075, "num_tokens": 250807032.0, "step": 2661 }, { "epoch": 0.4543437446663253, "grad_norm": 0.5490914490169315, "learning_rate": 2.1833077316948287e-05, "loss": 0.6039, "num_tokens": 250881349.0, "step": 2662 }, { "epoch": 0.45451442225635774, "grad_norm": 0.5518084638535478, "learning_rate": 2.1826250213346987e-05, "loss": 0.6173, "num_tokens": 250954601.0, "step": 2663 }, { "epoch": 0.45468509984639016, "grad_norm": 0.5188757259891337, "learning_rate": 2.181942310974569e-05, "loss": 0.5742, "num_tokens": 251037888.0, "step": 2664 }, { "epoch": 0.4548557774364226, "grad_norm": 0.42891735579509616, "learning_rate": 2.1812596006144395e-05, "loss": 0.6409, "num_tokens": 251189573.0, "step": 2665 }, { "epoch": 0.455026455026455, "grad_norm": 0.4983411705245901, "learning_rate": 2.18057689025431e-05, "loss": 0.687, "num_tokens": 251287495.0, "step": 2666 }, { "epoch": 0.4551971326164875, "grad_norm": 0.4277310209837677, "learning_rate": 2.1798941798941802e-05, "loss": 0.5603, "num_tokens": 251399551.0, "step": 2667 }, { "epoch": 0.4553678102065199, "grad_norm": 0.5379763310298462, "learning_rate": 2.1792114695340506e-05, "loss": 0.6491, "num_tokens": 251495460.0, "step": 2668 }, { "epoch": 0.4555384877965523, "grad_norm": 0.5793320463852714, "learning_rate": 2.1785287591739207e-05, "loss": 0.6586, "num_tokens": 251569850.0, "step": 2669 }, { "epoch": 0.45570916538658474, "grad_norm": 0.5138862774460591, "learning_rate": 2.177846048813791e-05, "loss": 0.4993, "num_tokens": 251635219.0, "step": 2670 }, { "epoch": 0.45587984297661716, "grad_norm": 0.46451610028617224, "learning_rate": 2.177163338453661e-05, "loss": 0.6478, "num_tokens": 251734664.0, "step": 2671 }, { "epoch": 0.4560505205666496, "grad_norm": 0.46347905200253525, "learning_rate": 2.1764806280935314e-05, "loss": 0.5132, "num_tokens": 251825135.0, "step": 2672 }, { "epoch": 0.45622119815668205, "grad_norm": 0.5343093870676354, "learning_rate": 2.1757979177334018e-05, "loss": 0.6766, "num_tokens": 251913129.0, "step": 2673 }, { "epoch": 0.45639187574671447, "grad_norm": 0.4807991156526324, "learning_rate": 2.175115207373272e-05, "loss": 0.6574, "num_tokens": 252040267.0, "step": 2674 }, { "epoch": 0.4565625533367469, "grad_norm": 0.449970939357211, "learning_rate": 2.1744324970131422e-05, "loss": 0.6522, "num_tokens": 252162680.0, "step": 2675 }, { "epoch": 0.4567332309267793, "grad_norm": 0.4899504671693258, "learning_rate": 2.1737497866530126e-05, "loss": 0.539, "num_tokens": 252242028.0, "step": 2676 }, { "epoch": 0.45690390851681173, "grad_norm": 0.534506894966146, "learning_rate": 2.173067076292883e-05, "loss": 0.6374, "num_tokens": 252320352.0, "step": 2677 }, { "epoch": 0.45707458610684415, "grad_norm": 0.5095412357643972, "learning_rate": 2.1723843659327534e-05, "loss": 0.5281, "num_tokens": 252399500.0, "step": 2678 }, { "epoch": 0.4572452636968766, "grad_norm": 0.49495185182561446, "learning_rate": 2.1717016555726237e-05, "loss": 0.6448, "num_tokens": 252503539.0, "step": 2679 }, { "epoch": 0.45741594128690904, "grad_norm": 0.5401902817117739, "learning_rate": 2.1710189452124938e-05, "loss": 0.6448, "num_tokens": 252601035.0, "step": 2680 }, { "epoch": 0.45758661887694146, "grad_norm": 0.5126373176164805, "learning_rate": 2.170336234852364e-05, "loss": 0.5221, "num_tokens": 252675476.0, "step": 2681 }, { "epoch": 0.4577572964669739, "grad_norm": 0.45887015454751556, "learning_rate": 2.1696535244922345e-05, "loss": 0.6513, "num_tokens": 252793682.0, "step": 2682 }, { "epoch": 0.4579279740570063, "grad_norm": 0.4851983085177145, "learning_rate": 2.1689708141321046e-05, "loss": 0.5626, "num_tokens": 252883635.0, "step": 2683 }, { "epoch": 0.4580986516470387, "grad_norm": 0.4475961757021382, "learning_rate": 2.1682881037719746e-05, "loss": 0.584, "num_tokens": 252988266.0, "step": 2684 }, { "epoch": 0.4582693292370712, "grad_norm": 0.533397037662657, "learning_rate": 2.167605393411845e-05, "loss": 0.5458, "num_tokens": 253060434.0, "step": 2685 }, { "epoch": 0.4584400068271036, "grad_norm": 0.468909483484282, "learning_rate": 2.1669226830517154e-05, "loss": 0.567, "num_tokens": 253163661.0, "step": 2686 }, { "epoch": 0.45861068441713604, "grad_norm": 0.49013215528410803, "learning_rate": 2.1662399726915857e-05, "loss": 0.5447, "num_tokens": 253252443.0, "step": 2687 }, { "epoch": 0.45878136200716846, "grad_norm": 0.5101037997252743, "learning_rate": 2.165557262331456e-05, "loss": 0.6503, "num_tokens": 253343889.0, "step": 2688 }, { "epoch": 0.4589520395972009, "grad_norm": 0.5416609106886574, "learning_rate": 2.1648745519713265e-05, "loss": 0.5865, "num_tokens": 253410157.0, "step": 2689 }, { "epoch": 0.4591227171872333, "grad_norm": 0.4763870751627381, "learning_rate": 2.1641918416111965e-05, "loss": 0.6105, "num_tokens": 253510128.0, "step": 2690 }, { "epoch": 0.4592933947772657, "grad_norm": 0.5192453133651226, "learning_rate": 2.163509131251067e-05, "loss": 0.586, "num_tokens": 253613755.0, "step": 2691 }, { "epoch": 0.4594640723672982, "grad_norm": 0.41801178293284635, "learning_rate": 2.1628264208909373e-05, "loss": 0.5344, "num_tokens": 253729015.0, "step": 2692 }, { "epoch": 0.4596347499573306, "grad_norm": 0.5175024187026647, "learning_rate": 2.1621437105308076e-05, "loss": 0.6966, "num_tokens": 253835320.0, "step": 2693 }, { "epoch": 0.45980542754736303, "grad_norm": 0.46692279736761694, "learning_rate": 2.161461000170678e-05, "loss": 0.6089, "num_tokens": 253946172.0, "step": 2694 }, { "epoch": 0.45997610513739545, "grad_norm": 0.4556377595573705, "learning_rate": 2.1607782898105484e-05, "loss": 0.6357, "num_tokens": 254069081.0, "step": 2695 }, { "epoch": 0.46014678272742787, "grad_norm": 0.5096058063937354, "learning_rate": 2.160095579450418e-05, "loss": 0.5666, "num_tokens": 254143707.0, "step": 2696 }, { "epoch": 0.4603174603174603, "grad_norm": 0.4697062558434564, "learning_rate": 2.1594128690902885e-05, "loss": 0.6854, "num_tokens": 254257172.0, "step": 2697 }, { "epoch": 0.46048813790749277, "grad_norm": 0.43452372290769165, "learning_rate": 2.158730158730159e-05, "loss": 0.4872, "num_tokens": 254359008.0, "step": 2698 }, { "epoch": 0.4606588154975252, "grad_norm": 0.47506255175268614, "learning_rate": 2.1580474483700292e-05, "loss": 0.5584, "num_tokens": 254457732.0, "step": 2699 }, { "epoch": 0.4608294930875576, "grad_norm": 0.4825256722489792, "learning_rate": 2.1573647380098993e-05, "loss": 0.5267, "num_tokens": 254548013.0, "step": 2700 }, { "epoch": 0.46100017067759, "grad_norm": 0.4475224760665684, "learning_rate": 2.1566820276497696e-05, "loss": 0.5225, "num_tokens": 254643516.0, "step": 2701 }, { "epoch": 0.46117084826762245, "grad_norm": 0.4970843091402074, "learning_rate": 2.15599931728964e-05, "loss": 0.5754, "num_tokens": 254726894.0, "step": 2702 }, { "epoch": 0.46134152585765487, "grad_norm": 0.532043048192978, "learning_rate": 2.1553166069295104e-05, "loss": 0.6824, "num_tokens": 254813121.0, "step": 2703 }, { "epoch": 0.46151220344768734, "grad_norm": 0.5809695355203921, "learning_rate": 2.1546338965693808e-05, "loss": 0.6376, "num_tokens": 254884956.0, "step": 2704 }, { "epoch": 0.46168288103771976, "grad_norm": 0.4798013170735045, "learning_rate": 2.153951186209251e-05, "loss": 0.6295, "num_tokens": 254994943.0, "step": 2705 }, { "epoch": 0.4618535586277522, "grad_norm": 0.4504287928624493, "learning_rate": 2.1532684758491212e-05, "loss": 0.5506, "num_tokens": 255103187.0, "step": 2706 }, { "epoch": 0.4620242362177846, "grad_norm": 0.44893247123842717, "learning_rate": 2.1525857654889916e-05, "loss": 0.6333, "num_tokens": 255230709.0, "step": 2707 }, { "epoch": 0.462194913807817, "grad_norm": 0.5165339470923785, "learning_rate": 2.1519030551288616e-05, "loss": 0.6054, "num_tokens": 255324439.0, "step": 2708 }, { "epoch": 0.46236559139784944, "grad_norm": 0.5165071690117639, "learning_rate": 2.151220344768732e-05, "loss": 0.6107, "num_tokens": 255417881.0, "step": 2709 }, { "epoch": 0.4625362689878819, "grad_norm": 0.5118061736669948, "learning_rate": 2.1505376344086024e-05, "loss": 0.6294, "num_tokens": 255503647.0, "step": 2710 }, { "epoch": 0.46270694657791434, "grad_norm": 0.4826331281296802, "learning_rate": 2.1498549240484724e-05, "loss": 0.6237, "num_tokens": 255630711.0, "step": 2711 }, { "epoch": 0.46287762416794676, "grad_norm": 0.495917927205697, "learning_rate": 2.1491722136883428e-05, "loss": 0.6224, "num_tokens": 255723881.0, "step": 2712 }, { "epoch": 0.4630483017579792, "grad_norm": 0.5281611216289528, "learning_rate": 2.148489503328213e-05, "loss": 0.569, "num_tokens": 255802399.0, "step": 2713 }, { "epoch": 0.4632189793480116, "grad_norm": 0.49943734826201736, "learning_rate": 2.1478067929680835e-05, "loss": 0.6321, "num_tokens": 255901555.0, "step": 2714 }, { "epoch": 0.463389656938044, "grad_norm": 0.4338083277999674, "learning_rate": 2.147124082607954e-05, "loss": 0.5535, "num_tokens": 256020739.0, "step": 2715 }, { "epoch": 0.4635603345280765, "grad_norm": 0.5052399142250522, "learning_rate": 2.1464413722478243e-05, "loss": 0.6914, "num_tokens": 256124207.0, "step": 2716 }, { "epoch": 0.4637310121181089, "grad_norm": 0.48899857994621937, "learning_rate": 2.1457586618876943e-05, "loss": 0.7021, "num_tokens": 256238330.0, "step": 2717 }, { "epoch": 0.46390168970814133, "grad_norm": 0.4792818614164604, "learning_rate": 2.1450759515275647e-05, "loss": 0.6451, "num_tokens": 256356293.0, "step": 2718 }, { "epoch": 0.46407236729817375, "grad_norm": 0.4711209198146715, "learning_rate": 2.144393241167435e-05, "loss": 0.6062, "num_tokens": 256465675.0, "step": 2719 }, { "epoch": 0.46424304488820617, "grad_norm": 0.4794865769132404, "learning_rate": 2.143710530807305e-05, "loss": 0.5781, "num_tokens": 256567406.0, "step": 2720 }, { "epoch": 0.4644137224782386, "grad_norm": 0.5822744476091425, "learning_rate": 2.143027820447175e-05, "loss": 0.5562, "num_tokens": 256638073.0, "step": 2721 }, { "epoch": 0.464584400068271, "grad_norm": 0.5174570146541233, "learning_rate": 2.1423451100870455e-05, "loss": 0.6498, "num_tokens": 256728903.0, "step": 2722 }, { "epoch": 0.4647550776583035, "grad_norm": 0.522380222445697, "learning_rate": 2.141662399726916e-05, "loss": 0.5305, "num_tokens": 256803410.0, "step": 2723 }, { "epoch": 0.4649257552483359, "grad_norm": 0.4705149360734729, "learning_rate": 2.1409796893667863e-05, "loss": 0.5351, "num_tokens": 256894565.0, "step": 2724 }, { "epoch": 0.4650964328383683, "grad_norm": 0.5125265450739548, "learning_rate": 2.1402969790066566e-05, "loss": 0.5662, "num_tokens": 256968342.0, "step": 2725 }, { "epoch": 0.46526711042840074, "grad_norm": 0.48535455295541086, "learning_rate": 2.139614268646527e-05, "loss": 0.6385, "num_tokens": 257088775.0, "step": 2726 }, { "epoch": 0.46543778801843316, "grad_norm": 0.5046206349465021, "learning_rate": 2.138931558286397e-05, "loss": 0.6356, "num_tokens": 257180623.0, "step": 2727 }, { "epoch": 0.4656084656084656, "grad_norm": 0.4566979731256016, "learning_rate": 2.1382488479262674e-05, "loss": 0.5548, "num_tokens": 257283260.0, "step": 2728 }, { "epoch": 0.46577914319849806, "grad_norm": 0.48635655470546224, "learning_rate": 2.1375661375661378e-05, "loss": 0.6265, "num_tokens": 257383139.0, "step": 2729 }, { "epoch": 0.4659498207885305, "grad_norm": 0.43836545371515856, "learning_rate": 2.1368834272060082e-05, "loss": 0.53, "num_tokens": 257495692.0, "step": 2730 }, { "epoch": 0.4661204983785629, "grad_norm": 0.5017524088201423, "learning_rate": 2.1362007168458786e-05, "loss": 0.5858, "num_tokens": 257580900.0, "step": 2731 }, { "epoch": 0.4662911759685953, "grad_norm": 0.5175788731086552, "learning_rate": 2.135518006485749e-05, "loss": 0.6439, "num_tokens": 257666792.0, "step": 2732 }, { "epoch": 0.46646185355862774, "grad_norm": 0.5013665939126793, "learning_rate": 2.1348352961256186e-05, "loss": 0.5731, "num_tokens": 257746823.0, "step": 2733 }, { "epoch": 0.46663253114866016, "grad_norm": 0.5213046230528113, "learning_rate": 2.134152585765489e-05, "loss": 0.5961, "num_tokens": 257832161.0, "step": 2734 }, { "epoch": 0.46680320873869263, "grad_norm": 0.4698592175420274, "learning_rate": 2.1334698754053594e-05, "loss": 0.5557, "num_tokens": 257925650.0, "step": 2735 }, { "epoch": 0.46697388632872505, "grad_norm": 0.5141208416163267, "learning_rate": 2.1327871650452298e-05, "loss": 0.5753, "num_tokens": 258005482.0, "step": 2736 }, { "epoch": 0.46714456391875747, "grad_norm": 0.47903234581461634, "learning_rate": 2.1321044546850998e-05, "loss": 0.6635, "num_tokens": 258122242.0, "step": 2737 }, { "epoch": 0.4673152415087899, "grad_norm": 0.48032257526866257, "learning_rate": 2.1314217443249702e-05, "loss": 0.5934, "num_tokens": 258231666.0, "step": 2738 }, { "epoch": 0.4674859190988223, "grad_norm": 0.5062134724087523, "learning_rate": 2.1307390339648406e-05, "loss": 0.557, "num_tokens": 258315621.0, "step": 2739 }, { "epoch": 0.46765659668885473, "grad_norm": 0.42315926867101145, "learning_rate": 2.130056323604711e-05, "loss": 0.5864, "num_tokens": 258462027.0, "step": 2740 }, { "epoch": 0.4678272742788872, "grad_norm": 0.5076751437387765, "learning_rate": 2.1293736132445813e-05, "loss": 0.574, "num_tokens": 258540097.0, "step": 2741 }, { "epoch": 0.4679979518689196, "grad_norm": 0.5828506260265786, "learning_rate": 2.1286909028844517e-05, "loss": 0.5912, "num_tokens": 258603939.0, "step": 2742 }, { "epoch": 0.46816862945895205, "grad_norm": 0.4918951916378864, "learning_rate": 2.128008192524322e-05, "loss": 0.5281, "num_tokens": 258686714.0, "step": 2743 }, { "epoch": 0.46833930704898447, "grad_norm": 0.599051915332191, "learning_rate": 2.127325482164192e-05, "loss": 0.5225, "num_tokens": 258742602.0, "step": 2744 }, { "epoch": 0.4685099846390169, "grad_norm": 0.48175779235830407, "learning_rate": 2.126642771804062e-05, "loss": 0.6622, "num_tokens": 258860547.0, "step": 2745 }, { "epoch": 0.4686806622290493, "grad_norm": 0.5039805038326765, "learning_rate": 2.1259600614439325e-05, "loss": 0.5565, "num_tokens": 258946288.0, "step": 2746 }, { "epoch": 0.4688513398190818, "grad_norm": 0.4456602429145936, "learning_rate": 2.125277351083803e-05, "loss": 0.5703, "num_tokens": 259048471.0, "step": 2747 }, { "epoch": 0.4690220174091142, "grad_norm": 0.5213329659127194, "learning_rate": 2.124594640723673e-05, "loss": 0.6473, "num_tokens": 259136222.0, "step": 2748 }, { "epoch": 0.4691926949991466, "grad_norm": 0.4810675999084171, "learning_rate": 2.1239119303635433e-05, "loss": 0.5731, "num_tokens": 259228976.0, "step": 2749 }, { "epoch": 0.46936337258917904, "grad_norm": 0.4616284869674558, "learning_rate": 2.1232292200034137e-05, "loss": 0.5457, "num_tokens": 259327772.0, "step": 2750 }, { "epoch": 0.46953405017921146, "grad_norm": 0.5143090665069365, "learning_rate": 2.122546509643284e-05, "loss": 0.5636, "num_tokens": 259439027.0, "step": 2751 }, { "epoch": 0.4697047277692439, "grad_norm": 0.5099116798577986, "learning_rate": 2.1218637992831544e-05, "loss": 0.5871, "num_tokens": 259515938.0, "step": 2752 }, { "epoch": 0.46987540535927635, "grad_norm": 0.5043400886320073, "learning_rate": 2.1211810889230248e-05, "loss": 0.5714, "num_tokens": 259602114.0, "step": 2753 }, { "epoch": 0.4700460829493088, "grad_norm": 0.5356962274851458, "learning_rate": 2.120498378562895e-05, "loss": 0.5962, "num_tokens": 259681020.0, "step": 2754 }, { "epoch": 0.4702167605393412, "grad_norm": 0.4933197566261178, "learning_rate": 2.1198156682027652e-05, "loss": 0.6703, "num_tokens": 259788336.0, "step": 2755 }, { "epoch": 0.4703874381293736, "grad_norm": 0.45607870193496064, "learning_rate": 2.1191329578426356e-05, "loss": 0.6339, "num_tokens": 259899175.0, "step": 2756 }, { "epoch": 0.47055811571940603, "grad_norm": 0.572336262821779, "learning_rate": 2.118450247482506e-05, "loss": 0.7077, "num_tokens": 260013792.0, "step": 2757 }, { "epoch": 0.47072879330943845, "grad_norm": 0.4747638104208514, "learning_rate": 2.1177675371223757e-05, "loss": 0.5952, "num_tokens": 260107161.0, "step": 2758 }, { "epoch": 0.4708994708994709, "grad_norm": 0.4880430412156721, "learning_rate": 2.117084826762246e-05, "loss": 0.6175, "num_tokens": 260211867.0, "step": 2759 }, { "epoch": 0.47107014848950335, "grad_norm": 0.48080589764162546, "learning_rate": 2.1164021164021164e-05, "loss": 0.6284, "num_tokens": 260329563.0, "step": 2760 }, { "epoch": 0.47124082607953577, "grad_norm": 0.5640412135714783, "learning_rate": 2.1157194060419868e-05, "loss": 0.5451, "num_tokens": 260408467.0, "step": 2761 }, { "epoch": 0.4714115036695682, "grad_norm": 0.5308643548808377, "learning_rate": 2.1150366956818572e-05, "loss": 0.6434, "num_tokens": 260499690.0, "step": 2762 }, { "epoch": 0.4715821812596006, "grad_norm": 0.5479100710111386, "learning_rate": 2.1143539853217276e-05, "loss": 0.572, "num_tokens": 260568393.0, "step": 2763 }, { "epoch": 0.471752858849633, "grad_norm": 0.4904881270152355, "learning_rate": 2.1136712749615976e-05, "loss": 0.6574, "num_tokens": 260671939.0, "step": 2764 }, { "epoch": 0.47192353643966545, "grad_norm": 0.48745440236315607, "learning_rate": 2.112988564601468e-05, "loss": 0.5118, "num_tokens": 260750790.0, "step": 2765 }, { "epoch": 0.4720942140296979, "grad_norm": 0.5120088791789342, "learning_rate": 2.1123058542413383e-05, "loss": 0.5665, "num_tokens": 260825742.0, "step": 2766 }, { "epoch": 0.47226489161973034, "grad_norm": 0.5025574965794489, "learning_rate": 2.1116231438812087e-05, "loss": 0.5627, "num_tokens": 260906910.0, "step": 2767 }, { "epoch": 0.47243556920976276, "grad_norm": 0.46105268417018186, "learning_rate": 2.110940433521079e-05, "loss": 0.6551, "num_tokens": 261040205.0, "step": 2768 }, { "epoch": 0.4726062467997952, "grad_norm": 0.5029042239548835, "learning_rate": 2.1102577231609495e-05, "loss": 0.6565, "num_tokens": 261128147.0, "step": 2769 }, { "epoch": 0.4727769243898276, "grad_norm": 0.44595621382141026, "learning_rate": 2.109575012800819e-05, "loss": 0.6284, "num_tokens": 261253380.0, "step": 2770 }, { "epoch": 0.47294760197986, "grad_norm": 0.4770356213613586, "learning_rate": 2.1088923024406895e-05, "loss": 0.5172, "num_tokens": 261337748.0, "step": 2771 }, { "epoch": 0.4731182795698925, "grad_norm": 0.48806674291261415, "learning_rate": 2.10820959208056e-05, "loss": 0.5656, "num_tokens": 261424238.0, "step": 2772 }, { "epoch": 0.4732889571599249, "grad_norm": 0.4732493503639952, "learning_rate": 2.1075268817204303e-05, "loss": 0.5372, "num_tokens": 261506820.0, "step": 2773 }, { "epoch": 0.47345963474995734, "grad_norm": 0.5368010190397625, "learning_rate": 2.1068441713603007e-05, "loss": 0.6426, "num_tokens": 261614388.0, "step": 2774 }, { "epoch": 0.47363031233998976, "grad_norm": 0.5238842412880865, "learning_rate": 2.1061614610001707e-05, "loss": 0.6612, "num_tokens": 261707423.0, "step": 2775 }, { "epoch": 0.4738009899300222, "grad_norm": 0.5583084495044456, "learning_rate": 2.105478750640041e-05, "loss": 0.5968, "num_tokens": 261796233.0, "step": 2776 }, { "epoch": 0.4739716675200546, "grad_norm": 0.4635386559002841, "learning_rate": 2.1047960402799115e-05, "loss": 0.5857, "num_tokens": 261903272.0, "step": 2777 }, { "epoch": 0.47414234511008707, "grad_norm": 0.5196872290181236, "learning_rate": 2.104113329919782e-05, "loss": 0.6859, "num_tokens": 262001311.0, "step": 2778 }, { "epoch": 0.4743130227001195, "grad_norm": 0.42604246411813196, "learning_rate": 2.1034306195596522e-05, "loss": 0.5824, "num_tokens": 262128348.0, "step": 2779 }, { "epoch": 0.4744837002901519, "grad_norm": 0.4932638343346999, "learning_rate": 2.1027479091995226e-05, "loss": 0.5973, "num_tokens": 262219890.0, "step": 2780 }, { "epoch": 0.47465437788018433, "grad_norm": 0.529869511773184, "learning_rate": 2.1020651988393926e-05, "loss": 0.6461, "num_tokens": 262312992.0, "step": 2781 }, { "epoch": 0.47482505547021675, "grad_norm": 0.48225732417768413, "learning_rate": 2.1013824884792627e-05, "loss": 0.6713, "num_tokens": 262411274.0, "step": 2782 }, { "epoch": 0.47499573306024917, "grad_norm": 0.46819328848847286, "learning_rate": 2.100699778119133e-05, "loss": 0.5983, "num_tokens": 262506439.0, "step": 2783 }, { "epoch": 0.47516641065028165, "grad_norm": 0.5659150884490732, "learning_rate": 2.1000170677590034e-05, "loss": 0.6237, "num_tokens": 262574804.0, "step": 2784 }, { "epoch": 0.47533708824031407, "grad_norm": 0.4763195479481333, "learning_rate": 2.0993343573988735e-05, "loss": 0.606, "num_tokens": 262680329.0, "step": 2785 }, { "epoch": 0.4755077658303465, "grad_norm": 0.4505571499406244, "learning_rate": 2.098651647038744e-05, "loss": 0.5603, "num_tokens": 262784724.0, "step": 2786 }, { "epoch": 0.4756784434203789, "grad_norm": 0.4973408721146918, "learning_rate": 2.0979689366786142e-05, "loss": 0.5913, "num_tokens": 262875852.0, "step": 2787 }, { "epoch": 0.4758491210104113, "grad_norm": 0.500000423358977, "learning_rate": 2.0972862263184846e-05, "loss": 0.5063, "num_tokens": 262947849.0, "step": 2788 }, { "epoch": 0.47601979860044374, "grad_norm": 0.44921344675656755, "learning_rate": 2.096603515958355e-05, "loss": 0.5514, "num_tokens": 263047158.0, "step": 2789 }, { "epoch": 0.47619047619047616, "grad_norm": 0.4444766323327889, "learning_rate": 2.0959208055982253e-05, "loss": 0.6219, "num_tokens": 263160022.0, "step": 2790 }, { "epoch": 0.47636115378050864, "grad_norm": 0.45276882487364545, "learning_rate": 2.0952380952380954e-05, "loss": 0.5747, "num_tokens": 263260563.0, "step": 2791 }, { "epoch": 0.47653183137054106, "grad_norm": 0.47520414357712126, "learning_rate": 2.0945553848779658e-05, "loss": 0.5485, "num_tokens": 263348330.0, "step": 2792 }, { "epoch": 0.4767025089605735, "grad_norm": 0.48415549480452974, "learning_rate": 2.093872674517836e-05, "loss": 0.539, "num_tokens": 263443498.0, "step": 2793 }, { "epoch": 0.4768731865506059, "grad_norm": 0.5112120567390389, "learning_rate": 2.0931899641577065e-05, "loss": 0.6542, "num_tokens": 263545307.0, "step": 2794 }, { "epoch": 0.4770438641406383, "grad_norm": 0.4843555789044252, "learning_rate": 2.0925072537975762e-05, "loss": 0.5634, "num_tokens": 263629848.0, "step": 2795 }, { "epoch": 0.47721454173067074, "grad_norm": 0.5222218856297645, "learning_rate": 2.0918245434374466e-05, "loss": 0.6744, "num_tokens": 263717207.0, "step": 2796 }, { "epoch": 0.4773852193207032, "grad_norm": 0.470252192997519, "learning_rate": 2.091141833077317e-05, "loss": 0.5468, "num_tokens": 263805494.0, "step": 2797 }, { "epoch": 0.47755589691073563, "grad_norm": 0.48854999449656045, "learning_rate": 2.0904591227171873e-05, "loss": 0.683, "num_tokens": 263912486.0, "step": 2798 }, { "epoch": 0.47772657450076805, "grad_norm": 0.4278552040817007, "learning_rate": 2.0897764123570577e-05, "loss": 0.5628, "num_tokens": 264026976.0, "step": 2799 }, { "epoch": 0.4778972520908005, "grad_norm": 0.49895084174843796, "learning_rate": 2.089093701996928e-05, "loss": 0.5633, "num_tokens": 264105730.0, "step": 2800 }, { "epoch": 0.4780679296808329, "grad_norm": 0.4679635040376754, "learning_rate": 2.088410991636798e-05, "loss": 0.6781, "num_tokens": 264219219.0, "step": 2801 }, { "epoch": 0.4782386072708653, "grad_norm": 0.5602714348621045, "learning_rate": 2.0877282812766685e-05, "loss": 0.582, "num_tokens": 264289623.0, "step": 2802 }, { "epoch": 0.4784092848608978, "grad_norm": 0.48337998899673135, "learning_rate": 2.087045570916539e-05, "loss": 0.6106, "num_tokens": 264386781.0, "step": 2803 }, { "epoch": 0.4785799624509302, "grad_norm": 0.4759665388205996, "learning_rate": 2.0863628605564093e-05, "loss": 0.5962, "num_tokens": 264487751.0, "step": 2804 }, { "epoch": 0.4787506400409626, "grad_norm": 0.46836843412874873, "learning_rate": 2.0856801501962796e-05, "loss": 0.5863, "num_tokens": 264594699.0, "step": 2805 }, { "epoch": 0.47892131763099505, "grad_norm": 0.500968308707824, "learning_rate": 2.08499743983615e-05, "loss": 0.556, "num_tokens": 264671718.0, "step": 2806 }, { "epoch": 0.47909199522102747, "grad_norm": 0.5893157032577367, "learning_rate": 2.0843147294760197e-05, "loss": 0.6597, "num_tokens": 264763398.0, "step": 2807 }, { "epoch": 0.4792626728110599, "grad_norm": 0.48047844707117543, "learning_rate": 2.08363201911589e-05, "loss": 0.6292, "num_tokens": 264866139.0, "step": 2808 }, { "epoch": 0.47943335040109236, "grad_norm": 0.4814903472753632, "learning_rate": 2.0829493087557605e-05, "loss": 0.6403, "num_tokens": 264964569.0, "step": 2809 }, { "epoch": 0.4796040279911248, "grad_norm": 0.4735962744889165, "learning_rate": 2.082266598395631e-05, "loss": 0.6227, "num_tokens": 265070831.0, "step": 2810 }, { "epoch": 0.4797747055811572, "grad_norm": 0.5462762968017906, "learning_rate": 2.0815838880355012e-05, "loss": 0.682, "num_tokens": 265161962.0, "step": 2811 }, { "epoch": 0.4799453831711896, "grad_norm": 0.47681667500741154, "learning_rate": 2.0809011776753712e-05, "loss": 0.6144, "num_tokens": 265267361.0, "step": 2812 }, { "epoch": 0.48011606076122204, "grad_norm": 0.47302407008804076, "learning_rate": 2.0802184673152416e-05, "loss": 0.7001, "num_tokens": 265396435.0, "step": 2813 }, { "epoch": 0.48028673835125446, "grad_norm": 0.45148044789709957, "learning_rate": 2.079535756955112e-05, "loss": 0.5564, "num_tokens": 265504015.0, "step": 2814 }, { "epoch": 0.48045741594128694, "grad_norm": 0.5184527820534015, "learning_rate": 2.0788530465949824e-05, "loss": 0.5347, "num_tokens": 265575304.0, "step": 2815 }, { "epoch": 0.48062809353131936, "grad_norm": 0.5296202969839803, "learning_rate": 2.0781703362348528e-05, "loss": 0.5559, "num_tokens": 265646428.0, "step": 2816 }, { "epoch": 0.4807987711213518, "grad_norm": 0.48816019063909155, "learning_rate": 2.077487625874723e-05, "loss": 0.5582, "num_tokens": 265724991.0, "step": 2817 }, { "epoch": 0.4809694487113842, "grad_norm": 0.48781723245203773, "learning_rate": 2.076804915514593e-05, "loss": 0.6339, "num_tokens": 265816340.0, "step": 2818 }, { "epoch": 0.4811401263014166, "grad_norm": 0.4575290190832969, "learning_rate": 2.0761222051544635e-05, "loss": 0.6281, "num_tokens": 265928853.0, "step": 2819 }, { "epoch": 0.48131080389144903, "grad_norm": 0.4796099583872098, "learning_rate": 2.0754394947943336e-05, "loss": 0.5578, "num_tokens": 266022688.0, "step": 2820 }, { "epoch": 0.48148148148148145, "grad_norm": 0.4719757609846022, "learning_rate": 2.074756784434204e-05, "loss": 0.5461, "num_tokens": 266120945.0, "step": 2821 }, { "epoch": 0.48165215907151393, "grad_norm": 0.4703801534661618, "learning_rate": 2.074074074074074e-05, "loss": 0.538, "num_tokens": 266214668.0, "step": 2822 }, { "epoch": 0.48182283666154635, "grad_norm": 0.5186243701215413, "learning_rate": 2.0733913637139444e-05, "loss": 0.7006, "num_tokens": 266323538.0, "step": 2823 }, { "epoch": 0.48199351425157877, "grad_norm": 0.5423821529180219, "learning_rate": 2.0727086533538147e-05, "loss": 0.6059, "num_tokens": 266424684.0, "step": 2824 }, { "epoch": 0.4821641918416112, "grad_norm": 0.5351325254007536, "learning_rate": 2.072025942993685e-05, "loss": 0.5537, "num_tokens": 266494881.0, "step": 2825 }, { "epoch": 0.4823348694316436, "grad_norm": 0.4850658743946264, "learning_rate": 2.0713432326335555e-05, "loss": 0.7092, "num_tokens": 266597616.0, "step": 2826 }, { "epoch": 0.48250554702167603, "grad_norm": 0.43852264950976283, "learning_rate": 2.070660522273426e-05, "loss": 0.5255, "num_tokens": 266710415.0, "step": 2827 }, { "epoch": 0.4826762246117085, "grad_norm": 0.5100733019147135, "learning_rate": 2.069977811913296e-05, "loss": 0.5831, "num_tokens": 266802276.0, "step": 2828 }, { "epoch": 0.4828469022017409, "grad_norm": 0.4853108524274792, "learning_rate": 2.0692951015531663e-05, "loss": 0.6497, "num_tokens": 266901085.0, "step": 2829 }, { "epoch": 0.48301757979177334, "grad_norm": 0.4886110931391704, "learning_rate": 2.0686123911930367e-05, "loss": 0.6721, "num_tokens": 266996334.0, "step": 2830 }, { "epoch": 0.48318825738180576, "grad_norm": 0.5475197851006306, "learning_rate": 2.067929680832907e-05, "loss": 0.6338, "num_tokens": 267073096.0, "step": 2831 }, { "epoch": 0.4833589349718382, "grad_norm": 0.5609227356508061, "learning_rate": 2.0672469704727767e-05, "loss": 0.6343, "num_tokens": 267171610.0, "step": 2832 }, { "epoch": 0.4835296125618706, "grad_norm": 0.4975415381615914, "learning_rate": 2.066564260112647e-05, "loss": 0.6528, "num_tokens": 267281372.0, "step": 2833 }, { "epoch": 0.4837002901519031, "grad_norm": 0.4761638301635794, "learning_rate": 2.0658815497525175e-05, "loss": 0.6004, "num_tokens": 267379502.0, "step": 2834 }, { "epoch": 0.4838709677419355, "grad_norm": 0.5140227357409637, "learning_rate": 2.065198839392388e-05, "loss": 0.6529, "num_tokens": 267480547.0, "step": 2835 }, { "epoch": 0.4840416453319679, "grad_norm": 0.503050097407809, "learning_rate": 2.0645161290322582e-05, "loss": 0.5572, "num_tokens": 267561848.0, "step": 2836 }, { "epoch": 0.48421232292200034, "grad_norm": 0.5389466118288465, "learning_rate": 2.0638334186721286e-05, "loss": 0.6482, "num_tokens": 267650090.0, "step": 2837 }, { "epoch": 0.48438300051203276, "grad_norm": 0.5085763390579215, "learning_rate": 2.0631507083119987e-05, "loss": 0.5152, "num_tokens": 267727785.0, "step": 2838 }, { "epoch": 0.4845536781020652, "grad_norm": 0.4899520289556035, "learning_rate": 2.062467997951869e-05, "loss": 0.6089, "num_tokens": 267824441.0, "step": 2839 }, { "epoch": 0.48472435569209765, "grad_norm": 0.4913559210543756, "learning_rate": 2.0617852875917394e-05, "loss": 0.6319, "num_tokens": 267932216.0, "step": 2840 }, { "epoch": 0.4848950332821301, "grad_norm": 0.4980148330177103, "learning_rate": 2.0611025772316098e-05, "loss": 0.6726, "num_tokens": 268031290.0, "step": 2841 }, { "epoch": 0.4850657108721625, "grad_norm": 0.5430757340372253, "learning_rate": 2.06041986687148e-05, "loss": 0.5945, "num_tokens": 268095642.0, "step": 2842 }, { "epoch": 0.4852363884621949, "grad_norm": 0.47229457625039367, "learning_rate": 2.0597371565113505e-05, "loss": 0.6917, "num_tokens": 268233579.0, "step": 2843 }, { "epoch": 0.48540706605222733, "grad_norm": 0.43624329222044694, "learning_rate": 2.0590544461512202e-05, "loss": 0.5778, "num_tokens": 268348629.0, "step": 2844 }, { "epoch": 0.48557774364225975, "grad_norm": 0.44498761098212736, "learning_rate": 2.0583717357910906e-05, "loss": 0.6101, "num_tokens": 268462140.0, "step": 2845 }, { "epoch": 0.4857484212322922, "grad_norm": 0.5003639627514537, "learning_rate": 2.057689025430961e-05, "loss": 0.645, "num_tokens": 268572442.0, "step": 2846 }, { "epoch": 0.48591909882232465, "grad_norm": 0.4856748373294264, "learning_rate": 2.0570063150708314e-05, "loss": 0.553, "num_tokens": 268660026.0, "step": 2847 }, { "epoch": 0.48608977641235707, "grad_norm": 0.4700848228046942, "learning_rate": 2.0563236047107017e-05, "loss": 0.5311, "num_tokens": 268751811.0, "step": 2848 }, { "epoch": 0.4862604540023895, "grad_norm": 0.4505358090745403, "learning_rate": 2.0556408943505718e-05, "loss": 0.5525, "num_tokens": 268860115.0, "step": 2849 }, { "epoch": 0.4864311315924219, "grad_norm": 0.49432532150934183, "learning_rate": 2.054958183990442e-05, "loss": 0.6154, "num_tokens": 268960297.0, "step": 2850 }, { "epoch": 0.4866018091824543, "grad_norm": 0.5674496830874917, "learning_rate": 2.0542754736303125e-05, "loss": 0.6641, "num_tokens": 269031095.0, "step": 2851 }, { "epoch": 0.48677248677248675, "grad_norm": 0.5063399703064866, "learning_rate": 2.053592763270183e-05, "loss": 0.5867, "num_tokens": 269110924.0, "step": 2852 }, { "epoch": 0.4869431643625192, "grad_norm": 0.5221460778498629, "learning_rate": 2.0529100529100533e-05, "loss": 0.706, "num_tokens": 269213733.0, "step": 2853 }, { "epoch": 0.48711384195255164, "grad_norm": 0.5396749092105997, "learning_rate": 2.0522273425499237e-05, "loss": 0.6733, "num_tokens": 269310298.0, "step": 2854 }, { "epoch": 0.48728451954258406, "grad_norm": 0.441556058435517, "learning_rate": 2.0515446321897937e-05, "loss": 0.5524, "num_tokens": 269420576.0, "step": 2855 }, { "epoch": 0.4874551971326165, "grad_norm": 0.4615154380288719, "learning_rate": 2.050861921829664e-05, "loss": 0.5911, "num_tokens": 269540185.0, "step": 2856 }, { "epoch": 0.4876258747226489, "grad_norm": 0.4295552421179953, "learning_rate": 2.050179211469534e-05, "loss": 0.5412, "num_tokens": 269654267.0, "step": 2857 }, { "epoch": 0.4877965523126813, "grad_norm": 0.4963979554435014, "learning_rate": 2.0494965011094045e-05, "loss": 0.6481, "num_tokens": 269750827.0, "step": 2858 }, { "epoch": 0.4879672299027138, "grad_norm": 0.5039026699213918, "learning_rate": 2.0488137907492745e-05, "loss": 0.5964, "num_tokens": 269854249.0, "step": 2859 }, { "epoch": 0.4881379074927462, "grad_norm": 0.5706880894505407, "learning_rate": 2.048131080389145e-05, "loss": 0.6873, "num_tokens": 269926085.0, "step": 2860 }, { "epoch": 0.48830858508277863, "grad_norm": 0.46280494591719906, "learning_rate": 2.0474483700290153e-05, "loss": 0.5435, "num_tokens": 270019949.0, "step": 2861 }, { "epoch": 0.48847926267281105, "grad_norm": 0.4557477938540628, "learning_rate": 2.0467656596688857e-05, "loss": 0.6436, "num_tokens": 270133154.0, "step": 2862 }, { "epoch": 0.4886499402628435, "grad_norm": 0.4675394053210359, "learning_rate": 2.046082949308756e-05, "loss": 0.629, "num_tokens": 270240858.0, "step": 2863 }, { "epoch": 0.4888206178528759, "grad_norm": 0.4652502111645639, "learning_rate": 2.0454002389486264e-05, "loss": 0.582, "num_tokens": 270340225.0, "step": 2864 }, { "epoch": 0.48899129544290837, "grad_norm": 0.5829988283039427, "learning_rate": 2.0447175285884964e-05, "loss": 0.6863, "num_tokens": 270413709.0, "step": 2865 }, { "epoch": 0.4891619730329408, "grad_norm": 0.48266785152693065, "learning_rate": 2.0440348182283668e-05, "loss": 0.511, "num_tokens": 270497651.0, "step": 2866 }, { "epoch": 0.4893326506229732, "grad_norm": 0.4968484465603754, "learning_rate": 2.0433521078682372e-05, "loss": 0.508, "num_tokens": 270577026.0, "step": 2867 }, { "epoch": 0.48950332821300563, "grad_norm": 0.47595925265430217, "learning_rate": 2.0426693975081076e-05, "loss": 0.5843, "num_tokens": 270674391.0, "step": 2868 }, { "epoch": 0.48967400580303805, "grad_norm": 0.46628377517710473, "learning_rate": 2.0419866871479773e-05, "loss": 0.6643, "num_tokens": 270787274.0, "step": 2869 }, { "epoch": 0.48984468339307047, "grad_norm": 0.5385215825334826, "learning_rate": 2.0413039767878476e-05, "loss": 0.5962, "num_tokens": 270858889.0, "step": 2870 }, { "epoch": 0.49001536098310294, "grad_norm": 0.5805884963067375, "learning_rate": 2.040621266427718e-05, "loss": 0.6293, "num_tokens": 270957643.0, "step": 2871 }, { "epoch": 0.49018603857313536, "grad_norm": 0.498084441145313, "learning_rate": 2.0399385560675884e-05, "loss": 0.6639, "num_tokens": 271047676.0, "step": 2872 }, { "epoch": 0.4903567161631678, "grad_norm": 0.487564019574099, "learning_rate": 2.0392558457074588e-05, "loss": 0.5933, "num_tokens": 271129962.0, "step": 2873 }, { "epoch": 0.4905273937532002, "grad_norm": 0.6472447182346212, "learning_rate": 2.038573135347329e-05, "loss": 0.714, "num_tokens": 271231719.0, "step": 2874 }, { "epoch": 0.4906980713432326, "grad_norm": 0.4971211809010418, "learning_rate": 2.0378904249871992e-05, "loss": 0.5887, "num_tokens": 271328947.0, "step": 2875 }, { "epoch": 0.49086874893326504, "grad_norm": 0.4863071185912599, "learning_rate": 2.0372077146270696e-05, "loss": 0.5036, "num_tokens": 271422273.0, "step": 2876 }, { "epoch": 0.4910394265232975, "grad_norm": 0.5413413168451584, "learning_rate": 2.03652500426694e-05, "loss": 0.632, "num_tokens": 271502160.0, "step": 2877 }, { "epoch": 0.49121010411332994, "grad_norm": 0.5144456312536052, "learning_rate": 2.0358422939068103e-05, "loss": 0.6403, "num_tokens": 271585123.0, "step": 2878 }, { "epoch": 0.49138078170336236, "grad_norm": 0.4842619958313146, "learning_rate": 2.0351595835466807e-05, "loss": 0.6668, "num_tokens": 271684652.0, "step": 2879 }, { "epoch": 0.4915514592933948, "grad_norm": 0.5284020508258055, "learning_rate": 2.034476873186551e-05, "loss": 0.6673, "num_tokens": 271763620.0, "step": 2880 }, { "epoch": 0.4917221368834272, "grad_norm": 0.4548950335050338, "learning_rate": 2.0337941628264208e-05, "loss": 0.6492, "num_tokens": 271880062.0, "step": 2881 }, { "epoch": 0.4918928144734596, "grad_norm": 0.496889245805252, "learning_rate": 2.033111452466291e-05, "loss": 0.5442, "num_tokens": 271963728.0, "step": 2882 }, { "epoch": 0.49206349206349204, "grad_norm": 0.6059432208166287, "learning_rate": 2.0324287421061615e-05, "loss": 0.6488, "num_tokens": 272040540.0, "step": 2883 }, { "epoch": 0.4922341696535245, "grad_norm": 0.5110023970730857, "learning_rate": 2.031746031746032e-05, "loss": 0.5592, "num_tokens": 272114524.0, "step": 2884 }, { "epoch": 0.49240484724355693, "grad_norm": 0.4839276755721502, "learning_rate": 2.0310633213859023e-05, "loss": 0.5364, "num_tokens": 272200906.0, "step": 2885 }, { "epoch": 0.49257552483358935, "grad_norm": 0.4801959231712062, "learning_rate": 2.0303806110257723e-05, "loss": 0.5353, "num_tokens": 272293099.0, "step": 2886 }, { "epoch": 0.49274620242362177, "grad_norm": 0.5388421563548168, "learning_rate": 2.0296979006656427e-05, "loss": 0.5697, "num_tokens": 272368710.0, "step": 2887 }, { "epoch": 0.4929168800136542, "grad_norm": 0.5320129170945297, "learning_rate": 2.029015190305513e-05, "loss": 0.5955, "num_tokens": 272449086.0, "step": 2888 }, { "epoch": 0.4930875576036866, "grad_norm": 0.48623700351007676, "learning_rate": 2.0283324799453834e-05, "loss": 0.5246, "num_tokens": 272534506.0, "step": 2889 }, { "epoch": 0.4932582351937191, "grad_norm": 0.5505810187608702, "learning_rate": 2.0276497695852538e-05, "loss": 0.6309, "num_tokens": 272608333.0, "step": 2890 }, { "epoch": 0.4934289127837515, "grad_norm": 0.491520130776598, "learning_rate": 2.0269670592251242e-05, "loss": 0.601, "num_tokens": 272696781.0, "step": 2891 }, { "epoch": 0.4935995903737839, "grad_norm": 0.5171691089471971, "learning_rate": 2.0262843488649942e-05, "loss": 0.5634, "num_tokens": 272768975.0, "step": 2892 }, { "epoch": 0.49377026796381634, "grad_norm": 0.4898250317116262, "learning_rate": 2.0256016385048646e-05, "loss": 0.5827, "num_tokens": 272855964.0, "step": 2893 }, { "epoch": 0.49394094555384876, "grad_norm": 0.465241279888239, "learning_rate": 2.0249189281447346e-05, "loss": 0.5259, "num_tokens": 272948600.0, "step": 2894 }, { "epoch": 0.4941116231438812, "grad_norm": 0.4819560221806663, "learning_rate": 2.024236217784605e-05, "loss": 0.594, "num_tokens": 273044634.0, "step": 2895 }, { "epoch": 0.49428230073391366, "grad_norm": 0.46378968328026815, "learning_rate": 2.023553507424475e-05, "loss": 0.5531, "num_tokens": 273150236.0, "step": 2896 }, { "epoch": 0.4944529783239461, "grad_norm": 0.4671772497805685, "learning_rate": 2.0228707970643454e-05, "loss": 0.5557, "num_tokens": 273240551.0, "step": 2897 }, { "epoch": 0.4946236559139785, "grad_norm": 0.4254915440115111, "learning_rate": 2.0221880867042158e-05, "loss": 0.5726, "num_tokens": 273358847.0, "step": 2898 }, { "epoch": 0.4947943335040109, "grad_norm": 0.45365023233787427, "learning_rate": 2.0215053763440862e-05, "loss": 0.6623, "num_tokens": 273474304.0, "step": 2899 }, { "epoch": 0.49496501109404334, "grad_norm": 0.4953276860605578, "learning_rate": 2.0208226659839566e-05, "loss": 0.5599, "num_tokens": 273563073.0, "step": 2900 }, { "epoch": 0.49513568868407576, "grad_norm": 0.535719945733631, "learning_rate": 2.020139955623827e-05, "loss": 0.6257, "num_tokens": 273643942.0, "step": 2901 }, { "epoch": 0.49530636627410823, "grad_norm": 0.49624489344201517, "learning_rate": 2.019457245263697e-05, "loss": 0.6399, "num_tokens": 273746877.0, "step": 2902 }, { "epoch": 0.49547704386414065, "grad_norm": 0.5094719768446331, "learning_rate": 2.0187745349035674e-05, "loss": 0.6273, "num_tokens": 273838856.0, "step": 2903 }, { "epoch": 0.4956477214541731, "grad_norm": 0.49548672356664875, "learning_rate": 2.0180918245434377e-05, "loss": 0.5558, "num_tokens": 273918060.0, "step": 2904 }, { "epoch": 0.4958183990442055, "grad_norm": 0.5398754637770871, "learning_rate": 2.017409114183308e-05, "loss": 0.6597, "num_tokens": 273992093.0, "step": 2905 }, { "epoch": 0.4959890766342379, "grad_norm": 0.45619085238186824, "learning_rate": 2.0167264038231778e-05, "loss": 0.5587, "num_tokens": 274093531.0, "step": 2906 }, { "epoch": 0.49615975422427033, "grad_norm": 0.5489587968582248, "learning_rate": 2.0160436934630482e-05, "loss": 0.6701, "num_tokens": 274184099.0, "step": 2907 }, { "epoch": 0.4963304318143028, "grad_norm": 0.43894239267208035, "learning_rate": 2.0153609831029186e-05, "loss": 0.561, "num_tokens": 274293553.0, "step": 2908 }, { "epoch": 0.49650110940433523, "grad_norm": 0.46366286245970384, "learning_rate": 2.014678272742789e-05, "loss": 0.5245, "num_tokens": 274382139.0, "step": 2909 }, { "epoch": 0.49667178699436765, "grad_norm": 0.491312583007306, "learning_rate": 2.0139955623826593e-05, "loss": 0.6272, "num_tokens": 274474948.0, "step": 2910 }, { "epoch": 0.49684246458440007, "grad_norm": 0.45909200225024077, "learning_rate": 2.0133128520225297e-05, "loss": 0.5362, "num_tokens": 274566879.0, "step": 2911 }, { "epoch": 0.4970131421744325, "grad_norm": 0.5163405294702257, "learning_rate": 2.0126301416623997e-05, "loss": 0.5565, "num_tokens": 274639087.0, "step": 2912 }, { "epoch": 0.4971838197644649, "grad_norm": 0.5249171033729432, "learning_rate": 2.01194743130227e-05, "loss": 0.6861, "num_tokens": 274729536.0, "step": 2913 }, { "epoch": 0.4973544973544973, "grad_norm": 0.5188809095733038, "learning_rate": 2.0112647209421405e-05, "loss": 0.595, "num_tokens": 274822197.0, "step": 2914 }, { "epoch": 0.4975251749445298, "grad_norm": 0.5029710825396685, "learning_rate": 2.010582010582011e-05, "loss": 0.6168, "num_tokens": 274918420.0, "step": 2915 }, { "epoch": 0.4976958525345622, "grad_norm": 0.476806919717425, "learning_rate": 2.0098993002218812e-05, "loss": 0.6054, "num_tokens": 275024506.0, "step": 2916 }, { "epoch": 0.49786653012459464, "grad_norm": 0.4906048200663985, "learning_rate": 2.0092165898617516e-05, "loss": 0.5873, "num_tokens": 275113309.0, "step": 2917 }, { "epoch": 0.49803720771462706, "grad_norm": 0.4890390341998896, "learning_rate": 2.008533879501622e-05, "loss": 0.6282, "num_tokens": 275207914.0, "step": 2918 }, { "epoch": 0.4982078853046595, "grad_norm": 0.5378272846379831, "learning_rate": 2.0078511691414917e-05, "loss": 0.5884, "num_tokens": 275285327.0, "step": 2919 }, { "epoch": 0.4983785628946919, "grad_norm": 0.5405475677384355, "learning_rate": 2.007168458781362e-05, "loss": 0.5701, "num_tokens": 275357235.0, "step": 2920 }, { "epoch": 0.4985492404847244, "grad_norm": 0.4560160124200505, "learning_rate": 2.0064857484212324e-05, "loss": 0.5768, "num_tokens": 275466507.0, "step": 2921 }, { "epoch": 0.4987199180747568, "grad_norm": 0.4747863422821953, "learning_rate": 2.0058030380611028e-05, "loss": 0.6002, "num_tokens": 275557993.0, "step": 2922 }, { "epoch": 0.4988905956647892, "grad_norm": 0.5033637466490302, "learning_rate": 2.005120327700973e-05, "loss": 0.5972, "num_tokens": 275659357.0, "step": 2923 }, { "epoch": 0.49906127325482164, "grad_norm": 0.4929275467909048, "learning_rate": 2.0044376173408432e-05, "loss": 0.5888, "num_tokens": 275754575.0, "step": 2924 }, { "epoch": 0.49923195084485406, "grad_norm": 0.48010205872811457, "learning_rate": 2.0037549069807136e-05, "loss": 0.5642, "num_tokens": 275845434.0, "step": 2925 }, { "epoch": 0.4994026284348865, "grad_norm": 0.4614143190938438, "learning_rate": 2.003072196620584e-05, "loss": 0.5571, "num_tokens": 275943269.0, "step": 2926 }, { "epoch": 0.49957330602491895, "grad_norm": 0.4250591814469685, "learning_rate": 2.0023894862604544e-05, "loss": 0.6295, "num_tokens": 276079374.0, "step": 2927 }, { "epoch": 0.49974398361495137, "grad_norm": 0.46939168719919083, "learning_rate": 2.0017067759003247e-05, "loss": 0.5895, "num_tokens": 276187227.0, "step": 2928 }, { "epoch": 0.4999146612049838, "grad_norm": 0.4719718062116493, "learning_rate": 2.0010240655401948e-05, "loss": 0.615, "num_tokens": 276289234.0, "step": 2929 }, { "epoch": 0.5000853387950163, "grad_norm": 0.44102594712594595, "learning_rate": 2.000341355180065e-05, "loss": 0.5523, "num_tokens": 276394942.0, "step": 2930 }, { "epoch": 0.5002560163850487, "grad_norm": 0.4975705580641857, "learning_rate": 1.9996586448199352e-05, "loss": 0.5007, "num_tokens": 276464686.0, "step": 2931 }, { "epoch": 0.5004266939750811, "grad_norm": 0.4771783426710486, "learning_rate": 1.9989759344598056e-05, "loss": 0.5579, "num_tokens": 276562565.0, "step": 2932 }, { "epoch": 0.5005973715651135, "grad_norm": 0.7741405083195809, "learning_rate": 1.998293224099676e-05, "loss": 0.6752, "num_tokens": 276637420.0, "step": 2933 }, { "epoch": 0.500768049155146, "grad_norm": 0.5251762038860214, "learning_rate": 1.997610513739546e-05, "loss": 0.5764, "num_tokens": 276707390.0, "step": 2934 }, { "epoch": 0.5009387267451784, "grad_norm": 0.5072670055509272, "learning_rate": 1.9969278033794163e-05, "loss": 0.5965, "num_tokens": 276801890.0, "step": 2935 }, { "epoch": 0.5011094043352108, "grad_norm": 0.4940913063499955, "learning_rate": 1.9962450930192867e-05, "loss": 0.6061, "num_tokens": 276901189.0, "step": 2936 }, { "epoch": 0.5012800819252432, "grad_norm": 0.41887722565286695, "learning_rate": 1.995562382659157e-05, "loss": 0.5439, "num_tokens": 277009913.0, "step": 2937 }, { "epoch": 0.5014507595152756, "grad_norm": 0.5261208203728652, "learning_rate": 1.9948796722990275e-05, "loss": 0.5819, "num_tokens": 277089893.0, "step": 2938 }, { "epoch": 0.501621437105308, "grad_norm": 0.4742556700588168, "learning_rate": 1.9941969619388975e-05, "loss": 0.5418, "num_tokens": 277181275.0, "step": 2939 }, { "epoch": 0.5017921146953405, "grad_norm": 0.4773173712133262, "learning_rate": 1.993514251578768e-05, "loss": 0.5873, "num_tokens": 277291082.0, "step": 2940 }, { "epoch": 0.5019627922853729, "grad_norm": 0.4810682830758116, "learning_rate": 1.992831541218638e-05, "loss": 0.6019, "num_tokens": 277384470.0, "step": 2941 }, { "epoch": 0.5021334698754054, "grad_norm": 0.5394096755076262, "learning_rate": 1.9921488308585083e-05, "loss": 0.6943, "num_tokens": 277475574.0, "step": 2942 }, { "epoch": 0.5023041474654378, "grad_norm": 0.4623212370764887, "learning_rate": 1.9914661204983787e-05, "loss": 0.4497, "num_tokens": 277575476.0, "step": 2943 }, { "epoch": 0.5024748250554703, "grad_norm": 0.4721381274963078, "learning_rate": 1.990783410138249e-05, "loss": 0.6449, "num_tokens": 277691964.0, "step": 2944 }, { "epoch": 0.5026455026455027, "grad_norm": 0.4740335866573741, "learning_rate": 1.9901006997781194e-05, "loss": 0.609, "num_tokens": 277793285.0, "step": 2945 }, { "epoch": 0.5028161802355351, "grad_norm": 0.49388965943517776, "learning_rate": 1.9894179894179895e-05, "loss": 0.6406, "num_tokens": 277882339.0, "step": 2946 }, { "epoch": 0.5029868578255675, "grad_norm": 0.436812256713808, "learning_rate": 1.98873527905786e-05, "loss": 0.5308, "num_tokens": 277991391.0, "step": 2947 }, { "epoch": 0.5031575354155999, "grad_norm": 0.46667214092929044, "learning_rate": 1.9880525686977302e-05, "loss": 0.665, "num_tokens": 278110783.0, "step": 2948 }, { "epoch": 0.5033282130056324, "grad_norm": 0.4460446183248579, "learning_rate": 1.9873698583376006e-05, "loss": 0.6056, "num_tokens": 278231174.0, "step": 2949 }, { "epoch": 0.5034988905956648, "grad_norm": 0.4610080488916346, "learning_rate": 1.9866871479774706e-05, "loss": 0.5698, "num_tokens": 278336426.0, "step": 2950 }, { "epoch": 0.5036695681856972, "grad_norm": 0.49105454013287486, "learning_rate": 1.986004437617341e-05, "loss": 0.6565, "num_tokens": 278445529.0, "step": 2951 }, { "epoch": 0.5038402457757296, "grad_norm": 0.48664315308457706, "learning_rate": 1.9853217272572114e-05, "loss": 0.5775, "num_tokens": 278539674.0, "step": 2952 }, { "epoch": 0.504010923365762, "grad_norm": 0.491773574244215, "learning_rate": 1.9846390168970814e-05, "loss": 0.6109, "num_tokens": 278623893.0, "step": 2953 }, { "epoch": 0.5041816009557945, "grad_norm": 0.4846123642846501, "learning_rate": 1.9839563065369518e-05, "loss": 0.5848, "num_tokens": 278711398.0, "step": 2954 }, { "epoch": 0.504352278545827, "grad_norm": 0.5400095007019781, "learning_rate": 1.9832735961768222e-05, "loss": 0.6801, "num_tokens": 278795679.0, "step": 2955 }, { "epoch": 0.5045229561358594, "grad_norm": 0.48445297245385405, "learning_rate": 1.9825908858166926e-05, "loss": 0.4877, "num_tokens": 278873437.0, "step": 2956 }, { "epoch": 0.5046936337258918, "grad_norm": 0.4879103365170065, "learning_rate": 1.981908175456563e-05, "loss": 0.5218, "num_tokens": 278954245.0, "step": 2957 }, { "epoch": 0.5048643113159242, "grad_norm": 0.46903809359514, "learning_rate": 1.981225465096433e-05, "loss": 0.5629, "num_tokens": 279051252.0, "step": 2958 }, { "epoch": 0.5050349889059567, "grad_norm": 0.43800524681905495, "learning_rate": 1.9805427547363033e-05, "loss": 0.5614, "num_tokens": 279171527.0, "step": 2959 }, { "epoch": 0.5052056664959891, "grad_norm": 0.48470298564778774, "learning_rate": 1.9798600443761734e-05, "loss": 0.6161, "num_tokens": 279275253.0, "step": 2960 }, { "epoch": 0.5053763440860215, "grad_norm": 0.4422180281282132, "learning_rate": 1.9791773340160438e-05, "loss": 0.5607, "num_tokens": 279394381.0, "step": 2961 }, { "epoch": 0.5055470216760539, "grad_norm": 0.5173804612580816, "learning_rate": 1.978494623655914e-05, "loss": 0.5145, "num_tokens": 279463675.0, "step": 2962 }, { "epoch": 0.5057176992660863, "grad_norm": 0.4861594430832971, "learning_rate": 1.9778119132957845e-05, "loss": 0.6544, "num_tokens": 279563914.0, "step": 2963 }, { "epoch": 0.5058883768561188, "grad_norm": 0.5370598110552905, "learning_rate": 1.977129202935655e-05, "loss": 0.5751, "num_tokens": 279635373.0, "step": 2964 }, { "epoch": 0.5060590544461512, "grad_norm": 0.40512112123146315, "learning_rate": 1.976446492575525e-05, "loss": 0.4642, "num_tokens": 279738066.0, "step": 2965 }, { "epoch": 0.5062297320361836, "grad_norm": 0.48516635962975396, "learning_rate": 1.9757637822153953e-05, "loss": 0.5324, "num_tokens": 279825405.0, "step": 2966 }, { "epoch": 0.5064004096262161, "grad_norm": 0.47761889825735676, "learning_rate": 1.9750810718552657e-05, "loss": 0.5675, "num_tokens": 279916976.0, "step": 2967 }, { "epoch": 0.5065710872162486, "grad_norm": 0.48467705809184164, "learning_rate": 1.9743983614951357e-05, "loss": 0.6388, "num_tokens": 280014819.0, "step": 2968 }, { "epoch": 0.506741764806281, "grad_norm": 0.49112037568722255, "learning_rate": 1.973715651135006e-05, "loss": 0.6284, "num_tokens": 280101724.0, "step": 2969 }, { "epoch": 0.5069124423963134, "grad_norm": 0.5196017705035091, "learning_rate": 1.9730329407748765e-05, "loss": 0.569, "num_tokens": 280182468.0, "step": 2970 }, { "epoch": 0.5070831199863458, "grad_norm": 0.48651490627590277, "learning_rate": 1.9723502304147465e-05, "loss": 0.6284, "num_tokens": 280283600.0, "step": 2971 }, { "epoch": 0.5072537975763782, "grad_norm": 0.4900399486625487, "learning_rate": 1.971667520054617e-05, "loss": 0.6125, "num_tokens": 280395572.0, "step": 2972 }, { "epoch": 0.5074244751664106, "grad_norm": 0.462590242455195, "learning_rate": 1.9709848096944873e-05, "loss": 0.5844, "num_tokens": 280499670.0, "step": 2973 }, { "epoch": 0.5075951527564431, "grad_norm": 0.46188075324290406, "learning_rate": 1.9703020993343576e-05, "loss": 0.6253, "num_tokens": 280610315.0, "step": 2974 }, { "epoch": 0.5077658303464755, "grad_norm": 0.4736056374948957, "learning_rate": 1.969619388974228e-05, "loss": 0.5839, "num_tokens": 280706345.0, "step": 2975 }, { "epoch": 0.5079365079365079, "grad_norm": 0.4848915550295775, "learning_rate": 1.968936678614098e-05, "loss": 0.5708, "num_tokens": 280806614.0, "step": 2976 }, { "epoch": 0.5081071855265403, "grad_norm": 0.48142681847192265, "learning_rate": 1.9682539682539684e-05, "loss": 0.6171, "num_tokens": 280900276.0, "step": 2977 }, { "epoch": 0.5082778631165727, "grad_norm": 0.48184087955603466, "learning_rate": 1.9675712578938385e-05, "loss": 0.7332, "num_tokens": 281018160.0, "step": 2978 }, { "epoch": 0.5084485407066052, "grad_norm": 0.5189285895755861, "learning_rate": 1.966888547533709e-05, "loss": 0.6189, "num_tokens": 281112355.0, "step": 2979 }, { "epoch": 0.5086192182966377, "grad_norm": 0.45114369378314184, "learning_rate": 1.9662058371735792e-05, "loss": 0.5742, "num_tokens": 281216604.0, "step": 2980 }, { "epoch": 0.5087898958866701, "grad_norm": 0.4778301970506635, "learning_rate": 1.9655231268134496e-05, "loss": 0.5944, "num_tokens": 281305562.0, "step": 2981 }, { "epoch": 0.5089605734767025, "grad_norm": 0.481077307398366, "learning_rate": 1.96484041645332e-05, "loss": 0.5897, "num_tokens": 281399791.0, "step": 2982 }, { "epoch": 0.509131251066735, "grad_norm": 0.48070077200139133, "learning_rate": 1.9641577060931903e-05, "loss": 0.5517, "num_tokens": 281489601.0, "step": 2983 }, { "epoch": 0.5093019286567674, "grad_norm": 0.4782524800222262, "learning_rate": 1.9634749957330604e-05, "loss": 0.6754, "num_tokens": 281596912.0, "step": 2984 }, { "epoch": 0.5094726062467998, "grad_norm": 0.4502046084842937, "learning_rate": 1.9627922853729308e-05, "loss": 0.6508, "num_tokens": 281714977.0, "step": 2985 }, { "epoch": 0.5096432838368322, "grad_norm": 0.47907545473856067, "learning_rate": 1.962109575012801e-05, "loss": 0.5671, "num_tokens": 281802206.0, "step": 2986 }, { "epoch": 0.5098139614268646, "grad_norm": 0.530366170950225, "learning_rate": 1.9614268646526712e-05, "loss": 0.5955, "num_tokens": 281889443.0, "step": 2987 }, { "epoch": 0.5099846390168971, "grad_norm": 0.4623370003924888, "learning_rate": 1.9607441542925415e-05, "loss": 0.5207, "num_tokens": 281984193.0, "step": 2988 }, { "epoch": 0.5101553166069295, "grad_norm": 0.49909173616891644, "learning_rate": 1.960061443932412e-05, "loss": 0.5945, "num_tokens": 282084342.0, "step": 2989 }, { "epoch": 0.5103259941969619, "grad_norm": 0.5179203294604595, "learning_rate": 1.959378733572282e-05, "loss": 0.5445, "num_tokens": 282151303.0, "step": 2990 }, { "epoch": 0.5104966717869943, "grad_norm": 0.49245418357466686, "learning_rate": 1.9586960232121523e-05, "loss": 0.6116, "num_tokens": 282248160.0, "step": 2991 }, { "epoch": 0.5106673493770268, "grad_norm": 0.4622715852388729, "learning_rate": 1.9580133128520227e-05, "loss": 0.5322, "num_tokens": 282341664.0, "step": 2992 }, { "epoch": 0.5108380269670593, "grad_norm": 0.5243470610776291, "learning_rate": 1.957330602491893e-05, "loss": 0.5974, "num_tokens": 282423621.0, "step": 2993 }, { "epoch": 0.5110087045570917, "grad_norm": 0.5556243636411453, "learning_rate": 1.9566478921317635e-05, "loss": 0.717, "num_tokens": 282505588.0, "step": 2994 }, { "epoch": 0.5111793821471241, "grad_norm": 0.5072434851303513, "learning_rate": 1.9559651817716335e-05, "loss": 0.573, "num_tokens": 282598864.0, "step": 2995 }, { "epoch": 0.5113500597371565, "grad_norm": 0.5225446207886094, "learning_rate": 1.955282471411504e-05, "loss": 0.6382, "num_tokens": 282689028.0, "step": 2996 }, { "epoch": 0.511520737327189, "grad_norm": 0.476109844287748, "learning_rate": 1.954599761051374e-05, "loss": 0.5917, "num_tokens": 282793863.0, "step": 2997 }, { "epoch": 0.5116914149172214, "grad_norm": 0.4360343762801639, "learning_rate": 1.9539170506912443e-05, "loss": 0.5952, "num_tokens": 282917345.0, "step": 2998 }, { "epoch": 0.5118620925072538, "grad_norm": 0.5629899474850296, "learning_rate": 1.9532343403311147e-05, "loss": 0.5938, "num_tokens": 283025443.0, "step": 2999 }, { "epoch": 0.5120327700972862, "grad_norm": 0.4827770436003431, "learning_rate": 1.952551629970985e-05, "loss": 0.6941, "num_tokens": 283140841.0, "step": 3000 }, { "epoch": 0.5122034476873186, "grad_norm": 0.4950193129650829, "learning_rate": 1.9518689196108554e-05, "loss": 0.604, "num_tokens": 283242724.0, "step": 3001 }, { "epoch": 0.512374125277351, "grad_norm": 0.538507416274736, "learning_rate": 1.9511862092507255e-05, "loss": 0.5611, "num_tokens": 283314086.0, "step": 3002 }, { "epoch": 0.5125448028673835, "grad_norm": 0.4902763877975039, "learning_rate": 1.950503498890596e-05, "loss": 0.5236, "num_tokens": 283394447.0, "step": 3003 }, { "epoch": 0.512715480457416, "grad_norm": 0.4808692674379869, "learning_rate": 1.9498207885304662e-05, "loss": 0.6275, "num_tokens": 283495255.0, "step": 3004 }, { "epoch": 0.5128861580474484, "grad_norm": 0.4524486935752449, "learning_rate": 1.9491380781703363e-05, "loss": 0.6077, "num_tokens": 283607115.0, "step": 3005 }, { "epoch": 0.5130568356374808, "grad_norm": 0.5013746394545435, "learning_rate": 1.9484553678102066e-05, "loss": 0.5317, "num_tokens": 283685570.0, "step": 3006 }, { "epoch": 0.5132275132275133, "grad_norm": 0.4696492811067798, "learning_rate": 1.947772657450077e-05, "loss": 0.537, "num_tokens": 283790425.0, "step": 3007 }, { "epoch": 0.5133981908175457, "grad_norm": 0.5181201771383211, "learning_rate": 1.947089947089947e-05, "loss": 0.5493, "num_tokens": 283864698.0, "step": 3008 }, { "epoch": 0.5135688684075781, "grad_norm": 0.4877515465542224, "learning_rate": 1.9464072367298174e-05, "loss": 0.6028, "num_tokens": 283960900.0, "step": 3009 }, { "epoch": 0.5137395459976105, "grad_norm": 0.5027771829819515, "learning_rate": 1.9457245263696878e-05, "loss": 0.5916, "num_tokens": 284055415.0, "step": 3010 }, { "epoch": 0.5139102235876429, "grad_norm": 0.48986552585127024, "learning_rate": 1.9450418160095582e-05, "loss": 0.5521, "num_tokens": 284137168.0, "step": 3011 }, { "epoch": 0.5140809011776754, "grad_norm": 0.5119016201134485, "learning_rate": 1.9443591056494285e-05, "loss": 0.5387, "num_tokens": 284212400.0, "step": 3012 }, { "epoch": 0.5142515787677078, "grad_norm": 0.4899372920087474, "learning_rate": 1.9436763952892986e-05, "loss": 0.5631, "num_tokens": 284299926.0, "step": 3013 }, { "epoch": 0.5144222563577402, "grad_norm": 0.4627070474435737, "learning_rate": 1.942993684929169e-05, "loss": 0.5282, "num_tokens": 284390237.0, "step": 3014 }, { "epoch": 0.5145929339477726, "grad_norm": 0.45876659524433944, "learning_rate": 1.942310974569039e-05, "loss": 0.5867, "num_tokens": 284495904.0, "step": 3015 }, { "epoch": 0.514763611537805, "grad_norm": 0.4531856732068326, "learning_rate": 1.9416282642089094e-05, "loss": 0.5674, "num_tokens": 284599765.0, "step": 3016 }, { "epoch": 0.5149342891278376, "grad_norm": 0.47073773617801373, "learning_rate": 1.9409455538487798e-05, "loss": 0.6272, "num_tokens": 284704393.0, "step": 3017 }, { "epoch": 0.51510496671787, "grad_norm": 0.5556281196176842, "learning_rate": 1.94026284348865e-05, "loss": 0.6858, "num_tokens": 284831806.0, "step": 3018 }, { "epoch": 0.5152756443079024, "grad_norm": 0.4636423093459759, "learning_rate": 1.9395801331285205e-05, "loss": 0.5445, "num_tokens": 284925750.0, "step": 3019 }, { "epoch": 0.5154463218979348, "grad_norm": 0.5172454843211848, "learning_rate": 1.938897422768391e-05, "loss": 0.5603, "num_tokens": 285002267.0, "step": 3020 }, { "epoch": 0.5156169994879672, "grad_norm": 0.5217880374596379, "learning_rate": 1.938214712408261e-05, "loss": 0.6191, "num_tokens": 285094178.0, "step": 3021 }, { "epoch": 0.5157876770779997, "grad_norm": 0.48150729983089013, "learning_rate": 1.9375320020481313e-05, "loss": 0.5423, "num_tokens": 285194975.0, "step": 3022 }, { "epoch": 0.5159583546680321, "grad_norm": 0.5104719614188584, "learning_rate": 1.9368492916880017e-05, "loss": 0.6351, "num_tokens": 285279276.0, "step": 3023 }, { "epoch": 0.5161290322580645, "grad_norm": 0.4876665873192858, "learning_rate": 1.9361665813278717e-05, "loss": 0.5916, "num_tokens": 285369027.0, "step": 3024 }, { "epoch": 0.5162997098480969, "grad_norm": 0.49809699138444763, "learning_rate": 1.935483870967742e-05, "loss": 0.6036, "num_tokens": 285465401.0, "step": 3025 }, { "epoch": 0.5164703874381293, "grad_norm": 0.4816305692056345, "learning_rate": 1.9348011606076125e-05, "loss": 0.6263, "num_tokens": 285567129.0, "step": 3026 }, { "epoch": 0.5166410650281618, "grad_norm": 0.5046099856709171, "learning_rate": 1.9341184502474825e-05, "loss": 0.6389, "num_tokens": 285659715.0, "step": 3027 }, { "epoch": 0.5168117426181942, "grad_norm": 0.5020514058115976, "learning_rate": 1.933435739887353e-05, "loss": 0.501, "num_tokens": 285732251.0, "step": 3028 }, { "epoch": 0.5169824202082267, "grad_norm": 0.511957456816222, "learning_rate": 1.9327530295272232e-05, "loss": 0.5947, "num_tokens": 285813118.0, "step": 3029 }, { "epoch": 0.5171530977982591, "grad_norm": 0.48154322659100113, "learning_rate": 1.9320703191670936e-05, "loss": 0.5768, "num_tokens": 285911107.0, "step": 3030 }, { "epoch": 0.5173237753882916, "grad_norm": 0.5167740776492398, "learning_rate": 1.931387608806964e-05, "loss": 0.6153, "num_tokens": 286000637.0, "step": 3031 }, { "epoch": 0.517494452978324, "grad_norm": 0.5226636203175864, "learning_rate": 1.930704898446834e-05, "loss": 0.565, "num_tokens": 286078744.0, "step": 3032 }, { "epoch": 0.5176651305683564, "grad_norm": 0.5882399039665996, "learning_rate": 1.9300221880867044e-05, "loss": 0.5891, "num_tokens": 286141377.0, "step": 3033 }, { "epoch": 0.5178358081583888, "grad_norm": 0.4848101422904708, "learning_rate": 1.9293394777265745e-05, "loss": 0.6931, "num_tokens": 286251333.0, "step": 3034 }, { "epoch": 0.5180064857484212, "grad_norm": 0.5328465917443551, "learning_rate": 1.9286567673664448e-05, "loss": 0.5384, "num_tokens": 286321604.0, "step": 3035 }, { "epoch": 0.5181771633384537, "grad_norm": 0.5044144193585542, "learning_rate": 1.9279740570063152e-05, "loss": 0.6092, "num_tokens": 286410800.0, "step": 3036 }, { "epoch": 0.5183478409284861, "grad_norm": 0.5054690928006791, "learning_rate": 1.9272913466461856e-05, "loss": 0.5397, "num_tokens": 286487176.0, "step": 3037 }, { "epoch": 0.5185185185185185, "grad_norm": 0.4813520903445108, "learning_rate": 1.926608636286056e-05, "loss": 0.5792, "num_tokens": 286575659.0, "step": 3038 }, { "epoch": 0.5186891961085509, "grad_norm": 0.5077837053874465, "learning_rate": 1.925925925925926e-05, "loss": 0.6257, "num_tokens": 286672233.0, "step": 3039 }, { "epoch": 0.5188598736985833, "grad_norm": 0.48236979674324726, "learning_rate": 1.9252432155657964e-05, "loss": 0.6064, "num_tokens": 286768634.0, "step": 3040 }, { "epoch": 0.5190305512886159, "grad_norm": 0.5495224752518717, "learning_rate": 1.9245605052056667e-05, "loss": 0.6363, "num_tokens": 286844411.0, "step": 3041 }, { "epoch": 0.5192012288786483, "grad_norm": 0.48521363641370185, "learning_rate": 1.9238777948455368e-05, "loss": 0.6568, "num_tokens": 286941807.0, "step": 3042 }, { "epoch": 0.5193719064686807, "grad_norm": 1.5228632554393573, "learning_rate": 1.923195084485407e-05, "loss": 0.6466, "num_tokens": 287036506.0, "step": 3043 }, { "epoch": 0.5195425840587131, "grad_norm": 0.5101526111811319, "learning_rate": 1.9225123741252775e-05, "loss": 0.6649, "num_tokens": 287131012.0, "step": 3044 }, { "epoch": 0.5197132616487455, "grad_norm": 0.4986510648102983, "learning_rate": 1.921829663765148e-05, "loss": 0.6099, "num_tokens": 287221035.0, "step": 3045 }, { "epoch": 0.519883939238778, "grad_norm": 0.5548382687774132, "learning_rate": 1.921146953405018e-05, "loss": 0.6217, "num_tokens": 287292326.0, "step": 3046 }, { "epoch": 0.5200546168288104, "grad_norm": 0.4554967398245753, "learning_rate": 1.9204642430448883e-05, "loss": 0.6382, "num_tokens": 287413783.0, "step": 3047 }, { "epoch": 0.5202252944188428, "grad_norm": 0.4322202331786147, "learning_rate": 1.9197815326847587e-05, "loss": 0.5768, "num_tokens": 287531619.0, "step": 3048 }, { "epoch": 0.5203959720088752, "grad_norm": 0.44901753249154897, "learning_rate": 1.919098822324629e-05, "loss": 0.5493, "num_tokens": 287637645.0, "step": 3049 }, { "epoch": 0.5205666495989076, "grad_norm": 0.5211045382420716, "learning_rate": 1.918416111964499e-05, "loss": 0.6247, "num_tokens": 287734817.0, "step": 3050 }, { "epoch": 0.5207373271889401, "grad_norm": 0.4886424431346369, "learning_rate": 1.9177334016043695e-05, "loss": 0.5002, "num_tokens": 287818165.0, "step": 3051 }, { "epoch": 0.5209080047789725, "grad_norm": 0.48465132078631906, "learning_rate": 1.91705069124424e-05, "loss": 0.6078, "num_tokens": 287920763.0, "step": 3052 }, { "epoch": 0.5210786823690049, "grad_norm": 0.5124813734123707, "learning_rate": 1.91636798088411e-05, "loss": 0.5601, "num_tokens": 287994739.0, "step": 3053 }, { "epoch": 0.5212493599590374, "grad_norm": 0.454315701155547, "learning_rate": 1.9156852705239803e-05, "loss": 0.6836, "num_tokens": 288129005.0, "step": 3054 }, { "epoch": 0.5214200375490698, "grad_norm": 0.4932409259681437, "learning_rate": 1.9150025601638507e-05, "loss": 0.5367, "num_tokens": 288207646.0, "step": 3055 }, { "epoch": 0.5215907151391023, "grad_norm": 0.45861993920417854, "learning_rate": 1.914319849803721e-05, "loss": 0.5717, "num_tokens": 288310646.0, "step": 3056 }, { "epoch": 0.5217613927291347, "grad_norm": 0.5059191566207744, "learning_rate": 1.9136371394435914e-05, "loss": 0.5841, "num_tokens": 288386143.0, "step": 3057 }, { "epoch": 0.5219320703191671, "grad_norm": 0.48893187170993, "learning_rate": 1.9129544290834615e-05, "loss": 0.6688, "num_tokens": 288507418.0, "step": 3058 }, { "epoch": 0.5221027479091995, "grad_norm": 0.48248134982254554, "learning_rate": 1.9122717187233318e-05, "loss": 0.5296, "num_tokens": 288593725.0, "step": 3059 }, { "epoch": 0.522273425499232, "grad_norm": 0.4290723430703719, "learning_rate": 1.9115890083632022e-05, "loss": 0.572, "num_tokens": 288711237.0, "step": 3060 }, { "epoch": 0.5224441030892644, "grad_norm": 0.5100046363756213, "learning_rate": 1.9109062980030722e-05, "loss": 0.6185, "num_tokens": 288799765.0, "step": 3061 }, { "epoch": 0.5226147806792968, "grad_norm": 2.1172531965142025, "learning_rate": 1.9102235876429426e-05, "loss": 0.6748, "num_tokens": 288881655.0, "step": 3062 }, { "epoch": 0.5227854582693292, "grad_norm": 0.4391269532085232, "learning_rate": 1.909540877282813e-05, "loss": 0.544, "num_tokens": 288989117.0, "step": 3063 }, { "epoch": 0.5229561358593616, "grad_norm": 0.4888247967699203, "learning_rate": 1.908858166922683e-05, "loss": 0.6287, "num_tokens": 289080247.0, "step": 3064 }, { "epoch": 0.523126813449394, "grad_norm": 0.4724934015259245, "learning_rate": 1.9081754565625534e-05, "loss": 0.5505, "num_tokens": 289167228.0, "step": 3065 }, { "epoch": 0.5232974910394266, "grad_norm": 0.52857593254418, "learning_rate": 1.9074927462024238e-05, "loss": 0.5131, "num_tokens": 289231423.0, "step": 3066 }, { "epoch": 0.523468168629459, "grad_norm": 0.5204026469098294, "learning_rate": 1.906810035842294e-05, "loss": 0.5975, "num_tokens": 289309694.0, "step": 3067 }, { "epoch": 0.5236388462194914, "grad_norm": 0.5199542356497531, "learning_rate": 1.9061273254821645e-05, "loss": 0.6471, "num_tokens": 289397751.0, "step": 3068 }, { "epoch": 0.5238095238095238, "grad_norm": 1.524366310335856, "learning_rate": 1.9054446151220346e-05, "loss": 0.5984, "num_tokens": 289481609.0, "step": 3069 }, { "epoch": 0.5239802013995563, "grad_norm": 0.508134895841481, "learning_rate": 1.904761904761905e-05, "loss": 0.5914, "num_tokens": 289564347.0, "step": 3070 }, { "epoch": 0.5241508789895887, "grad_norm": 0.454723155153608, "learning_rate": 1.904079194401775e-05, "loss": 0.5585, "num_tokens": 289664958.0, "step": 3071 }, { "epoch": 0.5243215565796211, "grad_norm": 0.4951123133062021, "learning_rate": 1.9033964840416454e-05, "loss": 0.6587, "num_tokens": 289772482.0, "step": 3072 }, { "epoch": 0.5244922341696535, "grad_norm": 0.5306634005039207, "learning_rate": 1.9027137736815157e-05, "loss": 0.5695, "num_tokens": 289872338.0, "step": 3073 }, { "epoch": 0.5246629117596859, "grad_norm": 0.44941872366850255, "learning_rate": 1.902031063321386e-05, "loss": 0.5187, "num_tokens": 289971375.0, "step": 3074 }, { "epoch": 0.5248335893497184, "grad_norm": 0.5033283957777007, "learning_rate": 1.9013483529612565e-05, "loss": 0.653, "num_tokens": 290058980.0, "step": 3075 }, { "epoch": 0.5250042669397508, "grad_norm": 0.4634991004334279, "learning_rate": 1.900665642601127e-05, "loss": 0.6152, "num_tokens": 290172457.0, "step": 3076 }, { "epoch": 0.5251749445297832, "grad_norm": 0.7397813267490337, "learning_rate": 1.899982932240997e-05, "loss": 0.5889, "num_tokens": 290248559.0, "step": 3077 }, { "epoch": 0.5253456221198156, "grad_norm": 0.4545408132966553, "learning_rate": 1.8993002218808673e-05, "loss": 0.5189, "num_tokens": 290353084.0, "step": 3078 }, { "epoch": 0.5255162997098481, "grad_norm": 0.5337862846065333, "learning_rate": 1.8986175115207373e-05, "loss": 0.5693, "num_tokens": 290426274.0, "step": 3079 }, { "epoch": 0.5256869772998806, "grad_norm": 0.5072801236477027, "learning_rate": 1.8979348011606077e-05, "loss": 0.626, "num_tokens": 290520308.0, "step": 3080 }, { "epoch": 0.525857654889913, "grad_norm": 0.6846596353320044, "learning_rate": 1.897252090800478e-05, "loss": 0.681, "num_tokens": 290631274.0, "step": 3081 }, { "epoch": 0.5260283324799454, "grad_norm": 0.46940235925860574, "learning_rate": 1.8965693804403484e-05, "loss": 0.5944, "num_tokens": 290731753.0, "step": 3082 }, { "epoch": 0.5261990100699778, "grad_norm": 0.4581819830385618, "learning_rate": 1.8958866700802185e-05, "loss": 0.6281, "num_tokens": 290840533.0, "step": 3083 }, { "epoch": 0.5263696876600102, "grad_norm": 0.5234861313459777, "learning_rate": 1.895203959720089e-05, "loss": 0.6172, "num_tokens": 290924941.0, "step": 3084 }, { "epoch": 0.5265403652500427, "grad_norm": 0.5443188455210963, "learning_rate": 1.8945212493599592e-05, "loss": 0.5897, "num_tokens": 291012116.0, "step": 3085 }, { "epoch": 0.5267110428400751, "grad_norm": 0.49991854968020083, "learning_rate": 1.8938385389998296e-05, "loss": 0.6558, "num_tokens": 291104819.0, "step": 3086 }, { "epoch": 0.5268817204301075, "grad_norm": 0.49915325354140133, "learning_rate": 1.8931558286396997e-05, "loss": 0.5598, "num_tokens": 291187979.0, "step": 3087 }, { "epoch": 0.5270523980201399, "grad_norm": 0.5816091477979147, "learning_rate": 1.89247311827957e-05, "loss": 0.6216, "num_tokens": 291286621.0, "step": 3088 }, { "epoch": 0.5272230756101723, "grad_norm": 0.47255607851583553, "learning_rate": 1.8917904079194404e-05, "loss": 0.5635, "num_tokens": 291379406.0, "step": 3089 }, { "epoch": 0.5273937532002048, "grad_norm": 0.4684916338653572, "learning_rate": 1.8911076975593104e-05, "loss": 0.5881, "num_tokens": 291474745.0, "step": 3090 }, { "epoch": 0.5275644307902373, "grad_norm": 0.5866435318981604, "learning_rate": 1.8904249871991808e-05, "loss": 0.5629, "num_tokens": 291537098.0, "step": 3091 }, { "epoch": 0.5277351083802697, "grad_norm": 0.48887748506146694, "learning_rate": 1.8897422768390512e-05, "loss": 0.539, "num_tokens": 291624421.0, "step": 3092 }, { "epoch": 0.5279057859703021, "grad_norm": 0.4769655046784254, "learning_rate": 1.8890595664789216e-05, "loss": 0.5615, "num_tokens": 291714728.0, "step": 3093 }, { "epoch": 0.5280764635603346, "grad_norm": 0.45306263899742144, "learning_rate": 1.888376856118792e-05, "loss": 0.5344, "num_tokens": 291811401.0, "step": 3094 }, { "epoch": 0.528247141150367, "grad_norm": 0.4718968143085379, "learning_rate": 1.887694145758662e-05, "loss": 0.6405, "num_tokens": 291922253.0, "step": 3095 }, { "epoch": 0.5284178187403994, "grad_norm": 0.49293646928296436, "learning_rate": 1.8870114353985324e-05, "loss": 0.6171, "num_tokens": 292015737.0, "step": 3096 }, { "epoch": 0.5285884963304318, "grad_norm": 0.4759670854501478, "learning_rate": 1.8863287250384027e-05, "loss": 0.5566, "num_tokens": 292103536.0, "step": 3097 }, { "epoch": 0.5287591739204642, "grad_norm": 0.49465844830277833, "learning_rate": 1.8856460146782728e-05, "loss": 0.6606, "num_tokens": 292214865.0, "step": 3098 }, { "epoch": 0.5289298515104967, "grad_norm": 0.5784176835781271, "learning_rate": 1.884963304318143e-05, "loss": 0.6092, "num_tokens": 292295494.0, "step": 3099 }, { "epoch": 0.5291005291005291, "grad_norm": 0.46785602039302854, "learning_rate": 1.8842805939580135e-05, "loss": 0.6332, "num_tokens": 292406006.0, "step": 3100 }, { "epoch": 0.5292712066905615, "grad_norm": 0.5076115154766822, "learning_rate": 1.8835978835978836e-05, "loss": 0.6072, "num_tokens": 292500133.0, "step": 3101 }, { "epoch": 0.5294418842805939, "grad_norm": 0.48243160609350183, "learning_rate": 1.882915173237754e-05, "loss": 0.5725, "num_tokens": 292589253.0, "step": 3102 }, { "epoch": 0.5296125618706264, "grad_norm": 0.4937433957404967, "learning_rate": 1.8822324628776243e-05, "loss": 0.6808, "num_tokens": 292703622.0, "step": 3103 }, { "epoch": 0.5297832394606589, "grad_norm": 0.5034398626535053, "learning_rate": 1.8815497525174947e-05, "loss": 0.5689, "num_tokens": 292798827.0, "step": 3104 }, { "epoch": 0.5299539170506913, "grad_norm": 0.4444816337100842, "learning_rate": 1.880867042157365e-05, "loss": 0.6595, "num_tokens": 292935380.0, "step": 3105 }, { "epoch": 0.5301245946407237, "grad_norm": 0.46148661851400324, "learning_rate": 1.880184331797235e-05, "loss": 0.6197, "num_tokens": 293043197.0, "step": 3106 }, { "epoch": 0.5302952722307561, "grad_norm": 0.4946595869764554, "learning_rate": 1.8795016214371055e-05, "loss": 0.5729, "num_tokens": 293132361.0, "step": 3107 }, { "epoch": 0.5304659498207885, "grad_norm": 0.4645059581582265, "learning_rate": 1.8788189110769755e-05, "loss": 0.5804, "num_tokens": 293230618.0, "step": 3108 }, { "epoch": 0.530636627410821, "grad_norm": 0.5138792024133612, "learning_rate": 1.878136200716846e-05, "loss": 0.6102, "num_tokens": 293322605.0, "step": 3109 }, { "epoch": 0.5308073050008534, "grad_norm": 0.4717327697724768, "learning_rate": 1.8774534903567163e-05, "loss": 0.5821, "num_tokens": 293424794.0, "step": 3110 }, { "epoch": 0.5309779825908858, "grad_norm": 0.5472640720404892, "learning_rate": 1.8767707799965867e-05, "loss": 0.5302, "num_tokens": 293492098.0, "step": 3111 }, { "epoch": 0.5311486601809182, "grad_norm": 0.502409823649258, "learning_rate": 1.876088069636457e-05, "loss": 0.5663, "num_tokens": 293573506.0, "step": 3112 }, { "epoch": 0.5313193377709506, "grad_norm": 0.5583028272867726, "learning_rate": 1.8754053592763274e-05, "loss": 0.6321, "num_tokens": 293649862.0, "step": 3113 }, { "epoch": 0.5314900153609831, "grad_norm": 0.4919084966524508, "learning_rate": 1.8747226489161974e-05, "loss": 0.6158, "num_tokens": 293738238.0, "step": 3114 }, { "epoch": 0.5316606929510155, "grad_norm": 0.44877547497738846, "learning_rate": 1.8740399385560678e-05, "loss": 0.5219, "num_tokens": 293838737.0, "step": 3115 }, { "epoch": 0.531831370541048, "grad_norm": 0.5091816078784778, "learning_rate": 1.873357228195938e-05, "loss": 0.6747, "num_tokens": 293931223.0, "step": 3116 }, { "epoch": 0.5320020481310804, "grad_norm": 0.513770049372497, "learning_rate": 1.8726745178358082e-05, "loss": 0.518, "num_tokens": 294002444.0, "step": 3117 }, { "epoch": 0.5321727257211128, "grad_norm": 0.5144762648658163, "learning_rate": 1.8719918074756786e-05, "loss": 0.6404, "num_tokens": 294083709.0, "step": 3118 }, { "epoch": 0.5323434033111453, "grad_norm": 0.47123935885642715, "learning_rate": 1.871309097115549e-05, "loss": 0.606, "num_tokens": 294177381.0, "step": 3119 }, { "epoch": 0.5325140809011777, "grad_norm": 0.5205291858748078, "learning_rate": 1.870626386755419e-05, "loss": 0.6072, "num_tokens": 294257440.0, "step": 3120 }, { "epoch": 0.5326847584912101, "grad_norm": 0.46569845528236886, "learning_rate": 1.8699436763952894e-05, "loss": 0.5659, "num_tokens": 294356959.0, "step": 3121 }, { "epoch": 0.5328554360812425, "grad_norm": 0.4477377960003955, "learning_rate": 1.8692609660351598e-05, "loss": 0.5357, "num_tokens": 294460069.0, "step": 3122 }, { "epoch": 0.533026113671275, "grad_norm": 0.46100425163723446, "learning_rate": 1.86857825567503e-05, "loss": 0.6119, "num_tokens": 294567585.0, "step": 3123 }, { "epoch": 0.5331967912613074, "grad_norm": 0.5375254138913531, "learning_rate": 1.8678955453149005e-05, "loss": 0.6738, "num_tokens": 294652175.0, "step": 3124 }, { "epoch": 0.5333674688513398, "grad_norm": 0.4696762081092029, "learning_rate": 1.8672128349547706e-05, "loss": 0.5812, "num_tokens": 294753375.0, "step": 3125 }, { "epoch": 0.5335381464413722, "grad_norm": 0.5569104416803978, "learning_rate": 1.866530124594641e-05, "loss": 0.6261, "num_tokens": 294829117.0, "step": 3126 }, { "epoch": 0.5337088240314046, "grad_norm": 0.47907533028386085, "learning_rate": 1.865847414234511e-05, "loss": 0.5961, "num_tokens": 294928559.0, "step": 3127 }, { "epoch": 0.5338795016214372, "grad_norm": 0.4871309903816328, "learning_rate": 1.8651647038743814e-05, "loss": 0.63, "num_tokens": 295031030.0, "step": 3128 }, { "epoch": 0.5340501792114696, "grad_norm": 0.498340397998727, "learning_rate": 1.8644819935142517e-05, "loss": 0.5985, "num_tokens": 295124358.0, "step": 3129 }, { "epoch": 0.534220856801502, "grad_norm": 0.4677177820403074, "learning_rate": 1.863799283154122e-05, "loss": 0.6105, "num_tokens": 295235538.0, "step": 3130 }, { "epoch": 0.5343915343915344, "grad_norm": 0.44278885921496336, "learning_rate": 1.8631165727939925e-05, "loss": 0.6469, "num_tokens": 295352064.0, "step": 3131 }, { "epoch": 0.5345622119815668, "grad_norm": 0.4997947680496291, "learning_rate": 1.8624338624338625e-05, "loss": 0.6643, "num_tokens": 295458037.0, "step": 3132 }, { "epoch": 0.5347328895715993, "grad_norm": 0.47983147106104573, "learning_rate": 1.861751152073733e-05, "loss": 0.5743, "num_tokens": 295558378.0, "step": 3133 }, { "epoch": 0.5349035671616317, "grad_norm": 0.5742657413633274, "learning_rate": 1.8610684417136033e-05, "loss": 0.5877, "num_tokens": 295642216.0, "step": 3134 }, { "epoch": 0.5350742447516641, "grad_norm": 0.5512940575987569, "learning_rate": 1.8603857313534733e-05, "loss": 0.546, "num_tokens": 295703948.0, "step": 3135 }, { "epoch": 0.5352449223416965, "grad_norm": 0.4946730163318817, "learning_rate": 1.8597030209933437e-05, "loss": 0.5631, "num_tokens": 295790389.0, "step": 3136 }, { "epoch": 0.5354155999317289, "grad_norm": 0.4887477887396777, "learning_rate": 1.859020310633214e-05, "loss": 0.5084, "num_tokens": 295868277.0, "step": 3137 }, { "epoch": 0.5355862775217614, "grad_norm": 0.4939331405656355, "learning_rate": 1.858337600273084e-05, "loss": 0.6121, "num_tokens": 295960032.0, "step": 3138 }, { "epoch": 0.5357569551117938, "grad_norm": 0.44332207046075983, "learning_rate": 1.8576548899129545e-05, "loss": 0.5278, "num_tokens": 296064609.0, "step": 3139 }, { "epoch": 0.5359276327018262, "grad_norm": 0.4716402345665519, "learning_rate": 1.856972179552825e-05, "loss": 0.5443, "num_tokens": 296153273.0, "step": 3140 }, { "epoch": 0.5360983102918587, "grad_norm": 0.7331001234109966, "learning_rate": 1.8562894691926952e-05, "loss": 0.6505, "num_tokens": 296239939.0, "step": 3141 }, { "epoch": 0.5362689878818911, "grad_norm": 0.6020887696965216, "learning_rate": 1.8556067588325656e-05, "loss": 0.5832, "num_tokens": 296297131.0, "step": 3142 }, { "epoch": 0.5364396654719236, "grad_norm": 0.5014675787987937, "learning_rate": 1.8549240484724356e-05, "loss": 0.677, "num_tokens": 296398145.0, "step": 3143 }, { "epoch": 0.536610343061956, "grad_norm": 0.4963523187021656, "learning_rate": 1.854241338112306e-05, "loss": 0.5831, "num_tokens": 296486703.0, "step": 3144 }, { "epoch": 0.5367810206519884, "grad_norm": 0.5281739579824689, "learning_rate": 1.853558627752176e-05, "loss": 0.4828, "num_tokens": 296552110.0, "step": 3145 }, { "epoch": 0.5369516982420208, "grad_norm": 0.4760106425177943, "learning_rate": 1.8528759173920464e-05, "loss": 0.5683, "num_tokens": 296647754.0, "step": 3146 }, { "epoch": 0.5371223758320532, "grad_norm": 0.5295359706150942, "learning_rate": 1.8521932070319168e-05, "loss": 0.5745, "num_tokens": 296714173.0, "step": 3147 }, { "epoch": 0.5372930534220857, "grad_norm": 0.4925007048696446, "learning_rate": 1.8515104966717872e-05, "loss": 0.6572, "num_tokens": 296811929.0, "step": 3148 }, { "epoch": 0.5374637310121181, "grad_norm": 0.5014920664614139, "learning_rate": 1.8508277863116576e-05, "loss": 0.5448, "num_tokens": 296889693.0, "step": 3149 }, { "epoch": 0.5376344086021505, "grad_norm": 0.509856093477438, "learning_rate": 1.850145075951528e-05, "loss": 0.5221, "num_tokens": 296970419.0, "step": 3150 }, { "epoch": 0.5378050861921829, "grad_norm": 0.4859194339045163, "learning_rate": 1.849462365591398e-05, "loss": 0.5701, "num_tokens": 297058182.0, "step": 3151 }, { "epoch": 0.5379757637822153, "grad_norm": 0.4260823101325428, "learning_rate": 1.8487796552312684e-05, "loss": 0.5962, "num_tokens": 297183460.0, "step": 3152 }, { "epoch": 0.5381464413722479, "grad_norm": 0.5117070554062627, "learning_rate": 1.8480969448711384e-05, "loss": 0.5743, "num_tokens": 297271913.0, "step": 3153 }, { "epoch": 0.5383171189622803, "grad_norm": 0.5398525168197303, "learning_rate": 1.8474142345110088e-05, "loss": 0.5196, "num_tokens": 297335633.0, "step": 3154 }, { "epoch": 0.5384877965523127, "grad_norm": 0.4495771082418198, "learning_rate": 1.846731524150879e-05, "loss": 0.5624, "num_tokens": 297458953.0, "step": 3155 }, { "epoch": 0.5386584741423451, "grad_norm": 0.5095522098206506, "learning_rate": 1.8460488137907495e-05, "loss": 0.5312, "num_tokens": 297532331.0, "step": 3156 }, { "epoch": 0.5388291517323776, "grad_norm": 0.5091703545242908, "learning_rate": 1.8453661034306196e-05, "loss": 0.5776, "num_tokens": 297608284.0, "step": 3157 }, { "epoch": 0.53899982932241, "grad_norm": 0.48677995448589406, "learning_rate": 1.84468339307049e-05, "loss": 0.6223, "num_tokens": 297701487.0, "step": 3158 }, { "epoch": 0.5391705069124424, "grad_norm": 0.5128182744358407, "learning_rate": 1.8440006827103603e-05, "loss": 0.5801, "num_tokens": 297791443.0, "step": 3159 }, { "epoch": 0.5393411845024748, "grad_norm": 0.4769646135195715, "learning_rate": 1.8433179723502307e-05, "loss": 0.5782, "num_tokens": 297888605.0, "step": 3160 }, { "epoch": 0.5395118620925072, "grad_norm": 0.4679065628152036, "learning_rate": 1.842635261990101e-05, "loss": 0.5389, "num_tokens": 297984545.0, "step": 3161 }, { "epoch": 0.5396825396825397, "grad_norm": 0.47102394877596043, "learning_rate": 1.841952551629971e-05, "loss": 0.6432, "num_tokens": 298096207.0, "step": 3162 }, { "epoch": 0.5398532172725721, "grad_norm": 0.4811281947811005, "learning_rate": 1.8412698412698415e-05, "loss": 0.5351, "num_tokens": 298191476.0, "step": 3163 }, { "epoch": 0.5400238948626045, "grad_norm": 0.4801625711916469, "learning_rate": 1.8405871309097115e-05, "loss": 0.5945, "num_tokens": 298286416.0, "step": 3164 }, { "epoch": 0.540194572452637, "grad_norm": 0.49265939682546595, "learning_rate": 1.839904420549582e-05, "loss": 0.6251, "num_tokens": 298386764.0, "step": 3165 }, { "epoch": 0.5403652500426694, "grad_norm": 0.5806949875090246, "learning_rate": 1.8392217101894523e-05, "loss": 0.5684, "num_tokens": 298446944.0, "step": 3166 }, { "epoch": 0.5405359276327019, "grad_norm": 0.5194972616554627, "learning_rate": 1.8385389998293226e-05, "loss": 0.5994, "num_tokens": 298551130.0, "step": 3167 }, { "epoch": 0.5407066052227343, "grad_norm": 0.462951292469605, "learning_rate": 1.837856289469193e-05, "loss": 0.6287, "num_tokens": 298667621.0, "step": 3168 }, { "epoch": 0.5408772828127667, "grad_norm": 0.5481396978317484, "learning_rate": 1.837173579109063e-05, "loss": 0.7019, "num_tokens": 298787302.0, "step": 3169 }, { "epoch": 0.5410479604027991, "grad_norm": 0.4514213527132757, "learning_rate": 1.8364908687489334e-05, "loss": 0.6121, "num_tokens": 298900323.0, "step": 3170 }, { "epoch": 0.5412186379928315, "grad_norm": 0.4739895635184822, "learning_rate": 1.8358081583888038e-05, "loss": 0.598, "num_tokens": 299000916.0, "step": 3171 }, { "epoch": 0.541389315582864, "grad_norm": 0.5373048455451247, "learning_rate": 1.835125448028674e-05, "loss": 0.6256, "num_tokens": 299091810.0, "step": 3172 }, { "epoch": 0.5415599931728964, "grad_norm": 0.49565904091367974, "learning_rate": 1.8344427376685442e-05, "loss": 0.6069, "num_tokens": 299184629.0, "step": 3173 }, { "epoch": 0.5417306707629288, "grad_norm": 0.5678050071708751, "learning_rate": 1.8337600273084146e-05, "loss": 0.6119, "num_tokens": 299256201.0, "step": 3174 }, { "epoch": 0.5419013483529612, "grad_norm": 0.5493665813661777, "learning_rate": 1.833077316948285e-05, "loss": 0.6038, "num_tokens": 299342267.0, "step": 3175 }, { "epoch": 0.5420720259429936, "grad_norm": 0.6061777124639972, "learning_rate": 1.832394606588155e-05, "loss": 0.6665, "num_tokens": 299402288.0, "step": 3176 }, { "epoch": 0.5422427035330261, "grad_norm": 0.4575587146131139, "learning_rate": 1.8317118962280254e-05, "loss": 0.511, "num_tokens": 299495967.0, "step": 3177 }, { "epoch": 0.5424133811230586, "grad_norm": 0.47335441681094187, "learning_rate": 1.8310291858678958e-05, "loss": 0.5521, "num_tokens": 299589313.0, "step": 3178 }, { "epoch": 0.542584058713091, "grad_norm": 0.5080892885533668, "learning_rate": 1.830346475507766e-05, "loss": 0.5423, "num_tokens": 299674199.0, "step": 3179 }, { "epoch": 0.5427547363031234, "grad_norm": 0.47616754706253833, "learning_rate": 1.8296637651476362e-05, "loss": 0.5814, "num_tokens": 299768568.0, "step": 3180 }, { "epoch": 0.5429254138931559, "grad_norm": 0.5268873533757173, "learning_rate": 1.8289810547875066e-05, "loss": 0.623, "num_tokens": 299883966.0, "step": 3181 }, { "epoch": 0.5430960914831883, "grad_norm": 0.5347463351585031, "learning_rate": 1.8282983444273766e-05, "loss": 0.7061, "num_tokens": 299973541.0, "step": 3182 }, { "epoch": 0.5432667690732207, "grad_norm": 0.4666365118649967, "learning_rate": 1.827615634067247e-05, "loss": 0.5217, "num_tokens": 300059735.0, "step": 3183 }, { "epoch": 0.5434374466632531, "grad_norm": 0.6678722340013037, "learning_rate": 1.8269329237071173e-05, "loss": 0.6449, "num_tokens": 300128767.0, "step": 3184 }, { "epoch": 0.5436081242532855, "grad_norm": 0.4778818539275673, "learning_rate": 1.8262502133469877e-05, "loss": 0.5767, "num_tokens": 300233167.0, "step": 3185 }, { "epoch": 0.543778801843318, "grad_norm": 0.47773791275368616, "learning_rate": 1.825567502986858e-05, "loss": 0.6249, "num_tokens": 300340634.0, "step": 3186 }, { "epoch": 0.5439494794333504, "grad_norm": 0.48543969441675605, "learning_rate": 1.8248847926267285e-05, "loss": 0.6437, "num_tokens": 300441825.0, "step": 3187 }, { "epoch": 0.5441201570233828, "grad_norm": 0.518559513130123, "learning_rate": 1.8242020822665985e-05, "loss": 0.6081, "num_tokens": 300519321.0, "step": 3188 }, { "epoch": 0.5442908346134152, "grad_norm": 0.4984595262484238, "learning_rate": 1.823519371906469e-05, "loss": 0.5742, "num_tokens": 300603745.0, "step": 3189 }, { "epoch": 0.5444615122034477, "grad_norm": 0.5137485556208886, "learning_rate": 1.822836661546339e-05, "loss": 0.5754, "num_tokens": 300681396.0, "step": 3190 }, { "epoch": 0.5446321897934802, "grad_norm": 0.5139848126854727, "learning_rate": 1.8221539511862093e-05, "loss": 0.6888, "num_tokens": 300786118.0, "step": 3191 }, { "epoch": 0.5448028673835126, "grad_norm": 0.5341011746051796, "learning_rate": 1.8214712408260797e-05, "loss": 0.5602, "num_tokens": 300852332.0, "step": 3192 }, { "epoch": 0.544973544973545, "grad_norm": 0.47136690118520136, "learning_rate": 1.82078853046595e-05, "loss": 0.6087, "num_tokens": 300953361.0, "step": 3193 }, { "epoch": 0.5451442225635774, "grad_norm": 0.4920469822534496, "learning_rate": 1.82010582010582e-05, "loss": 0.5494, "num_tokens": 301039297.0, "step": 3194 }, { "epoch": 0.5453149001536098, "grad_norm": 0.4045765986627514, "learning_rate": 1.8194231097456905e-05, "loss": 0.6028, "num_tokens": 301189305.0, "step": 3195 }, { "epoch": 0.5454855777436423, "grad_norm": 0.5290667768829725, "learning_rate": 1.818740399385561e-05, "loss": 0.653, "num_tokens": 301271414.0, "step": 3196 }, { "epoch": 0.5456562553336747, "grad_norm": 0.5606600705473238, "learning_rate": 1.8180576890254312e-05, "loss": 0.5793, "num_tokens": 301342761.0, "step": 3197 }, { "epoch": 0.5458269329237071, "grad_norm": 0.47757107445298597, "learning_rate": 1.8173749786653016e-05, "loss": 0.5759, "num_tokens": 301426913.0, "step": 3198 }, { "epoch": 0.5459976105137395, "grad_norm": 0.4916283032487723, "learning_rate": 1.8166922683051716e-05, "loss": 0.523, "num_tokens": 301502425.0, "step": 3199 }, { "epoch": 0.5461682881037719, "grad_norm": 0.4974874523625466, "learning_rate": 1.816009557945042e-05, "loss": 0.539, "num_tokens": 301580795.0, "step": 3200 }, { "epoch": 0.5463389656938044, "grad_norm": 0.5167836872333724, "learning_rate": 1.815326847584912e-05, "loss": 0.656, "num_tokens": 301668144.0, "step": 3201 }, { "epoch": 0.5465096432838368, "grad_norm": 0.4421607605024438, "learning_rate": 1.8146441372247824e-05, "loss": 0.5614, "num_tokens": 301781480.0, "step": 3202 }, { "epoch": 0.5466803208738693, "grad_norm": 0.4327382609998891, "learning_rate": 1.8139614268646528e-05, "loss": 0.5393, "num_tokens": 301885740.0, "step": 3203 }, { "epoch": 0.5468509984639017, "grad_norm": 0.5242168256508906, "learning_rate": 1.8132787165045232e-05, "loss": 0.6308, "num_tokens": 301977618.0, "step": 3204 }, { "epoch": 0.5470216760539341, "grad_norm": 0.5027019470236086, "learning_rate": 1.8125960061443936e-05, "loss": 0.5576, "num_tokens": 302066331.0, "step": 3205 }, { "epoch": 0.5471923536439666, "grad_norm": 0.4574739279951613, "learning_rate": 1.811913295784264e-05, "loss": 0.6986, "num_tokens": 302193526.0, "step": 3206 }, { "epoch": 0.547363031233999, "grad_norm": 0.49300264302155467, "learning_rate": 1.811230585424134e-05, "loss": 0.6016, "num_tokens": 302279945.0, "step": 3207 }, { "epoch": 0.5475337088240314, "grad_norm": 0.510589083852796, "learning_rate": 1.8105478750640043e-05, "loss": 0.6277, "num_tokens": 302369656.0, "step": 3208 }, { "epoch": 0.5477043864140638, "grad_norm": 0.5298805507825468, "learning_rate": 1.8098651647038744e-05, "loss": 0.7264, "num_tokens": 302458956.0, "step": 3209 }, { "epoch": 0.5478750640040962, "grad_norm": 0.5200215035228213, "learning_rate": 1.8091824543437448e-05, "loss": 0.6363, "num_tokens": 302542248.0, "step": 3210 }, { "epoch": 0.5480457415941287, "grad_norm": 0.5062231492177758, "learning_rate": 1.808499743983615e-05, "loss": 0.6594, "num_tokens": 302639754.0, "step": 3211 }, { "epoch": 0.5482164191841611, "grad_norm": 0.5107437967606423, "learning_rate": 1.8078170336234855e-05, "loss": 0.6301, "num_tokens": 302729142.0, "step": 3212 }, { "epoch": 0.5483870967741935, "grad_norm": 0.516081881588452, "learning_rate": 1.8071343232633555e-05, "loss": 0.6305, "num_tokens": 302814021.0, "step": 3213 }, { "epoch": 0.5485577743642259, "grad_norm": 0.5234151056720422, "learning_rate": 1.806451612903226e-05, "loss": 0.5049, "num_tokens": 302881807.0, "step": 3214 }, { "epoch": 0.5487284519542585, "grad_norm": 0.4823381538407324, "learning_rate": 1.8057689025430963e-05, "loss": 0.6385, "num_tokens": 302989371.0, "step": 3215 }, { "epoch": 0.5488991295442909, "grad_norm": 0.5150285631001777, "learning_rate": 1.8050861921829667e-05, "loss": 0.5675, "num_tokens": 303069599.0, "step": 3216 }, { "epoch": 0.5490698071343233, "grad_norm": 0.4723402181102485, "learning_rate": 1.8044034818228367e-05, "loss": 0.6057, "num_tokens": 303174219.0, "step": 3217 }, { "epoch": 0.5492404847243557, "grad_norm": 0.49337664265067743, "learning_rate": 1.803720771462707e-05, "loss": 0.5265, "num_tokens": 303262956.0, "step": 3218 }, { "epoch": 0.5494111623143881, "grad_norm": 0.5077334143231674, "learning_rate": 1.803038061102577e-05, "loss": 0.5436, "num_tokens": 303348342.0, "step": 3219 }, { "epoch": 0.5495818399044206, "grad_norm": 0.47025477353862094, "learning_rate": 1.8023553507424475e-05, "loss": 0.6163, "num_tokens": 303450794.0, "step": 3220 }, { "epoch": 0.549752517494453, "grad_norm": 0.4520548381352575, "learning_rate": 1.801672640382318e-05, "loss": 0.5988, "num_tokens": 303561550.0, "step": 3221 }, { "epoch": 0.5499231950844854, "grad_norm": 0.4772411906887436, "learning_rate": 1.8009899300221883e-05, "loss": 0.5595, "num_tokens": 303655516.0, "step": 3222 }, { "epoch": 0.5500938726745178, "grad_norm": 0.6399986101806774, "learning_rate": 1.8003072196620586e-05, "loss": 0.7234, "num_tokens": 303749288.0, "step": 3223 }, { "epoch": 0.5502645502645502, "grad_norm": 0.5830972218278365, "learning_rate": 1.799624509301929e-05, "loss": 0.6602, "num_tokens": 303826917.0, "step": 3224 }, { "epoch": 0.5504352278545827, "grad_norm": 0.4513218194573533, "learning_rate": 1.798941798941799e-05, "loss": 0.5475, "num_tokens": 303927059.0, "step": 3225 }, { "epoch": 0.5506059054446151, "grad_norm": 0.5090470490543042, "learning_rate": 1.7982590885816694e-05, "loss": 0.6211, "num_tokens": 304017605.0, "step": 3226 }, { "epoch": 0.5507765830346476, "grad_norm": 0.4817139696161923, "learning_rate": 1.7975763782215398e-05, "loss": 0.568, "num_tokens": 304111586.0, "step": 3227 }, { "epoch": 0.55094726062468, "grad_norm": 0.4876906531398363, "learning_rate": 1.79689366786141e-05, "loss": 0.5659, "num_tokens": 304202554.0, "step": 3228 }, { "epoch": 0.5511179382147124, "grad_norm": 0.5135803400123429, "learning_rate": 1.7962109575012802e-05, "loss": 0.5686, "num_tokens": 304286081.0, "step": 3229 }, { "epoch": 0.5512886158047449, "grad_norm": 0.5542935167033908, "learning_rate": 1.7955282471411506e-05, "loss": 0.6218, "num_tokens": 304393147.0, "step": 3230 }, { "epoch": 0.5514592933947773, "grad_norm": 0.4631522362795691, "learning_rate": 1.7948455367810206e-05, "loss": 0.5925, "num_tokens": 304488182.0, "step": 3231 }, { "epoch": 0.5516299709848097, "grad_norm": 0.46177547248283696, "learning_rate": 1.794162826420891e-05, "loss": 0.5887, "num_tokens": 304595680.0, "step": 3232 }, { "epoch": 0.5518006485748421, "grad_norm": 0.4249764851239371, "learning_rate": 1.7934801160607614e-05, "loss": 0.5523, "num_tokens": 304711338.0, "step": 3233 }, { "epoch": 0.5519713261648745, "grad_norm": 0.472141374427442, "learning_rate": 1.7927974057006318e-05, "loss": 0.567, "num_tokens": 304802397.0, "step": 3234 }, { "epoch": 0.552142003754907, "grad_norm": 0.499008726962044, "learning_rate": 1.792114695340502e-05, "loss": 0.5972, "num_tokens": 304896005.0, "step": 3235 }, { "epoch": 0.5523126813449394, "grad_norm": 0.4891375649919364, "learning_rate": 1.791431984980372e-05, "loss": 0.6879, "num_tokens": 305022644.0, "step": 3236 }, { "epoch": 0.5524833589349718, "grad_norm": 0.47382732594438787, "learning_rate": 1.7907492746202425e-05, "loss": 0.5186, "num_tokens": 305104199.0, "step": 3237 }, { "epoch": 0.5526540365250042, "grad_norm": 0.5170436842163904, "learning_rate": 1.7900665642601126e-05, "loss": 0.6089, "num_tokens": 305189019.0, "step": 3238 }, { "epoch": 0.5528247141150366, "grad_norm": 0.5197673760716421, "learning_rate": 1.789383853899983e-05, "loss": 0.5526, "num_tokens": 305263058.0, "step": 3239 }, { "epoch": 0.5529953917050692, "grad_norm": 0.545814739763571, "learning_rate": 1.7887011435398533e-05, "loss": 0.5579, "num_tokens": 305355625.0, "step": 3240 }, { "epoch": 0.5531660692951016, "grad_norm": 0.46661647062513395, "learning_rate": 1.7880184331797237e-05, "loss": 0.607, "num_tokens": 305465993.0, "step": 3241 }, { "epoch": 0.553336746885134, "grad_norm": 0.4664173992416769, "learning_rate": 1.787335722819594e-05, "loss": 0.5303, "num_tokens": 305560054.0, "step": 3242 }, { "epoch": 0.5535074244751664, "grad_norm": 0.568720812769375, "learning_rate": 1.7866530124594645e-05, "loss": 0.517, "num_tokens": 305626284.0, "step": 3243 }, { "epoch": 0.5536781020651989, "grad_norm": 0.43603321235188447, "learning_rate": 1.7859703020993345e-05, "loss": 0.5519, "num_tokens": 305742173.0, "step": 3244 }, { "epoch": 0.5538487796552313, "grad_norm": 0.46996134541818224, "learning_rate": 1.785287591739205e-05, "loss": 0.6059, "num_tokens": 305840660.0, "step": 3245 }, { "epoch": 0.5540194572452637, "grad_norm": 0.4531314697794946, "learning_rate": 1.784604881379075e-05, "loss": 0.6001, "num_tokens": 305945919.0, "step": 3246 }, { "epoch": 0.5541901348352961, "grad_norm": 0.45883437444837166, "learning_rate": 1.7839221710189453e-05, "loss": 0.518, "num_tokens": 306037434.0, "step": 3247 }, { "epoch": 0.5543608124253285, "grad_norm": 0.5067524085668664, "learning_rate": 1.7832394606588157e-05, "loss": 0.5823, "num_tokens": 306123727.0, "step": 3248 }, { "epoch": 0.554531490015361, "grad_norm": 0.49109835670158475, "learning_rate": 1.782556750298686e-05, "loss": 0.6271, "num_tokens": 306221052.0, "step": 3249 }, { "epoch": 0.5547021676053934, "grad_norm": 0.5248816548787603, "learning_rate": 1.781874039938556e-05, "loss": 0.5471, "num_tokens": 306290238.0, "step": 3250 }, { "epoch": 0.5548728451954258, "grad_norm": 0.5389995989238137, "learning_rate": 1.7811913295784265e-05, "loss": 0.599, "num_tokens": 306365015.0, "step": 3251 }, { "epoch": 0.5550435227854583, "grad_norm": 0.45246749456858393, "learning_rate": 1.780508619218297e-05, "loss": 0.5739, "num_tokens": 306474320.0, "step": 3252 }, { "epoch": 0.5552142003754907, "grad_norm": 0.47046632397711896, "learning_rate": 1.7798259088581672e-05, "loss": 0.5607, "num_tokens": 306574641.0, "step": 3253 }, { "epoch": 0.5553848779655232, "grad_norm": 0.5526187193711913, "learning_rate": 1.7791431984980372e-05, "loss": 0.6452, "num_tokens": 306662624.0, "step": 3254 }, { "epoch": 0.5555555555555556, "grad_norm": 0.5282506170555303, "learning_rate": 1.7784604881379076e-05, "loss": 0.6892, "num_tokens": 306764459.0, "step": 3255 }, { "epoch": 0.555726233145588, "grad_norm": 0.42626734291698226, "learning_rate": 1.7777777777777777e-05, "loss": 0.6354, "num_tokens": 306898202.0, "step": 3256 }, { "epoch": 0.5558969107356204, "grad_norm": 0.45583998515904645, "learning_rate": 1.777095067417648e-05, "loss": 0.5292, "num_tokens": 306989372.0, "step": 3257 }, { "epoch": 0.5560675883256528, "grad_norm": 0.4729546708431539, "learning_rate": 1.7764123570575184e-05, "loss": 0.5586, "num_tokens": 307079368.0, "step": 3258 }, { "epoch": 0.5562382659156853, "grad_norm": 0.5412158141009671, "learning_rate": 1.7757296466973888e-05, "loss": 0.6058, "num_tokens": 307154916.0, "step": 3259 }, { "epoch": 0.5564089435057177, "grad_norm": 0.4985072417762757, "learning_rate": 1.775046936337259e-05, "loss": 0.5186, "num_tokens": 307229811.0, "step": 3260 }, { "epoch": 0.5565796210957501, "grad_norm": 0.49914609763029083, "learning_rate": 1.7743642259771295e-05, "loss": 0.5331, "num_tokens": 307311504.0, "step": 3261 }, { "epoch": 0.5567502986857825, "grad_norm": 0.45308885795214476, "learning_rate": 1.7736815156169996e-05, "loss": 0.5457, "num_tokens": 307406460.0, "step": 3262 }, { "epoch": 0.5569209762758149, "grad_norm": 0.42820504559889633, "learning_rate": 1.77299880525687e-05, "loss": 0.6331, "num_tokens": 307542428.0, "step": 3263 }, { "epoch": 0.5570916538658475, "grad_norm": 0.5014936856172574, "learning_rate": 1.7723160948967403e-05, "loss": 0.5959, "num_tokens": 307634502.0, "step": 3264 }, { "epoch": 0.5572623314558799, "grad_norm": 0.5181959545458348, "learning_rate": 1.7716333845366104e-05, "loss": 0.6621, "num_tokens": 307729705.0, "step": 3265 }, { "epoch": 0.5574330090459123, "grad_norm": 0.5127166513836698, "learning_rate": 1.7709506741764807e-05, "loss": 0.554, "num_tokens": 307804167.0, "step": 3266 }, { "epoch": 0.5576036866359447, "grad_norm": 0.509682024435058, "learning_rate": 1.770267963816351e-05, "loss": 0.6554, "num_tokens": 307895708.0, "step": 3267 }, { "epoch": 0.5577743642259771, "grad_norm": 0.6067119577624526, "learning_rate": 1.7695852534562215e-05, "loss": 0.6178, "num_tokens": 307960130.0, "step": 3268 }, { "epoch": 0.5579450418160096, "grad_norm": 0.5456001007185598, "learning_rate": 1.7689025430960915e-05, "loss": 0.5635, "num_tokens": 308027455.0, "step": 3269 }, { "epoch": 0.558115719406042, "grad_norm": 0.577521520516446, "learning_rate": 1.768219832735962e-05, "loss": 0.6347, "num_tokens": 308112416.0, "step": 3270 }, { "epoch": 0.5582863969960744, "grad_norm": 0.4788793616345669, "learning_rate": 1.7675371223758323e-05, "loss": 0.599, "num_tokens": 308209810.0, "step": 3271 }, { "epoch": 0.5584570745861068, "grad_norm": 0.5111544818376718, "learning_rate": 1.7668544120157027e-05, "loss": 0.5075, "num_tokens": 308288632.0, "step": 3272 }, { "epoch": 0.5586277521761392, "grad_norm": 0.48367623993346553, "learning_rate": 1.7661717016555727e-05, "loss": 0.5581, "num_tokens": 308372956.0, "step": 3273 }, { "epoch": 0.5587984297661717, "grad_norm": 0.47066521941545914, "learning_rate": 1.765488991295443e-05, "loss": 0.5028, "num_tokens": 308453721.0, "step": 3274 }, { "epoch": 0.5589691073562041, "grad_norm": 0.5346035154466612, "learning_rate": 1.764806280935313e-05, "loss": 0.6597, "num_tokens": 308541153.0, "step": 3275 }, { "epoch": 0.5591397849462365, "grad_norm": 0.44466765941216807, "learning_rate": 1.7641235705751835e-05, "loss": 0.6089, "num_tokens": 308667945.0, "step": 3276 }, { "epoch": 0.559310462536269, "grad_norm": 0.48343368120798147, "learning_rate": 1.763440860215054e-05, "loss": 0.5874, "num_tokens": 308763287.0, "step": 3277 }, { "epoch": 0.5594811401263015, "grad_norm": 0.5252910576541392, "learning_rate": 1.7627581498549242e-05, "loss": 0.6137, "num_tokens": 308847429.0, "step": 3278 }, { "epoch": 0.5596518177163339, "grad_norm": 0.48154936182873703, "learning_rate": 1.7620754394947946e-05, "loss": 0.6169, "num_tokens": 308958300.0, "step": 3279 }, { "epoch": 0.5598224953063663, "grad_norm": 0.4645424219602783, "learning_rate": 1.761392729134665e-05, "loss": 0.5806, "num_tokens": 309059530.0, "step": 3280 }, { "epoch": 0.5599931728963987, "grad_norm": 0.491364920286677, "learning_rate": 1.760710018774535e-05, "loss": 0.6145, "num_tokens": 309153971.0, "step": 3281 }, { "epoch": 0.5601638504864311, "grad_norm": 0.48665003552311026, "learning_rate": 1.7600273084144054e-05, "loss": 0.6084, "num_tokens": 309250903.0, "step": 3282 }, { "epoch": 0.5603345280764636, "grad_norm": 0.44883052975272497, "learning_rate": 1.7593445980542754e-05, "loss": 0.5212, "num_tokens": 309365865.0, "step": 3283 }, { "epoch": 0.560505205666496, "grad_norm": 0.4651044514767892, "learning_rate": 1.7586618876941458e-05, "loss": 0.6204, "num_tokens": 309476572.0, "step": 3284 }, { "epoch": 0.5606758832565284, "grad_norm": 0.5058720650060079, "learning_rate": 1.7579791773340162e-05, "loss": 0.6121, "num_tokens": 309559113.0, "step": 3285 }, { "epoch": 0.5608465608465608, "grad_norm": 0.4926489813992244, "learning_rate": 1.7572964669738866e-05, "loss": 0.5965, "num_tokens": 309644246.0, "step": 3286 }, { "epoch": 0.5610172384365932, "grad_norm": 0.5375769351508352, "learning_rate": 1.7566137566137566e-05, "loss": 0.6239, "num_tokens": 309725665.0, "step": 3287 }, { "epoch": 0.5611879160266257, "grad_norm": 0.45926084705654124, "learning_rate": 1.755931046253627e-05, "loss": 0.639, "num_tokens": 309846550.0, "step": 3288 }, { "epoch": 0.5613585936166582, "grad_norm": 0.5549364284843736, "learning_rate": 1.7552483358934974e-05, "loss": 0.5462, "num_tokens": 309910070.0, "step": 3289 }, { "epoch": 0.5615292712066906, "grad_norm": 0.5068280246466197, "learning_rate": 1.7545656255333677e-05, "loss": 0.6269, "num_tokens": 310002689.0, "step": 3290 }, { "epoch": 0.561699948796723, "grad_norm": 0.4409216472899892, "learning_rate": 1.7538829151732378e-05, "loss": 0.6414, "num_tokens": 310122038.0, "step": 3291 }, { "epoch": 0.5618706263867554, "grad_norm": 0.47063104687880447, "learning_rate": 1.753200204813108e-05, "loss": 0.5554, "num_tokens": 310215537.0, "step": 3292 }, { "epoch": 0.5620413039767879, "grad_norm": 0.414441529654539, "learning_rate": 1.7525174944529782e-05, "loss": 0.5173, "num_tokens": 310336156.0, "step": 3293 }, { "epoch": 0.5622119815668203, "grad_norm": 0.5081478153481397, "learning_rate": 1.7518347840928486e-05, "loss": 0.5493, "num_tokens": 310421019.0, "step": 3294 }, { "epoch": 0.5623826591568527, "grad_norm": 0.5151175075417055, "learning_rate": 1.751152073732719e-05, "loss": 0.5262, "num_tokens": 310496166.0, "step": 3295 }, { "epoch": 0.5625533367468851, "grad_norm": 0.5036068405546553, "learning_rate": 1.7504693633725893e-05, "loss": 0.618, "num_tokens": 310598057.0, "step": 3296 }, { "epoch": 0.5627240143369175, "grad_norm": 0.49653064779619194, "learning_rate": 1.7497866530124597e-05, "loss": 0.6215, "num_tokens": 310697502.0, "step": 3297 }, { "epoch": 0.56289469192695, "grad_norm": 0.4721057118463774, "learning_rate": 1.74910394265233e-05, "loss": 0.5991, "num_tokens": 310803866.0, "step": 3298 }, { "epoch": 0.5630653695169824, "grad_norm": 0.48490020173843534, "learning_rate": 1.7484212322922005e-05, "loss": 0.5705, "num_tokens": 310891989.0, "step": 3299 }, { "epoch": 0.5632360471070148, "grad_norm": 0.45234984273568635, "learning_rate": 1.7477385219320705e-05, "loss": 0.5557, "num_tokens": 310996686.0, "step": 3300 }, { "epoch": 0.5634067246970472, "grad_norm": 0.47512806095817806, "learning_rate": 1.747055811571941e-05, "loss": 0.7044, "num_tokens": 311120945.0, "step": 3301 }, { "epoch": 0.5635774022870798, "grad_norm": 0.5461026318725828, "learning_rate": 1.746373101211811e-05, "loss": 0.5392, "num_tokens": 311187782.0, "step": 3302 }, { "epoch": 0.5637480798771122, "grad_norm": 0.47704580210513703, "learning_rate": 1.7456903908516813e-05, "loss": 0.6069, "num_tokens": 311277892.0, "step": 3303 }, { "epoch": 0.5639187574671446, "grad_norm": 0.5048100465013765, "learning_rate": 1.7450076804915517e-05, "loss": 0.6023, "num_tokens": 311361219.0, "step": 3304 }, { "epoch": 0.564089435057177, "grad_norm": 0.5189239340111449, "learning_rate": 1.744324970131422e-05, "loss": 0.57, "num_tokens": 311437595.0, "step": 3305 }, { "epoch": 0.5642601126472094, "grad_norm": 0.4769703009731969, "learning_rate": 1.743642259771292e-05, "loss": 0.6548, "num_tokens": 311561149.0, "step": 3306 }, { "epoch": 0.5644307902372419, "grad_norm": 0.45474484344930943, "learning_rate": 1.7429595494111624e-05, "loss": 0.5948, "num_tokens": 311676778.0, "step": 3307 }, { "epoch": 0.5646014678272743, "grad_norm": 0.5000657917008792, "learning_rate": 1.7422768390510328e-05, "loss": 0.6407, "num_tokens": 311770304.0, "step": 3308 }, { "epoch": 0.5647721454173067, "grad_norm": 0.4600489107831235, "learning_rate": 1.7415941286909032e-05, "loss": 0.6723, "num_tokens": 311889592.0, "step": 3309 }, { "epoch": 0.5649428230073391, "grad_norm": 0.46654437464728876, "learning_rate": 1.7409114183307732e-05, "loss": 0.493, "num_tokens": 311976365.0, "step": 3310 }, { "epoch": 0.5651135005973715, "grad_norm": 0.5246576992050234, "learning_rate": 1.7402287079706436e-05, "loss": 0.595, "num_tokens": 312062174.0, "step": 3311 }, { "epoch": 0.565284178187404, "grad_norm": 0.5128550382420654, "learning_rate": 1.7395459976105136e-05, "loss": 0.5754, "num_tokens": 312155023.0, "step": 3312 }, { "epoch": 0.5654548557774364, "grad_norm": 0.48222214995864376, "learning_rate": 1.738863287250384e-05, "loss": 0.6161, "num_tokens": 312252598.0, "step": 3313 }, { "epoch": 0.5656255333674689, "grad_norm": 0.5103327983383292, "learning_rate": 1.7381805768902544e-05, "loss": 0.5536, "num_tokens": 312335423.0, "step": 3314 }, { "epoch": 0.5657962109575013, "grad_norm": 0.5437608050596344, "learning_rate": 1.7374978665301248e-05, "loss": 0.5488, "num_tokens": 312403802.0, "step": 3315 }, { "epoch": 0.5659668885475337, "grad_norm": 0.533864717457484, "learning_rate": 1.736815156169995e-05, "loss": 0.585, "num_tokens": 312478822.0, "step": 3316 }, { "epoch": 0.5661375661375662, "grad_norm": 0.44791649322539234, "learning_rate": 1.7361324458098655e-05, "loss": 0.5513, "num_tokens": 312575105.0, "step": 3317 }, { "epoch": 0.5663082437275986, "grad_norm": 0.5246317889557266, "learning_rate": 1.7354497354497356e-05, "loss": 0.6685, "num_tokens": 312661242.0, "step": 3318 }, { "epoch": 0.566478921317631, "grad_norm": 0.5177839841028439, "learning_rate": 1.734767025089606e-05, "loss": 0.5531, "num_tokens": 312741927.0, "step": 3319 }, { "epoch": 0.5666495989076634, "grad_norm": 0.5585216646486987, "learning_rate": 1.734084314729476e-05, "loss": 0.5543, "num_tokens": 312806053.0, "step": 3320 }, { "epoch": 0.5668202764976958, "grad_norm": 0.47028421604634163, "learning_rate": 1.7334016043693464e-05, "loss": 0.731, "num_tokens": 312942352.0, "step": 3321 }, { "epoch": 0.5669909540877283, "grad_norm": 0.6252852715412945, "learning_rate": 1.7327188940092167e-05, "loss": 0.6486, "num_tokens": 313021684.0, "step": 3322 }, { "epoch": 0.5671616316777607, "grad_norm": 0.4923556243855555, "learning_rate": 1.732036183649087e-05, "loss": 0.6038, "num_tokens": 313116241.0, "step": 3323 }, { "epoch": 0.5673323092677931, "grad_norm": 0.5399428176979846, "learning_rate": 1.731353473288957e-05, "loss": 0.671, "num_tokens": 313196528.0, "step": 3324 }, { "epoch": 0.5675029868578255, "grad_norm": 0.524253850592435, "learning_rate": 1.7306707629288275e-05, "loss": 0.5374, "num_tokens": 313268453.0, "step": 3325 }, { "epoch": 0.567673664447858, "grad_norm": 0.4353046469033207, "learning_rate": 1.729988052568698e-05, "loss": 0.5132, "num_tokens": 313375265.0, "step": 3326 }, { "epoch": 0.5678443420378905, "grad_norm": 0.4754664112088372, "learning_rate": 1.7293053422085683e-05, "loss": 0.5675, "num_tokens": 313476516.0, "step": 3327 }, { "epoch": 0.5680150196279229, "grad_norm": 0.47551302888807684, "learning_rate": 1.7286226318484383e-05, "loss": 0.5539, "num_tokens": 313571078.0, "step": 3328 }, { "epoch": 0.5681856972179553, "grad_norm": 0.4730134134112909, "learning_rate": 1.7279399214883087e-05, "loss": 0.5652, "num_tokens": 313660954.0, "step": 3329 }, { "epoch": 0.5683563748079877, "grad_norm": 0.42879111081414684, "learning_rate": 1.727257211128179e-05, "loss": 0.6032, "num_tokens": 313788191.0, "step": 3330 }, { "epoch": 0.5685270523980201, "grad_norm": 0.43649882470101564, "learning_rate": 1.726574500768049e-05, "loss": 0.5289, "num_tokens": 313892184.0, "step": 3331 }, { "epoch": 0.5686977299880526, "grad_norm": 0.4490969604872597, "learning_rate": 1.7258917904079195e-05, "loss": 0.5928, "num_tokens": 314015912.0, "step": 3332 }, { "epoch": 0.568868407578085, "grad_norm": 0.5081398929910466, "learning_rate": 1.72520908004779e-05, "loss": 0.5968, "num_tokens": 314104624.0, "step": 3333 }, { "epoch": 0.5690390851681174, "grad_norm": 0.5471754397727308, "learning_rate": 1.7245263696876602e-05, "loss": 0.6159, "num_tokens": 314179968.0, "step": 3334 }, { "epoch": 0.5692097627581498, "grad_norm": 0.4362566038774037, "learning_rate": 1.7238436593275306e-05, "loss": 0.5948, "num_tokens": 314307149.0, "step": 3335 }, { "epoch": 0.5693804403481822, "grad_norm": 0.509347342332704, "learning_rate": 1.723160948967401e-05, "loss": 0.6716, "num_tokens": 314410779.0, "step": 3336 }, { "epoch": 0.5695511179382147, "grad_norm": 0.4535973605151748, "learning_rate": 1.722478238607271e-05, "loss": 0.5139, "num_tokens": 314511886.0, "step": 3337 }, { "epoch": 0.5697217955282471, "grad_norm": 0.5260413596276682, "learning_rate": 1.7217955282471414e-05, "loss": 0.5629, "num_tokens": 314595008.0, "step": 3338 }, { "epoch": 0.5698924731182796, "grad_norm": 0.47918130690110916, "learning_rate": 1.7211128178870114e-05, "loss": 0.5383, "num_tokens": 314678706.0, "step": 3339 }, { "epoch": 0.570063150708312, "grad_norm": 0.5640412365333978, "learning_rate": 1.7204301075268818e-05, "loss": 0.5428, "num_tokens": 314764439.0, "step": 3340 }, { "epoch": 0.5702338282983445, "grad_norm": 0.5529933749582262, "learning_rate": 1.7197473971667522e-05, "loss": 0.6412, "num_tokens": 314840691.0, "step": 3341 }, { "epoch": 0.5704045058883769, "grad_norm": 0.5186054895910153, "learning_rate": 1.7190646868066226e-05, "loss": 0.5008, "num_tokens": 314905972.0, "step": 3342 }, { "epoch": 0.5705751834784093, "grad_norm": 0.5069785134096528, "learning_rate": 1.7183819764464926e-05, "loss": 0.5794, "num_tokens": 314986142.0, "step": 3343 }, { "epoch": 0.5707458610684417, "grad_norm": 0.51221383199397, "learning_rate": 1.717699266086363e-05, "loss": 0.5767, "num_tokens": 315063894.0, "step": 3344 }, { "epoch": 0.5709165386584741, "grad_norm": 0.4888638220785249, "learning_rate": 1.7170165557262334e-05, "loss": 0.6811, "num_tokens": 315170636.0, "step": 3345 }, { "epoch": 0.5710872162485066, "grad_norm": 0.4584002387486498, "learning_rate": 1.7163338453661037e-05, "loss": 0.6003, "num_tokens": 315293863.0, "step": 3346 }, { "epoch": 0.571257893838539, "grad_norm": 0.4642472111313573, "learning_rate": 1.7156511350059738e-05, "loss": 0.5873, "num_tokens": 315389645.0, "step": 3347 }, { "epoch": 0.5714285714285714, "grad_norm": 0.42611479044066325, "learning_rate": 1.714968424645844e-05, "loss": 0.5549, "num_tokens": 315505170.0, "step": 3348 }, { "epoch": 0.5715992490186038, "grad_norm": 0.48042256383374965, "learning_rate": 1.7142857142857142e-05, "loss": 0.5723, "num_tokens": 315596433.0, "step": 3349 }, { "epoch": 0.5717699266086362, "grad_norm": 0.4705121959148618, "learning_rate": 1.7136030039255846e-05, "loss": 0.6016, "num_tokens": 315699274.0, "step": 3350 }, { "epoch": 0.5719406041986688, "grad_norm": 0.5088060286836834, "learning_rate": 1.712920293565455e-05, "loss": 0.6943, "num_tokens": 315801714.0, "step": 3351 }, { "epoch": 0.5721112817887012, "grad_norm": 0.5230649800713418, "learning_rate": 1.7122375832053253e-05, "loss": 0.676, "num_tokens": 315889626.0, "step": 3352 }, { "epoch": 0.5722819593787336, "grad_norm": 0.4635005858964634, "learning_rate": 1.7115548728451957e-05, "loss": 0.561, "num_tokens": 315985559.0, "step": 3353 }, { "epoch": 0.572452636968766, "grad_norm": 0.4841387927554741, "learning_rate": 1.710872162485066e-05, "loss": 0.5897, "num_tokens": 316081438.0, "step": 3354 }, { "epoch": 0.5726233145587984, "grad_norm": 0.6165731313023756, "learning_rate": 1.710189452124936e-05, "loss": 0.5351, "num_tokens": 316149352.0, "step": 3355 }, { "epoch": 0.5727939921488309, "grad_norm": 0.4431700143413621, "learning_rate": 1.7095067417648065e-05, "loss": 0.617, "num_tokens": 316277260.0, "step": 3356 }, { "epoch": 0.5729646697388633, "grad_norm": 0.4712993326962704, "learning_rate": 1.7088240314046765e-05, "loss": 0.5621, "num_tokens": 316374626.0, "step": 3357 }, { "epoch": 0.5731353473288957, "grad_norm": 0.4739631949659726, "learning_rate": 1.708141321044547e-05, "loss": 0.5851, "num_tokens": 316474566.0, "step": 3358 }, { "epoch": 0.5733060249189281, "grad_norm": 0.49056661930781337, "learning_rate": 1.7074586106844173e-05, "loss": 0.58, "num_tokens": 316561447.0, "step": 3359 }, { "epoch": 0.5734767025089605, "grad_norm": 0.547252617210736, "learning_rate": 1.7067759003242876e-05, "loss": 0.5989, "num_tokens": 316632345.0, "step": 3360 }, { "epoch": 0.573647380098993, "grad_norm": 0.4719195382780712, "learning_rate": 1.7060931899641577e-05, "loss": 0.5484, "num_tokens": 316729205.0, "step": 3361 }, { "epoch": 0.5738180576890254, "grad_norm": 0.5192758321686136, "learning_rate": 1.705410479604028e-05, "loss": 0.6434, "num_tokens": 316811179.0, "step": 3362 }, { "epoch": 0.5739887352790578, "grad_norm": 0.4364843825816534, "learning_rate": 1.7047277692438984e-05, "loss": 0.5282, "num_tokens": 316916459.0, "step": 3363 }, { "epoch": 0.5741594128690903, "grad_norm": 0.4781421180716582, "learning_rate": 1.7040450588837688e-05, "loss": 0.5172, "num_tokens": 317000261.0, "step": 3364 }, { "epoch": 0.5743300904591228, "grad_norm": 0.45568561940221425, "learning_rate": 1.7033623485236392e-05, "loss": 0.5677, "num_tokens": 317101738.0, "step": 3365 }, { "epoch": 0.5745007680491552, "grad_norm": 0.480393493024414, "learning_rate": 1.7026796381635092e-05, "loss": 0.5498, "num_tokens": 317194884.0, "step": 3366 }, { "epoch": 0.5746714456391876, "grad_norm": 0.5169768241879293, "learning_rate": 1.7019969278033796e-05, "loss": 0.5571, "num_tokens": 317274231.0, "step": 3367 }, { "epoch": 0.57484212322922, "grad_norm": 0.49364872326471004, "learning_rate": 1.7013142174432496e-05, "loss": 0.562, "num_tokens": 317353009.0, "step": 3368 }, { "epoch": 0.5750128008192524, "grad_norm": 0.5020528857820099, "learning_rate": 1.70063150708312e-05, "loss": 0.6303, "num_tokens": 317447358.0, "step": 3369 }, { "epoch": 0.5751834784092849, "grad_norm": 0.48347783466502814, "learning_rate": 1.6999487967229904e-05, "loss": 0.5552, "num_tokens": 317538812.0, "step": 3370 }, { "epoch": 0.5753541559993173, "grad_norm": 0.5024648160596316, "learning_rate": 1.6992660863628608e-05, "loss": 0.5793, "num_tokens": 317626500.0, "step": 3371 }, { "epoch": 0.5755248335893497, "grad_norm": 0.5362371262400161, "learning_rate": 1.698583376002731e-05, "loss": 0.6038, "num_tokens": 317706960.0, "step": 3372 }, { "epoch": 0.5756955111793821, "grad_norm": 0.43391250784740065, "learning_rate": 1.6979006656426015e-05, "loss": 0.5421, "num_tokens": 317810467.0, "step": 3373 }, { "epoch": 0.5758661887694145, "grad_norm": 0.47675995754529843, "learning_rate": 1.6972179552824716e-05, "loss": 0.5922, "num_tokens": 317907758.0, "step": 3374 }, { "epoch": 0.576036866359447, "grad_norm": 0.5022474620928103, "learning_rate": 1.696535244922342e-05, "loss": 0.6584, "num_tokens": 318004398.0, "step": 3375 }, { "epoch": 0.5762075439494795, "grad_norm": 0.4477429113790345, "learning_rate": 1.695852534562212e-05, "loss": 0.5855, "num_tokens": 318114251.0, "step": 3376 }, { "epoch": 0.5763782215395119, "grad_norm": 0.48127642391089953, "learning_rate": 1.6951698242020823e-05, "loss": 0.6467, "num_tokens": 318211029.0, "step": 3377 }, { "epoch": 0.5765488991295443, "grad_norm": 0.4821702938382031, "learning_rate": 1.6944871138419527e-05, "loss": 0.6376, "num_tokens": 318318309.0, "step": 3378 }, { "epoch": 0.5767195767195767, "grad_norm": 0.44235193879225015, "learning_rate": 1.693804403481823e-05, "loss": 0.5271, "num_tokens": 318417759.0, "step": 3379 }, { "epoch": 0.5768902543096092, "grad_norm": 0.5169100775006121, "learning_rate": 1.693121693121693e-05, "loss": 0.5878, "num_tokens": 318498972.0, "step": 3380 }, { "epoch": 0.5770609318996416, "grad_norm": 0.5174659272685124, "learning_rate": 1.6924389827615635e-05, "loss": 0.6089, "num_tokens": 318579422.0, "step": 3381 }, { "epoch": 0.577231609489674, "grad_norm": 0.5248974556096488, "learning_rate": 1.691756272401434e-05, "loss": 0.5333, "num_tokens": 318666205.0, "step": 3382 }, { "epoch": 0.5774022870797064, "grad_norm": 0.446110804800162, "learning_rate": 1.6910735620413043e-05, "loss": 0.5003, "num_tokens": 318760235.0, "step": 3383 }, { "epoch": 0.5775729646697388, "grad_norm": 0.5245651352758642, "learning_rate": 1.6903908516811743e-05, "loss": 0.6187, "num_tokens": 318842738.0, "step": 3384 }, { "epoch": 0.5777436422597713, "grad_norm": 0.4673888904571171, "learning_rate": 1.6897081413210447e-05, "loss": 0.5896, "num_tokens": 318945050.0, "step": 3385 }, { "epoch": 0.5779143198498037, "grad_norm": 0.5369846244791284, "learning_rate": 1.6890254309609147e-05, "loss": 0.661, "num_tokens": 319033597.0, "step": 3386 }, { "epoch": 0.5780849974398361, "grad_norm": 0.49766684146399287, "learning_rate": 1.688342720600785e-05, "loss": 0.6465, "num_tokens": 319129380.0, "step": 3387 }, { "epoch": 0.5782556750298686, "grad_norm": 0.4975961151790149, "learning_rate": 1.6876600102406555e-05, "loss": 0.5302, "num_tokens": 319203591.0, "step": 3388 }, { "epoch": 0.578426352619901, "grad_norm": 2.0160519694258014, "learning_rate": 1.686977299880526e-05, "loss": 0.6782, "num_tokens": 319312177.0, "step": 3389 }, { "epoch": 0.5785970302099335, "grad_norm": 0.4938330690464966, "learning_rate": 1.6862945895203962e-05, "loss": 0.6279, "num_tokens": 319401740.0, "step": 3390 }, { "epoch": 0.5787677077999659, "grad_norm": 0.5621636643947034, "learning_rate": 1.6856118791602666e-05, "loss": 0.6063, "num_tokens": 319492618.0, "step": 3391 }, { "epoch": 0.5789383853899983, "grad_norm": 0.501252848975728, "learning_rate": 1.6849291688001366e-05, "loss": 0.6815, "num_tokens": 319592787.0, "step": 3392 }, { "epoch": 0.5791090629800307, "grad_norm": 0.4714149380259448, "learning_rate": 1.684246458440007e-05, "loss": 0.5801, "num_tokens": 319682183.0, "step": 3393 }, { "epoch": 0.5792797405700632, "grad_norm": 0.48810039371887937, "learning_rate": 1.683563748079877e-05, "loss": 0.6165, "num_tokens": 319773046.0, "step": 3394 }, { "epoch": 0.5794504181600956, "grad_norm": 0.5123057491727071, "learning_rate": 1.6828810377197474e-05, "loss": 0.637, "num_tokens": 319861148.0, "step": 3395 }, { "epoch": 0.579621095750128, "grad_norm": 0.5065731024667902, "learning_rate": 1.6821983273596178e-05, "loss": 0.5179, "num_tokens": 319928854.0, "step": 3396 }, { "epoch": 0.5797917733401604, "grad_norm": 0.4784781144423495, "learning_rate": 1.6815156169994882e-05, "loss": 0.6186, "num_tokens": 320028031.0, "step": 3397 }, { "epoch": 0.5799624509301928, "grad_norm": 0.5845857359924372, "learning_rate": 1.6808329066393586e-05, "loss": 0.4784, "num_tokens": 320088039.0, "step": 3398 }, { "epoch": 0.5801331285202252, "grad_norm": 0.4613729136728334, "learning_rate": 1.6801501962792286e-05, "loss": 0.5477, "num_tokens": 320181891.0, "step": 3399 }, { "epoch": 0.5803038061102577, "grad_norm": 0.5207832946692702, "learning_rate": 1.679467485919099e-05, "loss": 0.5746, "num_tokens": 320254737.0, "step": 3400 }, { "epoch": 0.5804744837002902, "grad_norm": 0.5186217913825224, "learning_rate": 1.6787847755589693e-05, "loss": 0.4771, "num_tokens": 320327074.0, "step": 3401 }, { "epoch": 0.5806451612903226, "grad_norm": 0.4984518346444063, "learning_rate": 1.6781020651988397e-05, "loss": 0.6004, "num_tokens": 320407614.0, "step": 3402 }, { "epoch": 0.580815838880355, "grad_norm": 0.45282959761243674, "learning_rate": 1.6774193548387098e-05, "loss": 0.5178, "num_tokens": 320498586.0, "step": 3403 }, { "epoch": 0.5809865164703875, "grad_norm": 0.4514384082547995, "learning_rate": 1.67673664447858e-05, "loss": 0.5511, "num_tokens": 320608409.0, "step": 3404 }, { "epoch": 0.5811571940604199, "grad_norm": 0.5100196565298788, "learning_rate": 1.6760539341184502e-05, "loss": 0.5652, "num_tokens": 320690206.0, "step": 3405 }, { "epoch": 0.5813278716504523, "grad_norm": 0.5063598064805167, "learning_rate": 1.6753712237583206e-05, "loss": 0.5257, "num_tokens": 320770437.0, "step": 3406 }, { "epoch": 0.5814985492404847, "grad_norm": 0.5251953606112958, "learning_rate": 1.674688513398191e-05, "loss": 0.5117, "num_tokens": 320846835.0, "step": 3407 }, { "epoch": 0.5816692268305171, "grad_norm": 0.539291200314105, "learning_rate": 1.6740058030380613e-05, "loss": 0.6663, "num_tokens": 320929595.0, "step": 3408 }, { "epoch": 0.5818399044205496, "grad_norm": 0.5151923309641129, "learning_rate": 1.6733230926779317e-05, "loss": 0.5059, "num_tokens": 320998454.0, "step": 3409 }, { "epoch": 0.582010582010582, "grad_norm": 0.4926914025348591, "learning_rate": 1.672640382317802e-05, "loss": 0.6207, "num_tokens": 321085049.0, "step": 3410 }, { "epoch": 0.5821812596006144, "grad_norm": 0.44443432165146446, "learning_rate": 1.671957671957672e-05, "loss": 0.6027, "num_tokens": 321202767.0, "step": 3411 }, { "epoch": 0.5823519371906468, "grad_norm": 0.5062688247618641, "learning_rate": 1.6712749615975425e-05, "loss": 0.6641, "num_tokens": 321302548.0, "step": 3412 }, { "epoch": 0.5825226147806793, "grad_norm": 0.4563485148005084, "learning_rate": 1.6705922512374125e-05, "loss": 0.5794, "num_tokens": 321408481.0, "step": 3413 }, { "epoch": 0.5826932923707118, "grad_norm": 0.5225899332240858, "learning_rate": 1.669909540877283e-05, "loss": 0.6255, "num_tokens": 321493497.0, "step": 3414 }, { "epoch": 0.5828639699607442, "grad_norm": 0.5070986731535245, "learning_rate": 1.6692268305171533e-05, "loss": 0.5849, "num_tokens": 321583292.0, "step": 3415 }, { "epoch": 0.5830346475507766, "grad_norm": 0.5769005656588899, "learning_rate": 1.6685441201570236e-05, "loss": 0.7271, "num_tokens": 321658333.0, "step": 3416 }, { "epoch": 0.583205325140809, "grad_norm": 0.4650899537162327, "learning_rate": 1.6678614097968937e-05, "loss": 0.5953, "num_tokens": 321763643.0, "step": 3417 }, { "epoch": 0.5833760027308414, "grad_norm": 0.5003784372431751, "learning_rate": 1.667178699436764e-05, "loss": 0.6099, "num_tokens": 321856084.0, "step": 3418 }, { "epoch": 0.5835466803208739, "grad_norm": 0.6941578168595592, "learning_rate": 1.6664959890766344e-05, "loss": 0.6836, "num_tokens": 321935023.0, "step": 3419 }, { "epoch": 0.5837173579109063, "grad_norm": 0.462340659045164, "learning_rate": 1.6658132787165048e-05, "loss": 0.5839, "num_tokens": 322034661.0, "step": 3420 }, { "epoch": 0.5838880355009387, "grad_norm": 0.4770013136665141, "learning_rate": 1.665130568356375e-05, "loss": 0.5751, "num_tokens": 322140680.0, "step": 3421 }, { "epoch": 0.5840587130909711, "grad_norm": 0.45840028420734075, "learning_rate": 1.6644478579962452e-05, "loss": 0.6443, "num_tokens": 322249435.0, "step": 3422 }, { "epoch": 0.5842293906810035, "grad_norm": 0.44390763152656665, "learning_rate": 1.6637651476361153e-05, "loss": 0.5211, "num_tokens": 322355746.0, "step": 3423 }, { "epoch": 0.584400068271036, "grad_norm": 0.4783874905055336, "learning_rate": 1.6630824372759856e-05, "loss": 0.6059, "num_tokens": 322452987.0, "step": 3424 }, { "epoch": 0.5845707458610685, "grad_norm": 0.7490223457084462, "learning_rate": 1.662399726915856e-05, "loss": 0.5941, "num_tokens": 322552204.0, "step": 3425 }, { "epoch": 0.5847414234511009, "grad_norm": 0.44919708444501066, "learning_rate": 1.6617170165557264e-05, "loss": 0.7078, "num_tokens": 322690801.0, "step": 3426 }, { "epoch": 0.5849121010411333, "grad_norm": 0.470743677896631, "learning_rate": 1.6610343061955968e-05, "loss": 0.6059, "num_tokens": 322791294.0, "step": 3427 }, { "epoch": 0.5850827786311658, "grad_norm": 0.5172693049275585, "learning_rate": 1.660351595835467e-05, "loss": 0.6096, "num_tokens": 322871595.0, "step": 3428 }, { "epoch": 0.5852534562211982, "grad_norm": 0.46113037665088324, "learning_rate": 1.6596688854753372e-05, "loss": 0.5223, "num_tokens": 322960633.0, "step": 3429 }, { "epoch": 0.5854241338112306, "grad_norm": 0.588500438022913, "learning_rate": 1.6589861751152075e-05, "loss": 0.6459, "num_tokens": 323045837.0, "step": 3430 }, { "epoch": 0.585594811401263, "grad_norm": 0.4769898101821736, "learning_rate": 1.6583034647550776e-05, "loss": 0.5695, "num_tokens": 323140385.0, "step": 3431 }, { "epoch": 0.5857654889912954, "grad_norm": 0.42093291299997915, "learning_rate": 1.657620754394948e-05, "loss": 0.5549, "num_tokens": 323248644.0, "step": 3432 }, { "epoch": 0.5859361665813279, "grad_norm": 0.5123637575469956, "learning_rate": 1.6569380440348183e-05, "loss": 0.5827, "num_tokens": 323328879.0, "step": 3433 }, { "epoch": 0.5861068441713603, "grad_norm": 0.47854014614612067, "learning_rate": 1.6562553336746887e-05, "loss": 0.6199, "num_tokens": 323432197.0, "step": 3434 }, { "epoch": 0.5862775217613927, "grad_norm": 0.515272611651013, "learning_rate": 1.655572623314559e-05, "loss": 0.5453, "num_tokens": 323514814.0, "step": 3435 }, { "epoch": 0.5864481993514251, "grad_norm": 0.49263240467192193, "learning_rate": 1.654889912954429e-05, "loss": 0.5177, "num_tokens": 323593158.0, "step": 3436 }, { "epoch": 0.5866188769414575, "grad_norm": 0.5181548944993186, "learning_rate": 1.6542072025942995e-05, "loss": 0.5929, "num_tokens": 323665981.0, "step": 3437 }, { "epoch": 0.5867895545314901, "grad_norm": 0.5107243707506156, "learning_rate": 1.65352449223417e-05, "loss": 0.4775, "num_tokens": 323739396.0, "step": 3438 }, { "epoch": 0.5869602321215225, "grad_norm": 0.5215371731113385, "learning_rate": 1.6528417818740403e-05, "loss": 0.6111, "num_tokens": 323825209.0, "step": 3439 }, { "epoch": 0.5871309097115549, "grad_norm": 0.4875260370869305, "learning_rate": 1.6521590715139103e-05, "loss": 0.6784, "num_tokens": 323930517.0, "step": 3440 }, { "epoch": 0.5873015873015873, "grad_norm": 0.46072543772918223, "learning_rate": 1.6514763611537807e-05, "loss": 0.6258, "num_tokens": 324047082.0, "step": 3441 }, { "epoch": 0.5874722648916197, "grad_norm": 0.47503874849886124, "learning_rate": 1.6507936507936507e-05, "loss": 0.5885, "num_tokens": 324143441.0, "step": 3442 }, { "epoch": 0.5876429424816522, "grad_norm": 0.4946061522468672, "learning_rate": 1.650110940433521e-05, "loss": 0.6077, "num_tokens": 324235127.0, "step": 3443 }, { "epoch": 0.5878136200716846, "grad_norm": 0.5204089628531647, "learning_rate": 1.6494282300733915e-05, "loss": 0.5703, "num_tokens": 324317138.0, "step": 3444 }, { "epoch": 0.587984297661717, "grad_norm": 0.4749032719934435, "learning_rate": 1.648745519713262e-05, "loss": 0.6395, "num_tokens": 324420156.0, "step": 3445 }, { "epoch": 0.5881549752517494, "grad_norm": 0.4627421487943803, "learning_rate": 1.6480628093531322e-05, "loss": 0.6156, "num_tokens": 324541241.0, "step": 3446 }, { "epoch": 0.5883256528417818, "grad_norm": 0.5009049822801436, "learning_rate": 1.6473800989930026e-05, "loss": 0.5754, "num_tokens": 324640168.0, "step": 3447 }, { "epoch": 0.5884963304318143, "grad_norm": 0.478099548768233, "learning_rate": 1.6466973886328726e-05, "loss": 0.6544, "num_tokens": 324752657.0, "step": 3448 }, { "epoch": 0.5886670080218467, "grad_norm": 0.4634614994278779, "learning_rate": 1.646014678272743e-05, "loss": 0.5427, "num_tokens": 324847347.0, "step": 3449 }, { "epoch": 0.5888376856118792, "grad_norm": 0.46070321839494754, "learning_rate": 1.645331967912613e-05, "loss": 0.57, "num_tokens": 324946610.0, "step": 3450 }, { "epoch": 0.5890083632019116, "grad_norm": 0.5231379243175022, "learning_rate": 1.6446492575524834e-05, "loss": 0.5411, "num_tokens": 325023241.0, "step": 3451 }, { "epoch": 0.589179040791944, "grad_norm": 0.43480062830402616, "learning_rate": 1.6439665471923538e-05, "loss": 0.61, "num_tokens": 325138078.0, "step": 3452 }, { "epoch": 0.5893497183819765, "grad_norm": 0.49422081951620705, "learning_rate": 1.6432838368322242e-05, "loss": 0.5664, "num_tokens": 325220044.0, "step": 3453 }, { "epoch": 0.5895203959720089, "grad_norm": 0.48258124512385187, "learning_rate": 1.6426011264720942e-05, "loss": 0.5962, "num_tokens": 325314906.0, "step": 3454 }, { "epoch": 0.5896910735620413, "grad_norm": 0.5145229911048186, "learning_rate": 1.6419184161119646e-05, "loss": 0.53, "num_tokens": 325386693.0, "step": 3455 }, { "epoch": 0.5898617511520737, "grad_norm": 0.48729252116828337, "learning_rate": 1.641235705751835e-05, "loss": 0.5791, "num_tokens": 325484313.0, "step": 3456 }, { "epoch": 0.5900324287421062, "grad_norm": 0.5037730755123196, "learning_rate": 1.6405529953917053e-05, "loss": 0.5767, "num_tokens": 325558561.0, "step": 3457 }, { "epoch": 0.5902031063321386, "grad_norm": 0.422431551370684, "learning_rate": 1.6398702850315754e-05, "loss": 0.6485, "num_tokens": 325699214.0, "step": 3458 }, { "epoch": 0.590373783922171, "grad_norm": 0.4679279275414914, "learning_rate": 1.6391875746714457e-05, "loss": 0.5909, "num_tokens": 325795069.0, "step": 3459 }, { "epoch": 0.5905444615122034, "grad_norm": 0.4691872704594372, "learning_rate": 1.638504864311316e-05, "loss": 0.5186, "num_tokens": 325886181.0, "step": 3460 }, { "epoch": 0.5907151391022358, "grad_norm": 0.46263030185825793, "learning_rate": 1.637822153951186e-05, "loss": 0.5903, "num_tokens": 325988852.0, "step": 3461 }, { "epoch": 0.5908858166922683, "grad_norm": 0.4161144559742367, "learning_rate": 1.6371394435910565e-05, "loss": 0.5229, "num_tokens": 326097376.0, "step": 3462 }, { "epoch": 0.5910564942823008, "grad_norm": 0.4440427479851766, "learning_rate": 1.636456733230927e-05, "loss": 0.5386, "num_tokens": 326199750.0, "step": 3463 }, { "epoch": 0.5912271718723332, "grad_norm": 0.48290536824263913, "learning_rate": 1.6357740228707973e-05, "loss": 0.5759, "num_tokens": 326305771.0, "step": 3464 }, { "epoch": 0.5913978494623656, "grad_norm": 0.4596765439069304, "learning_rate": 1.6350913125106677e-05, "loss": 0.4881, "num_tokens": 326393093.0, "step": 3465 }, { "epoch": 0.591568527052398, "grad_norm": 0.5348192412486799, "learning_rate": 1.6344086021505377e-05, "loss": 0.508, "num_tokens": 326453418.0, "step": 3466 }, { "epoch": 0.5917392046424305, "grad_norm": 0.4517653221110361, "learning_rate": 1.633725891790408e-05, "loss": 0.5361, "num_tokens": 326548824.0, "step": 3467 }, { "epoch": 0.5919098822324629, "grad_norm": 0.5209496808804358, "learning_rate": 1.6330431814302785e-05, "loss": 0.5144, "num_tokens": 326625250.0, "step": 3468 }, { "epoch": 0.5920805598224953, "grad_norm": 0.47025465322751436, "learning_rate": 1.6323604710701485e-05, "loss": 0.5512, "num_tokens": 326726340.0, "step": 3469 }, { "epoch": 0.5922512374125277, "grad_norm": 0.47061330754100467, "learning_rate": 1.631677760710019e-05, "loss": 0.6403, "num_tokens": 326849618.0, "step": 3470 }, { "epoch": 0.5924219150025601, "grad_norm": 0.4511645431403223, "learning_rate": 1.6309950503498892e-05, "loss": 0.521, "num_tokens": 326951923.0, "step": 3471 }, { "epoch": 0.5925925925925926, "grad_norm": 0.44792677288294896, "learning_rate": 1.6303123399897596e-05, "loss": 0.5256, "num_tokens": 327053352.0, "step": 3472 }, { "epoch": 0.592763270182625, "grad_norm": 0.4481250766516246, "learning_rate": 1.6296296296296297e-05, "loss": 0.5542, "num_tokens": 327159320.0, "step": 3473 }, { "epoch": 0.5929339477726574, "grad_norm": 0.4982954391133741, "learning_rate": 1.6289469192695e-05, "loss": 0.5682, "num_tokens": 327242255.0, "step": 3474 }, { "epoch": 0.5931046253626899, "grad_norm": 0.5619331447282091, "learning_rate": 1.6282642089093704e-05, "loss": 0.5164, "num_tokens": 327300763.0, "step": 3475 }, { "epoch": 0.5932753029527223, "grad_norm": 0.5505898344465563, "learning_rate": 1.6275814985492408e-05, "loss": 0.622, "num_tokens": 327374608.0, "step": 3476 }, { "epoch": 0.5934459805427548, "grad_norm": 0.4465007355509528, "learning_rate": 1.6268987881891108e-05, "loss": 0.5923, "num_tokens": 327500652.0, "step": 3477 }, { "epoch": 0.5936166581327872, "grad_norm": 0.4483297398820762, "learning_rate": 1.6262160778289812e-05, "loss": 0.5692, "num_tokens": 327603369.0, "step": 3478 }, { "epoch": 0.5937873357228196, "grad_norm": 0.5097390202052126, "learning_rate": 1.6255333674688512e-05, "loss": 0.6141, "num_tokens": 327709011.0, "step": 3479 }, { "epoch": 0.593958013312852, "grad_norm": 0.5230288550716223, "learning_rate": 1.6248506571087216e-05, "loss": 0.725, "num_tokens": 327811581.0, "step": 3480 }, { "epoch": 0.5941286909028844, "grad_norm": 0.4398686395277153, "learning_rate": 1.624167946748592e-05, "loss": 0.5388, "num_tokens": 327917696.0, "step": 3481 }, { "epoch": 0.5942993684929169, "grad_norm": 0.50371967442367, "learning_rate": 1.6234852363884624e-05, "loss": 0.5583, "num_tokens": 327998586.0, "step": 3482 }, { "epoch": 0.5944700460829493, "grad_norm": 0.5321201117379377, "learning_rate": 1.6228025260283327e-05, "loss": 0.6396, "num_tokens": 328082008.0, "step": 3483 }, { "epoch": 0.5946407236729817, "grad_norm": 0.4498250081348172, "learning_rate": 1.622119815668203e-05, "loss": 0.582, "num_tokens": 328193830.0, "step": 3484 }, { "epoch": 0.5948114012630141, "grad_norm": 0.513290958466778, "learning_rate": 1.621437105308073e-05, "loss": 0.5557, "num_tokens": 328272092.0, "step": 3485 }, { "epoch": 0.5949820788530465, "grad_norm": 0.5433823877590496, "learning_rate": 1.6207543949479435e-05, "loss": 0.6033, "num_tokens": 328344585.0, "step": 3486 }, { "epoch": 0.5951527564430791, "grad_norm": 0.4818543042340627, "learning_rate": 1.6200716845878136e-05, "loss": 0.6621, "num_tokens": 328450016.0, "step": 3487 }, { "epoch": 0.5953234340331115, "grad_norm": 0.41308126649874743, "learning_rate": 1.619388974227684e-05, "loss": 0.6051, "num_tokens": 328592559.0, "step": 3488 }, { "epoch": 0.5954941116231439, "grad_norm": 0.5005934198017281, "learning_rate": 1.6187062638675543e-05, "loss": 0.5956, "num_tokens": 328673783.0, "step": 3489 }, { "epoch": 0.5956647892131763, "grad_norm": 0.5248849979332141, "learning_rate": 1.6180235535074247e-05, "loss": 0.6591, "num_tokens": 328768225.0, "step": 3490 }, { "epoch": 0.5958354668032088, "grad_norm": 0.45621046431560197, "learning_rate": 1.617340843147295e-05, "loss": 0.5551, "num_tokens": 328873183.0, "step": 3491 }, { "epoch": 0.5960061443932412, "grad_norm": 0.45422241597130736, "learning_rate": 1.616658132787165e-05, "loss": 0.4965, "num_tokens": 328966564.0, "step": 3492 }, { "epoch": 0.5961768219832736, "grad_norm": 0.5068246496678622, "learning_rate": 1.6159754224270355e-05, "loss": 0.5908, "num_tokens": 329046390.0, "step": 3493 }, { "epoch": 0.596347499573306, "grad_norm": 0.43414529565329013, "learning_rate": 1.615292712066906e-05, "loss": 0.5891, "num_tokens": 329160218.0, "step": 3494 }, { "epoch": 0.5965181771633384, "grad_norm": 0.5405466094334678, "learning_rate": 1.614610001706776e-05, "loss": 0.5697, "num_tokens": 329244435.0, "step": 3495 }, { "epoch": 0.5966888547533709, "grad_norm": 0.4282377330947544, "learning_rate": 1.6139272913466463e-05, "loss": 0.5577, "num_tokens": 329358832.0, "step": 3496 }, { "epoch": 0.5968595323434033, "grad_norm": 0.45888735950087634, "learning_rate": 1.6132445809865167e-05, "loss": 0.6194, "num_tokens": 329481881.0, "step": 3497 }, { "epoch": 0.5970302099334357, "grad_norm": 0.5424363732866481, "learning_rate": 1.6125618706263867e-05, "loss": 0.6625, "num_tokens": 329565152.0, "step": 3498 }, { "epoch": 0.5972008875234681, "grad_norm": 0.4370678219583965, "learning_rate": 1.611879160266257e-05, "loss": 0.5212, "num_tokens": 329663206.0, "step": 3499 }, { "epoch": 0.5973715651135006, "grad_norm": 0.5050054667231645, "learning_rate": 1.6111964499061275e-05, "loss": 0.6248, "num_tokens": 329762091.0, "step": 3500 }, { "epoch": 0.5975422427035331, "grad_norm": 0.5165606190454154, "learning_rate": 1.6105137395459978e-05, "loss": 0.5777, "num_tokens": 329841719.0, "step": 3501 }, { "epoch": 0.5977129202935655, "grad_norm": 0.4917888329517648, "learning_rate": 1.6098310291858682e-05, "loss": 0.5343, "num_tokens": 329922561.0, "step": 3502 }, { "epoch": 0.5978835978835979, "grad_norm": 0.5153612126517976, "learning_rate": 1.6091483188257382e-05, "loss": 0.5774, "num_tokens": 330005363.0, "step": 3503 }, { "epoch": 0.5980542754736303, "grad_norm": 0.5060206634497117, "learning_rate": 1.6084656084656086e-05, "loss": 0.5555, "num_tokens": 330091185.0, "step": 3504 }, { "epoch": 0.5982249530636627, "grad_norm": 0.47266395070301465, "learning_rate": 1.607782898105479e-05, "loss": 0.6076, "num_tokens": 330197046.0, "step": 3505 }, { "epoch": 0.5983956306536952, "grad_norm": 0.514218987118427, "learning_rate": 1.607100187745349e-05, "loss": 0.613, "num_tokens": 330283439.0, "step": 3506 }, { "epoch": 0.5985663082437276, "grad_norm": 0.4776126173692995, "learning_rate": 1.6064174773852194e-05, "loss": 0.5759, "num_tokens": 330379182.0, "step": 3507 }, { "epoch": 0.59873698583376, "grad_norm": 0.5092469133654185, "learning_rate": 1.6057347670250898e-05, "loss": 0.5583, "num_tokens": 330452996.0, "step": 3508 }, { "epoch": 0.5989076634237924, "grad_norm": 0.501264829006857, "learning_rate": 1.60505205666496e-05, "loss": 0.5288, "num_tokens": 330538109.0, "step": 3509 }, { "epoch": 0.5990783410138248, "grad_norm": 0.4663941297479572, "learning_rate": 1.6043693463048302e-05, "loss": 0.5535, "num_tokens": 330635178.0, "step": 3510 }, { "epoch": 0.5992490186038573, "grad_norm": 0.4596591299349046, "learning_rate": 1.6036866359447006e-05, "loss": 0.5657, "num_tokens": 330738394.0, "step": 3511 }, { "epoch": 0.5994196961938898, "grad_norm": 0.41470433142727503, "learning_rate": 1.603003925584571e-05, "loss": 0.546, "num_tokens": 330861565.0, "step": 3512 }, { "epoch": 0.5995903737839222, "grad_norm": 0.47116253014389337, "learning_rate": 1.6023212152244413e-05, "loss": 0.5132, "num_tokens": 330945830.0, "step": 3513 }, { "epoch": 0.5997610513739546, "grad_norm": 0.48306542865657215, "learning_rate": 1.6016385048643114e-05, "loss": 0.5767, "num_tokens": 331039640.0, "step": 3514 }, { "epoch": 0.599931728963987, "grad_norm": 0.4832665039034823, "learning_rate": 1.6009557945041817e-05, "loss": 0.5714, "num_tokens": 331133593.0, "step": 3515 }, { "epoch": 0.6001024065540195, "grad_norm": 0.43546121304830837, "learning_rate": 1.6002730841440518e-05, "loss": 0.5046, "num_tokens": 331241107.0, "step": 3516 }, { "epoch": 0.6002730841440519, "grad_norm": 0.508546166804123, "learning_rate": 1.599590373783922e-05, "loss": 0.5704, "num_tokens": 331315245.0, "step": 3517 }, { "epoch": 0.6004437617340843, "grad_norm": 0.48173405077260556, "learning_rate": 1.5989076634237925e-05, "loss": 0.5209, "num_tokens": 331395653.0, "step": 3518 }, { "epoch": 0.6006144393241167, "grad_norm": 0.44117044114018444, "learning_rate": 1.598224953063663e-05, "loss": 0.5706, "num_tokens": 331502992.0, "step": 3519 }, { "epoch": 0.6007851169141492, "grad_norm": 0.48419777722006313, "learning_rate": 1.5975422427035333e-05, "loss": 0.5231, "num_tokens": 331583352.0, "step": 3520 }, { "epoch": 0.6009557945041816, "grad_norm": 0.4319149087876771, "learning_rate": 1.5968595323434037e-05, "loss": 0.4707, "num_tokens": 331681125.0, "step": 3521 }, { "epoch": 0.601126472094214, "grad_norm": 0.5336806178316883, "learning_rate": 1.5961768219832737e-05, "loss": 0.683, "num_tokens": 331766761.0, "step": 3522 }, { "epoch": 0.6012971496842464, "grad_norm": 0.50913828563718, "learning_rate": 1.595494111623144e-05, "loss": 0.4925, "num_tokens": 331832986.0, "step": 3523 }, { "epoch": 0.6014678272742788, "grad_norm": 0.4999149141786681, "learning_rate": 1.594811401263014e-05, "loss": 0.6036, "num_tokens": 331920176.0, "step": 3524 }, { "epoch": 0.6016385048643114, "grad_norm": 0.45643424507846037, "learning_rate": 1.5941286909028845e-05, "loss": 0.5373, "num_tokens": 332011380.0, "step": 3525 }, { "epoch": 0.6018091824543438, "grad_norm": 0.4895805807897716, "learning_rate": 1.593445980542755e-05, "loss": 0.6227, "num_tokens": 332101430.0, "step": 3526 }, { "epoch": 0.6019798600443762, "grad_norm": 0.5248339298189859, "learning_rate": 1.5927632701826252e-05, "loss": 0.4964, "num_tokens": 332169401.0, "step": 3527 }, { "epoch": 0.6021505376344086, "grad_norm": 0.45333932853880793, "learning_rate": 1.5920805598224956e-05, "loss": 0.5414, "num_tokens": 332263475.0, "step": 3528 }, { "epoch": 0.602321215224441, "grad_norm": 0.5081325201182729, "learning_rate": 1.5913978494623657e-05, "loss": 0.558, "num_tokens": 332335930.0, "step": 3529 }, { "epoch": 0.6024918928144735, "grad_norm": 0.4673834374942599, "learning_rate": 1.590715139102236e-05, "loss": 0.5624, "num_tokens": 332442448.0, "step": 3530 }, { "epoch": 0.6026625704045059, "grad_norm": 0.4553827742008108, "learning_rate": 1.5900324287421064e-05, "loss": 0.5981, "num_tokens": 332547154.0, "step": 3531 }, { "epoch": 0.6028332479945383, "grad_norm": 0.4721699582839534, "learning_rate": 1.5893497183819764e-05, "loss": 0.5869, "num_tokens": 332644612.0, "step": 3532 }, { "epoch": 0.6030039255845707, "grad_norm": 0.5061967467746666, "learning_rate": 1.5886670080218468e-05, "loss": 0.6863, "num_tokens": 332749829.0, "step": 3533 }, { "epoch": 0.6031746031746031, "grad_norm": 0.47637961348332303, "learning_rate": 1.5879842976617172e-05, "loss": 0.5655, "num_tokens": 332835923.0, "step": 3534 }, { "epoch": 0.6033452807646356, "grad_norm": 0.5154119209436892, "learning_rate": 1.5873015873015872e-05, "loss": 0.6265, "num_tokens": 332923170.0, "step": 3535 }, { "epoch": 0.603515958354668, "grad_norm": 0.5081112935766111, "learning_rate": 1.5866188769414576e-05, "loss": 0.6191, "num_tokens": 333022093.0, "step": 3536 }, { "epoch": 0.6036866359447005, "grad_norm": 0.5748614251507108, "learning_rate": 1.585936166581328e-05, "loss": 0.6006, "num_tokens": 333081119.0, "step": 3537 }, { "epoch": 0.6038573135347329, "grad_norm": 0.49975371316108547, "learning_rate": 1.5852534562211984e-05, "loss": 0.5518, "num_tokens": 333185782.0, "step": 3538 }, { "epoch": 0.6040279911247654, "grad_norm": 0.5011537556388476, "learning_rate": 1.5845707458610687e-05, "loss": 0.5993, "num_tokens": 333267473.0, "step": 3539 }, { "epoch": 0.6041986687147978, "grad_norm": 0.5065451376089161, "learning_rate": 1.583888035500939e-05, "loss": 0.6006, "num_tokens": 333351484.0, "step": 3540 }, { "epoch": 0.6043693463048302, "grad_norm": 0.42931378241159973, "learning_rate": 1.583205325140809e-05, "loss": 0.578, "num_tokens": 333464833.0, "step": 3541 }, { "epoch": 0.6045400238948626, "grad_norm": 0.5265061258277138, "learning_rate": 1.5825226147806795e-05, "loss": 0.6603, "num_tokens": 333559982.0, "step": 3542 }, { "epoch": 0.604710701484895, "grad_norm": 0.4666433059466304, "learning_rate": 1.5818399044205496e-05, "loss": 0.6654, "num_tokens": 333678733.0, "step": 3543 }, { "epoch": 0.6048813790749274, "grad_norm": 0.46007571990397467, "learning_rate": 1.58115719406042e-05, "loss": 0.6184, "num_tokens": 333786644.0, "step": 3544 }, { "epoch": 0.6050520566649599, "grad_norm": 0.4836505037708075, "learning_rate": 1.5804744837002903e-05, "loss": 0.5411, "num_tokens": 333864465.0, "step": 3545 }, { "epoch": 0.6052227342549923, "grad_norm": 0.44643590287133966, "learning_rate": 1.5797917733401607e-05, "loss": 0.6334, "num_tokens": 333985826.0, "step": 3546 }, { "epoch": 0.6053934118450247, "grad_norm": 0.4563441077611402, "learning_rate": 1.5791090629800307e-05, "loss": 0.5683, "num_tokens": 334082731.0, "step": 3547 }, { "epoch": 0.6055640894350571, "grad_norm": 0.43824989500312383, "learning_rate": 1.578426352619901e-05, "loss": 0.509, "num_tokens": 334183951.0, "step": 3548 }, { "epoch": 0.6057347670250897, "grad_norm": 0.4809467067314409, "learning_rate": 1.5777436422597715e-05, "loss": 0.6405, "num_tokens": 334279305.0, "step": 3549 }, { "epoch": 0.6059054446151221, "grad_norm": 0.40416997720200354, "learning_rate": 1.577060931899642e-05, "loss": 0.5865, "num_tokens": 334412376.0, "step": 3550 }, { "epoch": 0.6060761222051545, "grad_norm": 0.5443869338741327, "learning_rate": 1.576378221539512e-05, "loss": 0.5922, "num_tokens": 334481050.0, "step": 3551 }, { "epoch": 0.6062467997951869, "grad_norm": 0.4958878213686479, "learning_rate": 1.5756955111793823e-05, "loss": 0.5748, "num_tokens": 334565789.0, "step": 3552 }, { "epoch": 0.6064174773852193, "grad_norm": 0.5333063975251238, "learning_rate": 1.5750128008192523e-05, "loss": 0.5862, "num_tokens": 334641650.0, "step": 3553 }, { "epoch": 0.6065881549752518, "grad_norm": 0.5328468918164831, "learning_rate": 1.5743300904591227e-05, "loss": 0.4957, "num_tokens": 334708991.0, "step": 3554 }, { "epoch": 0.6067588325652842, "grad_norm": 0.5315588286706288, "learning_rate": 1.573647380098993e-05, "loss": 0.5913, "num_tokens": 334775757.0, "step": 3555 }, { "epoch": 0.6069295101553166, "grad_norm": 0.4502736466342284, "learning_rate": 1.5729646697388634e-05, "loss": 0.613, "num_tokens": 334893450.0, "step": 3556 }, { "epoch": 0.607100187745349, "grad_norm": 0.4847829872648731, "learning_rate": 1.5722819593787338e-05, "loss": 0.6052, "num_tokens": 334999495.0, "step": 3557 }, { "epoch": 0.6072708653353814, "grad_norm": 0.47564502300934663, "learning_rate": 1.5715992490186042e-05, "loss": 0.5599, "num_tokens": 335097311.0, "step": 3558 }, { "epoch": 0.6074415429254139, "grad_norm": 0.48197325739122276, "learning_rate": 1.5709165386584742e-05, "loss": 0.5643, "num_tokens": 335192835.0, "step": 3559 }, { "epoch": 0.6076122205154463, "grad_norm": 0.4732091802320942, "learning_rate": 1.5702338282983446e-05, "loss": 0.6446, "num_tokens": 335302038.0, "step": 3560 }, { "epoch": 0.6077828981054787, "grad_norm": 0.478078090808869, "learning_rate": 1.5695511179382146e-05, "loss": 0.6546, "num_tokens": 335415942.0, "step": 3561 }, { "epoch": 0.6079535756955112, "grad_norm": 0.510997394901381, "learning_rate": 1.568868407578085e-05, "loss": 0.5994, "num_tokens": 335529698.0, "step": 3562 }, { "epoch": 0.6081242532855436, "grad_norm": 0.4677029569142117, "learning_rate": 1.5681856972179554e-05, "loss": 0.5525, "num_tokens": 335627305.0, "step": 3563 }, { "epoch": 0.6082949308755761, "grad_norm": 0.6072226921708521, "learning_rate": 1.5675029868578258e-05, "loss": 0.6404, "num_tokens": 335687412.0, "step": 3564 }, { "epoch": 0.6084656084656085, "grad_norm": 0.5280063348059985, "learning_rate": 1.566820276497696e-05, "loss": 0.6567, "num_tokens": 335776330.0, "step": 3565 }, { "epoch": 0.6086362860556409, "grad_norm": 0.5638495669546878, "learning_rate": 1.5661375661375662e-05, "loss": 0.532, "num_tokens": 335834418.0, "step": 3566 }, { "epoch": 0.6088069636456733, "grad_norm": 0.494067904647009, "learning_rate": 1.5654548557774366e-05, "loss": 0.6539, "num_tokens": 335952102.0, "step": 3567 }, { "epoch": 0.6089776412357057, "grad_norm": 0.5039296280612839, "learning_rate": 1.564772145417307e-05, "loss": 0.6218, "num_tokens": 336042594.0, "step": 3568 }, { "epoch": 0.6091483188257382, "grad_norm": 0.45085058129902134, "learning_rate": 1.564089435057177e-05, "loss": 0.6364, "num_tokens": 336153601.0, "step": 3569 }, { "epoch": 0.6093189964157706, "grad_norm": 0.5652739850821955, "learning_rate": 1.5634067246970474e-05, "loss": 0.6205, "num_tokens": 336218455.0, "step": 3570 }, { "epoch": 0.609489674005803, "grad_norm": 0.4998241610889815, "learning_rate": 1.5627240143369177e-05, "loss": 0.6067, "num_tokens": 336303128.0, "step": 3571 }, { "epoch": 0.6096603515958354, "grad_norm": 0.5042686542986183, "learning_rate": 1.5620413039767878e-05, "loss": 0.5986, "num_tokens": 336381366.0, "step": 3572 }, { "epoch": 0.6098310291858678, "grad_norm": 0.5123094512281448, "learning_rate": 1.561358593616658e-05, "loss": 0.56, "num_tokens": 336469528.0, "step": 3573 }, { "epoch": 0.6100017067759004, "grad_norm": 0.44067480916934315, "learning_rate": 1.5606758832565285e-05, "loss": 0.5235, "num_tokens": 336573003.0, "step": 3574 }, { "epoch": 0.6101723843659328, "grad_norm": 0.4963773347075571, "learning_rate": 1.559993172896399e-05, "loss": 0.686, "num_tokens": 336683559.0, "step": 3575 }, { "epoch": 0.6103430619559652, "grad_norm": 0.5238960286407632, "learning_rate": 1.5593104625362693e-05, "loss": 0.5669, "num_tokens": 336763733.0, "step": 3576 }, { "epoch": 0.6105137395459976, "grad_norm": 0.46028308280279734, "learning_rate": 1.5586277521761396e-05, "loss": 0.5363, "num_tokens": 336859202.0, "step": 3577 }, { "epoch": 0.61068441713603, "grad_norm": 0.5610403841862394, "learning_rate": 1.5579450418160097e-05, "loss": 0.5747, "num_tokens": 336920085.0, "step": 3578 }, { "epoch": 0.6108550947260625, "grad_norm": 0.5263054310428272, "learning_rate": 1.55726233145588e-05, "loss": 0.556, "num_tokens": 336997800.0, "step": 3579 }, { "epoch": 0.6110257723160949, "grad_norm": 0.4721633104674769, "learning_rate": 1.55657962109575e-05, "loss": 0.5685, "num_tokens": 337096537.0, "step": 3580 }, { "epoch": 0.6111964499061273, "grad_norm": 0.49339995410004833, "learning_rate": 1.5558969107356205e-05, "loss": 0.5393, "num_tokens": 337181166.0, "step": 3581 }, { "epoch": 0.6113671274961597, "grad_norm": 0.4569740964315961, "learning_rate": 1.555214200375491e-05, "loss": 0.5712, "num_tokens": 337282377.0, "step": 3582 }, { "epoch": 0.6115378050861922, "grad_norm": 0.5057941783869859, "learning_rate": 1.5545314900153612e-05, "loss": 0.6078, "num_tokens": 337367178.0, "step": 3583 }, { "epoch": 0.6117084826762246, "grad_norm": 0.4529236860369162, "learning_rate": 1.5538487796552313e-05, "loss": 0.587, "num_tokens": 337474853.0, "step": 3584 }, { "epoch": 0.611879160266257, "grad_norm": 0.4743824641380396, "learning_rate": 1.5531660692951016e-05, "loss": 0.5184, "num_tokens": 337563244.0, "step": 3585 }, { "epoch": 0.6120498378562894, "grad_norm": 0.49151963027893303, "learning_rate": 1.552483358934972e-05, "loss": 0.5794, "num_tokens": 337646559.0, "step": 3586 }, { "epoch": 0.612220515446322, "grad_norm": 0.4517849612027927, "learning_rate": 1.5518006485748424e-05, "loss": 0.5162, "num_tokens": 337759048.0, "step": 3587 }, { "epoch": 0.6123911930363544, "grad_norm": 0.5222405296769319, "learning_rate": 1.5511179382147124e-05, "loss": 0.642, "num_tokens": 337853256.0, "step": 3588 }, { "epoch": 0.6125618706263868, "grad_norm": 0.4171613405524083, "learning_rate": 1.5504352278545828e-05, "loss": 0.5469, "num_tokens": 337979580.0, "step": 3589 }, { "epoch": 0.6127325482164192, "grad_norm": 0.46140265164928784, "learning_rate": 1.5497525174944532e-05, "loss": 0.6138, "num_tokens": 338078482.0, "step": 3590 }, { "epoch": 0.6129032258064516, "grad_norm": 0.4487878058578686, "learning_rate": 1.5490698071343232e-05, "loss": 0.555, "num_tokens": 338178684.0, "step": 3591 }, { "epoch": 0.613073903396484, "grad_norm": 0.4549476267048519, "learning_rate": 1.5483870967741936e-05, "loss": 0.5798, "num_tokens": 338276604.0, "step": 3592 }, { "epoch": 0.6132445809865165, "grad_norm": 0.4722610788044621, "learning_rate": 1.547704386414064e-05, "loss": 0.5803, "num_tokens": 338377702.0, "step": 3593 }, { "epoch": 0.6134152585765489, "grad_norm": 0.44884286717765065, "learning_rate": 1.5470216760539344e-05, "loss": 0.6061, "num_tokens": 338494643.0, "step": 3594 }, { "epoch": 0.6135859361665813, "grad_norm": 0.4608032421507256, "learning_rate": 1.5463389656938047e-05, "loss": 0.581, "num_tokens": 338591452.0, "step": 3595 }, { "epoch": 0.6137566137566137, "grad_norm": 0.4742398469308564, "learning_rate": 1.5456562553336748e-05, "loss": 0.5448, "num_tokens": 338680769.0, "step": 3596 }, { "epoch": 0.6139272913466461, "grad_norm": 0.4542343168789292, "learning_rate": 1.544973544973545e-05, "loss": 0.5889, "num_tokens": 338783746.0, "step": 3597 }, { "epoch": 0.6140979689366786, "grad_norm": 0.6392609180940966, "learning_rate": 1.5442908346134152e-05, "loss": 0.6332, "num_tokens": 338894749.0, "step": 3598 }, { "epoch": 0.6142686465267111, "grad_norm": 0.4979693800373054, "learning_rate": 1.5436081242532856e-05, "loss": 0.6316, "num_tokens": 338990261.0, "step": 3599 }, { "epoch": 0.6144393241167435, "grad_norm": 0.4466541577265138, "learning_rate": 1.542925413893156e-05, "loss": 0.6486, "num_tokens": 339113866.0, "step": 3600 }, { "epoch": 0.6146100017067759, "grad_norm": 0.45129280163638863, "learning_rate": 1.5422427035330263e-05, "loss": 0.5356, "num_tokens": 339210252.0, "step": 3601 }, { "epoch": 0.6147806792968084, "grad_norm": 0.47195079061526096, "learning_rate": 1.5415599931728967e-05, "loss": 0.6129, "num_tokens": 339309689.0, "step": 3602 }, { "epoch": 0.6149513568868408, "grad_norm": 0.5212364805207232, "learning_rate": 1.5408772828127667e-05, "loss": 0.5477, "num_tokens": 339378051.0, "step": 3603 }, { "epoch": 0.6151220344768732, "grad_norm": 0.5396847459763521, "learning_rate": 1.540194572452637e-05, "loss": 0.585, "num_tokens": 339451859.0, "step": 3604 }, { "epoch": 0.6152927120669056, "grad_norm": 0.4341881765379073, "learning_rate": 1.5395118620925075e-05, "loss": 0.5842, "num_tokens": 339561517.0, "step": 3605 }, { "epoch": 0.615463389656938, "grad_norm": 0.48833094372894487, "learning_rate": 1.5388291517323775e-05, "loss": 0.541, "num_tokens": 339641740.0, "step": 3606 }, { "epoch": 0.6156340672469705, "grad_norm": 0.48360585686163576, "learning_rate": 1.538146441372248e-05, "loss": 0.5787, "num_tokens": 339729608.0, "step": 3607 }, { "epoch": 0.6158047448370029, "grad_norm": 0.4518395899250133, "learning_rate": 1.5374637310121183e-05, "loss": 0.6031, "num_tokens": 339869426.0, "step": 3608 }, { "epoch": 0.6159754224270353, "grad_norm": 0.47905926666327536, "learning_rate": 1.5367810206519883e-05, "loss": 0.6564, "num_tokens": 339975709.0, "step": 3609 }, { "epoch": 0.6161461000170677, "grad_norm": 0.4096668339608625, "learning_rate": 1.5360983102918587e-05, "loss": 0.6037, "num_tokens": 340121865.0, "step": 3610 }, { "epoch": 0.6163167776071002, "grad_norm": 0.47677538889693727, "learning_rate": 1.535415599931729e-05, "loss": 0.648, "num_tokens": 340228887.0, "step": 3611 }, { "epoch": 0.6164874551971327, "grad_norm": 0.6006899294497313, "learning_rate": 1.5347328895715994e-05, "loss": 0.6627, "num_tokens": 340292466.0, "step": 3612 }, { "epoch": 0.6166581327871651, "grad_norm": 0.459436354100249, "learning_rate": 1.5340501792114698e-05, "loss": 0.6557, "num_tokens": 340397727.0, "step": 3613 }, { "epoch": 0.6168288103771975, "grad_norm": 0.4516739355932989, "learning_rate": 1.5333674688513402e-05, "loss": 0.6281, "num_tokens": 340504407.0, "step": 3614 }, { "epoch": 0.6169994879672299, "grad_norm": 0.47308332035647443, "learning_rate": 1.5326847584912102e-05, "loss": 0.553, "num_tokens": 340602015.0, "step": 3615 }, { "epoch": 0.6171701655572623, "grad_norm": 0.5103576392384896, "learning_rate": 1.5320020481310806e-05, "loss": 0.6255, "num_tokens": 340688774.0, "step": 3616 }, { "epoch": 0.6173408431472948, "grad_norm": 0.4833232896366674, "learning_rate": 1.5313193377709506e-05, "loss": 0.5409, "num_tokens": 340774762.0, "step": 3617 }, { "epoch": 0.6175115207373272, "grad_norm": 0.499264150849373, "learning_rate": 1.530636627410821e-05, "loss": 0.5944, "num_tokens": 340870740.0, "step": 3618 }, { "epoch": 0.6176821983273596, "grad_norm": 0.43180361352943164, "learning_rate": 1.5299539170506914e-05, "loss": 0.5842, "num_tokens": 340991550.0, "step": 3619 }, { "epoch": 0.617852875917392, "grad_norm": 0.5572215292936863, "learning_rate": 1.5292712066905618e-05, "loss": 0.5474, "num_tokens": 341052416.0, "step": 3620 }, { "epoch": 0.6180235535074244, "grad_norm": 0.5269219426126724, "learning_rate": 1.528588496330432e-05, "loss": 0.6321, "num_tokens": 341144747.0, "step": 3621 }, { "epoch": 0.6181942310974569, "grad_norm": 0.5033562469962622, "learning_rate": 1.5279057859703022e-05, "loss": 0.5558, "num_tokens": 341223769.0, "step": 3622 }, { "epoch": 0.6183649086874893, "grad_norm": 0.49387216400216066, "learning_rate": 1.5272230756101726e-05, "loss": 0.5865, "num_tokens": 341312144.0, "step": 3623 }, { "epoch": 0.6185355862775218, "grad_norm": 0.46471698786936644, "learning_rate": 1.526540365250043e-05, "loss": 0.6981, "num_tokens": 341427029.0, "step": 3624 }, { "epoch": 0.6187062638675542, "grad_norm": 0.45268702190855903, "learning_rate": 1.5258576548899131e-05, "loss": 0.6694, "num_tokens": 341547117.0, "step": 3625 }, { "epoch": 0.6188769414575866, "grad_norm": 0.4939130836944239, "learning_rate": 1.5251749445297835e-05, "loss": 0.5347, "num_tokens": 341623379.0, "step": 3626 }, { "epoch": 0.6190476190476191, "grad_norm": 0.4561296898615653, "learning_rate": 1.5244922341696537e-05, "loss": 0.5654, "num_tokens": 341726766.0, "step": 3627 }, { "epoch": 0.6192182966376515, "grad_norm": 0.49732555709580856, "learning_rate": 1.523809523809524e-05, "loss": 0.5812, "num_tokens": 341804954.0, "step": 3628 }, { "epoch": 0.6193889742276839, "grad_norm": 0.44490121308273284, "learning_rate": 1.5231268134493941e-05, "loss": 0.5199, "num_tokens": 341908951.0, "step": 3629 }, { "epoch": 0.6195596518177163, "grad_norm": 0.4987171664391396, "learning_rate": 1.5224441030892645e-05, "loss": 0.5479, "num_tokens": 342000341.0, "step": 3630 }, { "epoch": 0.6197303294077487, "grad_norm": 0.5496376064303287, "learning_rate": 1.5217613927291349e-05, "loss": 0.605, "num_tokens": 342085336.0, "step": 3631 }, { "epoch": 0.6199010069977812, "grad_norm": 0.4591253207101078, "learning_rate": 1.5210786823690051e-05, "loss": 0.5267, "num_tokens": 342176899.0, "step": 3632 }, { "epoch": 0.6200716845878136, "grad_norm": 0.5301172577030788, "learning_rate": 1.5203959720088755e-05, "loss": 0.7093, "num_tokens": 342264592.0, "step": 3633 }, { "epoch": 0.620242362177846, "grad_norm": 0.4866651815236698, "learning_rate": 1.5197132616487455e-05, "loss": 0.5601, "num_tokens": 342346530.0, "step": 3634 }, { "epoch": 0.6204130397678784, "grad_norm": 0.4931079672594089, "learning_rate": 1.5190305512886159e-05, "loss": 0.5935, "num_tokens": 342437306.0, "step": 3635 }, { "epoch": 0.620583717357911, "grad_norm": 0.46196211170807955, "learning_rate": 1.5183478409284863e-05, "loss": 0.595, "num_tokens": 342540328.0, "step": 3636 }, { "epoch": 0.6207543949479434, "grad_norm": 0.4772542000080506, "learning_rate": 1.5176651305683565e-05, "loss": 0.598, "num_tokens": 342634785.0, "step": 3637 }, { "epoch": 0.6209250725379758, "grad_norm": 0.4655459115376325, "learning_rate": 1.5169824202082268e-05, "loss": 0.5042, "num_tokens": 342718914.0, "step": 3638 }, { "epoch": 0.6210957501280082, "grad_norm": 0.4808679672955145, "learning_rate": 1.5162997098480972e-05, "loss": 0.5475, "num_tokens": 342806165.0, "step": 3639 }, { "epoch": 0.6212664277180406, "grad_norm": 0.48392231549808584, "learning_rate": 1.5156169994879673e-05, "loss": 0.6064, "num_tokens": 342906883.0, "step": 3640 }, { "epoch": 0.6214371053080731, "grad_norm": 0.4529870164825527, "learning_rate": 1.5149342891278376e-05, "loss": 0.672, "num_tokens": 343026004.0, "step": 3641 }, { "epoch": 0.6216077828981055, "grad_norm": 0.5280642991632045, "learning_rate": 1.5142515787677078e-05, "loss": 0.6072, "num_tokens": 343101526.0, "step": 3642 }, { "epoch": 0.6217784604881379, "grad_norm": 0.5079651106200068, "learning_rate": 1.5135688684075782e-05, "loss": 0.6263, "num_tokens": 343193465.0, "step": 3643 }, { "epoch": 0.6219491380781703, "grad_norm": 0.5036191103268727, "learning_rate": 1.5128861580474486e-05, "loss": 0.6105, "num_tokens": 343281564.0, "step": 3644 }, { "epoch": 0.6221198156682027, "grad_norm": 0.5003318758885824, "learning_rate": 1.5122034476873188e-05, "loss": 0.5803, "num_tokens": 343361820.0, "step": 3645 }, { "epoch": 0.6222904932582352, "grad_norm": 0.45579182914669025, "learning_rate": 1.511520737327189e-05, "loss": 0.6344, "num_tokens": 343486923.0, "step": 3646 }, { "epoch": 0.6224611708482676, "grad_norm": 0.5129605331748879, "learning_rate": 1.5108380269670592e-05, "loss": 0.5799, "num_tokens": 343568737.0, "step": 3647 }, { "epoch": 0.6226318484383001, "grad_norm": 0.46515014431960366, "learning_rate": 1.5101553166069296e-05, "loss": 0.601, "num_tokens": 343672498.0, "step": 3648 }, { "epoch": 0.6228025260283325, "grad_norm": 0.5467994798525068, "learning_rate": 1.5094726062468e-05, "loss": 0.6429, "num_tokens": 343744753.0, "step": 3649 }, { "epoch": 0.622973203618365, "grad_norm": 0.5322713736920139, "learning_rate": 1.5087898958866702e-05, "loss": 0.536, "num_tokens": 343812170.0, "step": 3650 }, { "epoch": 0.6231438812083974, "grad_norm": 0.518424488638914, "learning_rate": 1.5081071855265405e-05, "loss": 0.5642, "num_tokens": 343886817.0, "step": 3651 }, { "epoch": 0.6233145587984298, "grad_norm": 0.5423349107231067, "learning_rate": 1.507424475166411e-05, "loss": 0.5732, "num_tokens": 343999190.0, "step": 3652 }, { "epoch": 0.6234852363884622, "grad_norm": 0.5440556783414959, "learning_rate": 1.506741764806281e-05, "loss": 0.6365, "num_tokens": 344078927.0, "step": 3653 }, { "epoch": 0.6236559139784946, "grad_norm": 0.46203045289940403, "learning_rate": 1.5060590544461513e-05, "loss": 0.6083, "num_tokens": 344182562.0, "step": 3654 }, { "epoch": 0.623826591568527, "grad_norm": 0.46694938751930487, "learning_rate": 1.5053763440860215e-05, "loss": 0.5157, "num_tokens": 344266315.0, "step": 3655 }, { "epoch": 0.6239972691585595, "grad_norm": 0.49518303254528184, "learning_rate": 1.504693633725892e-05, "loss": 0.5158, "num_tokens": 344345526.0, "step": 3656 }, { "epoch": 0.6241679467485919, "grad_norm": 0.48470456800599965, "learning_rate": 1.5040109233657623e-05, "loss": 0.5933, "num_tokens": 344447954.0, "step": 3657 }, { "epoch": 0.6243386243386243, "grad_norm": 0.49802194703712266, "learning_rate": 1.5033282130056325e-05, "loss": 0.5587, "num_tokens": 344532540.0, "step": 3658 }, { "epoch": 0.6245093019286567, "grad_norm": 0.5275025191007251, "learning_rate": 1.5026455026455027e-05, "loss": 0.6993, "num_tokens": 344622861.0, "step": 3659 }, { "epoch": 0.6246799795186891, "grad_norm": 0.484272541688487, "learning_rate": 1.5019627922853731e-05, "loss": 0.6051, "num_tokens": 344721117.0, "step": 3660 }, { "epoch": 0.6248506571087217, "grad_norm": 0.5426041229609327, "learning_rate": 1.5012800819252433e-05, "loss": 0.5952, "num_tokens": 344792058.0, "step": 3661 }, { "epoch": 0.6250213346987541, "grad_norm": 0.5375199277688153, "learning_rate": 1.5005973715651137e-05, "loss": 0.5403, "num_tokens": 344864527.0, "step": 3662 }, { "epoch": 0.6251920122887865, "grad_norm": 0.48323595311673584, "learning_rate": 1.499914661204984e-05, "loss": 0.6009, "num_tokens": 344959125.0, "step": 3663 }, { "epoch": 0.6253626898788189, "grad_norm": 0.5188249775848618, "learning_rate": 1.4992319508448543e-05, "loss": 0.5248, "num_tokens": 345058892.0, "step": 3664 }, { "epoch": 0.6255333674688514, "grad_norm": 0.45328848902892505, "learning_rate": 1.4985492404847245e-05, "loss": 0.6351, "num_tokens": 345182736.0, "step": 3665 }, { "epoch": 0.6257040450588838, "grad_norm": 0.5154085227923573, "learning_rate": 1.4978665301245947e-05, "loss": 0.5787, "num_tokens": 345267006.0, "step": 3666 }, { "epoch": 0.6258747226489162, "grad_norm": 0.4700823181980873, "learning_rate": 1.497183819764465e-05, "loss": 0.5573, "num_tokens": 345365916.0, "step": 3667 }, { "epoch": 0.6260454002389486, "grad_norm": 0.44215985499443855, "learning_rate": 1.4965011094043354e-05, "loss": 0.6259, "num_tokens": 345492549.0, "step": 3668 }, { "epoch": 0.626216077828981, "grad_norm": 0.6734393352781106, "learning_rate": 1.4958183990442056e-05, "loss": 0.6583, "num_tokens": 345576894.0, "step": 3669 }, { "epoch": 0.6263867554190135, "grad_norm": 0.44737622704841884, "learning_rate": 1.495135688684076e-05, "loss": 0.5609, "num_tokens": 345677144.0, "step": 3670 }, { "epoch": 0.6265574330090459, "grad_norm": 0.4904059630611964, "learning_rate": 1.494452978323946e-05, "loss": 0.5663, "num_tokens": 345760087.0, "step": 3671 }, { "epoch": 0.6267281105990783, "grad_norm": 0.4846798933422681, "learning_rate": 1.4937702679638164e-05, "loss": 0.6625, "num_tokens": 345857558.0, "step": 3672 }, { "epoch": 0.6268987881891108, "grad_norm": 0.5140338397321844, "learning_rate": 1.4930875576036868e-05, "loss": 0.5745, "num_tokens": 345940449.0, "step": 3673 }, { "epoch": 0.6270694657791432, "grad_norm": 0.4579086435169565, "learning_rate": 1.492404847243557e-05, "loss": 0.5259, "num_tokens": 346034060.0, "step": 3674 }, { "epoch": 0.6272401433691757, "grad_norm": 0.46718407084506575, "learning_rate": 1.4917221368834274e-05, "loss": 0.5531, "num_tokens": 346118852.0, "step": 3675 }, { "epoch": 0.6274108209592081, "grad_norm": 0.466267934052504, "learning_rate": 1.4910394265232978e-05, "loss": 0.5438, "num_tokens": 346215185.0, "step": 3676 }, { "epoch": 0.6275814985492405, "grad_norm": 0.46413998348802865, "learning_rate": 1.4903567161631678e-05, "loss": 0.6132, "num_tokens": 346318696.0, "step": 3677 }, { "epoch": 0.6277521761392729, "grad_norm": 0.4504372498577167, "learning_rate": 1.4896740058030382e-05, "loss": 0.5718, "num_tokens": 346424870.0, "step": 3678 }, { "epoch": 0.6279228537293053, "grad_norm": 0.5087467728312661, "learning_rate": 1.4889912954429084e-05, "loss": 0.4863, "num_tokens": 346499973.0, "step": 3679 }, { "epoch": 0.6280935313193378, "grad_norm": 0.5121803175158001, "learning_rate": 1.4883085850827787e-05, "loss": 0.5054, "num_tokens": 346570215.0, "step": 3680 }, { "epoch": 0.6282642089093702, "grad_norm": 0.5058349750572209, "learning_rate": 1.4876258747226491e-05, "loss": 0.5895, "num_tokens": 346647290.0, "step": 3681 }, { "epoch": 0.6284348864994026, "grad_norm": 0.49298229833583507, "learning_rate": 1.4869431643625193e-05, "loss": 0.5666, "num_tokens": 346736277.0, "step": 3682 }, { "epoch": 0.628605564089435, "grad_norm": 0.46180474787414244, "learning_rate": 1.4862604540023897e-05, "loss": 0.6018, "num_tokens": 346846314.0, "step": 3683 }, { "epoch": 0.6287762416794674, "grad_norm": 0.4722385134379064, "learning_rate": 1.4855777436422597e-05, "loss": 0.5161, "num_tokens": 346937773.0, "step": 3684 }, { "epoch": 0.6289469192694999, "grad_norm": 0.47672708593527663, "learning_rate": 1.4848950332821301e-05, "loss": 0.5062, "num_tokens": 347024863.0, "step": 3685 }, { "epoch": 0.6291175968595324, "grad_norm": 0.45980006040918736, "learning_rate": 1.4842123229220005e-05, "loss": 0.5513, "num_tokens": 347124855.0, "step": 3686 }, { "epoch": 0.6292882744495648, "grad_norm": 0.5087906334866202, "learning_rate": 1.4835296125618707e-05, "loss": 0.5893, "num_tokens": 347210676.0, "step": 3687 }, { "epoch": 0.6294589520395972, "grad_norm": 0.4345086644347096, "learning_rate": 1.482846902201741e-05, "loss": 0.6718, "num_tokens": 347352099.0, "step": 3688 }, { "epoch": 0.6296296296296297, "grad_norm": 0.49246945368243084, "learning_rate": 1.4821641918416115e-05, "loss": 0.5786, "num_tokens": 347443628.0, "step": 3689 }, { "epoch": 0.6298003072196621, "grad_norm": 0.5326390358560981, "learning_rate": 1.4814814814814815e-05, "loss": 0.5436, "num_tokens": 347514180.0, "step": 3690 }, { "epoch": 0.6299709848096945, "grad_norm": 0.9621833306767478, "learning_rate": 1.4807987711213519e-05, "loss": 0.631, "num_tokens": 347597539.0, "step": 3691 }, { "epoch": 0.6301416623997269, "grad_norm": 0.42885374568712187, "learning_rate": 1.480116060761222e-05, "loss": 0.4931, "num_tokens": 347700173.0, "step": 3692 }, { "epoch": 0.6303123399897593, "grad_norm": 0.46785250904922543, "learning_rate": 1.4794333504010925e-05, "loss": 0.5889, "num_tokens": 347805623.0, "step": 3693 }, { "epoch": 0.6304830175797917, "grad_norm": 0.4751398418236468, "learning_rate": 1.4787506400409628e-05, "loss": 0.5489, "num_tokens": 347893698.0, "step": 3694 }, { "epoch": 0.6306536951698242, "grad_norm": 0.49032594241916083, "learning_rate": 1.4780679296808332e-05, "loss": 0.6338, "num_tokens": 347987041.0, "step": 3695 }, { "epoch": 0.6308243727598566, "grad_norm": 0.4386323849733401, "learning_rate": 1.4773852193207032e-05, "loss": 0.5962, "num_tokens": 348097986.0, "step": 3696 }, { "epoch": 0.630995050349889, "grad_norm": 0.5450382832701353, "learning_rate": 1.4767025089605736e-05, "loss": 0.633, "num_tokens": 348177045.0, "step": 3697 }, { "epoch": 0.6311657279399215, "grad_norm": 0.49356701007848697, "learning_rate": 1.4760197986004438e-05, "loss": 0.6194, "num_tokens": 348272461.0, "step": 3698 }, { "epoch": 0.631336405529954, "grad_norm": 0.426814678418761, "learning_rate": 1.4753370882403142e-05, "loss": 0.4986, "num_tokens": 348386429.0, "step": 3699 }, { "epoch": 0.6315070831199864, "grad_norm": 0.4873136220503191, "learning_rate": 1.4746543778801846e-05, "loss": 0.5275, "num_tokens": 348468522.0, "step": 3700 }, { "epoch": 0.6316777607100188, "grad_norm": 0.4958706273963355, "learning_rate": 1.4739716675200548e-05, "loss": 0.5487, "num_tokens": 348554210.0, "step": 3701 }, { "epoch": 0.6318484383000512, "grad_norm": 0.5304705339998864, "learning_rate": 1.473288957159925e-05, "loss": 0.7096, "num_tokens": 348640467.0, "step": 3702 }, { "epoch": 0.6320191158900836, "grad_norm": 0.5283853808155656, "learning_rate": 1.4726062467997952e-05, "loss": 0.6158, "num_tokens": 348734098.0, "step": 3703 }, { "epoch": 0.6321897934801161, "grad_norm": 0.473656765582445, "learning_rate": 1.4719235364396656e-05, "loss": 0.5666, "num_tokens": 348825712.0, "step": 3704 }, { "epoch": 0.6323604710701485, "grad_norm": 0.4475526464790234, "learning_rate": 1.471240826079536e-05, "loss": 0.5841, "num_tokens": 348938880.0, "step": 3705 }, { "epoch": 0.6325311486601809, "grad_norm": 0.4652775060530408, "learning_rate": 1.4705581157194062e-05, "loss": 0.512, "num_tokens": 349026217.0, "step": 3706 }, { "epoch": 0.6327018262502133, "grad_norm": 0.44225836407471136, "learning_rate": 1.4698754053592765e-05, "loss": 0.597, "num_tokens": 349137986.0, "step": 3707 }, { "epoch": 0.6328725038402457, "grad_norm": 0.5023214308626308, "learning_rate": 1.4691926949991466e-05, "loss": 0.5555, "num_tokens": 349216389.0, "step": 3708 }, { "epoch": 0.6330431814302782, "grad_norm": 0.45603836233473594, "learning_rate": 1.468509984639017e-05, "loss": 0.5528, "num_tokens": 349307720.0, "step": 3709 }, { "epoch": 0.6332138590203107, "grad_norm": 0.454297782264187, "learning_rate": 1.4678272742788873e-05, "loss": 0.5349, "num_tokens": 349402403.0, "step": 3710 }, { "epoch": 0.6333845366103431, "grad_norm": 0.45377734597370345, "learning_rate": 1.4671445639187575e-05, "loss": 0.6106, "num_tokens": 349507600.0, "step": 3711 }, { "epoch": 0.6335552142003755, "grad_norm": 0.5062774700847578, "learning_rate": 1.4664618535586279e-05, "loss": 0.5726, "num_tokens": 349582864.0, "step": 3712 }, { "epoch": 0.633725891790408, "grad_norm": 0.4583254916136759, "learning_rate": 1.4657791431984983e-05, "loss": 0.5538, "num_tokens": 349679529.0, "step": 3713 }, { "epoch": 0.6338965693804404, "grad_norm": 0.558291336086708, "learning_rate": 1.4650964328383683e-05, "loss": 0.6403, "num_tokens": 349753937.0, "step": 3714 }, { "epoch": 0.6340672469704728, "grad_norm": 0.46138867312532283, "learning_rate": 1.4644137224782387e-05, "loss": 0.5873, "num_tokens": 349859941.0, "step": 3715 }, { "epoch": 0.6342379245605052, "grad_norm": 0.5290761272650552, "learning_rate": 1.4637310121181089e-05, "loss": 0.5552, "num_tokens": 349926983.0, "step": 3716 }, { "epoch": 0.6344086021505376, "grad_norm": 0.48811342586908746, "learning_rate": 1.4630483017579793e-05, "loss": 0.6403, "num_tokens": 350042706.0, "step": 3717 }, { "epoch": 0.63457927974057, "grad_norm": 0.45835958355892853, "learning_rate": 1.4623655913978497e-05, "loss": 0.6075, "num_tokens": 350156388.0, "step": 3718 }, { "epoch": 0.6347499573306025, "grad_norm": 0.5193060728839891, "learning_rate": 1.4616828810377199e-05, "loss": 0.543, "num_tokens": 350232403.0, "step": 3719 }, { "epoch": 0.6349206349206349, "grad_norm": 0.4499430473617779, "learning_rate": 1.4610001706775902e-05, "loss": 0.5548, "num_tokens": 350333080.0, "step": 3720 }, { "epoch": 0.6350913125106673, "grad_norm": 0.44897183442136046, "learning_rate": 1.4603174603174603e-05, "loss": 0.5513, "num_tokens": 350452093.0, "step": 3721 }, { "epoch": 0.6352619901006997, "grad_norm": 0.5116532833015546, "learning_rate": 1.4596347499573307e-05, "loss": 0.6153, "num_tokens": 350531851.0, "step": 3722 }, { "epoch": 0.6354326676907323, "grad_norm": 0.4788419997066486, "learning_rate": 1.458952039597201e-05, "loss": 0.5584, "num_tokens": 350625250.0, "step": 3723 }, { "epoch": 0.6356033452807647, "grad_norm": 0.4525914919743234, "learning_rate": 1.4582693292370712e-05, "loss": 0.5445, "num_tokens": 350745541.0, "step": 3724 }, { "epoch": 0.6357740228707971, "grad_norm": 0.4601355358581514, "learning_rate": 1.4575866188769416e-05, "loss": 0.6311, "num_tokens": 350853486.0, "step": 3725 }, { "epoch": 0.6359447004608295, "grad_norm": 0.5329886492811607, "learning_rate": 1.456903908516812e-05, "loss": 0.6641, "num_tokens": 350935298.0, "step": 3726 }, { "epoch": 0.6361153780508619, "grad_norm": 0.4455291735330309, "learning_rate": 1.456221198156682e-05, "loss": 0.542, "num_tokens": 351039912.0, "step": 3727 }, { "epoch": 0.6362860556408944, "grad_norm": 0.4876350485841393, "learning_rate": 1.4555384877965524e-05, "loss": 0.5164, "num_tokens": 351117100.0, "step": 3728 }, { "epoch": 0.6364567332309268, "grad_norm": 0.49137010819118804, "learning_rate": 1.4548557774364228e-05, "loss": 0.6718, "num_tokens": 351220881.0, "step": 3729 }, { "epoch": 0.6366274108209592, "grad_norm": 0.44709323161464476, "learning_rate": 1.454173067076293e-05, "loss": 0.5254, "num_tokens": 351323434.0, "step": 3730 }, { "epoch": 0.6367980884109916, "grad_norm": 0.5250752865022018, "learning_rate": 1.4534903567161634e-05, "loss": 0.6903, "num_tokens": 351408250.0, "step": 3731 }, { "epoch": 0.636968766001024, "grad_norm": 0.5187653491823154, "learning_rate": 1.4528076463560337e-05, "loss": 0.5808, "num_tokens": 351482993.0, "step": 3732 }, { "epoch": 0.6371394435910565, "grad_norm": 0.5763755890986934, "learning_rate": 1.4521249359959038e-05, "loss": 0.6076, "num_tokens": 351547977.0, "step": 3733 }, { "epoch": 0.6373101211810889, "grad_norm": 0.5630513685715748, "learning_rate": 1.4514422256357742e-05, "loss": 0.6017, "num_tokens": 351610899.0, "step": 3734 }, { "epoch": 0.6374807987711214, "grad_norm": 0.5155879540563318, "learning_rate": 1.4507595152756444e-05, "loss": 0.6155, "num_tokens": 351692402.0, "step": 3735 }, { "epoch": 0.6376514763611538, "grad_norm": 0.4816079220069336, "learning_rate": 1.4500768049155147e-05, "loss": 0.526, "num_tokens": 351779989.0, "step": 3736 }, { "epoch": 0.6378221539511862, "grad_norm": 0.46713109047215534, "learning_rate": 1.4493940945553851e-05, "loss": 0.6258, "num_tokens": 351883951.0, "step": 3737 }, { "epoch": 0.6379928315412187, "grad_norm": 0.4936754808222198, "learning_rate": 1.4487113841952553e-05, "loss": 0.682, "num_tokens": 351989465.0, "step": 3738 }, { "epoch": 0.6381635091312511, "grad_norm": 0.5183543763455014, "learning_rate": 1.4480286738351255e-05, "loss": 0.6539, "num_tokens": 352086491.0, "step": 3739 }, { "epoch": 0.6383341867212835, "grad_norm": 0.46370058373013345, "learning_rate": 1.4473459634749957e-05, "loss": 0.7025, "num_tokens": 352205771.0, "step": 3740 }, { "epoch": 0.6385048643113159, "grad_norm": 0.5102338274372333, "learning_rate": 1.4466632531148661e-05, "loss": 0.5586, "num_tokens": 352284606.0, "step": 3741 }, { "epoch": 0.6386755419013483, "grad_norm": 0.4738402000806786, "learning_rate": 1.4459805427547365e-05, "loss": 0.5812, "num_tokens": 352386500.0, "step": 3742 }, { "epoch": 0.6388462194913808, "grad_norm": 0.4932969001076414, "learning_rate": 1.4452978323946067e-05, "loss": 0.5934, "num_tokens": 352478818.0, "step": 3743 }, { "epoch": 0.6390168970814132, "grad_norm": 0.5241791669624905, "learning_rate": 1.444615122034477e-05, "loss": 0.6022, "num_tokens": 352573485.0, "step": 3744 }, { "epoch": 0.6391875746714456, "grad_norm": 0.47739991325212305, "learning_rate": 1.4439324116743471e-05, "loss": 0.5739, "num_tokens": 352673466.0, "step": 3745 }, { "epoch": 0.639358252261478, "grad_norm": 0.5473925501149179, "learning_rate": 1.4432497013142175e-05, "loss": 0.6279, "num_tokens": 352750706.0, "step": 3746 }, { "epoch": 0.6395289298515104, "grad_norm": 0.5235512408597872, "learning_rate": 1.4425669909540879e-05, "loss": 0.5958, "num_tokens": 352826075.0, "step": 3747 }, { "epoch": 0.639699607441543, "grad_norm": 0.46722217146907263, "learning_rate": 1.441884280593958e-05, "loss": 0.5519, "num_tokens": 352917742.0, "step": 3748 }, { "epoch": 0.6398702850315754, "grad_norm": 0.49945002733656146, "learning_rate": 1.4412015702338284e-05, "loss": 0.6494, "num_tokens": 353019844.0, "step": 3749 }, { "epoch": 0.6400409626216078, "grad_norm": 0.47881003653860266, "learning_rate": 1.4405188598736988e-05, "loss": 0.624, "num_tokens": 353136413.0, "step": 3750 }, { "epoch": 0.6402116402116402, "grad_norm": 0.443405708421006, "learning_rate": 1.439836149513569e-05, "loss": 0.61, "num_tokens": 353247375.0, "step": 3751 }, { "epoch": 0.6403823178016727, "grad_norm": 0.5620455584694852, "learning_rate": 1.4391534391534392e-05, "loss": 0.6152, "num_tokens": 353315152.0, "step": 3752 }, { "epoch": 0.6405529953917051, "grad_norm": 0.5512611706778124, "learning_rate": 1.4384707287933094e-05, "loss": 0.5554, "num_tokens": 353378303.0, "step": 3753 }, { "epoch": 0.6407236729817375, "grad_norm": 0.4492987975372472, "learning_rate": 1.4377880184331798e-05, "loss": 0.5514, "num_tokens": 353479876.0, "step": 3754 }, { "epoch": 0.6408943505717699, "grad_norm": 0.5470720570886061, "learning_rate": 1.4371053080730502e-05, "loss": 0.533, "num_tokens": 353548305.0, "step": 3755 }, { "epoch": 0.6410650281618023, "grad_norm": 0.4750913669050522, "learning_rate": 1.4364225977129204e-05, "loss": 0.6744, "num_tokens": 353663938.0, "step": 3756 }, { "epoch": 0.6412357057518348, "grad_norm": 0.4704155386337519, "learning_rate": 1.4357398873527908e-05, "loss": 0.5246, "num_tokens": 353752590.0, "step": 3757 }, { "epoch": 0.6414063833418672, "grad_norm": 0.5019138475475852, "learning_rate": 1.4350571769926608e-05, "loss": 0.5972, "num_tokens": 353837683.0, "step": 3758 }, { "epoch": 0.6415770609318996, "grad_norm": 0.6192455871800803, "learning_rate": 1.4343744666325312e-05, "loss": 0.6349, "num_tokens": 353965500.0, "step": 3759 }, { "epoch": 0.6417477385219321, "grad_norm": 0.7692675502128963, "learning_rate": 1.4336917562724016e-05, "loss": 0.6936, "num_tokens": 354065614.0, "step": 3760 }, { "epoch": 0.6419184161119645, "grad_norm": 0.4723790604081513, "learning_rate": 1.4330090459122718e-05, "loss": 0.5073, "num_tokens": 354142643.0, "step": 3761 }, { "epoch": 0.642089093701997, "grad_norm": 0.45032966659767376, "learning_rate": 1.4323263355521422e-05, "loss": 0.5841, "num_tokens": 354260220.0, "step": 3762 }, { "epoch": 0.6422597712920294, "grad_norm": 0.4247892854563498, "learning_rate": 1.4316436251920125e-05, "loss": 0.5496, "num_tokens": 354376728.0, "step": 3763 }, { "epoch": 0.6424304488820618, "grad_norm": 0.4787239698405895, "learning_rate": 1.4309609148318826e-05, "loss": 0.5465, "num_tokens": 354465252.0, "step": 3764 }, { "epoch": 0.6426011264720942, "grad_norm": 0.44270779526128357, "learning_rate": 1.430278204471753e-05, "loss": 0.4938, "num_tokens": 354558980.0, "step": 3765 }, { "epoch": 0.6427718040621266, "grad_norm": 0.483882307699064, "learning_rate": 1.4295954941116233e-05, "loss": 0.5903, "num_tokens": 354649463.0, "step": 3766 }, { "epoch": 0.6429424816521591, "grad_norm": 0.4964214518537526, "learning_rate": 1.4289127837514935e-05, "loss": 0.5542, "num_tokens": 354734746.0, "step": 3767 }, { "epoch": 0.6431131592421915, "grad_norm": 0.49157338235783127, "learning_rate": 1.4282300733913639e-05, "loss": 0.5337, "num_tokens": 354818852.0, "step": 3768 }, { "epoch": 0.6432838368322239, "grad_norm": 0.4487574602954322, "learning_rate": 1.4275473630312343e-05, "loss": 0.5847, "num_tokens": 354925027.0, "step": 3769 }, { "epoch": 0.6434545144222563, "grad_norm": 0.500312550545791, "learning_rate": 1.4268646526711043e-05, "loss": 0.5067, "num_tokens": 355006510.0, "step": 3770 }, { "epoch": 0.6436251920122887, "grad_norm": 0.4714850495194229, "learning_rate": 1.4261819423109747e-05, "loss": 0.5702, "num_tokens": 355102256.0, "step": 3771 }, { "epoch": 0.6437958696023213, "grad_norm": 0.45024301794887495, "learning_rate": 1.4254992319508449e-05, "loss": 0.6018, "num_tokens": 355218351.0, "step": 3772 }, { "epoch": 0.6439665471923537, "grad_norm": 0.4376018469857722, "learning_rate": 1.4248165215907153e-05, "loss": 0.5019, "num_tokens": 355321424.0, "step": 3773 }, { "epoch": 0.6441372247823861, "grad_norm": 0.49762643059750217, "learning_rate": 1.4241338112305857e-05, "loss": 0.5992, "num_tokens": 355415149.0, "step": 3774 }, { "epoch": 0.6443079023724185, "grad_norm": 0.5385516527773379, "learning_rate": 1.4234511008704559e-05, "loss": 0.5905, "num_tokens": 355487991.0, "step": 3775 }, { "epoch": 0.644478579962451, "grad_norm": 0.48099440183462305, "learning_rate": 1.422768390510326e-05, "loss": 0.6146, "num_tokens": 355589825.0, "step": 3776 }, { "epoch": 0.6446492575524834, "grad_norm": 0.471949567960015, "learning_rate": 1.4220856801501963e-05, "loss": 0.5476, "num_tokens": 355681377.0, "step": 3777 }, { "epoch": 0.6448199351425158, "grad_norm": 0.5537635568798891, "learning_rate": 1.4214029697900666e-05, "loss": 0.6676, "num_tokens": 355756965.0, "step": 3778 }, { "epoch": 0.6449906127325482, "grad_norm": 0.5043811754287045, "learning_rate": 1.420720259429937e-05, "loss": 0.5912, "num_tokens": 355846644.0, "step": 3779 }, { "epoch": 0.6451612903225806, "grad_norm": 0.47095812943265924, "learning_rate": 1.4200375490698072e-05, "loss": 0.5618, "num_tokens": 355934349.0, "step": 3780 }, { "epoch": 0.645331967912613, "grad_norm": 0.4736574695521975, "learning_rate": 1.4193548387096776e-05, "loss": 0.5598, "num_tokens": 356018550.0, "step": 3781 }, { "epoch": 0.6455026455026455, "grad_norm": 0.46991381477626787, "learning_rate": 1.418672128349548e-05, "loss": 0.5593, "num_tokens": 356118335.0, "step": 3782 }, { "epoch": 0.6456733230926779, "grad_norm": 0.49323274881496043, "learning_rate": 1.417989417989418e-05, "loss": 0.519, "num_tokens": 356196914.0, "step": 3783 }, { "epoch": 0.6458440006827103, "grad_norm": 0.5201692555192612, "learning_rate": 1.4173067076292884e-05, "loss": 0.5645, "num_tokens": 356269150.0, "step": 3784 }, { "epoch": 0.6460146782727428, "grad_norm": 0.4521880936211394, "learning_rate": 1.4166239972691586e-05, "loss": 0.6278, "num_tokens": 356389342.0, "step": 3785 }, { "epoch": 0.6461853558627753, "grad_norm": 0.4684579853999267, "learning_rate": 1.415941286909029e-05, "loss": 0.4999, "num_tokens": 356471130.0, "step": 3786 }, { "epoch": 0.6463560334528077, "grad_norm": 0.5083423087705352, "learning_rate": 1.4152585765488994e-05, "loss": 0.6163, "num_tokens": 356556260.0, "step": 3787 }, { "epoch": 0.6465267110428401, "grad_norm": 0.4621525280929079, "learning_rate": 1.4145758661887696e-05, "loss": 0.5773, "num_tokens": 356650921.0, "step": 3788 }, { "epoch": 0.6466973886328725, "grad_norm": 0.42477973824923165, "learning_rate": 1.4138931558286398e-05, "loss": 0.53, "num_tokens": 356761096.0, "step": 3789 }, { "epoch": 0.6468680662229049, "grad_norm": 0.49359292319936676, "learning_rate": 1.41321044546851e-05, "loss": 0.4914, "num_tokens": 356834927.0, "step": 3790 }, { "epoch": 0.6470387438129374, "grad_norm": 0.4652983849899644, "learning_rate": 1.4125277351083804e-05, "loss": 0.6744, "num_tokens": 356948551.0, "step": 3791 }, { "epoch": 0.6472094214029698, "grad_norm": 0.6116197856950916, "learning_rate": 1.4118450247482507e-05, "loss": 0.6452, "num_tokens": 357032365.0, "step": 3792 }, { "epoch": 0.6473800989930022, "grad_norm": 0.46856990185529046, "learning_rate": 1.411162314388121e-05, "loss": 0.5193, "num_tokens": 357117204.0, "step": 3793 }, { "epoch": 0.6475507765830346, "grad_norm": 0.5335093984872318, "learning_rate": 1.4104796040279913e-05, "loss": 0.5757, "num_tokens": 357190350.0, "step": 3794 }, { "epoch": 0.647721454173067, "grad_norm": 0.519910066996408, "learning_rate": 1.4097968936678613e-05, "loss": 0.6568, "num_tokens": 357287409.0, "step": 3795 }, { "epoch": 0.6478921317630995, "grad_norm": 0.4794030378778992, "learning_rate": 1.4091141833077317e-05, "loss": 0.52, "num_tokens": 357370802.0, "step": 3796 }, { "epoch": 0.648062809353132, "grad_norm": 0.47085393760406064, "learning_rate": 1.4084314729476021e-05, "loss": 0.5131, "num_tokens": 357460751.0, "step": 3797 }, { "epoch": 0.6482334869431644, "grad_norm": 0.45758280946260976, "learning_rate": 1.4077487625874725e-05, "loss": 0.5732, "num_tokens": 357563036.0, "step": 3798 }, { "epoch": 0.6484041645331968, "grad_norm": 0.4715463199161767, "learning_rate": 1.4070660522273427e-05, "loss": 0.5605, "num_tokens": 357649387.0, "step": 3799 }, { "epoch": 0.6485748421232292, "grad_norm": 0.45928005350275714, "learning_rate": 1.406383341867213e-05, "loss": 0.6481, "num_tokens": 357774860.0, "step": 3800 }, { "epoch": 0.6487455197132617, "grad_norm": 0.5070566708477471, "learning_rate": 1.4057006315070831e-05, "loss": 0.5737, "num_tokens": 357848372.0, "step": 3801 }, { "epoch": 0.6489161973032941, "grad_norm": 0.5247942028080713, "learning_rate": 1.4050179211469535e-05, "loss": 0.6493, "num_tokens": 357925095.0, "step": 3802 }, { "epoch": 0.6490868748933265, "grad_norm": 0.5367026230330154, "learning_rate": 1.4043352107868239e-05, "loss": 0.6618, "num_tokens": 358008635.0, "step": 3803 }, { "epoch": 0.6492575524833589, "grad_norm": 0.46948072264818336, "learning_rate": 1.403652500426694e-05, "loss": 0.5128, "num_tokens": 358098072.0, "step": 3804 }, { "epoch": 0.6494282300733913, "grad_norm": 0.45925761924160596, "learning_rate": 1.4029697900665644e-05, "loss": 0.5214, "num_tokens": 358193639.0, "step": 3805 }, { "epoch": 0.6495989076634238, "grad_norm": 0.453543468518027, "learning_rate": 1.4022870797064348e-05, "loss": 0.618, "num_tokens": 358304673.0, "step": 3806 }, { "epoch": 0.6497695852534562, "grad_norm": 0.47912465555542594, "learning_rate": 1.4016043693463048e-05, "loss": 0.5717, "num_tokens": 358409941.0, "step": 3807 }, { "epoch": 0.6499402628434886, "grad_norm": 0.521424638758478, "learning_rate": 1.4009216589861752e-05, "loss": 0.663, "num_tokens": 358499947.0, "step": 3808 }, { "epoch": 0.6501109404335211, "grad_norm": 0.46707697621446365, "learning_rate": 1.4002389486260454e-05, "loss": 0.5787, "num_tokens": 358602414.0, "step": 3809 }, { "epoch": 0.6502816180235536, "grad_norm": 0.5011905552941018, "learning_rate": 1.3995562382659158e-05, "loss": 0.5703, "num_tokens": 358681273.0, "step": 3810 }, { "epoch": 0.650452295613586, "grad_norm": 0.43915677429411487, "learning_rate": 1.3988735279057862e-05, "loss": 0.5245, "num_tokens": 358784752.0, "step": 3811 }, { "epoch": 0.6506229732036184, "grad_norm": 0.48464019174607387, "learning_rate": 1.3981908175456564e-05, "loss": 0.5379, "num_tokens": 358866608.0, "step": 3812 }, { "epoch": 0.6507936507936508, "grad_norm": 0.4516495812404038, "learning_rate": 1.3975081071855268e-05, "loss": 0.6232, "num_tokens": 358974827.0, "step": 3813 }, { "epoch": 0.6509643283836832, "grad_norm": 0.494694300343105, "learning_rate": 1.3968253968253968e-05, "loss": 0.641, "num_tokens": 359065953.0, "step": 3814 }, { "epoch": 0.6511350059737157, "grad_norm": 0.5595818227980645, "learning_rate": 1.3961426864652672e-05, "loss": 0.6359, "num_tokens": 359151497.0, "step": 3815 }, { "epoch": 0.6513056835637481, "grad_norm": 0.4331794975008109, "learning_rate": 1.3954599761051376e-05, "loss": 0.5319, "num_tokens": 359267988.0, "step": 3816 }, { "epoch": 0.6514763611537805, "grad_norm": 0.48754286493112237, "learning_rate": 1.3947772657450078e-05, "loss": 0.5589, "num_tokens": 359357092.0, "step": 3817 }, { "epoch": 0.6516470387438129, "grad_norm": 0.5042861628473454, "learning_rate": 1.3940945553848781e-05, "loss": 0.4697, "num_tokens": 359433466.0, "step": 3818 }, { "epoch": 0.6518177163338453, "grad_norm": 0.5419075112174828, "learning_rate": 1.3934118450247485e-05, "loss": 0.6368, "num_tokens": 359511416.0, "step": 3819 }, { "epoch": 0.6519883939238778, "grad_norm": 0.524463326016875, "learning_rate": 1.3927291346646186e-05, "loss": 0.494, "num_tokens": 359579854.0, "step": 3820 }, { "epoch": 0.6521590715139102, "grad_norm": 0.501050749758819, "learning_rate": 1.392046424304489e-05, "loss": 0.6182, "num_tokens": 359683262.0, "step": 3821 }, { "epoch": 0.6523297491039427, "grad_norm": 0.4579773648560269, "learning_rate": 1.3913637139443591e-05, "loss": 0.5012, "num_tokens": 359770007.0, "step": 3822 }, { "epoch": 0.6525004266939751, "grad_norm": 0.4805502822957503, "learning_rate": 1.3906810035842295e-05, "loss": 0.5969, "num_tokens": 359871491.0, "step": 3823 }, { "epoch": 0.6526711042840075, "grad_norm": 0.5574061231943033, "learning_rate": 1.3899982932240999e-05, "loss": 0.6261, "num_tokens": 359959348.0, "step": 3824 }, { "epoch": 0.65284178187404, "grad_norm": 0.45864521019314236, "learning_rate": 1.3893155828639701e-05, "loss": 0.581, "num_tokens": 360060554.0, "step": 3825 }, { "epoch": 0.6530124594640724, "grad_norm": 0.545446093445203, "learning_rate": 1.3886328725038403e-05, "loss": 0.5554, "num_tokens": 360125603.0, "step": 3826 }, { "epoch": 0.6531831370541048, "grad_norm": 0.5837132429395301, "learning_rate": 1.3879501621437105e-05, "loss": 0.5555, "num_tokens": 360195176.0, "step": 3827 }, { "epoch": 0.6533538146441372, "grad_norm": 0.5393913593362436, "learning_rate": 1.3872674517835809e-05, "loss": 0.6063, "num_tokens": 360267449.0, "step": 3828 }, { "epoch": 0.6535244922341696, "grad_norm": 0.47833750959271826, "learning_rate": 1.3865847414234513e-05, "loss": 0.6126, "num_tokens": 360375699.0, "step": 3829 }, { "epoch": 0.6536951698242021, "grad_norm": 0.4876729998359854, "learning_rate": 1.3859020310633215e-05, "loss": 0.6065, "num_tokens": 360473788.0, "step": 3830 }, { "epoch": 0.6538658474142345, "grad_norm": 0.46982986606844346, "learning_rate": 1.3852193207031918e-05, "loss": 0.5124, "num_tokens": 360563861.0, "step": 3831 }, { "epoch": 0.6540365250042669, "grad_norm": 0.5057571263258188, "learning_rate": 1.384536610343062e-05, "loss": 0.5681, "num_tokens": 360644215.0, "step": 3832 }, { "epoch": 0.6542072025942993, "grad_norm": 0.4738716921155277, "learning_rate": 1.3838538999829323e-05, "loss": 0.5399, "num_tokens": 360735974.0, "step": 3833 }, { "epoch": 0.6543778801843319, "grad_norm": 0.5184390907452492, "learning_rate": 1.3831711896228026e-05, "loss": 0.5918, "num_tokens": 360821384.0, "step": 3834 }, { "epoch": 0.6545485577743643, "grad_norm": 0.5080531304522219, "learning_rate": 1.382488479262673e-05, "loss": 0.5368, "num_tokens": 360893999.0, "step": 3835 }, { "epoch": 0.6547192353643967, "grad_norm": 0.4385195608657318, "learning_rate": 1.3818057689025432e-05, "loss": 0.531, "num_tokens": 361004858.0, "step": 3836 }, { "epoch": 0.6548899129544291, "grad_norm": 0.5757929451370575, "learning_rate": 1.3811230585424136e-05, "loss": 0.5966, "num_tokens": 361095720.0, "step": 3837 }, { "epoch": 0.6550605905444615, "grad_norm": 0.47335493281966323, "learning_rate": 1.3804403481822836e-05, "loss": 0.588, "num_tokens": 361191803.0, "step": 3838 }, { "epoch": 0.655231268134494, "grad_norm": 0.4469422686721553, "learning_rate": 1.379757637822154e-05, "loss": 0.5872, "num_tokens": 361303048.0, "step": 3839 }, { "epoch": 0.6554019457245264, "grad_norm": 0.49706397009095593, "learning_rate": 1.3790749274620244e-05, "loss": 0.5718, "num_tokens": 361381006.0, "step": 3840 }, { "epoch": 0.6555726233145588, "grad_norm": 0.43484543141682247, "learning_rate": 1.3783922171018946e-05, "loss": 0.5354, "num_tokens": 361488160.0, "step": 3841 }, { "epoch": 0.6557433009045912, "grad_norm": 0.4861595081507606, "learning_rate": 1.377709506741765e-05, "loss": 0.6466, "num_tokens": 361592977.0, "step": 3842 }, { "epoch": 0.6559139784946236, "grad_norm": 0.47344903594427895, "learning_rate": 1.3770267963816353e-05, "loss": 0.6212, "num_tokens": 361710327.0, "step": 3843 }, { "epoch": 0.656084656084656, "grad_norm": 0.47487003140995504, "learning_rate": 1.3763440860215056e-05, "loss": 0.562, "num_tokens": 361792686.0, "step": 3844 }, { "epoch": 0.6562553336746885, "grad_norm": 0.442425311545529, "learning_rate": 1.3756613756613758e-05, "loss": 0.5177, "num_tokens": 361906590.0, "step": 3845 }, { "epoch": 0.6564260112647209, "grad_norm": 0.46280129930172653, "learning_rate": 1.374978665301246e-05, "loss": 0.6114, "num_tokens": 362009758.0, "step": 3846 }, { "epoch": 0.6565966888547534, "grad_norm": 0.46018879007957286, "learning_rate": 1.3742959549411163e-05, "loss": 0.4948, "num_tokens": 362096133.0, "step": 3847 }, { "epoch": 0.6567673664447858, "grad_norm": 0.5199273694038551, "learning_rate": 1.3736132445809867e-05, "loss": 0.6525, "num_tokens": 362187768.0, "step": 3848 }, { "epoch": 0.6569380440348183, "grad_norm": 0.49922608597260465, "learning_rate": 1.372930534220857e-05, "loss": 0.5936, "num_tokens": 362282605.0, "step": 3849 }, { "epoch": 0.6571087216248507, "grad_norm": 0.4482358578309397, "learning_rate": 1.3722478238607273e-05, "loss": 0.6662, "num_tokens": 362399966.0, "step": 3850 }, { "epoch": 0.6572793992148831, "grad_norm": 0.48430908163091957, "learning_rate": 1.3715651135005973e-05, "loss": 0.6159, "num_tokens": 362495109.0, "step": 3851 }, { "epoch": 0.6574500768049155, "grad_norm": 0.4639578483438303, "learning_rate": 1.3708824031404677e-05, "loss": 0.5711, "num_tokens": 362586814.0, "step": 3852 }, { "epoch": 0.6576207543949479, "grad_norm": 0.4773415211107689, "learning_rate": 1.3701996927803381e-05, "loss": 0.5699, "num_tokens": 362673763.0, "step": 3853 }, { "epoch": 0.6577914319849804, "grad_norm": 0.48879983215784506, "learning_rate": 1.3695169824202083e-05, "loss": 0.51, "num_tokens": 362751817.0, "step": 3854 }, { "epoch": 0.6579621095750128, "grad_norm": 0.47587606704829305, "learning_rate": 1.3688342720600787e-05, "loss": 0.6143, "num_tokens": 362854457.0, "step": 3855 }, { "epoch": 0.6581327871650452, "grad_norm": 0.49575094076396786, "learning_rate": 1.368151561699949e-05, "loss": 0.6411, "num_tokens": 362943245.0, "step": 3856 }, { "epoch": 0.6583034647550776, "grad_norm": 0.48861598435958487, "learning_rate": 1.3674688513398191e-05, "loss": 0.6705, "num_tokens": 363042520.0, "step": 3857 }, { "epoch": 0.65847414234511, "grad_norm": 0.48341797585162827, "learning_rate": 1.3667861409796895e-05, "loss": 0.5824, "num_tokens": 363136031.0, "step": 3858 }, { "epoch": 0.6586448199351426, "grad_norm": 0.46725213663166304, "learning_rate": 1.3661034306195597e-05, "loss": 0.6268, "num_tokens": 363234006.0, "step": 3859 }, { "epoch": 0.658815497525175, "grad_norm": 0.4931048048171122, "learning_rate": 1.36542072025943e-05, "loss": 0.6134, "num_tokens": 363325240.0, "step": 3860 }, { "epoch": 0.6589861751152074, "grad_norm": 0.4847845144139469, "learning_rate": 1.3647380098993004e-05, "loss": 0.5639, "num_tokens": 363416207.0, "step": 3861 }, { "epoch": 0.6591568527052398, "grad_norm": 0.46882004002283767, "learning_rate": 1.3640552995391706e-05, "loss": 0.5423, "num_tokens": 363500021.0, "step": 3862 }, { "epoch": 0.6593275302952722, "grad_norm": 0.4859102114932772, "learning_rate": 1.3633725891790408e-05, "loss": 0.5727, "num_tokens": 363588761.0, "step": 3863 }, { "epoch": 0.6594982078853047, "grad_norm": 0.4844895718136432, "learning_rate": 1.362689878818911e-05, "loss": 0.5283, "num_tokens": 363669923.0, "step": 3864 }, { "epoch": 0.6596688854753371, "grad_norm": 0.4964913259096266, "learning_rate": 1.3620071684587814e-05, "loss": 0.6087, "num_tokens": 363754801.0, "step": 3865 }, { "epoch": 0.6598395630653695, "grad_norm": 0.5240712327122897, "learning_rate": 1.3613244580986518e-05, "loss": 0.6576, "num_tokens": 363844255.0, "step": 3866 }, { "epoch": 0.6600102406554019, "grad_norm": 0.45436054425535355, "learning_rate": 1.360641747738522e-05, "loss": 0.6251, "num_tokens": 363960685.0, "step": 3867 }, { "epoch": 0.6601809182454343, "grad_norm": 0.5339749741248881, "learning_rate": 1.3599590373783924e-05, "loss": 0.5333, "num_tokens": 364030461.0, "step": 3868 }, { "epoch": 0.6603515958354668, "grad_norm": 0.43954772104875783, "learning_rate": 1.3592763270182626e-05, "loss": 0.563, "num_tokens": 364138622.0, "step": 3869 }, { "epoch": 0.6605222734254992, "grad_norm": 0.5404528285182921, "learning_rate": 1.3585936166581328e-05, "loss": 0.58, "num_tokens": 364219300.0, "step": 3870 }, { "epoch": 0.6606929510155317, "grad_norm": 0.4458182578221126, "learning_rate": 1.3579109062980032e-05, "loss": 0.5655, "num_tokens": 364333521.0, "step": 3871 }, { "epoch": 0.6608636286055641, "grad_norm": 0.5590390059422352, "learning_rate": 1.3572281959378735e-05, "loss": 0.5754, "num_tokens": 364415357.0, "step": 3872 }, { "epoch": 0.6610343061955966, "grad_norm": 0.46700229721333314, "learning_rate": 1.3565454855777438e-05, "loss": 0.5134, "num_tokens": 364512313.0, "step": 3873 }, { "epoch": 0.661204983785629, "grad_norm": 0.44910310152083815, "learning_rate": 1.3558627752176141e-05, "loss": 0.5374, "num_tokens": 364618434.0, "step": 3874 }, { "epoch": 0.6613756613756614, "grad_norm": 0.5262434006250625, "learning_rate": 1.3551800648574845e-05, "loss": 0.6543, "num_tokens": 364696947.0, "step": 3875 }, { "epoch": 0.6615463389656938, "grad_norm": 0.5201023426647235, "learning_rate": 1.3544973544973545e-05, "loss": 0.6271, "num_tokens": 364790363.0, "step": 3876 }, { "epoch": 0.6617170165557262, "grad_norm": 0.5064768933774368, "learning_rate": 1.353814644137225e-05, "loss": 0.5623, "num_tokens": 364867253.0, "step": 3877 }, { "epoch": 0.6618876941457587, "grad_norm": 0.4381762921032272, "learning_rate": 1.3531319337770951e-05, "loss": 0.6004, "num_tokens": 364987837.0, "step": 3878 }, { "epoch": 0.6620583717357911, "grad_norm": 0.4476766985155958, "learning_rate": 1.3524492234169655e-05, "loss": 0.5693, "num_tokens": 365090954.0, "step": 3879 }, { "epoch": 0.6622290493258235, "grad_norm": 0.4901774184494246, "learning_rate": 1.3517665130568359e-05, "loss": 0.5989, "num_tokens": 365179209.0, "step": 3880 }, { "epoch": 0.6623997269158559, "grad_norm": 0.4773411585180803, "learning_rate": 1.3510838026967061e-05, "loss": 0.5837, "num_tokens": 365268462.0, "step": 3881 }, { "epoch": 0.6625704045058883, "grad_norm": 0.4706484490210508, "learning_rate": 1.3504010923365763e-05, "loss": 0.5719, "num_tokens": 365367679.0, "step": 3882 }, { "epoch": 0.6627410820959208, "grad_norm": 0.5451557908769266, "learning_rate": 1.3497183819764465e-05, "loss": 0.5761, "num_tokens": 365440078.0, "step": 3883 }, { "epoch": 0.6629117596859533, "grad_norm": 0.5146778705425954, "learning_rate": 1.3490356716163169e-05, "loss": 0.5539, "num_tokens": 365516659.0, "step": 3884 }, { "epoch": 0.6630824372759857, "grad_norm": 0.4783250610052764, "learning_rate": 1.3483529612561873e-05, "loss": 0.5073, "num_tokens": 365597268.0, "step": 3885 }, { "epoch": 0.6632531148660181, "grad_norm": 0.5284659316820723, "learning_rate": 1.3476702508960575e-05, "loss": 0.5947, "num_tokens": 365674682.0, "step": 3886 }, { "epoch": 0.6634237924560505, "grad_norm": 0.48087486450286565, "learning_rate": 1.3469875405359278e-05, "loss": 0.5022, "num_tokens": 365745696.0, "step": 3887 }, { "epoch": 0.663594470046083, "grad_norm": 0.4758439553270209, "learning_rate": 1.3463048301757979e-05, "loss": 0.5669, "num_tokens": 365842111.0, "step": 3888 }, { "epoch": 0.6637651476361154, "grad_norm": 0.4917384493645637, "learning_rate": 1.3456221198156683e-05, "loss": 0.5259, "num_tokens": 365926833.0, "step": 3889 }, { "epoch": 0.6639358252261478, "grad_norm": 0.46048475148750806, "learning_rate": 1.3449394094555386e-05, "loss": 0.5695, "num_tokens": 366029225.0, "step": 3890 }, { "epoch": 0.6641065028161802, "grad_norm": 0.5072643775193107, "learning_rate": 1.3442566990954088e-05, "loss": 0.5585, "num_tokens": 366109952.0, "step": 3891 }, { "epoch": 0.6642771804062126, "grad_norm": 0.46807258143500996, "learning_rate": 1.3435739887352792e-05, "loss": 0.559, "num_tokens": 366197881.0, "step": 3892 }, { "epoch": 0.6644478579962451, "grad_norm": 0.4468003780211038, "learning_rate": 1.3428912783751496e-05, "loss": 0.5977, "num_tokens": 366311366.0, "step": 3893 }, { "epoch": 0.6646185355862775, "grad_norm": 0.4709087979991452, "learning_rate": 1.3422085680150196e-05, "loss": 0.62, "num_tokens": 366414750.0, "step": 3894 }, { "epoch": 0.6647892131763099, "grad_norm": 0.483905127038126, "learning_rate": 1.34152585765489e-05, "loss": 0.494, "num_tokens": 366489913.0, "step": 3895 }, { "epoch": 0.6649598907663424, "grad_norm": 0.48153906224264775, "learning_rate": 1.3408431472947602e-05, "loss": 0.6077, "num_tokens": 366583627.0, "step": 3896 }, { "epoch": 0.6651305683563749, "grad_norm": 0.4523695685062877, "learning_rate": 1.3401604369346306e-05, "loss": 0.597, "num_tokens": 366690692.0, "step": 3897 }, { "epoch": 0.6653012459464073, "grad_norm": 0.4640931836133353, "learning_rate": 1.339477726574501e-05, "loss": 0.6443, "num_tokens": 366801858.0, "step": 3898 }, { "epoch": 0.6654719235364397, "grad_norm": 0.4930684426229937, "learning_rate": 1.3387950162143712e-05, "loss": 0.481, "num_tokens": 366881346.0, "step": 3899 }, { "epoch": 0.6656426011264721, "grad_norm": 0.4354349849340033, "learning_rate": 1.3381123058542414e-05, "loss": 0.5328, "num_tokens": 366989461.0, "step": 3900 }, { "epoch": 0.6658132787165045, "grad_norm": 0.5413073345400635, "learning_rate": 1.3374295954941117e-05, "loss": 0.5045, "num_tokens": 367081799.0, "step": 3901 }, { "epoch": 0.665983956306537, "grad_norm": 0.549986675776134, "learning_rate": 1.336746885133982e-05, "loss": 0.5585, "num_tokens": 367149466.0, "step": 3902 }, { "epoch": 0.6661546338965694, "grad_norm": 0.4718377973415796, "learning_rate": 1.3360641747738523e-05, "loss": 0.5956, "num_tokens": 367252723.0, "step": 3903 }, { "epoch": 0.6663253114866018, "grad_norm": 0.5427661063119847, "learning_rate": 1.3353814644137227e-05, "loss": 0.6714, "num_tokens": 367348346.0, "step": 3904 }, { "epoch": 0.6664959890766342, "grad_norm": 0.47243663929427127, "learning_rate": 1.334698754053593e-05, "loss": 0.6018, "num_tokens": 367448078.0, "step": 3905 }, { "epoch": 0.6666666666666666, "grad_norm": 0.45987168143886736, "learning_rate": 1.3340160436934631e-05, "loss": 0.6429, "num_tokens": 367562428.0, "step": 3906 }, { "epoch": 0.666837344256699, "grad_norm": 0.44058195082701784, "learning_rate": 1.3333333333333333e-05, "loss": 0.6336, "num_tokens": 367678712.0, "step": 3907 }, { "epoch": 0.6670080218467315, "grad_norm": 0.5079800186377516, "learning_rate": 1.3326506229732037e-05, "loss": 0.6066, "num_tokens": 367771069.0, "step": 3908 }, { "epoch": 0.667178699436764, "grad_norm": 0.43435918763356135, "learning_rate": 1.331967912613074e-05, "loss": 0.6009, "num_tokens": 367890124.0, "step": 3909 }, { "epoch": 0.6673493770267964, "grad_norm": 0.4738886606817851, "learning_rate": 1.3312852022529443e-05, "loss": 0.5716, "num_tokens": 367987285.0, "step": 3910 }, { "epoch": 0.6675200546168288, "grad_norm": 0.5054669784639049, "learning_rate": 1.3306024918928147e-05, "loss": 0.5851, "num_tokens": 368073108.0, "step": 3911 }, { "epoch": 0.6676907322068613, "grad_norm": 0.44863718837625305, "learning_rate": 1.329919781532685e-05, "loss": 0.5563, "num_tokens": 368169836.0, "step": 3912 }, { "epoch": 0.6678614097968937, "grad_norm": 0.4832844885958517, "learning_rate": 1.329237071172555e-05, "loss": 0.5392, "num_tokens": 368256300.0, "step": 3913 }, { "epoch": 0.6680320873869261, "grad_norm": 0.4614198883947568, "learning_rate": 1.3285543608124255e-05, "loss": 0.586, "num_tokens": 368359679.0, "step": 3914 }, { "epoch": 0.6682027649769585, "grad_norm": 0.5614347400781755, "learning_rate": 1.3278716504522957e-05, "loss": 0.6733, "num_tokens": 368428215.0, "step": 3915 }, { "epoch": 0.6683734425669909, "grad_norm": 0.3989020829987458, "learning_rate": 1.327188940092166e-05, "loss": 0.5173, "num_tokens": 368558774.0, "step": 3916 }, { "epoch": 0.6685441201570234, "grad_norm": 0.4995362728923673, "learning_rate": 1.3265062297320364e-05, "loss": 0.5046, "num_tokens": 368636531.0, "step": 3917 }, { "epoch": 0.6687147977470558, "grad_norm": 0.5403513042804551, "learning_rate": 1.3258235193719066e-05, "loss": 0.6608, "num_tokens": 368721908.0, "step": 3918 }, { "epoch": 0.6688854753370882, "grad_norm": 0.41583835953363724, "learning_rate": 1.3251408090117768e-05, "loss": 0.5023, "num_tokens": 368829971.0, "step": 3919 }, { "epoch": 0.6690561529271206, "grad_norm": 0.4675339979005938, "learning_rate": 1.324458098651647e-05, "loss": 0.6641, "num_tokens": 368953536.0, "step": 3920 }, { "epoch": 0.6692268305171531, "grad_norm": 0.4692610056469804, "learning_rate": 1.3237753882915174e-05, "loss": 0.6242, "num_tokens": 369052120.0, "step": 3921 }, { "epoch": 0.6693975081071856, "grad_norm": 0.4886875527172367, "learning_rate": 1.3230926779313878e-05, "loss": 0.5556, "num_tokens": 369132887.0, "step": 3922 }, { "epoch": 0.669568185697218, "grad_norm": 0.44807815673139656, "learning_rate": 1.322409967571258e-05, "loss": 0.5242, "num_tokens": 369229738.0, "step": 3923 }, { "epoch": 0.6697388632872504, "grad_norm": 0.4845760094393384, "learning_rate": 1.3217272572111284e-05, "loss": 0.5615, "num_tokens": 369317223.0, "step": 3924 }, { "epoch": 0.6699095408772828, "grad_norm": 0.4730877862858023, "learning_rate": 1.3210445468509984e-05, "loss": 0.5812, "num_tokens": 369413333.0, "step": 3925 }, { "epoch": 0.6700802184673152, "grad_norm": 0.46345381787556394, "learning_rate": 1.3203618364908688e-05, "loss": 0.5272, "num_tokens": 369504426.0, "step": 3926 }, { "epoch": 0.6702508960573477, "grad_norm": 0.43509041327947295, "learning_rate": 1.3196791261307392e-05, "loss": 0.5962, "num_tokens": 369623353.0, "step": 3927 }, { "epoch": 0.6704215736473801, "grad_norm": 0.47421324070794063, "learning_rate": 1.3189964157706094e-05, "loss": 0.6358, "num_tokens": 369726183.0, "step": 3928 }, { "epoch": 0.6705922512374125, "grad_norm": 0.522656948766794, "learning_rate": 1.3183137054104797e-05, "loss": 0.5813, "num_tokens": 369809992.0, "step": 3929 }, { "epoch": 0.6707629288274449, "grad_norm": 0.5629849587636582, "learning_rate": 1.3176309950503501e-05, "loss": 0.5955, "num_tokens": 369884565.0, "step": 3930 }, { "epoch": 0.6709336064174773, "grad_norm": 0.4563283812427275, "learning_rate": 1.3169482846902202e-05, "loss": 0.5311, "num_tokens": 369979825.0, "step": 3931 }, { "epoch": 0.6711042840075098, "grad_norm": 0.5297552232792441, "learning_rate": 1.3162655743300905e-05, "loss": 0.5443, "num_tokens": 370048712.0, "step": 3932 }, { "epoch": 0.6712749615975423, "grad_norm": 0.44006101274232795, "learning_rate": 1.3155828639699607e-05, "loss": 0.4604, "num_tokens": 370137061.0, "step": 3933 }, { "epoch": 0.6714456391875747, "grad_norm": 0.4715885369183258, "learning_rate": 1.3149001536098311e-05, "loss": 0.6032, "num_tokens": 370231380.0, "step": 3934 }, { "epoch": 0.6716163167776071, "grad_norm": 0.47471174736142724, "learning_rate": 1.3142174432497015e-05, "loss": 0.6287, "num_tokens": 370326872.0, "step": 3935 }, { "epoch": 0.6717869943676396, "grad_norm": 0.5096784268127553, "learning_rate": 1.3135347328895717e-05, "loss": 0.5666, "num_tokens": 370407851.0, "step": 3936 }, { "epoch": 0.671957671957672, "grad_norm": 0.550460980030132, "learning_rate": 1.3128520225294419e-05, "loss": 0.5654, "num_tokens": 370480855.0, "step": 3937 }, { "epoch": 0.6721283495477044, "grad_norm": 0.4482154268881826, "learning_rate": 1.3121693121693123e-05, "loss": 0.5897, "num_tokens": 370592762.0, "step": 3938 }, { "epoch": 0.6722990271377368, "grad_norm": 0.49236923350843836, "learning_rate": 1.3114866018091825e-05, "loss": 0.5555, "num_tokens": 370682071.0, "step": 3939 }, { "epoch": 0.6724697047277692, "grad_norm": 0.4645556556035112, "learning_rate": 1.3108038914490529e-05, "loss": 0.5575, "num_tokens": 370778782.0, "step": 3940 }, { "epoch": 0.6726403823178017, "grad_norm": 0.5442119468998368, "learning_rate": 1.3101211810889232e-05, "loss": 0.5865, "num_tokens": 370877948.0, "step": 3941 }, { "epoch": 0.6728110599078341, "grad_norm": 0.41274817736462865, "learning_rate": 1.3094384707287935e-05, "loss": 0.4812, "num_tokens": 370991265.0, "step": 3942 }, { "epoch": 0.6729817374978665, "grad_norm": 0.4318675923781927, "learning_rate": 1.3087557603686638e-05, "loss": 0.5593, "num_tokens": 371110208.0, "step": 3943 }, { "epoch": 0.6731524150878989, "grad_norm": 0.49398476900863464, "learning_rate": 1.3080730500085339e-05, "loss": 0.5022, "num_tokens": 371185312.0, "step": 3944 }, { "epoch": 0.6733230926779313, "grad_norm": 0.4895199848831399, "learning_rate": 1.3073903396484042e-05, "loss": 0.5561, "num_tokens": 371269487.0, "step": 3945 }, { "epoch": 0.6734937702679639, "grad_norm": 0.45079761094101095, "learning_rate": 1.3067076292882746e-05, "loss": 0.5239, "num_tokens": 371360083.0, "step": 3946 }, { "epoch": 0.6736644478579963, "grad_norm": 0.4172355030909849, "learning_rate": 1.3060249189281448e-05, "loss": 0.5465, "num_tokens": 371477200.0, "step": 3947 }, { "epoch": 0.6738351254480287, "grad_norm": 0.47589173388707723, "learning_rate": 1.3053422085680152e-05, "loss": 0.5552, "num_tokens": 371564593.0, "step": 3948 }, { "epoch": 0.6740058030380611, "grad_norm": 0.4932993887730987, "learning_rate": 1.3046594982078856e-05, "loss": 0.601, "num_tokens": 371652666.0, "step": 3949 }, { "epoch": 0.6741764806280935, "grad_norm": 0.4520444687615404, "learning_rate": 1.3039767878477556e-05, "loss": 0.5546, "num_tokens": 371756174.0, "step": 3950 }, { "epoch": 0.674347158218126, "grad_norm": 0.4678625848778644, "learning_rate": 1.303294077487626e-05, "loss": 0.5077, "num_tokens": 371841444.0, "step": 3951 }, { "epoch": 0.6745178358081584, "grad_norm": 0.5098638786240405, "learning_rate": 1.3026113671274962e-05, "loss": 0.5446, "num_tokens": 371927800.0, "step": 3952 }, { "epoch": 0.6746885133981908, "grad_norm": 0.5342670218190857, "learning_rate": 1.3019286567673666e-05, "loss": 0.4723, "num_tokens": 371985193.0, "step": 3953 }, { "epoch": 0.6748591909882232, "grad_norm": 0.47351783144531995, "learning_rate": 1.301245946407237e-05, "loss": 0.6899, "num_tokens": 372095587.0, "step": 3954 }, { "epoch": 0.6750298685782556, "grad_norm": 0.4519901576644261, "learning_rate": 1.3005632360471072e-05, "loss": 0.6201, "num_tokens": 372216165.0, "step": 3955 }, { "epoch": 0.6752005461682881, "grad_norm": 0.44024301916018543, "learning_rate": 1.2998805256869774e-05, "loss": 0.6081, "num_tokens": 372327487.0, "step": 3956 }, { "epoch": 0.6753712237583205, "grad_norm": 0.47490491629724535, "learning_rate": 1.2991978153268476e-05, "loss": 0.5726, "num_tokens": 372420314.0, "step": 3957 }, { "epoch": 0.675541901348353, "grad_norm": 0.5160588372149131, "learning_rate": 1.298515104966718e-05, "loss": 0.5587, "num_tokens": 372496063.0, "step": 3958 }, { "epoch": 0.6757125789383854, "grad_norm": 0.47087352543192884, "learning_rate": 1.2978323946065883e-05, "loss": 0.5034, "num_tokens": 372576292.0, "step": 3959 }, { "epoch": 0.6758832565284179, "grad_norm": 0.5375732964760931, "learning_rate": 1.2971496842464585e-05, "loss": 0.6639, "num_tokens": 372653300.0, "step": 3960 }, { "epoch": 0.6760539341184503, "grad_norm": 0.5244784505998659, "learning_rate": 1.2964669738863289e-05, "loss": 0.5805, "num_tokens": 372725783.0, "step": 3961 }, { "epoch": 0.6762246117084827, "grad_norm": 0.44371765120544604, "learning_rate": 1.295784263526199e-05, "loss": 0.5496, "num_tokens": 372829501.0, "step": 3962 }, { "epoch": 0.6763952892985151, "grad_norm": 0.4318101128642206, "learning_rate": 1.2951015531660693e-05, "loss": 0.5306, "num_tokens": 372956734.0, "step": 3963 }, { "epoch": 0.6765659668885475, "grad_norm": 0.45964076486576644, "learning_rate": 1.2944188428059397e-05, "loss": 0.5476, "num_tokens": 373050401.0, "step": 3964 }, { "epoch": 0.67673664447858, "grad_norm": 0.45178352505846103, "learning_rate": 1.2937361324458099e-05, "loss": 0.6254, "num_tokens": 373149843.0, "step": 3965 }, { "epoch": 0.6769073220686124, "grad_norm": 0.479381452973111, "learning_rate": 1.2930534220856803e-05, "loss": 0.622, "num_tokens": 373254991.0, "step": 3966 }, { "epoch": 0.6770779996586448, "grad_norm": 0.46826821232498084, "learning_rate": 1.2923707117255507e-05, "loss": 0.6261, "num_tokens": 373355851.0, "step": 3967 }, { "epoch": 0.6772486772486772, "grad_norm": 0.5175973628130143, "learning_rate": 1.2916880013654207e-05, "loss": 0.4656, "num_tokens": 373420009.0, "step": 3968 }, { "epoch": 0.6774193548387096, "grad_norm": 0.4703526614054862, "learning_rate": 1.291005291005291e-05, "loss": 0.5593, "num_tokens": 373514686.0, "step": 3969 }, { "epoch": 0.6775900324287422, "grad_norm": 0.5486419527964167, "learning_rate": 1.2903225806451613e-05, "loss": 0.4631, "num_tokens": 373578961.0, "step": 3970 }, { "epoch": 0.6777607100187746, "grad_norm": 0.5112740497366018, "learning_rate": 1.2896398702850317e-05, "loss": 0.5831, "num_tokens": 373659220.0, "step": 3971 }, { "epoch": 0.677931387608807, "grad_norm": 0.4852943365380353, "learning_rate": 1.288957159924902e-05, "loss": 0.5637, "num_tokens": 373752906.0, "step": 3972 }, { "epoch": 0.6781020651988394, "grad_norm": 0.42291431733463675, "learning_rate": 1.2882744495647724e-05, "loss": 0.5253, "num_tokens": 373864418.0, "step": 3973 }, { "epoch": 0.6782727427888718, "grad_norm": 0.47922288410543473, "learning_rate": 1.2875917392046426e-05, "loss": 0.5647, "num_tokens": 373956732.0, "step": 3974 }, { "epoch": 0.6784434203789043, "grad_norm": 0.49586294412876053, "learning_rate": 1.2869090288445128e-05, "loss": 0.5582, "num_tokens": 374055968.0, "step": 3975 }, { "epoch": 0.6786140979689367, "grad_norm": 0.482838573079878, "learning_rate": 1.286226318484383e-05, "loss": 0.4865, "num_tokens": 374137412.0, "step": 3976 }, { "epoch": 0.6787847755589691, "grad_norm": 0.42937021485697635, "learning_rate": 1.2855436081242534e-05, "loss": 0.5384, "num_tokens": 374250058.0, "step": 3977 }, { "epoch": 0.6789554531490015, "grad_norm": 0.4841046605588727, "learning_rate": 1.2848608977641238e-05, "loss": 0.5594, "num_tokens": 374341358.0, "step": 3978 }, { "epoch": 0.6791261307390339, "grad_norm": 0.4810300559091305, "learning_rate": 1.284178187403994e-05, "loss": 0.6545, "num_tokens": 374449506.0, "step": 3979 }, { "epoch": 0.6792968083290664, "grad_norm": 0.49583356444898374, "learning_rate": 1.2834954770438644e-05, "loss": 0.5578, "num_tokens": 374534068.0, "step": 3980 }, { "epoch": 0.6794674859190988, "grad_norm": 0.4832047631064496, "learning_rate": 1.2828127666837344e-05, "loss": 0.5995, "num_tokens": 374630059.0, "step": 3981 }, { "epoch": 0.6796381635091312, "grad_norm": 0.5009175749615926, "learning_rate": 1.2821300563236048e-05, "loss": 0.5917, "num_tokens": 374722425.0, "step": 3982 }, { "epoch": 0.6798088410991637, "grad_norm": 0.526994307319266, "learning_rate": 1.2814473459634752e-05, "loss": 0.4939, "num_tokens": 374789911.0, "step": 3983 }, { "epoch": 0.6799795186891961, "grad_norm": 0.4628120028205401, "learning_rate": 1.2807646356033454e-05, "loss": 0.5968, "num_tokens": 374886445.0, "step": 3984 }, { "epoch": 0.6801501962792286, "grad_norm": 0.5094668807061902, "learning_rate": 1.2800819252432157e-05, "loss": 0.6628, "num_tokens": 374990935.0, "step": 3985 }, { "epoch": 0.680320873869261, "grad_norm": 0.5078004848459915, "learning_rate": 1.2793992148830861e-05, "loss": 0.6272, "num_tokens": 375086412.0, "step": 3986 }, { "epoch": 0.6804915514592934, "grad_norm": 0.5325557818356629, "learning_rate": 1.2787165045229561e-05, "loss": 0.6338, "num_tokens": 375171145.0, "step": 3987 }, { "epoch": 0.6806622290493258, "grad_norm": 0.5528878102579359, "learning_rate": 1.2780337941628265e-05, "loss": 0.5747, "num_tokens": 375242894.0, "step": 3988 }, { "epoch": 0.6808329066393582, "grad_norm": 0.5009449516449519, "learning_rate": 1.2773510838026967e-05, "loss": 0.5134, "num_tokens": 375312671.0, "step": 3989 }, { "epoch": 0.6810035842293907, "grad_norm": 0.41792388126071, "learning_rate": 1.2766683734425671e-05, "loss": 0.5155, "num_tokens": 375430873.0, "step": 3990 }, { "epoch": 0.6811742618194231, "grad_norm": 0.4870826623173786, "learning_rate": 1.2759856630824375e-05, "loss": 0.7072, "num_tokens": 375543147.0, "step": 3991 }, { "epoch": 0.6813449394094555, "grad_norm": 0.7798893663304252, "learning_rate": 1.2753029527223077e-05, "loss": 0.6427, "num_tokens": 375643525.0, "step": 3992 }, { "epoch": 0.6815156169994879, "grad_norm": 0.5170430908717488, "learning_rate": 1.2746202423621779e-05, "loss": 0.6013, "num_tokens": 375726049.0, "step": 3993 }, { "epoch": 0.6816862945895203, "grad_norm": 0.518984081683617, "learning_rate": 1.2739375320020481e-05, "loss": 0.5648, "num_tokens": 375800069.0, "step": 3994 }, { "epoch": 0.6818569721795529, "grad_norm": 0.4673278891359286, "learning_rate": 1.2732548216419185e-05, "loss": 0.4575, "num_tokens": 375886742.0, "step": 3995 }, { "epoch": 0.6820276497695853, "grad_norm": 0.5349828310794014, "learning_rate": 1.2725721112817889e-05, "loss": 0.5673, "num_tokens": 375975622.0, "step": 3996 }, { "epoch": 0.6821983273596177, "grad_norm": 0.4736897301530035, "learning_rate": 1.271889400921659e-05, "loss": 0.5895, "num_tokens": 376073021.0, "step": 3997 }, { "epoch": 0.6823690049496501, "grad_norm": 0.4396162038929571, "learning_rate": 1.2712066905615294e-05, "loss": 0.5747, "num_tokens": 376179605.0, "step": 3998 }, { "epoch": 0.6825396825396826, "grad_norm": 0.41313397260575235, "learning_rate": 1.2705239802013995e-05, "loss": 0.5057, "num_tokens": 376298028.0, "step": 3999 }, { "epoch": 0.682710360129715, "grad_norm": 0.5007542436673901, "learning_rate": 1.2698412698412699e-05, "loss": 0.5575, "num_tokens": 376378459.0, "step": 4000 }, { "epoch": 0.6828810377197474, "grad_norm": 0.46710920856908594, "learning_rate": 1.2691585594811402e-05, "loss": 0.5934, "num_tokens": 376507883.0, "step": 4001 }, { "epoch": 0.6830517153097798, "grad_norm": 0.45788473064042273, "learning_rate": 1.2684758491210104e-05, "loss": 0.665, "num_tokens": 376640253.0, "step": 4002 }, { "epoch": 0.6832223928998122, "grad_norm": 0.4928340237578695, "learning_rate": 1.2677931387608808e-05, "loss": 0.5535, "num_tokens": 376719843.0, "step": 4003 }, { "epoch": 0.6833930704898447, "grad_norm": 0.5967309056662397, "learning_rate": 1.2671104284007512e-05, "loss": 0.6641, "num_tokens": 376814272.0, "step": 4004 }, { "epoch": 0.6835637480798771, "grad_norm": 0.44562501549381434, "learning_rate": 1.2664277180406214e-05, "loss": 0.6094, "num_tokens": 376927617.0, "step": 4005 }, { "epoch": 0.6837344256699095, "grad_norm": 0.5276424684287614, "learning_rate": 1.2657450076804916e-05, "loss": 0.6026, "num_tokens": 377006077.0, "step": 4006 }, { "epoch": 0.6839051032599419, "grad_norm": 0.5002842600027759, "learning_rate": 1.265062297320362e-05, "loss": 0.5393, "num_tokens": 377084691.0, "step": 4007 }, { "epoch": 0.6840757808499744, "grad_norm": 0.4824961189191605, "learning_rate": 1.2643795869602322e-05, "loss": 0.567, "num_tokens": 377179137.0, "step": 4008 }, { "epoch": 0.6842464584400069, "grad_norm": 0.4530197900078497, "learning_rate": 1.2636968766001026e-05, "loss": 0.5748, "num_tokens": 377285517.0, "step": 4009 }, { "epoch": 0.6844171360300393, "grad_norm": 0.4771373728293529, "learning_rate": 1.263014166239973e-05, "loss": 0.6288, "num_tokens": 377382664.0, "step": 4010 }, { "epoch": 0.6845878136200717, "grad_norm": 0.5388506509807646, "learning_rate": 1.2623314558798431e-05, "loss": 0.6189, "num_tokens": 377461086.0, "step": 4011 }, { "epoch": 0.6847584912101041, "grad_norm": 0.4633369447734021, "learning_rate": 1.2616487455197134e-05, "loss": 0.6021, "num_tokens": 377563124.0, "step": 4012 }, { "epoch": 0.6849291688001365, "grad_norm": 0.46258660344649455, "learning_rate": 1.2609660351595836e-05, "loss": 0.5711, "num_tokens": 377664534.0, "step": 4013 }, { "epoch": 0.685099846390169, "grad_norm": 0.49254562154701925, "learning_rate": 1.260283324799454e-05, "loss": 0.5808, "num_tokens": 377756223.0, "step": 4014 }, { "epoch": 0.6852705239802014, "grad_norm": 0.46974237827311843, "learning_rate": 1.2596006144393243e-05, "loss": 0.5418, "num_tokens": 377842646.0, "step": 4015 }, { "epoch": 0.6854412015702338, "grad_norm": 0.5254622344007362, "learning_rate": 1.2589179040791945e-05, "loss": 0.6022, "num_tokens": 377919052.0, "step": 4016 }, { "epoch": 0.6856118791602662, "grad_norm": 0.5350632755480206, "learning_rate": 1.2582351937190649e-05, "loss": 0.5153, "num_tokens": 377990794.0, "step": 4017 }, { "epoch": 0.6857825567502986, "grad_norm": 0.5436893898941847, "learning_rate": 1.257552483358935e-05, "loss": 0.6305, "num_tokens": 378083524.0, "step": 4018 }, { "epoch": 0.6859532343403311, "grad_norm": 0.4682620879185587, "learning_rate": 1.2568697729988053e-05, "loss": 0.6102, "num_tokens": 378177240.0, "step": 4019 }, { "epoch": 0.6861239119303636, "grad_norm": 0.499652155715656, "learning_rate": 1.2561870626386757e-05, "loss": 0.5778, "num_tokens": 378266368.0, "step": 4020 }, { "epoch": 0.686294589520396, "grad_norm": 0.5323047922942338, "learning_rate": 1.2555043522785459e-05, "loss": 0.5079, "num_tokens": 378329831.0, "step": 4021 }, { "epoch": 0.6864652671104284, "grad_norm": 0.4278035849310555, "learning_rate": 1.2548216419184163e-05, "loss": 0.5518, "num_tokens": 378447572.0, "step": 4022 }, { "epoch": 0.6866359447004609, "grad_norm": 0.5470483384286055, "learning_rate": 1.2541389315582866e-05, "loss": 0.5659, "num_tokens": 378514709.0, "step": 4023 }, { "epoch": 0.6868066222904933, "grad_norm": 0.43018992337247614, "learning_rate": 1.2534562211981567e-05, "loss": 0.4979, "num_tokens": 378619182.0, "step": 4024 }, { "epoch": 0.6869772998805257, "grad_norm": 0.5073740953022507, "learning_rate": 1.252773510838027e-05, "loss": 0.6247, "num_tokens": 378709502.0, "step": 4025 }, { "epoch": 0.6871479774705581, "grad_norm": 0.49477186774888265, "learning_rate": 1.2520908004778973e-05, "loss": 0.6078, "num_tokens": 378803480.0, "step": 4026 }, { "epoch": 0.6873186550605905, "grad_norm": 0.453094144828214, "learning_rate": 1.2514080901177676e-05, "loss": 0.4982, "num_tokens": 378894070.0, "step": 4027 }, { "epoch": 0.687489332650623, "grad_norm": 0.44838988293392007, "learning_rate": 1.250725379757638e-05, "loss": 0.5687, "num_tokens": 379000458.0, "step": 4028 }, { "epoch": 0.6876600102406554, "grad_norm": 0.49385735167949923, "learning_rate": 1.2500426693975082e-05, "loss": 0.5258, "num_tokens": 379084061.0, "step": 4029 }, { "epoch": 0.6878306878306878, "grad_norm": 0.46204392316872395, "learning_rate": 1.2493599590373784e-05, "loss": 0.542, "num_tokens": 379180181.0, "step": 4030 }, { "epoch": 0.6880013654207202, "grad_norm": 0.43223331323832526, "learning_rate": 1.2486772486772486e-05, "loss": 0.5681, "num_tokens": 379297424.0, "step": 4031 }, { "epoch": 0.6881720430107527, "grad_norm": 0.4538144503030459, "learning_rate": 1.247994538317119e-05, "loss": 0.5885, "num_tokens": 379406255.0, "step": 4032 }, { "epoch": 0.6883427206007852, "grad_norm": 0.46911435586466554, "learning_rate": 1.2473118279569894e-05, "loss": 0.5306, "num_tokens": 379490036.0, "step": 4033 }, { "epoch": 0.6885133981908176, "grad_norm": 0.5580320177043012, "learning_rate": 1.2466291175968596e-05, "loss": 0.6005, "num_tokens": 379559450.0, "step": 4034 }, { "epoch": 0.68868407578085, "grad_norm": 0.47166784928078126, "learning_rate": 1.24594640723673e-05, "loss": 0.5355, "num_tokens": 379646937.0, "step": 4035 }, { "epoch": 0.6888547533708824, "grad_norm": 0.5234184865564476, "learning_rate": 1.2452636968766004e-05, "loss": 0.5924, "num_tokens": 379719631.0, "step": 4036 }, { "epoch": 0.6890254309609148, "grad_norm": 0.5137521610700557, "learning_rate": 1.2445809865164704e-05, "loss": 0.57, "num_tokens": 379806896.0, "step": 4037 }, { "epoch": 0.6891961085509473, "grad_norm": 0.4795421270342953, "learning_rate": 1.2438982761563408e-05, "loss": 0.6234, "num_tokens": 379914659.0, "step": 4038 }, { "epoch": 0.6893667861409797, "grad_norm": 0.5311394655591134, "learning_rate": 1.243215565796211e-05, "loss": 0.5719, "num_tokens": 379987868.0, "step": 4039 }, { "epoch": 0.6895374637310121, "grad_norm": 0.511974874374787, "learning_rate": 1.2425328554360813e-05, "loss": 0.6034, "num_tokens": 380067411.0, "step": 4040 }, { "epoch": 0.6897081413210445, "grad_norm": 0.4955596832842013, "learning_rate": 1.2418501450759517e-05, "loss": 0.5419, "num_tokens": 380143074.0, "step": 4041 }, { "epoch": 0.6898788189110769, "grad_norm": 0.4804937047127304, "learning_rate": 1.2411674347158221e-05, "loss": 0.4975, "num_tokens": 380222104.0, "step": 4042 }, { "epoch": 0.6900494965011094, "grad_norm": 0.433240743306625, "learning_rate": 1.2404847243556921e-05, "loss": 0.5987, "num_tokens": 380346009.0, "step": 4043 }, { "epoch": 0.6902201740911418, "grad_norm": 0.48420599151813526, "learning_rate": 1.2398020139955625e-05, "loss": 0.5788, "num_tokens": 380436466.0, "step": 4044 }, { "epoch": 0.6903908516811743, "grad_norm": 0.4731453396836686, "learning_rate": 1.2391193036354327e-05, "loss": 0.6, "num_tokens": 380534445.0, "step": 4045 }, { "epoch": 0.6905615292712067, "grad_norm": 0.47191237618476606, "learning_rate": 1.2384365932753031e-05, "loss": 0.6286, "num_tokens": 380642445.0, "step": 4046 }, { "epoch": 0.6907322068612392, "grad_norm": 0.5290557727695562, "learning_rate": 1.2377538829151735e-05, "loss": 0.5983, "num_tokens": 380734530.0, "step": 4047 }, { "epoch": 0.6909028844512716, "grad_norm": 0.47287585622700823, "learning_rate": 1.2370711725550437e-05, "loss": 0.6172, "num_tokens": 380834419.0, "step": 4048 }, { "epoch": 0.691073562041304, "grad_norm": 0.5279989191247129, "learning_rate": 1.2363884621949139e-05, "loss": 0.6222, "num_tokens": 380919086.0, "step": 4049 }, { "epoch": 0.6912442396313364, "grad_norm": 0.47755089851661237, "learning_rate": 1.2357057518347841e-05, "loss": 0.6064, "num_tokens": 381017957.0, "step": 4050 }, { "epoch": 0.6914149172213688, "grad_norm": 0.4716655937733939, "learning_rate": 1.2350230414746545e-05, "loss": 0.5796, "num_tokens": 381114080.0, "step": 4051 }, { "epoch": 0.6915855948114012, "grad_norm": 0.46183917665298285, "learning_rate": 1.2343403311145248e-05, "loss": 0.5309, "num_tokens": 381204240.0, "step": 4052 }, { "epoch": 0.6917562724014337, "grad_norm": 0.4553713791502409, "learning_rate": 1.233657620754395e-05, "loss": 0.5623, "num_tokens": 381303624.0, "step": 4053 }, { "epoch": 0.6919269499914661, "grad_norm": 0.45966633833573894, "learning_rate": 1.2329749103942654e-05, "loss": 0.5893, "num_tokens": 381403708.0, "step": 4054 }, { "epoch": 0.6920976275814985, "grad_norm": 0.5126112929840628, "learning_rate": 1.2322922000341355e-05, "loss": 0.612, "num_tokens": 381507996.0, "step": 4055 }, { "epoch": 0.6922683051715309, "grad_norm": 0.4412261892596809, "learning_rate": 1.2316094896740058e-05, "loss": 0.5958, "num_tokens": 381616231.0, "step": 4056 }, { "epoch": 0.6924389827615635, "grad_norm": 0.5199409604884575, "learning_rate": 1.2309267793138762e-05, "loss": 0.5946, "num_tokens": 381700626.0, "step": 4057 }, { "epoch": 0.6926096603515959, "grad_norm": 0.5004536355882978, "learning_rate": 1.2302440689537464e-05, "loss": 0.5296, "num_tokens": 381776606.0, "step": 4058 }, { "epoch": 0.6927803379416283, "grad_norm": 0.5340610225242278, "learning_rate": 1.2295613585936168e-05, "loss": 0.5623, "num_tokens": 381846468.0, "step": 4059 }, { "epoch": 0.6929510155316607, "grad_norm": 0.5290200104684436, "learning_rate": 1.2288786482334872e-05, "loss": 0.545, "num_tokens": 381923632.0, "step": 4060 }, { "epoch": 0.6931216931216931, "grad_norm": 0.49215463299086043, "learning_rate": 1.2281959378733572e-05, "loss": 0.6017, "num_tokens": 382021289.0, "step": 4061 }, { "epoch": 0.6932923707117256, "grad_norm": 0.48606487259932674, "learning_rate": 1.2275132275132276e-05, "loss": 0.5271, "num_tokens": 382103866.0, "step": 4062 }, { "epoch": 0.693463048301758, "grad_norm": 0.4647370571879275, "learning_rate": 1.2268305171530978e-05, "loss": 0.5177, "num_tokens": 382187471.0, "step": 4063 }, { "epoch": 0.6936337258917904, "grad_norm": 0.5370922361764342, "learning_rate": 1.2261478067929682e-05, "loss": 0.5887, "num_tokens": 382260235.0, "step": 4064 }, { "epoch": 0.6938044034818228, "grad_norm": 0.46547838801904734, "learning_rate": 1.2254650964328386e-05, "loss": 0.5894, "num_tokens": 382361957.0, "step": 4065 }, { "epoch": 0.6939750810718552, "grad_norm": 0.44219338404949154, "learning_rate": 1.2247823860727088e-05, "loss": 0.6121, "num_tokens": 382478011.0, "step": 4066 }, { "epoch": 0.6941457586618877, "grad_norm": 0.47544560079500564, "learning_rate": 1.224099675712579e-05, "loss": 0.5301, "num_tokens": 382563398.0, "step": 4067 }, { "epoch": 0.6943164362519201, "grad_norm": 0.4551338676597597, "learning_rate": 1.2234169653524492e-05, "loss": 0.6093, "num_tokens": 382660760.0, "step": 4068 }, { "epoch": 0.6944871138419525, "grad_norm": 0.48884576489496545, "learning_rate": 1.2227342549923195e-05, "loss": 0.5847, "num_tokens": 382761643.0, "step": 4069 }, { "epoch": 0.694657791431985, "grad_norm": 0.5311950941231147, "learning_rate": 1.22205154463219e-05, "loss": 0.6558, "num_tokens": 382844064.0, "step": 4070 }, { "epoch": 0.6948284690220174, "grad_norm": 0.4562787785903537, "learning_rate": 1.2213688342720601e-05, "loss": 0.5867, "num_tokens": 382946341.0, "step": 4071 }, { "epoch": 0.6949991466120499, "grad_norm": 0.4489774176318795, "learning_rate": 1.2206861239119305e-05, "loss": 0.5142, "num_tokens": 383043547.0, "step": 4072 }, { "epoch": 0.6951698242020823, "grad_norm": 0.5191170172749052, "learning_rate": 1.2200034135518009e-05, "loss": 0.5238, "num_tokens": 383126662.0, "step": 4073 }, { "epoch": 0.6953405017921147, "grad_norm": 0.4662900618847519, "learning_rate": 1.219320703191671e-05, "loss": 0.5792, "num_tokens": 383224838.0, "step": 4074 }, { "epoch": 0.6955111793821471, "grad_norm": 0.4994288408397585, "learning_rate": 1.2186379928315413e-05, "loss": 0.5954, "num_tokens": 383312525.0, "step": 4075 }, { "epoch": 0.6956818569721795, "grad_norm": 0.5613017039908135, "learning_rate": 1.2179552824714117e-05, "loss": 0.5779, "num_tokens": 383378198.0, "step": 4076 }, { "epoch": 0.695852534562212, "grad_norm": 0.4846737178308851, "learning_rate": 1.2172725721112819e-05, "loss": 0.5795, "num_tokens": 383475526.0, "step": 4077 }, { "epoch": 0.6960232121522444, "grad_norm": 0.46784073180407176, "learning_rate": 1.2165898617511523e-05, "loss": 0.5479, "num_tokens": 383562515.0, "step": 4078 }, { "epoch": 0.6961938897422768, "grad_norm": 0.454240140371569, "learning_rate": 1.2159071513910226e-05, "loss": 0.5988, "num_tokens": 383666269.0, "step": 4079 }, { "epoch": 0.6963645673323092, "grad_norm": 0.4528950323368183, "learning_rate": 1.2152244410308927e-05, "loss": 0.5212, "num_tokens": 383762697.0, "step": 4080 }, { "epoch": 0.6965352449223416, "grad_norm": 0.497087172696841, "learning_rate": 1.214541730670763e-05, "loss": 0.5636, "num_tokens": 383840281.0, "step": 4081 }, { "epoch": 0.6967059225123742, "grad_norm": 0.4465017208586227, "learning_rate": 1.2138590203106333e-05, "loss": 0.5035, "num_tokens": 383944622.0, "step": 4082 }, { "epoch": 0.6968766001024066, "grad_norm": 0.477920664090481, "learning_rate": 1.2131763099505036e-05, "loss": 0.5452, "num_tokens": 384036236.0, "step": 4083 }, { "epoch": 0.697047277692439, "grad_norm": 0.4573135602714023, "learning_rate": 1.212493599590374e-05, "loss": 0.5576, "num_tokens": 384136963.0, "step": 4084 }, { "epoch": 0.6972179552824714, "grad_norm": 0.5138586498152113, "learning_rate": 1.2118108892302442e-05, "loss": 0.584, "num_tokens": 384220301.0, "step": 4085 }, { "epoch": 0.6973886328725039, "grad_norm": 0.5073213015452517, "learning_rate": 1.2111281788701144e-05, "loss": 0.5483, "num_tokens": 384290978.0, "step": 4086 }, { "epoch": 0.6975593104625363, "grad_norm": 0.49296826031986285, "learning_rate": 1.2104454685099846e-05, "loss": 0.6203, "num_tokens": 384407367.0, "step": 4087 }, { "epoch": 0.6977299880525687, "grad_norm": 0.5092827679773567, "learning_rate": 1.209762758149855e-05, "loss": 0.5517, "num_tokens": 384499652.0, "step": 4088 }, { "epoch": 0.6979006656426011, "grad_norm": 0.47500974673443863, "learning_rate": 1.2090800477897254e-05, "loss": 0.5739, "num_tokens": 384590374.0, "step": 4089 }, { "epoch": 0.6980713432326335, "grad_norm": 0.4792486588484749, "learning_rate": 1.2083973374295956e-05, "loss": 0.6713, "num_tokens": 384707783.0, "step": 4090 }, { "epoch": 0.698242020822666, "grad_norm": 0.5074972850223977, "learning_rate": 1.207714627069466e-05, "loss": 0.6046, "num_tokens": 384792223.0, "step": 4091 }, { "epoch": 0.6984126984126984, "grad_norm": 0.4513597053395292, "learning_rate": 1.207031916709336e-05, "loss": 0.6322, "num_tokens": 384903810.0, "step": 4092 }, { "epoch": 0.6985833760027308, "grad_norm": 0.4606965108238913, "learning_rate": 1.2063492063492064e-05, "loss": 0.5773, "num_tokens": 385009666.0, "step": 4093 }, { "epoch": 0.6987540535927633, "grad_norm": 0.4362702584096806, "learning_rate": 1.2056664959890768e-05, "loss": 0.5937, "num_tokens": 385129323.0, "step": 4094 }, { "epoch": 0.6989247311827957, "grad_norm": 0.4918086275347048, "learning_rate": 1.204983785628947e-05, "loss": 0.6722, "num_tokens": 385232104.0, "step": 4095 }, { "epoch": 0.6990954087728282, "grad_norm": 0.5164245568965276, "learning_rate": 1.2043010752688173e-05, "loss": 0.6246, "num_tokens": 385317431.0, "step": 4096 }, { "epoch": 0.6992660863628606, "grad_norm": 0.4429121718117885, "learning_rate": 1.2036183649086877e-05, "loss": 0.5538, "num_tokens": 385421385.0, "step": 4097 }, { "epoch": 0.699436763952893, "grad_norm": 0.5064010273685836, "learning_rate": 1.2029356545485578e-05, "loss": 0.6038, "num_tokens": 385512985.0, "step": 4098 }, { "epoch": 0.6996074415429254, "grad_norm": 0.4983713392854213, "learning_rate": 1.2022529441884281e-05, "loss": 0.5941, "num_tokens": 385600660.0, "step": 4099 }, { "epoch": 0.6997781191329578, "grad_norm": 0.4711965770383893, "learning_rate": 1.2015702338282983e-05, "loss": 0.6212, "num_tokens": 385716851.0, "step": 4100 }, { "epoch": 0.6999487967229903, "grad_norm": 0.4592991284258155, "learning_rate": 1.2008875234681687e-05, "loss": 0.5983, "num_tokens": 385815909.0, "step": 4101 }, { "epoch": 0.7001194743130227, "grad_norm": 0.48315263148918547, "learning_rate": 1.2002048131080391e-05, "loss": 0.5577, "num_tokens": 385911058.0, "step": 4102 }, { "epoch": 0.7002901519030551, "grad_norm": 0.4625280064411512, "learning_rate": 1.1995221027479093e-05, "loss": 0.5811, "num_tokens": 386003613.0, "step": 4103 }, { "epoch": 0.7004608294930875, "grad_norm": 0.45073773680556023, "learning_rate": 1.1988393923877797e-05, "loss": 0.61, "num_tokens": 386116346.0, "step": 4104 }, { "epoch": 0.7006315070831199, "grad_norm": 0.4797281450030446, "learning_rate": 1.1981566820276497e-05, "loss": 0.486, "num_tokens": 386198496.0, "step": 4105 }, { "epoch": 0.7008021846731524, "grad_norm": 0.5221826230793649, "learning_rate": 1.19747397166752e-05, "loss": 0.5766, "num_tokens": 386284154.0, "step": 4106 }, { "epoch": 0.7009728622631849, "grad_norm": 0.440929936072235, "learning_rate": 1.1967912613073905e-05, "loss": 0.5549, "num_tokens": 386394358.0, "step": 4107 }, { "epoch": 0.7011435398532173, "grad_norm": 0.4867872242468334, "learning_rate": 1.1961085509472607e-05, "loss": 0.5291, "num_tokens": 386514960.0, "step": 4108 }, { "epoch": 0.7013142174432497, "grad_norm": 0.4473277090887126, "learning_rate": 1.195425840587131e-05, "loss": 0.5738, "num_tokens": 386616634.0, "step": 4109 }, { "epoch": 0.7014848950332822, "grad_norm": 0.5150706882467296, "learning_rate": 1.1947431302270014e-05, "loss": 0.6203, "num_tokens": 386699751.0, "step": 4110 }, { "epoch": 0.7016555726233146, "grad_norm": 0.5137330314722427, "learning_rate": 1.1940604198668715e-05, "loss": 0.5724, "num_tokens": 386772855.0, "step": 4111 }, { "epoch": 0.701826250213347, "grad_norm": 0.5179589818625412, "learning_rate": 1.1933777095067418e-05, "loss": 0.553, "num_tokens": 386855662.0, "step": 4112 }, { "epoch": 0.7019969278033794, "grad_norm": 0.4658119026067818, "learning_rate": 1.1926949991466122e-05, "loss": 0.5207, "num_tokens": 386947518.0, "step": 4113 }, { "epoch": 0.7021676053934118, "grad_norm": 0.47551722206467856, "learning_rate": 1.1920122887864824e-05, "loss": 0.6397, "num_tokens": 387048007.0, "step": 4114 }, { "epoch": 0.7023382829834443, "grad_norm": 0.5094923225732427, "learning_rate": 1.1913295784263528e-05, "loss": 0.5598, "num_tokens": 387127313.0, "step": 4115 }, { "epoch": 0.7025089605734767, "grad_norm": 0.5737273606485829, "learning_rate": 1.1906468680662232e-05, "loss": 0.6097, "num_tokens": 387197702.0, "step": 4116 }, { "epoch": 0.7026796381635091, "grad_norm": 0.5455939725478027, "learning_rate": 1.1899641577060932e-05, "loss": 0.6246, "num_tokens": 387279024.0, "step": 4117 }, { "epoch": 0.7028503157535415, "grad_norm": 0.47159716738246876, "learning_rate": 1.1892814473459636e-05, "loss": 0.5855, "num_tokens": 387388381.0, "step": 4118 }, { "epoch": 0.703020993343574, "grad_norm": 0.49897711903709224, "learning_rate": 1.1885987369858338e-05, "loss": 0.5754, "num_tokens": 387476029.0, "step": 4119 }, { "epoch": 0.7031916709336065, "grad_norm": 0.5361073272392255, "learning_rate": 1.1879160266257042e-05, "loss": 0.4912, "num_tokens": 387545297.0, "step": 4120 }, { "epoch": 0.7033623485236389, "grad_norm": 0.5153720574080317, "learning_rate": 1.1872333162655745e-05, "loss": 0.6424, "num_tokens": 387634139.0, "step": 4121 }, { "epoch": 0.7035330261136713, "grad_norm": 0.48922896284720485, "learning_rate": 1.1865506059054447e-05, "loss": 0.5724, "num_tokens": 387722374.0, "step": 4122 }, { "epoch": 0.7037037037037037, "grad_norm": 0.5300479469769814, "learning_rate": 1.185867895545315e-05, "loss": 0.5668, "num_tokens": 387798657.0, "step": 4123 }, { "epoch": 0.7038743812937361, "grad_norm": 0.498534968868138, "learning_rate": 1.1851851851851852e-05, "loss": 0.5816, "num_tokens": 387885736.0, "step": 4124 }, { "epoch": 0.7040450588837686, "grad_norm": 0.4469669770993118, "learning_rate": 1.1845024748250555e-05, "loss": 0.5674, "num_tokens": 387985495.0, "step": 4125 }, { "epoch": 0.704215736473801, "grad_norm": 0.6072948830818997, "learning_rate": 1.183819764464926e-05, "loss": 0.6895, "num_tokens": 388067429.0, "step": 4126 }, { "epoch": 0.7043864140638334, "grad_norm": 0.4695087224784077, "learning_rate": 1.1831370541047961e-05, "loss": 0.6271, "num_tokens": 388171507.0, "step": 4127 }, { "epoch": 0.7045570916538658, "grad_norm": 0.5616788388089635, "learning_rate": 1.1824543437446665e-05, "loss": 0.6122, "num_tokens": 388245270.0, "step": 4128 }, { "epoch": 0.7047277692438982, "grad_norm": 0.5202271142642079, "learning_rate": 1.1817716333845365e-05, "loss": 0.5509, "num_tokens": 388315829.0, "step": 4129 }, { "epoch": 0.7048984468339307, "grad_norm": 0.45977910805690847, "learning_rate": 1.1810889230244069e-05, "loss": 0.5678, "num_tokens": 388409929.0, "step": 4130 }, { "epoch": 0.7050691244239631, "grad_norm": 0.4962189760774949, "learning_rate": 1.1804062126642773e-05, "loss": 0.6235, "num_tokens": 388502353.0, "step": 4131 }, { "epoch": 0.7052398020139956, "grad_norm": 0.4430935988467391, "learning_rate": 1.1797235023041475e-05, "loss": 0.5954, "num_tokens": 388611406.0, "step": 4132 }, { "epoch": 0.705410479604028, "grad_norm": 0.49181163287906027, "learning_rate": 1.1790407919440179e-05, "loss": 0.504, "num_tokens": 388693290.0, "step": 4133 }, { "epoch": 0.7055811571940604, "grad_norm": 0.46996332694526616, "learning_rate": 1.1783580815838882e-05, "loss": 0.5939, "num_tokens": 388789765.0, "step": 4134 }, { "epoch": 0.7057518347840929, "grad_norm": 0.4639741478649215, "learning_rate": 1.1776753712237585e-05, "loss": 0.5901, "num_tokens": 388886144.0, "step": 4135 }, { "epoch": 0.7059225123741253, "grad_norm": 0.44680707031708133, "learning_rate": 1.1769926608636287e-05, "loss": 0.541, "num_tokens": 388982983.0, "step": 4136 }, { "epoch": 0.7060931899641577, "grad_norm": 0.5113767031152918, "learning_rate": 1.1763099505034989e-05, "loss": 0.5799, "num_tokens": 389065716.0, "step": 4137 }, { "epoch": 0.7062638675541901, "grad_norm": 0.45003599662460125, "learning_rate": 1.1756272401433692e-05, "loss": 0.5395, "num_tokens": 389167247.0, "step": 4138 }, { "epoch": 0.7064345451442225, "grad_norm": 0.5266127172020076, "learning_rate": 1.1749445297832396e-05, "loss": 0.5929, "num_tokens": 389245786.0, "step": 4139 }, { "epoch": 0.706605222734255, "grad_norm": 0.4799074028520023, "learning_rate": 1.1742618194231098e-05, "loss": 0.6516, "num_tokens": 389357290.0, "step": 4140 }, { "epoch": 0.7067759003242874, "grad_norm": 0.45100202123838, "learning_rate": 1.1735791090629802e-05, "loss": 0.5728, "num_tokens": 389460618.0, "step": 4141 }, { "epoch": 0.7069465779143198, "grad_norm": 0.4550728042857259, "learning_rate": 1.1728963987028502e-05, "loss": 0.6377, "num_tokens": 389574928.0, "step": 4142 }, { "epoch": 0.7071172555043522, "grad_norm": 0.471279616475047, "learning_rate": 1.1722136883427206e-05, "loss": 0.5254, "num_tokens": 389656875.0, "step": 4143 }, { "epoch": 0.7072879330943848, "grad_norm": 0.4293760751552169, "learning_rate": 1.171530977982591e-05, "loss": 0.633, "num_tokens": 389776456.0, "step": 4144 }, { "epoch": 0.7074586106844172, "grad_norm": 0.5439217105760531, "learning_rate": 1.1708482676224614e-05, "loss": 0.5685, "num_tokens": 389852003.0, "step": 4145 }, { "epoch": 0.7076292882744496, "grad_norm": 0.4641489262208069, "learning_rate": 1.1701655572623316e-05, "loss": 0.5186, "num_tokens": 389936346.0, "step": 4146 }, { "epoch": 0.707799965864482, "grad_norm": 0.46690216391868716, "learning_rate": 1.169482846902202e-05, "loss": 0.6127, "num_tokens": 390039130.0, "step": 4147 }, { "epoch": 0.7079706434545144, "grad_norm": 0.4865507768783059, "learning_rate": 1.168800136542072e-05, "loss": 0.5304, "num_tokens": 390119440.0, "step": 4148 }, { "epoch": 0.7081413210445469, "grad_norm": 0.44386639879508083, "learning_rate": 1.1681174261819424e-05, "loss": 0.5818, "num_tokens": 390229029.0, "step": 4149 }, { "epoch": 0.7083119986345793, "grad_norm": 0.566539970379962, "learning_rate": 1.1674347158218127e-05, "loss": 0.6115, "num_tokens": 390296697.0, "step": 4150 }, { "epoch": 0.7084826762246117, "grad_norm": 0.4775035536028336, "learning_rate": 1.166752005461683e-05, "loss": 0.5572, "num_tokens": 390384121.0, "step": 4151 }, { "epoch": 0.7086533538146441, "grad_norm": 0.5257205587574723, "learning_rate": 1.1660692951015533e-05, "loss": 0.6971, "num_tokens": 390480115.0, "step": 4152 }, { "epoch": 0.7088240314046765, "grad_norm": 0.4485352700716774, "learning_rate": 1.1653865847414237e-05, "loss": 0.6109, "num_tokens": 390600360.0, "step": 4153 }, { "epoch": 0.708994708994709, "grad_norm": 0.42741389771797933, "learning_rate": 1.1647038743812937e-05, "loss": 0.5617, "num_tokens": 390715247.0, "step": 4154 }, { "epoch": 0.7091653865847414, "grad_norm": 0.535568392326428, "learning_rate": 1.1640211640211641e-05, "loss": 0.6482, "num_tokens": 390798135.0, "step": 4155 }, { "epoch": 0.7093360641747739, "grad_norm": 0.5619900277389324, "learning_rate": 1.1633384536610343e-05, "loss": 0.6313, "num_tokens": 390862606.0, "step": 4156 }, { "epoch": 0.7095067417648063, "grad_norm": 0.4762809618343771, "learning_rate": 1.1626557433009047e-05, "loss": 0.6075, "num_tokens": 390964702.0, "step": 4157 }, { "epoch": 0.7096774193548387, "grad_norm": 0.5793691528101894, "learning_rate": 1.161973032940775e-05, "loss": 0.6894, "num_tokens": 391034775.0, "step": 4158 }, { "epoch": 0.7098480969448712, "grad_norm": 0.4562038988249158, "learning_rate": 1.1612903225806453e-05, "loss": 0.6117, "num_tokens": 391145964.0, "step": 4159 }, { "epoch": 0.7100187745349036, "grad_norm": 0.4834432103175352, "learning_rate": 1.1606076122205155e-05, "loss": 0.5086, "num_tokens": 391230699.0, "step": 4160 }, { "epoch": 0.710189452124936, "grad_norm": 0.5002894036568455, "learning_rate": 1.1599249018603857e-05, "loss": 0.5662, "num_tokens": 391316360.0, "step": 4161 }, { "epoch": 0.7103601297149684, "grad_norm": 0.46752050551593594, "learning_rate": 1.159242191500256e-05, "loss": 0.5611, "num_tokens": 391401539.0, "step": 4162 }, { "epoch": 0.7105308073050008, "grad_norm": 0.4214846935168744, "learning_rate": 1.1585594811401264e-05, "loss": 0.6184, "num_tokens": 391539981.0, "step": 4163 }, { "epoch": 0.7107014848950333, "grad_norm": 0.4700432162855328, "learning_rate": 1.1578767707799967e-05, "loss": 0.5276, "num_tokens": 391639570.0, "step": 4164 }, { "epoch": 0.7108721624850657, "grad_norm": 0.44950213231519465, "learning_rate": 1.157194060419867e-05, "loss": 0.5712, "num_tokens": 391748337.0, "step": 4165 }, { "epoch": 0.7110428400750981, "grad_norm": 0.4510234376025752, "learning_rate": 1.1565113500597374e-05, "loss": 0.5241, "num_tokens": 391845460.0, "step": 4166 }, { "epoch": 0.7112135176651305, "grad_norm": 0.5150983933947361, "learning_rate": 1.1558286396996074e-05, "loss": 0.5399, "num_tokens": 391923148.0, "step": 4167 }, { "epoch": 0.7113841952551629, "grad_norm": 0.5174045543091362, "learning_rate": 1.1551459293394778e-05, "loss": 0.6655, "num_tokens": 392018433.0, "step": 4168 }, { "epoch": 0.7115548728451955, "grad_norm": 0.46114593488945044, "learning_rate": 1.154463218979348e-05, "loss": 0.5941, "num_tokens": 392117750.0, "step": 4169 }, { "epoch": 0.7117255504352279, "grad_norm": 0.49318611587337335, "learning_rate": 1.1537805086192184e-05, "loss": 0.5574, "num_tokens": 392203828.0, "step": 4170 }, { "epoch": 0.7118962280252603, "grad_norm": 0.4472131132911206, "learning_rate": 1.1530977982590888e-05, "loss": 0.5357, "num_tokens": 392298151.0, "step": 4171 }, { "epoch": 0.7120669056152927, "grad_norm": 0.45776158314149157, "learning_rate": 1.152415087898959e-05, "loss": 0.5801, "num_tokens": 392410414.0, "step": 4172 }, { "epoch": 0.7122375832053252, "grad_norm": 0.48586553100051166, "learning_rate": 1.1517323775388292e-05, "loss": 0.4706, "num_tokens": 392481467.0, "step": 4173 }, { "epoch": 0.7124082607953576, "grad_norm": 0.4554274055597214, "learning_rate": 1.1510496671786994e-05, "loss": 0.5963, "num_tokens": 392580901.0, "step": 4174 }, { "epoch": 0.71257893838539, "grad_norm": 0.49527018255153715, "learning_rate": 1.1503669568185698e-05, "loss": 0.5785, "num_tokens": 392665451.0, "step": 4175 }, { "epoch": 0.7127496159754224, "grad_norm": 0.43773732566450524, "learning_rate": 1.1496842464584402e-05, "loss": 0.4915, "num_tokens": 392760311.0, "step": 4176 }, { "epoch": 0.7129202935654548, "grad_norm": 0.4682743527969606, "learning_rate": 1.1490015360983104e-05, "loss": 0.5852, "num_tokens": 392860046.0, "step": 4177 }, { "epoch": 0.7130909711554873, "grad_norm": 0.49649240403115796, "learning_rate": 1.1483188257381807e-05, "loss": 0.6317, "num_tokens": 392955586.0, "step": 4178 }, { "epoch": 0.7132616487455197, "grad_norm": 0.48005824183818957, "learning_rate": 1.147636115378051e-05, "loss": 0.5842, "num_tokens": 393045702.0, "step": 4179 }, { "epoch": 0.7134323263355521, "grad_norm": 0.506874548233269, "learning_rate": 1.1469534050179212e-05, "loss": 0.564, "num_tokens": 393130559.0, "step": 4180 }, { "epoch": 0.7136030039255846, "grad_norm": 0.5587040289719484, "learning_rate": 1.1462706946577915e-05, "loss": 0.5812, "num_tokens": 393204210.0, "step": 4181 }, { "epoch": 0.713773681515617, "grad_norm": 0.45735799408219047, "learning_rate": 1.1455879842976619e-05, "loss": 0.5438, "num_tokens": 393299506.0, "step": 4182 }, { "epoch": 0.7139443591056495, "grad_norm": 0.49685647805765465, "learning_rate": 1.1449052739375321e-05, "loss": 0.5839, "num_tokens": 393386537.0, "step": 4183 }, { "epoch": 0.7141150366956819, "grad_norm": 0.4960685302032027, "learning_rate": 1.1442225635774025e-05, "loss": 0.5717, "num_tokens": 393471218.0, "step": 4184 }, { "epoch": 0.7142857142857143, "grad_norm": 0.5010635880712248, "learning_rate": 1.1435398532172725e-05, "loss": 0.605, "num_tokens": 393553272.0, "step": 4185 }, { "epoch": 0.7144563918757467, "grad_norm": 0.4437033887987615, "learning_rate": 1.1428571428571429e-05, "loss": 0.5193, "num_tokens": 393650492.0, "step": 4186 }, { "epoch": 0.7146270694657791, "grad_norm": 0.525044433467863, "learning_rate": 1.1421744324970133e-05, "loss": 0.5922, "num_tokens": 393742432.0, "step": 4187 }, { "epoch": 0.7147977470558116, "grad_norm": 0.4918521216072958, "learning_rate": 1.1414917221368835e-05, "loss": 0.6036, "num_tokens": 393833004.0, "step": 4188 }, { "epoch": 0.714968424645844, "grad_norm": 0.46463665306488, "learning_rate": 1.1408090117767539e-05, "loss": 0.5525, "num_tokens": 393927669.0, "step": 4189 }, { "epoch": 0.7151391022358764, "grad_norm": 0.510975638009283, "learning_rate": 1.1401263014166242e-05, "loss": 0.6436, "num_tokens": 394032873.0, "step": 4190 }, { "epoch": 0.7153097798259088, "grad_norm": 0.4990804113409891, "learning_rate": 1.1394435910564943e-05, "loss": 0.5967, "num_tokens": 394119576.0, "step": 4191 }, { "epoch": 0.7154804574159412, "grad_norm": 0.46474717014559763, "learning_rate": 1.1387608806963647e-05, "loss": 0.5996, "num_tokens": 394216625.0, "step": 4192 }, { "epoch": 0.7156511350059738, "grad_norm": 0.5012372546895841, "learning_rate": 1.1380781703362349e-05, "loss": 0.5861, "num_tokens": 394300425.0, "step": 4193 }, { "epoch": 0.7158218125960062, "grad_norm": 0.5053258544258128, "learning_rate": 1.1373954599761052e-05, "loss": 0.5912, "num_tokens": 394386569.0, "step": 4194 }, { "epoch": 0.7159924901860386, "grad_norm": 0.5075604560609313, "learning_rate": 1.1367127496159756e-05, "loss": 0.7188, "num_tokens": 394489063.0, "step": 4195 }, { "epoch": 0.716163167776071, "grad_norm": 0.503464258220226, "learning_rate": 1.1360300392558458e-05, "loss": 0.6462, "num_tokens": 394590155.0, "step": 4196 }, { "epoch": 0.7163338453661034, "grad_norm": 0.4839578544102546, "learning_rate": 1.1353473288957162e-05, "loss": 0.5533, "num_tokens": 394675328.0, "step": 4197 }, { "epoch": 0.7165045229561359, "grad_norm": 0.4762296794221066, "learning_rate": 1.1346646185355862e-05, "loss": 0.601, "num_tokens": 394767226.0, "step": 4198 }, { "epoch": 0.7166752005461683, "grad_norm": 0.5019653933263576, "learning_rate": 1.1339819081754566e-05, "loss": 0.5119, "num_tokens": 394841606.0, "step": 4199 }, { "epoch": 0.7168458781362007, "grad_norm": 0.5015511839199336, "learning_rate": 1.133299197815327e-05, "loss": 0.5008, "num_tokens": 394935485.0, "step": 4200 }, { "epoch": 0.7170165557262331, "grad_norm": 0.5032290976528789, "learning_rate": 1.1326164874551972e-05, "loss": 0.5655, "num_tokens": 395019812.0, "step": 4201 }, { "epoch": 0.7171872333162655, "grad_norm": 0.45437127327478266, "learning_rate": 1.1319337770950676e-05, "loss": 0.6015, "num_tokens": 395116839.0, "step": 4202 }, { "epoch": 0.717357910906298, "grad_norm": 0.5118814455209338, "learning_rate": 1.131251066734938e-05, "loss": 0.5928, "num_tokens": 395195277.0, "step": 4203 }, { "epoch": 0.7175285884963304, "grad_norm": 0.5437099535664465, "learning_rate": 1.130568356374808e-05, "loss": 0.5955, "num_tokens": 395277249.0, "step": 4204 }, { "epoch": 0.7176992660863628, "grad_norm": 0.46091704401643957, "learning_rate": 1.1298856460146784e-05, "loss": 0.6331, "num_tokens": 395384063.0, "step": 4205 }, { "epoch": 0.7178699436763953, "grad_norm": 0.4582373418303137, "learning_rate": 1.1292029356545486e-05, "loss": 0.5757, "num_tokens": 395486319.0, "step": 4206 }, { "epoch": 0.7180406212664278, "grad_norm": 0.48749294555648176, "learning_rate": 1.128520225294419e-05, "loss": 0.534, "num_tokens": 395573708.0, "step": 4207 }, { "epoch": 0.7182112988564602, "grad_norm": 0.4603445447588311, "learning_rate": 1.1278375149342893e-05, "loss": 0.4987, "num_tokens": 395664782.0, "step": 4208 }, { "epoch": 0.7183819764464926, "grad_norm": 0.5060192014328837, "learning_rate": 1.1271548045741595e-05, "loss": 0.516, "num_tokens": 395732802.0, "step": 4209 }, { "epoch": 0.718552654036525, "grad_norm": 0.4434204672622063, "learning_rate": 1.1264720942140297e-05, "loss": 0.5536, "num_tokens": 395841375.0, "step": 4210 }, { "epoch": 0.7187233316265574, "grad_norm": 0.47353460631279026, "learning_rate": 1.1257893838539e-05, "loss": 0.5885, "num_tokens": 395940279.0, "step": 4211 }, { "epoch": 0.7188940092165899, "grad_norm": 0.49640599650948386, "learning_rate": 1.1251066734937703e-05, "loss": 0.5896, "num_tokens": 396028905.0, "step": 4212 }, { "epoch": 0.7190646868066223, "grad_norm": 0.5130460925822962, "learning_rate": 1.1244239631336407e-05, "loss": 0.5563, "num_tokens": 396105967.0, "step": 4213 }, { "epoch": 0.7192353643966547, "grad_norm": 0.5954253195407005, "learning_rate": 1.123741252773511e-05, "loss": 0.5234, "num_tokens": 396168687.0, "step": 4214 }, { "epoch": 0.7194060419866871, "grad_norm": 0.49660376968702424, "learning_rate": 1.1230585424133813e-05, "loss": 0.5868, "num_tokens": 396261438.0, "step": 4215 }, { "epoch": 0.7195767195767195, "grad_norm": 0.4248828646703177, "learning_rate": 1.1223758320532515e-05, "loss": 0.5497, "num_tokens": 396387790.0, "step": 4216 }, { "epoch": 0.719747397166752, "grad_norm": 0.4718087066644489, "learning_rate": 1.1216931216931217e-05, "loss": 0.5466, "num_tokens": 396481634.0, "step": 4217 }, { "epoch": 0.7199180747567845, "grad_norm": 0.4438709788033794, "learning_rate": 1.121010411332992e-05, "loss": 0.5318, "num_tokens": 396578776.0, "step": 4218 }, { "epoch": 0.7200887523468169, "grad_norm": 0.5057812104466077, "learning_rate": 1.1203277009728624e-05, "loss": 0.5786, "num_tokens": 396658658.0, "step": 4219 }, { "epoch": 0.7202594299368493, "grad_norm": 0.43397773024908165, "learning_rate": 1.1196449906127326e-05, "loss": 0.5638, "num_tokens": 396769724.0, "step": 4220 }, { "epoch": 0.7204301075268817, "grad_norm": 0.5076846569046706, "learning_rate": 1.118962280252603e-05, "loss": 0.664, "num_tokens": 396862125.0, "step": 4221 }, { "epoch": 0.7206007851169142, "grad_norm": 0.4859766236841738, "learning_rate": 1.118279569892473e-05, "loss": 0.6066, "num_tokens": 396953338.0, "step": 4222 }, { "epoch": 0.7207714627069466, "grad_norm": 0.4905874972790196, "learning_rate": 1.1175968595323434e-05, "loss": 0.578, "num_tokens": 397045299.0, "step": 4223 }, { "epoch": 0.720942140296979, "grad_norm": 0.5116476724363765, "learning_rate": 1.1169141491722138e-05, "loss": 0.6113, "num_tokens": 397130200.0, "step": 4224 }, { "epoch": 0.7211128178870114, "grad_norm": 0.5172394796334555, "learning_rate": 1.116231438812084e-05, "loss": 0.5016, "num_tokens": 397202802.0, "step": 4225 }, { "epoch": 0.7212834954770438, "grad_norm": 0.4824177100568654, "learning_rate": 1.1155487284519544e-05, "loss": 0.5852, "num_tokens": 397295235.0, "step": 4226 }, { "epoch": 0.7214541730670763, "grad_norm": 0.46816329691972003, "learning_rate": 1.1148660180918248e-05, "loss": 0.522, "num_tokens": 397381501.0, "step": 4227 }, { "epoch": 0.7216248506571087, "grad_norm": 0.4421047173746162, "learning_rate": 1.114183307731695e-05, "loss": 0.5957, "num_tokens": 397490072.0, "step": 4228 }, { "epoch": 0.7217955282471411, "grad_norm": 0.48211719875379355, "learning_rate": 1.1135005973715652e-05, "loss": 0.5466, "num_tokens": 397573653.0, "step": 4229 }, { "epoch": 0.7219662058371735, "grad_norm": 0.516314532863785, "learning_rate": 1.1128178870114354e-05, "loss": 0.5713, "num_tokens": 397658244.0, "step": 4230 }, { "epoch": 0.722136883427206, "grad_norm": 0.43421595646834105, "learning_rate": 1.1121351766513058e-05, "loss": 0.6123, "num_tokens": 397789428.0, "step": 4231 }, { "epoch": 0.7223075610172385, "grad_norm": 0.4486745693589412, "learning_rate": 1.1114524662911761e-05, "loss": 0.5055, "num_tokens": 397880439.0, "step": 4232 }, { "epoch": 0.7224782386072709, "grad_norm": 0.4485558434425161, "learning_rate": 1.1107697559310464e-05, "loss": 0.6138, "num_tokens": 397987451.0, "step": 4233 }, { "epoch": 0.7226489161973033, "grad_norm": 0.5150493879545628, "learning_rate": 1.1100870455709167e-05, "loss": 0.5778, "num_tokens": 398069412.0, "step": 4234 }, { "epoch": 0.7228195937873357, "grad_norm": 0.4923403310614869, "learning_rate": 1.1094043352107868e-05, "loss": 0.5007, "num_tokens": 398149491.0, "step": 4235 }, { "epoch": 0.7229902713773682, "grad_norm": 0.508297830570167, "learning_rate": 1.1087216248506571e-05, "loss": 0.5752, "num_tokens": 398236566.0, "step": 4236 }, { "epoch": 0.7231609489674006, "grad_norm": 0.4945358274783822, "learning_rate": 1.1080389144905275e-05, "loss": 0.5658, "num_tokens": 398323481.0, "step": 4237 }, { "epoch": 0.723331626557433, "grad_norm": 0.43933018936293533, "learning_rate": 1.1073562041303977e-05, "loss": 0.5659, "num_tokens": 398439506.0, "step": 4238 }, { "epoch": 0.7235023041474654, "grad_norm": 0.47957955274082376, "learning_rate": 1.1066734937702681e-05, "loss": 0.5545, "num_tokens": 398530686.0, "step": 4239 }, { "epoch": 0.7236729817374978, "grad_norm": 0.4981186171336324, "learning_rate": 1.1059907834101385e-05, "loss": 0.6578, "num_tokens": 398632195.0, "step": 4240 }, { "epoch": 0.7238436593275303, "grad_norm": 0.39668857886562986, "learning_rate": 1.1053080730500085e-05, "loss": 0.6086, "num_tokens": 398775602.0, "step": 4241 }, { "epoch": 0.7240143369175627, "grad_norm": 0.5017674933457379, "learning_rate": 1.1046253626898789e-05, "loss": 0.5822, "num_tokens": 398858041.0, "step": 4242 }, { "epoch": 0.7241850145075952, "grad_norm": 0.475976097431269, "learning_rate": 1.1039426523297491e-05, "loss": 0.6044, "num_tokens": 398958214.0, "step": 4243 }, { "epoch": 0.7243556920976276, "grad_norm": 0.5498234789615752, "learning_rate": 1.1032599419696195e-05, "loss": 0.5314, "num_tokens": 399019678.0, "step": 4244 }, { "epoch": 0.72452636968766, "grad_norm": 0.4351373478037276, "learning_rate": 1.1025772316094899e-05, "loss": 0.4949, "num_tokens": 399121942.0, "step": 4245 }, { "epoch": 0.7246970472776925, "grad_norm": 0.4728345845623212, "learning_rate": 1.10189452124936e-05, "loss": 0.5614, "num_tokens": 399213707.0, "step": 4246 }, { "epoch": 0.7248677248677249, "grad_norm": 0.5083460313009013, "learning_rate": 1.1012118108892303e-05, "loss": 0.624, "num_tokens": 399299997.0, "step": 4247 }, { "epoch": 0.7250384024577573, "grad_norm": 0.47362224826575244, "learning_rate": 1.1005291005291006e-05, "loss": 0.4953, "num_tokens": 399389935.0, "step": 4248 }, { "epoch": 0.7252090800477897, "grad_norm": 0.5614903176423032, "learning_rate": 1.0998463901689708e-05, "loss": 0.6632, "num_tokens": 399463600.0, "step": 4249 }, { "epoch": 0.7253797576378221, "grad_norm": 0.46113292434069125, "learning_rate": 1.0991636798088412e-05, "loss": 0.5462, "num_tokens": 399551132.0, "step": 4250 }, { "epoch": 0.7255504352278546, "grad_norm": 0.45646447092112674, "learning_rate": 1.0984809694487116e-05, "loss": 0.6299, "num_tokens": 399667527.0, "step": 4251 }, { "epoch": 0.725721112817887, "grad_norm": 0.4757833012679637, "learning_rate": 1.0977982590885818e-05, "loss": 0.5381, "num_tokens": 399756210.0, "step": 4252 }, { "epoch": 0.7258917904079194, "grad_norm": 0.48451657295623723, "learning_rate": 1.097115548728452e-05, "loss": 0.5273, "num_tokens": 399840785.0, "step": 4253 }, { "epoch": 0.7260624679979518, "grad_norm": 0.48873621621534413, "learning_rate": 1.0964328383683222e-05, "loss": 0.5454, "num_tokens": 399922712.0, "step": 4254 }, { "epoch": 0.7262331455879844, "grad_norm": 0.5271392920997412, "learning_rate": 1.0957501280081926e-05, "loss": 0.5187, "num_tokens": 399987108.0, "step": 4255 }, { "epoch": 0.7264038231780168, "grad_norm": 0.47006711064445234, "learning_rate": 1.095067417648063e-05, "loss": 0.5538, "num_tokens": 400084568.0, "step": 4256 }, { "epoch": 0.7265745007680492, "grad_norm": 0.9007229472025261, "learning_rate": 1.0943847072879332e-05, "loss": 0.6006, "num_tokens": 400189460.0, "step": 4257 }, { "epoch": 0.7267451783580816, "grad_norm": 0.4360122036906084, "learning_rate": 1.0937019969278036e-05, "loss": 0.6515, "num_tokens": 400324729.0, "step": 4258 }, { "epoch": 0.726915855948114, "grad_norm": 0.46022337421372767, "learning_rate": 1.0930192865676736e-05, "loss": 0.5312, "num_tokens": 400413701.0, "step": 4259 }, { "epoch": 0.7270865335381465, "grad_norm": 0.4889204313897834, "learning_rate": 1.092336576207544e-05, "loss": 0.6946, "num_tokens": 400529592.0, "step": 4260 }, { "epoch": 0.7272572111281789, "grad_norm": 0.5144220169431971, "learning_rate": 1.0916538658474143e-05, "loss": 0.6694, "num_tokens": 400624837.0, "step": 4261 }, { "epoch": 0.7274278887182113, "grad_norm": 0.46089609599842474, "learning_rate": 1.0909711554872846e-05, "loss": 0.5621, "num_tokens": 400720834.0, "step": 4262 }, { "epoch": 0.7275985663082437, "grad_norm": 0.49010959026949963, "learning_rate": 1.090288445127155e-05, "loss": 0.4912, "num_tokens": 400795348.0, "step": 4263 }, { "epoch": 0.7277692438982761, "grad_norm": 0.48340462364432335, "learning_rate": 1.0896057347670253e-05, "loss": 0.5876, "num_tokens": 400893159.0, "step": 4264 }, { "epoch": 0.7279399214883086, "grad_norm": 0.4841646189438429, "learning_rate": 1.0889230244068955e-05, "loss": 0.559, "num_tokens": 400985202.0, "step": 4265 }, { "epoch": 0.728110599078341, "grad_norm": 0.4240401646462732, "learning_rate": 1.0882403140467657e-05, "loss": 0.5767, "num_tokens": 401103215.0, "step": 4266 }, { "epoch": 0.7282812766683734, "grad_norm": 0.4199670040833908, "learning_rate": 1.087557603686636e-05, "loss": 0.5959, "num_tokens": 401243987.0, "step": 4267 }, { "epoch": 0.7284519542584059, "grad_norm": 0.456592933416189, "learning_rate": 1.0868748933265063e-05, "loss": 0.6036, "num_tokens": 401349738.0, "step": 4268 }, { "epoch": 0.7286226318484383, "grad_norm": 0.5011114662576707, "learning_rate": 1.0861921829663767e-05, "loss": 0.6299, "num_tokens": 401449410.0, "step": 4269 }, { "epoch": 0.7287933094384708, "grad_norm": 0.4975992581576432, "learning_rate": 1.0855094726062469e-05, "loss": 0.4878, "num_tokens": 401524669.0, "step": 4270 }, { "epoch": 0.7289639870285032, "grad_norm": 0.5073792807564796, "learning_rate": 1.0848267622461173e-05, "loss": 0.5776, "num_tokens": 401605899.0, "step": 4271 }, { "epoch": 0.7291346646185356, "grad_norm": 0.46544974304262576, "learning_rate": 1.0841440518859873e-05, "loss": 0.5667, "num_tokens": 401697163.0, "step": 4272 }, { "epoch": 0.729305342208568, "grad_norm": 0.45135038211353384, "learning_rate": 1.0834613415258577e-05, "loss": 0.5204, "num_tokens": 401808030.0, "step": 4273 }, { "epoch": 0.7294760197986004, "grad_norm": 0.5335618470214282, "learning_rate": 1.082778631165728e-05, "loss": 0.5847, "num_tokens": 401922010.0, "step": 4274 }, { "epoch": 0.7296466973886329, "grad_norm": 0.47486480242624474, "learning_rate": 1.0820959208055983e-05, "loss": 0.5219, "num_tokens": 402009618.0, "step": 4275 }, { "epoch": 0.7298173749786653, "grad_norm": 0.47957998079741854, "learning_rate": 1.0814132104454686e-05, "loss": 0.6057, "num_tokens": 402097974.0, "step": 4276 }, { "epoch": 0.7299880525686977, "grad_norm": 0.5248642266356605, "learning_rate": 1.080730500085339e-05, "loss": 0.6323, "num_tokens": 402186255.0, "step": 4277 }, { "epoch": 0.7301587301587301, "grad_norm": 0.44403562449678036, "learning_rate": 1.080047789725209e-05, "loss": 0.647, "num_tokens": 402304298.0, "step": 4278 }, { "epoch": 0.7303294077487625, "grad_norm": 0.4409154088588288, "learning_rate": 1.0793650793650794e-05, "loss": 0.5569, "num_tokens": 402408389.0, "step": 4279 }, { "epoch": 0.7305000853387951, "grad_norm": 0.5318449933322448, "learning_rate": 1.0786823690049496e-05, "loss": 0.5137, "num_tokens": 402473259.0, "step": 4280 }, { "epoch": 0.7306707629288275, "grad_norm": 0.449191021430992, "learning_rate": 1.07799965864482e-05, "loss": 0.6212, "num_tokens": 402585559.0, "step": 4281 }, { "epoch": 0.7308414405188599, "grad_norm": 0.43371587149727714, "learning_rate": 1.0773169482846904e-05, "loss": 0.5024, "num_tokens": 402682239.0, "step": 4282 }, { "epoch": 0.7310121181088923, "grad_norm": 0.6084963995079462, "learning_rate": 1.0766342379245606e-05, "loss": 0.7041, "num_tokens": 402763370.0, "step": 4283 }, { "epoch": 0.7311827956989247, "grad_norm": 0.45702546848225545, "learning_rate": 1.0759515275644308e-05, "loss": 0.5266, "num_tokens": 402852727.0, "step": 4284 }, { "epoch": 0.7313534732889572, "grad_norm": 0.5536129462551732, "learning_rate": 1.0752688172043012e-05, "loss": 0.5457, "num_tokens": 402915202.0, "step": 4285 }, { "epoch": 0.7315241508789896, "grad_norm": 0.4742987565734776, "learning_rate": 1.0745861068441714e-05, "loss": 0.6226, "num_tokens": 403014906.0, "step": 4286 }, { "epoch": 0.731694828469022, "grad_norm": 0.4656047402094198, "learning_rate": 1.0739033964840418e-05, "loss": 0.5699, "num_tokens": 403111390.0, "step": 4287 }, { "epoch": 0.7318655060590544, "grad_norm": 0.5255141446701619, "learning_rate": 1.0732206861239121e-05, "loss": 0.6443, "num_tokens": 403188508.0, "step": 4288 }, { "epoch": 0.7320361836490868, "grad_norm": 0.46533597170111096, "learning_rate": 1.0725379757637823e-05, "loss": 0.5637, "num_tokens": 403279757.0, "step": 4289 }, { "epoch": 0.7322068612391193, "grad_norm": 0.5052571511016747, "learning_rate": 1.0718552654036525e-05, "loss": 0.5651, "num_tokens": 403356256.0, "step": 4290 }, { "epoch": 0.7323775388291517, "grad_norm": 0.4694439794584718, "learning_rate": 1.0711725550435228e-05, "loss": 0.6153, "num_tokens": 403454824.0, "step": 4291 }, { "epoch": 0.7325482164191841, "grad_norm": 0.4672981802420552, "learning_rate": 1.0704898446833931e-05, "loss": 0.5706, "num_tokens": 403553258.0, "step": 4292 }, { "epoch": 0.7327188940092166, "grad_norm": 0.4720078360728437, "learning_rate": 1.0698071343232635e-05, "loss": 0.5347, "num_tokens": 403642251.0, "step": 4293 }, { "epoch": 0.7328895715992491, "grad_norm": 0.47150758800785336, "learning_rate": 1.0691244239631337e-05, "loss": 0.4696, "num_tokens": 403720537.0, "step": 4294 }, { "epoch": 0.7330602491892815, "grad_norm": 0.4393035690120951, "learning_rate": 1.0684417136030041e-05, "loss": 0.5859, "num_tokens": 403836042.0, "step": 4295 }, { "epoch": 0.7332309267793139, "grad_norm": 0.46849974286421786, "learning_rate": 1.0677590032428745e-05, "loss": 0.6108, "num_tokens": 403957621.0, "step": 4296 }, { "epoch": 0.7334016043693463, "grad_norm": 0.4903523105629108, "learning_rate": 1.0670762928827445e-05, "loss": 0.6342, "num_tokens": 404049128.0, "step": 4297 }, { "epoch": 0.7335722819593787, "grad_norm": 0.4931107689643817, "learning_rate": 1.0663935825226149e-05, "loss": 0.629, "num_tokens": 404151150.0, "step": 4298 }, { "epoch": 0.7337429595494112, "grad_norm": 0.5187412944131274, "learning_rate": 1.0657108721624851e-05, "loss": 0.5803, "num_tokens": 404228678.0, "step": 4299 }, { "epoch": 0.7339136371394436, "grad_norm": 0.5097933104434164, "learning_rate": 1.0650281618023555e-05, "loss": 0.5635, "num_tokens": 404301393.0, "step": 4300 }, { "epoch": 0.734084314729476, "grad_norm": 0.47578495689221606, "learning_rate": 1.0643454514422258e-05, "loss": 0.6241, "num_tokens": 404396430.0, "step": 4301 }, { "epoch": 0.7342549923195084, "grad_norm": 1.669509249490551, "learning_rate": 1.063662741082096e-05, "loss": 0.7102, "num_tokens": 404524026.0, "step": 4302 }, { "epoch": 0.7344256699095408, "grad_norm": 0.45355446441671304, "learning_rate": 1.0629800307219663e-05, "loss": 0.4654, "num_tokens": 404617248.0, "step": 4303 }, { "epoch": 0.7345963474995733, "grad_norm": 0.4708137223745126, "learning_rate": 1.0622973203618365e-05, "loss": 0.589, "num_tokens": 404718747.0, "step": 4304 }, { "epoch": 0.7347670250896058, "grad_norm": 0.5578500235490801, "learning_rate": 1.0616146100017068e-05, "loss": 0.592, "num_tokens": 404786852.0, "step": 4305 }, { "epoch": 0.7349377026796382, "grad_norm": 0.47051368370278285, "learning_rate": 1.0609318996415772e-05, "loss": 0.6073, "num_tokens": 404888953.0, "step": 4306 }, { "epoch": 0.7351083802696706, "grad_norm": 0.47038255151727043, "learning_rate": 1.0602491892814474e-05, "loss": 0.5978, "num_tokens": 404987239.0, "step": 4307 }, { "epoch": 0.735279057859703, "grad_norm": 0.44107785804690974, "learning_rate": 1.0595664789213178e-05, "loss": 0.4563, "num_tokens": 405074522.0, "step": 4308 }, { "epoch": 0.7354497354497355, "grad_norm": 0.4642151946678104, "learning_rate": 1.0588837685611878e-05, "loss": 0.6299, "num_tokens": 405176975.0, "step": 4309 }, { "epoch": 0.7356204130397679, "grad_norm": 0.51745894851754, "learning_rate": 1.0582010582010582e-05, "loss": 0.6605, "num_tokens": 405264249.0, "step": 4310 }, { "epoch": 0.7357910906298003, "grad_norm": 0.4921636970623996, "learning_rate": 1.0575183478409286e-05, "loss": 0.5894, "num_tokens": 405353843.0, "step": 4311 }, { "epoch": 0.7359617682198327, "grad_norm": 0.5307533088110954, "learning_rate": 1.0568356374807988e-05, "loss": 0.5434, "num_tokens": 405423522.0, "step": 4312 }, { "epoch": 0.7361324458098651, "grad_norm": 0.5036438764272756, "learning_rate": 1.0561529271206692e-05, "loss": 0.5534, "num_tokens": 405495927.0, "step": 4313 }, { "epoch": 0.7363031233998976, "grad_norm": 0.4676877689821344, "learning_rate": 1.0554702167605395e-05, "loss": 0.6148, "num_tokens": 405598446.0, "step": 4314 }, { "epoch": 0.73647380098993, "grad_norm": 0.49306354131565944, "learning_rate": 1.0547875064004096e-05, "loss": 0.5909, "num_tokens": 405688639.0, "step": 4315 }, { "epoch": 0.7366444785799624, "grad_norm": 0.4533554453512728, "learning_rate": 1.05410479604028e-05, "loss": 0.5605, "num_tokens": 405789197.0, "step": 4316 }, { "epoch": 0.7368151561699949, "grad_norm": 0.4674787467885761, "learning_rate": 1.0534220856801503e-05, "loss": 0.5479, "num_tokens": 405882289.0, "step": 4317 }, { "epoch": 0.7369858337600274, "grad_norm": 0.42260705282148414, "learning_rate": 1.0527393753200205e-05, "loss": 0.5954, "num_tokens": 406012059.0, "step": 4318 }, { "epoch": 0.7371565113500598, "grad_norm": 0.4297292433764594, "learning_rate": 1.052056664959891e-05, "loss": 0.5297, "num_tokens": 406114309.0, "step": 4319 }, { "epoch": 0.7373271889400922, "grad_norm": 0.4648437317034659, "learning_rate": 1.0513739545997613e-05, "loss": 0.5761, "num_tokens": 406212512.0, "step": 4320 }, { "epoch": 0.7374978665301246, "grad_norm": 0.4659457976469246, "learning_rate": 1.0506912442396313e-05, "loss": 0.6255, "num_tokens": 406316112.0, "step": 4321 }, { "epoch": 0.737668544120157, "grad_norm": 0.439761587026063, "learning_rate": 1.0500085338795017e-05, "loss": 0.6542, "num_tokens": 406450896.0, "step": 4322 }, { "epoch": 0.7378392217101895, "grad_norm": 0.514637197365571, "learning_rate": 1.049325823519372e-05, "loss": 0.6096, "num_tokens": 406540478.0, "step": 4323 }, { "epoch": 0.7380098993002219, "grad_norm": 0.5128764747249741, "learning_rate": 1.0486431131592423e-05, "loss": 0.5749, "num_tokens": 406625568.0, "step": 4324 }, { "epoch": 0.7381805768902543, "grad_norm": 0.5014231139847178, "learning_rate": 1.0479604027991127e-05, "loss": 0.6242, "num_tokens": 406713060.0, "step": 4325 }, { "epoch": 0.7383512544802867, "grad_norm": 0.4900536540071442, "learning_rate": 1.0472776924389829e-05, "loss": 0.6036, "num_tokens": 406819894.0, "step": 4326 }, { "epoch": 0.7385219320703191, "grad_norm": 0.5210721666198292, "learning_rate": 1.0465949820788533e-05, "loss": 0.6062, "num_tokens": 406901039.0, "step": 4327 }, { "epoch": 0.7386926096603516, "grad_norm": 0.4221029210598249, "learning_rate": 1.0459122717187233e-05, "loss": 0.5243, "num_tokens": 407011478.0, "step": 4328 }, { "epoch": 0.738863287250384, "grad_norm": 0.5065806761484253, "learning_rate": 1.0452295613585937e-05, "loss": 0.5591, "num_tokens": 407092023.0, "step": 4329 }, { "epoch": 0.7390339648404165, "grad_norm": 0.5299471235955461, "learning_rate": 1.044546850998464e-05, "loss": 0.6163, "num_tokens": 407166222.0, "step": 4330 }, { "epoch": 0.7392046424304489, "grad_norm": 0.4644656118352512, "learning_rate": 1.0438641406383342e-05, "loss": 0.5349, "num_tokens": 407255179.0, "step": 4331 }, { "epoch": 0.7393753200204813, "grad_norm": 0.42416355737859723, "learning_rate": 1.0431814302782046e-05, "loss": 0.5774, "num_tokens": 407375686.0, "step": 4332 }, { "epoch": 0.7395459976105138, "grad_norm": 0.47666681298339775, "learning_rate": 1.042498719918075e-05, "loss": 0.6016, "num_tokens": 407476498.0, "step": 4333 }, { "epoch": 0.7397166752005462, "grad_norm": 0.4689306569306729, "learning_rate": 1.041816009557945e-05, "loss": 0.5775, "num_tokens": 407570896.0, "step": 4334 }, { "epoch": 0.7398873527905786, "grad_norm": 0.4178411759780276, "learning_rate": 1.0411332991978154e-05, "loss": 0.6259, "num_tokens": 407700567.0, "step": 4335 }, { "epoch": 0.740058030380611, "grad_norm": 0.4890278807145064, "learning_rate": 1.0404505888376856e-05, "loss": 0.628, "num_tokens": 407794123.0, "step": 4336 }, { "epoch": 0.7402287079706434, "grad_norm": 0.4611684405861955, "learning_rate": 1.039767878477556e-05, "loss": 0.4847, "num_tokens": 407880561.0, "step": 4337 }, { "epoch": 0.7403993855606759, "grad_norm": 0.5376170906370259, "learning_rate": 1.0390851681174264e-05, "loss": 0.582, "num_tokens": 407960276.0, "step": 4338 }, { "epoch": 0.7405700631507083, "grad_norm": 0.4709328670410732, "learning_rate": 1.0384024577572966e-05, "loss": 0.6639, "num_tokens": 408070349.0, "step": 4339 }, { "epoch": 0.7407407407407407, "grad_norm": 0.46193189972691256, "learning_rate": 1.0377197473971668e-05, "loss": 0.5161, "num_tokens": 408158931.0, "step": 4340 }, { "epoch": 0.7409114183307731, "grad_norm": 0.48283336467627475, "learning_rate": 1.037037037037037e-05, "loss": 0.5826, "num_tokens": 408243374.0, "step": 4341 }, { "epoch": 0.7410820959208057, "grad_norm": 0.47770340990277727, "learning_rate": 1.0363543266769074e-05, "loss": 0.5856, "num_tokens": 408346353.0, "step": 4342 }, { "epoch": 0.7412527735108381, "grad_norm": 0.5006449405412168, "learning_rate": 1.0356716163167777e-05, "loss": 0.5606, "num_tokens": 408430597.0, "step": 4343 }, { "epoch": 0.7414234511008705, "grad_norm": 0.5045732695299237, "learning_rate": 1.034988905956648e-05, "loss": 0.6064, "num_tokens": 408515497.0, "step": 4344 }, { "epoch": 0.7415941286909029, "grad_norm": 0.49643919641604445, "learning_rate": 1.0343061955965183e-05, "loss": 0.6412, "num_tokens": 408612575.0, "step": 4345 }, { "epoch": 0.7417648062809353, "grad_norm": 0.4890017962231783, "learning_rate": 1.0336234852363884e-05, "loss": 0.561, "num_tokens": 408697989.0, "step": 4346 }, { "epoch": 0.7419354838709677, "grad_norm": 0.508604066073957, "learning_rate": 1.0329407748762587e-05, "loss": 0.5274, "num_tokens": 408773159.0, "step": 4347 }, { "epoch": 0.7421061614610002, "grad_norm": 0.456461593082836, "learning_rate": 1.0322580645161291e-05, "loss": 0.496, "num_tokens": 408865940.0, "step": 4348 }, { "epoch": 0.7422768390510326, "grad_norm": 0.4538357239881049, "learning_rate": 1.0315753541559993e-05, "loss": 0.5252, "num_tokens": 408965243.0, "step": 4349 }, { "epoch": 0.742447516641065, "grad_norm": 0.46692002715015735, "learning_rate": 1.0308926437958697e-05, "loss": 0.5316, "num_tokens": 409061642.0, "step": 4350 }, { "epoch": 0.7426181942310974, "grad_norm": 0.48673691838618155, "learning_rate": 1.03020993343574e-05, "loss": 0.5245, "num_tokens": 409140804.0, "step": 4351 }, { "epoch": 0.7427888718211298, "grad_norm": 0.4649158259465345, "learning_rate": 1.0295272230756101e-05, "loss": 0.6799, "num_tokens": 409249076.0, "step": 4352 }, { "epoch": 0.7429595494111623, "grad_norm": 0.4503658617151835, "learning_rate": 1.0288445127154805e-05, "loss": 0.622, "num_tokens": 409366014.0, "step": 4353 }, { "epoch": 0.7431302270011948, "grad_norm": 0.4255692682504055, "learning_rate": 1.0281618023553509e-05, "loss": 0.6617, "num_tokens": 409522537.0, "step": 4354 }, { "epoch": 0.7433009045912272, "grad_norm": 0.4256897379491504, "learning_rate": 1.027479091995221e-05, "loss": 0.572, "num_tokens": 409632993.0, "step": 4355 }, { "epoch": 0.7434715821812596, "grad_norm": 0.48937589736438847, "learning_rate": 1.0267963816350915e-05, "loss": 0.6388, "num_tokens": 409728832.0, "step": 4356 }, { "epoch": 0.7436422597712921, "grad_norm": 0.4381952498125304, "learning_rate": 1.0261136712749618e-05, "loss": 0.5531, "num_tokens": 409836329.0, "step": 4357 }, { "epoch": 0.7438129373613245, "grad_norm": 0.42946332888756505, "learning_rate": 1.025430960914832e-05, "loss": 0.4915, "num_tokens": 409933160.0, "step": 4358 }, { "epoch": 0.7439836149513569, "grad_norm": 0.499375841223843, "learning_rate": 1.0247482505547022e-05, "loss": 0.5864, "num_tokens": 410038113.0, "step": 4359 }, { "epoch": 0.7441542925413893, "grad_norm": 0.42718790270831775, "learning_rate": 1.0240655401945725e-05, "loss": 0.5701, "num_tokens": 410150970.0, "step": 4360 }, { "epoch": 0.7443249701314217, "grad_norm": 0.4582628267905334, "learning_rate": 1.0233828298344428e-05, "loss": 0.6103, "num_tokens": 410251937.0, "step": 4361 }, { "epoch": 0.7444956477214542, "grad_norm": 0.5050187945859961, "learning_rate": 1.0227001194743132e-05, "loss": 0.5931, "num_tokens": 410332976.0, "step": 4362 }, { "epoch": 0.7446663253114866, "grad_norm": 0.4323297849711002, "learning_rate": 1.0220174091141834e-05, "loss": 0.5981, "num_tokens": 410451985.0, "step": 4363 }, { "epoch": 0.744837002901519, "grad_norm": 0.490469374144618, "learning_rate": 1.0213346987540538e-05, "loss": 0.6239, "num_tokens": 410547692.0, "step": 4364 }, { "epoch": 0.7450076804915514, "grad_norm": 0.5257572815768866, "learning_rate": 1.0206519883939238e-05, "loss": 0.6191, "num_tokens": 410626494.0, "step": 4365 }, { "epoch": 0.7451783580815838, "grad_norm": 0.44716341800888604, "learning_rate": 1.0199692780337942e-05, "loss": 0.5248, "num_tokens": 410719273.0, "step": 4366 }, { "epoch": 0.7453490356716164, "grad_norm": 0.47428584305823773, "learning_rate": 1.0192865676736646e-05, "loss": 0.5464, "num_tokens": 410803289.0, "step": 4367 }, { "epoch": 0.7455197132616488, "grad_norm": 0.5288738538491394, "learning_rate": 1.0186038573135348e-05, "loss": 0.581, "num_tokens": 410877659.0, "step": 4368 }, { "epoch": 0.7456903908516812, "grad_norm": 0.46576432387876987, "learning_rate": 1.0179211469534052e-05, "loss": 0.494, "num_tokens": 410956814.0, "step": 4369 }, { "epoch": 0.7458610684417136, "grad_norm": 0.4697540971372057, "learning_rate": 1.0172384365932755e-05, "loss": 0.5703, "num_tokens": 411059046.0, "step": 4370 }, { "epoch": 0.746031746031746, "grad_norm": 0.4231476153775304, "learning_rate": 1.0165557262331456e-05, "loss": 0.5505, "num_tokens": 411172596.0, "step": 4371 }, { "epoch": 0.7462024236217785, "grad_norm": 0.4606591526802254, "learning_rate": 1.015873015873016e-05, "loss": 0.6563, "num_tokens": 411275754.0, "step": 4372 }, { "epoch": 0.7463731012118109, "grad_norm": 0.5289200935191991, "learning_rate": 1.0151903055128862e-05, "loss": 0.5371, "num_tokens": 411350866.0, "step": 4373 }, { "epoch": 0.7465437788018433, "grad_norm": 0.48747155728287284, "learning_rate": 1.0145075951527565e-05, "loss": 0.5067, "num_tokens": 411434036.0, "step": 4374 }, { "epoch": 0.7467144563918757, "grad_norm": 0.4814716628817321, "learning_rate": 1.0138248847926269e-05, "loss": 0.6368, "num_tokens": 411547356.0, "step": 4375 }, { "epoch": 0.7468851339819081, "grad_norm": 0.4833844030022916, "learning_rate": 1.0131421744324971e-05, "loss": 0.5491, "num_tokens": 411629877.0, "step": 4376 }, { "epoch": 0.7470558115719406, "grad_norm": 0.43095842106338755, "learning_rate": 1.0124594640723673e-05, "loss": 0.5631, "num_tokens": 411746738.0, "step": 4377 }, { "epoch": 0.747226489161973, "grad_norm": 0.4166632996317229, "learning_rate": 1.0117767537122375e-05, "loss": 0.5269, "num_tokens": 411861509.0, "step": 4378 }, { "epoch": 0.7473971667520055, "grad_norm": 0.45351160281636504, "learning_rate": 1.0110940433521079e-05, "loss": 0.5175, "num_tokens": 411947430.0, "step": 4379 }, { "epoch": 0.7475678443420379, "grad_norm": 0.509981827291222, "learning_rate": 1.0104113329919783e-05, "loss": 0.6005, "num_tokens": 412024959.0, "step": 4380 }, { "epoch": 0.7477385219320704, "grad_norm": 0.4845683367329745, "learning_rate": 1.0097286226318485e-05, "loss": 0.5526, "num_tokens": 412121735.0, "step": 4381 }, { "epoch": 0.7479091995221028, "grad_norm": 0.501363653202676, "learning_rate": 1.0090459122717189e-05, "loss": 0.5759, "num_tokens": 412201515.0, "step": 4382 }, { "epoch": 0.7480798771121352, "grad_norm": 0.5599795773150181, "learning_rate": 1.0083632019115889e-05, "loss": 0.4714, "num_tokens": 412257731.0, "step": 4383 }, { "epoch": 0.7482505547021676, "grad_norm": 0.4471650661394628, "learning_rate": 1.0076804915514593e-05, "loss": 0.5263, "num_tokens": 412367029.0, "step": 4384 }, { "epoch": 0.7484212322922, "grad_norm": 0.5409099620981359, "learning_rate": 1.0069977811913297e-05, "loss": 0.5923, "num_tokens": 412438913.0, "step": 4385 }, { "epoch": 0.7485919098822325, "grad_norm": 0.5096706124730942, "learning_rate": 1.0063150708311999e-05, "loss": 0.5549, "num_tokens": 412524198.0, "step": 4386 }, { "epoch": 0.7487625874722649, "grad_norm": 0.4461483333737283, "learning_rate": 1.0056323604710702e-05, "loss": 0.5377, "num_tokens": 412632293.0, "step": 4387 }, { "epoch": 0.7489332650622973, "grad_norm": 0.5005457174954253, "learning_rate": 1.0049496501109406e-05, "loss": 0.5174, "num_tokens": 412707462.0, "step": 4388 }, { "epoch": 0.7491039426523297, "grad_norm": 0.5412747316586721, "learning_rate": 1.004266939750811e-05, "loss": 0.553, "num_tokens": 412773088.0, "step": 4389 }, { "epoch": 0.7492746202423621, "grad_norm": 0.4746974294476905, "learning_rate": 1.003584229390681e-05, "loss": 0.6174, "num_tokens": 412870784.0, "step": 4390 }, { "epoch": 0.7494452978323946, "grad_norm": 0.5093856787982399, "learning_rate": 1.0029015190305514e-05, "loss": 0.5849, "num_tokens": 412950449.0, "step": 4391 }, { "epoch": 0.7496159754224271, "grad_norm": 0.4590577271651555, "learning_rate": 1.0022188086704216e-05, "loss": 0.4777, "num_tokens": 413034471.0, "step": 4392 }, { "epoch": 0.7497866530124595, "grad_norm": 0.4453938008489481, "learning_rate": 1.001536098310292e-05, "loss": 0.6468, "num_tokens": 413153889.0, "step": 4393 }, { "epoch": 0.7499573306024919, "grad_norm": 0.48649040961583473, "learning_rate": 1.0008533879501624e-05, "loss": 0.5992, "num_tokens": 413245032.0, "step": 4394 }, { "epoch": 0.7501280081925243, "grad_norm": 0.5286552103135749, "learning_rate": 1.0001706775900326e-05, "loss": 0.5175, "num_tokens": 413315226.0, "step": 4395 }, { "epoch": 0.7502986857825568, "grad_norm": 0.4678935190727275, "learning_rate": 9.994879672299028e-06, "loss": 0.5836, "num_tokens": 413416099.0, "step": 4396 }, { "epoch": 0.7504693633725892, "grad_norm": 0.47284588434264996, "learning_rate": 9.98805256869773e-06, "loss": 0.593, "num_tokens": 413515223.0, "step": 4397 }, { "epoch": 0.7506400409626216, "grad_norm": 0.5050945082745006, "learning_rate": 9.981225465096434e-06, "loss": 0.603, "num_tokens": 413601484.0, "step": 4398 }, { "epoch": 0.750810718552654, "grad_norm": 0.45013357590581676, "learning_rate": 9.974398361495137e-06, "loss": 0.4949, "num_tokens": 413699285.0, "step": 4399 }, { "epoch": 0.7509813961426864, "grad_norm": 0.507124031246201, "learning_rate": 9.96757125789384e-06, "loss": 0.604, "num_tokens": 413784039.0, "step": 4400 }, { "epoch": 0.7511520737327189, "grad_norm": 0.4231696700203865, "learning_rate": 9.960744154292542e-06, "loss": 0.5121, "num_tokens": 413895326.0, "step": 4401 }, { "epoch": 0.7513227513227513, "grad_norm": 0.513428930327859, "learning_rate": 9.953917050691245e-06, "loss": 0.6086, "num_tokens": 413970531.0, "step": 4402 }, { "epoch": 0.7514934289127837, "grad_norm": 0.5607228295579075, "learning_rate": 9.947089947089947e-06, "loss": 0.5184, "num_tokens": 414026060.0, "step": 4403 }, { "epoch": 0.7516641065028162, "grad_norm": 0.5508821278979664, "learning_rate": 9.940262843488651e-06, "loss": 0.5861, "num_tokens": 414112427.0, "step": 4404 }, { "epoch": 0.7518347840928487, "grad_norm": 0.49856643266591516, "learning_rate": 9.933435739887353e-06, "loss": 0.6281, "num_tokens": 414210392.0, "step": 4405 }, { "epoch": 0.7520054616828811, "grad_norm": 0.4866378347925833, "learning_rate": 9.926608636286057e-06, "loss": 0.628, "num_tokens": 414304714.0, "step": 4406 }, { "epoch": 0.7521761392729135, "grad_norm": 0.4668886979717633, "learning_rate": 9.919781532684759e-06, "loss": 0.4907, "num_tokens": 414385191.0, "step": 4407 }, { "epoch": 0.7523468168629459, "grad_norm": 0.44974396176436543, "learning_rate": 9.912954429083463e-06, "loss": 0.5215, "num_tokens": 414500297.0, "step": 4408 }, { "epoch": 0.7525174944529783, "grad_norm": 0.4713162240824331, "learning_rate": 9.906127325482165e-06, "loss": 0.6623, "num_tokens": 414611173.0, "step": 4409 }, { "epoch": 0.7526881720430108, "grad_norm": 0.47983215938872664, "learning_rate": 9.899300221880867e-06, "loss": 0.5749, "num_tokens": 414707718.0, "step": 4410 }, { "epoch": 0.7528588496330432, "grad_norm": 0.45745925445811325, "learning_rate": 9.89247311827957e-06, "loss": 0.5405, "num_tokens": 414806439.0, "step": 4411 }, { "epoch": 0.7530295272230756, "grad_norm": 0.5082844804453913, "learning_rate": 9.885646014678274e-06, "loss": 0.5469, "num_tokens": 414881962.0, "step": 4412 }, { "epoch": 0.753200204813108, "grad_norm": 0.48107604396951764, "learning_rate": 9.878818911076977e-06, "loss": 0.5564, "num_tokens": 414970493.0, "step": 4413 }, { "epoch": 0.7533708824031404, "grad_norm": 0.4700975367055858, "learning_rate": 9.871991807475679e-06, "loss": 0.4943, "num_tokens": 415051537.0, "step": 4414 }, { "epoch": 0.7535415599931728, "grad_norm": 0.5021468995839762, "learning_rate": 9.865164703874382e-06, "loss": 0.5427, "num_tokens": 415132908.0, "step": 4415 }, { "epoch": 0.7537122375832054, "grad_norm": 0.4788233700929066, "learning_rate": 9.858337600273084e-06, "loss": 0.5467, "num_tokens": 415217356.0, "step": 4416 }, { "epoch": 0.7538829151732378, "grad_norm": 0.6673897632293267, "learning_rate": 9.851510496671788e-06, "loss": 0.6105, "num_tokens": 415331087.0, "step": 4417 }, { "epoch": 0.7540535927632702, "grad_norm": 0.4793050910515883, "learning_rate": 9.84468339307049e-06, "loss": 0.6405, "num_tokens": 415438355.0, "step": 4418 }, { "epoch": 0.7542242703533026, "grad_norm": 0.5428168980180111, "learning_rate": 9.837856289469192e-06, "loss": 0.5879, "num_tokens": 415515992.0, "step": 4419 }, { "epoch": 0.7543949479433351, "grad_norm": 0.45374307318641255, "learning_rate": 9.831029185867896e-06, "loss": 0.4631, "num_tokens": 415603585.0, "step": 4420 }, { "epoch": 0.7545656255333675, "grad_norm": 0.504396962141867, "learning_rate": 9.8242020822666e-06, "loss": 0.5079, "num_tokens": 415668989.0, "step": 4421 }, { "epoch": 0.7547363031233999, "grad_norm": 0.617372369654016, "learning_rate": 9.817374978665302e-06, "loss": 0.6583, "num_tokens": 415728280.0, "step": 4422 }, { "epoch": 0.7549069807134323, "grad_norm": 0.5431089783677541, "learning_rate": 9.810547875064006e-06, "loss": 0.6162, "num_tokens": 415802572.0, "step": 4423 }, { "epoch": 0.7550776583034647, "grad_norm": 0.5361980479412144, "learning_rate": 9.803720771462708e-06, "loss": 0.6506, "num_tokens": 415899652.0, "step": 4424 }, { "epoch": 0.7552483358934972, "grad_norm": 0.4184306874016687, "learning_rate": 9.79689366786141e-06, "loss": 0.5141, "num_tokens": 416010475.0, "step": 4425 }, { "epoch": 0.7554190134835296, "grad_norm": 0.5288051462699217, "learning_rate": 9.790066564260114e-06, "loss": 0.6031, "num_tokens": 416081967.0, "step": 4426 }, { "epoch": 0.755589691073562, "grad_norm": 0.47237305635257193, "learning_rate": 9.783239460658817e-06, "loss": 0.5827, "num_tokens": 416178952.0, "step": 4427 }, { "epoch": 0.7557603686635944, "grad_norm": 0.4619512827946518, "learning_rate": 9.77641235705752e-06, "loss": 0.4758, "num_tokens": 416264123.0, "step": 4428 }, { "epoch": 0.755931046253627, "grad_norm": 0.53584633033244, "learning_rate": 9.769585253456221e-06, "loss": 0.5908, "num_tokens": 416346619.0, "step": 4429 }, { "epoch": 0.7561017238436594, "grad_norm": 0.47083893072125776, "learning_rate": 9.762758149854925e-06, "loss": 0.547, "num_tokens": 416434061.0, "step": 4430 }, { "epoch": 0.7562724014336918, "grad_norm": 0.4766061151674145, "learning_rate": 9.755931046253627e-06, "loss": 0.5882, "num_tokens": 416528103.0, "step": 4431 }, { "epoch": 0.7564430790237242, "grad_norm": 0.4785649510670751, "learning_rate": 9.749103942652331e-06, "loss": 0.5587, "num_tokens": 416615126.0, "step": 4432 }, { "epoch": 0.7566137566137566, "grad_norm": 0.4414932917681694, "learning_rate": 9.742276839051033e-06, "loss": 0.543, "num_tokens": 416723468.0, "step": 4433 }, { "epoch": 0.756784434203789, "grad_norm": 0.5057221550025812, "learning_rate": 9.735449735449735e-06, "loss": 0.6204, "num_tokens": 416808169.0, "step": 4434 }, { "epoch": 0.7569551117938215, "grad_norm": 0.46592788187874246, "learning_rate": 9.728622631848439e-06, "loss": 0.491, "num_tokens": 416899205.0, "step": 4435 }, { "epoch": 0.7571257893838539, "grad_norm": 0.5677222784127941, "learning_rate": 9.721795528247143e-06, "loss": 0.5944, "num_tokens": 416971393.0, "step": 4436 }, { "epoch": 0.7572964669738863, "grad_norm": 0.48398148471118874, "learning_rate": 9.714968424645845e-06, "loss": 0.5986, "num_tokens": 417063000.0, "step": 4437 }, { "epoch": 0.7574671445639187, "grad_norm": 0.4302547660666367, "learning_rate": 9.708141321044547e-06, "loss": 0.5802, "num_tokens": 417182394.0, "step": 4438 }, { "epoch": 0.7576378221539511, "grad_norm": 0.53584224777, "learning_rate": 9.70131421744325e-06, "loss": 0.6026, "num_tokens": 417262778.0, "step": 4439 }, { "epoch": 0.7578084997439836, "grad_norm": 0.45091215848632504, "learning_rate": 9.694487113841954e-06, "loss": 0.5593, "num_tokens": 417357496.0, "step": 4440 }, { "epoch": 0.7579791773340161, "grad_norm": 0.4704919769506679, "learning_rate": 9.687660010240656e-06, "loss": 0.5861, "num_tokens": 417452156.0, "step": 4441 }, { "epoch": 0.7581498549240485, "grad_norm": 0.47206488440748234, "learning_rate": 9.680832906639359e-06, "loss": 0.5619, "num_tokens": 417544617.0, "step": 4442 }, { "epoch": 0.7583205325140809, "grad_norm": 0.44431901705383203, "learning_rate": 9.674005803038062e-06, "loss": 0.5785, "num_tokens": 417653348.0, "step": 4443 }, { "epoch": 0.7584912101041134, "grad_norm": 0.5421588661715374, "learning_rate": 9.667178699436764e-06, "loss": 0.6784, "num_tokens": 417751633.0, "step": 4444 }, { "epoch": 0.7586618876941458, "grad_norm": 0.45168457666970413, "learning_rate": 9.660351595835468e-06, "loss": 0.5026, "num_tokens": 417844504.0, "step": 4445 }, { "epoch": 0.7588325652841782, "grad_norm": 0.4846145736296778, "learning_rate": 9.65352449223417e-06, "loss": 0.5622, "num_tokens": 417927221.0, "step": 4446 }, { "epoch": 0.7590032428742106, "grad_norm": 0.45546712008511253, "learning_rate": 9.646697388632872e-06, "loss": 0.5406, "num_tokens": 418021885.0, "step": 4447 }, { "epoch": 0.759173920464243, "grad_norm": 0.5074577391146845, "learning_rate": 9.639870285031576e-06, "loss": 0.6139, "num_tokens": 418109827.0, "step": 4448 }, { "epoch": 0.7593445980542755, "grad_norm": 0.554466551284269, "learning_rate": 9.63304318143028e-06, "loss": 0.6127, "num_tokens": 418174755.0, "step": 4449 }, { "epoch": 0.7595152756443079, "grad_norm": 0.4503537733326652, "learning_rate": 9.626216077828982e-06, "loss": 0.5485, "num_tokens": 418268991.0, "step": 4450 }, { "epoch": 0.7596859532343403, "grad_norm": 0.4385008623586919, "learning_rate": 9.619388974227684e-06, "loss": 0.543, "num_tokens": 418367286.0, "step": 4451 }, { "epoch": 0.7598566308243727, "grad_norm": 0.47902203834350693, "learning_rate": 9.612561870626388e-06, "loss": 0.5738, "num_tokens": 418456021.0, "step": 4452 }, { "epoch": 0.7600273084144051, "grad_norm": 0.48235771620867257, "learning_rate": 9.60573476702509e-06, "loss": 0.5056, "num_tokens": 418547971.0, "step": 4453 }, { "epoch": 0.7601979860044377, "grad_norm": 0.42915176541265265, "learning_rate": 9.598907663423794e-06, "loss": 0.4778, "num_tokens": 418649918.0, "step": 4454 }, { "epoch": 0.7603686635944701, "grad_norm": 0.44832898906711677, "learning_rate": 9.592080559822496e-06, "loss": 0.5117, "num_tokens": 418741098.0, "step": 4455 }, { "epoch": 0.7605393411845025, "grad_norm": 0.4943682236286687, "learning_rate": 9.5852534562212e-06, "loss": 0.5655, "num_tokens": 418820464.0, "step": 4456 }, { "epoch": 0.7607100187745349, "grad_norm": 0.5562416507808026, "learning_rate": 9.578426352619901e-06, "loss": 0.5627, "num_tokens": 418893942.0, "step": 4457 }, { "epoch": 0.7608806963645673, "grad_norm": 0.5228132458211124, "learning_rate": 9.571599249018605e-06, "loss": 0.5658, "num_tokens": 418965203.0, "step": 4458 }, { "epoch": 0.7610513739545998, "grad_norm": 0.5163812048230894, "learning_rate": 9.564772145417307e-06, "loss": 0.6287, "num_tokens": 419051865.0, "step": 4459 }, { "epoch": 0.7612220515446322, "grad_norm": 0.47400328061836805, "learning_rate": 9.557945041816011e-06, "loss": 0.6036, "num_tokens": 419147428.0, "step": 4460 }, { "epoch": 0.7613927291346646, "grad_norm": 0.5055300628234156, "learning_rate": 9.551117938214713e-06, "loss": 0.6109, "num_tokens": 419237651.0, "step": 4461 }, { "epoch": 0.761563406724697, "grad_norm": 0.4971720869429387, "learning_rate": 9.544290834613415e-06, "loss": 0.6623, "num_tokens": 419337762.0, "step": 4462 }, { "epoch": 0.7617340843147294, "grad_norm": 0.4725981493813449, "learning_rate": 9.537463731012119e-06, "loss": 0.5342, "num_tokens": 419424210.0, "step": 4463 }, { "epoch": 0.7619047619047619, "grad_norm": 0.4756196201415142, "learning_rate": 9.530636627410823e-06, "loss": 0.5242, "num_tokens": 419502684.0, "step": 4464 }, { "epoch": 0.7620754394947943, "grad_norm": 0.47948915255643176, "learning_rate": 9.523809523809525e-06, "loss": 0.5806, "num_tokens": 419590646.0, "step": 4465 }, { "epoch": 0.7622461170848268, "grad_norm": 0.44918915041102403, "learning_rate": 9.516982420208227e-06, "loss": 0.5696, "num_tokens": 419699397.0, "step": 4466 }, { "epoch": 0.7624167946748592, "grad_norm": 0.45082740020220635, "learning_rate": 9.51015531660693e-06, "loss": 0.5631, "num_tokens": 419814838.0, "step": 4467 }, { "epoch": 0.7625874722648917, "grad_norm": 0.4930388300987612, "learning_rate": 9.503328213005634e-06, "loss": 0.5483, "num_tokens": 419897601.0, "step": 4468 }, { "epoch": 0.7627581498549241, "grad_norm": 0.4664389061401222, "learning_rate": 9.496501109404336e-06, "loss": 0.5548, "num_tokens": 419985533.0, "step": 4469 }, { "epoch": 0.7629288274449565, "grad_norm": 0.43422588998117245, "learning_rate": 9.489674005803038e-06, "loss": 0.532, "num_tokens": 420093614.0, "step": 4470 }, { "epoch": 0.7630995050349889, "grad_norm": 0.5164742395987232, "learning_rate": 9.482846902201742e-06, "loss": 0.5597, "num_tokens": 420203468.0, "step": 4471 }, { "epoch": 0.7632701826250213, "grad_norm": 0.4606169034850613, "learning_rate": 9.476019798600444e-06, "loss": 0.5633, "num_tokens": 420295795.0, "step": 4472 }, { "epoch": 0.7634408602150538, "grad_norm": 0.4298137177420179, "learning_rate": 9.469192694999148e-06, "loss": 0.5142, "num_tokens": 420401449.0, "step": 4473 }, { "epoch": 0.7636115378050862, "grad_norm": 0.5180350877667554, "learning_rate": 9.46236559139785e-06, "loss": 0.6878, "num_tokens": 420492757.0, "step": 4474 }, { "epoch": 0.7637822153951186, "grad_norm": 0.502604477702309, "learning_rate": 9.455538487796552e-06, "loss": 0.5908, "num_tokens": 420582341.0, "step": 4475 }, { "epoch": 0.763952892985151, "grad_norm": 0.5197702963335067, "learning_rate": 9.448711384195256e-06, "loss": 0.6118, "num_tokens": 420672640.0, "step": 4476 }, { "epoch": 0.7641235705751834, "grad_norm": 0.48318275521017234, "learning_rate": 9.44188428059396e-06, "loss": 0.5424, "num_tokens": 420755614.0, "step": 4477 }, { "epoch": 0.764294248165216, "grad_norm": 0.5551561645447183, "learning_rate": 9.435057176992662e-06, "loss": 0.5034, "num_tokens": 420817357.0, "step": 4478 }, { "epoch": 0.7644649257552484, "grad_norm": 0.45424288018532055, "learning_rate": 9.428230073391364e-06, "loss": 0.4805, "num_tokens": 420898052.0, "step": 4479 }, { "epoch": 0.7646356033452808, "grad_norm": 0.44062240568979877, "learning_rate": 9.421402969790068e-06, "loss": 0.5322, "num_tokens": 421007593.0, "step": 4480 }, { "epoch": 0.7648062809353132, "grad_norm": 0.4615501845499368, "learning_rate": 9.41457586618877e-06, "loss": 0.5579, "num_tokens": 421105291.0, "step": 4481 }, { "epoch": 0.7649769585253456, "grad_norm": 0.5183331439639892, "learning_rate": 9.407748762587473e-06, "loss": 0.5527, "num_tokens": 421174309.0, "step": 4482 }, { "epoch": 0.7651476361153781, "grad_norm": 0.47869916227293274, "learning_rate": 9.400921658986176e-06, "loss": 0.6004, "num_tokens": 421274236.0, "step": 4483 }, { "epoch": 0.7653183137054105, "grad_norm": 0.5415135050241976, "learning_rate": 9.394094555384878e-06, "loss": 0.6221, "num_tokens": 421348146.0, "step": 4484 }, { "epoch": 0.7654889912954429, "grad_norm": 0.5111591328330107, "learning_rate": 9.387267451783581e-06, "loss": 0.5464, "num_tokens": 421420258.0, "step": 4485 }, { "epoch": 0.7656596688854753, "grad_norm": 0.44018455713514754, "learning_rate": 9.380440348182285e-06, "loss": 0.5723, "num_tokens": 421530630.0, "step": 4486 }, { "epoch": 0.7658303464755077, "grad_norm": 0.4448382697916638, "learning_rate": 9.373613244580987e-06, "loss": 0.5464, "num_tokens": 421630743.0, "step": 4487 }, { "epoch": 0.7660010240655402, "grad_norm": 0.466854008802482, "learning_rate": 9.36678614097969e-06, "loss": 0.5247, "num_tokens": 421723985.0, "step": 4488 }, { "epoch": 0.7661717016555726, "grad_norm": 0.5036645525859088, "learning_rate": 9.359959037378393e-06, "loss": 0.5692, "num_tokens": 421803255.0, "step": 4489 }, { "epoch": 0.766342379245605, "grad_norm": 0.45553414531159214, "learning_rate": 9.353131933777095e-06, "loss": 0.6006, "num_tokens": 421921109.0, "step": 4490 }, { "epoch": 0.7665130568356375, "grad_norm": 0.49256463755469476, "learning_rate": 9.346304830175799e-06, "loss": 0.5789, "num_tokens": 422013849.0, "step": 4491 }, { "epoch": 0.76668373442567, "grad_norm": 0.4972082520999557, "learning_rate": 9.339477726574503e-06, "loss": 0.528, "num_tokens": 422087678.0, "step": 4492 }, { "epoch": 0.7668544120157024, "grad_norm": 0.6402870365652874, "learning_rate": 9.332650622973205e-06, "loss": 0.58, "num_tokens": 422150396.0, "step": 4493 }, { "epoch": 0.7670250896057348, "grad_norm": 0.421366844075591, "learning_rate": 9.325823519371907e-06, "loss": 0.5624, "num_tokens": 422262896.0, "step": 4494 }, { "epoch": 0.7671957671957672, "grad_norm": 0.44552251649091873, "learning_rate": 9.31899641577061e-06, "loss": 0.5827, "num_tokens": 422366479.0, "step": 4495 }, { "epoch": 0.7673664447857996, "grad_norm": 0.48638136438210045, "learning_rate": 9.312169312169313e-06, "loss": 0.5708, "num_tokens": 422458179.0, "step": 4496 }, { "epoch": 0.767537122375832, "grad_norm": 0.46572230937283754, "learning_rate": 9.305342208568016e-06, "loss": 0.5697, "num_tokens": 422554795.0, "step": 4497 }, { "epoch": 0.7677077999658645, "grad_norm": 0.4599964000306288, "learning_rate": 9.298515104966718e-06, "loss": 0.5934, "num_tokens": 422660493.0, "step": 4498 }, { "epoch": 0.7678784775558969, "grad_norm": 0.4392866483591171, "learning_rate": 9.29168800136542e-06, "loss": 0.5411, "num_tokens": 422785430.0, "step": 4499 }, { "epoch": 0.7680491551459293, "grad_norm": 0.4325895804353509, "learning_rate": 9.284860897764124e-06, "loss": 0.5835, "num_tokens": 422899511.0, "step": 4500 }, { "epoch": 0.7682198327359617, "grad_norm": 0.4675937459198648, "learning_rate": 9.278033794162828e-06, "loss": 0.5535, "num_tokens": 422994509.0, "step": 4501 }, { "epoch": 0.7683905103259941, "grad_norm": 0.4460707969710765, "learning_rate": 9.27120669056153e-06, "loss": 0.5222, "num_tokens": 423100947.0, "step": 4502 }, { "epoch": 0.7685611879160267, "grad_norm": 0.47763686181654635, "learning_rate": 9.264379586960232e-06, "loss": 0.5686, "num_tokens": 423192100.0, "step": 4503 }, { "epoch": 0.7687318655060591, "grad_norm": 0.4381582146220424, "learning_rate": 9.257552483358936e-06, "loss": 0.5936, "num_tokens": 423308594.0, "step": 4504 }, { "epoch": 0.7689025430960915, "grad_norm": 0.4344479110125472, "learning_rate": 9.25072537975764e-06, "loss": 0.6032, "num_tokens": 423428767.0, "step": 4505 }, { "epoch": 0.7690732206861239, "grad_norm": 0.4696103464978412, "learning_rate": 9.243898276156342e-06, "loss": 0.6242, "num_tokens": 423524637.0, "step": 4506 }, { "epoch": 0.7692438982761564, "grad_norm": 0.544846380279152, "learning_rate": 9.237071172555044e-06, "loss": 0.5515, "num_tokens": 423597044.0, "step": 4507 }, { "epoch": 0.7694145758661888, "grad_norm": 0.4966117392754593, "learning_rate": 9.230244068953748e-06, "loss": 0.5016, "num_tokens": 423671447.0, "step": 4508 }, { "epoch": 0.7695852534562212, "grad_norm": 0.4796027211022811, "learning_rate": 9.22341696535245e-06, "loss": 0.5526, "num_tokens": 423758401.0, "step": 4509 }, { "epoch": 0.7697559310462536, "grad_norm": 0.44402344683286826, "learning_rate": 9.216589861751153e-06, "loss": 0.6101, "num_tokens": 423870571.0, "step": 4510 }, { "epoch": 0.769926608636286, "grad_norm": 0.4737783929095333, "learning_rate": 9.209762758149855e-06, "loss": 0.5656, "num_tokens": 423960887.0, "step": 4511 }, { "epoch": 0.7700972862263185, "grad_norm": 0.4412598378314977, "learning_rate": 9.202935654548558e-06, "loss": 0.4825, "num_tokens": 424053301.0, "step": 4512 }, { "epoch": 0.7702679638163509, "grad_norm": 0.49604181567494016, "learning_rate": 9.196108550947261e-06, "loss": 0.6146, "num_tokens": 424146036.0, "step": 4513 }, { "epoch": 0.7704386414063833, "grad_norm": 0.4302833005846738, "learning_rate": 9.189281447345965e-06, "loss": 0.5836, "num_tokens": 424258778.0, "step": 4514 }, { "epoch": 0.7706093189964157, "grad_norm": 0.4856301526278776, "learning_rate": 9.182454343744667e-06, "loss": 0.5664, "num_tokens": 424351612.0, "step": 4515 }, { "epoch": 0.7707799965864482, "grad_norm": 0.4555318038380037, "learning_rate": 9.17562724014337e-06, "loss": 0.5853, "num_tokens": 424468352.0, "step": 4516 }, { "epoch": 0.7709506741764807, "grad_norm": 0.49433097491706823, "learning_rate": 9.168800136542073e-06, "loss": 0.4355, "num_tokens": 424532010.0, "step": 4517 }, { "epoch": 0.7711213517665131, "grad_norm": 0.4421706436874724, "learning_rate": 9.161973032940775e-06, "loss": 0.5419, "num_tokens": 424635667.0, "step": 4518 }, { "epoch": 0.7712920293565455, "grad_norm": 0.47582799417174243, "learning_rate": 9.155145929339479e-06, "loss": 0.6047, "num_tokens": 424736999.0, "step": 4519 }, { "epoch": 0.7714627069465779, "grad_norm": 0.47779959145446543, "learning_rate": 9.148318825738181e-06, "loss": 0.5619, "num_tokens": 424821898.0, "step": 4520 }, { "epoch": 0.7716333845366103, "grad_norm": 0.4457925740750154, "learning_rate": 9.141491722136883e-06, "loss": 0.519, "num_tokens": 424913099.0, "step": 4521 }, { "epoch": 0.7718040621266428, "grad_norm": 0.4845351346253027, "learning_rate": 9.134664618535587e-06, "loss": 0.4775, "num_tokens": 424995311.0, "step": 4522 }, { "epoch": 0.7719747397166752, "grad_norm": 0.4870649214438913, "learning_rate": 9.12783751493429e-06, "loss": 0.5311, "num_tokens": 425076533.0, "step": 4523 }, { "epoch": 0.7721454173067076, "grad_norm": 0.5047960364096198, "learning_rate": 9.121010411332993e-06, "loss": 0.5629, "num_tokens": 425160505.0, "step": 4524 }, { "epoch": 0.77231609489674, "grad_norm": 0.4774791890071037, "learning_rate": 9.114183307731695e-06, "loss": 0.6364, "num_tokens": 425266390.0, "step": 4525 }, { "epoch": 0.7724867724867724, "grad_norm": 0.4982697968608834, "learning_rate": 9.107356204130398e-06, "loss": 0.6118, "num_tokens": 425355016.0, "step": 4526 }, { "epoch": 0.7726574500768049, "grad_norm": 0.48494443072350496, "learning_rate": 9.1005291005291e-06, "loss": 0.5267, "num_tokens": 425442743.0, "step": 4527 }, { "epoch": 0.7728281276668374, "grad_norm": 0.4426526785579692, "learning_rate": 9.093701996927804e-06, "loss": 0.6169, "num_tokens": 425554291.0, "step": 4528 }, { "epoch": 0.7729988052568698, "grad_norm": 0.5278933852219229, "learning_rate": 9.086874893326508e-06, "loss": 0.6409, "num_tokens": 425640934.0, "step": 4529 }, { "epoch": 0.7731694828469022, "grad_norm": 0.5418593751130061, "learning_rate": 9.08004778972521e-06, "loss": 0.6371, "num_tokens": 425744271.0, "step": 4530 }, { "epoch": 0.7733401604369347, "grad_norm": 0.49844362826000577, "learning_rate": 9.073220686123912e-06, "loss": 0.6188, "num_tokens": 425840523.0, "step": 4531 }, { "epoch": 0.7735108380269671, "grad_norm": 0.4552683074353458, "learning_rate": 9.066393582522616e-06, "loss": 0.5444, "num_tokens": 425938272.0, "step": 4532 }, { "epoch": 0.7736815156169995, "grad_norm": 0.4272446088397507, "learning_rate": 9.05956647892132e-06, "loss": 0.5872, "num_tokens": 426065559.0, "step": 4533 }, { "epoch": 0.7738521932070319, "grad_norm": 0.4746114842469308, "learning_rate": 9.052739375320022e-06, "loss": 0.5118, "num_tokens": 426147897.0, "step": 4534 }, { "epoch": 0.7740228707970643, "grad_norm": 0.4570658926495669, "learning_rate": 9.045912271718724e-06, "loss": 0.5508, "num_tokens": 426254551.0, "step": 4535 }, { "epoch": 0.7741935483870968, "grad_norm": 0.49061508916507635, "learning_rate": 9.039085168117428e-06, "loss": 0.549, "num_tokens": 426344735.0, "step": 4536 }, { "epoch": 0.7743642259771292, "grad_norm": 0.4770853870665972, "learning_rate": 9.03225806451613e-06, "loss": 0.5804, "num_tokens": 426434480.0, "step": 4537 }, { "epoch": 0.7745349035671616, "grad_norm": 0.5089379414067082, "learning_rate": 9.025430960914833e-06, "loss": 0.6217, "num_tokens": 426526671.0, "step": 4538 }, { "epoch": 0.774705581157194, "grad_norm": 0.4824664243245564, "learning_rate": 9.018603857313535e-06, "loss": 0.5867, "num_tokens": 426617131.0, "step": 4539 }, { "epoch": 0.7748762587472265, "grad_norm": 0.46334079041523496, "learning_rate": 9.011776753712238e-06, "loss": 0.5214, "num_tokens": 426717112.0, "step": 4540 }, { "epoch": 0.775046936337259, "grad_norm": 0.5088525234917365, "learning_rate": 9.004949650110941e-06, "loss": 0.5795, "num_tokens": 426818080.0, "step": 4541 }, { "epoch": 0.7752176139272914, "grad_norm": 0.46875316995243166, "learning_rate": 8.998122546509645e-06, "loss": 0.5506, "num_tokens": 426913226.0, "step": 4542 }, { "epoch": 0.7753882915173238, "grad_norm": 0.5309452183948454, "learning_rate": 8.991295442908347e-06, "loss": 0.6116, "num_tokens": 426985218.0, "step": 4543 }, { "epoch": 0.7755589691073562, "grad_norm": 0.44376451728445676, "learning_rate": 8.98446833930705e-06, "loss": 0.6068, "num_tokens": 427095894.0, "step": 4544 }, { "epoch": 0.7757296466973886, "grad_norm": 0.5213292600816617, "learning_rate": 8.977641235705753e-06, "loss": 0.5886, "num_tokens": 427194182.0, "step": 4545 }, { "epoch": 0.7759003242874211, "grad_norm": 0.4573978683130957, "learning_rate": 8.970814132104455e-06, "loss": 0.5888, "num_tokens": 427295496.0, "step": 4546 }, { "epoch": 0.7760710018774535, "grad_norm": 0.46991400018446594, "learning_rate": 8.963987028503159e-06, "loss": 0.6199, "num_tokens": 427402652.0, "step": 4547 }, { "epoch": 0.7762416794674859, "grad_norm": 0.4575615911416777, "learning_rate": 8.95715992490186e-06, "loss": 0.5637, "num_tokens": 427504517.0, "step": 4548 }, { "epoch": 0.7764123570575183, "grad_norm": 0.4214203033876525, "learning_rate": 8.950332821300563e-06, "loss": 0.476, "num_tokens": 427609272.0, "step": 4549 }, { "epoch": 0.7765830346475507, "grad_norm": 0.45310210370323484, "learning_rate": 8.943505717699267e-06, "loss": 0.5463, "num_tokens": 427706959.0, "step": 4550 }, { "epoch": 0.7767537122375832, "grad_norm": 0.5017968125534078, "learning_rate": 8.93667861409797e-06, "loss": 0.5749, "num_tokens": 427782946.0, "step": 4551 }, { "epoch": 0.7769243898276156, "grad_norm": 0.48139278986226147, "learning_rate": 8.929851510496672e-06, "loss": 0.5725, "num_tokens": 427873389.0, "step": 4552 }, { "epoch": 0.7770950674176481, "grad_norm": 0.5333666968274469, "learning_rate": 8.923024406895375e-06, "loss": 0.4898, "num_tokens": 427929686.0, "step": 4553 }, { "epoch": 0.7772657450076805, "grad_norm": 0.4762745221295784, "learning_rate": 8.916197303294078e-06, "loss": 0.57, "num_tokens": 428023064.0, "step": 4554 }, { "epoch": 0.777436422597713, "grad_norm": 0.4497015992064834, "learning_rate": 8.90937019969278e-06, "loss": 0.5182, "num_tokens": 428113801.0, "step": 4555 }, { "epoch": 0.7776071001877454, "grad_norm": 0.4796222442567248, "learning_rate": 8.902543096091484e-06, "loss": 0.4844, "num_tokens": 428194877.0, "step": 4556 }, { "epoch": 0.7777777777777778, "grad_norm": 0.4559341996495817, "learning_rate": 8.895715992490186e-06, "loss": 0.5346, "num_tokens": 428287715.0, "step": 4557 }, { "epoch": 0.7779484553678102, "grad_norm": 0.48965669488191427, "learning_rate": 8.888888888888888e-06, "loss": 0.578, "num_tokens": 428371047.0, "step": 4558 }, { "epoch": 0.7781191329578426, "grad_norm": 0.4522941584080557, "learning_rate": 8.882061785287592e-06, "loss": 0.5559, "num_tokens": 428472636.0, "step": 4559 }, { "epoch": 0.778289810547875, "grad_norm": 0.48856879027383443, "learning_rate": 8.875234681686296e-06, "loss": 0.5835, "num_tokens": 428557320.0, "step": 4560 }, { "epoch": 0.7784604881379075, "grad_norm": 0.48421760626196886, "learning_rate": 8.868407578084998e-06, "loss": 0.545, "num_tokens": 428642280.0, "step": 4561 }, { "epoch": 0.7786311657279399, "grad_norm": 0.4012509199182392, "learning_rate": 8.861580474483702e-06, "loss": 0.569, "num_tokens": 428775455.0, "step": 4562 }, { "epoch": 0.7788018433179723, "grad_norm": 0.44142786312754967, "learning_rate": 8.854753370882404e-06, "loss": 0.5946, "num_tokens": 428893146.0, "step": 4563 }, { "epoch": 0.7789725209080047, "grad_norm": 0.49770792304713324, "learning_rate": 8.847926267281107e-06, "loss": 0.4907, "num_tokens": 428964969.0, "step": 4564 }, { "epoch": 0.7791431984980373, "grad_norm": 0.4654778908875265, "learning_rate": 8.84109916367981e-06, "loss": 0.6055, "num_tokens": 429063596.0, "step": 4565 }, { "epoch": 0.7793138760880697, "grad_norm": 0.494499183482781, "learning_rate": 8.834272060078513e-06, "loss": 0.5692, "num_tokens": 429150429.0, "step": 4566 }, { "epoch": 0.7794845536781021, "grad_norm": 0.46921031785447426, "learning_rate": 8.827444956477215e-06, "loss": 0.5601, "num_tokens": 429241188.0, "step": 4567 }, { "epoch": 0.7796552312681345, "grad_norm": 0.4764747188663704, "learning_rate": 8.820617852875917e-06, "loss": 0.6309, "num_tokens": 429351429.0, "step": 4568 }, { "epoch": 0.7798259088581669, "grad_norm": 0.5036611806146156, "learning_rate": 8.813790749274621e-06, "loss": 0.5691, "num_tokens": 429432206.0, "step": 4569 }, { "epoch": 0.7799965864481994, "grad_norm": 0.4700963740689947, "learning_rate": 8.806963645673325e-06, "loss": 0.6333, "num_tokens": 429550658.0, "step": 4570 }, { "epoch": 0.7801672640382318, "grad_norm": 0.4367878318534245, "learning_rate": 8.800136542072027e-06, "loss": 0.5275, "num_tokens": 429653275.0, "step": 4571 }, { "epoch": 0.7803379416282642, "grad_norm": 0.44579577522944247, "learning_rate": 8.793309438470729e-06, "loss": 0.5135, "num_tokens": 429754875.0, "step": 4572 }, { "epoch": 0.7805086192182966, "grad_norm": 0.49361128642347313, "learning_rate": 8.786482334869433e-06, "loss": 0.6328, "num_tokens": 429848126.0, "step": 4573 }, { "epoch": 0.780679296808329, "grad_norm": 0.46770657583914893, "learning_rate": 8.779655231268135e-06, "loss": 0.6048, "num_tokens": 429951925.0, "step": 4574 }, { "epoch": 0.7808499743983615, "grad_norm": 0.4290259108386704, "learning_rate": 8.772828127666839e-06, "loss": 0.5436, "num_tokens": 430057091.0, "step": 4575 }, { "epoch": 0.7810206519883939, "grad_norm": 0.472695336897098, "learning_rate": 8.76600102406554e-06, "loss": 0.5752, "num_tokens": 430148561.0, "step": 4576 }, { "epoch": 0.7811913295784264, "grad_norm": 0.4955730812773947, "learning_rate": 8.759173920464243e-06, "loss": 0.6567, "num_tokens": 430251601.0, "step": 4577 }, { "epoch": 0.7813620071684588, "grad_norm": 0.4388469846718159, "learning_rate": 8.752346816862947e-06, "loss": 0.4859, "num_tokens": 430350158.0, "step": 4578 }, { "epoch": 0.7815326847584912, "grad_norm": 0.5089645039736462, "learning_rate": 8.74551971326165e-06, "loss": 0.5055, "num_tokens": 430422916.0, "step": 4579 }, { "epoch": 0.7817033623485237, "grad_norm": 0.4848574930650972, "learning_rate": 8.738692609660352e-06, "loss": 0.5821, "num_tokens": 430513810.0, "step": 4580 }, { "epoch": 0.7818740399385561, "grad_norm": 0.5054786389256934, "learning_rate": 8.731865506059055e-06, "loss": 0.5402, "num_tokens": 430594178.0, "step": 4581 }, { "epoch": 0.7820447175285885, "grad_norm": 0.4608060943162128, "learning_rate": 8.725038402457758e-06, "loss": 0.5499, "num_tokens": 430687482.0, "step": 4582 }, { "epoch": 0.7822153951186209, "grad_norm": 0.45069466346455134, "learning_rate": 8.71821129885646e-06, "loss": 0.5428, "num_tokens": 430784142.0, "step": 4583 }, { "epoch": 0.7823860727086533, "grad_norm": 0.49610871363859954, "learning_rate": 8.711384195255164e-06, "loss": 0.5383, "num_tokens": 430861803.0, "step": 4584 }, { "epoch": 0.7825567502986858, "grad_norm": 0.5096610792213119, "learning_rate": 8.704557091653866e-06, "loss": 0.638, "num_tokens": 430953415.0, "step": 4585 }, { "epoch": 0.7827274278887182, "grad_norm": 0.4957544243552195, "learning_rate": 8.697729988052568e-06, "loss": 0.5235, "num_tokens": 431038809.0, "step": 4586 }, { "epoch": 0.7828981054787506, "grad_norm": 0.4816357608419736, "learning_rate": 8.690902884451272e-06, "loss": 0.5918, "num_tokens": 431133164.0, "step": 4587 }, { "epoch": 0.783068783068783, "grad_norm": 0.4675032096139556, "learning_rate": 8.684075780849976e-06, "loss": 0.5007, "num_tokens": 431223902.0, "step": 4588 }, { "epoch": 0.7832394606588154, "grad_norm": 0.4463678166796841, "learning_rate": 8.677248677248678e-06, "loss": 0.5653, "num_tokens": 431329238.0, "step": 4589 }, { "epoch": 0.783410138248848, "grad_norm": 0.46199259006788307, "learning_rate": 8.67042157364738e-06, "loss": 0.5178, "num_tokens": 431421426.0, "step": 4590 }, { "epoch": 0.7835808158388804, "grad_norm": 0.43626465550114807, "learning_rate": 8.663594470046084e-06, "loss": 0.5674, "num_tokens": 431530721.0, "step": 4591 }, { "epoch": 0.7837514934289128, "grad_norm": 0.4425831088710978, "learning_rate": 8.656767366444786e-06, "loss": 0.6462, "num_tokens": 431648986.0, "step": 4592 }, { "epoch": 0.7839221710189452, "grad_norm": 0.5387976837963264, "learning_rate": 8.64994026284349e-06, "loss": 0.5932, "num_tokens": 431718197.0, "step": 4593 }, { "epoch": 0.7840928486089777, "grad_norm": 0.504602722676754, "learning_rate": 8.643113159242192e-06, "loss": 0.5541, "num_tokens": 431802491.0, "step": 4594 }, { "epoch": 0.7842635261990101, "grad_norm": 0.5570452574510517, "learning_rate": 8.636286055640895e-06, "loss": 0.5717, "num_tokens": 431902905.0, "step": 4595 }, { "epoch": 0.7844342037890425, "grad_norm": 0.49290517963269287, "learning_rate": 8.629458952039597e-06, "loss": 0.5333, "num_tokens": 431986921.0, "step": 4596 }, { "epoch": 0.7846048813790749, "grad_norm": 0.4430669303056807, "learning_rate": 8.622631848438301e-06, "loss": 0.5853, "num_tokens": 432103760.0, "step": 4597 }, { "epoch": 0.7847755589691073, "grad_norm": 0.43263665470350543, "learning_rate": 8.615804744837005e-06, "loss": 0.5665, "num_tokens": 432217443.0, "step": 4598 }, { "epoch": 0.7849462365591398, "grad_norm": 0.4675827908849969, "learning_rate": 8.608977641235707e-06, "loss": 0.5636, "num_tokens": 432315986.0, "step": 4599 }, { "epoch": 0.7851169141491722, "grad_norm": 0.45187513898974707, "learning_rate": 8.602150537634409e-06, "loss": 0.5939, "num_tokens": 432431178.0, "step": 4600 }, { "epoch": 0.7852875917392046, "grad_norm": 0.41866053729639074, "learning_rate": 8.595323434033113e-06, "loss": 0.5919, "num_tokens": 432566264.0, "step": 4601 }, { "epoch": 0.7854582693292371, "grad_norm": 0.4392651647741495, "learning_rate": 8.588496330431815e-06, "loss": 0.5975, "num_tokens": 432676588.0, "step": 4602 }, { "epoch": 0.7856289469192695, "grad_norm": 0.46103188378027415, "learning_rate": 8.581669226830519e-06, "loss": 0.5788, "num_tokens": 432774404.0, "step": 4603 }, { "epoch": 0.785799624509302, "grad_norm": 0.4633664233935847, "learning_rate": 8.57484212322922e-06, "loss": 0.5786, "num_tokens": 432877683.0, "step": 4604 }, { "epoch": 0.7859703020993344, "grad_norm": 0.398142534115965, "learning_rate": 8.568015019627923e-06, "loss": 0.5377, "num_tokens": 433009951.0, "step": 4605 }, { "epoch": 0.7861409796893668, "grad_norm": 0.4622303752097327, "learning_rate": 8.561187916026627e-06, "loss": 0.5903, "num_tokens": 433116374.0, "step": 4606 }, { "epoch": 0.7863116572793992, "grad_norm": 0.4454738364301083, "learning_rate": 8.55436081242533e-06, "loss": 0.5417, "num_tokens": 433217661.0, "step": 4607 }, { "epoch": 0.7864823348694316, "grad_norm": 0.5461894819053416, "learning_rate": 8.547533708824032e-06, "loss": 0.7025, "num_tokens": 433302406.0, "step": 4608 }, { "epoch": 0.7866530124594641, "grad_norm": 0.48384519476435206, "learning_rate": 8.540706605222734e-06, "loss": 0.5855, "num_tokens": 433394191.0, "step": 4609 }, { "epoch": 0.7868236900494965, "grad_norm": 0.48109797204044713, "learning_rate": 8.533879501621438e-06, "loss": 0.6413, "num_tokens": 433492501.0, "step": 4610 }, { "epoch": 0.7869943676395289, "grad_norm": 0.45452814177225837, "learning_rate": 8.52705239802014e-06, "loss": 0.5759, "num_tokens": 433589255.0, "step": 4611 }, { "epoch": 0.7871650452295613, "grad_norm": 0.4408157451697141, "learning_rate": 8.520225294418844e-06, "loss": 0.5747, "num_tokens": 433693911.0, "step": 4612 }, { "epoch": 0.7873357228195937, "grad_norm": 0.5400864311781873, "learning_rate": 8.513398190817546e-06, "loss": 0.6007, "num_tokens": 433794118.0, "step": 4613 }, { "epoch": 0.7875064004096262, "grad_norm": 0.4144863244227111, "learning_rate": 8.506571087216248e-06, "loss": 0.5568, "num_tokens": 433909925.0, "step": 4614 }, { "epoch": 0.7876770779996587, "grad_norm": 0.427347547210067, "learning_rate": 8.499743983614952e-06, "loss": 0.5611, "num_tokens": 434024463.0, "step": 4615 }, { "epoch": 0.7878477555896911, "grad_norm": 0.6020443125918318, "learning_rate": 8.492916880013656e-06, "loss": 0.6249, "num_tokens": 434108612.0, "step": 4616 }, { "epoch": 0.7880184331797235, "grad_norm": 0.43223270534319364, "learning_rate": 8.486089776412358e-06, "loss": 0.5238, "num_tokens": 434214804.0, "step": 4617 }, { "epoch": 0.788189110769756, "grad_norm": 0.4434831823321924, "learning_rate": 8.47926267281106e-06, "loss": 0.5839, "num_tokens": 434317418.0, "step": 4618 }, { "epoch": 0.7883597883597884, "grad_norm": 0.4325608888522041, "learning_rate": 8.472435569209764e-06, "loss": 0.5654, "num_tokens": 434429906.0, "step": 4619 }, { "epoch": 0.7885304659498208, "grad_norm": 0.4806149639795102, "learning_rate": 8.465608465608466e-06, "loss": 0.5797, "num_tokens": 434521263.0, "step": 4620 }, { "epoch": 0.7887011435398532, "grad_norm": 0.47374016853348877, "learning_rate": 8.45878136200717e-06, "loss": 0.5365, "num_tokens": 434606801.0, "step": 4621 }, { "epoch": 0.7888718211298856, "grad_norm": 0.5195008072318411, "learning_rate": 8.451954258405872e-06, "loss": 0.5816, "num_tokens": 434686037.0, "step": 4622 }, { "epoch": 0.789042498719918, "grad_norm": 0.46900584101829657, "learning_rate": 8.445127154804574e-06, "loss": 0.5799, "num_tokens": 434797831.0, "step": 4623 }, { "epoch": 0.7892131763099505, "grad_norm": 0.41733832761401374, "learning_rate": 8.438300051203277e-06, "loss": 0.5922, "num_tokens": 434921400.0, "step": 4624 }, { "epoch": 0.7893838538999829, "grad_norm": 0.44359222910953233, "learning_rate": 8.431472947601981e-06, "loss": 0.589, "num_tokens": 435026579.0, "step": 4625 }, { "epoch": 0.7895545314900153, "grad_norm": 0.5298700464635128, "learning_rate": 8.424645844000683e-06, "loss": 0.6014, "num_tokens": 435107128.0, "step": 4626 }, { "epoch": 0.7897252090800478, "grad_norm": 0.44638202515282044, "learning_rate": 8.417818740399385e-06, "loss": 0.5614, "num_tokens": 435215881.0, "step": 4627 }, { "epoch": 0.7898958866700803, "grad_norm": 0.5016526106542457, "learning_rate": 8.410991636798089e-06, "loss": 0.6482, "num_tokens": 435306018.0, "step": 4628 }, { "epoch": 0.7900665642601127, "grad_norm": 0.4780506967021364, "learning_rate": 8.404164533196793e-06, "loss": 0.6216, "num_tokens": 435404001.0, "step": 4629 }, { "epoch": 0.7902372418501451, "grad_norm": 0.45978715919892893, "learning_rate": 8.397337429595495e-06, "loss": 0.5413, "num_tokens": 435498817.0, "step": 4630 }, { "epoch": 0.7904079194401775, "grad_norm": 0.4357432717585096, "learning_rate": 8.390510325994199e-06, "loss": 0.5223, "num_tokens": 435599883.0, "step": 4631 }, { "epoch": 0.7905785970302099, "grad_norm": 0.4442680183572049, "learning_rate": 8.3836832223929e-06, "loss": 0.6473, "num_tokens": 435711063.0, "step": 4632 }, { "epoch": 0.7907492746202424, "grad_norm": 0.500758003283685, "learning_rate": 8.376856118791603e-06, "loss": 0.5725, "num_tokens": 435796663.0, "step": 4633 }, { "epoch": 0.7909199522102748, "grad_norm": 0.5191335675844869, "learning_rate": 8.370029015190307e-06, "loss": 0.5261, "num_tokens": 435866081.0, "step": 4634 }, { "epoch": 0.7910906298003072, "grad_norm": 0.5979566571772553, "learning_rate": 8.36320191158901e-06, "loss": 0.6544, "num_tokens": 435926609.0, "step": 4635 }, { "epoch": 0.7912613073903396, "grad_norm": 0.45726854108560566, "learning_rate": 8.356374807987712e-06, "loss": 0.5217, "num_tokens": 436015005.0, "step": 4636 }, { "epoch": 0.791431984980372, "grad_norm": 0.7927775343042611, "learning_rate": 8.349547704386414e-06, "loss": 0.5485, "num_tokens": 436108526.0, "step": 4637 }, { "epoch": 0.7916026625704045, "grad_norm": 0.44920973042342965, "learning_rate": 8.342720600785118e-06, "loss": 0.5747, "num_tokens": 436211074.0, "step": 4638 }, { "epoch": 0.791773340160437, "grad_norm": 0.4822836248328942, "learning_rate": 8.33589349718382e-06, "loss": 0.6166, "num_tokens": 436306243.0, "step": 4639 }, { "epoch": 0.7919440177504694, "grad_norm": 0.4563287753253783, "learning_rate": 8.329066393582524e-06, "loss": 0.6283, "num_tokens": 436414296.0, "step": 4640 }, { "epoch": 0.7921146953405018, "grad_norm": 0.525407627849846, "learning_rate": 8.322239289981226e-06, "loss": 0.5997, "num_tokens": 436490305.0, "step": 4641 }, { "epoch": 0.7922853729305342, "grad_norm": 0.43797166215374833, "learning_rate": 8.315412186379928e-06, "loss": 0.6034, "num_tokens": 436615076.0, "step": 4642 }, { "epoch": 0.7924560505205667, "grad_norm": 0.4659227003901962, "learning_rate": 8.308585082778632e-06, "loss": 0.5769, "num_tokens": 436708678.0, "step": 4643 }, { "epoch": 0.7926267281105991, "grad_norm": 0.4347511103543041, "learning_rate": 8.301757979177336e-06, "loss": 0.532, "num_tokens": 436810295.0, "step": 4644 }, { "epoch": 0.7927974057006315, "grad_norm": 0.4929947822057612, "learning_rate": 8.294930875576038e-06, "loss": 0.5755, "num_tokens": 436897372.0, "step": 4645 }, { "epoch": 0.7929680832906639, "grad_norm": 0.44660059342540664, "learning_rate": 8.28810377197474e-06, "loss": 0.5349, "num_tokens": 436994532.0, "step": 4646 }, { "epoch": 0.7931387608806963, "grad_norm": 0.5354149399697585, "learning_rate": 8.281276668373444e-06, "loss": 0.5593, "num_tokens": 437065092.0, "step": 4647 }, { "epoch": 0.7933094384707288, "grad_norm": 0.47844954690907116, "learning_rate": 8.274449564772146e-06, "loss": 0.5148, "num_tokens": 437148781.0, "step": 4648 }, { "epoch": 0.7934801160607612, "grad_norm": 0.5391541644331918, "learning_rate": 8.26762246117085e-06, "loss": 0.5593, "num_tokens": 437217459.0, "step": 4649 }, { "epoch": 0.7936507936507936, "grad_norm": 0.4749223293470301, "learning_rate": 8.260795357569551e-06, "loss": 0.6505, "num_tokens": 437329320.0, "step": 4650 }, { "epoch": 0.793821471240826, "grad_norm": 0.43046038306084167, "learning_rate": 8.253968253968254e-06, "loss": 0.5078, "num_tokens": 437431474.0, "step": 4651 }, { "epoch": 0.7939921488308586, "grad_norm": 0.45041606611280705, "learning_rate": 8.247141150366957e-06, "loss": 0.6076, "num_tokens": 437555576.0, "step": 4652 }, { "epoch": 0.794162826420891, "grad_norm": 0.5467136829624468, "learning_rate": 8.240314046765661e-06, "loss": 0.6847, "num_tokens": 437639255.0, "step": 4653 }, { "epoch": 0.7943335040109234, "grad_norm": 0.5155956774945846, "learning_rate": 8.233486943164363e-06, "loss": 0.5401, "num_tokens": 437709778.0, "step": 4654 }, { "epoch": 0.7945041816009558, "grad_norm": 0.45093491332247954, "learning_rate": 8.226659839563065e-06, "loss": 0.518, "num_tokens": 437812022.0, "step": 4655 }, { "epoch": 0.7946748591909882, "grad_norm": 0.4870641646383607, "learning_rate": 8.219832735961769e-06, "loss": 0.5366, "num_tokens": 437891770.0, "step": 4656 }, { "epoch": 0.7948455367810207, "grad_norm": 0.49134484889626057, "learning_rate": 8.213005632360471e-06, "loss": 0.6033, "num_tokens": 437988305.0, "step": 4657 }, { "epoch": 0.7950162143710531, "grad_norm": 0.44440244613634977, "learning_rate": 8.206178528759175e-06, "loss": 0.5702, "num_tokens": 438091870.0, "step": 4658 }, { "epoch": 0.7951868919610855, "grad_norm": 0.5268637119108853, "learning_rate": 8.199351425157877e-06, "loss": 0.5373, "num_tokens": 438158229.0, "step": 4659 }, { "epoch": 0.7953575695511179, "grad_norm": 0.5775655646019227, "learning_rate": 8.19252432155658e-06, "loss": 0.5918, "num_tokens": 438217052.0, "step": 4660 }, { "epoch": 0.7955282471411503, "grad_norm": 0.5024238776609122, "learning_rate": 8.185697217955283e-06, "loss": 0.6908, "num_tokens": 438321610.0, "step": 4661 }, { "epoch": 0.7956989247311828, "grad_norm": 0.43373112751671394, "learning_rate": 8.178870114353986e-06, "loss": 0.4988, "num_tokens": 438428646.0, "step": 4662 }, { "epoch": 0.7958696023212152, "grad_norm": 0.43670897010782955, "learning_rate": 8.172043010752689e-06, "loss": 0.5892, "num_tokens": 438537130.0, "step": 4663 }, { "epoch": 0.7960402799112477, "grad_norm": 0.4863851549844351, "learning_rate": 8.165215907151392e-06, "loss": 0.5321, "num_tokens": 438616370.0, "step": 4664 }, { "epoch": 0.7962109575012801, "grad_norm": 0.4279499308461102, "learning_rate": 8.158388803550094e-06, "loss": 0.583, "num_tokens": 438731017.0, "step": 4665 }, { "epoch": 0.7963816350913125, "grad_norm": 0.43177232703506974, "learning_rate": 8.151561699948798e-06, "loss": 0.6101, "num_tokens": 438850451.0, "step": 4666 }, { "epoch": 0.796552312681345, "grad_norm": 0.47255085694124493, "learning_rate": 8.1447345963475e-06, "loss": 0.6493, "num_tokens": 438954493.0, "step": 4667 }, { "epoch": 0.7967229902713774, "grad_norm": 0.5000918217271694, "learning_rate": 8.137907492746204e-06, "loss": 0.5678, "num_tokens": 439033174.0, "step": 4668 }, { "epoch": 0.7968936678614098, "grad_norm": 0.43051004983527585, "learning_rate": 8.131080389144906e-06, "loss": 0.552, "num_tokens": 439146241.0, "step": 4669 }, { "epoch": 0.7970643454514422, "grad_norm": 0.46971760806463425, "learning_rate": 8.124253285543608e-06, "loss": 0.5236, "num_tokens": 439246240.0, "step": 4670 }, { "epoch": 0.7972350230414746, "grad_norm": 0.4660851937776103, "learning_rate": 8.117426181942312e-06, "loss": 0.6093, "num_tokens": 439349486.0, "step": 4671 }, { "epoch": 0.7974057006315071, "grad_norm": 0.4979952260345353, "learning_rate": 8.110599078341016e-06, "loss": 0.559, "num_tokens": 439430743.0, "step": 4672 }, { "epoch": 0.7975763782215395, "grad_norm": 0.42757577792360335, "learning_rate": 8.103771974739718e-06, "loss": 0.5109, "num_tokens": 439535835.0, "step": 4673 }, { "epoch": 0.7977470558115719, "grad_norm": 0.44250245049017345, "learning_rate": 8.09694487113842e-06, "loss": 0.5813, "num_tokens": 439647673.0, "step": 4674 }, { "epoch": 0.7979177334016043, "grad_norm": 0.5105691675240714, "learning_rate": 8.090117767537124e-06, "loss": 0.5487, "num_tokens": 439718961.0, "step": 4675 }, { "epoch": 0.7980884109916367, "grad_norm": 0.4554244975395649, "learning_rate": 8.083290663935826e-06, "loss": 0.5466, "num_tokens": 439813555.0, "step": 4676 }, { "epoch": 0.7982590885816693, "grad_norm": 0.39854777839388333, "learning_rate": 8.07646356033453e-06, "loss": 0.5492, "num_tokens": 439941597.0, "step": 4677 }, { "epoch": 0.7984297661717017, "grad_norm": 0.5701114977765167, "learning_rate": 8.069636456733231e-06, "loss": 0.646, "num_tokens": 440015890.0, "step": 4678 }, { "epoch": 0.7986004437617341, "grad_norm": 0.5250475357626101, "learning_rate": 8.062809353131933e-06, "loss": 0.5593, "num_tokens": 440085639.0, "step": 4679 }, { "epoch": 0.7987711213517665, "grad_norm": 0.5205267324314233, "learning_rate": 8.055982249530637e-06, "loss": 0.4823, "num_tokens": 440157518.0, "step": 4680 }, { "epoch": 0.798941798941799, "grad_norm": 0.49875640470294186, "learning_rate": 8.049155145929341e-06, "loss": 0.5638, "num_tokens": 440239322.0, "step": 4681 }, { "epoch": 0.7991124765318314, "grad_norm": 0.477046606877634, "learning_rate": 8.042328042328043e-06, "loss": 0.5859, "num_tokens": 440333845.0, "step": 4682 }, { "epoch": 0.7992831541218638, "grad_norm": 0.4932278481156632, "learning_rate": 8.035500938726745e-06, "loss": 0.5605, "num_tokens": 440419039.0, "step": 4683 }, { "epoch": 0.7994538317118962, "grad_norm": 0.5319684286051337, "learning_rate": 8.028673835125449e-06, "loss": 0.6639, "num_tokens": 440503380.0, "step": 4684 }, { "epoch": 0.7996245093019286, "grad_norm": 0.4612513862187699, "learning_rate": 8.021846731524151e-06, "loss": 0.5075, "num_tokens": 440593883.0, "step": 4685 }, { "epoch": 0.799795186891961, "grad_norm": 0.4674137788653919, "learning_rate": 8.015019627922855e-06, "loss": 0.557, "num_tokens": 440680930.0, "step": 4686 }, { "epoch": 0.7999658644819935, "grad_norm": 0.5515140314394225, "learning_rate": 8.008192524321557e-06, "loss": 0.6105, "num_tokens": 440750111.0, "step": 4687 }, { "epoch": 0.8001365420720259, "grad_norm": 0.4320931635748151, "learning_rate": 8.001365420720259e-06, "loss": 0.4867, "num_tokens": 440850875.0, "step": 4688 }, { "epoch": 0.8003072196620584, "grad_norm": 0.4793732479138382, "learning_rate": 7.994538317118963e-06, "loss": 0.5439, "num_tokens": 440931081.0, "step": 4689 }, { "epoch": 0.8004778972520908, "grad_norm": 0.4896389467587396, "learning_rate": 7.987711213517666e-06, "loss": 0.5816, "num_tokens": 441022067.0, "step": 4690 }, { "epoch": 0.8006485748421233, "grad_norm": 0.45087923104646044, "learning_rate": 7.980884109916368e-06, "loss": 0.5101, "num_tokens": 441112705.0, "step": 4691 }, { "epoch": 0.8008192524321557, "grad_norm": 0.4434833666927645, "learning_rate": 7.97405700631507e-06, "loss": 0.593, "num_tokens": 441217313.0, "step": 4692 }, { "epoch": 0.8009899300221881, "grad_norm": 0.47553182839320374, "learning_rate": 7.967229902713774e-06, "loss": 0.6504, "num_tokens": 441319193.0, "step": 4693 }, { "epoch": 0.8011606076122205, "grad_norm": 0.5653476793275856, "learning_rate": 7.960402799112478e-06, "loss": 0.6404, "num_tokens": 441392778.0, "step": 4694 }, { "epoch": 0.8013312852022529, "grad_norm": 0.46150968279771093, "learning_rate": 7.95357569551118e-06, "loss": 0.5816, "num_tokens": 441496001.0, "step": 4695 }, { "epoch": 0.8015019627922854, "grad_norm": 0.5034864622357038, "learning_rate": 7.946748591909882e-06, "loss": 0.5516, "num_tokens": 441584091.0, "step": 4696 }, { "epoch": 0.8016726403823178, "grad_norm": 0.4820755897959334, "learning_rate": 7.939921488308586e-06, "loss": 0.5819, "num_tokens": 441679608.0, "step": 4697 }, { "epoch": 0.8018433179723502, "grad_norm": 0.45651068791894567, "learning_rate": 7.933094384707288e-06, "loss": 0.485, "num_tokens": 441771699.0, "step": 4698 }, { "epoch": 0.8020139955623826, "grad_norm": 0.5371188820143241, "learning_rate": 7.926267281105992e-06, "loss": 0.6009, "num_tokens": 441840387.0, "step": 4699 }, { "epoch": 0.802184673152415, "grad_norm": 0.4347976007642283, "learning_rate": 7.919440177504696e-06, "loss": 0.6039, "num_tokens": 441953085.0, "step": 4700 }, { "epoch": 0.8023553507424476, "grad_norm": 0.43895848824732425, "learning_rate": 7.912613073903398e-06, "loss": 0.5554, "num_tokens": 442069085.0, "step": 4701 }, { "epoch": 0.80252602833248, "grad_norm": 0.4850531153241374, "learning_rate": 7.9057859703021e-06, "loss": 0.6859, "num_tokens": 442174850.0, "step": 4702 }, { "epoch": 0.8026967059225124, "grad_norm": 0.49048536742539856, "learning_rate": 7.898958866700803e-06, "loss": 0.5693, "num_tokens": 442257749.0, "step": 4703 }, { "epoch": 0.8028673835125448, "grad_norm": 0.456120240880761, "learning_rate": 7.892131763099506e-06, "loss": 0.5, "num_tokens": 442351312.0, "step": 4704 }, { "epoch": 0.8030380611025772, "grad_norm": 0.4851290433809586, "learning_rate": 7.88530465949821e-06, "loss": 0.5998, "num_tokens": 442437700.0, "step": 4705 }, { "epoch": 0.8032087386926097, "grad_norm": 0.48505172014800363, "learning_rate": 7.878477555896911e-06, "loss": 0.6141, "num_tokens": 442530534.0, "step": 4706 }, { "epoch": 0.8033794162826421, "grad_norm": 0.46628612721411083, "learning_rate": 7.871650452295613e-06, "loss": 0.6024, "num_tokens": 442628867.0, "step": 4707 }, { "epoch": 0.8035500938726745, "grad_norm": 0.4085305613049359, "learning_rate": 7.864823348694317e-06, "loss": 0.5818, "num_tokens": 442755406.0, "step": 4708 }, { "epoch": 0.8037207714627069, "grad_norm": 0.45995335142639193, "learning_rate": 7.857996245093021e-06, "loss": 0.5868, "num_tokens": 442854076.0, "step": 4709 }, { "epoch": 0.8038914490527393, "grad_norm": 0.49194037595173723, "learning_rate": 7.851169141491723e-06, "loss": 0.6284, "num_tokens": 442949230.0, "step": 4710 }, { "epoch": 0.8040621266427718, "grad_norm": 0.5070487707463428, "learning_rate": 7.844342037890425e-06, "loss": 0.6026, "num_tokens": 443034232.0, "step": 4711 }, { "epoch": 0.8042328042328042, "grad_norm": 0.6064606937520076, "learning_rate": 7.837514934289129e-06, "loss": 0.6276, "num_tokens": 443090295.0, "step": 4712 }, { "epoch": 0.8044034818228366, "grad_norm": 0.4822728047704988, "learning_rate": 7.830687830687831e-06, "loss": 0.581, "num_tokens": 443191475.0, "step": 4713 }, { "epoch": 0.8045741594128691, "grad_norm": 0.45686565761466347, "learning_rate": 7.823860727086535e-06, "loss": 0.548, "num_tokens": 443291806.0, "step": 4714 }, { "epoch": 0.8047448370029016, "grad_norm": 0.5078433389124268, "learning_rate": 7.817033623485237e-06, "loss": 0.6716, "num_tokens": 443392935.0, "step": 4715 }, { "epoch": 0.804915514592934, "grad_norm": 0.5297330657328968, "learning_rate": 7.810206519883939e-06, "loss": 0.6122, "num_tokens": 443503219.0, "step": 4716 }, { "epoch": 0.8050861921829664, "grad_norm": 0.4442754311519693, "learning_rate": 7.803379416282643e-06, "loss": 0.6364, "num_tokens": 443633907.0, "step": 4717 }, { "epoch": 0.8052568697729988, "grad_norm": 0.44523658274735073, "learning_rate": 7.796552312681346e-06, "loss": 0.5387, "num_tokens": 443732581.0, "step": 4718 }, { "epoch": 0.8054275473630312, "grad_norm": 0.4668061364369091, "learning_rate": 7.789725209080048e-06, "loss": 0.5953, "num_tokens": 443842089.0, "step": 4719 }, { "epoch": 0.8055982249530637, "grad_norm": 0.4527338502699434, "learning_rate": 7.78289810547875e-06, "loss": 0.5782, "num_tokens": 443948404.0, "step": 4720 }, { "epoch": 0.8057689025430961, "grad_norm": 0.4349686086567656, "learning_rate": 7.776071001877454e-06, "loss": 0.5588, "num_tokens": 444064646.0, "step": 4721 }, { "epoch": 0.8059395801331285, "grad_norm": 0.4178395799324611, "learning_rate": 7.769243898276156e-06, "loss": 0.505, "num_tokens": 444177487.0, "step": 4722 }, { "epoch": 0.8061102577231609, "grad_norm": 0.5492339505493958, "learning_rate": 7.76241679467486e-06, "loss": 0.5744, "num_tokens": 444244843.0, "step": 4723 }, { "epoch": 0.8062809353131933, "grad_norm": 0.5777868756945832, "learning_rate": 7.755589691073562e-06, "loss": 0.6124, "num_tokens": 444313789.0, "step": 4724 }, { "epoch": 0.8064516129032258, "grad_norm": 0.5660753830103177, "learning_rate": 7.748762587472266e-06, "loss": 0.648, "num_tokens": 444451307.0, "step": 4725 }, { "epoch": 0.8066222904932583, "grad_norm": 0.565788730801703, "learning_rate": 7.741935483870968e-06, "loss": 0.5554, "num_tokens": 444559978.0, "step": 4726 }, { "epoch": 0.8067929680832907, "grad_norm": 0.5077881760250416, "learning_rate": 7.735108380269672e-06, "loss": 0.5967, "num_tokens": 444645394.0, "step": 4727 }, { "epoch": 0.8069636456733231, "grad_norm": 0.5251965347626029, "learning_rate": 7.728281276668374e-06, "loss": 0.5582, "num_tokens": 444713209.0, "step": 4728 }, { "epoch": 0.8071343232633555, "grad_norm": 0.44665513693365444, "learning_rate": 7.721454173067076e-06, "loss": 0.6364, "num_tokens": 444835380.0, "step": 4729 }, { "epoch": 0.807305000853388, "grad_norm": 0.5262151675491631, "learning_rate": 7.71462706946578e-06, "loss": 0.6032, "num_tokens": 444910532.0, "step": 4730 }, { "epoch": 0.8074756784434204, "grad_norm": 0.4406408462039059, "learning_rate": 7.707799965864483e-06, "loss": 0.5854, "num_tokens": 445021087.0, "step": 4731 }, { "epoch": 0.8076463560334528, "grad_norm": 0.48509872502605433, "learning_rate": 7.700972862263185e-06, "loss": 0.5532, "num_tokens": 445112228.0, "step": 4732 }, { "epoch": 0.8078170336234852, "grad_norm": 0.5318598670696898, "learning_rate": 7.694145758661888e-06, "loss": 0.5411, "num_tokens": 445186727.0, "step": 4733 }, { "epoch": 0.8079877112135176, "grad_norm": 0.46175591129573507, "learning_rate": 7.687318655060591e-06, "loss": 0.5565, "num_tokens": 445278887.0, "step": 4734 }, { "epoch": 0.8081583888035501, "grad_norm": 0.5237995150968546, "learning_rate": 7.680491551459293e-06, "loss": 0.6402, "num_tokens": 445365170.0, "step": 4735 }, { "epoch": 0.8083290663935825, "grad_norm": 0.5204115640945969, "learning_rate": 7.673664447857997e-06, "loss": 0.592, "num_tokens": 445448142.0, "step": 4736 }, { "epoch": 0.8084997439836149, "grad_norm": 0.46033575180774333, "learning_rate": 7.666837344256701e-06, "loss": 0.5187, "num_tokens": 445540187.0, "step": 4737 }, { "epoch": 0.8086704215736474, "grad_norm": 0.5414392392584082, "learning_rate": 7.660010240655403e-06, "loss": 0.5108, "num_tokens": 445600929.0, "step": 4738 }, { "epoch": 0.8088410991636799, "grad_norm": 0.49365812499685696, "learning_rate": 7.653183137054105e-06, "loss": 0.5595, "num_tokens": 445687049.0, "step": 4739 }, { "epoch": 0.8090117767537123, "grad_norm": 0.4674204671236959, "learning_rate": 7.646356033452809e-06, "loss": 0.5302, "num_tokens": 445779468.0, "step": 4740 }, { "epoch": 0.8091824543437447, "grad_norm": 0.463834357586098, "learning_rate": 7.639528929851511e-06, "loss": 0.5908, "num_tokens": 445885688.0, "step": 4741 }, { "epoch": 0.8093531319337771, "grad_norm": 0.5180836879607043, "learning_rate": 7.632701826250215e-06, "loss": 0.5983, "num_tokens": 445966849.0, "step": 4742 }, { "epoch": 0.8095238095238095, "grad_norm": 0.43733858473361725, "learning_rate": 7.6258747226489176e-06, "loss": 0.5648, "num_tokens": 446069949.0, "step": 4743 }, { "epoch": 0.809694487113842, "grad_norm": 0.421098033902334, "learning_rate": 7.61904761904762e-06, "loss": 0.5108, "num_tokens": 446174476.0, "step": 4744 }, { "epoch": 0.8098651647038744, "grad_norm": 0.45775977878478563, "learning_rate": 7.6122205154463225e-06, "loss": 0.5109, "num_tokens": 446265575.0, "step": 4745 }, { "epoch": 0.8100358422939068, "grad_norm": 0.48272595996728374, "learning_rate": 7.6053934118450255e-06, "loss": 0.6086, "num_tokens": 446381498.0, "step": 4746 }, { "epoch": 0.8102065198839392, "grad_norm": 0.4120180210779112, "learning_rate": 7.5985663082437275e-06, "loss": 0.5533, "num_tokens": 446504853.0, "step": 4747 }, { "epoch": 0.8103771974739716, "grad_norm": 0.4482648690972832, "learning_rate": 7.591739204642431e-06, "loss": 0.5726, "num_tokens": 446615737.0, "step": 4748 }, { "epoch": 0.810547875064004, "grad_norm": 0.44542807746670415, "learning_rate": 7.584912101041134e-06, "loss": 0.4972, "num_tokens": 446709191.0, "step": 4749 }, { "epoch": 0.8107185526540365, "grad_norm": 0.5362469969304422, "learning_rate": 7.578084997439836e-06, "loss": 0.6435, "num_tokens": 446786360.0, "step": 4750 }, { "epoch": 0.810889230244069, "grad_norm": 0.5082461474855435, "learning_rate": 7.571257893838539e-06, "loss": 0.4849, "num_tokens": 446861908.0, "step": 4751 }, { "epoch": 0.8110599078341014, "grad_norm": 0.4681072380534358, "learning_rate": 7.564430790237243e-06, "loss": 0.5772, "num_tokens": 446970830.0, "step": 4752 }, { "epoch": 0.8112305854241338, "grad_norm": 0.5151206928149785, "learning_rate": 7.557603686635945e-06, "loss": 0.6221, "num_tokens": 447052470.0, "step": 4753 }, { "epoch": 0.8114012630141663, "grad_norm": 0.43845125865735257, "learning_rate": 7.550776583034648e-06, "loss": 0.5225, "num_tokens": 447153197.0, "step": 4754 }, { "epoch": 0.8115719406041987, "grad_norm": 0.4815680323746247, "learning_rate": 7.543949479433351e-06, "loss": 0.6382, "num_tokens": 447250490.0, "step": 4755 }, { "epoch": 0.8117426181942311, "grad_norm": 0.4961296233278673, "learning_rate": 7.537122375832055e-06, "loss": 0.5405, "num_tokens": 447347047.0, "step": 4756 }, { "epoch": 0.8119132957842635, "grad_norm": 0.4658964199326391, "learning_rate": 7.530295272230757e-06, "loss": 0.5568, "num_tokens": 447430936.0, "step": 4757 }, { "epoch": 0.8120839733742959, "grad_norm": 0.5062694397834623, "learning_rate": 7.52346816862946e-06, "loss": 0.6169, "num_tokens": 447514403.0, "step": 4758 }, { "epoch": 0.8122546509643284, "grad_norm": 0.45468790640144185, "learning_rate": 7.5166410650281625e-06, "loss": 0.549, "num_tokens": 447608957.0, "step": 4759 }, { "epoch": 0.8124253285543608, "grad_norm": 0.5002290619327998, "learning_rate": 7.5098139614268654e-06, "loss": 0.576, "num_tokens": 447691666.0, "step": 4760 }, { "epoch": 0.8125960061443932, "grad_norm": 0.5088123938297505, "learning_rate": 7.502986857825568e-06, "loss": 0.5503, "num_tokens": 447762242.0, "step": 4761 }, { "epoch": 0.8127666837344256, "grad_norm": 0.47884012155608563, "learning_rate": 7.496159754224271e-06, "loss": 0.558, "num_tokens": 447855470.0, "step": 4762 }, { "epoch": 0.8129373613244582, "grad_norm": 0.4742363471727603, "learning_rate": 7.489332650622973e-06, "loss": 0.5201, "num_tokens": 447939417.0, "step": 4763 }, { "epoch": 0.8131080389144906, "grad_norm": 0.4493923326126821, "learning_rate": 7.482505547021677e-06, "loss": 0.607, "num_tokens": 448048856.0, "step": 4764 }, { "epoch": 0.813278716504523, "grad_norm": 0.47240501095533605, "learning_rate": 7.47567844342038e-06, "loss": 0.5236, "num_tokens": 448138324.0, "step": 4765 }, { "epoch": 0.8134493940945554, "grad_norm": 0.5250939988068586, "learning_rate": 7.468851339819082e-06, "loss": 0.5509, "num_tokens": 448210602.0, "step": 4766 }, { "epoch": 0.8136200716845878, "grad_norm": 0.42710569176693125, "learning_rate": 7.462024236217785e-06, "loss": 0.5748, "num_tokens": 448320843.0, "step": 4767 }, { "epoch": 0.8137907492746203, "grad_norm": 0.4589262589434066, "learning_rate": 7.455197132616489e-06, "loss": 0.614, "num_tokens": 448425836.0, "step": 4768 }, { "epoch": 0.8139614268646527, "grad_norm": 0.4211510730261692, "learning_rate": 7.448370029015191e-06, "loss": 0.5238, "num_tokens": 448539891.0, "step": 4769 }, { "epoch": 0.8141321044546851, "grad_norm": 0.4820658228150474, "learning_rate": 7.441542925413894e-06, "loss": 0.6199, "num_tokens": 448636263.0, "step": 4770 }, { "epoch": 0.8143027820447175, "grad_norm": 0.4693880267045123, "learning_rate": 7.434715821812597e-06, "loss": 0.4356, "num_tokens": 448710316.0, "step": 4771 }, { "epoch": 0.8144734596347499, "grad_norm": 0.4330983283124619, "learning_rate": 7.427888718211299e-06, "loss": 0.5924, "num_tokens": 448827997.0, "step": 4772 }, { "epoch": 0.8146441372247823, "grad_norm": 0.48223230571088793, "learning_rate": 7.4210616146100025e-06, "loss": 0.5274, "num_tokens": 448914434.0, "step": 4773 }, { "epoch": 0.8148148148148148, "grad_norm": 0.504133452978231, "learning_rate": 7.414234511008705e-06, "loss": 0.5232, "num_tokens": 448988285.0, "step": 4774 }, { "epoch": 0.8149854924048472, "grad_norm": 0.463838356889875, "learning_rate": 7.4074074074074075e-06, "loss": 0.5589, "num_tokens": 449082330.0, "step": 4775 }, { "epoch": 0.8151561699948797, "grad_norm": 0.4692978057266646, "learning_rate": 7.40058030380611e-06, "loss": 0.5358, "num_tokens": 449168330.0, "step": 4776 }, { "epoch": 0.8153268475849121, "grad_norm": 0.4728371355760739, "learning_rate": 7.393753200204814e-06, "loss": 0.5117, "num_tokens": 449252821.0, "step": 4777 }, { "epoch": 0.8154975251749446, "grad_norm": 0.4318585181077747, "learning_rate": 7.386926096603516e-06, "loss": 0.5939, "num_tokens": 449371770.0, "step": 4778 }, { "epoch": 0.815668202764977, "grad_norm": 0.4787517666440525, "learning_rate": 7.380098993002219e-06, "loss": 0.5661, "num_tokens": 449495511.0, "step": 4779 }, { "epoch": 0.8158388803550094, "grad_norm": 0.4653853957540983, "learning_rate": 7.373271889400923e-06, "loss": 0.561, "num_tokens": 449594169.0, "step": 4780 }, { "epoch": 0.8160095579450418, "grad_norm": 0.5402013617270179, "learning_rate": 7.366444785799625e-06, "loss": 0.5873, "num_tokens": 449665113.0, "step": 4781 }, { "epoch": 0.8161802355350742, "grad_norm": 0.42668335797631934, "learning_rate": 7.359617682198328e-06, "loss": 0.5343, "num_tokens": 449772529.0, "step": 4782 }, { "epoch": 0.8163509131251067, "grad_norm": 0.5206181911701467, "learning_rate": 7.352790578597031e-06, "loss": 0.5275, "num_tokens": 449846623.0, "step": 4783 }, { "epoch": 0.8165215907151391, "grad_norm": 0.5151588232810184, "learning_rate": 7.345963474995733e-06, "loss": 0.5633, "num_tokens": 449926163.0, "step": 4784 }, { "epoch": 0.8166922683051715, "grad_norm": 0.5410831794574849, "learning_rate": 7.339136371394437e-06, "loss": 0.6205, "num_tokens": 450012887.0, "step": 4785 }, { "epoch": 0.8168629458952039, "grad_norm": 0.43468436465328264, "learning_rate": 7.3323092677931396e-06, "loss": 0.4892, "num_tokens": 450113031.0, "step": 4786 }, { "epoch": 0.8170336234852363, "grad_norm": 0.4445283319759447, "learning_rate": 7.325482164191842e-06, "loss": 0.5436, "num_tokens": 450215776.0, "step": 4787 }, { "epoch": 0.8172043010752689, "grad_norm": 0.4497836397792133, "learning_rate": 7.3186550605905445e-06, "loss": 0.5622, "num_tokens": 450315234.0, "step": 4788 }, { "epoch": 0.8173749786653013, "grad_norm": 0.4964352188705711, "learning_rate": 7.311827956989248e-06, "loss": 0.664, "num_tokens": 450406358.0, "step": 4789 }, { "epoch": 0.8175456562553337, "grad_norm": 0.4685021885322408, "learning_rate": 7.305000853387951e-06, "loss": 0.58, "num_tokens": 450497615.0, "step": 4790 }, { "epoch": 0.8177163338453661, "grad_norm": 0.5070255442682443, "learning_rate": 7.298173749786653e-06, "loss": 0.5447, "num_tokens": 450577276.0, "step": 4791 }, { "epoch": 0.8178870114353985, "grad_norm": 0.4500326196025653, "learning_rate": 7.291346646185356e-06, "loss": 0.5952, "num_tokens": 450688711.0, "step": 4792 }, { "epoch": 0.818057689025431, "grad_norm": 0.4642506042277794, "learning_rate": 7.28451954258406e-06, "loss": 0.5338, "num_tokens": 450793962.0, "step": 4793 }, { "epoch": 0.8182283666154634, "grad_norm": 0.4404146776648589, "learning_rate": 7.277692438982762e-06, "loss": 0.5637, "num_tokens": 450899438.0, "step": 4794 }, { "epoch": 0.8183990442054958, "grad_norm": 0.4531786457009963, "learning_rate": 7.270865335381465e-06, "loss": 0.5399, "num_tokens": 450997663.0, "step": 4795 }, { "epoch": 0.8185697217955282, "grad_norm": 0.4559654845396106, "learning_rate": 7.264038231780169e-06, "loss": 0.6374, "num_tokens": 451111021.0, "step": 4796 }, { "epoch": 0.8187403993855606, "grad_norm": 0.45223797694737833, "learning_rate": 7.257211128178871e-06, "loss": 0.6404, "num_tokens": 451220567.0, "step": 4797 }, { "epoch": 0.8189110769755931, "grad_norm": 0.5659478820177348, "learning_rate": 7.250384024577574e-06, "loss": 0.5707, "num_tokens": 451285684.0, "step": 4798 }, { "epoch": 0.8190817545656255, "grad_norm": 0.4936199028734154, "learning_rate": 7.243556920976277e-06, "loss": 0.6067, "num_tokens": 451375747.0, "step": 4799 }, { "epoch": 0.819252432155658, "grad_norm": 0.4898395981698786, "learning_rate": 7.236729817374979e-06, "loss": 0.6498, "num_tokens": 451476131.0, "step": 4800 }, { "epoch": 0.8194231097456904, "grad_norm": 0.42581410750767, "learning_rate": 7.2299027137736824e-06, "loss": 0.6105, "num_tokens": 451602384.0, "step": 4801 }, { "epoch": 0.8195937873357229, "grad_norm": 0.4688277714931043, "learning_rate": 7.223075610172385e-06, "loss": 0.5278, "num_tokens": 451689428.0, "step": 4802 }, { "epoch": 0.8197644649257553, "grad_norm": 0.47243035454378807, "learning_rate": 7.2162485065710874e-06, "loss": 0.6396, "num_tokens": 451793984.0, "step": 4803 }, { "epoch": 0.8199351425157877, "grad_norm": 0.5004576418263701, "learning_rate": 7.20942140296979e-06, "loss": 0.5978, "num_tokens": 451875464.0, "step": 4804 }, { "epoch": 0.8201058201058201, "grad_norm": 0.4556979668009154, "learning_rate": 7.202594299368494e-06, "loss": 0.5326, "num_tokens": 451970819.0, "step": 4805 }, { "epoch": 0.8202764976958525, "grad_norm": 0.5140390685139122, "learning_rate": 7.195767195767196e-06, "loss": 0.6391, "num_tokens": 452050765.0, "step": 4806 }, { "epoch": 0.820447175285885, "grad_norm": 0.46058445237963563, "learning_rate": 7.188940092165899e-06, "loss": 0.4989, "num_tokens": 452139203.0, "step": 4807 }, { "epoch": 0.8206178528759174, "grad_norm": 0.528945618369798, "learning_rate": 7.182112988564602e-06, "loss": 0.6143, "num_tokens": 452210812.0, "step": 4808 }, { "epoch": 0.8207885304659498, "grad_norm": 0.798434653757783, "learning_rate": 7.175285884963304e-06, "loss": 0.615, "num_tokens": 452327109.0, "step": 4809 }, { "epoch": 0.8209592080559822, "grad_norm": 0.4716228158056162, "learning_rate": 7.168458781362008e-06, "loss": 0.6463, "num_tokens": 452431972.0, "step": 4810 }, { "epoch": 0.8211298856460146, "grad_norm": 0.6051937926872181, "learning_rate": 7.161631677760711e-06, "loss": 0.5733, "num_tokens": 452537370.0, "step": 4811 }, { "epoch": 0.821300563236047, "grad_norm": 0.45049765504540507, "learning_rate": 7.154804574159413e-06, "loss": 0.5656, "num_tokens": 452639287.0, "step": 4812 }, { "epoch": 0.8214712408260796, "grad_norm": 0.46519679103622785, "learning_rate": 7.147977470558117e-06, "loss": 0.4908, "num_tokens": 452722759.0, "step": 4813 }, { "epoch": 0.821641918416112, "grad_norm": 0.5351894101038412, "learning_rate": 7.1411503669568195e-06, "loss": 0.5778, "num_tokens": 452787782.0, "step": 4814 }, { "epoch": 0.8218125960061444, "grad_norm": 0.5246186968868738, "learning_rate": 7.1343232633555216e-06, "loss": 0.5896, "num_tokens": 452871296.0, "step": 4815 }, { "epoch": 0.8219832735961768, "grad_norm": 0.4446435270397607, "learning_rate": 7.1274961597542245e-06, "loss": 0.5902, "num_tokens": 452978802.0, "step": 4816 }, { "epoch": 0.8221539511862093, "grad_norm": 0.48912288703458223, "learning_rate": 7.120669056152928e-06, "loss": 0.5867, "num_tokens": 453077100.0, "step": 4817 }, { "epoch": 0.8223246287762417, "grad_norm": 0.43483011911778546, "learning_rate": 7.11384195255163e-06, "loss": 0.5679, "num_tokens": 453191106.0, "step": 4818 }, { "epoch": 0.8224953063662741, "grad_norm": 0.5466205658766324, "learning_rate": 7.107014848950333e-06, "loss": 0.622, "num_tokens": 453263488.0, "step": 4819 }, { "epoch": 0.8226659839563065, "grad_norm": 0.496155463466404, "learning_rate": 7.100187745349036e-06, "loss": 0.6355, "num_tokens": 453356325.0, "step": 4820 }, { "epoch": 0.8228366615463389, "grad_norm": 0.4858798399221191, "learning_rate": 7.09336064174774e-06, "loss": 0.5324, "num_tokens": 453443711.0, "step": 4821 }, { "epoch": 0.8230073391363714, "grad_norm": 0.548845262048738, "learning_rate": 7.086533538146442e-06, "loss": 0.59, "num_tokens": 453512275.0, "step": 4822 }, { "epoch": 0.8231780167264038, "grad_norm": 0.5187960184066911, "learning_rate": 7.079706434545145e-06, "loss": 0.6097, "num_tokens": 453590499.0, "step": 4823 }, { "epoch": 0.8233486943164362, "grad_norm": 0.4898898060708721, "learning_rate": 7.072879330943848e-06, "loss": 0.5171, "num_tokens": 453669329.0, "step": 4824 }, { "epoch": 0.8235193719064687, "grad_norm": 0.465907450035548, "learning_rate": 7.06605222734255e-06, "loss": 0.4843, "num_tokens": 453767454.0, "step": 4825 }, { "epoch": 0.8236900494965012, "grad_norm": 0.536875510762242, "learning_rate": 7.059225123741254e-06, "loss": 0.5441, "num_tokens": 453830766.0, "step": 4826 }, { "epoch": 0.8238607270865336, "grad_norm": 0.49558978514735863, "learning_rate": 7.0523980201399566e-06, "loss": 0.6641, "num_tokens": 453928248.0, "step": 4827 }, { "epoch": 0.824031404676566, "grad_norm": 0.47327679725439853, "learning_rate": 7.045570916538659e-06, "loss": 0.5421, "num_tokens": 454036402.0, "step": 4828 }, { "epoch": 0.8242020822665984, "grad_norm": 0.49705718570462404, "learning_rate": 7.038743812937362e-06, "loss": 0.5982, "num_tokens": 454123024.0, "step": 4829 }, { "epoch": 0.8243727598566308, "grad_norm": 0.5111429114746827, "learning_rate": 7.031916709336065e-06, "loss": 0.6565, "num_tokens": 454210331.0, "step": 4830 }, { "epoch": 0.8245434374466633, "grad_norm": 0.45674295683881294, "learning_rate": 7.025089605734767e-06, "loss": 0.4613, "num_tokens": 454291839.0, "step": 4831 }, { "epoch": 0.8247141150366957, "grad_norm": 0.4588941386872689, "learning_rate": 7.01826250213347e-06, "loss": 0.5411, "num_tokens": 454382421.0, "step": 4832 }, { "epoch": 0.8248847926267281, "grad_norm": 0.43356609698131476, "learning_rate": 7.011435398532174e-06, "loss": 0.553, "num_tokens": 454489080.0, "step": 4833 }, { "epoch": 0.8250554702167605, "grad_norm": 0.4171367892860194, "learning_rate": 7.004608294930876e-06, "loss": 0.5738, "num_tokens": 454611396.0, "step": 4834 }, { "epoch": 0.8252261478067929, "grad_norm": 0.466673602098529, "learning_rate": 6.997781191329579e-06, "loss": 0.5307, "num_tokens": 454706651.0, "step": 4835 }, { "epoch": 0.8253968253968254, "grad_norm": 0.4269043972221862, "learning_rate": 6.990954087728282e-06, "loss": 0.558, "num_tokens": 454819769.0, "step": 4836 }, { "epoch": 0.8255675029868578, "grad_norm": 0.4977846928121336, "learning_rate": 6.984126984126984e-06, "loss": 0.5011, "num_tokens": 454905376.0, "step": 4837 }, { "epoch": 0.8257381805768903, "grad_norm": 0.4069335603089527, "learning_rate": 6.977299880525688e-06, "loss": 0.563, "num_tokens": 455030594.0, "step": 4838 }, { "epoch": 0.8259088581669227, "grad_norm": 0.5263199180398532, "learning_rate": 6.970472776924391e-06, "loss": 0.5695, "num_tokens": 455099221.0, "step": 4839 }, { "epoch": 0.8260795357569551, "grad_norm": 0.5567499802180942, "learning_rate": 6.963645673323093e-06, "loss": 0.5342, "num_tokens": 455157577.0, "step": 4840 }, { "epoch": 0.8262502133469876, "grad_norm": 0.4042203454769082, "learning_rate": 6.956818569721796e-06, "loss": 0.5378, "num_tokens": 455275744.0, "step": 4841 }, { "epoch": 0.82642089093702, "grad_norm": 0.44346444903758286, "learning_rate": 6.9499914661204995e-06, "loss": 0.585, "num_tokens": 455382356.0, "step": 4842 }, { "epoch": 0.8265915685270524, "grad_norm": 0.45102813126351843, "learning_rate": 6.9431643625192015e-06, "loss": 0.5478, "num_tokens": 455485741.0, "step": 4843 }, { "epoch": 0.8267622461170848, "grad_norm": 0.4176332736164302, "learning_rate": 6.9363372589179044e-06, "loss": 0.5446, "num_tokens": 455595873.0, "step": 4844 }, { "epoch": 0.8269329237071172, "grad_norm": 0.47813312135428854, "learning_rate": 6.929510155316607e-06, "loss": 0.5709, "num_tokens": 455688174.0, "step": 4845 }, { "epoch": 0.8271036012971497, "grad_norm": 0.45853158469849914, "learning_rate": 6.92268305171531e-06, "loss": 0.6746, "num_tokens": 455802832.0, "step": 4846 }, { "epoch": 0.8272742788871821, "grad_norm": 0.46402235465960795, "learning_rate": 6.915855948114013e-06, "loss": 0.5899, "num_tokens": 455898606.0, "step": 4847 }, { "epoch": 0.8274449564772145, "grad_norm": 0.44783196920901436, "learning_rate": 6.909028844512716e-06, "loss": 0.5424, "num_tokens": 455994500.0, "step": 4848 }, { "epoch": 0.8276156340672469, "grad_norm": 0.45684842896180383, "learning_rate": 6.902201740911418e-06, "loss": 0.5033, "num_tokens": 456082504.0, "step": 4849 }, { "epoch": 0.8277863116572794, "grad_norm": 0.4769975128151429, "learning_rate": 6.895374637310122e-06, "loss": 0.5965, "num_tokens": 456184210.0, "step": 4850 }, { "epoch": 0.8279569892473119, "grad_norm": 0.4595500644144209, "learning_rate": 6.888547533708825e-06, "loss": 0.5401, "num_tokens": 456278287.0, "step": 4851 }, { "epoch": 0.8281276668373443, "grad_norm": 0.47758143331727887, "learning_rate": 6.881720430107528e-06, "loss": 0.6254, "num_tokens": 456378473.0, "step": 4852 }, { "epoch": 0.8282983444273767, "grad_norm": 0.4846421028944029, "learning_rate": 6.87489332650623e-06, "loss": 0.5349, "num_tokens": 456462522.0, "step": 4853 }, { "epoch": 0.8284690220174091, "grad_norm": 0.45793425826420137, "learning_rate": 6.868066222904934e-06, "loss": 0.51, "num_tokens": 456547073.0, "step": 4854 }, { "epoch": 0.8286396996074415, "grad_norm": 0.44251707365781306, "learning_rate": 6.8612391193036365e-06, "loss": 0.5303, "num_tokens": 456643179.0, "step": 4855 }, { "epoch": 0.828810377197474, "grad_norm": 0.4720750530184715, "learning_rate": 6.854412015702339e-06, "loss": 0.5158, "num_tokens": 456720551.0, "step": 4856 }, { "epoch": 0.8289810547875064, "grad_norm": 0.4688793935103529, "learning_rate": 6.8475849121010415e-06, "loss": 0.4821, "num_tokens": 456802285.0, "step": 4857 }, { "epoch": 0.8291517323775388, "grad_norm": 0.5133716249893587, "learning_rate": 6.840757808499745e-06, "loss": 0.5771, "num_tokens": 456879334.0, "step": 4858 }, { "epoch": 0.8293224099675712, "grad_norm": 0.43160155587366983, "learning_rate": 6.833930704898447e-06, "loss": 0.498, "num_tokens": 456981448.0, "step": 4859 }, { "epoch": 0.8294930875576036, "grad_norm": 0.4884227909418608, "learning_rate": 6.82710360129715e-06, "loss": 0.5781, "num_tokens": 457063703.0, "step": 4860 }, { "epoch": 0.8296637651476361, "grad_norm": 0.463492975864288, "learning_rate": 6.820276497695853e-06, "loss": 0.6675, "num_tokens": 457176793.0, "step": 4861 }, { "epoch": 0.8298344427376686, "grad_norm": 0.5296148485456056, "learning_rate": 6.813449394094555e-06, "loss": 0.5339, "num_tokens": 457262529.0, "step": 4862 }, { "epoch": 0.830005120327701, "grad_norm": 0.4585641888563883, "learning_rate": 6.806622290493259e-06, "loss": 0.5915, "num_tokens": 457370926.0, "step": 4863 }, { "epoch": 0.8301757979177334, "grad_norm": 0.4750806893059253, "learning_rate": 6.799795186891962e-06, "loss": 0.5294, "num_tokens": 457455513.0, "step": 4864 }, { "epoch": 0.8303464755077659, "grad_norm": 0.5243929172846149, "learning_rate": 6.792968083290664e-06, "loss": 0.6128, "num_tokens": 457531823.0, "step": 4865 }, { "epoch": 0.8305171530977983, "grad_norm": 0.4680795394118142, "learning_rate": 6.786140979689368e-06, "loss": 0.5236, "num_tokens": 457623886.0, "step": 4866 }, { "epoch": 0.8306878306878307, "grad_norm": 0.44195536875726743, "learning_rate": 6.779313876088071e-06, "loss": 0.5194, "num_tokens": 457722717.0, "step": 4867 }, { "epoch": 0.8308585082778631, "grad_norm": 0.4256610700649767, "learning_rate": 6.772486772486773e-06, "loss": 0.5621, "num_tokens": 457844712.0, "step": 4868 }, { "epoch": 0.8310291858678955, "grad_norm": 0.46001147887070243, "learning_rate": 6.765659668885476e-06, "loss": 0.5409, "num_tokens": 457945351.0, "step": 4869 }, { "epoch": 0.831199863457928, "grad_norm": 0.4197927098358501, "learning_rate": 6.758832565284179e-06, "loss": 0.5384, "num_tokens": 458052750.0, "step": 4870 }, { "epoch": 0.8313705410479604, "grad_norm": 0.5125765083711391, "learning_rate": 6.7520054616828815e-06, "loss": 0.5684, "num_tokens": 458126923.0, "step": 4871 }, { "epoch": 0.8315412186379928, "grad_norm": 0.4993310015441435, "learning_rate": 6.745178358081584e-06, "loss": 0.5765, "num_tokens": 458212318.0, "step": 4872 }, { "epoch": 0.8317118962280252, "grad_norm": 0.38768482283394085, "learning_rate": 6.738351254480287e-06, "loss": 0.5218, "num_tokens": 458342394.0, "step": 4873 }, { "epoch": 0.8318825738180576, "grad_norm": 0.45266234730272725, "learning_rate": 6.731524150878989e-06, "loss": 0.6317, "num_tokens": 458450990.0, "step": 4874 }, { "epoch": 0.8320532514080902, "grad_norm": 0.5019341643074298, "learning_rate": 6.724697047277693e-06, "loss": 0.6762, "num_tokens": 458545705.0, "step": 4875 }, { "epoch": 0.8322239289981226, "grad_norm": 0.4635092949774453, "learning_rate": 6.717869943676396e-06, "loss": 0.6293, "num_tokens": 458657078.0, "step": 4876 }, { "epoch": 0.832394606588155, "grad_norm": 0.49547570854527484, "learning_rate": 6.711042840075098e-06, "loss": 0.4896, "num_tokens": 458727848.0, "step": 4877 }, { "epoch": 0.8325652841781874, "grad_norm": 0.46041723948626934, "learning_rate": 6.704215736473801e-06, "loss": 0.5008, "num_tokens": 458817159.0, "step": 4878 }, { "epoch": 0.8327359617682198, "grad_norm": 0.4772888563999419, "learning_rate": 6.697388632872505e-06, "loss": 0.5825, "num_tokens": 458913571.0, "step": 4879 }, { "epoch": 0.8329066393582523, "grad_norm": 0.4278034111618673, "learning_rate": 6.690561529271207e-06, "loss": 0.558, "num_tokens": 459036839.0, "step": 4880 }, { "epoch": 0.8330773169482847, "grad_norm": 0.5267081756042016, "learning_rate": 6.68373442566991e-06, "loss": 0.5451, "num_tokens": 459104395.0, "step": 4881 }, { "epoch": 0.8332479945383171, "grad_norm": 0.5234842925125288, "learning_rate": 6.6769073220686135e-06, "loss": 0.5956, "num_tokens": 459178330.0, "step": 4882 }, { "epoch": 0.8334186721283495, "grad_norm": 0.47065981816379104, "learning_rate": 6.670080218467316e-06, "loss": 0.6192, "num_tokens": 459295873.0, "step": 4883 }, { "epoch": 0.833589349718382, "grad_norm": 0.46660604333507916, "learning_rate": 6.6632531148660185e-06, "loss": 0.5334, "num_tokens": 459385174.0, "step": 4884 }, { "epoch": 0.8337600273084144, "grad_norm": 0.47590211275031086, "learning_rate": 6.6564260112647214e-06, "loss": 0.5261, "num_tokens": 459471019.0, "step": 4885 }, { "epoch": 0.8339307048984468, "grad_norm": 0.43785513946431853, "learning_rate": 6.649598907663425e-06, "loss": 0.6322, "num_tokens": 459594813.0, "step": 4886 }, { "epoch": 0.8341013824884793, "grad_norm": 0.46254940245315024, "learning_rate": 6.642771804062127e-06, "loss": 0.6477, "num_tokens": 459713402.0, "step": 4887 }, { "epoch": 0.8342720600785117, "grad_norm": 0.45354864446248366, "learning_rate": 6.63594470046083e-06, "loss": 0.5844, "num_tokens": 459810418.0, "step": 4888 }, { "epoch": 0.8344427376685442, "grad_norm": 0.49575447892918656, "learning_rate": 6.629117596859533e-06, "loss": 0.605, "num_tokens": 459893585.0, "step": 4889 }, { "epoch": 0.8346134152585766, "grad_norm": 0.4461403433815446, "learning_rate": 6.622290493258235e-06, "loss": 0.6246, "num_tokens": 460010549.0, "step": 4890 }, { "epoch": 0.834784092848609, "grad_norm": 0.4559507367576683, "learning_rate": 6.615463389656939e-06, "loss": 0.6008, "num_tokens": 460122586.0, "step": 4891 }, { "epoch": 0.8349547704386414, "grad_norm": 0.45514693170031584, "learning_rate": 6.608636286055642e-06, "loss": 0.5554, "num_tokens": 460222139.0, "step": 4892 }, { "epoch": 0.8351254480286738, "grad_norm": 0.45768894432374213, "learning_rate": 6.601809182454344e-06, "loss": 0.5459, "num_tokens": 460317841.0, "step": 4893 }, { "epoch": 0.8352961256187063, "grad_norm": 0.45889326263618335, "learning_rate": 6.594982078853047e-06, "loss": 0.516, "num_tokens": 460407812.0, "step": 4894 }, { "epoch": 0.8354668032087387, "grad_norm": 0.43652434017337427, "learning_rate": 6.588154975251751e-06, "loss": 0.5529, "num_tokens": 460510645.0, "step": 4895 }, { "epoch": 0.8356374807987711, "grad_norm": 0.49162457668122383, "learning_rate": 6.581327871650453e-06, "loss": 0.5918, "num_tokens": 460610174.0, "step": 4896 }, { "epoch": 0.8358081583888035, "grad_norm": 0.44168987978768487, "learning_rate": 6.574500768049156e-06, "loss": 0.5168, "num_tokens": 460712044.0, "step": 4897 }, { "epoch": 0.8359788359788359, "grad_norm": 0.49307024273375566, "learning_rate": 6.5676736644478585e-06, "loss": 0.5706, "num_tokens": 460819326.0, "step": 4898 }, { "epoch": 0.8361495135688684, "grad_norm": 0.4644340676590431, "learning_rate": 6.560846560846561e-06, "loss": 0.4931, "num_tokens": 460911555.0, "step": 4899 }, { "epoch": 0.8363201911589009, "grad_norm": 0.5789397454530446, "learning_rate": 6.554019457245264e-06, "loss": 0.5677, "num_tokens": 460976798.0, "step": 4900 }, { "epoch": 0.8364908687489333, "grad_norm": 0.5689157133676696, "learning_rate": 6.547192353643967e-06, "loss": 0.5731, "num_tokens": 461039252.0, "step": 4901 }, { "epoch": 0.8366615463389657, "grad_norm": 0.49317626129342196, "learning_rate": 6.540365250042669e-06, "loss": 0.4933, "num_tokens": 461116381.0, "step": 4902 }, { "epoch": 0.8368322239289981, "grad_norm": 0.4996379619219291, "learning_rate": 6.533538146441373e-06, "loss": 0.5076, "num_tokens": 461186911.0, "step": 4903 }, { "epoch": 0.8370029015190306, "grad_norm": 0.5202328474603903, "learning_rate": 6.526711042840076e-06, "loss": 0.5431, "num_tokens": 461264556.0, "step": 4904 }, { "epoch": 0.837173579109063, "grad_norm": 0.46886826283801164, "learning_rate": 6.519883939238778e-06, "loss": 0.5708, "num_tokens": 461367082.0, "step": 4905 }, { "epoch": 0.8373442566990954, "grad_norm": 0.4780985815017691, "learning_rate": 6.513056835637481e-06, "loss": 0.6419, "num_tokens": 461467087.0, "step": 4906 }, { "epoch": 0.8375149342891278, "grad_norm": 0.4491312073825426, "learning_rate": 6.506229732036185e-06, "loss": 0.5026, "num_tokens": 461569150.0, "step": 4907 }, { "epoch": 0.8376856118791602, "grad_norm": 0.5005403376201004, "learning_rate": 6.499402628434887e-06, "loss": 0.6025, "num_tokens": 461674459.0, "step": 4908 }, { "epoch": 0.8378562894691927, "grad_norm": 0.4801498510730418, "learning_rate": 6.49257552483359e-06, "loss": 0.5598, "num_tokens": 461760327.0, "step": 4909 }, { "epoch": 0.8380269670592251, "grad_norm": 0.5348669155580196, "learning_rate": 6.485748421232293e-06, "loss": 0.5634, "num_tokens": 461838988.0, "step": 4910 }, { "epoch": 0.8381976446492575, "grad_norm": 0.484848233801489, "learning_rate": 6.478921317630995e-06, "loss": 0.5666, "num_tokens": 461925962.0, "step": 4911 }, { "epoch": 0.83836832223929, "grad_norm": 0.4870041308847209, "learning_rate": 6.4720942140296985e-06, "loss": 0.546, "num_tokens": 462007132.0, "step": 4912 }, { "epoch": 0.8385389998293225, "grad_norm": 0.4506231970050041, "learning_rate": 6.465267110428401e-06, "loss": 0.538, "num_tokens": 462100750.0, "step": 4913 }, { "epoch": 0.8387096774193549, "grad_norm": 0.45791402000264647, "learning_rate": 6.4584400068271035e-06, "loss": 0.5177, "num_tokens": 462196678.0, "step": 4914 }, { "epoch": 0.8388803550093873, "grad_norm": 0.4559909143954396, "learning_rate": 6.451612903225806e-06, "loss": 0.6158, "num_tokens": 462300899.0, "step": 4915 }, { "epoch": 0.8390510325994197, "grad_norm": 0.45752494712160663, "learning_rate": 6.44478579962451e-06, "loss": 0.6186, "num_tokens": 462418844.0, "step": 4916 }, { "epoch": 0.8392217101894521, "grad_norm": 0.48742176321543074, "learning_rate": 6.437958696023213e-06, "loss": 0.6164, "num_tokens": 462508407.0, "step": 4917 }, { "epoch": 0.8393923877794846, "grad_norm": 0.5277346440695257, "learning_rate": 6.431131592421915e-06, "loss": 0.5599, "num_tokens": 462578791.0, "step": 4918 }, { "epoch": 0.839563065369517, "grad_norm": 0.5175008625968157, "learning_rate": 6.424304488820619e-06, "loss": 0.5389, "num_tokens": 462652299.0, "step": 4919 }, { "epoch": 0.8397337429595494, "grad_norm": 0.4072000940839893, "learning_rate": 6.417477385219322e-06, "loss": 0.532, "num_tokens": 462773256.0, "step": 4920 }, { "epoch": 0.8399044205495818, "grad_norm": 0.5354207320459092, "learning_rate": 6.410650281618024e-06, "loss": 0.5449, "num_tokens": 462842166.0, "step": 4921 }, { "epoch": 0.8400750981396142, "grad_norm": 0.4949535072696954, "learning_rate": 6.403823178016727e-06, "loss": 0.5229, "num_tokens": 462921412.0, "step": 4922 }, { "epoch": 0.8402457757296466, "grad_norm": 0.49139832827052105, "learning_rate": 6.3969960744154306e-06, "loss": 0.5513, "num_tokens": 463010111.0, "step": 4923 }, { "epoch": 0.8404164533196792, "grad_norm": 0.4888872382939385, "learning_rate": 6.390168970814133e-06, "loss": 0.6033, "num_tokens": 463095694.0, "step": 4924 }, { "epoch": 0.8405871309097116, "grad_norm": 0.42173923381028505, "learning_rate": 6.3833418672128355e-06, "loss": 0.5945, "num_tokens": 463218865.0, "step": 4925 }, { "epoch": 0.840757808499744, "grad_norm": 0.5172653719442127, "learning_rate": 6.3765147636115385e-06, "loss": 0.5019, "num_tokens": 463289194.0, "step": 4926 }, { "epoch": 0.8409284860897764, "grad_norm": 0.48541287746629275, "learning_rate": 6.3696876600102405e-06, "loss": 0.5578, "num_tokens": 463374617.0, "step": 4927 }, { "epoch": 0.8410991636798089, "grad_norm": 0.5240388010411126, "learning_rate": 6.362860556408944e-06, "loss": 0.5734, "num_tokens": 463453621.0, "step": 4928 }, { "epoch": 0.8412698412698413, "grad_norm": 0.45077644981956383, "learning_rate": 6.356033452807647e-06, "loss": 0.5665, "num_tokens": 463560736.0, "step": 4929 }, { "epoch": 0.8414405188598737, "grad_norm": 0.5120195772504303, "learning_rate": 6.349206349206349e-06, "loss": 0.575, "num_tokens": 463639727.0, "step": 4930 }, { "epoch": 0.8416111964499061, "grad_norm": 0.4640297197548327, "learning_rate": 6.342379245605052e-06, "loss": 0.5508, "num_tokens": 463732045.0, "step": 4931 }, { "epoch": 0.8417818740399385, "grad_norm": 0.44725700211862646, "learning_rate": 6.335552142003756e-06, "loss": 0.5851, "num_tokens": 463844996.0, "step": 4932 }, { "epoch": 0.841952551629971, "grad_norm": 0.4937551711871825, "learning_rate": 6.328725038402458e-06, "loss": 0.639, "num_tokens": 463930037.0, "step": 4933 }, { "epoch": 0.8421232292200034, "grad_norm": 0.4547325013484731, "learning_rate": 6.321897934801161e-06, "loss": 0.5335, "num_tokens": 464023787.0, "step": 4934 }, { "epoch": 0.8422939068100358, "grad_norm": 0.47783226717750654, "learning_rate": 6.315070831199865e-06, "loss": 0.5343, "num_tokens": 464105453.0, "step": 4935 }, { "epoch": 0.8424645844000682, "grad_norm": 0.4251037721552533, "learning_rate": 6.308243727598567e-06, "loss": 0.5723, "num_tokens": 464222691.0, "step": 4936 }, { "epoch": 0.8426352619901007, "grad_norm": 0.4655710665971173, "learning_rate": 6.30141662399727e-06, "loss": 0.5833, "num_tokens": 464323042.0, "step": 4937 }, { "epoch": 0.8428059395801332, "grad_norm": 0.46271848293200707, "learning_rate": 6.294589520395973e-06, "loss": 0.6355, "num_tokens": 464425157.0, "step": 4938 }, { "epoch": 0.8429766171701656, "grad_norm": 0.4922589610297303, "learning_rate": 6.287762416794675e-06, "loss": 0.4944, "num_tokens": 464493018.0, "step": 4939 }, { "epoch": 0.843147294760198, "grad_norm": 0.5023519731300099, "learning_rate": 6.2809353131933784e-06, "loss": 0.5336, "num_tokens": 464564937.0, "step": 4940 }, { "epoch": 0.8433179723502304, "grad_norm": 0.4696686845230933, "learning_rate": 6.274108209592081e-06, "loss": 0.574, "num_tokens": 464652488.0, "step": 4941 }, { "epoch": 0.8434886499402628, "grad_norm": 0.5304683392675822, "learning_rate": 6.267281105990783e-06, "loss": 0.5898, "num_tokens": 464724292.0, "step": 4942 }, { "epoch": 0.8436593275302953, "grad_norm": 0.4534995896410765, "learning_rate": 6.260454002389486e-06, "loss": 0.5417, "num_tokens": 464830028.0, "step": 4943 }, { "epoch": 0.8438300051203277, "grad_norm": 0.4647490478110065, "learning_rate": 6.25362689878819e-06, "loss": 0.5978, "num_tokens": 464926119.0, "step": 4944 }, { "epoch": 0.8440006827103601, "grad_norm": 0.48664088126400584, "learning_rate": 6.246799795186892e-06, "loss": 0.5375, "num_tokens": 465017000.0, "step": 4945 }, { "epoch": 0.8441713603003925, "grad_norm": 0.6206806037389087, "learning_rate": 6.239972691585595e-06, "loss": 0.5489, "num_tokens": 465105612.0, "step": 4946 }, { "epoch": 0.844342037890425, "grad_norm": 0.5006934459315261, "learning_rate": 6.233145587984298e-06, "loss": 0.509, "num_tokens": 465182302.0, "step": 4947 }, { "epoch": 0.8445127154804574, "grad_norm": 0.4966112852952901, "learning_rate": 6.226318484383002e-06, "loss": 0.5672, "num_tokens": 465261925.0, "step": 4948 }, { "epoch": 0.8446833930704899, "grad_norm": 0.41417003503217936, "learning_rate": 6.219491380781704e-06, "loss": 0.5812, "num_tokens": 465390207.0, "step": 4949 }, { "epoch": 0.8448540706605223, "grad_norm": 0.4309553425505083, "learning_rate": 6.212664277180407e-06, "loss": 0.5551, "num_tokens": 465501575.0, "step": 4950 }, { "epoch": 0.8450247482505547, "grad_norm": 0.45522028900013767, "learning_rate": 6.2058371735791105e-06, "loss": 0.6458, "num_tokens": 465625718.0, "step": 4951 }, { "epoch": 0.8451954258405872, "grad_norm": 0.4389599158163193, "learning_rate": 6.1990100699778126e-06, "loss": 0.5817, "num_tokens": 465744338.0, "step": 4952 }, { "epoch": 0.8453661034306196, "grad_norm": 0.4227358769393761, "learning_rate": 6.1921829663765155e-06, "loss": 0.5868, "num_tokens": 465863880.0, "step": 4953 }, { "epoch": 0.845536781020652, "grad_norm": 0.46635486336138454, "learning_rate": 6.185355862775218e-06, "loss": 0.5609, "num_tokens": 465960241.0, "step": 4954 }, { "epoch": 0.8457074586106844, "grad_norm": 0.43872333413666487, "learning_rate": 6.1785287591739205e-06, "loss": 0.5368, "num_tokens": 466067526.0, "step": 4955 }, { "epoch": 0.8458781362007168, "grad_norm": 0.5205888789222144, "learning_rate": 6.171701655572624e-06, "loss": 0.4672, "num_tokens": 466134867.0, "step": 4956 }, { "epoch": 0.8460488137907493, "grad_norm": 0.5001507171342837, "learning_rate": 6.164874551971327e-06, "loss": 0.5511, "num_tokens": 466211766.0, "step": 4957 }, { "epoch": 0.8462194913807817, "grad_norm": 0.48035157635720777, "learning_rate": 6.158047448370029e-06, "loss": 0.5221, "num_tokens": 466292528.0, "step": 4958 }, { "epoch": 0.8463901689708141, "grad_norm": 0.4413367146470332, "learning_rate": 6.151220344768732e-06, "loss": 0.567, "num_tokens": 466389040.0, "step": 4959 }, { "epoch": 0.8465608465608465, "grad_norm": 0.4399020608133824, "learning_rate": 6.144393241167436e-06, "loss": 0.5265, "num_tokens": 466493816.0, "step": 4960 }, { "epoch": 0.846731524150879, "grad_norm": 0.4675597861964928, "learning_rate": 6.137566137566138e-06, "loss": 0.579, "num_tokens": 466586881.0, "step": 4961 }, { "epoch": 0.8469022017409115, "grad_norm": 0.47014096220311874, "learning_rate": 6.130739033964841e-06, "loss": 0.5924, "num_tokens": 466682551.0, "step": 4962 }, { "epoch": 0.8470728793309439, "grad_norm": 0.4255090820324177, "learning_rate": 6.123911930363544e-06, "loss": 0.5333, "num_tokens": 466788729.0, "step": 4963 }, { "epoch": 0.8472435569209763, "grad_norm": 0.4540065922427179, "learning_rate": 6.117084826762246e-06, "loss": 0.6029, "num_tokens": 466907074.0, "step": 4964 }, { "epoch": 0.8474142345110087, "grad_norm": 0.4472707364464128, "learning_rate": 6.11025772316095e-06, "loss": 0.6514, "num_tokens": 467024702.0, "step": 4965 }, { "epoch": 0.8475849121010411, "grad_norm": 0.46190991022935485, "learning_rate": 6.1034306195596525e-06, "loss": 0.5853, "num_tokens": 467128248.0, "step": 4966 }, { "epoch": 0.8477555896910736, "grad_norm": 0.4556616361960912, "learning_rate": 6.096603515958355e-06, "loss": 0.5171, "num_tokens": 467218497.0, "step": 4967 }, { "epoch": 0.847926267281106, "grad_norm": 0.4853929441269046, "learning_rate": 6.089776412357058e-06, "loss": 0.5532, "num_tokens": 467314327.0, "step": 4968 }, { "epoch": 0.8480969448711384, "grad_norm": 0.47987875109464245, "learning_rate": 6.082949308755761e-06, "loss": 0.5237, "num_tokens": 467392646.0, "step": 4969 }, { "epoch": 0.8482676224611708, "grad_norm": 0.4258253480551358, "learning_rate": 6.076122205154463e-06, "loss": 0.6457, "num_tokens": 467520947.0, "step": 4970 }, { "epoch": 0.8484383000512032, "grad_norm": 0.46253018129612067, "learning_rate": 6.069295101553166e-06, "loss": 0.5419, "num_tokens": 467603638.0, "step": 4971 }, { "epoch": 0.8486089776412357, "grad_norm": 0.501106102940788, "learning_rate": 6.06246799795187e-06, "loss": 0.606, "num_tokens": 467691176.0, "step": 4972 }, { "epoch": 0.8487796552312681, "grad_norm": 0.5122151911421258, "learning_rate": 6.055640894350572e-06, "loss": 0.5561, "num_tokens": 467762605.0, "step": 4973 }, { "epoch": 0.8489503328213006, "grad_norm": 0.45377035383322745, "learning_rate": 6.048813790749275e-06, "loss": 0.5974, "num_tokens": 467863630.0, "step": 4974 }, { "epoch": 0.849121010411333, "grad_norm": 0.4693450580532555, "learning_rate": 6.041986687147978e-06, "loss": 0.5307, "num_tokens": 467955460.0, "step": 4975 }, { "epoch": 0.8492916880013655, "grad_norm": 0.4641537829880674, "learning_rate": 6.03515958354668e-06, "loss": 0.5209, "num_tokens": 468039799.0, "step": 4976 }, { "epoch": 0.8494623655913979, "grad_norm": 0.4478809090685906, "learning_rate": 6.028332479945384e-06, "loss": 0.5808, "num_tokens": 468141335.0, "step": 4977 }, { "epoch": 0.8496330431814303, "grad_norm": 0.5412303176121621, "learning_rate": 6.021505376344087e-06, "loss": 0.6263, "num_tokens": 468211472.0, "step": 4978 }, { "epoch": 0.8498037207714627, "grad_norm": 0.4300827149476992, "learning_rate": 6.014678272742789e-06, "loss": 0.5039, "num_tokens": 468316831.0, "step": 4979 }, { "epoch": 0.8499743983614951, "grad_norm": 0.4523808188898401, "learning_rate": 6.007851169141492e-06, "loss": 0.5606, "num_tokens": 468416586.0, "step": 4980 }, { "epoch": 0.8501450759515276, "grad_norm": 0.5163433803432683, "learning_rate": 6.0010240655401954e-06, "loss": 0.5752, "num_tokens": 468499652.0, "step": 4981 }, { "epoch": 0.85031575354156, "grad_norm": 0.5049551733618891, "learning_rate": 5.994196961938898e-06, "loss": 0.576, "num_tokens": 468586055.0, "step": 4982 }, { "epoch": 0.8504864311315924, "grad_norm": 0.47583352766646003, "learning_rate": 5.9873698583376e-06, "loss": 0.6931, "num_tokens": 468697014.0, "step": 4983 }, { "epoch": 0.8506571087216248, "grad_norm": 0.4479022690866181, "learning_rate": 5.980542754736303e-06, "loss": 0.559, "num_tokens": 468800759.0, "step": 4984 }, { "epoch": 0.8508277863116572, "grad_norm": 0.47424811701259717, "learning_rate": 5.973715651135007e-06, "loss": 0.5227, "num_tokens": 468884265.0, "step": 4985 }, { "epoch": 0.8509984639016898, "grad_norm": 0.502831386046737, "learning_rate": 5.966888547533709e-06, "loss": 0.6681, "num_tokens": 468976618.0, "step": 4986 }, { "epoch": 0.8511691414917222, "grad_norm": 0.5019056609504023, "learning_rate": 5.960061443932412e-06, "loss": 0.615, "num_tokens": 469070591.0, "step": 4987 }, { "epoch": 0.8513398190817546, "grad_norm": 0.5648291278884545, "learning_rate": 5.953234340331116e-06, "loss": 0.5831, "num_tokens": 469136441.0, "step": 4988 }, { "epoch": 0.851510496671787, "grad_norm": 0.48938913872236844, "learning_rate": 5.946407236729818e-06, "loss": 0.4827, "num_tokens": 469207299.0, "step": 4989 }, { "epoch": 0.8516811742618194, "grad_norm": 0.49890453742373925, "learning_rate": 5.939580133128521e-06, "loss": 0.5718, "num_tokens": 469290674.0, "step": 4990 }, { "epoch": 0.8518518518518519, "grad_norm": 0.43599316653260806, "learning_rate": 5.932753029527224e-06, "loss": 0.5188, "num_tokens": 469388407.0, "step": 4991 }, { "epoch": 0.8520225294418843, "grad_norm": 0.46605233461894896, "learning_rate": 5.925925925925926e-06, "loss": 0.5241, "num_tokens": 469481122.0, "step": 4992 }, { "epoch": 0.8521932070319167, "grad_norm": 0.4210575017614484, "learning_rate": 5.91909882232463e-06, "loss": 0.5678, "num_tokens": 469602184.0, "step": 4993 }, { "epoch": 0.8523638846219491, "grad_norm": 0.44107818211952005, "learning_rate": 5.9122717187233325e-06, "loss": 0.5876, "num_tokens": 469705854.0, "step": 4994 }, { "epoch": 0.8525345622119815, "grad_norm": 0.5032672691549811, "learning_rate": 5.9054446151220346e-06, "loss": 0.6238, "num_tokens": 469797995.0, "step": 4995 }, { "epoch": 0.852705239802014, "grad_norm": 0.4558838142452927, "learning_rate": 5.8986175115207375e-06, "loss": 0.6389, "num_tokens": 469913776.0, "step": 4996 }, { "epoch": 0.8528759173920464, "grad_norm": 0.4845665265015083, "learning_rate": 5.891790407919441e-06, "loss": 0.5687, "num_tokens": 470002163.0, "step": 4997 }, { "epoch": 0.8530465949820788, "grad_norm": 0.47434922331172363, "learning_rate": 5.884963304318143e-06, "loss": 0.5892, "num_tokens": 470096706.0, "step": 4998 }, { "epoch": 0.8532172725721113, "grad_norm": 0.5156649023741182, "learning_rate": 5.878136200716846e-06, "loss": 0.5854, "num_tokens": 470173344.0, "step": 4999 }, { "epoch": 0.8533879501621437, "grad_norm": 0.43543507939606246, "learning_rate": 5.871309097115549e-06, "loss": 0.5235, "num_tokens": 470278287.0, "step": 5000 }, { "epoch": 0.8535586277521762, "grad_norm": 0.44249546142736296, "learning_rate": 5.864481993514251e-06, "loss": 0.5368, "num_tokens": 470379820.0, "step": 5001 }, { "epoch": 0.8537293053422086, "grad_norm": 0.46547698260223785, "learning_rate": 5.857654889912955e-06, "loss": 0.6091, "num_tokens": 470476787.0, "step": 5002 }, { "epoch": 0.853899982932241, "grad_norm": 0.4897880483944473, "learning_rate": 5.850827786311658e-06, "loss": 0.6005, "num_tokens": 470565476.0, "step": 5003 }, { "epoch": 0.8540706605222734, "grad_norm": 0.5254322009703806, "learning_rate": 5.84400068271036e-06, "loss": 0.5696, "num_tokens": 470644341.0, "step": 5004 }, { "epoch": 0.8542413381123058, "grad_norm": 0.5461866350213606, "learning_rate": 5.837173579109064e-06, "loss": 0.5936, "num_tokens": 470741294.0, "step": 5005 }, { "epoch": 0.8544120157023383, "grad_norm": 0.46918546948538264, "learning_rate": 5.830346475507767e-06, "loss": 0.5747, "num_tokens": 470837964.0, "step": 5006 }, { "epoch": 0.8545826932923707, "grad_norm": 0.45706594418184043, "learning_rate": 5.823519371906469e-06, "loss": 0.5056, "num_tokens": 470932066.0, "step": 5007 }, { "epoch": 0.8547533708824031, "grad_norm": 0.48674917755486047, "learning_rate": 5.816692268305172e-06, "loss": 0.6109, "num_tokens": 471023964.0, "step": 5008 }, { "epoch": 0.8549240484724355, "grad_norm": 0.47275294316350747, "learning_rate": 5.809865164703875e-06, "loss": 0.6066, "num_tokens": 471125311.0, "step": 5009 }, { "epoch": 0.855094726062468, "grad_norm": 0.45239561109554344, "learning_rate": 5.8030380611025775e-06, "loss": 0.5673, "num_tokens": 471230984.0, "step": 5010 }, { "epoch": 0.8552654036525005, "grad_norm": 0.4383600798877033, "learning_rate": 5.79621095750128e-06, "loss": 0.5881, "num_tokens": 471345400.0, "step": 5011 }, { "epoch": 0.8554360812425329, "grad_norm": 0.4543568194902383, "learning_rate": 5.789383853899983e-06, "loss": 0.4937, "num_tokens": 471437449.0, "step": 5012 }, { "epoch": 0.8556067588325653, "grad_norm": 0.5154552720825754, "learning_rate": 5.782556750298687e-06, "loss": 0.6626, "num_tokens": 471529722.0, "step": 5013 }, { "epoch": 0.8557774364225977, "grad_norm": 0.47118380564578627, "learning_rate": 5.775729646697389e-06, "loss": 0.4742, "num_tokens": 471613265.0, "step": 5014 }, { "epoch": 0.8559481140126302, "grad_norm": 0.46719513903436755, "learning_rate": 5.768902543096092e-06, "loss": 0.5936, "num_tokens": 471706944.0, "step": 5015 }, { "epoch": 0.8561187916026626, "grad_norm": 0.4250630798777261, "learning_rate": 5.762075439494795e-06, "loss": 0.5197, "num_tokens": 471815593.0, "step": 5016 }, { "epoch": 0.856289469192695, "grad_norm": 0.5044876755814766, "learning_rate": 5.755248335893497e-06, "loss": 0.5881, "num_tokens": 471902100.0, "step": 5017 }, { "epoch": 0.8564601467827274, "grad_norm": 0.4514622170034405, "learning_rate": 5.748421232292201e-06, "loss": 0.5855, "num_tokens": 472012523.0, "step": 5018 }, { "epoch": 0.8566308243727598, "grad_norm": 0.45337881382150985, "learning_rate": 5.741594128690904e-06, "loss": 0.5983, "num_tokens": 472113955.0, "step": 5019 }, { "epoch": 0.8568015019627923, "grad_norm": 0.4287112962533035, "learning_rate": 5.734767025089606e-06, "loss": 0.579, "num_tokens": 472226845.0, "step": 5020 }, { "epoch": 0.8569721795528247, "grad_norm": 0.3819101503996246, "learning_rate": 5.7279399214883095e-06, "loss": 0.5729, "num_tokens": 472372018.0, "step": 5021 }, { "epoch": 0.8571428571428571, "grad_norm": 0.45043883023562864, "learning_rate": 5.7211128178870124e-06, "loss": 0.636, "num_tokens": 472490299.0, "step": 5022 }, { "epoch": 0.8573135347328896, "grad_norm": 0.4711425498400568, "learning_rate": 5.7142857142857145e-06, "loss": 0.5703, "num_tokens": 472581914.0, "step": 5023 }, { "epoch": 0.857484212322922, "grad_norm": 0.4458423843452687, "learning_rate": 5.7074586106844174e-06, "loss": 0.5252, "num_tokens": 472678585.0, "step": 5024 }, { "epoch": 0.8576548899129545, "grad_norm": 0.4599096087852025, "learning_rate": 5.700631507083121e-06, "loss": 0.6168, "num_tokens": 472777365.0, "step": 5025 }, { "epoch": 0.8578255675029869, "grad_norm": 0.4383181589106577, "learning_rate": 5.693804403481823e-06, "loss": 0.575, "num_tokens": 472889942.0, "step": 5026 }, { "epoch": 0.8579962450930193, "grad_norm": 0.4650462446390513, "learning_rate": 5.686977299880526e-06, "loss": 0.5644, "num_tokens": 472977741.0, "step": 5027 }, { "epoch": 0.8581669226830517, "grad_norm": 0.4386807517669245, "learning_rate": 5.680150196279229e-06, "loss": 0.4953, "num_tokens": 473069695.0, "step": 5028 }, { "epoch": 0.8583376002730841, "grad_norm": 0.4508844514106691, "learning_rate": 5.673323092677931e-06, "loss": 0.5474, "num_tokens": 473166982.0, "step": 5029 }, { "epoch": 0.8585082778631166, "grad_norm": 0.4954461985829233, "learning_rate": 5.666495989076635e-06, "loss": 0.5562, "num_tokens": 473246504.0, "step": 5030 }, { "epoch": 0.858678955453149, "grad_norm": 0.44626478783752643, "learning_rate": 5.659668885475338e-06, "loss": 0.5891, "num_tokens": 473357725.0, "step": 5031 }, { "epoch": 0.8588496330431814, "grad_norm": 0.4624381246607165, "learning_rate": 5.65284178187404e-06, "loss": 0.5856, "num_tokens": 473452618.0, "step": 5032 }, { "epoch": 0.8590203106332138, "grad_norm": 0.4991333212855299, "learning_rate": 5.646014678272743e-06, "loss": 0.6117, "num_tokens": 473542174.0, "step": 5033 }, { "epoch": 0.8591909882232462, "grad_norm": 0.463581050754311, "learning_rate": 5.639187574671447e-06, "loss": 0.5212, "num_tokens": 473630036.0, "step": 5034 }, { "epoch": 0.8593616658132787, "grad_norm": 0.45472577791929064, "learning_rate": 5.632360471070149e-06, "loss": 0.575, "num_tokens": 473727625.0, "step": 5035 }, { "epoch": 0.8595323434033112, "grad_norm": 0.4840482040093249, "learning_rate": 5.6255333674688516e-06, "loss": 0.5626, "num_tokens": 473813543.0, "step": 5036 }, { "epoch": 0.8597030209933436, "grad_norm": 0.48078993631829914, "learning_rate": 5.618706263867555e-06, "loss": 0.5994, "num_tokens": 473904017.0, "step": 5037 }, { "epoch": 0.859873698583376, "grad_norm": 0.47948329612309026, "learning_rate": 5.611879160266257e-06, "loss": 0.562, "num_tokens": 473992770.0, "step": 5038 }, { "epoch": 0.8600443761734085, "grad_norm": 0.4781366081574548, "learning_rate": 5.60505205666496e-06, "loss": 0.5589, "num_tokens": 474086398.0, "step": 5039 }, { "epoch": 0.8602150537634409, "grad_norm": 0.5039575300834054, "learning_rate": 5.598224953063663e-06, "loss": 0.5934, "num_tokens": 474183628.0, "step": 5040 }, { "epoch": 0.8603857313534733, "grad_norm": 0.524180301892005, "learning_rate": 5.591397849462365e-06, "loss": 0.6644, "num_tokens": 474270271.0, "step": 5041 }, { "epoch": 0.8605564089435057, "grad_norm": 0.4032997186702399, "learning_rate": 5.584570745861069e-06, "loss": 0.5654, "num_tokens": 474394543.0, "step": 5042 }, { "epoch": 0.8607270865335381, "grad_norm": 0.44599815880767735, "learning_rate": 5.577743642259772e-06, "loss": 0.4685, "num_tokens": 474484646.0, "step": 5043 }, { "epoch": 0.8608977641235706, "grad_norm": 0.4729150594952984, "learning_rate": 5.570916538658475e-06, "loss": 0.5447, "num_tokens": 474583919.0, "step": 5044 }, { "epoch": 0.861068441713603, "grad_norm": 0.47626122460640113, "learning_rate": 5.564089435057177e-06, "loss": 0.5302, "num_tokens": 474674086.0, "step": 5045 }, { "epoch": 0.8612391193036354, "grad_norm": 0.5171239267577753, "learning_rate": 5.557262331455881e-06, "loss": 0.5236, "num_tokens": 474746757.0, "step": 5046 }, { "epoch": 0.8614097968936678, "grad_norm": 0.5043361202024221, "learning_rate": 5.550435227854584e-06, "loss": 0.5296, "num_tokens": 474822009.0, "step": 5047 }, { "epoch": 0.8615804744837003, "grad_norm": 0.5048219548229176, "learning_rate": 5.543608124253286e-06, "loss": 0.6156, "num_tokens": 474906183.0, "step": 5048 }, { "epoch": 0.8617511520737328, "grad_norm": 0.48131944496639484, "learning_rate": 5.536781020651989e-06, "loss": 0.5456, "num_tokens": 474988670.0, "step": 5049 }, { "epoch": 0.8619218296637652, "grad_norm": 0.4666946173757857, "learning_rate": 5.529953917050692e-06, "loss": 0.4876, "num_tokens": 475072347.0, "step": 5050 }, { "epoch": 0.8620925072537976, "grad_norm": 0.6065070231880086, "learning_rate": 5.5231268134493945e-06, "loss": 0.6143, "num_tokens": 475164553.0, "step": 5051 }, { "epoch": 0.86226318484383, "grad_norm": 0.5333924061453027, "learning_rate": 5.516299709848097e-06, "loss": 0.5749, "num_tokens": 475236136.0, "step": 5052 }, { "epoch": 0.8624338624338624, "grad_norm": 0.4642926432490608, "learning_rate": 5.5094726062468e-06, "loss": 0.5521, "num_tokens": 475354074.0, "step": 5053 }, { "epoch": 0.8626045400238949, "grad_norm": 0.5017726866657745, "learning_rate": 5.502645502645503e-06, "loss": 0.6278, "num_tokens": 475439808.0, "step": 5054 }, { "epoch": 0.8627752176139273, "grad_norm": 0.47093465010391083, "learning_rate": 5.495818399044206e-06, "loss": 0.5258, "num_tokens": 475525814.0, "step": 5055 }, { "epoch": 0.8629458952039597, "grad_norm": 0.4174937360392457, "learning_rate": 5.488991295442909e-06, "loss": 0.6159, "num_tokens": 475662397.0, "step": 5056 }, { "epoch": 0.8631165727939921, "grad_norm": 0.46949629928368236, "learning_rate": 5.482164191841611e-06, "loss": 0.5804, "num_tokens": 475761304.0, "step": 5057 }, { "epoch": 0.8632872503840245, "grad_norm": 0.45780382546308734, "learning_rate": 5.475337088240315e-06, "loss": 0.5194, "num_tokens": 475845764.0, "step": 5058 }, { "epoch": 0.863457927974057, "grad_norm": 0.44415266763585676, "learning_rate": 5.468509984639018e-06, "loss": 0.5363, "num_tokens": 475942971.0, "step": 5059 }, { "epoch": 0.8636286055640894, "grad_norm": 0.5047677224913351, "learning_rate": 5.46168288103772e-06, "loss": 0.6036, "num_tokens": 476024031.0, "step": 5060 }, { "epoch": 0.8637992831541219, "grad_norm": 0.4513986730447121, "learning_rate": 5.454855777436423e-06, "loss": 0.6098, "num_tokens": 476132717.0, "step": 5061 }, { "epoch": 0.8639699607441543, "grad_norm": 0.5543797317985505, "learning_rate": 5.4480286738351265e-06, "loss": 0.6028, "num_tokens": 476207637.0, "step": 5062 }, { "epoch": 0.8641406383341868, "grad_norm": 0.4821037676775579, "learning_rate": 5.441201570233829e-06, "loss": 0.6016, "num_tokens": 476301915.0, "step": 5063 }, { "epoch": 0.8643113159242192, "grad_norm": 0.5054331814699304, "learning_rate": 5.4343744666325315e-06, "loss": 0.5083, "num_tokens": 476378352.0, "step": 5064 }, { "epoch": 0.8644819935142516, "grad_norm": 0.4030589092541839, "learning_rate": 5.4275473630312344e-06, "loss": 0.5942, "num_tokens": 476508972.0, "step": 5065 }, { "epoch": 0.864652671104284, "grad_norm": 0.525146654964081, "learning_rate": 5.4207202594299365e-06, "loss": 0.6508, "num_tokens": 476591423.0, "step": 5066 }, { "epoch": 0.8648233486943164, "grad_norm": 0.4591851357753713, "learning_rate": 5.41389315582864e-06, "loss": 0.5491, "num_tokens": 476686158.0, "step": 5067 }, { "epoch": 0.8649940262843488, "grad_norm": 0.4252034832719552, "learning_rate": 5.407066052227343e-06, "loss": 0.5279, "num_tokens": 476796068.0, "step": 5068 }, { "epoch": 0.8651647038743813, "grad_norm": 0.41637225263657424, "learning_rate": 5.400238948626045e-06, "loss": 0.6395, "num_tokens": 476924740.0, "step": 5069 }, { "epoch": 0.8653353814644137, "grad_norm": 0.48678971671554466, "learning_rate": 5.393411845024748e-06, "loss": 0.599, "num_tokens": 477019493.0, "step": 5070 }, { "epoch": 0.8655060590544461, "grad_norm": 0.4446276687029465, "learning_rate": 5.386584741423452e-06, "loss": 0.588, "num_tokens": 477120895.0, "step": 5071 }, { "epoch": 0.8656767366444785, "grad_norm": 0.49048930441986355, "learning_rate": 5.379757637822154e-06, "loss": 0.5382, "num_tokens": 477209100.0, "step": 5072 }, { "epoch": 0.8658474142345111, "grad_norm": 0.4372359119186143, "learning_rate": 5.372930534220857e-06, "loss": 0.5888, "num_tokens": 477324230.0, "step": 5073 }, { "epoch": 0.8660180918245435, "grad_norm": 0.4542502429028389, "learning_rate": 5.366103430619561e-06, "loss": 0.5458, "num_tokens": 477414416.0, "step": 5074 }, { "epoch": 0.8661887694145759, "grad_norm": 0.5262258080362914, "learning_rate": 5.359276327018263e-06, "loss": 0.6974, "num_tokens": 477501545.0, "step": 5075 }, { "epoch": 0.8663594470046083, "grad_norm": 0.47787152751890183, "learning_rate": 5.352449223416966e-06, "loss": 0.5435, "num_tokens": 477592426.0, "step": 5076 }, { "epoch": 0.8665301245946407, "grad_norm": 0.4841429704079997, "learning_rate": 5.345622119815669e-06, "loss": 0.5727, "num_tokens": 477681296.0, "step": 5077 }, { "epoch": 0.8667008021846732, "grad_norm": 0.4659974341802965, "learning_rate": 5.338795016214372e-06, "loss": 0.5534, "num_tokens": 477773415.0, "step": 5078 }, { "epoch": 0.8668714797747056, "grad_norm": 0.4689832520986852, "learning_rate": 5.331967912613074e-06, "loss": 0.5029, "num_tokens": 477849390.0, "step": 5079 }, { "epoch": 0.867042157364738, "grad_norm": 0.4524433640584122, "learning_rate": 5.325140809011777e-06, "loss": 0.609, "num_tokens": 477956312.0, "step": 5080 }, { "epoch": 0.8672128349547704, "grad_norm": 0.4848452837410558, "learning_rate": 5.31831370541048e-06, "loss": 0.5722, "num_tokens": 478040737.0, "step": 5081 }, { "epoch": 0.8673835125448028, "grad_norm": 0.41595700268898184, "learning_rate": 5.311486601809182e-06, "loss": 0.5462, "num_tokens": 478153199.0, "step": 5082 }, { "epoch": 0.8675541901348353, "grad_norm": 0.5067155384647308, "learning_rate": 5.304659498207886e-06, "loss": 0.5704, "num_tokens": 478242470.0, "step": 5083 }, { "epoch": 0.8677248677248677, "grad_norm": 0.4546905676335257, "learning_rate": 5.297832394606589e-06, "loss": 0.579, "num_tokens": 478344915.0, "step": 5084 }, { "epoch": 0.8678955453149002, "grad_norm": 0.5090824820666209, "learning_rate": 5.291005291005291e-06, "loss": 0.6522, "num_tokens": 478433795.0, "step": 5085 }, { "epoch": 0.8680662229049326, "grad_norm": 0.5440080298384428, "learning_rate": 5.284178187403994e-06, "loss": 0.5485, "num_tokens": 478504334.0, "step": 5086 }, { "epoch": 0.868236900494965, "grad_norm": 0.4053630201337281, "learning_rate": 5.277351083802698e-06, "loss": 0.5015, "num_tokens": 478622658.0, "step": 5087 }, { "epoch": 0.8684075780849975, "grad_norm": 0.4836081839721407, "learning_rate": 5.2705239802014e-06, "loss": 0.5713, "num_tokens": 478716846.0, "step": 5088 }, { "epoch": 0.8685782556750299, "grad_norm": 0.41597324395102575, "learning_rate": 5.263696876600103e-06, "loss": 0.5227, "num_tokens": 478832864.0, "step": 5089 }, { "epoch": 0.8687489332650623, "grad_norm": 0.4473354623677014, "learning_rate": 5.2568697729988065e-06, "loss": 0.5659, "num_tokens": 478938100.0, "step": 5090 }, { "epoch": 0.8689196108550947, "grad_norm": 0.4782342401897195, "learning_rate": 5.2500426693975086e-06, "loss": 0.5479, "num_tokens": 479025377.0, "step": 5091 }, { "epoch": 0.8690902884451271, "grad_norm": 0.46976976681840144, "learning_rate": 5.2432155657962115e-06, "loss": 0.5106, "num_tokens": 479110320.0, "step": 5092 }, { "epoch": 0.8692609660351596, "grad_norm": 0.49001353057991087, "learning_rate": 5.236388462194914e-06, "loss": 0.6295, "num_tokens": 479211488.0, "step": 5093 }, { "epoch": 0.869431643625192, "grad_norm": 0.4655898204242139, "learning_rate": 5.2295613585936165e-06, "loss": 0.5328, "num_tokens": 479300188.0, "step": 5094 }, { "epoch": 0.8696023212152244, "grad_norm": 0.42984808881706194, "learning_rate": 5.22273425499232e-06, "loss": 0.5288, "num_tokens": 479408661.0, "step": 5095 }, { "epoch": 0.8697729988052568, "grad_norm": 0.4761595750070644, "learning_rate": 5.215907151391023e-06, "loss": 0.6271, "num_tokens": 479513399.0, "step": 5096 }, { "epoch": 0.8699436763952892, "grad_norm": 0.4610632420837057, "learning_rate": 5.209080047789725e-06, "loss": 0.5423, "num_tokens": 479603315.0, "step": 5097 }, { "epoch": 0.8701143539853218, "grad_norm": 0.4896837422291437, "learning_rate": 5.202252944188428e-06, "loss": 0.5894, "num_tokens": 479691183.0, "step": 5098 }, { "epoch": 0.8702850315753542, "grad_norm": 0.4557469082530974, "learning_rate": 5.195425840587132e-06, "loss": 0.5119, "num_tokens": 479780750.0, "step": 5099 }, { "epoch": 0.8704557091653866, "grad_norm": 0.4465893628210023, "learning_rate": 5.188598736985834e-06, "loss": 0.5317, "num_tokens": 479876952.0, "step": 5100 }, { "epoch": 0.870626386755419, "grad_norm": 0.49317080135429997, "learning_rate": 5.181771633384537e-06, "loss": 0.6075, "num_tokens": 479966622.0, "step": 5101 }, { "epoch": 0.8707970643454515, "grad_norm": 0.4779903693083875, "learning_rate": 5.17494452978324e-06, "loss": 0.5653, "num_tokens": 480054953.0, "step": 5102 }, { "epoch": 0.8709677419354839, "grad_norm": 0.48485145405251073, "learning_rate": 5.168117426181942e-06, "loss": 0.5679, "num_tokens": 480135299.0, "step": 5103 }, { "epoch": 0.8711384195255163, "grad_norm": 0.529942257004737, "learning_rate": 5.161290322580646e-06, "loss": 0.5993, "num_tokens": 480207895.0, "step": 5104 }, { "epoch": 0.8713090971155487, "grad_norm": 0.4775816406390114, "learning_rate": 5.1544632189793485e-06, "loss": 0.5616, "num_tokens": 480301502.0, "step": 5105 }, { "epoch": 0.8714797747055811, "grad_norm": 0.49828595834758116, "learning_rate": 5.147636115378051e-06, "loss": 0.5332, "num_tokens": 480379226.0, "step": 5106 }, { "epoch": 0.8716504522956136, "grad_norm": 0.3850406628514721, "learning_rate": 5.140809011776754e-06, "loss": 0.5372, "num_tokens": 480510506.0, "step": 5107 }, { "epoch": 0.871821129885646, "grad_norm": 0.44972464134094686, "learning_rate": 5.133981908175457e-06, "loss": 0.5495, "num_tokens": 480605338.0, "step": 5108 }, { "epoch": 0.8719918074756784, "grad_norm": 0.4234185519075699, "learning_rate": 5.12715480457416e-06, "loss": 0.5376, "num_tokens": 480714239.0, "step": 5109 }, { "epoch": 0.8721624850657109, "grad_norm": 0.4337246627995907, "learning_rate": 5.120327700972862e-06, "loss": 0.4848, "num_tokens": 480813971.0, "step": 5110 }, { "epoch": 0.8723331626557433, "grad_norm": 0.4454840849786393, "learning_rate": 5.113500597371566e-06, "loss": 0.5908, "num_tokens": 480918255.0, "step": 5111 }, { "epoch": 0.8725038402457758, "grad_norm": 0.4582709757291227, "learning_rate": 5.106673493770269e-06, "loss": 0.5747, "num_tokens": 481018127.0, "step": 5112 }, { "epoch": 0.8726745178358082, "grad_norm": 0.4666246577486718, "learning_rate": 5.099846390168971e-06, "loss": 0.4899, "num_tokens": 481092929.0, "step": 5113 }, { "epoch": 0.8728451954258406, "grad_norm": 0.4822541169014824, "learning_rate": 5.093019286567674e-06, "loss": 0.517, "num_tokens": 481174645.0, "step": 5114 }, { "epoch": 0.873015873015873, "grad_norm": 0.4362059039926803, "learning_rate": 5.086192182966378e-06, "loss": 0.5701, "num_tokens": 481287685.0, "step": 5115 }, { "epoch": 0.8731865506059054, "grad_norm": 0.4509374853165365, "learning_rate": 5.07936507936508e-06, "loss": 0.5237, "num_tokens": 481383744.0, "step": 5116 }, { "epoch": 0.8733572281959379, "grad_norm": 0.423699677582795, "learning_rate": 5.072537975763783e-06, "loss": 0.5521, "num_tokens": 481496424.0, "step": 5117 }, { "epoch": 0.8735279057859703, "grad_norm": 0.4385807266943721, "learning_rate": 5.065710872162486e-06, "loss": 0.6094, "num_tokens": 481616083.0, "step": 5118 }, { "epoch": 0.8736985833760027, "grad_norm": 0.5203189787570264, "learning_rate": 5.058883768561188e-06, "loss": 0.5561, "num_tokens": 481687041.0, "step": 5119 }, { "epoch": 0.8738692609660351, "grad_norm": 0.4306905054019209, "learning_rate": 5.052056664959891e-06, "loss": 0.4996, "num_tokens": 481786062.0, "step": 5120 }, { "epoch": 0.8740399385560675, "grad_norm": 0.466790971398107, "learning_rate": 5.045229561358594e-06, "loss": 0.5019, "num_tokens": 481869285.0, "step": 5121 }, { "epoch": 0.8742106161461001, "grad_norm": 0.49448161390651535, "learning_rate": 5.038402457757296e-06, "loss": 0.5551, "num_tokens": 481948650.0, "step": 5122 }, { "epoch": 0.8743812937361325, "grad_norm": 0.4785616574301245, "learning_rate": 5.031575354155999e-06, "loss": 0.5643, "num_tokens": 482039542.0, "step": 5123 }, { "epoch": 0.8745519713261649, "grad_norm": 0.529021202278291, "learning_rate": 5.024748250554703e-06, "loss": 0.5712, "num_tokens": 482115314.0, "step": 5124 }, { "epoch": 0.8747226489161973, "grad_norm": 0.5367845336472805, "learning_rate": 5.017921146953405e-06, "loss": 0.4983, "num_tokens": 482180853.0, "step": 5125 }, { "epoch": 0.8748933265062298, "grad_norm": 0.44855980522529565, "learning_rate": 5.011094043352108e-06, "loss": 0.6375, "num_tokens": 482294976.0, "step": 5126 }, { "epoch": 0.8750640040962622, "grad_norm": 0.4378289717249907, "learning_rate": 5.004266939750812e-06, "loss": 0.5634, "num_tokens": 482408470.0, "step": 5127 }, { "epoch": 0.8752346816862946, "grad_norm": 0.4323729714032558, "learning_rate": 4.997439836149514e-06, "loss": 0.5855, "num_tokens": 482534070.0, "step": 5128 }, { "epoch": 0.875405359276327, "grad_norm": 0.4894003954658156, "learning_rate": 4.990612732548217e-06, "loss": 0.5797, "num_tokens": 482634835.0, "step": 5129 }, { "epoch": 0.8755760368663594, "grad_norm": 0.4325870037970568, "learning_rate": 4.98378562894692e-06, "loss": 0.5139, "num_tokens": 482735176.0, "step": 5130 }, { "epoch": 0.8757467144563919, "grad_norm": 0.5573914620109769, "learning_rate": 4.976958525345623e-06, "loss": 0.5636, "num_tokens": 482798210.0, "step": 5131 }, { "epoch": 0.8759173920464243, "grad_norm": 0.47348521944671706, "learning_rate": 4.9701314217443256e-06, "loss": 0.5923, "num_tokens": 482888660.0, "step": 5132 }, { "epoch": 0.8760880696364567, "grad_norm": 0.5017965092056168, "learning_rate": 4.9633043181430285e-06, "loss": 0.4953, "num_tokens": 482962970.0, "step": 5133 }, { "epoch": 0.8762587472264891, "grad_norm": 0.4384648598753509, "learning_rate": 4.956477214541731e-06, "loss": 0.5354, "num_tokens": 483068226.0, "step": 5134 }, { "epoch": 0.8764294248165216, "grad_norm": 0.4815551154956585, "learning_rate": 4.9496501109404335e-06, "loss": 0.5286, "num_tokens": 483153683.0, "step": 5135 }, { "epoch": 0.8766001024065541, "grad_norm": 0.4234594214687516, "learning_rate": 4.942823007339137e-06, "loss": 0.5912, "num_tokens": 483270945.0, "step": 5136 }, { "epoch": 0.8767707799965865, "grad_norm": 0.43736742724381233, "learning_rate": 4.935995903737839e-06, "loss": 0.5584, "num_tokens": 483377280.0, "step": 5137 }, { "epoch": 0.8769414575866189, "grad_norm": 0.4915416408522379, "learning_rate": 4.929168800136542e-06, "loss": 0.5449, "num_tokens": 483459122.0, "step": 5138 }, { "epoch": 0.8771121351766513, "grad_norm": 0.4264220197945617, "learning_rate": 4.922341696535245e-06, "loss": 0.644, "num_tokens": 483581112.0, "step": 5139 }, { "epoch": 0.8772828127666837, "grad_norm": 0.4735082888212155, "learning_rate": 4.915514592933948e-06, "loss": 0.537, "num_tokens": 483670473.0, "step": 5140 }, { "epoch": 0.8774534903567162, "grad_norm": 0.4193689557052416, "learning_rate": 4.908687489332651e-06, "loss": 0.5736, "num_tokens": 483800077.0, "step": 5141 }, { "epoch": 0.8776241679467486, "grad_norm": 0.47271826702118397, "learning_rate": 4.901860385731354e-06, "loss": 0.5946, "num_tokens": 483895151.0, "step": 5142 }, { "epoch": 0.877794845536781, "grad_norm": 0.44303890130464607, "learning_rate": 4.895033282130057e-06, "loss": 0.5008, "num_tokens": 483990821.0, "step": 5143 }, { "epoch": 0.8779655231268134, "grad_norm": 0.45467908861190803, "learning_rate": 4.88820617852876e-06, "loss": 0.5219, "num_tokens": 484086411.0, "step": 5144 }, { "epoch": 0.8781362007168458, "grad_norm": 0.45300161039394576, "learning_rate": 4.881379074927463e-06, "loss": 0.5651, "num_tokens": 484184665.0, "step": 5145 }, { "epoch": 0.8783068783068783, "grad_norm": 0.45275418240441667, "learning_rate": 4.8745519713261655e-06, "loss": 0.511, "num_tokens": 484271468.0, "step": 5146 }, { "epoch": 0.8784775558969108, "grad_norm": 0.5327310959133266, "learning_rate": 4.867724867724868e-06, "loss": 0.5993, "num_tokens": 484360028.0, "step": 5147 }, { "epoch": 0.8786482334869432, "grad_norm": 0.4440921193744442, "learning_rate": 4.860897764123571e-06, "loss": 0.5728, "num_tokens": 484462613.0, "step": 5148 }, { "epoch": 0.8788189110769756, "grad_norm": 0.40553349345586953, "learning_rate": 4.8540706605222734e-06, "loss": 0.5347, "num_tokens": 484578994.0, "step": 5149 }, { "epoch": 0.878989588667008, "grad_norm": 0.4556558405238685, "learning_rate": 4.847243556920977e-06, "loss": 0.5547, "num_tokens": 484679278.0, "step": 5150 }, { "epoch": 0.8791602662570405, "grad_norm": 0.4342408885003157, "learning_rate": 4.840416453319679e-06, "loss": 0.5575, "num_tokens": 484784942.0, "step": 5151 }, { "epoch": 0.8793309438470729, "grad_norm": 0.4733049154374729, "learning_rate": 4.833589349718382e-06, "loss": 0.5703, "num_tokens": 484876549.0, "step": 5152 }, { "epoch": 0.8795016214371053, "grad_norm": 0.5667701008029796, "learning_rate": 4.826762246117085e-06, "loss": 0.6091, "num_tokens": 484946813.0, "step": 5153 }, { "epoch": 0.8796722990271377, "grad_norm": 0.4387924270105301, "learning_rate": 4.819935142515788e-06, "loss": 0.5178, "num_tokens": 485043887.0, "step": 5154 }, { "epoch": 0.8798429766171701, "grad_norm": 0.5008457115202295, "learning_rate": 4.813108038914491e-06, "loss": 0.6035, "num_tokens": 485132315.0, "step": 5155 }, { "epoch": 0.8800136542072026, "grad_norm": 0.5167369888378072, "learning_rate": 4.806280935313194e-06, "loss": 0.5344, "num_tokens": 485206463.0, "step": 5156 }, { "epoch": 0.880184331797235, "grad_norm": 0.44150259759058486, "learning_rate": 4.799453831711897e-06, "loss": 0.5676, "num_tokens": 485309343.0, "step": 5157 }, { "epoch": 0.8803550093872674, "grad_norm": 0.42841026847835595, "learning_rate": 4.7926267281106e-06, "loss": 0.5257, "num_tokens": 485406396.0, "step": 5158 }, { "epoch": 0.8805256869772998, "grad_norm": 0.47330134787546013, "learning_rate": 4.785799624509303e-06, "loss": 0.572, "num_tokens": 485496664.0, "step": 5159 }, { "epoch": 0.8806963645673324, "grad_norm": 0.5313116006445662, "learning_rate": 4.7789725209080055e-06, "loss": 0.5853, "num_tokens": 485567533.0, "step": 5160 }, { "epoch": 0.8808670421573648, "grad_norm": 0.45516103078993886, "learning_rate": 4.772145417306708e-06, "loss": 0.5952, "num_tokens": 485664981.0, "step": 5161 }, { "epoch": 0.8810377197473972, "grad_norm": 0.5086206062251374, "learning_rate": 4.765318313705411e-06, "loss": 0.6891, "num_tokens": 485782414.0, "step": 5162 }, { "epoch": 0.8812083973374296, "grad_norm": 0.4312852476813892, "learning_rate": 4.758491210104113e-06, "loss": 0.4988, "num_tokens": 485877776.0, "step": 5163 }, { "epoch": 0.881379074927462, "grad_norm": 0.463616548912973, "learning_rate": 4.751664106502817e-06, "loss": 0.6298, "num_tokens": 485988874.0, "step": 5164 }, { "epoch": 0.8815497525174945, "grad_norm": 0.4594135092142407, "learning_rate": 4.744837002901519e-06, "loss": 0.5573, "num_tokens": 486103505.0, "step": 5165 }, { "epoch": 0.8817204301075269, "grad_norm": 0.41345097389702745, "learning_rate": 4.738009899300222e-06, "loss": 0.55, "num_tokens": 486223014.0, "step": 5166 }, { "epoch": 0.8818911076975593, "grad_norm": 0.46067888544004726, "learning_rate": 4.731182795698925e-06, "loss": 0.5137, "num_tokens": 486311045.0, "step": 5167 }, { "epoch": 0.8820617852875917, "grad_norm": 0.5232465912791857, "learning_rate": 4.724355692097628e-06, "loss": 0.5379, "num_tokens": 486378432.0, "step": 5168 }, { "epoch": 0.8822324628776241, "grad_norm": 0.4439572720818059, "learning_rate": 4.717528588496331e-06, "loss": 0.4917, "num_tokens": 486471168.0, "step": 5169 }, { "epoch": 0.8824031404676566, "grad_norm": 0.42694859689153397, "learning_rate": 4.710701484895034e-06, "loss": 0.5874, "num_tokens": 486592222.0, "step": 5170 }, { "epoch": 0.882573818057689, "grad_norm": 0.5342659576222201, "learning_rate": 4.703874381293737e-06, "loss": 0.6162, "num_tokens": 486679549.0, "step": 5171 }, { "epoch": 0.8827444956477215, "grad_norm": 0.47485522285058845, "learning_rate": 4.697047277692439e-06, "loss": 0.5851, "num_tokens": 486772356.0, "step": 5172 }, { "epoch": 0.8829151732377539, "grad_norm": 0.4671103027886709, "learning_rate": 4.6902201740911426e-06, "loss": 0.6925, "num_tokens": 486883996.0, "step": 5173 }, { "epoch": 0.8830858508277863, "grad_norm": 0.4389894040951316, "learning_rate": 4.683393070489845e-06, "loss": 0.6119, "num_tokens": 486998746.0, "step": 5174 }, { "epoch": 0.8832565284178188, "grad_norm": 0.5266939283833365, "learning_rate": 4.6765659668885476e-06, "loss": 0.6363, "num_tokens": 487077686.0, "step": 5175 }, { "epoch": 0.8834272060078512, "grad_norm": 0.44162934346566235, "learning_rate": 4.669738863287251e-06, "loss": 0.4934, "num_tokens": 487165338.0, "step": 5176 }, { "epoch": 0.8835978835978836, "grad_norm": 0.436950663961292, "learning_rate": 4.662911759685953e-06, "loss": 0.614, "num_tokens": 487276540.0, "step": 5177 }, { "epoch": 0.883768561187916, "grad_norm": 0.46050386891461514, "learning_rate": 4.656084656084656e-06, "loss": 0.5656, "num_tokens": 487380514.0, "step": 5178 }, { "epoch": 0.8839392387779484, "grad_norm": 0.3984561442798695, "learning_rate": 4.649257552483359e-06, "loss": 0.5669, "num_tokens": 487513669.0, "step": 5179 }, { "epoch": 0.8841099163679809, "grad_norm": 0.44380955306344655, "learning_rate": 4.642430448882062e-06, "loss": 0.6426, "num_tokens": 487623413.0, "step": 5180 }, { "epoch": 0.8842805939580133, "grad_norm": 0.44418346628267796, "learning_rate": 4.635603345280765e-06, "loss": 0.5389, "num_tokens": 487724659.0, "step": 5181 }, { "epoch": 0.8844512715480457, "grad_norm": 0.4330570181548016, "learning_rate": 4.628776241679468e-06, "loss": 0.5364, "num_tokens": 487833760.0, "step": 5182 }, { "epoch": 0.8846219491380781, "grad_norm": 0.48686337883250086, "learning_rate": 4.621949138078171e-06, "loss": 0.5541, "num_tokens": 487921471.0, "step": 5183 }, { "epoch": 0.8847926267281107, "grad_norm": 0.516388280299665, "learning_rate": 4.615122034476874e-06, "loss": 0.6371, "num_tokens": 488014177.0, "step": 5184 }, { "epoch": 0.8849633043181431, "grad_norm": 0.4543543527124441, "learning_rate": 4.608294930875577e-06, "loss": 0.5004, "num_tokens": 488098975.0, "step": 5185 }, { "epoch": 0.8851339819081755, "grad_norm": 0.5041917245353443, "learning_rate": 4.601467827274279e-06, "loss": 0.5184, "num_tokens": 488173107.0, "step": 5186 }, { "epoch": 0.8853046594982079, "grad_norm": 0.5377418122641905, "learning_rate": 4.5946407236729825e-06, "loss": 0.5984, "num_tokens": 488257664.0, "step": 5187 }, { "epoch": 0.8854753370882403, "grad_norm": 0.44817041259464097, "learning_rate": 4.587813620071685e-06, "loss": 0.5116, "num_tokens": 488350705.0, "step": 5188 }, { "epoch": 0.8856460146782728, "grad_norm": 0.40237793792168597, "learning_rate": 4.5809865164703875e-06, "loss": 0.5459, "num_tokens": 488478605.0, "step": 5189 }, { "epoch": 0.8858166922683052, "grad_norm": 0.45395920203691287, "learning_rate": 4.5741594128690904e-06, "loss": 0.5686, "num_tokens": 488580503.0, "step": 5190 }, { "epoch": 0.8859873698583376, "grad_norm": 0.4590472144417116, "learning_rate": 4.567332309267793e-06, "loss": 0.5171, "num_tokens": 488670131.0, "step": 5191 }, { "epoch": 0.88615804744837, "grad_norm": 0.43362577036135047, "learning_rate": 4.560505205666496e-06, "loss": 0.5203, "num_tokens": 488767790.0, "step": 5192 }, { "epoch": 0.8863287250384024, "grad_norm": 0.4726936632256063, "learning_rate": 4.553678102065199e-06, "loss": 0.5324, "num_tokens": 488854311.0, "step": 5193 }, { "epoch": 0.8864994026284349, "grad_norm": 0.4892278147377575, "learning_rate": 4.546850998463902e-06, "loss": 0.5697, "num_tokens": 488933585.0, "step": 5194 }, { "epoch": 0.8866700802184673, "grad_norm": 0.4277987958800885, "learning_rate": 4.540023894862605e-06, "loss": 0.5729, "num_tokens": 489057672.0, "step": 5195 }, { "epoch": 0.8868407578084997, "grad_norm": 0.4273362870251915, "learning_rate": 4.533196791261308e-06, "loss": 0.4588, "num_tokens": 489152209.0, "step": 5196 }, { "epoch": 0.8870114353985322, "grad_norm": 0.45181982468692405, "learning_rate": 4.526369687660011e-06, "loss": 0.6, "num_tokens": 489252233.0, "step": 5197 }, { "epoch": 0.8871821129885646, "grad_norm": 0.4507474686863346, "learning_rate": 4.519542584058714e-06, "loss": 0.6551, "num_tokens": 489360832.0, "step": 5198 }, { "epoch": 0.8873527905785971, "grad_norm": 0.5089691539571304, "learning_rate": 4.512715480457417e-06, "loss": 0.5681, "num_tokens": 489436773.0, "step": 5199 }, { "epoch": 0.8875234681686295, "grad_norm": 0.49609657600175555, "learning_rate": 4.505888376856119e-06, "loss": 0.5276, "num_tokens": 489512974.0, "step": 5200 }, { "epoch": 0.8876941457586619, "grad_norm": 0.41609160728281275, "learning_rate": 4.4990612732548225e-06, "loss": 0.5639, "num_tokens": 489630736.0, "step": 5201 }, { "epoch": 0.8878648233486943, "grad_norm": 0.5014240135080654, "learning_rate": 4.492234169653525e-06, "loss": 0.5905, "num_tokens": 489713462.0, "step": 5202 }, { "epoch": 0.8880355009387267, "grad_norm": 0.4406056320818226, "learning_rate": 4.4854070660522275e-06, "loss": 0.4697, "num_tokens": 489801979.0, "step": 5203 }, { "epoch": 0.8882061785287592, "grad_norm": 0.5406345893239002, "learning_rate": 4.47857996245093e-06, "loss": 0.5338, "num_tokens": 489869273.0, "step": 5204 }, { "epoch": 0.8883768561187916, "grad_norm": 0.4499636318960037, "learning_rate": 4.471752858849633e-06, "loss": 0.6095, "num_tokens": 489978105.0, "step": 5205 }, { "epoch": 0.888547533708824, "grad_norm": 0.48130895385973954, "learning_rate": 4.464925755248336e-06, "loss": 0.6109, "num_tokens": 490072731.0, "step": 5206 }, { "epoch": 0.8887182112988564, "grad_norm": 0.4394238456280044, "learning_rate": 4.458098651647039e-06, "loss": 0.5535, "num_tokens": 490174153.0, "step": 5207 }, { "epoch": 0.8888888888888888, "grad_norm": 0.4274347138809668, "learning_rate": 4.451271548045742e-06, "loss": 0.5516, "num_tokens": 490286333.0, "step": 5208 }, { "epoch": 0.8890595664789214, "grad_norm": 0.4913399383469133, "learning_rate": 4.444444444444444e-06, "loss": 0.5714, "num_tokens": 490375160.0, "step": 5209 }, { "epoch": 0.8892302440689538, "grad_norm": 0.5176861346811151, "learning_rate": 4.437617340843148e-06, "loss": 0.7168, "num_tokens": 490468360.0, "step": 5210 }, { "epoch": 0.8894009216589862, "grad_norm": 0.43732589749366185, "learning_rate": 4.430790237241851e-06, "loss": 0.5165, "num_tokens": 490567370.0, "step": 5211 }, { "epoch": 0.8895715992490186, "grad_norm": 0.4305830107383396, "learning_rate": 4.423963133640554e-06, "loss": 0.5019, "num_tokens": 490668013.0, "step": 5212 }, { "epoch": 0.889742276839051, "grad_norm": 0.4787488075004882, "learning_rate": 4.417136030039257e-06, "loss": 0.5993, "num_tokens": 490763992.0, "step": 5213 }, { "epoch": 0.8899129544290835, "grad_norm": 0.4240549573566833, "learning_rate": 4.410308926437959e-06, "loss": 0.5941, "num_tokens": 490878354.0, "step": 5214 }, { "epoch": 0.8900836320191159, "grad_norm": 0.5075261701586948, "learning_rate": 4.4034818228366625e-06, "loss": 0.5355, "num_tokens": 490971534.0, "step": 5215 }, { "epoch": 0.8902543096091483, "grad_norm": 0.47472776360807156, "learning_rate": 4.3966547192353646e-06, "loss": 0.6198, "num_tokens": 491073514.0, "step": 5216 }, { "epoch": 0.8904249871991807, "grad_norm": 0.53203848905701, "learning_rate": 4.3898276156340675e-06, "loss": 0.6302, "num_tokens": 491168440.0, "step": 5217 }, { "epoch": 0.8905956647892131, "grad_norm": 0.46157069897880515, "learning_rate": 4.38300051203277e-06, "loss": 0.5471, "num_tokens": 491263369.0, "step": 5218 }, { "epoch": 0.8907663423792456, "grad_norm": 0.48277258060687583, "learning_rate": 4.376173408431473e-06, "loss": 0.4805, "num_tokens": 491344742.0, "step": 5219 }, { "epoch": 0.890937019969278, "grad_norm": 0.4132220125294861, "learning_rate": 4.369346304830176e-06, "loss": 0.5656, "num_tokens": 491463315.0, "step": 5220 }, { "epoch": 0.8911076975593104, "grad_norm": 0.46498008340596725, "learning_rate": 4.362519201228879e-06, "loss": 0.5776, "num_tokens": 491568578.0, "step": 5221 }, { "epoch": 0.8912783751493429, "grad_norm": 0.4171225388760492, "learning_rate": 4.355692097627582e-06, "loss": 0.5288, "num_tokens": 491679440.0, "step": 5222 }, { "epoch": 0.8914490527393754, "grad_norm": 0.4755065757006446, "learning_rate": 4.348864994026284e-06, "loss": 0.5332, "num_tokens": 491769931.0, "step": 5223 }, { "epoch": 0.8916197303294078, "grad_norm": 0.4464705985392974, "learning_rate": 4.342037890424988e-06, "loss": 0.6027, "num_tokens": 491879616.0, "step": 5224 }, { "epoch": 0.8917904079194402, "grad_norm": 0.487494172405644, "learning_rate": 4.33521078682369e-06, "loss": 0.6147, "num_tokens": 491969467.0, "step": 5225 }, { "epoch": 0.8919610855094726, "grad_norm": 0.44583834722841853, "learning_rate": 4.328383683222393e-06, "loss": 0.5423, "num_tokens": 492070482.0, "step": 5226 }, { "epoch": 0.892131763099505, "grad_norm": 0.4938775498619923, "learning_rate": 4.321556579621096e-06, "loss": 0.6052, "num_tokens": 492152322.0, "step": 5227 }, { "epoch": 0.8923024406895375, "grad_norm": 0.483764849720068, "learning_rate": 4.314729476019799e-06, "loss": 0.4989, "num_tokens": 492241238.0, "step": 5228 }, { "epoch": 0.8924731182795699, "grad_norm": 0.4584380589858693, "learning_rate": 4.3079023724185025e-06, "loss": 0.5825, "num_tokens": 492344316.0, "step": 5229 }, { "epoch": 0.8926437958696023, "grad_norm": 0.46644091808051347, "learning_rate": 4.3010752688172045e-06, "loss": 0.5459, "num_tokens": 492430273.0, "step": 5230 }, { "epoch": 0.8928144734596347, "grad_norm": 0.45893163868217035, "learning_rate": 4.2942481652159075e-06, "loss": 0.6272, "num_tokens": 492540977.0, "step": 5231 }, { "epoch": 0.8929851510496671, "grad_norm": 0.4483047902349285, "learning_rate": 4.28742106161461e-06, "loss": 0.5308, "num_tokens": 492633609.0, "step": 5232 }, { "epoch": 0.8931558286396996, "grad_norm": 0.4673956932626181, "learning_rate": 4.280593958013313e-06, "loss": 0.5553, "num_tokens": 492731714.0, "step": 5233 }, { "epoch": 0.8933265062297321, "grad_norm": 0.43646667008655654, "learning_rate": 4.273766854412016e-06, "loss": 0.591, "num_tokens": 492840586.0, "step": 5234 }, { "epoch": 0.8934971838197645, "grad_norm": 0.45807847454009676, "learning_rate": 4.266939750810719e-06, "loss": 0.4998, "num_tokens": 492927011.0, "step": 5235 }, { "epoch": 0.8936678614097969, "grad_norm": 0.40746145168678793, "learning_rate": 4.260112647209422e-06, "loss": 0.5659, "num_tokens": 493051308.0, "step": 5236 }, { "epoch": 0.8938385389998293, "grad_norm": 0.44833343559637356, "learning_rate": 4.253285543608124e-06, "loss": 0.612, "num_tokens": 493154850.0, "step": 5237 }, { "epoch": 0.8940092165898618, "grad_norm": 0.4813280187860236, "learning_rate": 4.246458440006828e-06, "loss": 0.5435, "num_tokens": 493239322.0, "step": 5238 }, { "epoch": 0.8941798941798942, "grad_norm": 0.4752171813329055, "learning_rate": 4.23963133640553e-06, "loss": 0.4726, "num_tokens": 493314421.0, "step": 5239 }, { "epoch": 0.8943505717699266, "grad_norm": 0.46009344356661014, "learning_rate": 4.232804232804233e-06, "loss": 0.5306, "num_tokens": 493405543.0, "step": 5240 }, { "epoch": 0.894521249359959, "grad_norm": 0.559319226317564, "learning_rate": 4.225977129202936e-06, "loss": 0.6695, "num_tokens": 493501577.0, "step": 5241 }, { "epoch": 0.8946919269499914, "grad_norm": 0.41983807404503964, "learning_rate": 4.219150025601639e-06, "loss": 0.5405, "num_tokens": 493610751.0, "step": 5242 }, { "epoch": 0.8948626045400239, "grad_norm": 0.43899402802083015, "learning_rate": 4.212322922000342e-06, "loss": 0.5682, "num_tokens": 493722913.0, "step": 5243 }, { "epoch": 0.8950332821300563, "grad_norm": 0.40345130892200903, "learning_rate": 4.2054958183990445e-06, "loss": 0.5599, "num_tokens": 493862749.0, "step": 5244 }, { "epoch": 0.8952039597200887, "grad_norm": 0.4421066236081747, "learning_rate": 4.1986687147977474e-06, "loss": 0.5539, "num_tokens": 493965164.0, "step": 5245 }, { "epoch": 0.8953746373101212, "grad_norm": 0.43890805251864784, "learning_rate": 4.19184161119645e-06, "loss": 0.5338, "num_tokens": 494064937.0, "step": 5246 }, { "epoch": 0.8955453149001537, "grad_norm": 0.4373907115916884, "learning_rate": 4.185014507595153e-06, "loss": 0.5507, "num_tokens": 494169202.0, "step": 5247 }, { "epoch": 0.8957159924901861, "grad_norm": 0.5174422958683034, "learning_rate": 4.178187403993856e-06, "loss": 0.665, "num_tokens": 494256878.0, "step": 5248 }, { "epoch": 0.8958866700802185, "grad_norm": 0.4986774954926219, "learning_rate": 4.171360300392559e-06, "loss": 0.5699, "num_tokens": 494352086.0, "step": 5249 }, { "epoch": 0.8960573476702509, "grad_norm": 0.5031466847793998, "learning_rate": 4.164533196791262e-06, "loss": 0.4711, "num_tokens": 494419676.0, "step": 5250 }, { "epoch": 0.8962280252602833, "grad_norm": 0.5197423779542748, "learning_rate": 4.157706093189964e-06, "loss": 0.5968, "num_tokens": 494500896.0, "step": 5251 }, { "epoch": 0.8963987028503158, "grad_norm": 0.42510734723035165, "learning_rate": 4.150878989588668e-06, "loss": 0.5811, "num_tokens": 494615638.0, "step": 5252 }, { "epoch": 0.8965693804403482, "grad_norm": 0.46290936544695704, "learning_rate": 4.14405188598737e-06, "loss": 0.5755, "num_tokens": 494710824.0, "step": 5253 }, { "epoch": 0.8967400580303806, "grad_norm": 0.4911672261363778, "learning_rate": 4.137224782386073e-06, "loss": 0.5892, "num_tokens": 494798321.0, "step": 5254 }, { "epoch": 0.896910735620413, "grad_norm": 0.4254330966404156, "learning_rate": 4.130397678784776e-06, "loss": 0.5955, "num_tokens": 494913764.0, "step": 5255 }, { "epoch": 0.8970814132104454, "grad_norm": 0.6267557706404799, "learning_rate": 4.123570575183479e-06, "loss": 0.5538, "num_tokens": 494991612.0, "step": 5256 }, { "epoch": 0.8972520908004779, "grad_norm": 0.44241023701384286, "learning_rate": 4.1167434715821816e-06, "loss": 0.5173, "num_tokens": 495100635.0, "step": 5257 }, { "epoch": 0.8974227683905103, "grad_norm": 0.4520685654065106, "learning_rate": 4.1099163679808845e-06, "loss": 0.6038, "num_tokens": 495208779.0, "step": 5258 }, { "epoch": 0.8975934459805428, "grad_norm": 0.45945278134961287, "learning_rate": 4.103089264379587e-06, "loss": 0.5905, "num_tokens": 495304844.0, "step": 5259 }, { "epoch": 0.8977641235705752, "grad_norm": 0.5354509501914485, "learning_rate": 4.09626216077829e-06, "loss": 0.627, "num_tokens": 495392226.0, "step": 5260 }, { "epoch": 0.8979348011606076, "grad_norm": 0.4032340317534278, "learning_rate": 4.089435057176993e-06, "loss": 0.5156, "num_tokens": 495512637.0, "step": 5261 }, { "epoch": 0.8981054787506401, "grad_norm": 0.44771728717162596, "learning_rate": 4.082607953575696e-06, "loss": 0.6031, "num_tokens": 495619513.0, "step": 5262 }, { "epoch": 0.8982761563406725, "grad_norm": 0.4150649049796801, "learning_rate": 4.075780849974399e-06, "loss": 0.5512, "num_tokens": 495733717.0, "step": 5263 }, { "epoch": 0.8984468339307049, "grad_norm": 0.41658836781015235, "learning_rate": 4.068953746373102e-06, "loss": 0.4952, "num_tokens": 495840630.0, "step": 5264 }, { "epoch": 0.8986175115207373, "grad_norm": 0.45164505378619707, "learning_rate": 4.062126642771804e-06, "loss": 0.5941, "num_tokens": 495943098.0, "step": 5265 }, { "epoch": 0.8987881891107697, "grad_norm": 0.49526836468910035, "learning_rate": 4.055299539170508e-06, "loss": 0.6003, "num_tokens": 496022412.0, "step": 5266 }, { "epoch": 0.8989588667008022, "grad_norm": 0.48833819366985004, "learning_rate": 4.04847243556921e-06, "loss": 0.5905, "num_tokens": 496108632.0, "step": 5267 }, { "epoch": 0.8991295442908346, "grad_norm": 0.4843890520790355, "learning_rate": 4.041645331967913e-06, "loss": 0.5489, "num_tokens": 496190875.0, "step": 5268 }, { "epoch": 0.899300221880867, "grad_norm": 0.5262138519860232, "learning_rate": 4.034818228366616e-06, "loss": 0.5388, "num_tokens": 496260235.0, "step": 5269 }, { "epoch": 0.8994708994708994, "grad_norm": 0.4672629422057678, "learning_rate": 4.027991124765319e-06, "loss": 0.5886, "num_tokens": 496364399.0, "step": 5270 }, { "epoch": 0.899641577060932, "grad_norm": 0.4330535279412933, "learning_rate": 4.0211640211640215e-06, "loss": 0.5384, "num_tokens": 496462839.0, "step": 5271 }, { "epoch": 0.8998122546509644, "grad_norm": 0.4865475903140146, "learning_rate": 4.0143369175627245e-06, "loss": 0.5799, "num_tokens": 496562017.0, "step": 5272 }, { "epoch": 0.8999829322409968, "grad_norm": 0.5067583980027336, "learning_rate": 4.007509813961427e-06, "loss": 0.5004, "num_tokens": 496630754.0, "step": 5273 }, { "epoch": 0.9001536098310292, "grad_norm": 0.4209393556852556, "learning_rate": 4.0006827103601294e-06, "loss": 0.5506, "num_tokens": 496743497.0, "step": 5274 }, { "epoch": 0.9003242874210616, "grad_norm": 0.4730226998249523, "learning_rate": 3.993855606758833e-06, "loss": 0.5591, "num_tokens": 496832662.0, "step": 5275 }, { "epoch": 0.900494965011094, "grad_norm": 0.46340366833775526, "learning_rate": 3.987028503157535e-06, "loss": 0.5754, "num_tokens": 496928032.0, "step": 5276 }, { "epoch": 0.9006656426011265, "grad_norm": 0.49030775290988543, "learning_rate": 3.980201399556239e-06, "loss": 0.5547, "num_tokens": 497006267.0, "step": 5277 }, { "epoch": 0.9008363201911589, "grad_norm": 0.4598990434856833, "learning_rate": 3.973374295954941e-06, "loss": 0.5292, "num_tokens": 497088961.0, "step": 5278 }, { "epoch": 0.9010069977811913, "grad_norm": 0.4200127412228858, "learning_rate": 3.966547192353644e-06, "loss": 0.5692, "num_tokens": 497209785.0, "step": 5279 }, { "epoch": 0.9011776753712237, "grad_norm": 0.4733149788918243, "learning_rate": 3.959720088752348e-06, "loss": 0.5311, "num_tokens": 497305190.0, "step": 5280 }, { "epoch": 0.9013483529612561, "grad_norm": 0.511258204862203, "learning_rate": 3.95289298515105e-06, "loss": 0.6011, "num_tokens": 497391844.0, "step": 5281 }, { "epoch": 0.9015190305512886, "grad_norm": 0.4986935448845693, "learning_rate": 3.946065881549753e-06, "loss": 0.517, "num_tokens": 497465332.0, "step": 5282 }, { "epoch": 0.901689708141321, "grad_norm": 0.4221048728982547, "learning_rate": 3.939238777948456e-06, "loss": 0.5561, "num_tokens": 497579889.0, "step": 5283 }, { "epoch": 0.9018603857313535, "grad_norm": 0.42456218401772416, "learning_rate": 3.932411674347159e-06, "loss": 0.4864, "num_tokens": 497685220.0, "step": 5284 }, { "epoch": 0.9020310633213859, "grad_norm": 0.41559796086174405, "learning_rate": 3.9255845707458615e-06, "loss": 0.6378, "num_tokens": 497817766.0, "step": 5285 }, { "epoch": 0.9022017409114184, "grad_norm": 0.45055082196088553, "learning_rate": 3.9187574671445644e-06, "loss": 0.6185, "num_tokens": 497922914.0, "step": 5286 }, { "epoch": 0.9023724185014508, "grad_norm": 0.44013370605188984, "learning_rate": 3.911930363543267e-06, "loss": 0.5069, "num_tokens": 498023618.0, "step": 5287 }, { "epoch": 0.9025430960914832, "grad_norm": 0.42297426614304845, "learning_rate": 3.905103259941969e-06, "loss": 0.4891, "num_tokens": 498125697.0, "step": 5288 }, { "epoch": 0.9027137736815156, "grad_norm": 0.4895850031492476, "learning_rate": 3.898276156340673e-06, "loss": 0.5902, "num_tokens": 498218138.0, "step": 5289 }, { "epoch": 0.902884451271548, "grad_norm": 0.4748635414974263, "learning_rate": 3.891449052739375e-06, "loss": 0.5877, "num_tokens": 498307557.0, "step": 5290 }, { "epoch": 0.9030551288615805, "grad_norm": 0.47349889347471913, "learning_rate": 3.884621949138078e-06, "loss": 0.5867, "num_tokens": 498406404.0, "step": 5291 }, { "epoch": 0.9032258064516129, "grad_norm": 0.4947965654885615, "learning_rate": 3.877794845536781e-06, "loss": 0.6185, "num_tokens": 498494104.0, "step": 5292 }, { "epoch": 0.9033964840416453, "grad_norm": 0.5195272577243116, "learning_rate": 3.870967741935484e-06, "loss": 0.5555, "num_tokens": 498569990.0, "step": 5293 }, { "epoch": 0.9035671616316777, "grad_norm": 0.5879878941000674, "learning_rate": 3.864140638334187e-06, "loss": 0.5428, "num_tokens": 498672054.0, "step": 5294 }, { "epoch": 0.9037378392217101, "grad_norm": 0.4678973775788344, "learning_rate": 3.85731353473289e-06, "loss": 0.5706, "num_tokens": 498762496.0, "step": 5295 }, { "epoch": 0.9039085168117427, "grad_norm": 0.4541718023087249, "learning_rate": 3.850486431131593e-06, "loss": 0.5356, "num_tokens": 498852834.0, "step": 5296 }, { "epoch": 0.9040791944017751, "grad_norm": 0.44342211838314227, "learning_rate": 3.843659327530296e-06, "loss": 0.5587, "num_tokens": 498955265.0, "step": 5297 }, { "epoch": 0.9042498719918075, "grad_norm": 0.42282129227179205, "learning_rate": 3.8368322239289986e-06, "loss": 0.5154, "num_tokens": 499059743.0, "step": 5298 }, { "epoch": 0.9044205495818399, "grad_norm": 0.4669888274350278, "learning_rate": 3.8300051203277015e-06, "loss": 0.5856, "num_tokens": 499157690.0, "step": 5299 }, { "epoch": 0.9045912271718723, "grad_norm": 0.4408748130655521, "learning_rate": 3.823178016726404e-06, "loss": 0.513, "num_tokens": 499251744.0, "step": 5300 }, { "epoch": 0.9047619047619048, "grad_norm": 0.47752986786073437, "learning_rate": 3.816350913125107e-06, "loss": 0.6228, "num_tokens": 499344975.0, "step": 5301 }, { "epoch": 0.9049325823519372, "grad_norm": 0.4966600871599882, "learning_rate": 3.80952380952381e-06, "loss": 0.5536, "num_tokens": 499435579.0, "step": 5302 }, { "epoch": 0.9051032599419696, "grad_norm": 0.5158845682359557, "learning_rate": 3.8026967059225127e-06, "loss": 0.6439, "num_tokens": 499531909.0, "step": 5303 }, { "epoch": 0.905273937532002, "grad_norm": 0.49805457114656576, "learning_rate": 3.7958696023212156e-06, "loss": 0.5132, "num_tokens": 499605042.0, "step": 5304 }, { "epoch": 0.9054446151220344, "grad_norm": 0.4499461462579217, "learning_rate": 3.789042498719918e-06, "loss": 0.5871, "num_tokens": 499709150.0, "step": 5305 }, { "epoch": 0.9056152927120669, "grad_norm": 0.439065994718018, "learning_rate": 3.7822153951186215e-06, "loss": 0.5882, "num_tokens": 499815893.0, "step": 5306 }, { "epoch": 0.9057859703020993, "grad_norm": 0.4822563399315101, "learning_rate": 3.775388291517324e-06, "loss": 0.5449, "num_tokens": 499901992.0, "step": 5307 }, { "epoch": 0.9059566478921318, "grad_norm": 0.4422057863700933, "learning_rate": 3.7685611879160273e-06, "loss": 0.5453, "num_tokens": 500001443.0, "step": 5308 }, { "epoch": 0.9061273254821642, "grad_norm": 0.43104621541545146, "learning_rate": 3.76173408431473e-06, "loss": 0.5065, "num_tokens": 500099143.0, "step": 5309 }, { "epoch": 0.9062980030721967, "grad_norm": 0.4492082808191896, "learning_rate": 3.7549069807134327e-06, "loss": 0.5391, "num_tokens": 500193172.0, "step": 5310 }, { "epoch": 0.9064686806622291, "grad_norm": 0.5138298907923574, "learning_rate": 3.7480798771121356e-06, "loss": 0.608, "num_tokens": 500271856.0, "step": 5311 }, { "epoch": 0.9066393582522615, "grad_norm": 0.5003975025638752, "learning_rate": 3.7412527735108386e-06, "loss": 0.5244, "num_tokens": 500349395.0, "step": 5312 }, { "epoch": 0.9068100358422939, "grad_norm": 0.4533467488639566, "learning_rate": 3.734425669909541e-06, "loss": 0.5453, "num_tokens": 500444188.0, "step": 5313 }, { "epoch": 0.9069807134323263, "grad_norm": 0.45990949927691965, "learning_rate": 3.7275985663082444e-06, "loss": 0.6021, "num_tokens": 500544490.0, "step": 5314 }, { "epoch": 0.9071513910223588, "grad_norm": 0.46098139830768714, "learning_rate": 3.720771462706947e-06, "loss": 0.5652, "num_tokens": 500637770.0, "step": 5315 }, { "epoch": 0.9073220686123912, "grad_norm": 0.5210042113732044, "learning_rate": 3.7139443591056494e-06, "loss": 0.4725, "num_tokens": 500697719.0, "step": 5316 }, { "epoch": 0.9074927462024236, "grad_norm": 0.44512613880827945, "learning_rate": 3.7071172555043527e-06, "loss": 0.5612, "num_tokens": 500797049.0, "step": 5317 }, { "epoch": 0.907663423792456, "grad_norm": 0.5158750782437626, "learning_rate": 3.700290151903055e-06, "loss": 0.6351, "num_tokens": 500885808.0, "step": 5318 }, { "epoch": 0.9078341013824884, "grad_norm": 0.4267600511761505, "learning_rate": 3.693463048301758e-06, "loss": 0.5221, "num_tokens": 500988810.0, "step": 5319 }, { "epoch": 0.9080047789725209, "grad_norm": 0.5552799697604747, "learning_rate": 3.6866359447004615e-06, "loss": 0.5437, "num_tokens": 501054264.0, "step": 5320 }, { "epoch": 0.9081754565625534, "grad_norm": 0.46002129527354063, "learning_rate": 3.679808841099164e-06, "loss": 0.5802, "num_tokens": 501159235.0, "step": 5321 }, { "epoch": 0.9083461341525858, "grad_norm": 0.4537611988055724, "learning_rate": 3.6729817374978664e-06, "loss": 0.515, "num_tokens": 501255005.0, "step": 5322 }, { "epoch": 0.9085168117426182, "grad_norm": 0.5042673160811617, "learning_rate": 3.6661546338965698e-06, "loss": 0.475, "num_tokens": 501318543.0, "step": 5323 }, { "epoch": 0.9086874893326506, "grad_norm": 0.5046256632126798, "learning_rate": 3.6593275302952723e-06, "loss": 0.5746, "num_tokens": 501399760.0, "step": 5324 }, { "epoch": 0.9088581669226831, "grad_norm": 0.49985995076857975, "learning_rate": 3.6525004266939756e-06, "loss": 0.6416, "num_tokens": 501499039.0, "step": 5325 }, { "epoch": 0.9090288445127155, "grad_norm": 0.5066076843312194, "learning_rate": 3.645673323092678e-06, "loss": 0.6208, "num_tokens": 501597290.0, "step": 5326 }, { "epoch": 0.9091995221027479, "grad_norm": 0.5612005584788595, "learning_rate": 3.638846219491381e-06, "loss": 0.5211, "num_tokens": 501665830.0, "step": 5327 }, { "epoch": 0.9093701996927803, "grad_norm": 0.48459371276387153, "learning_rate": 3.6320191158900844e-06, "loss": 0.6183, "num_tokens": 501758843.0, "step": 5328 }, { "epoch": 0.9095408772828127, "grad_norm": 0.4354684606710131, "learning_rate": 3.625192012288787e-06, "loss": 0.6265, "num_tokens": 501889294.0, "step": 5329 }, { "epoch": 0.9097115548728452, "grad_norm": 0.4532610157075881, "learning_rate": 3.6183649086874893e-06, "loss": 0.5604, "num_tokens": 501991920.0, "step": 5330 }, { "epoch": 0.9098822324628776, "grad_norm": 0.44036428186362525, "learning_rate": 3.6115378050861927e-06, "loss": 0.5367, "num_tokens": 502092757.0, "step": 5331 }, { "epoch": 0.91005291005291, "grad_norm": 0.44784260507666557, "learning_rate": 3.604710701484895e-06, "loss": 0.5584, "num_tokens": 502189872.0, "step": 5332 }, { "epoch": 0.9102235876429425, "grad_norm": 0.5098483310552882, "learning_rate": 3.597883597883598e-06, "loss": 0.5433, "num_tokens": 502268903.0, "step": 5333 }, { "epoch": 0.910394265232975, "grad_norm": 0.5279586718227585, "learning_rate": 3.591056494282301e-06, "loss": 0.6459, "num_tokens": 502346243.0, "step": 5334 }, { "epoch": 0.9105649428230074, "grad_norm": 0.467028359145154, "learning_rate": 3.584229390681004e-06, "loss": 0.533, "num_tokens": 502450048.0, "step": 5335 }, { "epoch": 0.9107356204130398, "grad_norm": 0.4759699921064004, "learning_rate": 3.5774022870797064e-06, "loss": 0.5314, "num_tokens": 502547782.0, "step": 5336 }, { "epoch": 0.9109062980030722, "grad_norm": 0.47551355070149093, "learning_rate": 3.5705751834784098e-06, "loss": 0.5232, "num_tokens": 502636343.0, "step": 5337 }, { "epoch": 0.9110769755931046, "grad_norm": 0.46191322105220445, "learning_rate": 3.5637480798771122e-06, "loss": 0.5805, "num_tokens": 502736017.0, "step": 5338 }, { "epoch": 0.911247653183137, "grad_norm": 0.4034513223538862, "learning_rate": 3.556920976275815e-06, "loss": 0.5138, "num_tokens": 502858152.0, "step": 5339 }, { "epoch": 0.9114183307731695, "grad_norm": 0.48779656399502175, "learning_rate": 3.550093872674518e-06, "loss": 0.6483, "num_tokens": 502960297.0, "step": 5340 }, { "epoch": 0.9115890083632019, "grad_norm": 0.5044727072421453, "learning_rate": 3.543266769073221e-06, "loss": 0.4858, "num_tokens": 503027500.0, "step": 5341 }, { "epoch": 0.9117596859532343, "grad_norm": 0.44210841959825287, "learning_rate": 3.536439665471924e-06, "loss": 0.5938, "num_tokens": 503141835.0, "step": 5342 }, { "epoch": 0.9119303635432667, "grad_norm": 0.4647582847544203, "learning_rate": 3.529612561870627e-06, "loss": 0.5936, "num_tokens": 503240687.0, "step": 5343 }, { "epoch": 0.9121010411332992, "grad_norm": 0.4887299124223356, "learning_rate": 3.5227854582693293e-06, "loss": 0.5657, "num_tokens": 503325106.0, "step": 5344 }, { "epoch": 0.9122717187233317, "grad_norm": 0.4692336486779531, "learning_rate": 3.5159583546680327e-06, "loss": 0.4883, "num_tokens": 503406563.0, "step": 5345 }, { "epoch": 0.9124423963133641, "grad_norm": 0.4344388918316503, "learning_rate": 3.509131251066735e-06, "loss": 0.5447, "num_tokens": 503517910.0, "step": 5346 }, { "epoch": 0.9126130739033965, "grad_norm": 0.5001312674113143, "learning_rate": 3.502304147465438e-06, "loss": 0.5722, "num_tokens": 503599299.0, "step": 5347 }, { "epoch": 0.9127837514934289, "grad_norm": 0.5042623242992517, "learning_rate": 3.495477043864141e-06, "loss": 0.5053, "num_tokens": 503677789.0, "step": 5348 }, { "epoch": 0.9129544290834614, "grad_norm": 0.44027735843265986, "learning_rate": 3.488649940262844e-06, "loss": 0.6658, "num_tokens": 503794962.0, "step": 5349 }, { "epoch": 0.9131251066734938, "grad_norm": 0.42622970059420473, "learning_rate": 3.4818228366615464e-06, "loss": 0.6041, "num_tokens": 503914808.0, "step": 5350 }, { "epoch": 0.9132957842635262, "grad_norm": 0.45234575808231536, "learning_rate": 3.4749957330602497e-06, "loss": 0.5556, "num_tokens": 504019217.0, "step": 5351 }, { "epoch": 0.9134664618535586, "grad_norm": 0.5843350939887356, "learning_rate": 3.4681686294589522e-06, "loss": 0.5486, "num_tokens": 504102075.0, "step": 5352 }, { "epoch": 0.913637139443591, "grad_norm": 0.452333424504742, "learning_rate": 3.461341525857655e-06, "loss": 0.5682, "num_tokens": 504209305.0, "step": 5353 }, { "epoch": 0.9138078170336235, "grad_norm": 0.5201904006886146, "learning_rate": 3.454514422256358e-06, "loss": 0.615, "num_tokens": 504290293.0, "step": 5354 }, { "epoch": 0.9139784946236559, "grad_norm": 0.4671772810853668, "learning_rate": 3.447687318655061e-06, "loss": 0.644, "num_tokens": 504404746.0, "step": 5355 }, { "epoch": 0.9141491722136883, "grad_norm": 0.48217747051886156, "learning_rate": 3.440860215053764e-06, "loss": 0.6058, "num_tokens": 504495659.0, "step": 5356 }, { "epoch": 0.9143198498037207, "grad_norm": 0.4813663513243222, "learning_rate": 3.434033111452467e-06, "loss": 0.5417, "num_tokens": 504583780.0, "step": 5357 }, { "epoch": 0.9144905273937532, "grad_norm": 0.5065771384967627, "learning_rate": 3.4272060078511693e-06, "loss": 0.5182, "num_tokens": 504654568.0, "step": 5358 }, { "epoch": 0.9146612049837857, "grad_norm": 0.47327112357492124, "learning_rate": 3.4203789042498726e-06, "loss": 0.6349, "num_tokens": 504757506.0, "step": 5359 }, { "epoch": 0.9148318825738181, "grad_norm": 0.4721451244647943, "learning_rate": 3.413551800648575e-06, "loss": 0.6801, "num_tokens": 504869566.0, "step": 5360 }, { "epoch": 0.9150025601638505, "grad_norm": 0.4269208440250479, "learning_rate": 3.4067246970472776e-06, "loss": 0.4944, "num_tokens": 504965126.0, "step": 5361 }, { "epoch": 0.9151732377538829, "grad_norm": 0.5307606704310299, "learning_rate": 3.399897593445981e-06, "loss": 0.5975, "num_tokens": 505044772.0, "step": 5362 }, { "epoch": 0.9153439153439153, "grad_norm": 0.5120139516645136, "learning_rate": 3.393070489844684e-06, "loss": 0.5743, "num_tokens": 505122677.0, "step": 5363 }, { "epoch": 0.9155145929339478, "grad_norm": 0.4836326939491452, "learning_rate": 3.3862433862433864e-06, "loss": 0.5085, "num_tokens": 505205454.0, "step": 5364 }, { "epoch": 0.9156852705239802, "grad_norm": 0.4628408185250484, "learning_rate": 3.3794162826420897e-06, "loss": 0.5299, "num_tokens": 505297419.0, "step": 5365 }, { "epoch": 0.9158559481140126, "grad_norm": 0.4615744884596534, "learning_rate": 3.372589179040792e-06, "loss": 0.5992, "num_tokens": 505391757.0, "step": 5366 }, { "epoch": 0.916026625704045, "grad_norm": 0.44355401910644043, "learning_rate": 3.3657620754394947e-06, "loss": 0.5813, "num_tokens": 505506295.0, "step": 5367 }, { "epoch": 0.9161973032940774, "grad_norm": 0.508227686957493, "learning_rate": 3.358934971838198e-06, "loss": 0.5755, "num_tokens": 505587306.0, "step": 5368 }, { "epoch": 0.9163679808841099, "grad_norm": 0.44736300493398373, "learning_rate": 3.3521078682369005e-06, "loss": 0.4977, "num_tokens": 505678373.0, "step": 5369 }, { "epoch": 0.9165386584741424, "grad_norm": 0.4694351177040667, "learning_rate": 3.3452807646356034e-06, "loss": 0.5826, "num_tokens": 505774890.0, "step": 5370 }, { "epoch": 0.9167093360641748, "grad_norm": 0.6663876269586804, "learning_rate": 3.3384536610343068e-06, "loss": 0.4857, "num_tokens": 505841467.0, "step": 5371 }, { "epoch": 0.9168800136542072, "grad_norm": 0.4545208309977447, "learning_rate": 3.3316265574330093e-06, "loss": 0.5744, "num_tokens": 505935793.0, "step": 5372 }, { "epoch": 0.9170506912442397, "grad_norm": 0.43577996536259334, "learning_rate": 3.3247994538317126e-06, "loss": 0.526, "num_tokens": 506044197.0, "step": 5373 }, { "epoch": 0.9172213688342721, "grad_norm": 0.4517687197397171, "learning_rate": 3.317972350230415e-06, "loss": 0.5096, "num_tokens": 506126640.0, "step": 5374 }, { "epoch": 0.9173920464243045, "grad_norm": 0.4434609696709867, "learning_rate": 3.3111452466291176e-06, "loss": 0.5472, "num_tokens": 506230854.0, "step": 5375 }, { "epoch": 0.9175627240143369, "grad_norm": 0.4095116442825416, "learning_rate": 3.304318143027821e-06, "loss": 0.4664, "num_tokens": 506329798.0, "step": 5376 }, { "epoch": 0.9177334016043693, "grad_norm": 0.44844797047807383, "learning_rate": 3.2974910394265234e-06, "loss": 0.5337, "num_tokens": 506422757.0, "step": 5377 }, { "epoch": 0.9179040791944018, "grad_norm": 0.4606612162017382, "learning_rate": 3.2906639358252263e-06, "loss": 0.5425, "num_tokens": 506513278.0, "step": 5378 }, { "epoch": 0.9180747567844342, "grad_norm": 0.4663436776679643, "learning_rate": 3.2838368322239293e-06, "loss": 0.6006, "num_tokens": 506611363.0, "step": 5379 }, { "epoch": 0.9182454343744666, "grad_norm": 0.47923633415278416, "learning_rate": 3.277009728622632e-06, "loss": 0.5879, "num_tokens": 506696974.0, "step": 5380 }, { "epoch": 0.918416111964499, "grad_norm": 0.4468500000436874, "learning_rate": 3.2701826250213347e-06, "loss": 0.4459, "num_tokens": 506777407.0, "step": 5381 }, { "epoch": 0.9185867895545314, "grad_norm": 0.4308993336112712, "learning_rate": 3.263355521420038e-06, "loss": 0.5332, "num_tokens": 506885758.0, "step": 5382 }, { "epoch": 0.918757467144564, "grad_norm": 0.5172131972987359, "learning_rate": 3.2565284178187405e-06, "loss": 0.5173, "num_tokens": 506973308.0, "step": 5383 }, { "epoch": 0.9189281447345964, "grad_norm": 0.45274002543110164, "learning_rate": 3.2497013142174434e-06, "loss": 0.6092, "num_tokens": 507082105.0, "step": 5384 }, { "epoch": 0.9190988223246288, "grad_norm": 0.4607219849849767, "learning_rate": 3.2428742106161463e-06, "loss": 0.5345, "num_tokens": 507173133.0, "step": 5385 }, { "epoch": 0.9192694999146612, "grad_norm": 0.47702540063480336, "learning_rate": 3.2360471070148492e-06, "loss": 0.6318, "num_tokens": 507274714.0, "step": 5386 }, { "epoch": 0.9194401775046936, "grad_norm": 0.46291282299786557, "learning_rate": 3.2292200034135517e-06, "loss": 0.532, "num_tokens": 507357552.0, "step": 5387 }, { "epoch": 0.9196108550947261, "grad_norm": 0.510328737852426, "learning_rate": 3.222392899812255e-06, "loss": 0.559, "num_tokens": 507432380.0, "step": 5388 }, { "epoch": 0.9197815326847585, "grad_norm": 0.43221827463855134, "learning_rate": 3.2155657962109576e-06, "loss": 0.4916, "num_tokens": 507524964.0, "step": 5389 }, { "epoch": 0.9199522102747909, "grad_norm": 0.49071839606685885, "learning_rate": 3.208738692609661e-06, "loss": 0.5689, "num_tokens": 507609896.0, "step": 5390 }, { "epoch": 0.9201228878648233, "grad_norm": 0.4529707337077909, "learning_rate": 3.2019115890083634e-06, "loss": 0.5667, "num_tokens": 507704008.0, "step": 5391 }, { "epoch": 0.9202935654548557, "grad_norm": 0.4705618945398495, "learning_rate": 3.1950844854070663e-06, "loss": 0.5283, "num_tokens": 507786472.0, "step": 5392 }, { "epoch": 0.9204642430448882, "grad_norm": 0.4368348951514721, "learning_rate": 3.1882573818057692e-06, "loss": 0.6467, "num_tokens": 507906666.0, "step": 5393 }, { "epoch": 0.9206349206349206, "grad_norm": 0.4856911365651797, "learning_rate": 3.181430278204472e-06, "loss": 0.5725, "num_tokens": 507995357.0, "step": 5394 }, { "epoch": 0.9208055982249531, "grad_norm": 0.480623501903059, "learning_rate": 3.1746031746031746e-06, "loss": 0.5847, "num_tokens": 508081024.0, "step": 5395 }, { "epoch": 0.9209762758149855, "grad_norm": 0.4330889110878446, "learning_rate": 3.167776071001878e-06, "loss": 0.5984, "num_tokens": 508204548.0, "step": 5396 }, { "epoch": 0.921146953405018, "grad_norm": 0.5496395877686541, "learning_rate": 3.1609489674005805e-06, "loss": 0.6269, "num_tokens": 508282575.0, "step": 5397 }, { "epoch": 0.9213176309950504, "grad_norm": 0.5070305830374331, "learning_rate": 3.1541218637992834e-06, "loss": 0.572, "num_tokens": 508368310.0, "step": 5398 }, { "epoch": 0.9214883085850828, "grad_norm": 0.4802923825241698, "learning_rate": 3.1472947601979863e-06, "loss": 0.6821, "num_tokens": 508477610.0, "step": 5399 }, { "epoch": 0.9216589861751152, "grad_norm": 0.49262242031505504, "learning_rate": 3.1404676565966892e-06, "loss": 0.599, "num_tokens": 508560304.0, "step": 5400 }, { "epoch": 0.9218296637651476, "grad_norm": 0.441284349955824, "learning_rate": 3.1336405529953917e-06, "loss": 0.5156, "num_tokens": 508651135.0, "step": 5401 }, { "epoch": 0.92200034135518, "grad_norm": 0.46539202495827175, "learning_rate": 3.126813449394095e-06, "loss": 0.4922, "num_tokens": 508731694.0, "step": 5402 }, { "epoch": 0.9221710189452125, "grad_norm": 0.47408563692211886, "learning_rate": 3.1199863457927975e-06, "loss": 0.5506, "num_tokens": 508816599.0, "step": 5403 }, { "epoch": 0.9223416965352449, "grad_norm": 0.4793262627800329, "learning_rate": 3.113159242191501e-06, "loss": 0.5778, "num_tokens": 508902634.0, "step": 5404 }, { "epoch": 0.9225123741252773, "grad_norm": 0.47045120456264994, "learning_rate": 3.1063321385902034e-06, "loss": 0.5614, "num_tokens": 508993785.0, "step": 5405 }, { "epoch": 0.9226830517153097, "grad_norm": 0.4143208201064686, "learning_rate": 3.0995050349889063e-06, "loss": 0.5811, "num_tokens": 509126174.0, "step": 5406 }, { "epoch": 0.9228537293053423, "grad_norm": 0.45648810354693653, "learning_rate": 3.092677931387609e-06, "loss": 0.5624, "num_tokens": 509222562.0, "step": 5407 }, { "epoch": 0.9230244068953747, "grad_norm": 0.49760873402381495, "learning_rate": 3.085850827786312e-06, "loss": 0.5931, "num_tokens": 509304314.0, "step": 5408 }, { "epoch": 0.9231950844854071, "grad_norm": 0.44195396307117074, "learning_rate": 3.0790237241850146e-06, "loss": 0.626, "num_tokens": 509416256.0, "step": 5409 }, { "epoch": 0.9233657620754395, "grad_norm": 0.5010736235552447, "learning_rate": 3.072196620583718e-06, "loss": 0.6003, "num_tokens": 509497625.0, "step": 5410 }, { "epoch": 0.9235364396654719, "grad_norm": 0.43433897628966134, "learning_rate": 3.0653695169824204e-06, "loss": 0.5707, "num_tokens": 509608525.0, "step": 5411 }, { "epoch": 0.9237071172555044, "grad_norm": 0.4793365227798253, "learning_rate": 3.058542413381123e-06, "loss": 0.5655, "num_tokens": 509694932.0, "step": 5412 }, { "epoch": 0.9238777948455368, "grad_norm": 0.5143717558000056, "learning_rate": 3.0517153097798263e-06, "loss": 0.5763, "num_tokens": 509769200.0, "step": 5413 }, { "epoch": 0.9240484724355692, "grad_norm": 0.5395618516712574, "learning_rate": 3.044888206178529e-06, "loss": 0.7504, "num_tokens": 509858776.0, "step": 5414 }, { "epoch": 0.9242191500256016, "grad_norm": 0.46635891285169057, "learning_rate": 3.0380611025772317e-06, "loss": 0.5125, "num_tokens": 509943949.0, "step": 5415 }, { "epoch": 0.924389827615634, "grad_norm": 0.4494488633703432, "learning_rate": 3.031233998975935e-06, "loss": 0.5817, "num_tokens": 510045590.0, "step": 5416 }, { "epoch": 0.9245605052056665, "grad_norm": 0.4550341275664514, "learning_rate": 3.0244068953746375e-06, "loss": 0.5502, "num_tokens": 510143411.0, "step": 5417 }, { "epoch": 0.9247311827956989, "grad_norm": 0.4350397067524437, "learning_rate": 3.01757979177334e-06, "loss": 0.5585, "num_tokens": 510252678.0, "step": 5418 }, { "epoch": 0.9249018603857313, "grad_norm": 0.42392258307952024, "learning_rate": 3.0107526881720433e-06, "loss": 0.5555, "num_tokens": 510357202.0, "step": 5419 }, { "epoch": 0.9250725379757638, "grad_norm": 0.4487421745033821, "learning_rate": 3.003925584570746e-06, "loss": 0.5639, "num_tokens": 510456841.0, "step": 5420 }, { "epoch": 0.9252432155657963, "grad_norm": 0.47182627887793044, "learning_rate": 2.997098480969449e-06, "loss": 0.6146, "num_tokens": 510564207.0, "step": 5421 }, { "epoch": 0.9254138931558287, "grad_norm": 0.46414321262438124, "learning_rate": 2.9902713773681517e-06, "loss": 0.5029, "num_tokens": 510648743.0, "step": 5422 }, { "epoch": 0.9255845707458611, "grad_norm": 0.516516242165001, "learning_rate": 2.9834442737668546e-06, "loss": 0.5336, "num_tokens": 510722560.0, "step": 5423 }, { "epoch": 0.9257552483358935, "grad_norm": 0.48174668508465424, "learning_rate": 2.976617170165558e-06, "loss": 0.5804, "num_tokens": 510809998.0, "step": 5424 }, { "epoch": 0.9259259259259259, "grad_norm": 0.41912109464123964, "learning_rate": 2.9697900665642604e-06, "loss": 0.4858, "num_tokens": 510912204.0, "step": 5425 }, { "epoch": 0.9260966035159583, "grad_norm": 0.503262449455988, "learning_rate": 2.962962962962963e-06, "loss": 0.5833, "num_tokens": 510999449.0, "step": 5426 }, { "epoch": 0.9262672811059908, "grad_norm": 0.4779268206076635, "learning_rate": 2.9561358593616662e-06, "loss": 0.5649, "num_tokens": 511086154.0, "step": 5427 }, { "epoch": 0.9264379586960232, "grad_norm": 0.49099405726595846, "learning_rate": 2.9493087557603687e-06, "loss": 0.6107, "num_tokens": 511174827.0, "step": 5428 }, { "epoch": 0.9266086362860556, "grad_norm": 0.4777903644923014, "learning_rate": 2.9424816521590717e-06, "loss": 0.5219, "num_tokens": 511252690.0, "step": 5429 }, { "epoch": 0.926779313876088, "grad_norm": 0.44628146890233444, "learning_rate": 2.9356545485577746e-06, "loss": 0.533, "num_tokens": 511348246.0, "step": 5430 }, { "epoch": 0.9269499914661204, "grad_norm": 0.49653448104671577, "learning_rate": 2.9288274449564775e-06, "loss": 0.5427, "num_tokens": 511427311.0, "step": 5431 }, { "epoch": 0.927120669056153, "grad_norm": 0.48838384317970834, "learning_rate": 2.92200034135518e-06, "loss": 0.5492, "num_tokens": 511515344.0, "step": 5432 }, { "epoch": 0.9272913466461854, "grad_norm": 0.5194174310295145, "learning_rate": 2.9151732377538833e-06, "loss": 0.4875, "num_tokens": 511580785.0, "step": 5433 }, { "epoch": 0.9274620242362178, "grad_norm": 0.5165505175198224, "learning_rate": 2.908346134152586e-06, "loss": 0.6093, "num_tokens": 511667288.0, "step": 5434 }, { "epoch": 0.9276327018262502, "grad_norm": 0.4270965063043252, "learning_rate": 2.9015190305512887e-06, "loss": 0.6399, "num_tokens": 511796469.0, "step": 5435 }, { "epoch": 0.9278033794162827, "grad_norm": 0.47224477620629407, "learning_rate": 2.8946919269499916e-06, "loss": 0.5264, "num_tokens": 511881684.0, "step": 5436 }, { "epoch": 0.9279740570063151, "grad_norm": 0.4148512081232373, "learning_rate": 2.8878648233486946e-06, "loss": 0.6146, "num_tokens": 512002294.0, "step": 5437 }, { "epoch": 0.9281447345963475, "grad_norm": 0.5120083507604413, "learning_rate": 2.8810377197473975e-06, "loss": 0.6223, "num_tokens": 512084807.0, "step": 5438 }, { "epoch": 0.9283154121863799, "grad_norm": 0.49117912841888095, "learning_rate": 2.8742106161461004e-06, "loss": 0.5265, "num_tokens": 512167445.0, "step": 5439 }, { "epoch": 0.9284860897764123, "grad_norm": 0.46433127088843723, "learning_rate": 2.867383512544803e-06, "loss": 0.5753, "num_tokens": 512260351.0, "step": 5440 }, { "epoch": 0.9286567673664448, "grad_norm": 0.4961786020063404, "learning_rate": 2.8605564089435062e-06, "loss": 0.5586, "num_tokens": 512343980.0, "step": 5441 }, { "epoch": 0.9288274449564772, "grad_norm": 0.46220960862882904, "learning_rate": 2.8537293053422087e-06, "loss": 0.5009, "num_tokens": 512434076.0, "step": 5442 }, { "epoch": 0.9289981225465096, "grad_norm": 0.4301772561569583, "learning_rate": 2.8469022017409116e-06, "loss": 0.5908, "num_tokens": 512553711.0, "step": 5443 }, { "epoch": 0.929168800136542, "grad_norm": 0.5881770692803521, "learning_rate": 2.8400750981396145e-06, "loss": 0.4968, "num_tokens": 512606044.0, "step": 5444 }, { "epoch": 0.9293394777265745, "grad_norm": 0.48879189998064143, "learning_rate": 2.8332479945383175e-06, "loss": 0.5806, "num_tokens": 512692371.0, "step": 5445 }, { "epoch": 0.929510155316607, "grad_norm": 0.47435363308539635, "learning_rate": 2.82642089093702e-06, "loss": 0.5603, "num_tokens": 512780901.0, "step": 5446 }, { "epoch": 0.9296808329066394, "grad_norm": 0.5090768102613537, "learning_rate": 2.8195937873357233e-06, "loss": 0.573, "num_tokens": 512862091.0, "step": 5447 }, { "epoch": 0.9298515104966718, "grad_norm": 0.42757385207961474, "learning_rate": 2.8127666837344258e-06, "loss": 0.4727, "num_tokens": 512964575.0, "step": 5448 }, { "epoch": 0.9300221880867042, "grad_norm": 0.4612374710946291, "learning_rate": 2.8059395801331287e-06, "loss": 0.4902, "num_tokens": 513046447.0, "step": 5449 }, { "epoch": 0.9301928656767366, "grad_norm": 0.4941565602074959, "learning_rate": 2.7991124765318316e-06, "loss": 0.5738, "num_tokens": 513129537.0, "step": 5450 }, { "epoch": 0.9303635432667691, "grad_norm": 0.43215244026115285, "learning_rate": 2.7922853729305345e-06, "loss": 0.5726, "num_tokens": 513244599.0, "step": 5451 }, { "epoch": 0.9305342208568015, "grad_norm": 0.4877135402998334, "learning_rate": 2.7854582693292374e-06, "loss": 0.4889, "num_tokens": 513319661.0, "step": 5452 }, { "epoch": 0.9307048984468339, "grad_norm": 0.4527870201419127, "learning_rate": 2.7786311657279404e-06, "loss": 0.5367, "num_tokens": 513423184.0, "step": 5453 }, { "epoch": 0.9308755760368663, "grad_norm": 0.48083522486946056, "learning_rate": 2.771804062126643e-06, "loss": 0.6676, "num_tokens": 513524761.0, "step": 5454 }, { "epoch": 0.9310462536268987, "grad_norm": 0.4879883834399059, "learning_rate": 2.764976958525346e-06, "loss": 0.5653, "num_tokens": 513622790.0, "step": 5455 }, { "epoch": 0.9312169312169312, "grad_norm": 0.47873073710321423, "learning_rate": 2.7581498549240487e-06, "loss": 0.5557, "num_tokens": 513708444.0, "step": 5456 }, { "epoch": 0.9313876088069637, "grad_norm": 0.484117496003213, "learning_rate": 2.7513227513227516e-06, "loss": 0.6, "num_tokens": 513802635.0, "step": 5457 }, { "epoch": 0.9315582863969961, "grad_norm": 0.4524305303345735, "learning_rate": 2.7444956477214545e-06, "loss": 0.5549, "num_tokens": 513903567.0, "step": 5458 }, { "epoch": 0.9317289639870285, "grad_norm": 0.46421008407117, "learning_rate": 2.7376685441201574e-06, "loss": 0.4863, "num_tokens": 513985335.0, "step": 5459 }, { "epoch": 0.931899641577061, "grad_norm": 0.4960144391496249, "learning_rate": 2.73084144051886e-06, "loss": 0.5611, "num_tokens": 514081536.0, "step": 5460 }, { "epoch": 0.9320703191670934, "grad_norm": 0.45350122309643115, "learning_rate": 2.7240143369175633e-06, "loss": 0.5808, "num_tokens": 514179894.0, "step": 5461 }, { "epoch": 0.9322409967571258, "grad_norm": 0.4659693488161964, "learning_rate": 2.7171872333162658e-06, "loss": 0.5001, "num_tokens": 514262005.0, "step": 5462 }, { "epoch": 0.9324116743471582, "grad_norm": 0.4857942488143114, "learning_rate": 2.7103601297149683e-06, "loss": 0.5724, "num_tokens": 514344273.0, "step": 5463 }, { "epoch": 0.9325823519371906, "grad_norm": 0.44775546769024827, "learning_rate": 2.7035330261136716e-06, "loss": 0.5959, "num_tokens": 514446663.0, "step": 5464 }, { "epoch": 0.932753029527223, "grad_norm": 0.4422306455812576, "learning_rate": 2.696705922512374e-06, "loss": 0.5305, "num_tokens": 514543699.0, "step": 5465 }, { "epoch": 0.9329237071172555, "grad_norm": 0.49336792238559185, "learning_rate": 2.689878818911077e-06, "loss": 0.563, "num_tokens": 514631790.0, "step": 5466 }, { "epoch": 0.9330943847072879, "grad_norm": 0.4453933129973194, "learning_rate": 2.6830517153097803e-06, "loss": 0.6291, "num_tokens": 514742120.0, "step": 5467 }, { "epoch": 0.9332650622973203, "grad_norm": 0.4694408857525028, "learning_rate": 2.676224611708483e-06, "loss": 0.5089, "num_tokens": 514827519.0, "step": 5468 }, { "epoch": 0.9334357398873528, "grad_norm": 0.4207233672556042, "learning_rate": 2.669397508107186e-06, "loss": 0.5307, "num_tokens": 514945515.0, "step": 5469 }, { "epoch": 0.9336064174773853, "grad_norm": 0.4544195889910741, "learning_rate": 2.6625704045058887e-06, "loss": 0.4612, "num_tokens": 515025309.0, "step": 5470 }, { "epoch": 0.9337770950674177, "grad_norm": 0.4816280735807452, "learning_rate": 2.655743300904591e-06, "loss": 0.5363, "num_tokens": 515109291.0, "step": 5471 }, { "epoch": 0.9339477726574501, "grad_norm": 0.4536244500638378, "learning_rate": 2.6489161973032945e-06, "loss": 0.5253, "num_tokens": 515200678.0, "step": 5472 }, { "epoch": 0.9341184502474825, "grad_norm": 0.45245453556976917, "learning_rate": 2.642089093701997e-06, "loss": 0.5615, "num_tokens": 515307961.0, "step": 5473 }, { "epoch": 0.9342891278375149, "grad_norm": 0.42765704307036945, "learning_rate": 2.6352619901007e-06, "loss": 0.5418, "num_tokens": 515409463.0, "step": 5474 }, { "epoch": 0.9344598054275474, "grad_norm": 0.450625168074865, "learning_rate": 2.6284348864994032e-06, "loss": 0.5456, "num_tokens": 515500421.0, "step": 5475 }, { "epoch": 0.9346304830175798, "grad_norm": 0.4430650353118932, "learning_rate": 2.6216077828981057e-06, "loss": 0.5876, "num_tokens": 515607173.0, "step": 5476 }, { "epoch": 0.9348011606076122, "grad_norm": 0.420508080427054, "learning_rate": 2.6147806792968082e-06, "loss": 0.5916, "num_tokens": 515727203.0, "step": 5477 }, { "epoch": 0.9349718381976446, "grad_norm": 0.49688577057702865, "learning_rate": 2.6079535756955116e-06, "loss": 0.6135, "num_tokens": 515812621.0, "step": 5478 }, { "epoch": 0.935142515787677, "grad_norm": 0.8139477643923653, "learning_rate": 2.601126472094214e-06, "loss": 0.6045, "num_tokens": 515897785.0, "step": 5479 }, { "epoch": 0.9353131933777095, "grad_norm": 0.47014765101069933, "learning_rate": 2.594299368492917e-06, "loss": 0.6722, "num_tokens": 516009037.0, "step": 5480 }, { "epoch": 0.9354838709677419, "grad_norm": 0.4165830135842234, "learning_rate": 2.58747226489162e-06, "loss": 0.565, "num_tokens": 516127182.0, "step": 5481 }, { "epoch": 0.9356545485577744, "grad_norm": 0.428945965934401, "learning_rate": 2.580645161290323e-06, "loss": 0.4866, "num_tokens": 516222854.0, "step": 5482 }, { "epoch": 0.9358252261478068, "grad_norm": 0.4957030934664764, "learning_rate": 2.5738180576890253e-06, "loss": 0.6484, "num_tokens": 516322068.0, "step": 5483 }, { "epoch": 0.9359959037378393, "grad_norm": 0.501171466618241, "learning_rate": 2.5669909540877286e-06, "loss": 0.5186, "num_tokens": 516398968.0, "step": 5484 }, { "epoch": 0.9361665813278717, "grad_norm": 0.49839906533849676, "learning_rate": 2.560163850486431e-06, "loss": 0.5454, "num_tokens": 516474998.0, "step": 5485 }, { "epoch": 0.9363372589179041, "grad_norm": 0.47009340287576906, "learning_rate": 2.5533367468851345e-06, "loss": 0.607, "num_tokens": 516567257.0, "step": 5486 }, { "epoch": 0.9365079365079365, "grad_norm": 0.4625353700066961, "learning_rate": 2.546509643283837e-06, "loss": 0.6035, "num_tokens": 516665359.0, "step": 5487 }, { "epoch": 0.9366786140979689, "grad_norm": 0.5255053347477155, "learning_rate": 2.53968253968254e-06, "loss": 0.5328, "num_tokens": 516739692.0, "step": 5488 }, { "epoch": 0.9368492916880014, "grad_norm": 0.5305372377777674, "learning_rate": 2.532855436081243e-06, "loss": 0.5917, "num_tokens": 516815373.0, "step": 5489 }, { "epoch": 0.9370199692780338, "grad_norm": 0.48897474173761374, "learning_rate": 2.5260283324799457e-06, "loss": 0.5402, "num_tokens": 516900342.0, "step": 5490 }, { "epoch": 0.9371906468680662, "grad_norm": 0.5944463411211689, "learning_rate": 2.519201228878648e-06, "loss": 0.6885, "num_tokens": 516988696.0, "step": 5491 }, { "epoch": 0.9373613244580986, "grad_norm": 0.4639316545111113, "learning_rate": 2.5123741252773515e-06, "loss": 0.5946, "num_tokens": 517090051.0, "step": 5492 }, { "epoch": 0.937532002048131, "grad_norm": 0.4373133140786267, "learning_rate": 2.505547021676054e-06, "loss": 0.6699, "num_tokens": 517227887.0, "step": 5493 }, { "epoch": 0.9377026796381636, "grad_norm": 0.41634757077402823, "learning_rate": 2.498719918074757e-06, "loss": 0.5652, "num_tokens": 517347190.0, "step": 5494 }, { "epoch": 0.937873357228196, "grad_norm": 0.42750661427894593, "learning_rate": 2.49189281447346e-06, "loss": 0.5785, "num_tokens": 517467357.0, "step": 5495 }, { "epoch": 0.9380440348182284, "grad_norm": 0.5010172548743511, "learning_rate": 2.4850657108721628e-06, "loss": 0.5726, "num_tokens": 517548274.0, "step": 5496 }, { "epoch": 0.9382147124082608, "grad_norm": 0.46495038823435, "learning_rate": 2.4782386072708657e-06, "loss": 0.4931, "num_tokens": 517633394.0, "step": 5497 }, { "epoch": 0.9383853899982932, "grad_norm": 0.4510771739803765, "learning_rate": 2.4714115036695686e-06, "loss": 0.4872, "num_tokens": 517725156.0, "step": 5498 }, { "epoch": 0.9385560675883257, "grad_norm": 0.4728633715076294, "learning_rate": 2.464584400068271e-06, "loss": 0.6234, "num_tokens": 517825059.0, "step": 5499 }, { "epoch": 0.9387267451783581, "grad_norm": 0.4218492295186224, "learning_rate": 2.457757296466974e-06, "loss": 0.5136, "num_tokens": 517927619.0, "step": 5500 }, { "epoch": 0.9388974227683905, "grad_norm": 0.46377855929179773, "learning_rate": 2.450930192865677e-06, "loss": 0.6156, "num_tokens": 518037015.0, "step": 5501 }, { "epoch": 0.9390681003584229, "grad_norm": 0.48326162895368574, "learning_rate": 2.44410308926438e-06, "loss": 0.5576, "num_tokens": 518129120.0, "step": 5502 }, { "epoch": 0.9392387779484553, "grad_norm": 0.43400996049318297, "learning_rate": 2.4372759856630828e-06, "loss": 0.575, "num_tokens": 518229119.0, "step": 5503 }, { "epoch": 0.9394094555384878, "grad_norm": 0.46871091461209224, "learning_rate": 2.4304488820617857e-06, "loss": 0.6197, "num_tokens": 518345966.0, "step": 5504 }, { "epoch": 0.9395801331285202, "grad_norm": 0.501733458584167, "learning_rate": 2.4236217784604886e-06, "loss": 0.5791, "num_tokens": 518433646.0, "step": 5505 }, { "epoch": 0.9397508107185527, "grad_norm": 0.5568345940324755, "learning_rate": 2.416794674859191e-06, "loss": 0.55, "num_tokens": 518497066.0, "step": 5506 }, { "epoch": 0.9399214883085851, "grad_norm": 0.405751066492576, "learning_rate": 2.409967571257894e-06, "loss": 0.5551, "num_tokens": 518627917.0, "step": 5507 }, { "epoch": 0.9400921658986175, "grad_norm": 0.47192871976486583, "learning_rate": 2.403140467656597e-06, "loss": 0.5578, "num_tokens": 518715239.0, "step": 5508 }, { "epoch": 0.94026284348865, "grad_norm": 0.4569995647564533, "learning_rate": 2.3963133640553e-06, "loss": 0.6576, "num_tokens": 518820145.0, "step": 5509 }, { "epoch": 0.9404335210786824, "grad_norm": 0.4621951057723491, "learning_rate": 2.3894862604540028e-06, "loss": 0.5909, "num_tokens": 518920725.0, "step": 5510 }, { "epoch": 0.9406041986687148, "grad_norm": 0.5028932636584134, "learning_rate": 2.3826591568527057e-06, "loss": 0.5397, "num_tokens": 518994760.0, "step": 5511 }, { "epoch": 0.9407748762587472, "grad_norm": 0.44298377777394027, "learning_rate": 2.3758320532514086e-06, "loss": 0.5407, "num_tokens": 519094351.0, "step": 5512 }, { "epoch": 0.9409455538487796, "grad_norm": 0.48128618823381547, "learning_rate": 2.369004949650111e-06, "loss": 0.6084, "num_tokens": 519182564.0, "step": 5513 }, { "epoch": 0.9411162314388121, "grad_norm": 0.42169913286935884, "learning_rate": 2.362177846048814e-06, "loss": 0.5332, "num_tokens": 519282494.0, "step": 5514 }, { "epoch": 0.9412869090288445, "grad_norm": 0.4284489780730588, "learning_rate": 2.355350742447517e-06, "loss": 0.4606, "num_tokens": 519376380.0, "step": 5515 }, { "epoch": 0.9414575866188769, "grad_norm": 0.5117439330195109, "learning_rate": 2.3485236388462194e-06, "loss": 0.6465, "num_tokens": 519463067.0, "step": 5516 }, { "epoch": 0.9416282642089093, "grad_norm": 0.4722589415150123, "learning_rate": 2.3416965352449223e-06, "loss": 0.5541, "num_tokens": 519549808.0, "step": 5517 }, { "epoch": 0.9417989417989417, "grad_norm": 0.4996821841344559, "learning_rate": 2.3348694316436257e-06, "loss": 0.5878, "num_tokens": 519630463.0, "step": 5518 }, { "epoch": 0.9419696193889743, "grad_norm": 0.47087646994119897, "learning_rate": 2.328042328042328e-06, "loss": 0.5664, "num_tokens": 519715254.0, "step": 5519 }, { "epoch": 0.9421402969790067, "grad_norm": 0.4431299746142668, "learning_rate": 2.321215224441031e-06, "loss": 0.5734, "num_tokens": 519812761.0, "step": 5520 }, { "epoch": 0.9423109745690391, "grad_norm": 0.4520354448215237, "learning_rate": 2.314388120839734e-06, "loss": 0.5208, "num_tokens": 519904175.0, "step": 5521 }, { "epoch": 0.9424816521590715, "grad_norm": 0.4332533363041424, "learning_rate": 2.307561017238437e-06, "loss": 0.5981, "num_tokens": 520014990.0, "step": 5522 }, { "epoch": 0.942652329749104, "grad_norm": 0.40101880143200513, "learning_rate": 2.3007339136371394e-06, "loss": 0.5366, "num_tokens": 520147213.0, "step": 5523 }, { "epoch": 0.9428230073391364, "grad_norm": 0.4742377978890138, "learning_rate": 2.2939068100358423e-06, "loss": 0.5774, "num_tokens": 520235150.0, "step": 5524 }, { "epoch": 0.9429936849291688, "grad_norm": 0.41362470867889783, "learning_rate": 2.2870797064345452e-06, "loss": 0.511, "num_tokens": 520340148.0, "step": 5525 }, { "epoch": 0.9431643625192012, "grad_norm": 0.4583274958995653, "learning_rate": 2.280252602833248e-06, "loss": 0.5545, "num_tokens": 520442538.0, "step": 5526 }, { "epoch": 0.9433350401092336, "grad_norm": 0.43702246580489773, "learning_rate": 2.273425499231951e-06, "loss": 0.5123, "num_tokens": 520537092.0, "step": 5527 }, { "epoch": 0.943505717699266, "grad_norm": 0.5082019336358123, "learning_rate": 2.266598395630654e-06, "loss": 0.6131, "num_tokens": 520630112.0, "step": 5528 }, { "epoch": 0.9436763952892985, "grad_norm": 0.4446393936744557, "learning_rate": 2.259771292029357e-06, "loss": 0.6106, "num_tokens": 520739107.0, "step": 5529 }, { "epoch": 0.9438470728793309, "grad_norm": 0.46418902421865454, "learning_rate": 2.2529441884280594e-06, "loss": 0.6185, "num_tokens": 520841670.0, "step": 5530 }, { "epoch": 0.9440177504693634, "grad_norm": 0.49997033013949327, "learning_rate": 2.2461170848267623e-06, "loss": 0.6563, "num_tokens": 520934176.0, "step": 5531 }, { "epoch": 0.9441884280593958, "grad_norm": 0.45024575891637125, "learning_rate": 2.239289981225465e-06, "loss": 0.5746, "num_tokens": 521028519.0, "step": 5532 }, { "epoch": 0.9443591056494283, "grad_norm": 0.5512217015226967, "learning_rate": 2.232462877624168e-06, "loss": 0.5463, "num_tokens": 521105749.0, "step": 5533 }, { "epoch": 0.9445297832394607, "grad_norm": 0.42512112299975735, "learning_rate": 2.225635774022871e-06, "loss": 0.5521, "num_tokens": 521218240.0, "step": 5534 }, { "epoch": 0.9447004608294931, "grad_norm": 0.49709913094698155, "learning_rate": 2.218808670421574e-06, "loss": 0.5708, "num_tokens": 521301379.0, "step": 5535 }, { "epoch": 0.9448711384195255, "grad_norm": 0.46246395702015397, "learning_rate": 2.211981566820277e-06, "loss": 0.5673, "num_tokens": 521397450.0, "step": 5536 }, { "epoch": 0.945041816009558, "grad_norm": 0.46501344254222005, "learning_rate": 2.2051544632189794e-06, "loss": 0.6401, "num_tokens": 521496432.0, "step": 5537 }, { "epoch": 0.9452124935995904, "grad_norm": 0.495430024180871, "learning_rate": 2.1983273596176823e-06, "loss": 0.6013, "num_tokens": 521583965.0, "step": 5538 }, { "epoch": 0.9453831711896228, "grad_norm": 0.5039497736519625, "learning_rate": 2.191500256016385e-06, "loss": 0.526, "num_tokens": 521657152.0, "step": 5539 }, { "epoch": 0.9455538487796552, "grad_norm": 0.48819392177109505, "learning_rate": 2.184673152415088e-06, "loss": 0.5072, "num_tokens": 521740336.0, "step": 5540 }, { "epoch": 0.9457245263696876, "grad_norm": 0.47725965824188826, "learning_rate": 2.177846048813791e-06, "loss": 0.5258, "num_tokens": 521825166.0, "step": 5541 }, { "epoch": 0.94589520395972, "grad_norm": 0.440913540017944, "learning_rate": 2.171018945212494e-06, "loss": 0.58, "num_tokens": 521930629.0, "step": 5542 }, { "epoch": 0.9460658815497525, "grad_norm": 0.43112496451010596, "learning_rate": 2.1641918416111964e-06, "loss": 0.5928, "num_tokens": 522041496.0, "step": 5543 }, { "epoch": 0.946236559139785, "grad_norm": 0.47562846547546406, "learning_rate": 2.1573647380098994e-06, "loss": 0.5329, "num_tokens": 522126125.0, "step": 5544 }, { "epoch": 0.9464072367298174, "grad_norm": 0.512851084280365, "learning_rate": 2.1505376344086023e-06, "loss": 0.5827, "num_tokens": 522203149.0, "step": 5545 }, { "epoch": 0.9465779143198498, "grad_norm": 0.46177327020534625, "learning_rate": 2.143710530807305e-06, "loss": 0.567, "num_tokens": 522291092.0, "step": 5546 }, { "epoch": 0.9467485919098823, "grad_norm": 0.4793975122689328, "learning_rate": 2.136883427206008e-06, "loss": 0.4875, "num_tokens": 522365837.0, "step": 5547 }, { "epoch": 0.9469192694999147, "grad_norm": 0.44614643563467116, "learning_rate": 2.130056323604711e-06, "loss": 0.5119, "num_tokens": 522452306.0, "step": 5548 }, { "epoch": 0.9470899470899471, "grad_norm": 0.43252821019295157, "learning_rate": 2.123229220003414e-06, "loss": 0.6229, "num_tokens": 522572737.0, "step": 5549 }, { "epoch": 0.9472606246799795, "grad_norm": 0.5296908526619216, "learning_rate": 2.1164021164021164e-06, "loss": 0.5394, "num_tokens": 522642537.0, "step": 5550 }, { "epoch": 0.9474313022700119, "grad_norm": 0.4653246606441094, "learning_rate": 2.1095750128008193e-06, "loss": 0.6143, "num_tokens": 522759404.0, "step": 5551 }, { "epoch": 0.9476019798600444, "grad_norm": 0.49726987176830967, "learning_rate": 2.1027479091995223e-06, "loss": 0.6634, "num_tokens": 522852425.0, "step": 5552 }, { "epoch": 0.9477726574500768, "grad_norm": 0.5009676008761619, "learning_rate": 2.095920805598225e-06, "loss": 0.5427, "num_tokens": 522937335.0, "step": 5553 }, { "epoch": 0.9479433350401092, "grad_norm": 0.4693971345776611, "learning_rate": 2.089093701996928e-06, "loss": 0.5637, "num_tokens": 523026473.0, "step": 5554 }, { "epoch": 0.9481140126301416, "grad_norm": 0.49389874934244277, "learning_rate": 2.082266598395631e-06, "loss": 0.5033, "num_tokens": 523100789.0, "step": 5555 }, { "epoch": 0.9482846902201741, "grad_norm": 0.5060054378085181, "learning_rate": 2.075439494794334e-06, "loss": 0.5729, "num_tokens": 523184016.0, "step": 5556 }, { "epoch": 0.9484553678102066, "grad_norm": 0.4791512480208918, "learning_rate": 2.0686123911930364e-06, "loss": 0.5546, "num_tokens": 523275297.0, "step": 5557 }, { "epoch": 0.948626045400239, "grad_norm": 0.5033954265558841, "learning_rate": 2.0617852875917393e-06, "loss": 0.621, "num_tokens": 523354642.0, "step": 5558 }, { "epoch": 0.9487967229902714, "grad_norm": 0.46655081557621086, "learning_rate": 2.0549581839904422e-06, "loss": 0.5826, "num_tokens": 523453747.0, "step": 5559 }, { "epoch": 0.9489674005803038, "grad_norm": 0.4385954694693952, "learning_rate": 2.048131080389145e-06, "loss": 0.533, "num_tokens": 523548425.0, "step": 5560 }, { "epoch": 0.9491380781703362, "grad_norm": 0.48986864227597265, "learning_rate": 2.041303976787848e-06, "loss": 0.4933, "num_tokens": 523626578.0, "step": 5561 }, { "epoch": 0.9493087557603687, "grad_norm": 0.47659295770551036, "learning_rate": 2.034476873186551e-06, "loss": 0.5329, "num_tokens": 523714047.0, "step": 5562 }, { "epoch": 0.9494794333504011, "grad_norm": 0.47566133340272493, "learning_rate": 2.027649769585254e-06, "loss": 0.6278, "num_tokens": 523807852.0, "step": 5563 }, { "epoch": 0.9496501109404335, "grad_norm": 0.4443576342458348, "learning_rate": 2.0208226659839564e-06, "loss": 0.6057, "num_tokens": 523917794.0, "step": 5564 }, { "epoch": 0.9498207885304659, "grad_norm": 0.47584692106582427, "learning_rate": 2.0139955623826593e-06, "loss": 0.5552, "num_tokens": 524009716.0, "step": 5565 }, { "epoch": 0.9499914661204983, "grad_norm": 0.48995258197172425, "learning_rate": 2.0071684587813622e-06, "loss": 0.6287, "num_tokens": 524100870.0, "step": 5566 }, { "epoch": 0.9501621437105308, "grad_norm": 0.428005897737263, "learning_rate": 2.0003413551800647e-06, "loss": 0.5838, "num_tokens": 524216823.0, "step": 5567 }, { "epoch": 0.9503328213005633, "grad_norm": 0.49101891365942735, "learning_rate": 1.9935142515787676e-06, "loss": 0.5382, "num_tokens": 524292286.0, "step": 5568 }, { "epoch": 0.9505034988905957, "grad_norm": 0.45106099781214676, "learning_rate": 1.9866871479774706e-06, "loss": 0.6001, "num_tokens": 524405206.0, "step": 5569 }, { "epoch": 0.9506741764806281, "grad_norm": 0.4408496019973162, "learning_rate": 1.979860044376174e-06, "loss": 0.603, "num_tokens": 524517970.0, "step": 5570 }, { "epoch": 0.9508448540706606, "grad_norm": 0.5355440669928219, "learning_rate": 1.9730329407748764e-06, "loss": 0.6416, "num_tokens": 524591804.0, "step": 5571 }, { "epoch": 0.951015531660693, "grad_norm": 0.4865943152596659, "learning_rate": 1.9662058371735793e-06, "loss": 0.5468, "num_tokens": 524670952.0, "step": 5572 }, { "epoch": 0.9511862092507254, "grad_norm": 0.4435099098753487, "learning_rate": 1.9593787335722822e-06, "loss": 0.5563, "num_tokens": 524767317.0, "step": 5573 }, { "epoch": 0.9513568868407578, "grad_norm": 0.5432998010606401, "learning_rate": 1.9525516299709847e-06, "loss": 0.6554, "num_tokens": 524839249.0, "step": 5574 }, { "epoch": 0.9515275644307902, "grad_norm": 0.4716508405497249, "learning_rate": 1.9457245263696876e-06, "loss": 0.5918, "num_tokens": 524937565.0, "step": 5575 }, { "epoch": 0.9516982420208226, "grad_norm": 0.42541288928304094, "learning_rate": 1.9388974227683905e-06, "loss": 0.5053, "num_tokens": 525043861.0, "step": 5576 }, { "epoch": 0.9518689196108551, "grad_norm": 0.42408597910004026, "learning_rate": 1.9320703191670935e-06, "loss": 0.5714, "num_tokens": 525157776.0, "step": 5577 }, { "epoch": 0.9520395972008875, "grad_norm": 0.5168049422839908, "learning_rate": 1.9252432155657964e-06, "loss": 0.6048, "num_tokens": 525231587.0, "step": 5578 }, { "epoch": 0.9522102747909199, "grad_norm": 0.447444148774997, "learning_rate": 1.9184161119644993e-06, "loss": 0.5537, "num_tokens": 525322855.0, "step": 5579 }, { "epoch": 0.9523809523809523, "grad_norm": 0.43272308846181695, "learning_rate": 1.911589008363202e-06, "loss": 0.5186, "num_tokens": 525424176.0, "step": 5580 }, { "epoch": 0.9525516299709849, "grad_norm": 0.419078960502091, "learning_rate": 1.904761904761905e-06, "loss": 0.5686, "num_tokens": 525544654.0, "step": 5581 }, { "epoch": 0.9527223075610173, "grad_norm": 0.4766283450111657, "learning_rate": 1.8979348011606078e-06, "loss": 0.6453, "num_tokens": 525641410.0, "step": 5582 }, { "epoch": 0.9528929851510497, "grad_norm": 0.4534863386777123, "learning_rate": 1.8911076975593107e-06, "loss": 0.527, "num_tokens": 525737390.0, "step": 5583 }, { "epoch": 0.9530636627410821, "grad_norm": 0.5166528046982007, "learning_rate": 1.8842805939580137e-06, "loss": 0.5414, "num_tokens": 525811252.0, "step": 5584 }, { "epoch": 0.9532343403311145, "grad_norm": 0.48864370693164777, "learning_rate": 1.8774534903567164e-06, "loss": 0.4975, "num_tokens": 525884742.0, "step": 5585 }, { "epoch": 0.953405017921147, "grad_norm": 0.41397487645416814, "learning_rate": 1.8706263867554193e-06, "loss": 0.5391, "num_tokens": 526006370.0, "step": 5586 }, { "epoch": 0.9535756955111794, "grad_norm": 0.49524068929270776, "learning_rate": 1.8637992831541222e-06, "loss": 0.5773, "num_tokens": 526089919.0, "step": 5587 }, { "epoch": 0.9537463731012118, "grad_norm": 0.4665911749008316, "learning_rate": 1.8569721795528247e-06, "loss": 0.5314, "num_tokens": 526173389.0, "step": 5588 }, { "epoch": 0.9539170506912442, "grad_norm": 0.5174849130243897, "learning_rate": 1.8501450759515276e-06, "loss": 0.6467, "num_tokens": 526257430.0, "step": 5589 }, { "epoch": 0.9540877282812766, "grad_norm": 0.4294553752509148, "learning_rate": 1.8433179723502307e-06, "loss": 0.575, "num_tokens": 526366967.0, "step": 5590 }, { "epoch": 0.9542584058713091, "grad_norm": 0.4633548674133098, "learning_rate": 1.8364908687489332e-06, "loss": 0.5187, "num_tokens": 526452751.0, "step": 5591 }, { "epoch": 0.9544290834613415, "grad_norm": 0.45292851417477076, "learning_rate": 1.8296637651476361e-06, "loss": 0.5684, "num_tokens": 526552378.0, "step": 5592 }, { "epoch": 0.954599761051374, "grad_norm": 0.45599482368180416, "learning_rate": 1.822836661546339e-06, "loss": 0.5165, "num_tokens": 526641249.0, "step": 5593 }, { "epoch": 0.9547704386414064, "grad_norm": 0.42265867537011975, "learning_rate": 1.8160095579450422e-06, "loss": 0.5685, "num_tokens": 526758428.0, "step": 5594 }, { "epoch": 0.9549411162314388, "grad_norm": 0.48350722710141364, "learning_rate": 1.8091824543437447e-06, "loss": 0.5465, "num_tokens": 526844724.0, "step": 5595 }, { "epoch": 0.9551117938214713, "grad_norm": 0.4755454860873545, "learning_rate": 1.8023553507424476e-06, "loss": 0.5077, "num_tokens": 526924053.0, "step": 5596 }, { "epoch": 0.9552824714115037, "grad_norm": 0.4857378404402684, "learning_rate": 1.7955282471411505e-06, "loss": 0.554, "num_tokens": 527009473.0, "step": 5597 }, { "epoch": 0.9554531490015361, "grad_norm": 0.5268833361598596, "learning_rate": 1.7887011435398532e-06, "loss": 0.5772, "num_tokens": 527082276.0, "step": 5598 }, { "epoch": 0.9556238265915685, "grad_norm": 0.46143196554653304, "learning_rate": 1.7818740399385561e-06, "loss": 0.5695, "num_tokens": 527177970.0, "step": 5599 }, { "epoch": 0.955794504181601, "grad_norm": 0.4207952237765409, "learning_rate": 1.775046936337259e-06, "loss": 0.5508, "num_tokens": 527298055.0, "step": 5600 }, { "epoch": 0.9559651817716334, "grad_norm": 0.45847028570217063, "learning_rate": 1.768219832735962e-06, "loss": 0.5684, "num_tokens": 527402548.0, "step": 5601 }, { "epoch": 0.9561358593616658, "grad_norm": 0.3764487117060031, "learning_rate": 1.7613927291346647e-06, "loss": 0.5264, "num_tokens": 527546451.0, "step": 5602 }, { "epoch": 0.9563065369516982, "grad_norm": 0.49202400717648787, "learning_rate": 1.7545656255333676e-06, "loss": 0.5273, "num_tokens": 527623874.0, "step": 5603 }, { "epoch": 0.9564772145417306, "grad_norm": 0.49392708258510293, "learning_rate": 1.7477385219320705e-06, "loss": 0.4943, "num_tokens": 527706039.0, "step": 5604 }, { "epoch": 0.956647892131763, "grad_norm": 0.5234722613134056, "learning_rate": 1.7409114183307732e-06, "loss": 0.5409, "num_tokens": 527788059.0, "step": 5605 }, { "epoch": 0.9568185697217956, "grad_norm": 0.487795969948059, "learning_rate": 1.7340843147294761e-06, "loss": 0.5715, "num_tokens": 527878129.0, "step": 5606 }, { "epoch": 0.956989247311828, "grad_norm": 0.4605641698677367, "learning_rate": 1.727257211128179e-06, "loss": 0.5316, "num_tokens": 527967610.0, "step": 5607 }, { "epoch": 0.9571599249018604, "grad_norm": 0.47227584981124276, "learning_rate": 1.720430107526882e-06, "loss": 0.486, "num_tokens": 528044372.0, "step": 5608 }, { "epoch": 0.9573306024918928, "grad_norm": 0.47688294287048427, "learning_rate": 1.7136030039255846e-06, "loss": 0.5532, "num_tokens": 528135537.0, "step": 5609 }, { "epoch": 0.9575012800819253, "grad_norm": 0.4626319052288818, "learning_rate": 1.7067759003242876e-06, "loss": 0.6085, "num_tokens": 528240836.0, "step": 5610 }, { "epoch": 0.9576719576719577, "grad_norm": 0.4641923907514908, "learning_rate": 1.6999487967229905e-06, "loss": 0.58, "num_tokens": 528343096.0, "step": 5611 }, { "epoch": 0.9578426352619901, "grad_norm": 0.41276532314091224, "learning_rate": 1.6931216931216932e-06, "loss": 0.5009, "num_tokens": 528449318.0, "step": 5612 }, { "epoch": 0.9580133128520225, "grad_norm": 0.480380739714127, "learning_rate": 1.686294589520396e-06, "loss": 0.5409, "num_tokens": 528540231.0, "step": 5613 }, { "epoch": 0.9581839904420549, "grad_norm": 0.45189456729999017, "learning_rate": 1.679467485919099e-06, "loss": 0.5637, "num_tokens": 528642534.0, "step": 5614 }, { "epoch": 0.9583546680320874, "grad_norm": 0.5406397137116534, "learning_rate": 1.6726403823178017e-06, "loss": 0.6301, "num_tokens": 528747464.0, "step": 5615 }, { "epoch": 0.9585253456221198, "grad_norm": 0.4311526320655303, "learning_rate": 1.6658132787165046e-06, "loss": 0.5668, "num_tokens": 528861750.0, "step": 5616 }, { "epoch": 0.9586960232121522, "grad_norm": 0.40478539061303903, "learning_rate": 1.6589861751152075e-06, "loss": 0.5725, "num_tokens": 528985271.0, "step": 5617 }, { "epoch": 0.9588667008021847, "grad_norm": 0.45637152506792417, "learning_rate": 1.6521590715139105e-06, "loss": 0.5116, "num_tokens": 529068446.0, "step": 5618 }, { "epoch": 0.9590373783922171, "grad_norm": 0.4575763599330476, "learning_rate": 1.6453319679126132e-06, "loss": 0.5684, "num_tokens": 529162785.0, "step": 5619 }, { "epoch": 0.9592080559822496, "grad_norm": 0.40702879948585513, "learning_rate": 1.638504864311316e-06, "loss": 0.5178, "num_tokens": 529270887.0, "step": 5620 }, { "epoch": 0.959378733572282, "grad_norm": 0.4814516546075914, "learning_rate": 1.631677760710019e-06, "loss": 0.5493, "num_tokens": 529352755.0, "step": 5621 }, { "epoch": 0.9595494111623144, "grad_norm": 0.44247451915301417, "learning_rate": 1.6248506571087217e-06, "loss": 0.4932, "num_tokens": 529443903.0, "step": 5622 }, { "epoch": 0.9597200887523468, "grad_norm": 0.44847382414494447, "learning_rate": 1.6180235535074246e-06, "loss": 0.5516, "num_tokens": 529550383.0, "step": 5623 }, { "epoch": 0.9598907663423792, "grad_norm": 0.46673304931931586, "learning_rate": 1.6111964499061275e-06, "loss": 0.5716, "num_tokens": 529644643.0, "step": 5624 }, { "epoch": 0.9600614439324117, "grad_norm": 0.4409170235414359, "learning_rate": 1.6043693463048305e-06, "loss": 0.5976, "num_tokens": 529760840.0, "step": 5625 }, { "epoch": 0.9602321215224441, "grad_norm": 0.48975231362875005, "learning_rate": 1.5975422427035332e-06, "loss": 0.6236, "num_tokens": 529853727.0, "step": 5626 }, { "epoch": 0.9604027991124765, "grad_norm": 0.48060391454225504, "learning_rate": 1.590715139102236e-06, "loss": 0.6625, "num_tokens": 529956197.0, "step": 5627 }, { "epoch": 0.9605734767025089, "grad_norm": 0.44316777356188236, "learning_rate": 1.583888035500939e-06, "loss": 0.4665, "num_tokens": 530039666.0, "step": 5628 }, { "epoch": 0.9607441542925413, "grad_norm": 0.4953196781103376, "learning_rate": 1.5770609318996417e-06, "loss": 0.6171, "num_tokens": 530139793.0, "step": 5629 }, { "epoch": 0.9609148318825739, "grad_norm": 0.4492436369783889, "learning_rate": 1.5702338282983446e-06, "loss": 0.5887, "num_tokens": 530248268.0, "step": 5630 }, { "epoch": 0.9610855094726063, "grad_norm": 0.5272745840121872, "learning_rate": 1.5634067246970475e-06, "loss": 0.5742, "num_tokens": 530319745.0, "step": 5631 }, { "epoch": 0.9612561870626387, "grad_norm": 0.5151423123648355, "learning_rate": 1.5565796210957504e-06, "loss": 0.5843, "num_tokens": 530419259.0, "step": 5632 }, { "epoch": 0.9614268646526711, "grad_norm": 0.44743610719104593, "learning_rate": 1.5497525174944531e-06, "loss": 0.5963, "num_tokens": 530520088.0, "step": 5633 }, { "epoch": 0.9615975422427036, "grad_norm": 0.45047033937413733, "learning_rate": 1.542925413893156e-06, "loss": 0.5228, "num_tokens": 530605193.0, "step": 5634 }, { "epoch": 0.961768219832736, "grad_norm": 0.49408633868481094, "learning_rate": 1.536098310291859e-06, "loss": 0.4832, "num_tokens": 530677921.0, "step": 5635 }, { "epoch": 0.9619388974227684, "grad_norm": 0.46410768797768687, "learning_rate": 1.5292712066905615e-06, "loss": 0.5637, "num_tokens": 530773277.0, "step": 5636 }, { "epoch": 0.9621095750128008, "grad_norm": 0.4694094324968781, "learning_rate": 1.5224441030892646e-06, "loss": 0.5081, "num_tokens": 530857077.0, "step": 5637 }, { "epoch": 0.9622802526028332, "grad_norm": 0.5047157936353266, "learning_rate": 1.5156169994879675e-06, "loss": 0.5975, "num_tokens": 530946274.0, "step": 5638 }, { "epoch": 0.9624509301928657, "grad_norm": 0.4558146191218209, "learning_rate": 1.50878989588667e-06, "loss": 0.5014, "num_tokens": 531037874.0, "step": 5639 }, { "epoch": 0.9626216077828981, "grad_norm": 0.484504162464639, "learning_rate": 1.501962792285373e-06, "loss": 0.5178, "num_tokens": 531120166.0, "step": 5640 }, { "epoch": 0.9627922853729305, "grad_norm": 0.4795722578264394, "learning_rate": 1.4951356886840758e-06, "loss": 0.5675, "num_tokens": 531204557.0, "step": 5641 }, { "epoch": 0.9629629629629629, "grad_norm": 0.44757949270421316, "learning_rate": 1.488308585082779e-06, "loss": 0.5308, "num_tokens": 531297678.0, "step": 5642 }, { "epoch": 0.9631336405529954, "grad_norm": 0.4325471946601958, "learning_rate": 1.4814814814814815e-06, "loss": 0.6152, "num_tokens": 531412443.0, "step": 5643 }, { "epoch": 0.9633043181430279, "grad_norm": 0.4587536955262118, "learning_rate": 1.4746543778801844e-06, "loss": 0.5543, "num_tokens": 531506978.0, "step": 5644 }, { "epoch": 0.9634749957330603, "grad_norm": 0.4288090357808341, "learning_rate": 1.4678272742788873e-06, "loss": 0.5023, "num_tokens": 531608487.0, "step": 5645 }, { "epoch": 0.9636456733230927, "grad_norm": 0.5117093404435008, "learning_rate": 1.46100017067759e-06, "loss": 0.5228, "num_tokens": 531681671.0, "step": 5646 }, { "epoch": 0.9638163509131251, "grad_norm": 0.4415944257614078, "learning_rate": 1.454173067076293e-06, "loss": 0.6317, "num_tokens": 531803331.0, "step": 5647 }, { "epoch": 0.9639870285031575, "grad_norm": 0.4383931962718212, "learning_rate": 1.4473459634749958e-06, "loss": 0.5633, "num_tokens": 531924738.0, "step": 5648 }, { "epoch": 0.96415770609319, "grad_norm": 0.4838150815175649, "learning_rate": 1.4405188598736987e-06, "loss": 0.5221, "num_tokens": 532001180.0, "step": 5649 }, { "epoch": 0.9643283836832224, "grad_norm": 0.5247918576144031, "learning_rate": 1.4336917562724014e-06, "loss": 0.5681, "num_tokens": 532073946.0, "step": 5650 }, { "epoch": 0.9644990612732548, "grad_norm": 0.4432853465153793, "learning_rate": 1.4268646526711044e-06, "loss": 0.5467, "num_tokens": 532174761.0, "step": 5651 }, { "epoch": 0.9646697388632872, "grad_norm": 0.5250680767042077, "learning_rate": 1.4200375490698073e-06, "loss": 0.5578, "num_tokens": 532246723.0, "step": 5652 }, { "epoch": 0.9648404164533196, "grad_norm": 0.5861463932629221, "learning_rate": 1.41321044546851e-06, "loss": 0.5671, "num_tokens": 532302299.0, "step": 5653 }, { "epoch": 0.9650110940433521, "grad_norm": 0.4758280856222727, "learning_rate": 1.4063833418672129e-06, "loss": 0.4624, "num_tokens": 532376235.0, "step": 5654 }, { "epoch": 0.9651817716333846, "grad_norm": 0.4842886593295531, "learning_rate": 1.3995562382659158e-06, "loss": 0.614, "num_tokens": 532465702.0, "step": 5655 }, { "epoch": 0.965352449223417, "grad_norm": 0.4684751481092388, "learning_rate": 1.3927291346646187e-06, "loss": 0.543, "num_tokens": 532558346.0, "step": 5656 }, { "epoch": 0.9655231268134494, "grad_norm": 0.4869680989245708, "learning_rate": 1.3859020310633214e-06, "loss": 0.5977, "num_tokens": 532647191.0, "step": 5657 }, { "epoch": 0.9656938044034818, "grad_norm": 0.5072136320857673, "learning_rate": 1.3790749274620243e-06, "loss": 0.6, "num_tokens": 532733373.0, "step": 5658 }, { "epoch": 0.9658644819935143, "grad_norm": 0.44554293330010625, "learning_rate": 1.3722478238607273e-06, "loss": 0.5795, "num_tokens": 532832245.0, "step": 5659 }, { "epoch": 0.9660351595835467, "grad_norm": 0.4877720149831054, "learning_rate": 1.36542072025943e-06, "loss": 0.5248, "num_tokens": 532909654.0, "step": 5660 }, { "epoch": 0.9662058371735791, "grad_norm": 0.4220016289353896, "learning_rate": 1.3585936166581329e-06, "loss": 0.5438, "num_tokens": 533018119.0, "step": 5661 }, { "epoch": 0.9663765147636115, "grad_norm": 0.5356025315410168, "learning_rate": 1.3517665130568358e-06, "loss": 0.6078, "num_tokens": 533091266.0, "step": 5662 }, { "epoch": 0.966547192353644, "grad_norm": 0.4580175222272572, "learning_rate": 1.3449394094555385e-06, "loss": 0.6379, "num_tokens": 533191675.0, "step": 5663 }, { "epoch": 0.9667178699436764, "grad_norm": 0.4703004071263575, "learning_rate": 1.3381123058542414e-06, "loss": 0.5911, "num_tokens": 533282644.0, "step": 5664 }, { "epoch": 0.9668885475337088, "grad_norm": 0.4838847659715829, "learning_rate": 1.3312852022529443e-06, "loss": 0.6625, "num_tokens": 533379790.0, "step": 5665 }, { "epoch": 0.9670592251237412, "grad_norm": 0.49566401054822246, "learning_rate": 1.3244580986516472e-06, "loss": 0.5244, "num_tokens": 533461708.0, "step": 5666 }, { "epoch": 0.9672299027137736, "grad_norm": 0.45975935577258437, "learning_rate": 1.31763099505035e-06, "loss": 0.4884, "num_tokens": 533541139.0, "step": 5667 }, { "epoch": 0.9674005803038062, "grad_norm": 0.5030921341750876, "learning_rate": 1.3108038914490529e-06, "loss": 0.6637, "num_tokens": 533641234.0, "step": 5668 }, { "epoch": 0.9675712578938386, "grad_norm": 0.4570482930920054, "learning_rate": 1.3039767878477558e-06, "loss": 0.4772, "num_tokens": 533731232.0, "step": 5669 }, { "epoch": 0.967741935483871, "grad_norm": 0.43142764566693564, "learning_rate": 1.2971496842464585e-06, "loss": 0.561, "num_tokens": 533829447.0, "step": 5670 }, { "epoch": 0.9679126130739034, "grad_norm": 0.5020728120515804, "learning_rate": 1.2903225806451614e-06, "loss": 0.5327, "num_tokens": 533904379.0, "step": 5671 }, { "epoch": 0.9680832906639358, "grad_norm": 0.4594184343917237, "learning_rate": 1.2834954770438643e-06, "loss": 0.5312, "num_tokens": 533997839.0, "step": 5672 }, { "epoch": 0.9682539682539683, "grad_norm": 0.4579616536753627, "learning_rate": 1.2766683734425672e-06, "loss": 0.561, "num_tokens": 534090906.0, "step": 5673 }, { "epoch": 0.9684246458440007, "grad_norm": 0.41658895969719983, "learning_rate": 1.26984126984127e-06, "loss": 0.4822, "num_tokens": 534190018.0, "step": 5674 }, { "epoch": 0.9685953234340331, "grad_norm": 0.520580958150149, "learning_rate": 1.2630141662399729e-06, "loss": 0.5971, "num_tokens": 534269334.0, "step": 5675 }, { "epoch": 0.9687660010240655, "grad_norm": 0.4866720610057144, "learning_rate": 1.2561870626386758e-06, "loss": 0.515, "num_tokens": 534345058.0, "step": 5676 }, { "epoch": 0.9689366786140979, "grad_norm": 0.5704383624126329, "learning_rate": 1.2493599590373785e-06, "loss": 0.5417, "num_tokens": 534429056.0, "step": 5677 }, { "epoch": 0.9691073562041304, "grad_norm": 0.3952461485245926, "learning_rate": 1.2425328554360814e-06, "loss": 0.5545, "num_tokens": 534566415.0, "step": 5678 }, { "epoch": 0.9692780337941628, "grad_norm": 0.4562849690297509, "learning_rate": 1.2357057518347843e-06, "loss": 0.5787, "num_tokens": 534662118.0, "step": 5679 }, { "epoch": 0.9694487113841953, "grad_norm": 0.47571830245594016, "learning_rate": 1.228878648233487e-06, "loss": 0.6149, "num_tokens": 534759227.0, "step": 5680 }, { "epoch": 0.9696193889742277, "grad_norm": 0.494420180133051, "learning_rate": 1.22205154463219e-06, "loss": 0.5587, "num_tokens": 534843350.0, "step": 5681 }, { "epoch": 0.9697900665642601, "grad_norm": 0.48151808070109053, "learning_rate": 1.2152244410308928e-06, "loss": 0.5343, "num_tokens": 534927910.0, "step": 5682 }, { "epoch": 0.9699607441542926, "grad_norm": 0.40784385013871205, "learning_rate": 1.2083973374295955e-06, "loss": 0.5646, "num_tokens": 535051802.0, "step": 5683 }, { "epoch": 0.970131421744325, "grad_norm": 0.46029053497862027, "learning_rate": 1.2015702338282985e-06, "loss": 0.5177, "num_tokens": 535141352.0, "step": 5684 }, { "epoch": 0.9703020993343574, "grad_norm": 0.4484885623629113, "learning_rate": 1.1947431302270014e-06, "loss": 0.5609, "num_tokens": 535244831.0, "step": 5685 }, { "epoch": 0.9704727769243898, "grad_norm": 0.5016411494184984, "learning_rate": 1.1879160266257043e-06, "loss": 0.6381, "num_tokens": 535338839.0, "step": 5686 }, { "epoch": 0.9706434545144222, "grad_norm": 0.4412107013727558, "learning_rate": 1.181088923024407e-06, "loss": 0.5184, "num_tokens": 535432894.0, "step": 5687 }, { "epoch": 0.9708141321044547, "grad_norm": 0.47272565399465755, "learning_rate": 1.1742618194231097e-06, "loss": 0.6627, "num_tokens": 535534603.0, "step": 5688 }, { "epoch": 0.9709848096944871, "grad_norm": 0.45607643321560754, "learning_rate": 1.1674347158218128e-06, "loss": 0.631, "num_tokens": 535641543.0, "step": 5689 }, { "epoch": 0.9711554872845195, "grad_norm": 0.5151824252068921, "learning_rate": 1.1606076122205155e-06, "loss": 0.5654, "num_tokens": 535710623.0, "step": 5690 }, { "epoch": 0.9713261648745519, "grad_norm": 0.4560699379252708, "learning_rate": 1.1537805086192184e-06, "loss": 0.6417, "num_tokens": 535824936.0, "step": 5691 }, { "epoch": 0.9714968424645845, "grad_norm": 0.41151083149648027, "learning_rate": 1.1469534050179212e-06, "loss": 0.6649, "num_tokens": 535953736.0, "step": 5692 }, { "epoch": 0.9716675200546169, "grad_norm": 0.5417282107267116, "learning_rate": 1.140126301416624e-06, "loss": 0.621, "num_tokens": 536029434.0, "step": 5693 }, { "epoch": 0.9718381976446493, "grad_norm": 0.5451225064618821, "learning_rate": 1.133299197815327e-06, "loss": 0.5802, "num_tokens": 536109650.0, "step": 5694 }, { "epoch": 0.9720088752346817, "grad_norm": 0.48066667308755695, "learning_rate": 1.1264720942140297e-06, "loss": 0.5818, "num_tokens": 536208382.0, "step": 5695 }, { "epoch": 0.9721795528247141, "grad_norm": 0.47177014266548817, "learning_rate": 1.1196449906127326e-06, "loss": 0.6056, "num_tokens": 536302994.0, "step": 5696 }, { "epoch": 0.9723502304147466, "grad_norm": 0.45353413211242544, "learning_rate": 1.1128178870114355e-06, "loss": 0.5588, "num_tokens": 536401956.0, "step": 5697 }, { "epoch": 0.972520908004779, "grad_norm": 0.44201435809474693, "learning_rate": 1.1059907834101384e-06, "loss": 0.5854, "num_tokens": 536504421.0, "step": 5698 }, { "epoch": 0.9726915855948114, "grad_norm": 0.4656896812341324, "learning_rate": 1.0991636798088411e-06, "loss": 0.4991, "num_tokens": 536599614.0, "step": 5699 }, { "epoch": 0.9728622631848438, "grad_norm": 0.565612314521484, "learning_rate": 1.092336576207544e-06, "loss": 0.5754, "num_tokens": 536661661.0, "step": 5700 }, { "epoch": 0.9730329407748762, "grad_norm": 0.39011205750441497, "learning_rate": 1.085509472606247e-06, "loss": 0.5048, "num_tokens": 536777340.0, "step": 5701 }, { "epoch": 0.9732036183649087, "grad_norm": 0.49804649012811286, "learning_rate": 1.0786823690049497e-06, "loss": 0.5388, "num_tokens": 536853740.0, "step": 5702 }, { "epoch": 0.9733742959549411, "grad_norm": 0.4513520212697472, "learning_rate": 1.0718552654036526e-06, "loss": 0.5836, "num_tokens": 536949668.0, "step": 5703 }, { "epoch": 0.9735449735449735, "grad_norm": 0.4966302071332309, "learning_rate": 1.0650281618023555e-06, "loss": 0.5419, "num_tokens": 537025168.0, "step": 5704 }, { "epoch": 0.973715651135006, "grad_norm": 0.5113915399315336, "learning_rate": 1.0582010582010582e-06, "loss": 0.6291, "num_tokens": 537118594.0, "step": 5705 }, { "epoch": 0.9738863287250384, "grad_norm": 0.4834927483283794, "learning_rate": 1.0513739545997611e-06, "loss": 0.6078, "num_tokens": 537208405.0, "step": 5706 }, { "epoch": 0.9740570063150709, "grad_norm": 0.4280457212694096, "learning_rate": 1.044546850998464e-06, "loss": 0.5664, "num_tokens": 537324659.0, "step": 5707 }, { "epoch": 0.9742276839051033, "grad_norm": 0.48672688080291393, "learning_rate": 1.037719747397167e-06, "loss": 0.6326, "num_tokens": 537415315.0, "step": 5708 }, { "epoch": 0.9743983614951357, "grad_norm": 0.5116372746174793, "learning_rate": 1.0308926437958697e-06, "loss": 0.4659, "num_tokens": 537476425.0, "step": 5709 }, { "epoch": 0.9745690390851681, "grad_norm": 0.505012755386635, "learning_rate": 1.0240655401945726e-06, "loss": 0.5854, "num_tokens": 537554418.0, "step": 5710 }, { "epoch": 0.9747397166752005, "grad_norm": 0.49065387418747913, "learning_rate": 1.0172384365932755e-06, "loss": 0.4857, "num_tokens": 537630581.0, "step": 5711 }, { "epoch": 0.974910394265233, "grad_norm": 0.5608173574871514, "learning_rate": 1.0104113329919782e-06, "loss": 0.5187, "num_tokens": 537691848.0, "step": 5712 }, { "epoch": 0.9750810718552654, "grad_norm": 0.5277119640686474, "learning_rate": 1.0035842293906811e-06, "loss": 0.5728, "num_tokens": 537761086.0, "step": 5713 }, { "epoch": 0.9752517494452978, "grad_norm": 0.5272841751284647, "learning_rate": 9.967571257893838e-07, "loss": 0.6312, "num_tokens": 537858113.0, "step": 5714 }, { "epoch": 0.9754224270353302, "grad_norm": 0.41892308424228397, "learning_rate": 9.89930022188087e-07, "loss": 0.4945, "num_tokens": 537962812.0, "step": 5715 }, { "epoch": 0.9755931046253626, "grad_norm": 0.5162545255352943, "learning_rate": 9.831029185867897e-07, "loss": 0.6265, "num_tokens": 538041629.0, "step": 5716 }, { "epoch": 0.9757637822153952, "grad_norm": 0.40947587376113537, "learning_rate": 9.762758149854924e-07, "loss": 0.593, "num_tokens": 538164281.0, "step": 5717 }, { "epoch": 0.9759344598054276, "grad_norm": 0.4716062864351842, "learning_rate": 9.694487113841953e-07, "loss": 0.6729, "num_tokens": 538267977.0, "step": 5718 }, { "epoch": 0.97610513739546, "grad_norm": 0.43905097672300236, "learning_rate": 9.626216077828982e-07, "loss": 0.4962, "num_tokens": 538361586.0, "step": 5719 }, { "epoch": 0.9762758149854924, "grad_norm": 0.5089771541390959, "learning_rate": 9.55794504181601e-07, "loss": 0.6195, "num_tokens": 538447330.0, "step": 5720 }, { "epoch": 0.9764464925755248, "grad_norm": 0.4967359761465205, "learning_rate": 9.489674005803039e-07, "loss": 0.5667, "num_tokens": 538527195.0, "step": 5721 }, { "epoch": 0.9766171701655573, "grad_norm": 0.4939707004204201, "learning_rate": 9.421402969790068e-07, "loss": 0.6059, "num_tokens": 538628232.0, "step": 5722 }, { "epoch": 0.9767878477555897, "grad_norm": 0.47251415292716403, "learning_rate": 9.353131933777096e-07, "loss": 0.5415, "num_tokens": 538713885.0, "step": 5723 }, { "epoch": 0.9769585253456221, "grad_norm": 0.4813054970193649, "learning_rate": 9.284860897764123e-07, "loss": 0.51, "num_tokens": 538798735.0, "step": 5724 }, { "epoch": 0.9771292029356545, "grad_norm": 0.5132622856620368, "learning_rate": 9.216589861751154e-07, "loss": 0.5083, "num_tokens": 538868700.0, "step": 5725 }, { "epoch": 0.977299880525687, "grad_norm": 0.4257725404174097, "learning_rate": 9.148318825738181e-07, "loss": 0.5177, "num_tokens": 538975518.0, "step": 5726 }, { "epoch": 0.9774705581157194, "grad_norm": 0.4930256729794848, "learning_rate": 9.080047789725211e-07, "loss": 0.511, "num_tokens": 539051789.0, "step": 5727 }, { "epoch": 0.9776412357057518, "grad_norm": 0.4749437304592271, "learning_rate": 9.011776753712238e-07, "loss": 0.6154, "num_tokens": 539148826.0, "step": 5728 }, { "epoch": 0.9778119132957843, "grad_norm": 0.4330594693562733, "learning_rate": 8.943505717699266e-07, "loss": 0.553, "num_tokens": 539256740.0, "step": 5729 }, { "epoch": 0.9779825908858167, "grad_norm": 0.42186299418530376, "learning_rate": 8.875234681686295e-07, "loss": 0.6048, "num_tokens": 539381242.0, "step": 5730 }, { "epoch": 0.9781532684758492, "grad_norm": 0.40627473666857217, "learning_rate": 8.806963645673323e-07, "loss": 0.5291, "num_tokens": 539493047.0, "step": 5731 }, { "epoch": 0.9783239460658816, "grad_norm": 0.5380674795865472, "learning_rate": 8.738692609660352e-07, "loss": 0.633, "num_tokens": 539573082.0, "step": 5732 }, { "epoch": 0.978494623655914, "grad_norm": 0.41539564320329664, "learning_rate": 8.670421573647381e-07, "loss": 0.5632, "num_tokens": 539691278.0, "step": 5733 }, { "epoch": 0.9786653012459464, "grad_norm": 0.417791370840599, "learning_rate": 8.60215053763441e-07, "loss": 0.5318, "num_tokens": 539800829.0, "step": 5734 }, { "epoch": 0.9788359788359788, "grad_norm": 0.4942514338848114, "learning_rate": 8.533879501621438e-07, "loss": 0.5848, "num_tokens": 539890285.0, "step": 5735 }, { "epoch": 0.9790066564260113, "grad_norm": 0.45628469189496573, "learning_rate": 8.465608465608466e-07, "loss": 0.5322, "num_tokens": 539983069.0, "step": 5736 }, { "epoch": 0.9791773340160437, "grad_norm": 0.5127685309066037, "learning_rate": 8.397337429595495e-07, "loss": 0.6126, "num_tokens": 540068106.0, "step": 5737 }, { "epoch": 0.9793480116060761, "grad_norm": 0.49773979184798806, "learning_rate": 8.329066393582523e-07, "loss": 0.5232, "num_tokens": 540143233.0, "step": 5738 }, { "epoch": 0.9795186891961085, "grad_norm": 0.4765629881692624, "learning_rate": 8.260795357569552e-07, "loss": 0.5975, "num_tokens": 540235250.0, "step": 5739 }, { "epoch": 0.9796893667861409, "grad_norm": 0.482298377378701, "learning_rate": 8.19252432155658e-07, "loss": 0.5561, "num_tokens": 540322776.0, "step": 5740 }, { "epoch": 0.9798600443761734, "grad_norm": 0.4866030407007398, "learning_rate": 8.124253285543609e-07, "loss": 0.5399, "num_tokens": 540401689.0, "step": 5741 }, { "epoch": 0.9800307219662059, "grad_norm": 0.4682685273129796, "learning_rate": 8.055982249530638e-07, "loss": 0.5507, "num_tokens": 540499913.0, "step": 5742 }, { "epoch": 0.9802013995562383, "grad_norm": 0.49308372411405366, "learning_rate": 7.987711213517666e-07, "loss": 0.6328, "num_tokens": 540591654.0, "step": 5743 }, { "epoch": 0.9803720771462707, "grad_norm": 0.46765176340499853, "learning_rate": 7.919440177504695e-07, "loss": 0.576, "num_tokens": 540686608.0, "step": 5744 }, { "epoch": 0.9805427547363031, "grad_norm": 0.46346137468628523, "learning_rate": 7.851169141491723e-07, "loss": 0.6127, "num_tokens": 540794238.0, "step": 5745 }, { "epoch": 0.9807134323263356, "grad_norm": 0.5018134267778155, "learning_rate": 7.782898105478752e-07, "loss": 0.5912, "num_tokens": 540877741.0, "step": 5746 }, { "epoch": 0.980884109916368, "grad_norm": 0.5234366717426516, "learning_rate": 7.71462706946578e-07, "loss": 0.5525, "num_tokens": 540948394.0, "step": 5747 }, { "epoch": 0.9810547875064004, "grad_norm": 0.4717706580707317, "learning_rate": 7.646356033452807e-07, "loss": 0.4969, "num_tokens": 541027028.0, "step": 5748 }, { "epoch": 0.9812254650964328, "grad_norm": 0.4286027350232933, "learning_rate": 7.578084997439838e-07, "loss": 0.5368, "num_tokens": 541131877.0, "step": 5749 }, { "epoch": 0.9813961426864652, "grad_norm": 0.47432074717566675, "learning_rate": 7.509813961426865e-07, "loss": 0.6926, "num_tokens": 541233764.0, "step": 5750 }, { "epoch": 0.9815668202764977, "grad_norm": 0.5162730871686685, "learning_rate": 7.441542925413895e-07, "loss": 0.5262, "num_tokens": 541306909.0, "step": 5751 }, { "epoch": 0.9817374978665301, "grad_norm": 0.4503363869023697, "learning_rate": 7.373271889400922e-07, "loss": 0.4919, "num_tokens": 541393969.0, "step": 5752 }, { "epoch": 0.9819081754565625, "grad_norm": 0.4484652049830725, "learning_rate": 7.30500085338795e-07, "loss": 0.5684, "num_tokens": 541494815.0, "step": 5753 }, { "epoch": 0.982078853046595, "grad_norm": 0.5002920159378207, "learning_rate": 7.236729817374979e-07, "loss": 0.6664, "num_tokens": 541595229.0, "step": 5754 }, { "epoch": 0.9822495306366275, "grad_norm": 0.41579288377878176, "learning_rate": 7.168458781362007e-07, "loss": 0.5086, "num_tokens": 541708701.0, "step": 5755 }, { "epoch": 0.9824202082266599, "grad_norm": 0.41615194292620467, "learning_rate": 7.100187745349036e-07, "loss": 0.4533, "num_tokens": 541801020.0, "step": 5756 }, { "epoch": 0.9825908858166923, "grad_norm": 0.4636451974702676, "learning_rate": 7.031916709336064e-07, "loss": 0.5349, "num_tokens": 541886678.0, "step": 5757 }, { "epoch": 0.9827615634067247, "grad_norm": 0.47017049624810825, "learning_rate": 6.963645673323094e-07, "loss": 0.6183, "num_tokens": 541986337.0, "step": 5758 }, { "epoch": 0.9829322409967571, "grad_norm": 0.41868177143037305, "learning_rate": 6.895374637310122e-07, "loss": 0.5683, "num_tokens": 542108619.0, "step": 5759 }, { "epoch": 0.9831029185867896, "grad_norm": 0.5358132995476426, "learning_rate": 6.82710360129715e-07, "loss": 0.5343, "num_tokens": 542171448.0, "step": 5760 }, { "epoch": 0.983273596176822, "grad_norm": 0.5440031682509913, "learning_rate": 6.758832565284179e-07, "loss": 0.4744, "num_tokens": 542231619.0, "step": 5761 }, { "epoch": 0.9834442737668544, "grad_norm": 0.46607822785376823, "learning_rate": 6.690561529271207e-07, "loss": 0.6097, "num_tokens": 542332055.0, "step": 5762 }, { "epoch": 0.9836149513568868, "grad_norm": 0.484963210595089, "learning_rate": 6.622290493258236e-07, "loss": 0.554, "num_tokens": 542422079.0, "step": 5763 }, { "epoch": 0.9837856289469192, "grad_norm": 0.471235941690195, "learning_rate": 6.554019457245264e-07, "loss": 0.6064, "num_tokens": 542517877.0, "step": 5764 }, { "epoch": 0.9839563065369517, "grad_norm": 0.46796393390041524, "learning_rate": 6.485748421232292e-07, "loss": 0.5522, "num_tokens": 542610836.0, "step": 5765 }, { "epoch": 0.9841269841269841, "grad_norm": 0.49361002000212556, "learning_rate": 6.417477385219322e-07, "loss": 0.5607, "num_tokens": 542691617.0, "step": 5766 }, { "epoch": 0.9842976617170166, "grad_norm": 0.5254342524247596, "learning_rate": 6.34920634920635e-07, "loss": 0.4885, "num_tokens": 542754656.0, "step": 5767 }, { "epoch": 0.984468339307049, "grad_norm": 0.4169663030145664, "learning_rate": 6.280935313193379e-07, "loss": 0.501, "num_tokens": 542863104.0, "step": 5768 }, { "epoch": 0.9846390168970814, "grad_norm": 0.40682154763089406, "learning_rate": 6.212664277180407e-07, "loss": 0.5601, "num_tokens": 542983801.0, "step": 5769 }, { "epoch": 0.9848096944871139, "grad_norm": 0.48389110528109314, "learning_rate": 6.144393241167435e-07, "loss": 0.5844, "num_tokens": 543068499.0, "step": 5770 }, { "epoch": 0.9849803720771463, "grad_norm": 0.47475379851520055, "learning_rate": 6.076122205154464e-07, "loss": 0.548, "num_tokens": 543151943.0, "step": 5771 }, { "epoch": 0.9851510496671787, "grad_norm": 0.4130073569056687, "learning_rate": 6.007851169141492e-07, "loss": 0.5512, "num_tokens": 543269160.0, "step": 5772 }, { "epoch": 0.9853217272572111, "grad_norm": 0.49448963388140277, "learning_rate": 5.939580133128521e-07, "loss": 0.591, "num_tokens": 543352422.0, "step": 5773 }, { "epoch": 0.9854924048472435, "grad_norm": 0.39814397399944024, "learning_rate": 5.871309097115549e-07, "loss": 0.5081, "num_tokens": 543466609.0, "step": 5774 }, { "epoch": 0.985663082437276, "grad_norm": 0.4226450071260806, "learning_rate": 5.803038061102578e-07, "loss": 0.6391, "num_tokens": 543599863.0, "step": 5775 }, { "epoch": 0.9858337600273084, "grad_norm": 0.4406201146797656, "learning_rate": 5.734767025089606e-07, "loss": 0.6155, "num_tokens": 543724344.0, "step": 5776 }, { "epoch": 0.9860044376173408, "grad_norm": 0.511452064055497, "learning_rate": 5.666495989076635e-07, "loss": 0.5932, "num_tokens": 543803705.0, "step": 5777 }, { "epoch": 0.9861751152073732, "grad_norm": 0.46468299063228635, "learning_rate": 5.598224953063663e-07, "loss": 0.5798, "num_tokens": 543898260.0, "step": 5778 }, { "epoch": 0.9863457927974058, "grad_norm": 0.6020277311027128, "learning_rate": 5.529953917050692e-07, "loss": 0.5736, "num_tokens": 543976120.0, "step": 5779 }, { "epoch": 0.9865164703874382, "grad_norm": 0.4113567878084181, "learning_rate": 5.46168288103772e-07, "loss": 0.516, "num_tokens": 544082325.0, "step": 5780 }, { "epoch": 0.9866871479774706, "grad_norm": 0.4895290306960513, "learning_rate": 5.393411845024748e-07, "loss": 0.6113, "num_tokens": 544180348.0, "step": 5781 }, { "epoch": 0.986857825567503, "grad_norm": 0.4878580478588014, "learning_rate": 5.325140809011778e-07, "loss": 0.5637, "num_tokens": 544260470.0, "step": 5782 }, { "epoch": 0.9870285031575354, "grad_norm": 0.44723633074566177, "learning_rate": 5.256869772998806e-07, "loss": 0.5144, "num_tokens": 544351607.0, "step": 5783 }, { "epoch": 0.9871991807475679, "grad_norm": 0.45251588478193316, "learning_rate": 5.188598736985835e-07, "loss": 0.5047, "num_tokens": 544440539.0, "step": 5784 }, { "epoch": 0.9873698583376003, "grad_norm": 0.4288770163680816, "learning_rate": 5.120327700972863e-07, "loss": 0.5199, "num_tokens": 544539395.0, "step": 5785 }, { "epoch": 0.9875405359276327, "grad_norm": 0.4492201778042409, "learning_rate": 5.052056664959891e-07, "loss": 0.551, "num_tokens": 544640403.0, "step": 5786 }, { "epoch": 0.9877112135176651, "grad_norm": 0.42358607843684837, "learning_rate": 4.983785628946919e-07, "loss": 0.5075, "num_tokens": 544742428.0, "step": 5787 }, { "epoch": 0.9878818911076975, "grad_norm": 0.5008822842478289, "learning_rate": 4.915514592933948e-07, "loss": 0.4547, "num_tokens": 544806439.0, "step": 5788 }, { "epoch": 0.98805256869773, "grad_norm": 0.4435975279271749, "learning_rate": 4.847243556920976e-07, "loss": 0.6268, "num_tokens": 544927035.0, "step": 5789 }, { "epoch": 0.9882232462877624, "grad_norm": 0.4519129422274784, "learning_rate": 4.778972520908006e-07, "loss": 0.6297, "num_tokens": 545042222.0, "step": 5790 }, { "epoch": 0.9883939238777949, "grad_norm": 0.5710619897153714, "learning_rate": 4.710701484895034e-07, "loss": 0.5843, "num_tokens": 545103651.0, "step": 5791 }, { "epoch": 0.9885646014678273, "grad_norm": 0.46213765181675537, "learning_rate": 4.6424304488820617e-07, "loss": 0.4902, "num_tokens": 545190924.0, "step": 5792 }, { "epoch": 0.9887352790578597, "grad_norm": 0.5240770479956915, "learning_rate": 4.5741594128690903e-07, "loss": 0.6508, "num_tokens": 545277409.0, "step": 5793 }, { "epoch": 0.9889059566478922, "grad_norm": 0.38457019910662404, "learning_rate": 4.505888376856119e-07, "loss": 0.546, "num_tokens": 545405529.0, "step": 5794 }, { "epoch": 0.9890766342379246, "grad_norm": 0.4744382204003423, "learning_rate": 4.4376173408431476e-07, "loss": 0.4748, "num_tokens": 545484944.0, "step": 5795 }, { "epoch": 0.989247311827957, "grad_norm": 0.45362547116823715, "learning_rate": 4.369346304830176e-07, "loss": 0.6449, "num_tokens": 545600678.0, "step": 5796 }, { "epoch": 0.9894179894179894, "grad_norm": 0.42979674158179343, "learning_rate": 4.301075268817205e-07, "loss": 0.5401, "num_tokens": 545707393.0, "step": 5797 }, { "epoch": 0.9895886670080218, "grad_norm": 0.41168715165075287, "learning_rate": 4.232804232804233e-07, "loss": 0.5022, "num_tokens": 545813567.0, "step": 5798 }, { "epoch": 0.9897593445980543, "grad_norm": 0.47554267612887186, "learning_rate": 4.1645331967912616e-07, "loss": 0.486, "num_tokens": 545886852.0, "step": 5799 }, { "epoch": 0.9899300221880867, "grad_norm": 0.4934162549922142, "learning_rate": 4.09626216077829e-07, "loss": 0.5681, "num_tokens": 545965519.0, "step": 5800 }, { "epoch": 0.9901006997781191, "grad_norm": 0.45737253351999513, "learning_rate": 4.027991124765319e-07, "loss": 0.5346, "num_tokens": 546057977.0, "step": 5801 }, { "epoch": 0.9902713773681515, "grad_norm": 0.5084515806894331, "learning_rate": 3.9597200887523475e-07, "loss": 0.6432, "num_tokens": 546151277.0, "step": 5802 }, { "epoch": 0.9904420549581839, "grad_norm": 0.4364907527716057, "learning_rate": 3.891449052739376e-07, "loss": 0.4923, "num_tokens": 546237212.0, "step": 5803 }, { "epoch": 0.9906127325482165, "grad_norm": 0.4552953867831013, "learning_rate": 3.8231780167264037e-07, "loss": 0.5795, "num_tokens": 546330410.0, "step": 5804 }, { "epoch": 0.9907834101382489, "grad_norm": 0.5210594940351512, "learning_rate": 3.7549069807134323e-07, "loss": 0.4782, "num_tokens": 546399925.0, "step": 5805 }, { "epoch": 0.9909540877282813, "grad_norm": 0.4549062761605478, "learning_rate": 3.686635944700461e-07, "loss": 0.5242, "num_tokens": 546493457.0, "step": 5806 }, { "epoch": 0.9911247653183137, "grad_norm": 0.4825891441764143, "learning_rate": 3.6183649086874896e-07, "loss": 0.5735, "num_tokens": 546580799.0, "step": 5807 }, { "epoch": 0.9912954429083461, "grad_norm": 0.4869958287810855, "learning_rate": 3.550093872674518e-07, "loss": 0.5979, "num_tokens": 546669277.0, "step": 5808 }, { "epoch": 0.9914661204983786, "grad_norm": 0.4536960150733509, "learning_rate": 3.481822836661547e-07, "loss": 0.5801, "num_tokens": 546766880.0, "step": 5809 }, { "epoch": 0.991636798088411, "grad_norm": 0.4868874405663085, "learning_rate": 3.413551800648575e-07, "loss": 0.5912, "num_tokens": 546848734.0, "step": 5810 }, { "epoch": 0.9918074756784434, "grad_norm": 0.44502317355334464, "learning_rate": 3.3452807646356035e-07, "loss": 0.5233, "num_tokens": 546946804.0, "step": 5811 }, { "epoch": 0.9919781532684758, "grad_norm": 0.4619057757967984, "learning_rate": 3.277009728622632e-07, "loss": 0.6044, "num_tokens": 547059590.0, "step": 5812 }, { "epoch": 0.9921488308585082, "grad_norm": 0.4915887660059776, "learning_rate": 3.208738692609661e-07, "loss": 0.5442, "num_tokens": 547151972.0, "step": 5813 }, { "epoch": 0.9923195084485407, "grad_norm": 0.6255365390217545, "learning_rate": 3.1404676565966894e-07, "loss": 0.6096, "num_tokens": 547215153.0, "step": 5814 }, { "epoch": 0.9924901860385731, "grad_norm": 0.470822034125046, "learning_rate": 3.0721966205837175e-07, "loss": 0.5895, "num_tokens": 547311532.0, "step": 5815 }, { "epoch": 0.9926608636286056, "grad_norm": 0.4762608503265472, "learning_rate": 3.003925584570746e-07, "loss": 0.5878, "num_tokens": 547399396.0, "step": 5816 }, { "epoch": 0.992831541218638, "grad_norm": 0.47980906982732047, "learning_rate": 2.935654548557774e-07, "loss": 0.6335, "num_tokens": 547499933.0, "step": 5817 }, { "epoch": 0.9930022188086705, "grad_norm": 0.5657441826645131, "learning_rate": 2.867383512544803e-07, "loss": 0.629, "num_tokens": 547574103.0, "step": 5818 }, { "epoch": 0.9931728963987029, "grad_norm": 0.4444063312886428, "learning_rate": 2.7991124765318315e-07, "loss": 0.5804, "num_tokens": 547672508.0, "step": 5819 }, { "epoch": 0.9933435739887353, "grad_norm": 0.448718798103949, "learning_rate": 2.73084144051886e-07, "loss": 0.519, "num_tokens": 547770260.0, "step": 5820 }, { "epoch": 0.9935142515787677, "grad_norm": 0.4751972844234836, "learning_rate": 2.662570404505889e-07, "loss": 0.6257, "num_tokens": 547868830.0, "step": 5821 }, { "epoch": 0.9936849291688001, "grad_norm": 0.46057589833746626, "learning_rate": 2.5942993684929174e-07, "loss": 0.5535, "num_tokens": 547964765.0, "step": 5822 }, { "epoch": 0.9938556067588326, "grad_norm": 0.44560014091650313, "learning_rate": 2.5260283324799455e-07, "loss": 0.5509, "num_tokens": 548058706.0, "step": 5823 }, { "epoch": 0.994026284348865, "grad_norm": 0.43514683343912264, "learning_rate": 2.457757296466974e-07, "loss": 0.5677, "num_tokens": 548164612.0, "step": 5824 }, { "epoch": 0.9941969619388974, "grad_norm": 0.48262238747703023, "learning_rate": 2.389486260454003e-07, "loss": 0.4868, "num_tokens": 548241160.0, "step": 5825 }, { "epoch": 0.9943676395289298, "grad_norm": 0.48249569278875876, "learning_rate": 2.3212152244410309e-07, "loss": 0.6833, "num_tokens": 548350186.0, "step": 5826 }, { "epoch": 0.9945383171189622, "grad_norm": 0.4552530328454943, "learning_rate": 2.2529441884280595e-07, "loss": 0.5533, "num_tokens": 548440686.0, "step": 5827 }, { "epoch": 0.9947089947089947, "grad_norm": 0.4721088305117028, "learning_rate": 2.184673152415088e-07, "loss": 0.5155, "num_tokens": 548519817.0, "step": 5828 }, { "epoch": 0.9948796722990272, "grad_norm": 0.45953226419462434, "learning_rate": 2.1164021164021165e-07, "loss": 0.4888, "num_tokens": 548608305.0, "step": 5829 }, { "epoch": 0.9950503498890596, "grad_norm": 0.4932556194701304, "learning_rate": 2.048131080389145e-07, "loss": 0.557, "num_tokens": 548692143.0, "step": 5830 }, { "epoch": 0.995221027479092, "grad_norm": 0.40105844015861297, "learning_rate": 1.9798600443761737e-07, "loss": 0.5344, "num_tokens": 548808239.0, "step": 5831 }, { "epoch": 0.9953917050691244, "grad_norm": 0.4572615492244303, "learning_rate": 1.9115890083632018e-07, "loss": 0.5222, "num_tokens": 548895983.0, "step": 5832 }, { "epoch": 0.9955623826591569, "grad_norm": 0.42537586759942986, "learning_rate": 1.8433179723502305e-07, "loss": 0.535, "num_tokens": 549008709.0, "step": 5833 }, { "epoch": 0.9957330602491893, "grad_norm": 0.4111588633060106, "learning_rate": 1.775046936337259e-07, "loss": 0.5503, "num_tokens": 549119419.0, "step": 5834 }, { "epoch": 0.9959037378392217, "grad_norm": 0.4740006195035117, "learning_rate": 1.7067759003242875e-07, "loss": 0.5599, "num_tokens": 549208768.0, "step": 5835 }, { "epoch": 0.9960744154292541, "grad_norm": 0.4680097802183672, "learning_rate": 1.638504864311316e-07, "loss": 0.6226, "num_tokens": 549336406.0, "step": 5836 }, { "epoch": 0.9962450930192865, "grad_norm": 0.44807482137235505, "learning_rate": 1.5702338282983447e-07, "loss": 0.4866, "num_tokens": 549427878.0, "step": 5837 }, { "epoch": 0.996415770609319, "grad_norm": 0.4510415718016738, "learning_rate": 1.501962792285373e-07, "loss": 0.5399, "num_tokens": 549524270.0, "step": 5838 }, { "epoch": 0.9965864481993514, "grad_norm": 0.5170634447715512, "learning_rate": 1.4336917562724014e-07, "loss": 0.5634, "num_tokens": 549593266.0, "step": 5839 }, { "epoch": 0.9967571257893838, "grad_norm": 0.4261840906680157, "learning_rate": 1.36542072025943e-07, "loss": 0.5331, "num_tokens": 549699041.0, "step": 5840 }, { "epoch": 0.9969278033794163, "grad_norm": 0.4794547615538534, "learning_rate": 1.2971496842464587e-07, "loss": 0.6273, "num_tokens": 549794125.0, "step": 5841 }, { "epoch": 0.9970984809694488, "grad_norm": 0.47174951341644733, "learning_rate": 1.228878648233487e-07, "loss": 0.5615, "num_tokens": 549879603.0, "step": 5842 }, { "epoch": 0.9972691585594812, "grad_norm": 0.45584229427372397, "learning_rate": 1.1606076122205154e-07, "loss": 0.5502, "num_tokens": 549969185.0, "step": 5843 }, { "epoch": 0.9974398361495136, "grad_norm": 0.47408926476678104, "learning_rate": 1.092336576207544e-07, "loss": 0.4942, "num_tokens": 550050967.0, "step": 5844 }, { "epoch": 0.997610513739546, "grad_norm": 0.5147101847206296, "learning_rate": 1.0240655401945726e-07, "loss": 0.6703, "num_tokens": 550137084.0, "step": 5845 }, { "epoch": 0.9977811913295784, "grad_norm": 0.4030430309754854, "learning_rate": 9.557945041816009e-08, "loss": 0.5217, "num_tokens": 550250609.0, "step": 5846 }, { "epoch": 0.9979518689196109, "grad_norm": 0.4289449212846011, "learning_rate": 8.875234681686295e-08, "loss": 0.5395, "num_tokens": 550353200.0, "step": 5847 }, { "epoch": 0.9981225465096433, "grad_norm": 0.5082682750568082, "learning_rate": 8.19252432155658e-08, "loss": 0.5302, "num_tokens": 550431497.0, "step": 5848 }, { "epoch": 0.9982932240996757, "grad_norm": 0.49830500043947556, "learning_rate": 7.509813961426865e-08, "loss": 0.5426, "num_tokens": 550506850.0, "step": 5849 }, { "epoch": 0.9984639016897081, "grad_norm": 0.4512741565282109, "learning_rate": 6.82710360129715e-08, "loss": 0.5019, "num_tokens": 550592752.0, "step": 5850 }, { "epoch": 0.9986345792797405, "grad_norm": 0.44212043423918934, "learning_rate": 6.144393241167435e-08, "loss": 0.5574, "num_tokens": 550689952.0, "step": 5851 }, { "epoch": 0.998805256869773, "grad_norm": 0.4464405135724566, "learning_rate": 5.46168288103772e-08, "loss": 0.5864, "num_tokens": 550797460.0, "step": 5852 }, { "epoch": 0.9989759344598055, "grad_norm": 0.39251959885722437, "learning_rate": 4.7789725209080046e-08, "loss": 0.5537, "num_tokens": 550926143.0, "step": 5853 }, { "epoch": 0.9991466120498379, "grad_norm": 0.429358694610898, "learning_rate": 4.09626216077829e-08, "loss": 0.5022, "num_tokens": 551020494.0, "step": 5854 }, { "epoch": 0.9993172896398703, "grad_norm": 0.5129746771239546, "learning_rate": 3.413551800648575e-08, "loss": 0.612, "num_tokens": 551096687.0, "step": 5855 }, { "epoch": 0.9994879672299027, "grad_norm": 0.47713258367504513, "learning_rate": 2.73084144051886e-08, "loss": 0.5658, "num_tokens": 551186015.0, "step": 5856 }, { "epoch": 0.9996586448199352, "grad_norm": 0.43080212263534023, "learning_rate": 2.048131080389145e-08, "loss": 0.5923, "num_tokens": 551297060.0, "step": 5857 }, { "epoch": 0.9998293224099676, "grad_norm": 0.4522964254123883, "learning_rate": 1.36542072025943e-08, "loss": 0.6076, "num_tokens": 551403791.0, "step": 5858 }, { "epoch": 1.0, "grad_norm": 0.4695761395698389, "learning_rate": 6.82710360129715e-09, "loss": 0.499, "num_tokens": 551458209.0, "step": 5859 }, { "epoch": 1.0, "step": 5859, "total_flos": 438086667841536.0, "train_loss": 0.610091478802438, "train_runtime": 7786.7341, "train_samples_per_second": 12.038, "train_steps_per_second": 0.752 } ], "logging_steps": 1.0, "max_steps": 5859, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 438086667841536.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }