diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/0_trainer_state.json b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6025478b3bec60311c264b70d834225d3af06f74 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/0_trainer_state.json @@ -0,0 +1,7526 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 2498, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0008006405124099279, + "learning_rate": 2.415943612351265e-06, + "loss": 0.2467, + "step": 2 + }, + { + "epoch": 0.0016012810248198558, + "learning_rate": 2.4341906163790364e-06, + "loss": 0.0573, + "step": 4 + }, + { + "epoch": 0.0024019215372297837, + "learning_rate": 2.4524967251364995e-06, + "loss": 0.1694, + "step": 6 + }, + { + "epoch": 0.0032025620496397116, + "learning_rate": 2.4708617956148052e-06, + "loss": 0.2059, + "step": 8 + }, + { + "epoch": 0.0040032025620496394, + "learning_rate": 2.4892856843445236e-06, + "loss": 0.2581, + "step": 10 + }, + { + "epoch": 0.004803843074459567, + "learning_rate": 2.507768247396697e-06, + "loss": 0.0719, + "step": 12 + }, + { + "epoch": 0.005604483586869495, + "learning_rate": 2.5263093403840022e-06, + "loss": 0.3515, + "step": 14 + }, + { + "epoch": 0.006405124099279423, + "learning_rate": 2.5449088184619065e-06, + "loss": 0.0693, + "step": 16 + }, + { + "epoch": 0.007205764611689352, + "learning_rate": 2.5635665363297356e-06, + "loss": 0.1825, + "step": 18 + }, + { + "epoch": 0.008006405124099279, + "learning_rate": 2.5822823482318517e-06, + "loss": 0.1901, + "step": 20 + }, + { + "epoch": 0.008807045636509208, + "learning_rate": 2.6010561079587694e-06, + "loss": 0.6459, + "step": 22 + }, + { + "epoch": 0.009607686148919135, + "learning_rate": 2.6198876688483453e-06, + "loss": 0.1767, + "step": 24 + }, + { + "epoch": 0.010408326661329063, + "learning_rate": 2.6387768837868565e-06, + "loss": 0.4669, + "step": 26 + }, + { + "epoch": 0.01120896717373899, + "learning_rate": 2.6577236052101764e-06, + "loss": 0.1539, + "step": 28 + }, + { + "epoch": 0.01200960768614892, + "learning_rate": 2.6767276851049716e-06, + "loss": 0.1946, + "step": 30 + }, + { + "epoch": 0.012810248198558846, + "learning_rate": 2.6957889750097866e-06, + "loss": 0.2724, + "step": 32 + }, + { + "epoch": 0.013610888710968775, + "learning_rate": 2.7149073260162416e-06, + "loss": 0.0462, + "step": 34 + }, + { + "epoch": 0.014411529223378704, + "learning_rate": 2.7340825887701848e-06, + "loss": 0.0152, + "step": 36 + }, + { + "epoch": 0.01521216973578863, + "learning_rate": 2.7533146134728993e-06, + "loss": 0.1016, + "step": 38 + }, + { + "epoch": 0.016012810248198558, + "learning_rate": 2.772603249882202e-06, + "loss": 0.048, + "step": 40 + }, + { + "epoch": 0.016813450760608487, + "learning_rate": 2.7919483473136555e-06, + "loss": 0.1376, + "step": 42 + }, + { + "epoch": 0.017614091273018415, + "learning_rate": 2.81134975464178e-06, + "loss": 0.034, + "step": 44 + }, + { + "epoch": 0.018414731785428344, + "learning_rate": 2.8308073203011634e-06, + "loss": 0.1538, + "step": 46 + }, + { + "epoch": 0.01921537229783827, + "learning_rate": 2.850320892287688e-06, + "loss": 0.1127, + "step": 48 + }, + { + "epoch": 0.020016012810248198, + "learning_rate": 2.8698903181597026e-06, + "loss": 0.175, + "step": 50 + }, + { + "epoch": 0.020816653322658127, + "learning_rate": 2.889515445039256e-06, + "loss": 0.0457, + "step": 52 + }, + { + "epoch": 0.021617293835068056, + "learning_rate": 2.909196119613218e-06, + "loss": 0.4392, + "step": 54 + }, + { + "epoch": 0.02241793434747798, + "learning_rate": 2.928932188134529e-06, + "loss": 0.0658, + "step": 56 + }, + { + "epoch": 0.02321857485988791, + "learning_rate": 2.9487234964233724e-06, + "loss": 0.1111, + "step": 58 + }, + { + "epoch": 0.02401921537229784, + "learning_rate": 2.9685698898684355e-06, + "loss": 0.0388, + "step": 60 + }, + { + "epoch": 0.024819855884707767, + "learning_rate": 2.988471213428035e-06, + "loss": 0.1192, + "step": 62 + }, + { + "epoch": 0.025620496397117692, + "learning_rate": 3.00842731163137e-06, + "loss": 0.3528, + "step": 64 + }, + { + "epoch": 0.02642113690952762, + "learning_rate": 3.0284380285797733e-06, + "loss": 0.1812, + "step": 66 + }, + { + "epoch": 0.02722177742193755, + "learning_rate": 3.048503207947854e-06, + "loss": 0.0245, + "step": 68 + }, + { + "epoch": 0.02802241793434748, + "learning_rate": 3.068622692984767e-06, + "loss": 0.1994, + "step": 70 + }, + { + "epoch": 0.028823058446757407, + "learning_rate": 3.0887963265154187e-06, + "loss": 0.0103, + "step": 72 + }, + { + "epoch": 0.029623698959167333, + "learning_rate": 3.1090239509417364e-06, + "loss": 0.0254, + "step": 74 + }, + { + "epoch": 0.03042433947157726, + "learning_rate": 3.129305408243829e-06, + "loss": 0.0651, + "step": 76 + }, + { + "epoch": 0.03122497998398719, + "learning_rate": 3.1496405399812602e-06, + "loss": 0.0946, + "step": 78 + }, + { + "epoch": 0.032025620496397116, + "learning_rate": 3.17002918729432e-06, + "loss": 0.0301, + "step": 80 + }, + { + "epoch": 0.03282626100880705, + "learning_rate": 3.1904711909051967e-06, + "loss": 0.4394, + "step": 82 + }, + { + "epoch": 0.03362690152121697, + "learning_rate": 3.2109663911192622e-06, + "loss": 0.1513, + "step": 84 + }, + { + "epoch": 0.0344275420336269, + "learning_rate": 3.231514627826302e-06, + "loss": 0.112, + "step": 86 + }, + { + "epoch": 0.03522818254603683, + "learning_rate": 3.2521157405018146e-06, + "loss": 0.0642, + "step": 88 + }, + { + "epoch": 0.036028823058446756, + "learning_rate": 3.2727695682081897e-06, + "loss": 0.1087, + "step": 90 + }, + { + "epoch": 0.03682946357085669, + "learning_rate": 3.293475949595998e-06, + "loss": 0.3814, + "step": 92 + }, + { + "epoch": 0.03763010408326661, + "learning_rate": 3.314234722905302e-06, + "loss": 0.0535, + "step": 94 + }, + { + "epoch": 0.03843074459567654, + "learning_rate": 3.335045725966829e-06, + "loss": 0.3592, + "step": 96 + }, + { + "epoch": 0.03923138510808647, + "learning_rate": 3.355908796203301e-06, + "loss": 0.0818, + "step": 98 + }, + { + "epoch": 0.040032025620496396, + "learning_rate": 3.3768237706306716e-06, + "loss": 0.1316, + "step": 100 + }, + { + "epoch": 0.04083266613290633, + "learning_rate": 3.3977904858594534e-06, + "loss": 0.1642, + "step": 102 + }, + { + "epoch": 0.041633306645316254, + "learning_rate": 3.418808778095917e-06, + "loss": 0.1075, + "step": 104 + }, + { + "epoch": 0.04243394715772618, + "learning_rate": 3.4398784831434097e-06, + "loss": 0.3196, + "step": 106 + }, + { + "epoch": 0.04323458767013611, + "learning_rate": 3.460999436403676e-06, + "loss": 0.1469, + "step": 108 + }, + { + "epoch": 0.044035228182546036, + "learning_rate": 3.4821714728780654e-06, + "loss": 0.1458, + "step": 110 + }, + { + "epoch": 0.04483586869495596, + "learning_rate": 3.5033944271688624e-06, + "loss": 0.0183, + "step": 112 + }, + { + "epoch": 0.045636509207365894, + "learning_rate": 3.5246681334806177e-06, + "loss": 0.1096, + "step": 114 + }, + { + "epoch": 0.04643714971977582, + "learning_rate": 3.5459924256213596e-06, + "loss": 0.2373, + "step": 116 + }, + { + "epoch": 0.04723779023218575, + "learning_rate": 3.567367137003953e-06, + "loss": 0.9613, + "step": 118 + }, + { + "epoch": 0.04803843074459568, + "learning_rate": 3.588792100647368e-06, + "loss": 0.1215, + "step": 120 + }, + { + "epoch": 0.0488390712570056, + "learning_rate": 3.6102671491780393e-06, + "loss": 0.2998, + "step": 122 + }, + { + "epoch": 0.049639711769415534, + "learning_rate": 3.6317921148310965e-06, + "loss": 0.0343, + "step": 124 + }, + { + "epoch": 0.05044035228182546, + "learning_rate": 3.653366829451711e-06, + "loss": 0.256, + "step": 126 + }, + { + "epoch": 0.051240992794235385, + "learning_rate": 3.674991124496452e-06, + "loss": 0.0248, + "step": 128 + }, + { + "epoch": 0.05204163330664532, + "learning_rate": 3.696664831034521e-06, + "loss": 0.1913, + "step": 130 + }, + { + "epoch": 0.05284227381905524, + "learning_rate": 3.7183877797491143e-06, + "loss": 0.1421, + "step": 132 + }, + { + "epoch": 0.053642914331465175, + "learning_rate": 3.740159800938784e-06, + "loss": 0.0973, + "step": 134 + }, + { + "epoch": 0.0544435548438751, + "learning_rate": 3.7619807245186824e-06, + "loss": 0.008, + "step": 136 + }, + { + "epoch": 0.055244195356285025, + "learning_rate": 3.783850380021933e-06, + "loss": 0.1147, + "step": 138 + }, + { + "epoch": 0.05604483586869496, + "learning_rate": 3.8057685966010025e-06, + "loss": 0.6627, + "step": 140 + }, + { + "epoch": 0.05684547638110488, + "learning_rate": 3.827735203028956e-06, + "loss": 0.2326, + "step": 142 + }, + { + "epoch": 0.057646116893514815, + "learning_rate": 3.849750027700842e-06, + "loss": 0.0308, + "step": 144 + }, + { + "epoch": 0.05844675740592474, + "learning_rate": 3.871812898635011e-06, + "loss": 0.0637, + "step": 146 + }, + { + "epoch": 0.059247397918334666, + "learning_rate": 3.8939236434745184e-06, + "loss": 0.1526, + "step": 148 + }, + { + "epoch": 0.0600480384307446, + "learning_rate": 3.916082089488379e-06, + "loss": 0.0016, + "step": 150 + }, + { + "epoch": 0.06084867894315452, + "learning_rate": 3.938288063572962e-06, + "loss": 0.0926, + "step": 152 + }, + { + "epoch": 0.06164931945556445, + "learning_rate": 3.960541392253387e-06, + "loss": 0.1669, + "step": 154 + }, + { + "epoch": 0.06244995996797438, + "learning_rate": 3.982841901684792e-06, + "loss": 0.2158, + "step": 156 + }, + { + "epoch": 0.0632506004803843, + "learning_rate": 4.005189417653737e-06, + "loss": 0.148, + "step": 158 + }, + { + "epoch": 0.06405124099279423, + "learning_rate": 4.027583765579601e-06, + "loss": 0.0577, + "step": 160 + }, + { + "epoch": 0.06485188150520416, + "learning_rate": 4.050024770515873e-06, + "loss": 0.1165, + "step": 162 + }, + { + "epoch": 0.0656525220176141, + "learning_rate": 4.072512257151546e-06, + "loss": 0.1286, + "step": 164 + }, + { + "epoch": 0.06645316253002402, + "learning_rate": 4.095046049812541e-06, + "loss": 0.1151, + "step": 166 + }, + { + "epoch": 0.06725380304243395, + "learning_rate": 4.117625972462988e-06, + "loss": 0.0402, + "step": 168 + }, + { + "epoch": 0.06805444355484387, + "learning_rate": 4.1402518487066624e-06, + "loss": 0.2551, + "step": 170 + }, + { + "epoch": 0.0688550840672538, + "learning_rate": 4.1629235017883285e-06, + "loss": 0.4012, + "step": 172 + }, + { + "epoch": 0.06965572457966374, + "learning_rate": 4.1856407545951825e-06, + "loss": 0.2387, + "step": 174 + }, + { + "epoch": 0.07045636509207366, + "learning_rate": 4.208403429658151e-06, + "loss": 0.024, + "step": 176 + }, + { + "epoch": 0.07125700560448359, + "learning_rate": 4.2312113491533145e-06, + "loss": 0.1358, + "step": 178 + }, + { + "epoch": 0.07205764611689351, + "learning_rate": 4.254064334903347e-06, + "loss": 0.0094, + "step": 180 + }, + { + "epoch": 0.07285828662930344, + "learning_rate": 4.276962208378814e-06, + "loss": 0.1583, + "step": 182 + }, + { + "epoch": 0.07365892714171338, + "learning_rate": 4.299904790699619e-06, + "loss": 0.1238, + "step": 184 + }, + { + "epoch": 0.0744595676541233, + "learning_rate": 4.3228919026364345e-06, + "loss": 0.4382, + "step": 186 + }, + { + "epoch": 0.07526020816653323, + "learning_rate": 4.345923364612024e-06, + "loss": 0.3528, + "step": 188 + }, + { + "epoch": 0.07606084867894315, + "learning_rate": 4.368998996702686e-06, + "loss": 0.3852, + "step": 190 + }, + { + "epoch": 0.07686148919135308, + "learning_rate": 4.392118618639698e-06, + "loss": 0.0947, + "step": 192 + }, + { + "epoch": 0.07766212970376302, + "learning_rate": 4.415282049810643e-06, + "loss": 0.0537, + "step": 194 + }, + { + "epoch": 0.07846277021617294, + "learning_rate": 4.4384891092608795e-06, + "loss": 0.0766, + "step": 196 + }, + { + "epoch": 0.07926341072858287, + "learning_rate": 4.461739615694921e-06, + "loss": 0.0362, + "step": 198 + }, + { + "epoch": 0.08006405124099279, + "learning_rate": 4.485033387477915e-06, + "loss": 0.4005, + "step": 200 + }, + { + "epoch": 0.08086469175340272, + "learning_rate": 4.5083702426369715e-06, + "loss": 0.0108, + "step": 202 + }, + { + "epoch": 0.08166533226581266, + "learning_rate": 4.531749998862628e-06, + "loss": 0.3322, + "step": 204 + }, + { + "epoch": 0.08246597277822258, + "learning_rate": 4.555172473510324e-06, + "loss": 0.3022, + "step": 206 + }, + { + "epoch": 0.08326661329063251, + "learning_rate": 4.578637483601732e-06, + "loss": 0.1124, + "step": 208 + }, + { + "epoch": 0.08406725380304243, + "learning_rate": 4.602144845826234e-06, + "loss": 0.5849, + "step": 210 + }, + { + "epoch": 0.08486789431545236, + "learning_rate": 4.625694376542399e-06, + "loss": 0.0004, + "step": 212 + }, + { + "epoch": 0.08566853482786228, + "learning_rate": 4.649285891779326e-06, + "loss": 0.3145, + "step": 214 + }, + { + "epoch": 0.08646917534027222, + "learning_rate": 4.672919207238145e-06, + "loss": 0.2548, + "step": 216 + }, + { + "epoch": 0.08726981585268215, + "learning_rate": 4.696594138293421e-06, + "loss": 0.079, + "step": 218 + }, + { + "epoch": 0.08807045636509207, + "learning_rate": 4.720310499994664e-06, + "loss": 0.4485, + "step": 220 + }, + { + "epoch": 0.088871096877502, + "learning_rate": 4.744068107067673e-06, + "loss": 0.1557, + "step": 222 + }, + { + "epoch": 0.08967173738991192, + "learning_rate": 4.767866773916041e-06, + "loss": 0.1359, + "step": 224 + }, + { + "epoch": 0.09047237790232186, + "learning_rate": 4.79170631462264e-06, + "loss": 0.3935, + "step": 226 + }, + { + "epoch": 0.09127301841473179, + "learning_rate": 4.81558654295099e-06, + "loss": 0.1582, + "step": 228 + }, + { + "epoch": 0.09207365892714171, + "learning_rate": 4.839507272346751e-06, + "loss": 0.1028, + "step": 230 + }, + { + "epoch": 0.09287429943955164, + "learning_rate": 4.863468315939234e-06, + "loss": 0.0717, + "step": 232 + }, + { + "epoch": 0.09367493995196156, + "learning_rate": 4.8874694865427676e-06, + "loss": 0.3559, + "step": 234 + }, + { + "epoch": 0.0944755804643715, + "learning_rate": 4.911510596658202e-06, + "loss": 0.1527, + "step": 236 + }, + { + "epoch": 0.09527622097678143, + "learning_rate": 4.935591458474425e-06, + "loss": 0.1192, + "step": 238 + }, + { + "epoch": 0.09607686148919135, + "learning_rate": 4.959711883869734e-06, + "loss": 0.2082, + "step": 240 + }, + { + "epoch": 0.09687750200160128, + "learning_rate": 4.9838716844133665e-06, + "loss": 0.0934, + "step": 242 + }, + { + "epoch": 0.0976781425140112, + "learning_rate": 5.0080706713669435e-06, + "loss": 0.7068, + "step": 244 + }, + { + "epoch": 0.09847878302642114, + "learning_rate": 5.032308655686007e-06, + "loss": 0.1441, + "step": 246 + }, + { + "epoch": 0.09927942353883107, + "learning_rate": 5.056585448021398e-06, + "loss": 0.2424, + "step": 248 + }, + { + "epoch": 0.100080064051241, + "learning_rate": 5.080900858720789e-06, + "loss": 0.562, + "step": 250 + }, + { + "epoch": 0.10088070456365092, + "learning_rate": 5.105254697830208e-06, + "loss": 0.2064, + "step": 252 + }, + { + "epoch": 0.10168134507606084, + "learning_rate": 5.129646775095432e-06, + "loss": 0.1076, + "step": 254 + }, + { + "epoch": 0.10248198558847077, + "learning_rate": 5.154076899963514e-06, + "loss": 0.3241, + "step": 256 + }, + { + "epoch": 0.10328262610088071, + "learning_rate": 5.178544881584328e-06, + "loss": 0.1282, + "step": 258 + }, + { + "epoch": 0.10408326661329063, + "learning_rate": 5.203050528811959e-06, + "loss": 0.2354, + "step": 260 + }, + { + "epoch": 0.10488390712570056, + "learning_rate": 5.227593650206246e-06, + "loss": 0.2725, + "step": 262 + }, + { + "epoch": 0.10568454763811048, + "learning_rate": 5.2521740540343205e-06, + "loss": 0.096, + "step": 264 + }, + { + "epoch": 0.10648518815052041, + "learning_rate": 5.2767915482720164e-06, + "loss": 0.2634, + "step": 266 + }, + { + "epoch": 0.10728582866293035, + "learning_rate": 5.3014459406054295e-06, + "loss": 0.2161, + "step": 268 + }, + { + "epoch": 0.10808646917534027, + "learning_rate": 5.3261370384323904e-06, + "loss": 0.0682, + "step": 270 + }, + { + "epoch": 0.1088871096877502, + "learning_rate": 5.350864648864026e-06, + "loss": 0.0461, + "step": 272 + }, + { + "epoch": 0.10968775020016013, + "learning_rate": 5.375628578726181e-06, + "loss": 0.0681, + "step": 274 + }, + { + "epoch": 0.11048839071257005, + "learning_rate": 5.4004286345609665e-06, + "loss": 0.1726, + "step": 276 + }, + { + "epoch": 0.11128903122497999, + "learning_rate": 5.425264622628326e-06, + "loss": 0.0297, + "step": 278 + }, + { + "epoch": 0.11208967173738991, + "learning_rate": 5.450136348907444e-06, + "loss": 0.3649, + "step": 280 + }, + { + "epoch": 0.11289031224979984, + "learning_rate": 5.475043619098321e-06, + "loss": 0.1461, + "step": 282 + }, + { + "epoch": 0.11369095276220977, + "learning_rate": 5.499986238623329e-06, + "loss": 0.1071, + "step": 284 + }, + { + "epoch": 0.11449159327461969, + "learning_rate": 5.524964012628644e-06, + "loss": 0.0333, + "step": 286 + }, + { + "epoch": 0.11529223378702963, + "learning_rate": 5.549976745985809e-06, + "loss": 0.0659, + "step": 288 + }, + { + "epoch": 0.11609287429943956, + "learning_rate": 5.57502424329331e-06, + "loss": 0.1361, + "step": 290 + }, + { + "epoch": 0.11689351481184948, + "learning_rate": 5.6001063088780085e-06, + "loss": 0.6853, + "step": 292 + }, + { + "epoch": 0.1176941553242594, + "learning_rate": 5.62522274679673e-06, + "loss": 0.9823, + "step": 294 + }, + { + "epoch": 0.11849479583666933, + "learning_rate": 5.650373360837763e-06, + "loss": 0.2493, + "step": 296 + }, + { + "epoch": 0.11929543634907927, + "learning_rate": 5.675557954522462e-06, + "loss": 0.016, + "step": 298 + }, + { + "epoch": 0.1200960768614892, + "learning_rate": 5.700776331106674e-06, + "loss": 0.0424, + "step": 300 + }, + { + "epoch": 0.12089671737389912, + "learning_rate": 5.726028293582342e-06, + "loss": 0.0034, + "step": 302 + }, + { + "epoch": 0.12169735788630905, + "learning_rate": 5.751313644679071e-06, + "loss": 0.0171, + "step": 304 + }, + { + "epoch": 0.12249799839871897, + "learning_rate": 5.776632186865589e-06, + "loss": 0.0232, + "step": 306 + }, + { + "epoch": 0.1232986389111289, + "learning_rate": 5.8019837223513295e-06, + "loss": 0.1032, + "step": 308 + }, + { + "epoch": 0.12409927942353884, + "learning_rate": 5.827368053088032e-06, + "loss": 0.0758, + "step": 310 + }, + { + "epoch": 0.12489991993594876, + "learning_rate": 5.852784980771182e-06, + "loss": 0.0282, + "step": 312 + }, + { + "epoch": 0.1257005604483587, + "learning_rate": 5.878234306841637e-06, + "loss": 0.5472, + "step": 314 + }, + { + "epoch": 0.1265012009607686, + "learning_rate": 5.903715832487138e-06, + "loss": 0.0415, + "step": 316 + }, + { + "epoch": 0.12730184147317855, + "learning_rate": 5.929229358643925e-06, + "loss": 0.2705, + "step": 318 + }, + { + "epoch": 0.12810248198558846, + "learning_rate": 5.954774685998206e-06, + "loss": 0.11, + "step": 320 + }, + { + "epoch": 0.1289031224979984, + "learning_rate": 5.9803516149877475e-06, + "loss": 0.2075, + "step": 322 + }, + { + "epoch": 0.1297037630104083, + "learning_rate": 6.005959945803494e-06, + "loss": 0.0062, + "step": 324 + }, + { + "epoch": 0.13050440352281825, + "learning_rate": 6.03159947839103e-06, + "loss": 0.1936, + "step": 326 + }, + { + "epoch": 0.1313050440352282, + "learning_rate": 6.057270012452186e-06, + "loss": 0.2279, + "step": 328 + }, + { + "epoch": 0.1321056845476381, + "learning_rate": 6.082971347446654e-06, + "loss": 0.565, + "step": 330 + }, + { + "epoch": 0.13290632506004804, + "learning_rate": 6.108703282593461e-06, + "loss": 0.0678, + "step": 332 + }, + { + "epoch": 0.13370696557245795, + "learning_rate": 6.13446561687258e-06, + "loss": 0.0736, + "step": 334 + }, + { + "epoch": 0.1345076060848679, + "learning_rate": 6.160258149026557e-06, + "loss": 0.0189, + "step": 336 + }, + { + "epoch": 0.13530824659727783, + "learning_rate": 6.186080677561974e-06, + "loss": 0.3389, + "step": 338 + }, + { + "epoch": 0.13610888710968774, + "learning_rate": 6.2119330007511014e-06, + "loss": 0.037, + "step": 340 + }, + { + "epoch": 0.13690952762209768, + "learning_rate": 6.237814916633431e-06, + "loss": 0.1, + "step": 342 + }, + { + "epoch": 0.1377101681345076, + "learning_rate": 6.263726223017326e-06, + "loss": 0.0294, + "step": 344 + }, + { + "epoch": 0.13851080864691753, + "learning_rate": 6.289666717481496e-06, + "loss": 0.2595, + "step": 346 + }, + { + "epoch": 0.13931144915932747, + "learning_rate": 6.315636197376634e-06, + "loss": 0.04, + "step": 348 + }, + { + "epoch": 0.14011208967173738, + "learning_rate": 6.341634459827044e-06, + "loss": 0.0864, + "step": 350 + }, + { + "epoch": 0.14091273018414732, + "learning_rate": 6.3676613017321305e-06, + "loss": 0.101, + "step": 352 + }, + { + "epoch": 0.14171337069655723, + "learning_rate": 6.393716519768032e-06, + "loss": 0.0951, + "step": 354 + }, + { + "epoch": 0.14251401120896717, + "learning_rate": 6.419799910389257e-06, + "loss": 0.0108, + "step": 356 + }, + { + "epoch": 0.1433146517213771, + "learning_rate": 6.445911269830183e-06, + "loss": 0.1673, + "step": 358 + }, + { + "epoch": 0.14411529223378702, + "learning_rate": 6.472050394106689e-06, + "loss": 0.1196, + "step": 360 + }, + { + "epoch": 0.14491593274619696, + "learning_rate": 6.498217079017806e-06, + "loss": 0.1911, + "step": 362 + }, + { + "epoch": 0.14571657325860687, + "learning_rate": 6.524411120147204e-06, + "loss": 0.1379, + "step": 364 + }, + { + "epoch": 0.1465172137710168, + "learning_rate": 6.5506323128648654e-06, + "loss": 0.1161, + "step": 366 + }, + { + "epoch": 0.14731785428342675, + "learning_rate": 6.576880452328645e-06, + "loss": 0.399, + "step": 368 + }, + { + "epoch": 0.14811849479583666, + "learning_rate": 6.603155333485934e-06, + "loss": 0.1525, + "step": 370 + }, + { + "epoch": 0.1489191353082466, + "learning_rate": 6.6294567510751675e-06, + "loss": 0.3517, + "step": 372 + }, + { + "epoch": 0.14971977582065651, + "learning_rate": 6.655784499627476e-06, + "loss": 0.3868, + "step": 374 + }, + { + "epoch": 0.15052041633306645, + "learning_rate": 6.682138373468341e-06, + "loss": 0.0292, + "step": 376 + }, + { + "epoch": 0.1513210568454764, + "learning_rate": 6.7085181667191e-06, + "loss": 0.0172, + "step": 378 + }, + { + "epoch": 0.1521216973578863, + "learning_rate": 6.734923673298605e-06, + "loss": 0.1275, + "step": 380 + }, + { + "epoch": 0.15292233787029624, + "learning_rate": 6.761354686924883e-06, + "loss": 0.1193, + "step": 382 + }, + { + "epoch": 0.15372297838270615, + "learning_rate": 6.787811001116654e-06, + "loss": 0.2931, + "step": 384 + }, + { + "epoch": 0.1545236188951161, + "learning_rate": 6.8142924091949955e-06, + "loss": 0.6562, + "step": 386 + }, + { + "epoch": 0.15532425940752603, + "learning_rate": 6.840798704284939e-06, + "loss": 0.13, + "step": 388 + }, + { + "epoch": 0.15612489991993594, + "learning_rate": 6.867329679317144e-06, + "loss": 0.6252, + "step": 390 + }, + { + "epoch": 0.15692554043234588, + "learning_rate": 6.893885127029419e-06, + "loss": 0.2444, + "step": 392 + }, + { + "epoch": 0.1577261809447558, + "learning_rate": 6.920464839968391e-06, + "loss": 0.0536, + "step": 394 + }, + { + "epoch": 0.15852682145716573, + "learning_rate": 6.94706861049117e-06, + "loss": 0.2237, + "step": 396 + }, + { + "epoch": 0.15932746196957567, + "learning_rate": 6.973696230766884e-06, + "loss": 0.397, + "step": 398 + }, + { + "epoch": 0.16012810248198558, + "learning_rate": 7.000347492778341e-06, + "loss": 0.4228, + "step": 400 + }, + { + "epoch": 0.16092874299439552, + "learning_rate": 7.027022188323704e-06, + "loss": 0.2557, + "step": 402 + }, + { + "epoch": 0.16172938350680544, + "learning_rate": 7.05372010901803e-06, + "loss": 0.0858, + "step": 404 + }, + { + "epoch": 0.16253002401921537, + "learning_rate": 7.080441046294945e-06, + "loss": 0.1755, + "step": 406 + }, + { + "epoch": 0.1633306645316253, + "learning_rate": 7.1071847914082605e-06, + "loss": 0.0102, + "step": 408 + }, + { + "epoch": 0.16413130504403523, + "learning_rate": 7.133951135433656e-06, + "loss": 0.06, + "step": 410 + }, + { + "epoch": 0.16493194555644516, + "learning_rate": 7.160739869270219e-06, + "loss": 0.2886, + "step": 412 + }, + { + "epoch": 0.16573258606885508, + "learning_rate": 7.18755078364214e-06, + "loss": 0.3412, + "step": 414 + }, + { + "epoch": 0.16653322658126501, + "learning_rate": 7.214383669100317e-06, + "loss": 0.2079, + "step": 416 + }, + { + "epoch": 0.16733386709367493, + "learning_rate": 7.241238316024064e-06, + "loss": 0.5073, + "step": 418 + }, + { + "epoch": 0.16813450760608487, + "learning_rate": 7.268114514622635e-06, + "loss": 0.0016, + "step": 420 + }, + { + "epoch": 0.1689351481184948, + "learning_rate": 7.2950120549369204e-06, + "loss": 0.0156, + "step": 422 + }, + { + "epoch": 0.16973578863090472, + "learning_rate": 7.321930726841144e-06, + "loss": 0.0325, + "step": 424 + }, + { + "epoch": 0.17053642914331466, + "learning_rate": 7.348870320044395e-06, + "loss": 0.1904, + "step": 426 + }, + { + "epoch": 0.17133706965572457, + "learning_rate": 7.375830624092336e-06, + "loss": 0.1332, + "step": 428 + }, + { + "epoch": 0.1721377101681345, + "learning_rate": 7.402811428368824e-06, + "loss": 0.2039, + "step": 430 + }, + { + "epoch": 0.17293835068054444, + "learning_rate": 7.429812522097613e-06, + "loss": 0.1391, + "step": 432 + }, + { + "epoch": 0.17373899119295436, + "learning_rate": 7.4568336943439055e-06, + "loss": 0.0247, + "step": 434 + }, + { + "epoch": 0.1745396317053643, + "learning_rate": 7.4838747340160475e-06, + "loss": 0.5593, + "step": 436 + }, + { + "epoch": 0.1753402722177742, + "learning_rate": 7.510935429867233e-06, + "loss": 0.5083, + "step": 438 + }, + { + "epoch": 0.17614091273018415, + "learning_rate": 7.538015570497046e-06, + "loss": 0.0494, + "step": 440 + }, + { + "epoch": 0.17694155324259409, + "learning_rate": 7.5651149443531846e-06, + "loss": 0.0339, + "step": 442 + }, + { + "epoch": 0.177742193755004, + "learning_rate": 7.592233339733077e-06, + "loss": 0.103, + "step": 444 + }, + { + "epoch": 0.17854283426741394, + "learning_rate": 7.619370544785608e-06, + "loss": 0.0907, + "step": 446 + }, + { + "epoch": 0.17934347477982385, + "learning_rate": 7.646526347512665e-06, + "loss": 0.1347, + "step": 448 + }, + { + "epoch": 0.1801441152922338, + "learning_rate": 7.67370053577085e-06, + "loss": 0.5048, + "step": 450 + }, + { + "epoch": 0.18094475580464373, + "learning_rate": 7.70089289727319e-06, + "loss": 0.0254, + "step": 452 + }, + { + "epoch": 0.18174539631705364, + "learning_rate": 7.728103219590684e-06, + "loss": 0.2381, + "step": 454 + }, + { + "epoch": 0.18254603682946358, + "learning_rate": 7.755331290154041e-06, + "loss": 0.1525, + "step": 456 + }, + { + "epoch": 0.1833466773418735, + "learning_rate": 7.7825768962553e-06, + "loss": 0.0364, + "step": 458 + }, + { + "epoch": 0.18414731785428343, + "learning_rate": 7.809839825049565e-06, + "loss": 0.4224, + "step": 460 + }, + { + "epoch": 0.18494795836669337, + "learning_rate": 7.83711986355656e-06, + "loss": 0.1089, + "step": 462 + }, + { + "epoch": 0.18574859887910328, + "learning_rate": 7.864416798662347e-06, + "loss": 0.4363, + "step": 464 + }, + { + "epoch": 0.18654923939151322, + "learning_rate": 7.891730417121043e-06, + "loss": 0.0321, + "step": 466 + }, + { + "epoch": 0.18734987990392313, + "learning_rate": 7.919060505556376e-06, + "loss": 0.008, + "step": 468 + }, + { + "epoch": 0.18815052041633307, + "learning_rate": 7.946406850463435e-06, + "loss": 0.1374, + "step": 470 + }, + { + "epoch": 0.188951160928743, + "learning_rate": 7.973769238210291e-06, + "loss": 0.2525, + "step": 472 + }, + { + "epoch": 0.18975180144115292, + "learning_rate": 8.001147455039737e-06, + "loss": 0.2124, + "step": 474 + }, + { + "epoch": 0.19055244195356286, + "learning_rate": 8.028541287070858e-06, + "loss": 0.0513, + "step": 476 + }, + { + "epoch": 0.19135308246597277, + "learning_rate": 8.055950520300756e-06, + "loss": 0.2265, + "step": 478 + }, + { + "epoch": 0.1921537229783827, + "learning_rate": 8.083374940606256e-06, + "loss": 0.0198, + "step": 480 + }, + { + "epoch": 0.19295436349079265, + "learning_rate": 8.110814333745503e-06, + "loss": 0.5947, + "step": 482 + }, + { + "epoch": 0.19375500400320256, + "learning_rate": 8.138268485359684e-06, + "loss": 0.1962, + "step": 484 + }, + { + "epoch": 0.1945556445156125, + "learning_rate": 8.165737180974676e-06, + "loss": 0.1806, + "step": 486 + }, + { + "epoch": 0.1953562850280224, + "learning_rate": 8.193220206002785e-06, + "loss": 0.4964, + "step": 488 + }, + { + "epoch": 0.19615692554043235, + "learning_rate": 8.220717345744326e-06, + "loss": 0.1587, + "step": 490 + }, + { + "epoch": 0.1969575660528423, + "learning_rate": 8.248228385389349e-06, + "loss": 0.1606, + "step": 492 + }, + { + "epoch": 0.1977582065652522, + "learning_rate": 8.275753110019367e-06, + "loss": 0.1103, + "step": 494 + }, + { + "epoch": 0.19855884707766214, + "learning_rate": 8.303291304608936e-06, + "loss": 0.021, + "step": 496 + }, + { + "epoch": 0.19935948759007205, + "learning_rate": 8.330842754027378e-06, + "loss": 0.0049, + "step": 498 + }, + { + "epoch": 0.200160128102482, + "learning_rate": 8.358407243040524e-06, + "loss": 0.2578, + "step": 500 + }, + { + "epoch": 0.20096076861489193, + "learning_rate": 8.385984556312285e-06, + "loss": 0.1362, + "step": 502 + }, + { + "epoch": 0.20176140912730184, + "learning_rate": 8.413574478406386e-06, + "loss": 0.4179, + "step": 504 + }, + { + "epoch": 0.20256204963971178, + "learning_rate": 8.441176793788106e-06, + "loss": 0.1503, + "step": 506 + }, + { + "epoch": 0.2033626901521217, + "learning_rate": 8.468791286825856e-06, + "loss": 0.1755, + "step": 508 + }, + { + "epoch": 0.20416333066453163, + "learning_rate": 8.496417741792922e-06, + "loss": 0.1431, + "step": 510 + }, + { + "epoch": 0.20496397117694154, + "learning_rate": 8.524055942869135e-06, + "loss": 0.011, + "step": 512 + }, + { + "epoch": 0.20576461168935148, + "learning_rate": 8.551705674142616e-06, + "loss": 0.3312, + "step": 514 + }, + { + "epoch": 0.20656525220176142, + "learning_rate": 8.579366719611353e-06, + "loss": 0.019, + "step": 516 + }, + { + "epoch": 0.20736589271417133, + "learning_rate": 8.607038863184952e-06, + "loss": 0.0045, + "step": 518 + }, + { + "epoch": 0.20816653322658127, + "learning_rate": 8.634721888686368e-06, + "loss": 0.1427, + "step": 520 + }, + { + "epoch": 0.20896717373899118, + "learning_rate": 8.662415579853495e-06, + "loss": 0.0707, + "step": 522 + }, + { + "epoch": 0.20976781425140112, + "learning_rate": 8.690119720340907e-06, + "loss": 0.1926, + "step": 524 + }, + { + "epoch": 0.21056845476381106, + "learning_rate": 8.717834093721598e-06, + "loss": 0.1663, + "step": 526 + }, + { + "epoch": 0.21136909527622097, + "learning_rate": 8.74555848348857e-06, + "loss": 0.1228, + "step": 528 + }, + { + "epoch": 0.2121697357886309, + "learning_rate": 8.773292673056572e-06, + "loss": 0.235, + "step": 530 + }, + { + "epoch": 0.21297037630104082, + "learning_rate": 8.801036445763858e-06, + "loss": 0.0872, + "step": 532 + }, + { + "epoch": 0.21377101681345076, + "learning_rate": 8.828789584873757e-06, + "loss": 0.2412, + "step": 534 + }, + { + "epoch": 0.2145716573258607, + "learning_rate": 8.856551873576448e-06, + "loss": 0.1103, + "step": 536 + }, + { + "epoch": 0.2153722978382706, + "learning_rate": 8.884323094990613e-06, + "loss": 0.0017, + "step": 538 + }, + { + "epoch": 0.21617293835068055, + "learning_rate": 8.912103032165206e-06, + "loss": 0.0287, + "step": 540 + }, + { + "epoch": 0.21697357886309046, + "learning_rate": 8.939891468081036e-06, + "loss": 0.1293, + "step": 542 + }, + { + "epoch": 0.2177742193755004, + "learning_rate": 8.967688185652527e-06, + "loss": 0.0015, + "step": 544 + }, + { + "epoch": 0.21857485988791034, + "learning_rate": 8.995492967729449e-06, + "loss": 0.2504, + "step": 546 + }, + { + "epoch": 0.21937550040032025, + "learning_rate": 9.023305597098526e-06, + "loss": 0.1533, + "step": 548 + }, + { + "epoch": 0.2201761409127302, + "learning_rate": 9.051125856485175e-06, + "loss": 0.0213, + "step": 550 + }, + { + "epoch": 0.2209767814251401, + "learning_rate": 9.078953528555258e-06, + "loss": 0.9907, + "step": 552 + }, + { + "epoch": 0.22177742193755004, + "learning_rate": 9.106788395916682e-06, + "loss": 1.0512, + "step": 554 + }, + { + "epoch": 0.22257806244995998, + "learning_rate": 9.134630241121135e-06, + "loss": 0.0298, + "step": 556 + }, + { + "epoch": 0.2233787029623699, + "learning_rate": 9.162478846665854e-06, + "loss": 0.4309, + "step": 558 + }, + { + "epoch": 0.22417934347477983, + "learning_rate": 9.190333994995208e-06, + "loss": 0.1522, + "step": 560 + }, + { + "epoch": 0.22497998398718974, + "learning_rate": 9.218195468502469e-06, + "loss": 0.0953, + "step": 562 + }, + { + "epoch": 0.22578062449959968, + "learning_rate": 9.24606304953148e-06, + "loss": 0.5407, + "step": 564 + }, + { + "epoch": 0.22658126501200962, + "learning_rate": 9.273936520378426e-06, + "loss": 0.0858, + "step": 566 + }, + { + "epoch": 0.22738190552441953, + "learning_rate": 9.301815663293426e-06, + "loss": 0.2854, + "step": 568 + }, + { + "epoch": 0.22818254603682947, + "learning_rate": 9.329700260482286e-06, + "loss": 1.9709, + "step": 570 + }, + { + "epoch": 0.22898318654923938, + "learning_rate": 9.35759009410826e-06, + "loss": 0.0383, + "step": 572 + }, + { + "epoch": 0.22978382706164932, + "learning_rate": 9.38548494629364e-06, + "loss": 0.0332, + "step": 574 + }, + { + "epoch": 0.23058446757405926, + "learning_rate": 9.41338459912151e-06, + "loss": 0.1591, + "step": 576 + }, + { + "epoch": 0.23138510808646917, + "learning_rate": 9.441288834637507e-06, + "loss": 0.4677, + "step": 578 + }, + { + "epoch": 0.2321857485988791, + "learning_rate": 9.469197434851414e-06, + "loss": 0.108, + "step": 580 + }, + { + "epoch": 0.23298638911128902, + "learning_rate": 9.497110181738935e-06, + "loss": 0.254, + "step": 582 + }, + { + "epoch": 0.23378702962369896, + "learning_rate": 9.52502685724336e-06, + "loss": 0.2554, + "step": 584 + }, + { + "epoch": 0.2345876701361089, + "learning_rate": 9.552947243277342e-06, + "loss": 0.1302, + "step": 586 + }, + { + "epoch": 0.2353883106485188, + "learning_rate": 9.580871121724498e-06, + "loss": 0.2466, + "step": 588 + }, + { + "epoch": 0.23618895116092875, + "learning_rate": 9.608798274441153e-06, + "loss": 0.0709, + "step": 590 + }, + { + "epoch": 0.23698959167333866, + "learning_rate": 9.636728483258116e-06, + "loss": 0.2866, + "step": 592 + }, + { + "epoch": 0.2377902321857486, + "learning_rate": 9.664661529982263e-06, + "loss": 0.1067, + "step": 594 + }, + { + "epoch": 0.23859087269815854, + "learning_rate": 9.692597196398302e-06, + "loss": 0.2506, + "step": 596 + }, + { + "epoch": 0.23939151321056845, + "learning_rate": 9.720535264270526e-06, + "loss": 0.1105, + "step": 598 + }, + { + "epoch": 0.2401921537229784, + "learning_rate": 9.748475515344416e-06, + "loss": 0.2434, + "step": 600 + }, + { + "epoch": 0.2409927942353883, + "learning_rate": 9.776417731348403e-06, + "loss": 0.1571, + "step": 602 + }, + { + "epoch": 0.24179343474779824, + "learning_rate": 9.80436169399561e-06, + "loss": 0.5832, + "step": 604 + }, + { + "epoch": 0.24259407526020815, + "learning_rate": 9.832307184985473e-06, + "loss": 0.0777, + "step": 606 + }, + { + "epoch": 0.2433947157726181, + "learning_rate": 9.8602539860055e-06, + "loss": 0.2253, + "step": 608 + }, + { + "epoch": 0.24419535628502803, + "learning_rate": 9.888201878732946e-06, + "loss": 0.3303, + "step": 610 + }, + { + "epoch": 0.24499599679743794, + "learning_rate": 9.916150644836596e-06, + "loss": 0.233, + "step": 612 + }, + { + "epoch": 0.24579663730984788, + "learning_rate": 9.944100065978354e-06, + "loss": 0.1816, + "step": 614 + }, + { + "epoch": 0.2465972778222578, + "learning_rate": 9.972049923815011e-06, + "loss": 0.0678, + "step": 616 + }, + { + "epoch": 0.24739791833466773, + "learning_rate": 9.999999999999996e-06, + "loss": 0.2618, + "step": 618 + }, + { + "epoch": 0.24819855884707767, + "learning_rate": 1.0027950076184982e-05, + "loss": 0.0673, + "step": 620 + }, + { + "epoch": 0.24899919935948758, + "learning_rate": 1.0055899934021637e-05, + "loss": 0.1078, + "step": 622 + }, + { + "epoch": 0.24979983987189752, + "learning_rate": 1.0083849355163397e-05, + "loss": 0.0413, + "step": 624 + }, + { + "epoch": 0.25060048038430743, + "learning_rate": 1.0111798121267047e-05, + "loss": 0.0515, + "step": 626 + }, + { + "epoch": 0.2514011208967174, + "learning_rate": 1.0139746013994493e-05, + "loss": 0.2504, + "step": 628 + }, + { + "epoch": 0.2522017614091273, + "learning_rate": 1.016769281501452e-05, + "loss": 0.1164, + "step": 630 + }, + { + "epoch": 0.2530024019215372, + "learning_rate": 1.0195638306004383e-05, + "loss": 0.2182, + "step": 632 + }, + { + "epoch": 0.25380304243394713, + "learning_rate": 1.022358226865159e-05, + "loss": 0.1589, + "step": 634 + }, + { + "epoch": 0.2546036829463571, + "learning_rate": 1.0251524484655577e-05, + "loss": 0.1725, + "step": 636 + }, + { + "epoch": 0.255404323458767, + "learning_rate": 1.0279464735729467e-05, + "loss": 0.0579, + "step": 638 + }, + { + "epoch": 0.2562049639711769, + "learning_rate": 1.0307402803601691e-05, + "loss": 0.0485, + "step": 640 + }, + { + "epoch": 0.2570056044835869, + "learning_rate": 1.033533847001773e-05, + "loss": 0.4735, + "step": 642 + }, + { + "epoch": 0.2578062449959968, + "learning_rate": 1.0363271516741877e-05, + "loss": 0.0575, + "step": 644 + }, + { + "epoch": 0.2586068855084067, + "learning_rate": 1.039120172555884e-05, + "loss": 0.0828, + "step": 646 + }, + { + "epoch": 0.2594075260208166, + "learning_rate": 1.0419128878275495e-05, + "loss": 0.0701, + "step": 648 + }, + { + "epoch": 0.2602081665332266, + "learning_rate": 1.0447052756722651e-05, + "loss": 0.2886, + "step": 650 + }, + { + "epoch": 0.2610088070456365, + "learning_rate": 1.0474973142756632e-05, + "loss": 0.1189, + "step": 652 + }, + { + "epoch": 0.2618094475580464, + "learning_rate": 1.0502889818261058e-05, + "loss": 0.3615, + "step": 654 + }, + { + "epoch": 0.2626100880704564, + "learning_rate": 1.053080256514858e-05, + "loss": 0.177, + "step": 656 + }, + { + "epoch": 0.2634107285828663, + "learning_rate": 1.0558711165362488e-05, + "loss": 0.0059, + "step": 658 + }, + { + "epoch": 0.2642113690952762, + "learning_rate": 1.0586615400878484e-05, + "loss": 0.394, + "step": 660 + }, + { + "epoch": 0.26501200960768617, + "learning_rate": 1.0614515053706354e-05, + "loss": 0.177, + "step": 662 + }, + { + "epoch": 0.2658126501200961, + "learning_rate": 1.0642409905891733e-05, + "loss": 0.6303, + "step": 664 + }, + { + "epoch": 0.266613290632506, + "learning_rate": 1.0670299739517706e-05, + "loss": 0.3272, + "step": 666 + }, + { + "epoch": 0.2674139311449159, + "learning_rate": 1.0698184336706567e-05, + "loss": 0.2295, + "step": 668 + }, + { + "epoch": 0.2682145716573259, + "learning_rate": 1.0726063479621567e-05, + "loss": 0.1106, + "step": 670 + }, + { + "epoch": 0.2690152121697358, + "learning_rate": 1.0753936950468513e-05, + "loss": 0.0988, + "step": 672 + }, + { + "epoch": 0.2698158526821457, + "learning_rate": 1.0781804531497525e-05, + "loss": 0.5348, + "step": 674 + }, + { + "epoch": 0.27061649319455566, + "learning_rate": 1.0809666005004787e-05, + "loss": 0.1596, + "step": 676 + }, + { + "epoch": 0.2714171337069656, + "learning_rate": 1.083752115333414e-05, + "loss": 0.0666, + "step": 678 + }, + { + "epoch": 0.2722177742193755, + "learning_rate": 1.0865369758878858e-05, + "loss": 0.3488, + "step": 680 + }, + { + "epoch": 0.27301841473178545, + "learning_rate": 1.0893211604083311e-05, + "loss": 0.2719, + "step": 682 + }, + { + "epoch": 0.27381905524419536, + "learning_rate": 1.0921046471444737e-05, + "loss": 0.1288, + "step": 684 + }, + { + "epoch": 0.2746196957566053, + "learning_rate": 1.0948874143514818e-05, + "loss": 0.427, + "step": 686 + }, + { + "epoch": 0.2754203362690152, + "learning_rate": 1.0976694402901467e-05, + "loss": 0.1688, + "step": 688 + }, + { + "epoch": 0.27622097678142515, + "learning_rate": 1.1004507032270544e-05, + "loss": 0.2892, + "step": 690 + }, + { + "epoch": 0.27702161729383507, + "learning_rate": 1.1032311814347467e-05, + "loss": 0.1668, + "step": 692 + }, + { + "epoch": 0.277822257806245, + "learning_rate": 1.1060108531918955e-05, + "loss": 0.0432, + "step": 694 + }, + { + "epoch": 0.27862289831865494, + "learning_rate": 1.1087896967834787e-05, + "loss": 0.5289, + "step": 696 + }, + { + "epoch": 0.27942353883106485, + "learning_rate": 1.111567690500938e-05, + "loss": 0.1471, + "step": 698 + }, + { + "epoch": 0.28022417934347477, + "learning_rate": 1.1143448126423545e-05, + "loss": 0.0306, + "step": 700 + }, + { + "epoch": 0.28102481985588473, + "learning_rate": 1.1171210415126238e-05, + "loss": 0.2731, + "step": 702 + }, + { + "epoch": 0.28182546036829464, + "learning_rate": 1.1198963554236135e-05, + "loss": 0.5657, + "step": 704 + }, + { + "epoch": 0.28262610088070456, + "learning_rate": 1.122670732694342e-05, + "loss": 0.0508, + "step": 706 + }, + { + "epoch": 0.28342674139311447, + "learning_rate": 1.1254441516511425e-05, + "loss": 0.0586, + "step": 708 + }, + { + "epoch": 0.28422738190552443, + "learning_rate": 1.1282165906278395e-05, + "loss": 0.0144, + "step": 710 + }, + { + "epoch": 0.28502802241793435, + "learning_rate": 1.1309880279659087e-05, + "loss": 0.3224, + "step": 712 + }, + { + "epoch": 0.28582866293034426, + "learning_rate": 1.1337584420146496e-05, + "loss": 0.278, + "step": 714 + }, + { + "epoch": 0.2866293034427542, + "learning_rate": 1.1365278111313625e-05, + "loss": 0.5086, + "step": 716 + }, + { + "epoch": 0.28742994395516414, + "learning_rate": 1.1392961136815041e-05, + "loss": 0.2386, + "step": 718 + }, + { + "epoch": 0.28823058446757405, + "learning_rate": 1.142063328038864e-05, + "loss": 0.5037, + "step": 720 + }, + { + "epoch": 0.289031224979984, + "learning_rate": 1.1448294325857377e-05, + "loss": 0.1244, + "step": 722 + }, + { + "epoch": 0.2898318654923939, + "learning_rate": 1.1475944057130856e-05, + "loss": 0.1801, + "step": 724 + }, + { + "epoch": 0.29063250600480384, + "learning_rate": 1.150358225820707e-05, + "loss": 0.1845, + "step": 726 + }, + { + "epoch": 0.29143314651721375, + "learning_rate": 1.1531208713174138e-05, + "loss": 0.3542, + "step": 728 + }, + { + "epoch": 0.2922337870296237, + "learning_rate": 1.1558823206211887e-05, + "loss": 0.0561, + "step": 730 + }, + { + "epoch": 0.2930344275420336, + "learning_rate": 1.1586425521593607e-05, + "loss": 0.3418, + "step": 732 + }, + { + "epoch": 0.29383506805444354, + "learning_rate": 1.1614015443687708e-05, + "loss": 0.2323, + "step": 734 + }, + { + "epoch": 0.2946357085668535, + "learning_rate": 1.1641592756959467e-05, + "loss": 0.152, + "step": 736 + }, + { + "epoch": 0.2954363490792634, + "learning_rate": 1.1669157245972616e-05, + "loss": 0.1304, + "step": 738 + }, + { + "epoch": 0.2962369895916733, + "learning_rate": 1.1696708695391057e-05, + "loss": 0.4447, + "step": 740 + }, + { + "epoch": 0.29703763010408324, + "learning_rate": 1.1724246889980626e-05, + "loss": 0.0264, + "step": 742 + }, + { + "epoch": 0.2978382706164932, + "learning_rate": 1.1751771614610643e-05, + "loss": 0.0659, + "step": 744 + }, + { + "epoch": 0.2986389111289031, + "learning_rate": 1.1779282654255668e-05, + "loss": 0.1796, + "step": 746 + }, + { + "epoch": 0.29943955164131303, + "learning_rate": 1.180677979399721e-05, + "loss": 0.1283, + "step": 748 + }, + { + "epoch": 0.300240192153723, + "learning_rate": 1.1834262819025317e-05, + "loss": 0.0365, + "step": 750 + }, + { + "epoch": 0.3010408326661329, + "learning_rate": 1.1861731514640309e-05, + "loss": 0.2157, + "step": 752 + }, + { + "epoch": 0.3018414731785428, + "learning_rate": 1.188918566625449e-05, + "loss": 0.5453, + "step": 754 + }, + { + "epoch": 0.3026421136909528, + "learning_rate": 1.1916625059393739e-05, + "loss": 0.3992, + "step": 756 + }, + { + "epoch": 0.3034427542033627, + "learning_rate": 1.1944049479699241e-05, + "loss": 0.0596, + "step": 758 + }, + { + "epoch": 0.3042433947157726, + "learning_rate": 1.1971458712929133e-05, + "loss": 0.0521, + "step": 760 + }, + { + "epoch": 0.3050440352281825, + "learning_rate": 1.1998852544960256e-05, + "loss": 0.2155, + "step": 762 + }, + { + "epoch": 0.3058446757405925, + "learning_rate": 1.2026230761789702e-05, + "loss": 0.1666, + "step": 764 + }, + { + "epoch": 0.3066453162530024, + "learning_rate": 1.2053593149536557e-05, + "loss": 0.0528, + "step": 766 + }, + { + "epoch": 0.3074459567654123, + "learning_rate": 1.2080939494443618e-05, + "loss": 0.2306, + "step": 768 + }, + { + "epoch": 0.3082465972778223, + "learning_rate": 1.210826958287895e-05, + "loss": 0.0265, + "step": 770 + }, + { + "epoch": 0.3090472377902322, + "learning_rate": 1.2135583201337646e-05, + "loss": 0.04, + "step": 772 + }, + { + "epoch": 0.3098478783026421, + "learning_rate": 1.2162880136443434e-05, + "loss": 0.2041, + "step": 774 + }, + { + "epoch": 0.31064851881505207, + "learning_rate": 1.2190160174950428e-05, + "loss": 0.3985, + "step": 776 + }, + { + "epoch": 0.311449159327462, + "learning_rate": 1.2217423103744692e-05, + "loss": 0.4338, + "step": 778 + }, + { + "epoch": 0.3122497998398719, + "learning_rate": 1.2244668709845952e-05, + "loss": 0.0033, + "step": 780 + }, + { + "epoch": 0.3130504403522818, + "learning_rate": 1.2271896780409309e-05, + "loss": 0.1383, + "step": 782 + }, + { + "epoch": 0.31385108086469177, + "learning_rate": 1.2299107102726804e-05, + "loss": 0.1044, + "step": 784 + }, + { + "epoch": 0.3146517213771017, + "learning_rate": 1.2326299464229143e-05, + "loss": 0.6256, + "step": 786 + }, + { + "epoch": 0.3154523618895116, + "learning_rate": 1.2353473652487329e-05, + "loss": 0.1302, + "step": 788 + }, + { + "epoch": 0.31625300240192156, + "learning_rate": 1.2380629455214385e-05, + "loss": 0.1811, + "step": 790 + }, + { + "epoch": 0.31705364291433147, + "learning_rate": 1.2407766660266916e-05, + "loss": 0.7897, + "step": 792 + }, + { + "epoch": 0.3178542834267414, + "learning_rate": 1.2434885055646808e-05, + "loss": 0.0882, + "step": 794 + }, + { + "epoch": 0.31865492393915135, + "learning_rate": 1.2461984429502947e-05, + "loss": 0.2129, + "step": 796 + }, + { + "epoch": 0.31945556445156126, + "learning_rate": 1.2489064570132761e-05, + "loss": 0.2419, + "step": 798 + }, + { + "epoch": 0.32025620496397117, + "learning_rate": 1.2516125265983945e-05, + "loss": 0.0982, + "step": 800 + }, + { + "epoch": 0.3210568454763811, + "learning_rate": 1.2543166305656089e-05, + "loss": 0.0043, + "step": 802 + }, + { + "epoch": 0.32185748598879105, + "learning_rate": 1.257018747790238e-05, + "loss": 0.0432, + "step": 804 + }, + { + "epoch": 0.32265812650120096, + "learning_rate": 1.259718857163117e-05, + "loss": 0.3037, + "step": 806 + }, + { + "epoch": 0.32345876701361087, + "learning_rate": 1.2624169375907657e-05, + "loss": 0.1702, + "step": 808 + }, + { + "epoch": 0.32425940752602084, + "learning_rate": 1.2651129679955598e-05, + "loss": 0.3856, + "step": 810 + }, + { + "epoch": 0.32506004803843075, + "learning_rate": 1.2678069273158849e-05, + "loss": 0.133, + "step": 812 + }, + { + "epoch": 0.32586068855084066, + "learning_rate": 1.2704987945063073e-05, + "loss": 0.2291, + "step": 814 + }, + { + "epoch": 0.3266613290632506, + "learning_rate": 1.273188548537736e-05, + "loss": 0.6004, + "step": 816 + }, + { + "epoch": 0.32746196957566054, + "learning_rate": 1.2758761683975929e-05, + "loss": 0.2873, + "step": 818 + }, + { + "epoch": 0.32826261008807045, + "learning_rate": 1.2785616330899676e-05, + "loss": 0.0601, + "step": 820 + }, + { + "epoch": 0.32906325060048036, + "learning_rate": 1.2812449216357855e-05, + "loss": 0.023, + "step": 822 + }, + { + "epoch": 0.32986389111289033, + "learning_rate": 1.2839260130729776e-05, + "loss": 0.3317, + "step": 824 + }, + { + "epoch": 0.33066453162530024, + "learning_rate": 1.2866048864566336e-05, + "loss": 0.0976, + "step": 826 + }, + { + "epoch": 0.33146517213771015, + "learning_rate": 1.2892815208591734e-05, + "loss": 0.2702, + "step": 828 + }, + { + "epoch": 0.3322658126501201, + "learning_rate": 1.2919558953705047e-05, + "loss": 0.1872, + "step": 830 + }, + { + "epoch": 0.33306645316253003, + "learning_rate": 1.2946279890981966e-05, + "loss": 0.0484, + "step": 832 + }, + { + "epoch": 0.33386709367493994, + "learning_rate": 1.2972977811676289e-05, + "loss": 0.2853, + "step": 834 + }, + { + "epoch": 0.33466773418734985, + "learning_rate": 1.2999652507221652e-05, + "loss": 0.0551, + "step": 836 + }, + { + "epoch": 0.3354683746997598, + "learning_rate": 1.3026303769233109e-05, + "loss": 0.3515, + "step": 838 + }, + { + "epoch": 0.33626901521216973, + "learning_rate": 1.3052931389508822e-05, + "loss": 0.3938, + "step": 840 + }, + { + "epoch": 0.33706965572457964, + "learning_rate": 1.3079535160031601e-05, + "loss": 0.0773, + "step": 842 + }, + { + "epoch": 0.3378702962369896, + "learning_rate": 1.3106114872970575e-05, + "loss": 0.25, + "step": 844 + }, + { + "epoch": 0.3386709367493995, + "learning_rate": 1.313267032068285e-05, + "loss": 0.2299, + "step": 846 + }, + { + "epoch": 0.33947157726180943, + "learning_rate": 1.3159201295715054e-05, + "loss": 0.8993, + "step": 848 + }, + { + "epoch": 0.3402722177742194, + "learning_rate": 1.3185707590804997e-05, + "loss": 0.0636, + "step": 850 + }, + { + "epoch": 0.3410728582866293, + "learning_rate": 1.321218899888334e-05, + "loss": 0.2269, + "step": 852 + }, + { + "epoch": 0.3418734987990392, + "learning_rate": 1.3238645313075109e-05, + "loss": 0.3642, + "step": 854 + }, + { + "epoch": 0.34267413931144913, + "learning_rate": 1.326507632670139e-05, + "loss": 0.5907, + "step": 856 + }, + { + "epoch": 0.3434747798238591, + "learning_rate": 1.3291481833280894e-05, + "loss": 0.6678, + "step": 858 + }, + { + "epoch": 0.344275420336269, + "learning_rate": 1.3317861626531652e-05, + "loss": 0.0843, + "step": 860 + }, + { + "epoch": 0.3450760608486789, + "learning_rate": 1.3344215500372517e-05, + "loss": 0.1857, + "step": 862 + }, + { + "epoch": 0.3458767013610889, + "learning_rate": 1.3370543248924826e-05, + "loss": 0.2602, + "step": 864 + }, + { + "epoch": 0.3466773418734988, + "learning_rate": 1.3396844666514062e-05, + "loss": 0.2764, + "step": 866 + }, + { + "epoch": 0.3474779823859087, + "learning_rate": 1.3423119547671348e-05, + "loss": 0.2288, + "step": 868 + }, + { + "epoch": 0.3482786228983187, + "learning_rate": 1.344936768713513e-05, + "loss": 0.143, + "step": 870 + }, + { + "epoch": 0.3490792634107286, + "learning_rate": 1.347558887985279e-05, + "loss": 0.2076, + "step": 872 + }, + { + "epoch": 0.3498799039231385, + "learning_rate": 1.3501782920982189e-05, + "loss": 0.0369, + "step": 874 + }, + { + "epoch": 0.3506805444355484, + "learning_rate": 1.3527949605893305e-05, + "loss": 0.0689, + "step": 876 + }, + { + "epoch": 0.3514811849479584, + "learning_rate": 1.3554088730169812e-05, + "loss": 0.0862, + "step": 878 + }, + { + "epoch": 0.3522818254603683, + "learning_rate": 1.3580200089610739e-05, + "loss": 0.1249, + "step": 880 + }, + { + "epoch": 0.3530824659727782, + "learning_rate": 1.3606283480231962e-05, + "loss": 0.0659, + "step": 882 + }, + { + "epoch": 0.35388310648518817, + "learning_rate": 1.3632338698267863e-05, + "loss": 0.0604, + "step": 884 + }, + { + "epoch": 0.3546837469975981, + "learning_rate": 1.3658365540172948e-05, + "loss": 0.6569, + "step": 886 + }, + { + "epoch": 0.355484387510008, + "learning_rate": 1.368436380262336e-05, + "loss": 0.1914, + "step": 888 + }, + { + "epoch": 0.35628502802241796, + "learning_rate": 1.3710333282518497e-05, + "loss": 0.1759, + "step": 890 + }, + { + "epoch": 0.35708566853482787, + "learning_rate": 1.3736273776982667e-05, + "loss": 0.0608, + "step": 892 + }, + { + "epoch": 0.3578863090472378, + "learning_rate": 1.3762185083366562e-05, + "loss": 0.4837, + "step": 894 + }, + { + "epoch": 0.3586869495596477, + "learning_rate": 1.3788066999248893e-05, + "loss": 0.0775, + "step": 896 + }, + { + "epoch": 0.35948759007205766, + "learning_rate": 1.3813919322438018e-05, + "loss": 0.2043, + "step": 898 + }, + { + "epoch": 0.3602882305844676, + "learning_rate": 1.3839741850973435e-05, + "loss": 0.3837, + "step": 900 + }, + { + "epoch": 0.3610888710968775, + "learning_rate": 1.3865534383127413e-05, + "loss": 0.2239, + "step": 902 + }, + { + "epoch": 0.36188951160928745, + "learning_rate": 1.3891296717406533e-05, + "loss": 0.083, + "step": 904 + }, + { + "epoch": 0.36269015212169736, + "learning_rate": 1.391702865255334e-05, + "loss": 0.3167, + "step": 906 + }, + { + "epoch": 0.3634907926341073, + "learning_rate": 1.3942729987547808e-05, + "loss": 0.0993, + "step": 908 + }, + { + "epoch": 0.36429143314651724, + "learning_rate": 1.3968400521608962e-05, + "loss": 0.2865, + "step": 910 + }, + { + "epoch": 0.36509207365892715, + "learning_rate": 1.3994040054196498e-05, + "loss": 0.2562, + "step": 912 + }, + { + "epoch": 0.36589271417133706, + "learning_rate": 1.4019648385012245e-05, + "loss": 0.1762, + "step": 914 + }, + { + "epoch": 0.366693354683747, + "learning_rate": 1.4045225314001789e-05, + "loss": 0.1536, + "step": 916 + }, + { + "epoch": 0.36749399519615694, + "learning_rate": 1.4070770641356069e-05, + "loss": 0.4599, + "step": 918 + }, + { + "epoch": 0.36829463570856685, + "learning_rate": 1.4096284167512856e-05, + "loss": 0.2153, + "step": 920 + }, + { + "epoch": 0.36909527622097676, + "learning_rate": 1.4121765693158355e-05, + "loss": 0.0877, + "step": 922 + }, + { + "epoch": 0.36989591673338673, + "learning_rate": 1.4147215019228813e-05, + "loss": 0.5322, + "step": 924 + }, + { + "epoch": 0.37069655724579664, + "learning_rate": 1.4172631946911964e-05, + "loss": 0.3526, + "step": 926 + }, + { + "epoch": 0.37149719775820655, + "learning_rate": 1.4198016277648665e-05, + "loss": 0.323, + "step": 928 + }, + { + "epoch": 0.37229783827061647, + "learning_rate": 1.4223367813134406e-05, + "loss": 0.0472, + "step": 930 + }, + { + "epoch": 0.37309847878302643, + "learning_rate": 1.4248686355320922e-05, + "loss": 0.1587, + "step": 932 + }, + { + "epoch": 0.37389911929543634, + "learning_rate": 1.4273971706417653e-05, + "loss": 0.4038, + "step": 934 + }, + { + "epoch": 0.37469975980784626, + "learning_rate": 1.429922366889332e-05, + "loss": 0.2765, + "step": 936 + }, + { + "epoch": 0.3755004003202562, + "learning_rate": 1.4324442045477534e-05, + "loss": 0.0974, + "step": 938 + }, + { + "epoch": 0.37630104083266613, + "learning_rate": 1.4349626639162231e-05, + "loss": 0.2454, + "step": 940 + }, + { + "epoch": 0.37710168134507605, + "learning_rate": 1.4374777253203265e-05, + "loss": 0.1364, + "step": 942 + }, + { + "epoch": 0.377902321857486, + "learning_rate": 1.4399893691121985e-05, + "loss": 0.2448, + "step": 944 + }, + { + "epoch": 0.3787029623698959, + "learning_rate": 1.4424975756706684e-05, + "loss": 0.2071, + "step": 946 + }, + { + "epoch": 0.37950360288230583, + "learning_rate": 1.4450023254014185e-05, + "loss": 0.1957, + "step": 948 + }, + { + "epoch": 0.38030424339471575, + "learning_rate": 1.4475035987371348e-05, + "loss": 0.2507, + "step": 950 + }, + { + "epoch": 0.3811048839071257, + "learning_rate": 1.4500013761376663e-05, + "loss": 0.8129, + "step": 952 + }, + { + "epoch": 0.3819055244195356, + "learning_rate": 1.4524956380901674e-05, + "loss": 0.0641, + "step": 954 + }, + { + "epoch": 0.38270616493194554, + "learning_rate": 1.454986365109255e-05, + "loss": 0.1356, + "step": 956 + }, + { + "epoch": 0.3835068054443555, + "learning_rate": 1.4574735377371669e-05, + "loss": 0.3025, + "step": 958 + }, + { + "epoch": 0.3843074459567654, + "learning_rate": 1.4599571365439027e-05, + "loss": 0.2261, + "step": 960 + }, + { + "epoch": 0.3851080864691753, + "learning_rate": 1.4624371421273812e-05, + "loss": 0.0825, + "step": 962 + }, + { + "epoch": 0.3859087269815853, + "learning_rate": 1.4649135351135968e-05, + "loss": 0.1754, + "step": 964 + }, + { + "epoch": 0.3867093674939952, + "learning_rate": 1.4673862961567604e-05, + "loss": 0.0762, + "step": 966 + }, + { + "epoch": 0.3875100080064051, + "learning_rate": 1.4698554059394563e-05, + "loss": 0.2071, + "step": 968 + }, + { + "epoch": 0.388310648518815, + "learning_rate": 1.4723208451727977e-05, + "loss": 0.0323, + "step": 970 + }, + { + "epoch": 0.389111289031225, + "learning_rate": 1.4747825945965675e-05, + "loss": 0.0896, + "step": 972 + }, + { + "epoch": 0.3899119295436349, + "learning_rate": 1.4772406349793749e-05, + "loss": 0.3451, + "step": 974 + }, + { + "epoch": 0.3907125700560448, + "learning_rate": 1.4796949471188033e-05, + "loss": 0.0705, + "step": 976 + }, + { + "epoch": 0.3915132105684548, + "learning_rate": 1.4821455118415666e-05, + "loss": 0.2977, + "step": 978 + }, + { + "epoch": 0.3923138510808647, + "learning_rate": 1.4845923100036479e-05, + "loss": 0.0907, + "step": 980 + }, + { + "epoch": 0.3931144915932746, + "learning_rate": 1.4870353224904563e-05, + "loss": 0.0208, + "step": 982 + }, + { + "epoch": 0.3939151321056846, + "learning_rate": 1.4894745302169786e-05, + "loss": 0.0376, + "step": 984 + }, + { + "epoch": 0.3947157726180945, + "learning_rate": 1.4919099141279205e-05, + "loss": 0.8712, + "step": 986 + }, + { + "epoch": 0.3955164131305044, + "learning_rate": 1.4943414551978597e-05, + "loss": 0.5081, + "step": 988 + }, + { + "epoch": 0.3963170536429143, + "learning_rate": 1.4967691344313988e-05, + "loss": 0.2639, + "step": 990 + }, + { + "epoch": 0.3971176941553243, + "learning_rate": 1.499192932863305e-05, + "loss": 0.4575, + "step": 992 + }, + { + "epoch": 0.3979183346677342, + "learning_rate": 1.5016128315586626e-05, + "loss": 0.0815, + "step": 994 + }, + { + "epoch": 0.3987189751801441, + "learning_rate": 1.5040288116130261e-05, + "loss": 0.015, + "step": 996 + }, + { + "epoch": 0.39951961569255406, + "learning_rate": 1.5064408541525568e-05, + "loss": 0.3493, + "step": 998 + }, + { + "epoch": 0.400320256204964, + "learning_rate": 1.5088489403341793e-05, + "loss": 0.5574, + "step": 1000 + }, + { + "epoch": 0.4011208967173739, + "learning_rate": 1.5112530513457229e-05, + "loss": 0.0433, + "step": 1002 + }, + { + "epoch": 0.40192153722978385, + "learning_rate": 1.513653168406076e-05, + "loss": 0.4451, + "step": 1004 + }, + { + "epoch": 0.40272217774219377, + "learning_rate": 1.5160492727653245e-05, + "loss": 0.1424, + "step": 1006 + }, + { + "epoch": 0.4035228182546037, + "learning_rate": 1.5184413457049006e-05, + "loss": 0.0179, + "step": 1008 + }, + { + "epoch": 0.4043234587670136, + "learning_rate": 1.5208293685377354e-05, + "loss": 0.1899, + "step": 1010 + }, + { + "epoch": 0.40512409927942356, + "learning_rate": 1.5232133226083954e-05, + "loss": 0.1627, + "step": 1012 + }, + { + "epoch": 0.40592473979183347, + "learning_rate": 1.5255931892932322e-05, + "loss": 0.128, + "step": 1014 + }, + { + "epoch": 0.4067253803042434, + "learning_rate": 1.527968950000533e-05, + "loss": 0.0551, + "step": 1016 + }, + { + "epoch": 0.40752602081665334, + "learning_rate": 1.5303405861706574e-05, + "loss": 0.3644, + "step": 1018 + }, + { + "epoch": 0.40832666132906326, + "learning_rate": 1.532708079276185e-05, + "loss": 0.0188, + "step": 1020 + }, + { + "epoch": 0.40912730184147317, + "learning_rate": 1.5350714108220667e-05, + "loss": 0.0302, + "step": 1022 + }, + { + "epoch": 0.4099279423538831, + "learning_rate": 1.5374305623457594e-05, + "loss": 0.1886, + "step": 1024 + }, + { + "epoch": 0.41072858286629305, + "learning_rate": 1.539785515417376e-05, + "loss": 0.0798, + "step": 1026 + }, + { + "epoch": 0.41152922337870296, + "learning_rate": 1.542136251639826e-05, + "loss": 0.8809, + "step": 1028 + }, + { + "epoch": 0.41232986389111287, + "learning_rate": 1.5444827526489668e-05, + "loss": 0.1639, + "step": 1030 + }, + { + "epoch": 0.41313050440352284, + "learning_rate": 1.5468250001137368e-05, + "loss": 0.0516, + "step": 1032 + }, + { + "epoch": 0.41393114491593275, + "learning_rate": 1.5491629757363026e-05, + "loss": 0.2273, + "step": 1034 + }, + { + "epoch": 0.41473178542834266, + "learning_rate": 1.551496661252208e-05, + "loss": 0.3641, + "step": 1036 + }, + { + "epoch": 0.4155324259407526, + "learning_rate": 1.5538260384305073e-05, + "loss": 0.0152, + "step": 1038 + }, + { + "epoch": 0.41633306645316254, + "learning_rate": 1.5561510890739113e-05, + "loss": 0.1172, + "step": 1040 + }, + { + "epoch": 0.41713370696557245, + "learning_rate": 1.5584717950189353e-05, + "loss": 0.0946, + "step": 1042 + }, + { + "epoch": 0.41793434747798236, + "learning_rate": 1.5607881381360296e-05, + "loss": 0.0921, + "step": 1044 + }, + { + "epoch": 0.4187349879903923, + "learning_rate": 1.563100100329731e-05, + "loss": 0.1605, + "step": 1046 + }, + { + "epoch": 0.41953562850280224, + "learning_rate": 1.565407663538797e-05, + "loss": 0.0204, + "step": 1048 + }, + { + "epoch": 0.42033626901521215, + "learning_rate": 1.567710809736356e-05, + "loss": 0.0842, + "step": 1050 + }, + { + "epoch": 0.4211369095276221, + "learning_rate": 1.5700095209300376e-05, + "loss": 0.0796, + "step": 1052 + }, + { + "epoch": 0.42193755004003203, + "learning_rate": 1.572303779162118e-05, + "loss": 0.0992, + "step": 1054 + }, + { + "epoch": 0.42273819055244194, + "learning_rate": 1.5745935665096647e-05, + "loss": 0.0317, + "step": 1056 + }, + { + "epoch": 0.4235388310648519, + "learning_rate": 1.5768788650846677e-05, + "loss": 0.0394, + "step": 1058 + }, + { + "epoch": 0.4243394715772618, + "learning_rate": 1.5791596570341844e-05, + "loss": 0.1129, + "step": 1060 + }, + { + "epoch": 0.42514011208967173, + "learning_rate": 1.581435924540481e-05, + "loss": 0.0158, + "step": 1062 + }, + { + "epoch": 0.42594075260208164, + "learning_rate": 1.5837076498211666e-05, + "loss": 0.0748, + "step": 1064 + }, + { + "epoch": 0.4267413931144916, + "learning_rate": 1.5859748151293333e-05, + "loss": 0.4398, + "step": 1066 + }, + { + "epoch": 0.4275420336269015, + "learning_rate": 1.5882374027537005e-05, + "loss": 0.1629, + "step": 1068 + }, + { + "epoch": 0.42834267413931143, + "learning_rate": 1.5904953950187455e-05, + "loss": 0.4817, + "step": 1070 + }, + { + "epoch": 0.4291433146517214, + "learning_rate": 1.5927487742848448e-05, + "loss": 0.8606, + "step": 1072 + }, + { + "epoch": 0.4299439551641313, + "learning_rate": 1.594997522948412e-05, + "loss": 0.2745, + "step": 1074 + }, + { + "epoch": 0.4307445956765412, + "learning_rate": 1.5972416234420393e-05, + "loss": 0.1034, + "step": 1076 + }, + { + "epoch": 0.4315452361889512, + "learning_rate": 1.599481058234626e-05, + "loss": 0.6513, + "step": 1078 + }, + { + "epoch": 0.4323458767013611, + "learning_rate": 1.60171580983152e-05, + "loss": 0.0517, + "step": 1080 + }, + { + "epoch": 0.433146517213771, + "learning_rate": 1.6039458607746607e-05, + "loss": 0.011, + "step": 1082 + }, + { + "epoch": 0.4339471577261809, + "learning_rate": 1.606171193642703e-05, + "loss": 0.0504, + "step": 1084 + }, + { + "epoch": 0.4347477982385909, + "learning_rate": 1.6083917910511616e-05, + "loss": 0.1601, + "step": 1086 + }, + { + "epoch": 0.4355484387510008, + "learning_rate": 1.6106076356525474e-05, + "loss": 0.0823, + "step": 1088 + }, + { + "epoch": 0.4363490792634107, + "learning_rate": 1.6128187101364982e-05, + "loss": 0.1996, + "step": 1090 + }, + { + "epoch": 0.4371497197758207, + "learning_rate": 1.6150249972299153e-05, + "loss": 0.0065, + "step": 1092 + }, + { + "epoch": 0.4379503602882306, + "learning_rate": 1.617226479697104e-05, + "loss": 0.0838, + "step": 1094 + }, + { + "epoch": 0.4387510008006405, + "learning_rate": 1.6194231403398994e-05, + "loss": 0.0665, + "step": 1096 + }, + { + "epoch": 0.43955164131305047, + "learning_rate": 1.621614961997806e-05, + "loss": 0.2692, + "step": 1098 + }, + { + "epoch": 0.4403522818254604, + "learning_rate": 1.6238019275481313e-05, + "loss": 0.1905, + "step": 1100 + }, + { + "epoch": 0.4411529223378703, + "learning_rate": 1.6259840199061212e-05, + "loss": 0.0288, + "step": 1102 + }, + { + "epoch": 0.4419535628502802, + "learning_rate": 1.6281612220250883e-05, + "loss": 0.0064, + "step": 1104 + }, + { + "epoch": 0.44275420336269017, + "learning_rate": 1.6303335168965474e-05, + "loss": 0.2056, + "step": 1106 + }, + { + "epoch": 0.4435548438751001, + "learning_rate": 1.6325008875503543e-05, + "loss": 0.1396, + "step": 1108 + }, + { + "epoch": 0.44435548438751, + "learning_rate": 1.6346633170548285e-05, + "loss": 0.0021, + "step": 1110 + }, + { + "epoch": 0.44515612489991996, + "learning_rate": 1.6368207885168897e-05, + "loss": 0.2049, + "step": 1112 + }, + { + "epoch": 0.44595676541232987, + "learning_rate": 1.6389732850821957e-05, + "loss": 0.4087, + "step": 1114 + }, + { + "epoch": 0.4467574059247398, + "learning_rate": 1.641120789935263e-05, + "loss": 0.0183, + "step": 1116 + }, + { + "epoch": 0.4475580464371497, + "learning_rate": 1.6432632862996042e-05, + "loss": 0.194, + "step": 1118 + }, + { + "epoch": 0.44835868694955966, + "learning_rate": 1.6454007574378637e-05, + "loss": 0.195, + "step": 1120 + }, + { + "epoch": 0.44915932746196957, + "learning_rate": 1.6475331866519377e-05, + "loss": 0.0973, + "step": 1122 + }, + { + "epoch": 0.4499599679743795, + "learning_rate": 1.6496605572831134e-05, + "loss": 0.2849, + "step": 1124 + }, + { + "epoch": 0.45076060848678945, + "learning_rate": 1.6517828527121928e-05, + "loss": 0.0539, + "step": 1126 + }, + { + "epoch": 0.45156124899919936, + "learning_rate": 1.6539000563596318e-05, + "loss": 0.0263, + "step": 1128 + }, + { + "epoch": 0.45236188951160927, + "learning_rate": 1.6560121516856586e-05, + "loss": 0.0043, + "step": 1130 + }, + { + "epoch": 0.45316253002401924, + "learning_rate": 1.6581191221904077e-05, + "loss": 0.093, + "step": 1132 + }, + { + "epoch": 0.45396317053642915, + "learning_rate": 1.6602209514140542e-05, + "loss": 0.0711, + "step": 1134 + }, + { + "epoch": 0.45476381104883906, + "learning_rate": 1.6623176229369324e-05, + "loss": 0.2443, + "step": 1136 + }, + { + "epoch": 0.455564451561249, + "learning_rate": 1.6644091203796694e-05, + "loss": 0.4891, + "step": 1138 + }, + { + "epoch": 0.45636509207365894, + "learning_rate": 1.6664954274033168e-05, + "loss": 0.1636, + "step": 1140 + }, + { + "epoch": 0.45716573258606885, + "learning_rate": 1.6685765277094695e-05, + "loss": 0.2698, + "step": 1142 + }, + { + "epoch": 0.45796637309847876, + "learning_rate": 1.6706524050403996e-05, + "loss": 0.7748, + "step": 1144 + }, + { + "epoch": 0.45876701361088873, + "learning_rate": 1.6727230431791806e-05, + "loss": 0.1163, + "step": 1146 + }, + { + "epoch": 0.45956765412329864, + "learning_rate": 1.674788425949818e-05, + "loss": 0.0006, + "step": 1148 + }, + { + "epoch": 0.46036829463570855, + "learning_rate": 1.6768485372173696e-05, + "loss": 0.39, + "step": 1150 + }, + { + "epoch": 0.4611689351481185, + "learning_rate": 1.6789033608880735e-05, + "loss": 0.064, + "step": 1152 + }, + { + "epoch": 0.46196957566052843, + "learning_rate": 1.6809528809094798e-05, + "loss": 0.7769, + "step": 1154 + }, + { + "epoch": 0.46277021617293834, + "learning_rate": 1.6829970812705674e-05, + "loss": 0.151, + "step": 1156 + }, + { + "epoch": 0.46357085668534825, + "learning_rate": 1.6850359460018733e-05, + "loss": 0.5947, + "step": 1158 + }, + { + "epoch": 0.4643714971977582, + "learning_rate": 1.6870694591756165e-05, + "loss": 0.0909, + "step": 1160 + }, + { + "epoch": 0.46517213771016813, + "learning_rate": 1.689097604905826e-05, + "loss": 0.1042, + "step": 1162 + }, + { + "epoch": 0.46597277822257804, + "learning_rate": 1.6911203673484577e-05, + "loss": 0.4073, + "step": 1164 + }, + { + "epoch": 0.466773418734988, + "learning_rate": 1.6931377307015226e-05, + "loss": 0.0709, + "step": 1166 + }, + { + "epoch": 0.4675740592473979, + "learning_rate": 1.695149679205214e-05, + "loss": 0.1424, + "step": 1168 + }, + { + "epoch": 0.46837469975980783, + "learning_rate": 1.6971561971420222e-05, + "loss": 0.2204, + "step": 1170 + }, + { + "epoch": 0.4691753402722178, + "learning_rate": 1.6991572688368628e-05, + "loss": 0.5136, + "step": 1172 + }, + { + "epoch": 0.4699759807846277, + "learning_rate": 1.701152878657196e-05, + "loss": 0.1587, + "step": 1174 + }, + { + "epoch": 0.4707766212970376, + "learning_rate": 1.7031430110131562e-05, + "loss": 0.2293, + "step": 1176 + }, + { + "epoch": 0.47157726180944753, + "learning_rate": 1.705127650357662e-05, + "loss": 0.2638, + "step": 1178 + }, + { + "epoch": 0.4723779023218575, + "learning_rate": 1.7071067811865467e-05, + "loss": 0.4434, + "step": 1180 + }, + { + "epoch": 0.4731785428342674, + "learning_rate": 1.7090803880386778e-05, + "loss": 0.0871, + "step": 1182 + }, + { + "epoch": 0.4739791833466773, + "learning_rate": 1.7110484554960738e-05, + "loss": 0.2391, + "step": 1184 + }, + { + "epoch": 0.4747798238590873, + "learning_rate": 1.713010968184029e-05, + "loss": 0.7516, + "step": 1186 + }, + { + "epoch": 0.4755804643714972, + "learning_rate": 1.7149679107712306e-05, + "loss": 0.0321, + "step": 1188 + }, + { + "epoch": 0.4763811048839071, + "learning_rate": 1.716919267969883e-05, + "loss": 0.2413, + "step": 1190 + }, + { + "epoch": 0.4771817453963171, + "learning_rate": 1.7188650245358215e-05, + "loss": 0.3946, + "step": 1192 + }, + { + "epoch": 0.477982385908727, + "learning_rate": 1.7208051652686338e-05, + "loss": 0.2789, + "step": 1194 + }, + { + "epoch": 0.4787830264211369, + "learning_rate": 1.722739675011779e-05, + "loss": 0.0135, + "step": 1196 + }, + { + "epoch": 0.4795836669335468, + "learning_rate": 1.7246685386527095e-05, + "loss": 0.1909, + "step": 1198 + }, + { + "epoch": 0.4803843074459568, + "learning_rate": 1.726591741122981e-05, + "loss": 0.2864, + "step": 1200 + }, + { + "epoch": 0.4811849479583667, + "learning_rate": 1.7285092673983753e-05, + "loss": 0.3336, + "step": 1202 + }, + { + "epoch": 0.4819855884707766, + "learning_rate": 1.730421102499021e-05, + "loss": 0.1287, + "step": 1204 + }, + { + "epoch": 0.48278622898318657, + "learning_rate": 1.7323272314895022e-05, + "loss": 0.3104, + "step": 1206 + }, + { + "epoch": 0.4835868694955965, + "learning_rate": 1.734227639478982e-05, + "loss": 0.2498, + "step": 1208 + }, + { + "epoch": 0.4843875100080064, + "learning_rate": 1.736122311621314e-05, + "loss": 0.2466, + "step": 1210 + }, + { + "epoch": 0.4851881505204163, + "learning_rate": 1.738011233115165e-05, + "loss": 0.7192, + "step": 1212 + }, + { + "epoch": 0.4859887910328263, + "learning_rate": 1.7398943892041227e-05, + "loss": 0.1451, + "step": 1214 + }, + { + "epoch": 0.4867894315452362, + "learning_rate": 1.7417717651768144e-05, + "loss": 0.5076, + "step": 1216 + }, + { + "epoch": 0.4875900720576461, + "learning_rate": 1.743643346367026e-05, + "loss": 0.1385, + "step": 1218 + }, + { + "epoch": 0.48839071257005606, + "learning_rate": 1.7455091181538087e-05, + "loss": 0.192, + "step": 1220 + }, + { + "epoch": 0.489191353082466, + "learning_rate": 1.7473690659615992e-05, + "loss": 0.328, + "step": 1222 + }, + { + "epoch": 0.4899919935948759, + "learning_rate": 1.74922317526033e-05, + "loss": 0.1104, + "step": 1224 + }, + { + "epoch": 0.49079263410728585, + "learning_rate": 1.7510714315655474e-05, + "loss": 0.1754, + "step": 1226 + }, + { + "epoch": 0.49159327461969576, + "learning_rate": 1.752913820438519e-05, + "loss": 0.2398, + "step": 1228 + }, + { + "epoch": 0.4923939151321057, + "learning_rate": 1.7547503274863495e-05, + "loss": 0.136, + "step": 1230 + }, + { + "epoch": 0.4931945556445156, + "learning_rate": 1.756580938362096e-05, + "loss": 0.132, + "step": 1232 + }, + { + "epoch": 0.49399519615692555, + "learning_rate": 1.758405638764873e-05, + "loss": 0.2823, + "step": 1234 + }, + { + "epoch": 0.49479583666933546, + "learning_rate": 1.7602244144399693e-05, + "loss": 0.069, + "step": 1236 + }, + { + "epoch": 0.4955964771817454, + "learning_rate": 1.7620372511789604e-05, + "loss": 0.4614, + "step": 1238 + }, + { + "epoch": 0.49639711769415534, + "learning_rate": 1.7638441348198147e-05, + "loss": 0.2106, + "step": 1240 + }, + { + "epoch": 0.49719775820656525, + "learning_rate": 1.7656450512470077e-05, + "loss": 0.0966, + "step": 1242 + }, + { + "epoch": 0.49799839871897517, + "learning_rate": 1.7674399863916295e-05, + "loss": 0.1319, + "step": 1244 + }, + { + "epoch": 0.49879903923138513, + "learning_rate": 1.7692289262315e-05, + "loss": 0.2828, + "step": 1246 + }, + { + "epoch": 0.49959967974379504, + "learning_rate": 1.771011856791273e-05, + "loss": 0.5292, + "step": 1248 + }, + { + "epoch": 0.500400320256205, + "learning_rate": 1.7727887641425448e-05, + "loss": 0.1641, + "step": 1250 + }, + { + "epoch": 0.5012009607686149, + "learning_rate": 1.7745596344039712e-05, + "loss": 0.2002, + "step": 1252 + }, + { + "epoch": 0.5020016012810248, + "learning_rate": 1.7763244537413657e-05, + "loss": 0.2159, + "step": 1254 + }, + { + "epoch": 0.5028022417934348, + "learning_rate": 1.7780832083678116e-05, + "loss": 0.2908, + "step": 1256 + }, + { + "epoch": 0.5036028823058447, + "learning_rate": 1.7798358845437754e-05, + "loss": 0.1535, + "step": 1258 + }, + { + "epoch": 0.5044035228182546, + "learning_rate": 1.7815824685772035e-05, + "loss": 0.3213, + "step": 1260 + }, + { + "epoch": 0.5052041633306645, + "learning_rate": 1.7833229468236364e-05, + "loss": 0.0867, + "step": 1262 + }, + { + "epoch": 0.5060048038430744, + "learning_rate": 1.7850573056863156e-05, + "loss": 0.1262, + "step": 1264 + }, + { + "epoch": 0.5068054443554844, + "learning_rate": 1.786785531616285e-05, + "loss": 0.1487, + "step": 1266 + }, + { + "epoch": 0.5076060848678943, + "learning_rate": 1.7885076111125004e-05, + "loss": 0.2642, + "step": 1268 + }, + { + "epoch": 0.5084067253803043, + "learning_rate": 1.790223530721933e-05, + "loss": 0.0003, + "step": 1270 + }, + { + "epoch": 0.5092073658927142, + "learning_rate": 1.791933277039679e-05, + "loss": 0.145, + "step": 1272 + }, + { + "epoch": 0.5100080064051241, + "learning_rate": 1.7936368367090577e-05, + "loss": 0.174, + "step": 1274 + }, + { + "epoch": 0.510808646917534, + "learning_rate": 1.7953341964217183e-05, + "loss": 0.0573, + "step": 1276 + }, + { + "epoch": 0.5116092874299439, + "learning_rate": 1.7970253429177477e-05, + "loss": 0.6106, + "step": 1278 + }, + { + "epoch": 0.5124099279423538, + "learning_rate": 1.7987102629857696e-05, + "loss": 0.1791, + "step": 1280 + }, + { + "epoch": 0.5132105684547638, + "learning_rate": 1.800388943463047e-05, + "loss": 0.2116, + "step": 1282 + }, + { + "epoch": 0.5140112089671738, + "learning_rate": 1.8020613712355912e-05, + "loss": 0.1299, + "step": 1284 + }, + { + "epoch": 0.5148118494795837, + "learning_rate": 1.803727533238257e-05, + "loss": 0.1081, + "step": 1286 + }, + { + "epoch": 0.5156124899919936, + "learning_rate": 1.805387416454847e-05, + "loss": 0.3832, + "step": 1288 + }, + { + "epoch": 0.5164131305044035, + "learning_rate": 1.8070410079182195e-05, + "loss": 0.1825, + "step": 1290 + }, + { + "epoch": 0.5172137710168134, + "learning_rate": 1.8086882947103787e-05, + "loss": 0.142, + "step": 1292 + }, + { + "epoch": 0.5180144115292233, + "learning_rate": 1.8103292639625842e-05, + "loss": 0.0229, + "step": 1294 + }, + { + "epoch": 0.5188150520416333, + "learning_rate": 1.811963902855447e-05, + "loss": 0.1252, + "step": 1296 + }, + { + "epoch": 0.5196156925540433, + "learning_rate": 1.813592198619035e-05, + "loss": 0.0974, + "step": 1298 + }, + { + "epoch": 0.5204163330664532, + "learning_rate": 1.8152141385329658e-05, + "loss": 0.1453, + "step": 1300 + }, + { + "epoch": 0.5212169735788631, + "learning_rate": 1.816829709926509e-05, + "loss": 0.2697, + "step": 1302 + }, + { + "epoch": 0.522017614091273, + "learning_rate": 1.8184389001786895e-05, + "loss": 0.1717, + "step": 1304 + }, + { + "epoch": 0.5228182546036829, + "learning_rate": 1.8200416967183785e-05, + "loss": 0.0476, + "step": 1306 + }, + { + "epoch": 0.5236188951160928, + "learning_rate": 1.821638087024396e-05, + "loss": 0.0677, + "step": 1308 + }, + { + "epoch": 0.5244195356285029, + "learning_rate": 1.8232280586256097e-05, + "loss": 0.3593, + "step": 1310 + }, + { + "epoch": 0.5252201761409128, + "learning_rate": 1.8248115991010296e-05, + "loss": 0.0539, + "step": 1312 + }, + { + "epoch": 0.5260208166533227, + "learning_rate": 1.8263886960799055e-05, + "loss": 0.0399, + "step": 1314 + }, + { + "epoch": 0.5268214571657326, + "learning_rate": 1.8279593372418264e-05, + "loss": 1.1203, + "step": 1316 + }, + { + "epoch": 0.5276220976781425, + "learning_rate": 1.829523510316813e-05, + "loss": 0.018, + "step": 1318 + }, + { + "epoch": 0.5284227381905524, + "learning_rate": 1.8310812030854155e-05, + "loss": 0.4293, + "step": 1320 + }, + { + "epoch": 0.5292233787029623, + "learning_rate": 1.832632403378808e-05, + "loss": 0.2101, + "step": 1322 + }, + { + "epoch": 0.5300240192153723, + "learning_rate": 1.834177099078887e-05, + "loss": 0.3111, + "step": 1324 + }, + { + "epoch": 0.5308246597277823, + "learning_rate": 1.8357152781183606e-05, + "loss": 0.1155, + "step": 1326 + }, + { + "epoch": 0.5316253002401922, + "learning_rate": 1.8372469284808465e-05, + "loss": 0.2028, + "step": 1328 + }, + { + "epoch": 0.5324259407526021, + "learning_rate": 1.8387720382009665e-05, + "loss": 0.2867, + "step": 1330 + }, + { + "epoch": 0.533226581265012, + "learning_rate": 1.840290595364436e-05, + "loss": 0.2181, + "step": 1332 + }, + { + "epoch": 0.5340272217774219, + "learning_rate": 1.8418025881081606e-05, + "loss": 0.2585, + "step": 1334 + }, + { + "epoch": 0.5348278622898318, + "learning_rate": 1.8433080046203286e-05, + "loss": 0.662, + "step": 1336 + }, + { + "epoch": 0.5356285028022418, + "learning_rate": 1.844806833140501e-05, + "loss": 0.1319, + "step": 1338 + }, + { + "epoch": 0.5364291433146517, + "learning_rate": 1.8462990619597054e-05, + "loss": 0.0692, + "step": 1340 + }, + { + "epoch": 0.5372297838270617, + "learning_rate": 1.8477846794205258e-05, + "loss": 0.0794, + "step": 1342 + }, + { + "epoch": 0.5380304243394716, + "learning_rate": 1.8492636739171966e-05, + "loss": 0.0022, + "step": 1344 + }, + { + "epoch": 0.5388310648518815, + "learning_rate": 1.85073603389569e-05, + "loss": 0.0033, + "step": 1346 + }, + { + "epoch": 0.5396317053642914, + "learning_rate": 1.8522017478538067e-05, + "loss": 0.0789, + "step": 1348 + }, + { + "epoch": 0.5404323458767014, + "learning_rate": 1.8536608043412695e-05, + "loss": 0.0815, + "step": 1350 + }, + { + "epoch": 0.5412329863891113, + "learning_rate": 1.855113191959808e-05, + "loss": 0.1315, + "step": 1352 + }, + { + "epoch": 0.5420336269015212, + "learning_rate": 1.856558899363248e-05, + "loss": 0.3248, + "step": 1354 + }, + { + "epoch": 0.5428342674139311, + "learning_rate": 1.8579979152576063e-05, + "loss": 0.0728, + "step": 1356 + }, + { + "epoch": 0.5436349079263411, + "learning_rate": 1.85943022840117e-05, + "loss": 1.013, + "step": 1358 + }, + { + "epoch": 0.544435548438751, + "learning_rate": 1.8608558276045895e-05, + "loss": 0.4096, + "step": 1360 + }, + { + "epoch": 0.5452361889511609, + "learning_rate": 1.862274701730967e-05, + "loss": 0.2427, + "step": 1362 + }, + { + "epoch": 0.5460368294635709, + "learning_rate": 1.86368683969594e-05, + "loss": 0.3155, + "step": 1364 + }, + { + "epoch": 0.5468374699759808, + "learning_rate": 1.865092230467769e-05, + "loss": 0.4296, + "step": 1366 + }, + { + "epoch": 0.5476381104883907, + "learning_rate": 1.866490863067425e-05, + "loss": 0.2626, + "step": 1368 + }, + { + "epoch": 0.5484387510008006, + "learning_rate": 1.8678827265686753e-05, + "loss": 0.1588, + "step": 1370 + }, + { + "epoch": 0.5492393915132106, + "learning_rate": 1.8692678100981663e-05, + "loss": 0.1276, + "step": 1372 + }, + { + "epoch": 0.5500400320256205, + "learning_rate": 1.87064610283551e-05, + "loss": 0.102, + "step": 1374 + }, + { + "epoch": 0.5508406725380304, + "learning_rate": 1.8720175940133705e-05, + "loss": 0.1099, + "step": 1376 + }, + { + "epoch": 0.5516413130504404, + "learning_rate": 1.873382272917545e-05, + "loss": 0.3393, + "step": 1378 + }, + { + "epoch": 0.5524419535628503, + "learning_rate": 1.8747401288870472e-05, + "loss": 0.08, + "step": 1380 + }, + { + "epoch": 0.5532425940752602, + "learning_rate": 1.876091151314196e-05, + "loss": 0.2856, + "step": 1382 + }, + { + "epoch": 0.5540432345876701, + "learning_rate": 1.877435329644691e-05, + "loss": 0.2085, + "step": 1384 + }, + { + "epoch": 0.55484387510008, + "learning_rate": 1.8787726533776996e-05, + "loss": 0.4122, + "step": 1386 + }, + { + "epoch": 0.55564451561249, + "learning_rate": 1.8801031120659393e-05, + "loss": 0.1118, + "step": 1388 + }, + { + "epoch": 0.5564451561248999, + "learning_rate": 1.8814266953157557e-05, + "loss": 0.0669, + "step": 1390 + }, + { + "epoch": 0.5572457966373099, + "learning_rate": 1.8827433927872066e-05, + "loss": 0.411, + "step": 1392 + }, + { + "epoch": 0.5580464371497198, + "learning_rate": 1.8840531941941415e-05, + "loss": 0.0661, + "step": 1394 + }, + { + "epoch": 0.5588470776621297, + "learning_rate": 1.8853560893042854e-05, + "loss": 0.4688, + "step": 1396 + }, + { + "epoch": 0.5596477181745396, + "learning_rate": 1.8866520679393127e-05, + "loss": 0.0438, + "step": 1398 + }, + { + "epoch": 0.5604483586869495, + "learning_rate": 1.8879411199749303e-05, + "loss": 0.0017, + "step": 1400 + }, + { + "epoch": 0.5612489991993594, + "learning_rate": 1.889223235340958e-05, + "loss": 0.0591, + "step": 1402 + }, + { + "epoch": 0.5620496397117695, + "learning_rate": 1.8904984040214037e-05, + "loss": 0.4956, + "step": 1404 + }, + { + "epoch": 0.5628502802241794, + "learning_rate": 1.8917666160545436e-05, + "loss": 0.029, + "step": 1406 + }, + { + "epoch": 0.5636509207365893, + "learning_rate": 1.893027861533002e-05, + "loss": 0.1913, + "step": 1408 + }, + { + "epoch": 0.5644515612489992, + "learning_rate": 1.894282130603823e-05, + "loss": 0.2488, + "step": 1410 + }, + { + "epoch": 0.5652522017614091, + "learning_rate": 1.8955294134685528e-05, + "loss": 0.0893, + "step": 1412 + }, + { + "epoch": 0.566052842273819, + "learning_rate": 1.896769700383315e-05, + "loss": 0.3816, + "step": 1414 + }, + { + "epoch": 0.5668534827862289, + "learning_rate": 1.898002981658886e-05, + "loss": 0.3141, + "step": 1416 + }, + { + "epoch": 0.567654123298639, + "learning_rate": 1.899229247660769e-05, + "loss": 0.0574, + "step": 1418 + }, + { + "epoch": 0.5684547638110489, + "learning_rate": 1.9004484888092724e-05, + "loss": 0.5658, + "step": 1420 + }, + { + "epoch": 0.5692554043234588, + "learning_rate": 1.901660695579585e-05, + "loss": 0.4091, + "step": 1422 + }, + { + "epoch": 0.5700560448358687, + "learning_rate": 1.9028658585018455e-05, + "loss": 0.2643, + "step": 1424 + }, + { + "epoch": 0.5708566853482786, + "learning_rate": 1.9040639681612212e-05, + "loss": 0.0455, + "step": 1426 + }, + { + "epoch": 0.5716573258606885, + "learning_rate": 1.9052550151979816e-05, + "loss": 0.2142, + "step": 1428 + }, + { + "epoch": 0.5724579663730984, + "learning_rate": 1.9064389903075676e-05, + "loss": 0.2532, + "step": 1430 + }, + { + "epoch": 0.5732586068855084, + "learning_rate": 1.9076158842406674e-05, + "loss": 1.2189, + "step": 1432 + }, + { + "epoch": 0.5740592473979184, + "learning_rate": 1.9087856878032886e-05, + "loss": 0.1078, + "step": 1434 + }, + { + "epoch": 0.5748598879103283, + "learning_rate": 1.909948391856829e-05, + "loss": 0.3039, + "step": 1436 + }, + { + "epoch": 0.5756605284227382, + "learning_rate": 1.911103987318148e-05, + "loss": 0.2527, + "step": 1438 + }, + { + "epoch": 0.5764611689351481, + "learning_rate": 1.912252465159637e-05, + "loss": 0.1365, + "step": 1440 + }, + { + "epoch": 0.577261809447558, + "learning_rate": 1.913393816409294e-05, + "loss": 0.2057, + "step": 1442 + }, + { + "epoch": 0.578062449959968, + "learning_rate": 1.9145280321507872e-05, + "loss": 0.5137, + "step": 1444 + }, + { + "epoch": 0.5788630904723779, + "learning_rate": 1.9156551035235288e-05, + "loss": 0.0381, + "step": 1446 + }, + { + "epoch": 0.5796637309847879, + "learning_rate": 1.9167750217227454e-05, + "loss": 0.3175, + "step": 1448 + }, + { + "epoch": 0.5804643714971978, + "learning_rate": 1.9178877779995423e-05, + "loss": 0.3451, + "step": 1450 + }, + { + "epoch": 0.5812650120096077, + "learning_rate": 1.9189933636609747e-05, + "loss": 0.4345, + "step": 1452 + }, + { + "epoch": 0.5820656525220176, + "learning_rate": 1.9200917700701173e-05, + "loss": 0.2699, + "step": 1454 + }, + { + "epoch": 0.5828662930344275, + "learning_rate": 1.9211829886461274e-05, + "loss": 0.3656, + "step": 1456 + }, + { + "epoch": 0.5836669335468375, + "learning_rate": 1.9222670108643146e-05, + "loss": 0.0408, + "step": 1458 + }, + { + "epoch": 0.5844675740592474, + "learning_rate": 1.9233438282562085e-05, + "loss": 0.0889, + "step": 1460 + }, + { + "epoch": 0.5852682145716573, + "learning_rate": 1.924413432409622e-05, + "loss": 0.4903, + "step": 1462 + }, + { + "epoch": 0.5860688550840673, + "learning_rate": 1.925475814968719e-05, + "loss": 0.1423, + "step": 1464 + }, + { + "epoch": 0.5868694955964772, + "learning_rate": 1.926530967634078e-05, + "loss": 0.4906, + "step": 1466 + }, + { + "epoch": 0.5876701361088871, + "learning_rate": 1.9275788821627607e-05, + "loss": 0.2471, + "step": 1468 + }, + { + "epoch": 0.588470776621297, + "learning_rate": 1.9286195503683705e-05, + "loss": 0.3457, + "step": 1470 + }, + { + "epoch": 0.589271417133707, + "learning_rate": 1.9296529641211215e-05, + "loss": 0.2819, + "step": 1472 + }, + { + "epoch": 0.5900720576461169, + "learning_rate": 1.9306791153479004e-05, + "loss": 0.2074, + "step": 1474 + }, + { + "epoch": 0.5908726981585268, + "learning_rate": 1.9316979960323286e-05, + "loss": 0.1451, + "step": 1476 + }, + { + "epoch": 0.5916733386709367, + "learning_rate": 1.932709598214825e-05, + "loss": 0.2294, + "step": 1478 + }, + { + "epoch": 0.5924739791833467, + "learning_rate": 1.9337139139926707e-05, + "loss": 0.4399, + "step": 1480 + }, + { + "epoch": 0.5932746196957566, + "learning_rate": 1.9347109355200672e-05, + "loss": 0.2047, + "step": 1482 + }, + { + "epoch": 0.5940752602081665, + "learning_rate": 1.935700655008199e-05, + "loss": 0.2363, + "step": 1484 + }, + { + "epoch": 0.5948759007205765, + "learning_rate": 1.9366830647252967e-05, + "loss": 0.1318, + "step": 1486 + }, + { + "epoch": 0.5956765412329864, + "learning_rate": 1.9376581569966933e-05, + "loss": 0.1546, + "step": 1488 + }, + { + "epoch": 0.5964771817453963, + "learning_rate": 1.9386259242048883e-05, + "loss": 0.1202, + "step": 1490 + }, + { + "epoch": 0.5972778222578062, + "learning_rate": 1.939586358789602e-05, + "loss": 0.0085, + "step": 1492 + }, + { + "epoch": 0.5980784627702161, + "learning_rate": 1.940539453247842e-05, + "loss": 0.0263, + "step": 1494 + }, + { + "epoch": 0.5988791032826261, + "learning_rate": 1.9414852001339547e-05, + "loss": 0.2422, + "step": 1496 + }, + { + "epoch": 0.5996797437950361, + "learning_rate": 1.9424235920596863e-05, + "loss": 0.0332, + "step": 1498 + }, + { + "epoch": 0.600480384307446, + "learning_rate": 1.9433546216942423e-05, + "loss": 1.3784, + "step": 1500 + }, + { + "epoch": 0.6012810248198559, + "learning_rate": 1.944278281764342e-05, + "loss": 0.0198, + "step": 1502 + }, + { + "epoch": 0.6020816653322658, + "learning_rate": 1.945194565054276e-05, + "loss": 0.0887, + "step": 1504 + }, + { + "epoch": 0.6028823058446757, + "learning_rate": 1.9461034644059637e-05, + "loss": 0.3157, + "step": 1506 + }, + { + "epoch": 0.6036829463570856, + "learning_rate": 1.9470049727190073e-05, + "loss": 0.3134, + "step": 1508 + }, + { + "epoch": 0.6044835868694955, + "learning_rate": 1.9478990829507504e-05, + "loss": 0.1176, + "step": 1510 + }, + { + "epoch": 0.6052842273819056, + "learning_rate": 1.948785788116329e-05, + "loss": 0.0093, + "step": 1512 + }, + { + "epoch": 0.6060848678943155, + "learning_rate": 1.9496650812887286e-05, + "loss": 0.0659, + "step": 1514 + }, + { + "epoch": 0.6068855084067254, + "learning_rate": 1.9505369555988395e-05, + "loss": 0.1703, + "step": 1516 + }, + { + "epoch": 0.6076861489191353, + "learning_rate": 1.951401404235505e-05, + "loss": 0.0626, + "step": 1518 + }, + { + "epoch": 0.6084867894315452, + "learning_rate": 1.952258420445583e-05, + "loss": 0.1296, + "step": 1520 + }, + { + "epoch": 0.6092874299439551, + "learning_rate": 1.9531079975339912e-05, + "loss": 0.1258, + "step": 1522 + }, + { + "epoch": 0.610088070456365, + "learning_rate": 1.953950128863762e-05, + "loss": 0.0125, + "step": 1524 + }, + { + "epoch": 0.6108887109687751, + "learning_rate": 1.9547848078560975e-05, + "loss": 0.555, + "step": 1526 + }, + { + "epoch": 0.611689351481185, + "learning_rate": 1.9556120279904144e-05, + "loss": 0.0572, + "step": 1528 + }, + { + "epoch": 0.6124899919935949, + "learning_rate": 1.956431782804402e-05, + "loss": 0.2353, + "step": 1530 + }, + { + "epoch": 0.6132906325060048, + "learning_rate": 1.957244065894066e-05, + "loss": 0.4456, + "step": 1532 + }, + { + "epoch": 0.6140912730184147, + "learning_rate": 1.9580488709137858e-05, + "loss": 0.2388, + "step": 1534 + }, + { + "epoch": 0.6148919135308246, + "learning_rate": 1.9588461915763566e-05, + "loss": 0.1931, + "step": 1536 + }, + { + "epoch": 0.6156925540432346, + "learning_rate": 1.9596360216530436e-05, + "loss": 0.0611, + "step": 1538 + }, + { + "epoch": 0.6164931945556446, + "learning_rate": 1.9604183549736283e-05, + "loss": 0.067, + "step": 1540 + }, + { + "epoch": 0.6172938350680545, + "learning_rate": 1.961193185426459e-05, + "loss": 0.2984, + "step": 1542 + }, + { + "epoch": 0.6180944755804644, + "learning_rate": 1.9619605069584954e-05, + "loss": 0.0881, + "step": 1544 + }, + { + "epoch": 0.6188951160928743, + "learning_rate": 1.9627203135753573e-05, + "loss": 0.2874, + "step": 1546 + }, + { + "epoch": 0.6196957566052842, + "learning_rate": 1.9634725993413744e-05, + "loss": 0.4476, + "step": 1548 + }, + { + "epoch": 0.6204963971176941, + "learning_rate": 1.9642173583796265e-05, + "loss": 0.031, + "step": 1550 + }, + { + "epoch": 0.6212970376301041, + "learning_rate": 1.964954584871995e-05, + "loss": 0.0747, + "step": 1552 + }, + { + "epoch": 0.622097678142514, + "learning_rate": 1.9656842730592046e-05, + "loss": 0.4271, + "step": 1554 + }, + { + "epoch": 0.622898318654924, + "learning_rate": 1.966406417240872e-05, + "loss": 0.2952, + "step": 1556 + }, + { + "epoch": 0.6236989591673339, + "learning_rate": 1.967121011775546e-05, + "loss": 0.2226, + "step": 1558 + }, + { + "epoch": 0.6244995996797438, + "learning_rate": 1.967828051080755e-05, + "loss": 0.3785, + "step": 1560 + }, + { + "epoch": 0.6253002401921537, + "learning_rate": 1.9685275296330497e-05, + "loss": 0.0303, + "step": 1562 + }, + { + "epoch": 0.6261008807045636, + "learning_rate": 1.969219441968046e-05, + "loss": 0.3485, + "step": 1564 + }, + { + "epoch": 0.6269015212169736, + "learning_rate": 1.969903782680467e-05, + "loss": 0.2553, + "step": 1566 + }, + { + "epoch": 0.6277021617293835, + "learning_rate": 1.9705805464241856e-05, + "loss": 0.1104, + "step": 1568 + }, + { + "epoch": 0.6285028022417934, + "learning_rate": 1.9712497279122692e-05, + "loss": 0.0624, + "step": 1570 + }, + { + "epoch": 0.6293034427542034, + "learning_rate": 1.971911321917015e-05, + "loss": 0.0821, + "step": 1572 + }, + { + "epoch": 0.6301040832666133, + "learning_rate": 1.9725653232699962e-05, + "loss": 0.1675, + "step": 1574 + }, + { + "epoch": 0.6309047237790232, + "learning_rate": 1.9732117268621005e-05, + "loss": 0.0596, + "step": 1576 + }, + { + "epoch": 0.6317053642914331, + "learning_rate": 1.9738505276435692e-05, + "loss": 0.5358, + "step": 1578 + }, + { + "epoch": 0.6325060048038431, + "learning_rate": 1.9744817206240377e-05, + "loss": 0.0355, + "step": 1580 + }, + { + "epoch": 0.633306645316253, + "learning_rate": 1.9751053008725736e-05, + "loss": 0.1109, + "step": 1582 + }, + { + "epoch": 0.6341072858286629, + "learning_rate": 1.9757212635177177e-05, + "loss": 0.0341, + "step": 1584 + }, + { + "epoch": 0.6349079263410728, + "learning_rate": 1.9763296037475174e-05, + "loss": 0.025, + "step": 1586 + }, + { + "epoch": 0.6357085668534828, + "learning_rate": 1.976930316809569e-05, + "loss": 0.0736, + "step": 1588 + }, + { + "epoch": 0.6365092073658927, + "learning_rate": 1.9775233980110524e-05, + "loss": 0.0179, + "step": 1590 + }, + { + "epoch": 0.6373098478783027, + "learning_rate": 1.978108842718768e-05, + "loss": 0.0019, + "step": 1592 + }, + { + "epoch": 0.6381104883907126, + "learning_rate": 1.9786866463591732e-05, + "loss": 0.1085, + "step": 1594 + }, + { + "epoch": 0.6389111289031225, + "learning_rate": 1.9792568044184176e-05, + "loss": 0.0477, + "step": 1596 + }, + { + "epoch": 0.6397117694155324, + "learning_rate": 1.9798193124423804e-05, + "loss": 0.0333, + "step": 1598 + }, + { + "epoch": 0.6405124099279423, + "learning_rate": 1.9803741660367015e-05, + "loss": 0.2606, + "step": 1600 + }, + { + "epoch": 0.6413130504403523, + "learning_rate": 1.9809213608668185e-05, + "loss": 0.0667, + "step": 1602 + }, + { + "epoch": 0.6421136909527622, + "learning_rate": 1.9814608926580007e-05, + "loss": 0.7038, + "step": 1604 + }, + { + "epoch": 0.6429143314651722, + "learning_rate": 1.9819927571953807e-05, + "loss": 0.0585, + "step": 1606 + }, + { + "epoch": 0.6437149719775821, + "learning_rate": 1.9825169503239885e-05, + "loss": 0.167, + "step": 1608 + }, + { + "epoch": 0.644515612489992, + "learning_rate": 1.983033467948784e-05, + "loss": 0.0716, + "step": 1610 + }, + { + "epoch": 0.6453162530024019, + "learning_rate": 1.9835423060346892e-05, + "loss": 0.2851, + "step": 1612 + }, + { + "epoch": 0.6461168935148118, + "learning_rate": 1.9840434606066182e-05, + "loss": 0.0759, + "step": 1614 + }, + { + "epoch": 0.6469175340272217, + "learning_rate": 1.9845369277495102e-05, + "loss": 0.6611, + "step": 1616 + }, + { + "epoch": 0.6477181745396317, + "learning_rate": 1.9850227036083592e-05, + "loss": 0.2019, + "step": 1618 + }, + { + "epoch": 0.6485188150520417, + "learning_rate": 1.985500784388244e-05, + "loss": 0.4445, + "step": 1620 + }, + { + "epoch": 0.6493194555644516, + "learning_rate": 1.985971166354357e-05, + "loss": 0.0054, + "step": 1622 + }, + { + "epoch": 0.6501200960768615, + "learning_rate": 1.9864338458320366e-05, + "loss": 0.4025, + "step": 1624 + }, + { + "epoch": 0.6509207365892714, + "learning_rate": 1.986888819206792e-05, + "loss": 0.238, + "step": 1626 + }, + { + "epoch": 0.6517213771016813, + "learning_rate": 1.9873360829243323e-05, + "loss": 0.249, + "step": 1628 + }, + { + "epoch": 0.6525220176140912, + "learning_rate": 1.9877756334905983e-05, + "loss": 0.2378, + "step": 1630 + }, + { + "epoch": 0.6533226581265013, + "learning_rate": 1.9882074674717836e-05, + "loss": 0.265, + "step": 1632 + }, + { + "epoch": 0.6541232986389112, + "learning_rate": 1.988631581494365e-05, + "loss": 0.2361, + "step": 1634 + }, + { + "epoch": 0.6549239391513211, + "learning_rate": 1.989047972245129e-05, + "loss": 0.0343, + "step": 1636 + }, + { + "epoch": 0.655724579663731, + "learning_rate": 1.9894566364711965e-05, + "loss": 0.1859, + "step": 1638 + }, + { + "epoch": 0.6565252201761409, + "learning_rate": 1.989857570980049e-05, + "loss": 0.1679, + "step": 1640 + }, + { + "epoch": 0.6573258606885508, + "learning_rate": 1.990250772639552e-05, + "loss": 0.1719, + "step": 1642 + }, + { + "epoch": 0.6581265012009607, + "learning_rate": 1.9906362383779826e-05, + "loss": 0.1317, + "step": 1644 + }, + { + "epoch": 0.6589271417133707, + "learning_rate": 1.99101396518405e-05, + "loss": 0.313, + "step": 1646 + }, + { + "epoch": 0.6597277822257807, + "learning_rate": 1.9913839501069213e-05, + "loss": 0.1333, + "step": 1648 + }, + { + "epoch": 0.6605284227381906, + "learning_rate": 1.9917461902562435e-05, + "loss": 0.0822, + "step": 1650 + }, + { + "epoch": 0.6613290632506005, + "learning_rate": 1.9921006828021666e-05, + "loss": 0.1722, + "step": 1652 + }, + { + "epoch": 0.6621297037630104, + "learning_rate": 1.9924474249753652e-05, + "loss": 0.1906, + "step": 1654 + }, + { + "epoch": 0.6629303442754203, + "learning_rate": 1.9927864140670615e-05, + "loss": 0.1694, + "step": 1656 + }, + { + "epoch": 0.6637309847878302, + "learning_rate": 1.9931176474290438e-05, + "loss": 0.0376, + "step": 1658 + }, + { + "epoch": 0.6645316253002402, + "learning_rate": 1.99344112247369e-05, + "loss": 0.0567, + "step": 1660 + }, + { + "epoch": 0.6653322658126501, + "learning_rate": 1.9937568366739858e-05, + "loss": 0.2487, + "step": 1662 + }, + { + "epoch": 0.6661329063250601, + "learning_rate": 1.9940647875635463e-05, + "loss": 0.016, + "step": 1664 + }, + { + "epoch": 0.66693354683747, + "learning_rate": 1.9943649727366335e-05, + "loss": 0.9333, + "step": 1666 + }, + { + "epoch": 0.6677341873498799, + "learning_rate": 1.994657389848176e-05, + "loss": 0.4249, + "step": 1668 + }, + { + "epoch": 0.6685348278622898, + "learning_rate": 1.994942036613787e-05, + "loss": 0.6759, + "step": 1670 + }, + { + "epoch": 0.6693354683746997, + "learning_rate": 1.9952189108097825e-05, + "loss": 0.0906, + "step": 1672 + }, + { + "epoch": 0.6701361088871097, + "learning_rate": 1.995488010273198e-05, + "loss": 0.0132, + "step": 1674 + }, + { + "epoch": 0.6709367493995196, + "learning_rate": 1.9957493329018064e-05, + "loss": 0.0983, + "step": 1676 + }, + { + "epoch": 0.6717373899119295, + "learning_rate": 1.9960028766541336e-05, + "loss": 0.4848, + "step": 1678 + }, + { + "epoch": 0.6725380304243395, + "learning_rate": 1.996248639549475e-05, + "loss": 0.1295, + "step": 1680 + }, + { + "epoch": 0.6733386709367494, + "learning_rate": 1.9964866196679105e-05, + "loss": 0.8247, + "step": 1682 + }, + { + "epoch": 0.6741393114491593, + "learning_rate": 1.9967168151503196e-05, + "loss": 0.3808, + "step": 1684 + }, + { + "epoch": 0.6749399519615693, + "learning_rate": 1.9969392241983957e-05, + "loss": 0.0001, + "step": 1686 + }, + { + "epoch": 0.6757405924739792, + "learning_rate": 1.997153845074662e-05, + "loss": 0.4078, + "step": 1688 + }, + { + "epoch": 0.6765412329863891, + "learning_rate": 1.9973606761024813e-05, + "loss": 0.094, + "step": 1690 + }, + { + "epoch": 0.677341873498799, + "learning_rate": 1.997559715666073e-05, + "loss": 0.1079, + "step": 1692 + }, + { + "epoch": 0.678142514011209, + "learning_rate": 1.9977509622105233e-05, + "loss": 0.057, + "step": 1694 + }, + { + "epoch": 0.6789431545236189, + "learning_rate": 1.9979344142417986e-05, + "loss": 0.1165, + "step": 1696 + }, + { + "epoch": 0.6797437950360288, + "learning_rate": 1.9981100703267567e-05, + "loss": 0.2249, + "step": 1698 + }, + { + "epoch": 0.6805444355484388, + "learning_rate": 1.998277929093157e-05, + "loss": 0.3387, + "step": 1700 + }, + { + "epoch": 0.6813450760608487, + "learning_rate": 1.998437989229673e-05, + "loss": 0.0786, + "step": 1702 + }, + { + "epoch": 0.6821457165732586, + "learning_rate": 1.9985902494859023e-05, + "loss": 0.1919, + "step": 1704 + }, + { + "epoch": 0.6829463570856685, + "learning_rate": 1.998734708672375e-05, + "loss": 0.1849, + "step": 1706 + }, + { + "epoch": 0.6837469975980784, + "learning_rate": 1.9988713656605635e-05, + "loss": 0.0975, + "step": 1708 + }, + { + "epoch": 0.6845476381104884, + "learning_rate": 1.9990002193828923e-05, + "loss": 0.1772, + "step": 1710 + }, + { + "epoch": 0.6853482786228983, + "learning_rate": 1.9991212688327456e-05, + "loss": 0.218, + "step": 1712 + }, + { + "epoch": 0.6861489191353083, + "learning_rate": 1.9992345130644747e-05, + "loss": 0.2987, + "step": 1714 + }, + { + "epoch": 0.6869495596477182, + "learning_rate": 1.999339951193407e-05, + "loss": 0.067, + "step": 1716 + }, + { + "epoch": 0.6877502001601281, + "learning_rate": 1.9994375823958504e-05, + "loss": 0.1281, + "step": 1718 + }, + { + "epoch": 0.688550840672538, + "learning_rate": 1.9995274059091018e-05, + "loss": 0.2421, + "step": 1720 + }, + { + "epoch": 0.6893514811849479, + "learning_rate": 1.999609421031453e-05, + "loss": 0.4262, + "step": 1722 + }, + { + "epoch": 0.6901521216973578, + "learning_rate": 1.999683627122195e-05, + "loss": 0.2677, + "step": 1724 + }, + { + "epoch": 0.6909527622097679, + "learning_rate": 1.9997500236016233e-05, + "loss": 0.0343, + "step": 1726 + }, + { + "epoch": 0.6917534027221778, + "learning_rate": 1.9998086099510433e-05, + "loss": 0.1022, + "step": 1728 + }, + { + "epoch": 0.6925540432345877, + "learning_rate": 1.9998593857127736e-05, + "loss": 0.1873, + "step": 1730 + }, + { + "epoch": 0.6933546837469976, + "learning_rate": 1.99990235049015e-05, + "loss": 0.2784, + "step": 1732 + }, + { + "epoch": 0.6941553242594075, + "learning_rate": 1.9999375039475275e-05, + "loss": 0.0593, + "step": 1734 + }, + { + "epoch": 0.6949559647718174, + "learning_rate": 1.999964845810285e-05, + "loss": 0.0838, + "step": 1736 + }, + { + "epoch": 0.6957566052842273, + "learning_rate": 1.9999843758648253e-05, + "loss": 0.0984, + "step": 1738 + }, + { + "epoch": 0.6965572457966374, + "learning_rate": 1.999996093958578e-05, + "loss": 0.0006, + "step": 1740 + }, + { + "epoch": 0.6973578863090473, + "learning_rate": 2e-05, + "loss": 0.033, + "step": 1742 + }, + { + "epoch": 0.6981585268214572, + "learning_rate": 1.999996093958578e-05, + "loss": 0.264, + "step": 1744 + }, + { + "epoch": 0.6989591673338671, + "learning_rate": 1.9999843758648253e-05, + "loss": 0.2612, + "step": 1746 + }, + { + "epoch": 0.699759807846277, + "learning_rate": 1.999964845810285e-05, + "loss": 0.1759, + "step": 1748 + }, + { + "epoch": 0.7005604483586869, + "learning_rate": 1.9999375039475278e-05, + "loss": 0.2072, + "step": 1750 + }, + { + "epoch": 0.7013610888710968, + "learning_rate": 1.99990235049015e-05, + "loss": 0.4149, + "step": 1752 + }, + { + "epoch": 0.7021617293835068, + "learning_rate": 1.9998593857127736e-05, + "loss": 0.0984, + "step": 1754 + }, + { + "epoch": 0.7029623698959168, + "learning_rate": 1.9998086099510433e-05, + "loss": 0.9108, + "step": 1756 + }, + { + "epoch": 0.7037630104083267, + "learning_rate": 1.9997500236016233e-05, + "loss": 0.0378, + "step": 1758 + }, + { + "epoch": 0.7045636509207366, + "learning_rate": 1.999683627122195e-05, + "loss": 0.3632, + "step": 1760 + }, + { + "epoch": 0.7053642914331465, + "learning_rate": 1.999609421031453e-05, + "loss": 0.014, + "step": 1762 + }, + { + "epoch": 0.7061649319455564, + "learning_rate": 1.999527405909102e-05, + "loss": 0.3102, + "step": 1764 + }, + { + "epoch": 0.7069655724579663, + "learning_rate": 1.9994375823958504e-05, + "loss": 0.0602, + "step": 1766 + }, + { + "epoch": 0.7077662129703763, + "learning_rate": 1.999339951193407e-05, + "loss": 0.256, + "step": 1768 + }, + { + "epoch": 0.7085668534827863, + "learning_rate": 1.999234513064475e-05, + "loss": 0.2158, + "step": 1770 + }, + { + "epoch": 0.7093674939951962, + "learning_rate": 1.9991212688327456e-05, + "loss": 0.1384, + "step": 1772 + }, + { + "epoch": 0.7101681345076061, + "learning_rate": 1.9990002193828923e-05, + "loss": 0.1103, + "step": 1774 + }, + { + "epoch": 0.710968775020016, + "learning_rate": 1.9988713656605635e-05, + "loss": 0.1634, + "step": 1776 + }, + { + "epoch": 0.7117694155324259, + "learning_rate": 1.998734708672375e-05, + "loss": 0.0888, + "step": 1778 + }, + { + "epoch": 0.7125700560448359, + "learning_rate": 1.9985902494859026e-05, + "loss": 0.204, + "step": 1780 + }, + { + "epoch": 0.7133706965572458, + "learning_rate": 1.9984379892296735e-05, + "loss": 0.0226, + "step": 1782 + }, + { + "epoch": 0.7141713370696557, + "learning_rate": 1.9982779290931572e-05, + "loss": 0.3126, + "step": 1784 + }, + { + "epoch": 0.7149719775820657, + "learning_rate": 1.9981100703267567e-05, + "loss": 0.0467, + "step": 1786 + }, + { + "epoch": 0.7157726180944756, + "learning_rate": 1.997934414241799e-05, + "loss": 0.0295, + "step": 1788 + }, + { + "epoch": 0.7165732586068855, + "learning_rate": 1.9977509622105236e-05, + "loss": 0.5553, + "step": 1790 + }, + { + "epoch": 0.7173738991192954, + "learning_rate": 1.997559715666073e-05, + "loss": 0.6304, + "step": 1792 + }, + { + "epoch": 0.7181745396317054, + "learning_rate": 1.9973606761024813e-05, + "loss": 0.0665, + "step": 1794 + }, + { + "epoch": 0.7189751801441153, + "learning_rate": 1.997153845074662e-05, + "loss": 0.0303, + "step": 1796 + }, + { + "epoch": 0.7197758206565252, + "learning_rate": 1.9969392241983957e-05, + "loss": 0.0254, + "step": 1798 + }, + { + "epoch": 0.7205764611689351, + "learning_rate": 1.9967168151503193e-05, + "loss": 0.3363, + "step": 1800 + }, + { + "epoch": 0.7213771016813451, + "learning_rate": 1.996486619667911e-05, + "loss": 0.3805, + "step": 1802 + }, + { + "epoch": 0.722177742193755, + "learning_rate": 1.9962486395494753e-05, + "loss": 0.0144, + "step": 1804 + }, + { + "epoch": 0.7229783827061649, + "learning_rate": 1.9960028766541336e-05, + "loss": 0.2415, + "step": 1806 + }, + { + "epoch": 0.7237790232185749, + "learning_rate": 1.9957493329018064e-05, + "loss": 0.0346, + "step": 1808 + }, + { + "epoch": 0.7245796637309848, + "learning_rate": 1.995488010273198e-05, + "loss": 0.1156, + "step": 1810 + }, + { + "epoch": 0.7253803042433947, + "learning_rate": 1.9952189108097825e-05, + "loss": 0.2213, + "step": 1812 + }, + { + "epoch": 0.7261809447558046, + "learning_rate": 1.9949420366137873e-05, + "loss": 0.1772, + "step": 1814 + }, + { + "epoch": 0.7269815852682145, + "learning_rate": 1.994657389848176e-05, + "loss": 0.0259, + "step": 1816 + }, + { + "epoch": 0.7277822257806245, + "learning_rate": 1.994364972736634e-05, + "loss": 0.1447, + "step": 1818 + }, + { + "epoch": 0.7285828662930345, + "learning_rate": 1.9940647875635466e-05, + "loss": 0.0001, + "step": 1820 + }, + { + "epoch": 0.7293835068054444, + "learning_rate": 1.993756836673986e-05, + "loss": 0.1412, + "step": 1822 + }, + { + "epoch": 0.7301841473178543, + "learning_rate": 1.99344112247369e-05, + "loss": 0.3803, + "step": 1824 + }, + { + "epoch": 0.7309847878302642, + "learning_rate": 1.9931176474290438e-05, + "loss": 0.1956, + "step": 1826 + }, + { + "epoch": 0.7317854283426741, + "learning_rate": 1.9927864140670618e-05, + "loss": 0.1739, + "step": 1828 + }, + { + "epoch": 0.732586068855084, + "learning_rate": 1.9924474249753656e-05, + "loss": 0.3067, + "step": 1830 + }, + { + "epoch": 0.733386709367494, + "learning_rate": 1.9921006828021666e-05, + "loss": 0.1189, + "step": 1832 + }, + { + "epoch": 0.734187349879904, + "learning_rate": 1.9917461902562435e-05, + "loss": 0.2977, + "step": 1834 + }, + { + "epoch": 0.7349879903923139, + "learning_rate": 1.9913839501069213e-05, + "loss": 0.1836, + "step": 1836 + }, + { + "epoch": 0.7357886309047238, + "learning_rate": 1.9910139651840497e-05, + "loss": 0.3066, + "step": 1838 + }, + { + "epoch": 0.7365892714171337, + "learning_rate": 1.9906362383779826e-05, + "loss": 0.2902, + "step": 1840 + }, + { + "epoch": 0.7373899119295436, + "learning_rate": 1.9902507726395524e-05, + "loss": 0.1327, + "step": 1842 + }, + { + "epoch": 0.7381905524419535, + "learning_rate": 1.989857570980049e-05, + "loss": 0.034, + "step": 1844 + }, + { + "epoch": 0.7389911929543634, + "learning_rate": 1.9894566364711965e-05, + "loss": 0.2908, + "step": 1846 + }, + { + "epoch": 0.7397918334667735, + "learning_rate": 1.9890479722451292e-05, + "loss": 0.3809, + "step": 1848 + }, + { + "epoch": 0.7405924739791834, + "learning_rate": 1.988631581494365e-05, + "loss": 0.0179, + "step": 1850 + }, + { + "epoch": 0.7413931144915933, + "learning_rate": 1.9882074674717832e-05, + "loss": 0.4259, + "step": 1852 + }, + { + "epoch": 0.7421937550040032, + "learning_rate": 1.987775633490599e-05, + "loss": 0.5049, + "step": 1854 + }, + { + "epoch": 0.7429943955164131, + "learning_rate": 1.987336082924333e-05, + "loss": 0.0307, + "step": 1856 + }, + { + "epoch": 0.743795036028823, + "learning_rate": 1.986888819206792e-05, + "loss": 0.1995, + "step": 1858 + }, + { + "epoch": 0.7445956765412329, + "learning_rate": 1.986433845832037e-05, + "loss": 0.2614, + "step": 1860 + }, + { + "epoch": 0.745396317053643, + "learning_rate": 1.9859711663543573e-05, + "loss": 0.3215, + "step": 1862 + }, + { + "epoch": 0.7461969575660529, + "learning_rate": 1.9855007843882437e-05, + "loss": 0.0051, + "step": 1864 + }, + { + "epoch": 0.7469975980784628, + "learning_rate": 1.9850227036083592e-05, + "loss": 0.174, + "step": 1866 + }, + { + "epoch": 0.7477982385908727, + "learning_rate": 1.9845369277495105e-05, + "loss": 0.2507, + "step": 1868 + }, + { + "epoch": 0.7485988791032826, + "learning_rate": 1.9840434606066186e-05, + "loss": 0.2157, + "step": 1870 + }, + { + "epoch": 0.7493995196156925, + "learning_rate": 1.9835423060346892e-05, + "loss": 0.2949, + "step": 1872 + }, + { + "epoch": 0.7502001601281025, + "learning_rate": 1.983033467948784e-05, + "loss": 0.2882, + "step": 1874 + }, + { + "epoch": 0.7510008006405124, + "learning_rate": 1.9825169503239885e-05, + "loss": 0.1412, + "step": 1876 + }, + { + "epoch": 0.7518014411529224, + "learning_rate": 1.9819927571953804e-05, + "loss": 0.2432, + "step": 1878 + }, + { + "epoch": 0.7526020816653323, + "learning_rate": 1.9814608926580007e-05, + "loss": 0.1141, + "step": 1880 + }, + { + "epoch": 0.7534027221777422, + "learning_rate": 1.980921360866819e-05, + "loss": 0.0728, + "step": 1882 + }, + { + "epoch": 0.7542033626901521, + "learning_rate": 1.9803741660367018e-05, + "loss": 0.0084, + "step": 1884 + }, + { + "epoch": 0.755004003202562, + "learning_rate": 1.9798193124423804e-05, + "loss": 0.0052, + "step": 1886 + }, + { + "epoch": 0.755804643714972, + "learning_rate": 1.979256804418418e-05, + "loss": 1.0712, + "step": 1888 + }, + { + "epoch": 0.7566052842273819, + "learning_rate": 1.978686646359173e-05, + "loss": 0.0176, + "step": 1890 + }, + { + "epoch": 0.7574059247397918, + "learning_rate": 1.9781088427187677e-05, + "loss": 0.8224, + "step": 1892 + }, + { + "epoch": 0.7582065652522018, + "learning_rate": 1.9775233980110524e-05, + "loss": 0.0612, + "step": 1894 + }, + { + "epoch": 0.7590072057646117, + "learning_rate": 1.976930316809569e-05, + "loss": 1.2486, + "step": 1896 + }, + { + "epoch": 0.7598078462770216, + "learning_rate": 1.9763296037475177e-05, + "loss": 0.5712, + "step": 1898 + }, + { + "epoch": 0.7606084867894315, + "learning_rate": 1.9757212635177177e-05, + "loss": 0.253, + "step": 1900 + }, + { + "epoch": 0.7614091273018415, + "learning_rate": 1.9751053008725736e-05, + "loss": 0.3648, + "step": 1902 + }, + { + "epoch": 0.7622097678142514, + "learning_rate": 1.9744817206240374e-05, + "loss": 0.3473, + "step": 1904 + }, + { + "epoch": 0.7630104083266613, + "learning_rate": 1.9738505276435695e-05, + "loss": 0.2674, + "step": 1906 + }, + { + "epoch": 0.7638110488390712, + "learning_rate": 1.9732117268621005e-05, + "loss": 0.0053, + "step": 1908 + }, + { + "epoch": 0.7646116893514812, + "learning_rate": 1.9725653232699962e-05, + "loss": 0.0881, + "step": 1910 + }, + { + "epoch": 0.7654123298638911, + "learning_rate": 1.9719113219170152e-05, + "loss": 0.3642, + "step": 1912 + }, + { + "epoch": 0.7662129703763011, + "learning_rate": 1.9712497279122692e-05, + "loss": 0.041, + "step": 1914 + }, + { + "epoch": 0.767013610888711, + "learning_rate": 1.970580546424186e-05, + "loss": 0.3424, + "step": 1916 + }, + { + "epoch": 0.7678142514011209, + "learning_rate": 1.969903782680467e-05, + "loss": 0.2806, + "step": 1918 + }, + { + "epoch": 0.7686148919135308, + "learning_rate": 1.9692194419680463e-05, + "loss": 0.2913, + "step": 1920 + }, + { + "epoch": 0.7694155324259407, + "learning_rate": 1.96852752963305e-05, + "loss": 0.277, + "step": 1922 + }, + { + "epoch": 0.7702161729383507, + "learning_rate": 1.9678280510807552e-05, + "loss": 0.1203, + "step": 1924 + }, + { + "epoch": 0.7710168134507606, + "learning_rate": 1.9671210117755462e-05, + "loss": 0.03, + "step": 1926 + }, + { + "epoch": 0.7718174539631706, + "learning_rate": 1.966406417240872e-05, + "loss": 0.1387, + "step": 1928 + }, + { + "epoch": 0.7726180944755805, + "learning_rate": 1.9656842730592046e-05, + "loss": 0.196, + "step": 1930 + }, + { + "epoch": 0.7734187349879904, + "learning_rate": 1.964954584871995e-05, + "loss": 0.1097, + "step": 1932 + }, + { + "epoch": 0.7742193755004003, + "learning_rate": 1.9642173583796265e-05, + "loss": 0.0572, + "step": 1934 + }, + { + "epoch": 0.7750200160128102, + "learning_rate": 1.9634725993413744e-05, + "loss": 0.5186, + "step": 1936 + }, + { + "epoch": 0.7758206565252201, + "learning_rate": 1.9627203135753576e-05, + "loss": 0.0547, + "step": 1938 + }, + { + "epoch": 0.77662129703763, + "learning_rate": 1.9619605069584954e-05, + "loss": 0.4263, + "step": 1940 + }, + { + "epoch": 0.7774219375500401, + "learning_rate": 1.961193185426459e-05, + "loss": 0.0049, + "step": 1942 + }, + { + "epoch": 0.77822257806245, + "learning_rate": 1.9604183549736287e-05, + "loss": 0.3619, + "step": 1944 + }, + { + "epoch": 0.7790232185748599, + "learning_rate": 1.959636021653044e-05, + "loss": 0.3762, + "step": 1946 + }, + { + "epoch": 0.7798238590872698, + "learning_rate": 1.958846191576357e-05, + "loss": 1.2455, + "step": 1948 + }, + { + "epoch": 0.7806244995996797, + "learning_rate": 1.958048870913786e-05, + "loss": 0.6391, + "step": 1950 + }, + { + "epoch": 0.7814251401120896, + "learning_rate": 1.9572440658940667e-05, + "loss": 0.301, + "step": 1952 + }, + { + "epoch": 0.7822257806244995, + "learning_rate": 1.9564317828044022e-05, + "loss": 0.2228, + "step": 1954 + }, + { + "epoch": 0.7830264211369096, + "learning_rate": 1.955612027990415e-05, + "loss": 0.047, + "step": 1956 + }, + { + "epoch": 0.7838270616493195, + "learning_rate": 1.9547848078560982e-05, + "loss": 0.0756, + "step": 1958 + }, + { + "epoch": 0.7846277021617294, + "learning_rate": 1.953950128863763e-05, + "loss": 0.1306, + "step": 1960 + }, + { + "epoch": 0.7854283426741393, + "learning_rate": 1.9531079975339915e-05, + "loss": 0.1555, + "step": 1962 + }, + { + "epoch": 0.7862289831865492, + "learning_rate": 1.9522584204455835e-05, + "loss": 0.4522, + "step": 1964 + }, + { + "epoch": 0.7870296236989591, + "learning_rate": 1.9514014042355054e-05, + "loss": 0.4533, + "step": 1966 + }, + { + "epoch": 0.7878302642113691, + "learning_rate": 1.9505369555988395e-05, + "loss": 0.3241, + "step": 1968 + }, + { + "epoch": 0.7886309047237791, + "learning_rate": 1.9496650812887293e-05, + "loss": 0.0001, + "step": 1970 + }, + { + "epoch": 0.789431545236189, + "learning_rate": 1.9487857881163295e-05, + "loss": 0.1759, + "step": 1972 + }, + { + "epoch": 0.7902321857485989, + "learning_rate": 1.947899082950751e-05, + "loss": 0.1912, + "step": 1974 + }, + { + "epoch": 0.7910328262610088, + "learning_rate": 1.947004972719008e-05, + "loss": 0.0505, + "step": 1976 + }, + { + "epoch": 0.7918334667734187, + "learning_rate": 1.9461034644059637e-05, + "loss": 0.0515, + "step": 1978 + }, + { + "epoch": 0.7926341072858286, + "learning_rate": 1.945194565054276e-05, + "loss": 0.3746, + "step": 1980 + }, + { + "epoch": 0.7934347477982386, + "learning_rate": 1.9442782817643425e-05, + "loss": 0.0978, + "step": 1982 + }, + { + "epoch": 0.7942353883106485, + "learning_rate": 1.9433546216942433e-05, + "loss": 0.174, + "step": 1984 + }, + { + "epoch": 0.7950360288230585, + "learning_rate": 1.942423592059687e-05, + "loss": 0.115, + "step": 1986 + }, + { + "epoch": 0.7958366693354684, + "learning_rate": 1.941485200133955e-05, + "loss": 0.0653, + "step": 1988 + }, + { + "epoch": 0.7966373098478783, + "learning_rate": 1.9405394532478422e-05, + "loss": 0.1865, + "step": 1990 + }, + { + "epoch": 0.7974379503602882, + "learning_rate": 1.9395863587896025e-05, + "loss": 0.1241, + "step": 1992 + }, + { + "epoch": 0.7982385908726981, + "learning_rate": 1.938625924204888e-05, + "loss": 0.405, + "step": 1994 + }, + { + "epoch": 0.7990392313851081, + "learning_rate": 1.937658156996694e-05, + "loss": 0.2969, + "step": 1996 + }, + { + "epoch": 0.799839871897518, + "learning_rate": 1.9366830647252977e-05, + "loss": 0.3522, + "step": 1998 + }, + { + "epoch": 0.800640512409928, + "learning_rate": 1.9357006550082e-05, + "loss": 0.1591, + "step": 2000 + }, + { + "epoch": 0.8014411529223379, + "learning_rate": 1.9347109355200676e-05, + "loss": 0.1505, + "step": 2002 + }, + { + "epoch": 0.8022417934347478, + "learning_rate": 1.933713913992671e-05, + "loss": 0.0585, + "step": 2004 + }, + { + "epoch": 0.8030424339471577, + "learning_rate": 1.9327095982148255e-05, + "loss": 0.1003, + "step": 2006 + }, + { + "epoch": 0.8038430744595677, + "learning_rate": 1.9316979960323283e-05, + "loss": 0.4811, + "step": 2008 + }, + { + "epoch": 0.8046437149719776, + "learning_rate": 1.9306791153479017e-05, + "loss": 0.5298, + "step": 2010 + }, + { + "epoch": 0.8054443554843875, + "learning_rate": 1.9296529641211226e-05, + "loss": 0.0979, + "step": 2012 + }, + { + "epoch": 0.8062449959967974, + "learning_rate": 1.928619550368371e-05, + "loss": 0.0981, + "step": 2014 + }, + { + "epoch": 0.8070456365092074, + "learning_rate": 1.9275788821627607e-05, + "loss": 0.2901, + "step": 2016 + }, + { + "epoch": 0.8078462770216173, + "learning_rate": 1.9265309676340783e-05, + "loss": 0.0537, + "step": 2018 + }, + { + "epoch": 0.8086469175340272, + "learning_rate": 1.9254758149687187e-05, + "loss": 0.8714, + "step": 2020 + }, + { + "epoch": 0.8094475580464372, + "learning_rate": 1.9244134324096216e-05, + "loss": 0.821, + "step": 2022 + }, + { + "epoch": 0.8102481985588471, + "learning_rate": 1.9233438282562095e-05, + "loss": 0.0681, + "step": 2024 + }, + { + "epoch": 0.811048839071257, + "learning_rate": 1.9222670108643156e-05, + "loss": 0.2082, + "step": 2026 + }, + { + "epoch": 0.8118494795836669, + "learning_rate": 1.9211829886461278e-05, + "loss": 0.5836, + "step": 2028 + }, + { + "epoch": 0.8126501200960768, + "learning_rate": 1.9200917700701176e-05, + "loss": 0.1669, + "step": 2030 + }, + { + "epoch": 0.8134507606084868, + "learning_rate": 1.918993363660975e-05, + "loss": 0.1299, + "step": 2032 + }, + { + "epoch": 0.8142514011208967, + "learning_rate": 1.9178877779995416e-05, + "loss": 0.0688, + "step": 2034 + }, + { + "epoch": 0.8150520416333067, + "learning_rate": 1.916775021722745e-05, + "loss": 0.4094, + "step": 2036 + }, + { + "epoch": 0.8158526821457166, + "learning_rate": 1.9156551035235298e-05, + "loss": 0.0134, + "step": 2038 + }, + { + "epoch": 0.8166533226581265, + "learning_rate": 1.9145280321507872e-05, + "loss": 0.2536, + "step": 2040 + }, + { + "epoch": 0.8174539631705364, + "learning_rate": 1.9133938164092942e-05, + "loss": 0.2122, + "step": 2042 + }, + { + "epoch": 0.8182546036829463, + "learning_rate": 1.9122524651596372e-05, + "loss": 0.0984, + "step": 2044 + }, + { + "epoch": 0.8190552441953562, + "learning_rate": 1.9111039873181475e-05, + "loss": 0.2875, + "step": 2046 + }, + { + "epoch": 0.8198558847077662, + "learning_rate": 1.9099483918568287e-05, + "loss": 0.5369, + "step": 2048 + }, + { + "epoch": 0.8206565252201762, + "learning_rate": 1.90878568780329e-05, + "loss": 0.3338, + "step": 2050 + }, + { + "epoch": 0.8214571657325861, + "learning_rate": 1.907615884240668e-05, + "loss": 0.2758, + "step": 2052 + }, + { + "epoch": 0.822257806244996, + "learning_rate": 1.9064389903075683e-05, + "loss": 0.2697, + "step": 2054 + }, + { + "epoch": 0.8230584467574059, + "learning_rate": 1.905255015197982e-05, + "loss": 0.1285, + "step": 2056 + }, + { + "epoch": 0.8238590872698158, + "learning_rate": 1.9040639681612216e-05, + "loss": 0.2542, + "step": 2058 + }, + { + "epoch": 0.8246597277822257, + "learning_rate": 1.902865858501845e-05, + "loss": 0.1639, + "step": 2060 + }, + { + "epoch": 0.8254603682946358, + "learning_rate": 1.9016606955795843e-05, + "loss": 0.0512, + "step": 2062 + }, + { + "epoch": 0.8262610088070457, + "learning_rate": 1.9004484888092734e-05, + "loss": 0.0898, + "step": 2064 + }, + { + "epoch": 0.8270616493194556, + "learning_rate": 1.8992292476607695e-05, + "loss": 0.3781, + "step": 2066 + }, + { + "epoch": 0.8278622898318655, + "learning_rate": 1.8980029816588863e-05, + "loss": 0.2964, + "step": 2068 + }, + { + "epoch": 0.8286629303442754, + "learning_rate": 1.8967697003833156e-05, + "loss": 0.1906, + "step": 2070 + }, + { + "epoch": 0.8294635708566853, + "learning_rate": 1.8955294134685528e-05, + "loss": 0.2854, + "step": 2072 + }, + { + "epoch": 0.8302642113690952, + "learning_rate": 1.8942821306038227e-05, + "loss": 0.1701, + "step": 2074 + }, + { + "epoch": 0.8310648518815053, + "learning_rate": 1.893027861533003e-05, + "loss": 0.236, + "step": 2076 + }, + { + "epoch": 0.8318654923939152, + "learning_rate": 1.891766616054545e-05, + "loss": 0.2393, + "step": 2078 + }, + { + "epoch": 0.8326661329063251, + "learning_rate": 1.8904984040214043e-05, + "loss": 0.0004, + "step": 2080 + }, + { + "epoch": 0.833466773418735, + "learning_rate": 1.8892232353409582e-05, + "loss": 0.0981, + "step": 2082 + }, + { + "epoch": 0.8342674139311449, + "learning_rate": 1.8879411199749306e-05, + "loss": 0.1249, + "step": 2084 + }, + { + "epoch": 0.8350680544435548, + "learning_rate": 1.8866520679393124e-05, + "loss": 0.6806, + "step": 2086 + }, + { + "epoch": 0.8358686949559647, + "learning_rate": 1.885356089304285e-05, + "loss": 0.0441, + "step": 2088 + }, + { + "epoch": 0.8366693354683747, + "learning_rate": 1.884053194194143e-05, + "loss": 0.3056, + "step": 2090 + }, + { + "epoch": 0.8374699759807847, + "learning_rate": 1.882743392787207e-05, + "loss": 0.1716, + "step": 2092 + }, + { + "epoch": 0.8382706164931946, + "learning_rate": 1.881426695315756e-05, + "loss": 0.3304, + "step": 2094 + }, + { + "epoch": 0.8390712570056045, + "learning_rate": 1.8801031120659396e-05, + "loss": 0.1705, + "step": 2096 + }, + { + "epoch": 0.8398718975180144, + "learning_rate": 1.8787726533777003e-05, + "loss": 0.5106, + "step": 2098 + }, + { + "epoch": 0.8406725380304243, + "learning_rate": 1.877435329644691e-05, + "loss": 0.3935, + "step": 2100 + }, + { + "epoch": 0.8414731785428343, + "learning_rate": 1.8760911513141974e-05, + "loss": 0.0292, + "step": 2102 + }, + { + "epoch": 0.8422738190552442, + "learning_rate": 1.8747401288870482e-05, + "loss": 0.0359, + "step": 2104 + }, + { + "epoch": 0.8430744595676541, + "learning_rate": 1.8733822729175455e-05, + "loss": 0.0288, + "step": 2106 + }, + { + "epoch": 0.8438751000800641, + "learning_rate": 1.8720175940133712e-05, + "loss": 0.0732, + "step": 2108 + }, + { + "epoch": 0.844675740592474, + "learning_rate": 1.8706461028355107e-05, + "loss": 0.1904, + "step": 2110 + }, + { + "epoch": 0.8454763811048839, + "learning_rate": 1.8692678100981663e-05, + "loss": 0.1565, + "step": 2112 + }, + { + "epoch": 0.8462770216172938, + "learning_rate": 1.8678827265686753e-05, + "loss": 0.576, + "step": 2114 + }, + { + "epoch": 0.8470776621297038, + "learning_rate": 1.8664908630674264e-05, + "loss": 0.6103, + "step": 2116 + }, + { + "epoch": 0.8478783026421137, + "learning_rate": 1.86509223046777e-05, + "loss": 0.0349, + "step": 2118 + }, + { + "epoch": 0.8486789431545236, + "learning_rate": 1.8636868396959406e-05, + "loss": 0.2403, + "step": 2120 + }, + { + "epoch": 0.8494795836669335, + "learning_rate": 1.8622747017309676e-05, + "loss": 0.0048, + "step": 2122 + }, + { + "epoch": 0.8502802241793435, + "learning_rate": 1.8608558276045898e-05, + "loss": 0.0155, + "step": 2124 + }, + { + "epoch": 0.8510808646917534, + "learning_rate": 1.8594302284011697e-05, + "loss": 0.1246, + "step": 2126 + }, + { + "epoch": 0.8518815052041633, + "learning_rate": 1.8579979152576076e-05, + "loss": 0.1598, + "step": 2128 + }, + { + "epoch": 0.8526821457165733, + "learning_rate": 1.8565588993632498e-05, + "loss": 0.0322, + "step": 2130 + }, + { + "epoch": 0.8534827862289832, + "learning_rate": 1.8551131919598084e-05, + "loss": 0.3569, + "step": 2132 + }, + { + "epoch": 0.8542834267413931, + "learning_rate": 1.8536608043412702e-05, + "loss": 0.1195, + "step": 2134 + }, + { + "epoch": 0.855084067253803, + "learning_rate": 1.852201747853807e-05, + "loss": 0.0451, + "step": 2136 + }, + { + "epoch": 0.855884707766213, + "learning_rate": 1.8507360338956896e-05, + "loss": 0.0938, + "step": 2138 + }, + { + "epoch": 0.8566853482786229, + "learning_rate": 1.849263673917196e-05, + "loss": 0.3296, + "step": 2140 + }, + { + "epoch": 0.8574859887910328, + "learning_rate": 1.847784679420527e-05, + "loss": 0.0642, + "step": 2142 + }, + { + "epoch": 0.8582866293034428, + "learning_rate": 1.846299061959706e-05, + "loss": 0.1166, + "step": 2144 + }, + { + "epoch": 0.8590872698158527, + "learning_rate": 1.8448068331405018e-05, + "loss": 0.0455, + "step": 2146 + }, + { + "epoch": 0.8598879103282626, + "learning_rate": 1.8433080046203293e-05, + "loss": 0.0201, + "step": 2148 + }, + { + "epoch": 0.8606885508406725, + "learning_rate": 1.841802588108161e-05, + "loss": 0.0726, + "step": 2150 + }, + { + "epoch": 0.8614891913530824, + "learning_rate": 1.8402905953644356e-05, + "loss": 0.0901, + "step": 2152 + }, + { + "epoch": 0.8622898318654924, + "learning_rate": 1.838772038200968e-05, + "loss": 0.0186, + "step": 2154 + }, + { + "epoch": 0.8630904723779024, + "learning_rate": 1.837246928480848e-05, + "loss": 0.2786, + "step": 2156 + }, + { + "epoch": 0.8638911128903123, + "learning_rate": 1.8357152781183613e-05, + "loss": 0.0711, + "step": 2158 + }, + { + "epoch": 0.8646917534027222, + "learning_rate": 1.8341770990788874e-05, + "loss": 0.021, + "step": 2160 + }, + { + "epoch": 0.8654923939151321, + "learning_rate": 1.8326324033788087e-05, + "loss": 0.5279, + "step": 2162 + }, + { + "epoch": 0.866293034427542, + "learning_rate": 1.831081203085415e-05, + "loss": 0.1609, + "step": 2164 + }, + { + "epoch": 0.8670936749399519, + "learning_rate": 1.8295235103168128e-05, + "loss": 0.1316, + "step": 2166 + }, + { + "epoch": 0.8678943154523618, + "learning_rate": 1.8279593372418284e-05, + "loss": 0.2905, + "step": 2168 + }, + { + "epoch": 0.8686949559647719, + "learning_rate": 1.8263886960799072e-05, + "loss": 0.0491, + "step": 2170 + }, + { + "epoch": 0.8694955964771818, + "learning_rate": 1.8248115991010303e-05, + "loss": 0.3533, + "step": 2172 + }, + { + "epoch": 0.8702962369895917, + "learning_rate": 1.8232280586256104e-05, + "loss": 0.1822, + "step": 2174 + }, + { + "epoch": 0.8710968775020016, + "learning_rate": 1.8216380870243963e-05, + "loss": 0.079, + "step": 2176 + }, + { + "epoch": 0.8718975180144115, + "learning_rate": 1.820041696718378e-05, + "loss": 0.1392, + "step": 2178 + }, + { + "epoch": 0.8726981585268214, + "learning_rate": 1.8184389001786912e-05, + "loss": 0.2052, + "step": 2180 + }, + { + "epoch": 0.8734987990392313, + "learning_rate": 1.8168297099265108e-05, + "loss": 0.3134, + "step": 2182 + }, + { + "epoch": 0.8742994395516414, + "learning_rate": 1.815214138532966e-05, + "loss": 0.0791, + "step": 2184 + }, + { + "epoch": 0.8751000800640513, + "learning_rate": 1.8135921986190358e-05, + "loss": 0.1473, + "step": 2186 + }, + { + "epoch": 0.8759007205764612, + "learning_rate": 1.8119639028554475e-05, + "loss": 0.0087, + "step": 2188 + }, + { + "epoch": 0.8767013610888711, + "learning_rate": 1.8103292639625835e-05, + "loss": 0.1448, + "step": 2190 + }, + { + "epoch": 0.877502001601281, + "learning_rate": 1.808688294710378e-05, + "loss": 0.0578, + "step": 2192 + }, + { + "epoch": 0.8783026421136909, + "learning_rate": 1.807041007918221e-05, + "loss": 0.0475, + "step": 2194 + }, + { + "epoch": 0.8791032826261009, + "learning_rate": 1.805387416454849e-05, + "loss": 0.5049, + "step": 2196 + }, + { + "epoch": 0.8799039231385108, + "learning_rate": 1.8037275332382575e-05, + "loss": 0.4241, + "step": 2198 + }, + { + "epoch": 0.8807045636509208, + "learning_rate": 1.802061371235592e-05, + "loss": 0.0, + "step": 2200 + }, + { + "epoch": 0.8815052041633307, + "learning_rate": 1.8003889434630476e-05, + "loss": 0.427, + "step": 2202 + }, + { + "epoch": 0.8823058446757406, + "learning_rate": 1.7987102629857692e-05, + "loss": 0.0413, + "step": 2204 + }, + { + "epoch": 0.8831064851881505, + "learning_rate": 1.7970253429177494e-05, + "loss": 0.3957, + "step": 2206 + }, + { + "epoch": 0.8839071257005604, + "learning_rate": 1.7953341964217196e-05, + "loss": 0.0366, + "step": 2208 + }, + { + "epoch": 0.8847077662129704, + "learning_rate": 1.7936368367090583e-05, + "loss": 0.0093, + "step": 2210 + }, + { + "epoch": 0.8855084067253803, + "learning_rate": 1.7919332770396798e-05, + "loss": 0.0087, + "step": 2212 + }, + { + "epoch": 0.8863090472377902, + "learning_rate": 1.7902235307219336e-05, + "loss": 0.1281, + "step": 2214 + }, + { + "epoch": 0.8871096877502002, + "learning_rate": 1.7885076111125e-05, + "loss": 0.0003, + "step": 2216 + }, + { + "epoch": 0.8879103282626101, + "learning_rate": 1.7867855316162846e-05, + "loss": 0.0592, + "step": 2218 + }, + { + "epoch": 0.88871096877502, + "learning_rate": 1.7850573056863173e-05, + "loss": 0.0222, + "step": 2220 + }, + { + "epoch": 0.8895116092874299, + "learning_rate": 1.783322946823638e-05, + "loss": 0.1386, + "step": 2222 + }, + { + "epoch": 0.8903122497998399, + "learning_rate": 1.7815824685772042e-05, + "loss": 0.2224, + "step": 2224 + }, + { + "epoch": 0.8911128903122498, + "learning_rate": 1.779835884543776e-05, + "loss": 0.0014, + "step": 2226 + }, + { + "epoch": 0.8919135308246597, + "learning_rate": 1.7780832083678122e-05, + "loss": 0.0365, + "step": 2228 + }, + { + "epoch": 0.8927141713370697, + "learning_rate": 1.776324453741365e-05, + "loss": 0.315, + "step": 2230 + }, + { + "epoch": 0.8935148118494796, + "learning_rate": 1.774559634403971e-05, + "loss": 0.6204, + "step": 2232 + }, + { + "epoch": 0.8943154523618895, + "learning_rate": 1.7727887641425465e-05, + "loss": 0.0644, + "step": 2234 + }, + { + "epoch": 0.8951160928742994, + "learning_rate": 1.7710118567912732e-05, + "loss": 0.4255, + "step": 2236 + }, + { + "epoch": 0.8959167333867094, + "learning_rate": 1.7692289262315008e-05, + "loss": 0.1004, + "step": 2238 + }, + { + "epoch": 0.8967173738991193, + "learning_rate": 1.7674399863916298e-05, + "loss": 0.1559, + "step": 2240 + }, + { + "epoch": 0.8975180144115292, + "learning_rate": 1.765645051247007e-05, + "loss": 0.0007, + "step": 2242 + }, + { + "epoch": 0.8983186549239391, + "learning_rate": 1.7638441348198144e-05, + "loss": 0.0329, + "step": 2244 + }, + { + "epoch": 0.899119295436349, + "learning_rate": 1.762037251178961e-05, + "loss": 0.0871, + "step": 2246 + }, + { + "epoch": 0.899919935948759, + "learning_rate": 1.7602244144399713e-05, + "loss": 0.0455, + "step": 2248 + }, + { + "epoch": 0.900720576461169, + "learning_rate": 1.7584056387648738e-05, + "loss": 0.5892, + "step": 2250 + }, + { + "epoch": 0.9015212169735789, + "learning_rate": 1.7565809383620966e-05, + "loss": 0.079, + "step": 2252 + }, + { + "epoch": 0.9023218574859888, + "learning_rate": 1.7547503274863502e-05, + "loss": 0.1274, + "step": 2254 + }, + { + "epoch": 0.9031224979983987, + "learning_rate": 1.7529138204385186e-05, + "loss": 0.4296, + "step": 2256 + }, + { + "epoch": 0.9039231385108086, + "learning_rate": 1.7510714315655467e-05, + "loss": 0.7326, + "step": 2258 + }, + { + "epoch": 0.9047237790232185, + "learning_rate": 1.7492231752603305e-05, + "loss": 0.2269, + "step": 2260 + }, + { + "epoch": 0.9055244195356285, + "learning_rate": 1.7473690659616e-05, + "loss": 0.5769, + "step": 2262 + }, + { + "epoch": 0.9063250600480385, + "learning_rate": 1.7455091181538094e-05, + "loss": 0.9878, + "step": 2264 + }, + { + "epoch": 0.9071257005604484, + "learning_rate": 1.743643346367027e-05, + "loss": 0.0126, + "step": 2266 + }, + { + "epoch": 0.9079263410728583, + "learning_rate": 1.741771765176815e-05, + "loss": 0.0573, + "step": 2268 + }, + { + "epoch": 0.9087269815852682, + "learning_rate": 1.739894389204122e-05, + "loss": 0.0123, + "step": 2270 + }, + { + "epoch": 0.9095276220976781, + "learning_rate": 1.7380112331151657e-05, + "loss": 0.2645, + "step": 2272 + }, + { + "epoch": 0.910328262610088, + "learning_rate": 1.7361223116213146e-05, + "loss": 0.1383, + "step": 2274 + }, + { + "epoch": 0.911128903122498, + "learning_rate": 1.7342276394789825e-05, + "loss": 0.1959, + "step": 2276 + }, + { + "epoch": 0.911929543634908, + "learning_rate": 1.732327231489503e-05, + "loss": 0.2246, + "step": 2278 + }, + { + "epoch": 0.9127301841473179, + "learning_rate": 1.7304211024990216e-05, + "loss": 0.1226, + "step": 2280 + }, + { + "epoch": 0.9135308246597278, + "learning_rate": 1.728509267398376e-05, + "loss": 0.117, + "step": 2282 + }, + { + "epoch": 0.9143314651721377, + "learning_rate": 1.7265917411229803e-05, + "loss": 0.1093, + "step": 2284 + }, + { + "epoch": 0.9151321056845476, + "learning_rate": 1.7246685386527105e-05, + "loss": 0.1817, + "step": 2286 + }, + { + "epoch": 0.9159327461969575, + "learning_rate": 1.7227396750117802e-05, + "loss": 0.2221, + "step": 2288 + }, + { + "epoch": 0.9167333867093675, + "learning_rate": 1.7208051652686348e-05, + "loss": 0.0549, + "step": 2290 + }, + { + "epoch": 0.9175340272217775, + "learning_rate": 1.718865024535822e-05, + "loss": 0.2853, + "step": 2292 + }, + { + "epoch": 0.9183346677341874, + "learning_rate": 1.716919267969884e-05, + "loss": 0.308, + "step": 2294 + }, + { + "epoch": 0.9191353082465973, + "learning_rate": 1.7149679107712317e-05, + "loss": 0.1333, + "step": 2296 + }, + { + "epoch": 0.9199359487590072, + "learning_rate": 1.7130109681840298e-05, + "loss": 0.2974, + "step": 2298 + }, + { + "epoch": 0.9207365892714171, + "learning_rate": 1.711048455496075e-05, + "loss": 0.022, + "step": 2300 + }, + { + "epoch": 0.921537229783827, + "learning_rate": 1.7090803880386784e-05, + "loss": 0.0724, + "step": 2302 + }, + { + "epoch": 0.922337870296237, + "learning_rate": 1.7071067811865474e-05, + "loss": 0.073, + "step": 2304 + }, + { + "epoch": 0.923138510808647, + "learning_rate": 1.705127650357663e-05, + "loss": 0.1961, + "step": 2306 + }, + { + "epoch": 0.9239391513210569, + "learning_rate": 1.7031430110131566e-05, + "loss": 0.9507, + "step": 2308 + }, + { + "epoch": 0.9247397918334668, + "learning_rate": 1.701152878657197e-05, + "loss": 0.2302, + "step": 2310 + }, + { + "epoch": 0.9255404323458767, + "learning_rate": 1.699157268836863e-05, + "loss": 0.353, + "step": 2312 + }, + { + "epoch": 0.9263410728582866, + "learning_rate": 1.697156197142023e-05, + "loss": 0.0348, + "step": 2314 + }, + { + "epoch": 0.9271417133706965, + "learning_rate": 1.6951496792052148e-05, + "loss": 0.3032, + "step": 2316 + }, + { + "epoch": 0.9279423538831065, + "learning_rate": 1.6931377307015236e-05, + "loss": 0.1601, + "step": 2318 + }, + { + "epoch": 0.9287429943955164, + "learning_rate": 1.6911203673484583e-05, + "loss": 0.2476, + "step": 2320 + }, + { + "epoch": 0.9295436349079264, + "learning_rate": 1.6890976049058267e-05, + "loss": 0.288, + "step": 2322 + }, + { + "epoch": 0.9303442754203363, + "learning_rate": 1.687069459175619e-05, + "loss": 0.1142, + "step": 2324 + }, + { + "epoch": 0.9311449159327462, + "learning_rate": 1.6850359460018744e-05, + "loss": 0.0748, + "step": 2326 + }, + { + "epoch": 0.9319455564451561, + "learning_rate": 1.682997081270568e-05, + "loss": 0.1398, + "step": 2328 + }, + { + "epoch": 0.932746196957566, + "learning_rate": 1.6809528809094805e-05, + "loss": 0.075, + "step": 2330 + }, + { + "epoch": 0.933546837469976, + "learning_rate": 1.6789033608880742e-05, + "loss": 0.3647, + "step": 2332 + }, + { + "epoch": 0.9343474779823859, + "learning_rate": 1.67684853721737e-05, + "loss": 0.0252, + "step": 2334 + }, + { + "epoch": 0.9351481184947958, + "learning_rate": 1.6747884259498185e-05, + "loss": 0.4783, + "step": 2336 + }, + { + "epoch": 0.9359487590072058, + "learning_rate": 1.6727230431791826e-05, + "loss": 0.2695, + "step": 2338 + }, + { + "epoch": 0.9367493995196157, + "learning_rate": 1.6706524050404006e-05, + "loss": 0.2075, + "step": 2340 + }, + { + "epoch": 0.9375500400320256, + "learning_rate": 1.6685765277094702e-05, + "loss": 0.0415, + "step": 2342 + }, + { + "epoch": 0.9383506805444356, + "learning_rate": 1.6664954274033175e-05, + "loss": 0.0542, + "step": 2344 + }, + { + "epoch": 0.9391513210568455, + "learning_rate": 1.66440912037967e-05, + "loss": 0.0237, + "step": 2346 + }, + { + "epoch": 0.9399519615692554, + "learning_rate": 1.662317622936933e-05, + "loss": 0.3304, + "step": 2348 + }, + { + "epoch": 0.9407526020816653, + "learning_rate": 1.6602209514140562e-05, + "loss": 0.5609, + "step": 2350 + }, + { + "epoch": 0.9415532425940752, + "learning_rate": 1.6581191221904098e-05, + "loss": 0.6125, + "step": 2352 + }, + { + "epoch": 0.9423538831064852, + "learning_rate": 1.6560121516856592e-05, + "loss": 0.1366, + "step": 2354 + }, + { + "epoch": 0.9431545236188951, + "learning_rate": 1.6539000563596328e-05, + "loss": 0.003, + "step": 2356 + }, + { + "epoch": 0.9439551641313051, + "learning_rate": 1.651782852712194e-05, + "loss": 0.0442, + "step": 2358 + }, + { + "epoch": 0.944755804643715, + "learning_rate": 1.6496605572831127e-05, + "loss": 0.052, + "step": 2360 + }, + { + "epoch": 0.9455564451561249, + "learning_rate": 1.6475331866519387e-05, + "loss": 0.0048, + "step": 2362 + }, + { + "epoch": 0.9463570856685348, + "learning_rate": 1.6454007574378657e-05, + "loss": 0.1963, + "step": 2364 + }, + { + "epoch": 0.9471577261809447, + "learning_rate": 1.6432632862996062e-05, + "loss": 0.2465, + "step": 2366 + }, + { + "epoch": 0.9479583666933546, + "learning_rate": 1.6411207899352633e-05, + "loss": 0.0489, + "step": 2368 + }, + { + "epoch": 0.9487590072057646, + "learning_rate": 1.6389732850821964e-05, + "loss": 0.0135, + "step": 2370 + }, + { + "epoch": 0.9495596477181746, + "learning_rate": 1.6368207885168904e-05, + "loss": 0.1357, + "step": 2372 + }, + { + "epoch": 0.9503602882305845, + "learning_rate": 1.6346633170548275e-05, + "loss": 0.2982, + "step": 2374 + }, + { + "epoch": 0.9511609287429944, + "learning_rate": 1.6325008875503563e-05, + "loss": 0.0393, + "step": 2376 + }, + { + "epoch": 0.9519615692554043, + "learning_rate": 1.6303335168965495e-05, + "loss": 0.2672, + "step": 2378 + }, + { + "epoch": 0.9527622097678142, + "learning_rate": 1.628161222025089e-05, + "loss": 0.0149, + "step": 2380 + }, + { + "epoch": 0.9535628502802241, + "learning_rate": 1.625984019906122e-05, + "loss": 0.2072, + "step": 2382 + }, + { + "epoch": 0.9543634907926342, + "learning_rate": 1.623801927548132e-05, + "loss": 0.3846, + "step": 2384 + }, + { + "epoch": 0.9551641313050441, + "learning_rate": 1.6216149619978057e-05, + "loss": 0.0548, + "step": 2386 + }, + { + "epoch": 0.955964771817454, + "learning_rate": 1.6194231403398987e-05, + "loss": 0.0021, + "step": 2388 + }, + { + "epoch": 0.9567654123298639, + "learning_rate": 1.6172264796971063e-05, + "loss": 0.5501, + "step": 2390 + }, + { + "epoch": 0.9575660528422738, + "learning_rate": 1.6150249972299173e-05, + "loss": 0.381, + "step": 2392 + }, + { + "epoch": 0.9583666933546837, + "learning_rate": 1.612818710136499e-05, + "loss": 0.2256, + "step": 2394 + }, + { + "epoch": 0.9591673338670936, + "learning_rate": 1.6106076356525484e-05, + "loss": 0.1745, + "step": 2396 + }, + { + "epoch": 0.9599679743795037, + "learning_rate": 1.6083917910511623e-05, + "loss": 0.4644, + "step": 2398 + }, + { + "epoch": 0.9607686148919136, + "learning_rate": 1.6061711936427028e-05, + "loss": 0.001, + "step": 2400 + }, + { + "epoch": 0.9615692554043235, + "learning_rate": 1.60394586077466e-05, + "loss": 0.2483, + "step": 2402 + }, + { + "epoch": 0.9623698959167334, + "learning_rate": 1.6017158098315224e-05, + "loss": 0.0512, + "step": 2404 + }, + { + "epoch": 0.9631705364291433, + "learning_rate": 1.5994810582346266e-05, + "loss": 0.0671, + "step": 2406 + }, + { + "epoch": 0.9639711769415532, + "learning_rate": 1.5972416234420404e-05, + "loss": 0.1952, + "step": 2408 + }, + { + "epoch": 0.9647718174539631, + "learning_rate": 1.594997522948413e-05, + "loss": 0.2874, + "step": 2410 + }, + { + "epoch": 0.9655724579663731, + "learning_rate": 1.592748774284844e-05, + "loss": 0.102, + "step": 2412 + }, + { + "epoch": 0.966373098478783, + "learning_rate": 1.5904953950187448e-05, + "loss": 0.1618, + "step": 2414 + }, + { + "epoch": 0.967173738991193, + "learning_rate": 1.588237402753703e-05, + "loss": 0.3621, + "step": 2416 + }, + { + "epoch": 0.9679743795036029, + "learning_rate": 1.5859748151293354e-05, + "loss": 0.1427, + "step": 2418 + }, + { + "epoch": 0.9687750200160128, + "learning_rate": 1.5837076498211673e-05, + "loss": 0.2411, + "step": 2420 + }, + { + "epoch": 0.9695756605284227, + "learning_rate": 1.581435924540482e-05, + "loss": 0.094, + "step": 2422 + }, + { + "epoch": 0.9703763010408326, + "learning_rate": 1.579159657034185e-05, + "loss": 0.0049, + "step": 2424 + }, + { + "epoch": 0.9711769415532426, + "learning_rate": 1.5768788650846674e-05, + "loss": 0.1452, + "step": 2426 + }, + { + "epoch": 0.9719775820656525, + "learning_rate": 1.574593566509664e-05, + "loss": 0.2861, + "step": 2428 + }, + { + "epoch": 0.9727782225780625, + "learning_rate": 1.5723037791621203e-05, + "loss": 0.0181, + "step": 2430 + }, + { + "epoch": 0.9735788630904724, + "learning_rate": 1.5700095209300386e-05, + "loss": 0.0083, + "step": 2432 + }, + { + "epoch": 0.9743795036028823, + "learning_rate": 1.5677108097363565e-05, + "loss": 0.4448, + "step": 2434 + }, + { + "epoch": 0.9751801441152922, + "learning_rate": 1.5654076635387976e-05, + "loss": 0.1516, + "step": 2436 + }, + { + "epoch": 0.9759807846277022, + "learning_rate": 1.5631001003297302e-05, + "loss": 0.1595, + "step": 2438 + }, + { + "epoch": 0.9767814251401121, + "learning_rate": 1.560788138136029e-05, + "loss": 0.1581, + "step": 2440 + }, + { + "epoch": 0.977582065652522, + "learning_rate": 1.5584717950189373e-05, + "loss": 0.0335, + "step": 2442 + }, + { + "epoch": 0.978382706164932, + "learning_rate": 1.5561510890739137e-05, + "loss": 0.1403, + "step": 2444 + }, + { + "epoch": 0.9791833466773419, + "learning_rate": 1.5538260384305083e-05, + "loss": 0.2236, + "step": 2446 + }, + { + "epoch": 0.9799839871897518, + "learning_rate": 1.5514966612522088e-05, + "loss": 0.1106, + "step": 2448 + }, + { + "epoch": 0.9807846277021617, + "learning_rate": 1.5491629757363033e-05, + "loss": 0.2088, + "step": 2450 + }, + { + "epoch": 0.9815852682145717, + "learning_rate": 1.546825000113736e-05, + "loss": 0.608, + "step": 2452 + }, + { + "epoch": 0.9823859087269816, + "learning_rate": 1.544482752648966e-05, + "loss": 0.06, + "step": 2454 + }, + { + "epoch": 0.9831865492393915, + "learning_rate": 1.5421362516398285e-05, + "loss": 0.5576, + "step": 2456 + }, + { + "epoch": 0.9839871897518014, + "learning_rate": 1.539785515417377e-05, + "loss": 0.1275, + "step": 2458 + }, + { + "epoch": 0.9847878302642114, + "learning_rate": 1.5374305623457605e-05, + "loss": 1.3793, + "step": 2460 + }, + { + "epoch": 0.9855884707766213, + "learning_rate": 1.5350714108220677e-05, + "loss": 0.0663, + "step": 2462 + }, + { + "epoch": 0.9863891112890312, + "learning_rate": 1.532708079276186e-05, + "loss": 0.1428, + "step": 2464 + }, + { + "epoch": 0.9871897518014412, + "learning_rate": 1.5303405861706567e-05, + "loss": 0.9145, + "step": 2466 + }, + { + "epoch": 0.9879903923138511, + "learning_rate": 1.5279689500005353e-05, + "loss": 0.0227, + "step": 2468 + }, + { + "epoch": 0.988791032826261, + "learning_rate": 1.5255931892932344e-05, + "loss": 0.0544, + "step": 2470 + }, + { + "epoch": 0.9895916733386709, + "learning_rate": 1.5232133226083962e-05, + "loss": 0.4662, + "step": 2472 + }, + { + "epoch": 0.9903923138510808, + "learning_rate": 1.5208293685377362e-05, + "loss": 0.064, + "step": 2474 + }, + { + "epoch": 0.9911929543634908, + "learning_rate": 1.5184413457049014e-05, + "loss": 0.0645, + "step": 2476 + }, + { + "epoch": 0.9919935948759008, + "learning_rate": 1.5160492727653238e-05, + "loss": 0.3333, + "step": 2478 + }, + { + "epoch": 0.9927942353883107, + "learning_rate": 1.5136531684060753e-05, + "loss": 0.3651, + "step": 2480 + }, + { + "epoch": 0.9935948759007206, + "learning_rate": 1.5112530513457251e-05, + "loss": 0.1784, + "step": 2482 + }, + { + "epoch": 0.9943955164131305, + "learning_rate": 1.50884894033418e-05, + "loss": 0.0366, + "step": 2484 + }, + { + "epoch": 0.9951961569255404, + "learning_rate": 1.5064408541525578e-05, + "loss": 0.0181, + "step": 2486 + }, + { + "epoch": 0.9959967974379503, + "learning_rate": 1.504028811613027e-05, + "loss": 0.1543, + "step": 2488 + }, + { + "epoch": 0.9967974379503602, + "learning_rate": 1.5016128315586636e-05, + "loss": 0.2302, + "step": 2490 + }, + { + "epoch": 0.9975980784627703, + "learning_rate": 1.4991929328633043e-05, + "loss": 0.1723, + "step": 2492 + }, + { + "epoch": 0.9983987189751802, + "learning_rate": 1.4967691344314012e-05, + "loss": 0.1521, + "step": 2494 + }, + { + "epoch": 0.9991993594875901, + "learning_rate": 1.4943414551978622e-05, + "loss": 0.4803, + "step": 2496 + }, + { + "epoch": 1.0, + "learning_rate": 1.4919099141279214e-05, + "loss": 0.0601, + "step": 2498 + }, + { + "epoch": 1.0, + "step": 2498, + "total_flos": 1.5606287483011072e+16, + "train_loss": 0.21205857841617706, + "train_runtime": 3220.1664, + "train_samples_per_second": 6.206, + "train_steps_per_second": 0.776 + } + ], + "logging_steps": 2, + "max_steps": 2498, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": {}, + "total_flos": 1.5606287483011072e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round0.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round0.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b04a2edf8c97b507d03b9cf03ef8a34da46e1a8 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d947126c628438ba9d02f915ddee955ca5a48797cf023adac0b3268e98b96e +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round1.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round1.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0c83c7f314ea55c2475252648c63d8b67653d54 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ffa64505aa4b1a82142e7c39ca1971c1f08f5d4b4e846e61cb8b56823401e22 +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round2.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..0113a1d9af7522a739aa74aacd456565774ba0f7 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753ef7369cb87ba16731ae8518682a71ee44ae2fbc264b3d02ed42cc158fb671 +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round3.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round3.pth new file mode 100644 index 0000000000000000000000000000000000000000..8830b839aacf624ea057f0038a5e76eea13dccbb --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_sft_scenario12_new_10000_random0_125_seed1/server_model_round3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804e679959facc911c5746a9efdbcd738e2d0d047f73f20205748f9664d9ac65 +size 639793378