diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/0_trainer_state.json b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/0_trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f0bee82b498540bef3897a3b0cee4dd10ad88dc5 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/0_trainer_state.json @@ -0,0 +1,15020 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 4996, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0008006405124099279, + "learning_rate": 2.406842319175051e-06, + "loss": 0.1837, + "step": 2 + }, + { + "epoch": 0.0008006405124099279, + "learning_rate": 2.415943612351265e-06, + "loss": 0.137, + "step": 4 + }, + { + "epoch": 0.0016012810248198558, + "learning_rate": 2.4250597173539104e-06, + "loss": 0.1388, + "step": 6 + }, + { + "epoch": 0.0016012810248198558, + "learning_rate": 2.4341906163790364e-06, + "loss": 0.3247, + "step": 8 + }, + { + "epoch": 0.0024019215372297837, + "learning_rate": 2.443336291593801e-06, + "loss": 0.1085, + "step": 10 + }, + { + "epoch": 0.0024019215372297837, + "learning_rate": 2.4524967251364995e-06, + "loss": 0.1875, + "step": 12 + }, + { + "epoch": 0.0032025620496397116, + "learning_rate": 2.461671899116598e-06, + "loss": 0.164, + "step": 14 + }, + { + "epoch": 0.0032025620496397116, + "learning_rate": 2.4708617956148052e-06, + "loss": 0.079, + "step": 16 + }, + { + "epoch": 0.0040032025620496394, + "learning_rate": 2.4800663966830417e-06, + "loss": 0.2091, + "step": 18 + }, + { + "epoch": 0.0040032025620496394, + "learning_rate": 2.4892856843445236e-06, + "loss": 0.1944, + "step": 20 + }, + { + "epoch": 0.004803843074459567, + "learning_rate": 2.4985196405937807e-06, + "loss": 0.1327, + "step": 22 + }, + { + "epoch": 0.004803843074459567, + "learning_rate": 2.507768247396697e-06, + "loss": 0.2811, + "step": 24 + }, + { + "epoch": 0.005604483586869495, + "learning_rate": 2.5170314866905443e-06, + "loss": 0.1624, + "step": 26 + }, + { + "epoch": 0.005604483586869495, + "learning_rate": 2.5263093403840022e-06, + "loss": 0.1611, + "step": 28 + }, + { + "epoch": 0.006405124099279423, + "learning_rate": 2.535601790357246e-06, + "loss": 0.214, + "step": 30 + }, + { + "epoch": 0.006405124099279423, + "learning_rate": 2.5449088184619065e-06, + "loss": 0.0992, + "step": 32 + }, + { + "epoch": 0.007205764611689352, + "learning_rate": 2.5542304065211578e-06, + "loss": 0.0958, + "step": 34 + }, + { + "epoch": 0.007205764611689352, + "learning_rate": 2.5635665363297356e-06, + "loss": 0.2021, + "step": 36 + }, + { + "epoch": 0.008006405124099279, + "learning_rate": 2.5729171896539763e-06, + "loss": 0.3269, + "step": 38 + }, + { + "epoch": 0.008006405124099279, + "learning_rate": 2.5822823482318517e-06, + "loss": 0.0858, + "step": 40 + }, + { + "epoch": 0.008807045636509208, + "learning_rate": 2.5916619937729915e-06, + "loss": 0.1152, + "step": 42 + }, + { + "epoch": 0.008807045636509208, + "learning_rate": 2.6010561079587694e-06, + "loss": 0.078, + "step": 44 + }, + { + "epoch": 0.009607686148919135, + "learning_rate": 2.6104646724422643e-06, + "loss": 0.2577, + "step": 46 + }, + { + "epoch": 0.009607686148919135, + "learning_rate": 2.6198876688483453e-06, + "loss": 0.083, + "step": 48 + }, + { + "epoch": 0.010408326661329063, + "learning_rate": 2.629325078773699e-06, + "loss": 0.6694, + "step": 50 + }, + { + "epoch": 0.010408326661329063, + "learning_rate": 2.6387768837868565e-06, + "loss": 0.1245, + "step": 52 + }, + { + "epoch": 0.01120896717373899, + "learning_rate": 2.648243065428239e-06, + "loss": 0.2879, + "step": 54 + }, + { + "epoch": 0.01120896717373899, + "learning_rate": 2.6577236052101764e-06, + "loss": 0.1754, + "step": 56 + }, + { + "epoch": 0.01200960768614892, + "learning_rate": 2.6672184846169934e-06, + "loss": 0.2798, + "step": 58 + }, + { + "epoch": 0.01200960768614892, + "learning_rate": 2.6767276851049716e-06, + "loss": 0.091, + "step": 60 + }, + { + "epoch": 0.012810248198558846, + "learning_rate": 2.686251188102439e-06, + "loss": 0.1506, + "step": 62 + }, + { + "epoch": 0.012810248198558846, + "learning_rate": 2.6957889750097866e-06, + "loss": 0.2014, + "step": 64 + }, + { + "epoch": 0.013610888710968775, + "learning_rate": 2.7053410271995085e-06, + "loss": 0.1241, + "step": 66 + }, + { + "epoch": 0.013610888710968775, + "learning_rate": 2.7149073260162416e-06, + "loss": 0.219, + "step": 68 + }, + { + "epoch": 0.014411529223378704, + "learning_rate": 2.724487852776785e-06, + "loss": 0.1256, + "step": 70 + }, + { + "epoch": 0.014411529223378704, + "learning_rate": 2.7340825887701848e-06, + "loss": 0.3652, + "step": 72 + }, + { + "epoch": 0.01521216973578863, + "learning_rate": 2.7436915152577038e-06, + "loss": 0.1133, + "step": 74 + }, + { + "epoch": 0.01521216973578863, + "learning_rate": 2.7533146134728993e-06, + "loss": 0.1085, + "step": 76 + }, + { + "epoch": 0.016012810248198558, + "learning_rate": 2.7629518646216522e-06, + "loss": 0.5842, + "step": 78 + }, + { + "epoch": 0.016012810248198558, + "learning_rate": 2.772603249882202e-06, + "loss": 0.6253, + "step": 80 + }, + { + "epoch": 0.016813450760608487, + "learning_rate": 2.782268750405185e-06, + "loss": 0.1312, + "step": 82 + }, + { + "epoch": 0.016813450760608487, + "learning_rate": 2.7919483473136555e-06, + "loss": 0.1036, + "step": 84 + }, + { + "epoch": 0.017614091273018415, + "learning_rate": 2.801642021703177e-06, + "loss": 0.1071, + "step": 86 + }, + { + "epoch": 0.017614091273018415, + "learning_rate": 2.81134975464178e-06, + "loss": 0.209, + "step": 88 + }, + { + "epoch": 0.018414731785428344, + "learning_rate": 2.821071527170053e-06, + "loss": 0.1211, + "step": 90 + }, + { + "epoch": 0.018414731785428344, + "learning_rate": 2.8308073203011634e-06, + "loss": 0.1012, + "step": 92 + }, + { + "epoch": 0.01921537229783827, + "learning_rate": 2.8405571150208945e-06, + "loss": 0.3513, + "step": 94 + }, + { + "epoch": 0.01921537229783827, + "learning_rate": 2.850320892287688e-06, + "loss": 0.2698, + "step": 96 + }, + { + "epoch": 0.020016012810248198, + "learning_rate": 2.860098633032663e-06, + "loss": 0.2184, + "step": 98 + }, + { + "epoch": 0.020016012810248198, + "learning_rate": 2.8698903181597026e-06, + "loss": 0.1107, + "step": 100 + }, + { + "epoch": 0.020816653322658127, + "learning_rate": 2.879695928545424e-06, + "loss": 0.1169, + "step": 102 + }, + { + "epoch": 0.020816653322658127, + "learning_rate": 2.889515445039256e-06, + "loss": 0.1433, + "step": 104 + }, + { + "epoch": 0.021617293835068056, + "learning_rate": 2.899348848463471e-06, + "loss": 0.0968, + "step": 106 + }, + { + "epoch": 0.021617293835068056, + "learning_rate": 2.909196119613218e-06, + "loss": 0.1201, + "step": 108 + }, + { + "epoch": 0.02241793434747798, + "learning_rate": 2.9190572392565643e-06, + "loss": 0.2138, + "step": 110 + }, + { + "epoch": 0.02241793434747798, + "learning_rate": 2.928932188134529e-06, + "loss": 0.1597, + "step": 112 + }, + { + "epoch": 0.02321857485988791, + "learning_rate": 2.9388209469611093e-06, + "loss": 0.0958, + "step": 114 + }, + { + "epoch": 0.02321857485988791, + "learning_rate": 2.9487234964233724e-06, + "loss": 0.1017, + "step": 116 + }, + { + "epoch": 0.02401921537229784, + "learning_rate": 2.9586398171814114e-06, + "loss": 0.1107, + "step": 118 + }, + { + "epoch": 0.02401921537229784, + "learning_rate": 2.9685698898684355e-06, + "loss": 0.04, + "step": 120 + }, + { + "epoch": 0.024819855884707767, + "learning_rate": 2.9785136950907987e-06, + "loss": 0.3738, + "step": 122 + }, + { + "epoch": 0.024819855884707767, + "learning_rate": 2.988471213428035e-06, + "loss": 0.0265, + "step": 124 + }, + { + "epoch": 0.025620496397117692, + "learning_rate": 2.9984424254328936e-06, + "loss": 0.1569, + "step": 126 + }, + { + "epoch": 0.025620496397117692, + "learning_rate": 3.00842731163137e-06, + "loss": 0.5498, + "step": 128 + }, + { + "epoch": 0.02642113690952762, + "learning_rate": 3.0184258525227895e-06, + "loss": 0.6811, + "step": 130 + }, + { + "epoch": 0.02642113690952762, + "learning_rate": 3.0284380285797733e-06, + "loss": 0.0504, + "step": 132 + }, + { + "epoch": 0.02722177742193755, + "learning_rate": 3.038463820248324e-06, + "loss": 0.2216, + "step": 134 + }, + { + "epoch": 0.02722177742193755, + "learning_rate": 3.048503207947854e-06, + "loss": 0.0719, + "step": 136 + }, + { + "epoch": 0.02802241793434748, + "learning_rate": 3.0585561720712207e-06, + "loss": 0.1581, + "step": 138 + }, + { + "epoch": 0.02802241793434748, + "learning_rate": 3.068622692984767e-06, + "loss": 0.0228, + "step": 140 + }, + { + "epoch": 0.028823058446757407, + "learning_rate": 3.0787027510283495e-06, + "loss": 0.1019, + "step": 142 + }, + { + "epoch": 0.028823058446757407, + "learning_rate": 3.0887963265154187e-06, + "loss": 0.209, + "step": 144 + }, + { + "epoch": 0.029623698959167333, + "learning_rate": 3.098903399732992e-06, + "loss": 0.4918, + "step": 146 + }, + { + "epoch": 0.029623698959167333, + "learning_rate": 3.1090239509417364e-06, + "loss": 0.0516, + "step": 148 + }, + { + "epoch": 0.03042433947157726, + "learning_rate": 3.1191579603759946e-06, + "loss": 0.1138, + "step": 150 + }, + { + "epoch": 0.03042433947157726, + "learning_rate": 3.129305408243829e-06, + "loss": 0.0958, + "step": 152 + }, + { + "epoch": 0.03122497998398719, + "learning_rate": 3.139466274727052e-06, + "loss": 0.1987, + "step": 154 + }, + { + "epoch": 0.03122497998398719, + "learning_rate": 3.1496405399812602e-06, + "loss": 0.1433, + "step": 156 + }, + { + "epoch": 0.032025620496397116, + "learning_rate": 3.159828184135917e-06, + "loss": 0.3302, + "step": 158 + }, + { + "epoch": 0.032025620496397116, + "learning_rate": 3.17002918729432e-06, + "loss": 0.04, + "step": 160 + }, + { + "epoch": 0.03282626100880705, + "learning_rate": 3.1802435295336908e-06, + "loss": 0.1231, + "step": 162 + }, + { + "epoch": 0.03282626100880705, + "learning_rate": 3.1904711909051967e-06, + "loss": 0.2238, + "step": 164 + }, + { + "epoch": 0.03362690152121697, + "learning_rate": 3.2007121514339924e-06, + "loss": 0.1847, + "step": 166 + }, + { + "epoch": 0.03362690152121697, + "learning_rate": 3.2109663911192622e-06, + "loss": 0.4912, + "step": 168 + }, + { + "epoch": 0.0344275420336269, + "learning_rate": 3.221233889934239e-06, + "loss": 0.4588, + "step": 170 + }, + { + "epoch": 0.0344275420336269, + "learning_rate": 3.231514627826302e-06, + "loss": 0.2498, + "step": 172 + }, + { + "epoch": 0.03522818254603683, + "learning_rate": 3.2418085847169344e-06, + "loss": 0.0887, + "step": 174 + }, + { + "epoch": 0.03522818254603683, + "learning_rate": 3.2521157405018146e-06, + "loss": 0.0328, + "step": 176 + }, + { + "epoch": 0.036028823058446756, + "learning_rate": 3.2624360750508457e-06, + "loss": 0.2531, + "step": 178 + }, + { + "epoch": 0.036028823058446756, + "learning_rate": 3.2727695682081897e-06, + "loss": 0.4032, + "step": 180 + }, + { + "epoch": 0.03682946357085669, + "learning_rate": 3.28311619979231e-06, + "loss": 0.2236, + "step": 182 + }, + { + "epoch": 0.03682946357085669, + "learning_rate": 3.293475949595998e-06, + "loss": 0.6358, + "step": 184 + }, + { + "epoch": 0.03763010408326661, + "learning_rate": 3.303848797386465e-06, + "loss": 0.1072, + "step": 186 + }, + { + "epoch": 0.03763010408326661, + "learning_rate": 3.314234722905302e-06, + "loss": 0.1338, + "step": 188 + }, + { + "epoch": 0.03843074459567654, + "learning_rate": 3.3246337058685697e-06, + "loss": 0.3992, + "step": 190 + }, + { + "epoch": 0.03843074459567654, + "learning_rate": 3.335045725966829e-06, + "loss": 0.0423, + "step": 192 + }, + { + "epoch": 0.03923138510808647, + "learning_rate": 3.3454707628651806e-06, + "loss": 0.3688, + "step": 194 + }, + { + "epoch": 0.03923138510808647, + "learning_rate": 3.355908796203301e-06, + "loss": 0.0216, + "step": 196 + }, + { + "epoch": 0.040032025620496396, + "learning_rate": 3.3663598055954716e-06, + "loss": 0.1035, + "step": 198 + }, + { + "epoch": 0.040032025620496396, + "learning_rate": 3.3768237706306716e-06, + "loss": 0.4789, + "step": 200 + }, + { + "epoch": 0.04083266613290633, + "learning_rate": 3.3873006708725365e-06, + "loss": 0.1658, + "step": 202 + }, + { + "epoch": 0.04083266613290633, + "learning_rate": 3.3977904858594534e-06, + "loss": 0.4219, + "step": 204 + }, + { + "epoch": 0.041633306645316254, + "learning_rate": 3.408293195104586e-06, + "loss": 0.1471, + "step": 206 + }, + { + "epoch": 0.041633306645316254, + "learning_rate": 3.418808778095917e-06, + "loss": 0.3087, + "step": 208 + }, + { + "epoch": 0.04243394715772618, + "learning_rate": 3.4293372142962845e-06, + "loss": 0.1581, + "step": 210 + }, + { + "epoch": 0.04243394715772618, + "learning_rate": 3.4398784831434097e-06, + "loss": 0.1016, + "step": 212 + }, + { + "epoch": 0.04323458767013611, + "learning_rate": 3.4504325640499936e-06, + "loss": 0.2027, + "step": 214 + }, + { + "epoch": 0.04323458767013611, + "learning_rate": 3.460999436403676e-06, + "loss": 0.105, + "step": 216 + }, + { + "epoch": 0.044035228182546036, + "learning_rate": 3.4715790795671232e-06, + "loss": 0.144, + "step": 218 + }, + { + "epoch": 0.044035228182546036, + "learning_rate": 3.4821714728780654e-06, + "loss": 0.4032, + "step": 220 + }, + { + "epoch": 0.04483586869495596, + "learning_rate": 3.4927765956493276e-06, + "loss": 0.0924, + "step": 222 + }, + { + "epoch": 0.04483586869495596, + "learning_rate": 3.5033944271688624e-06, + "loss": 0.1438, + "step": 224 + }, + { + "epoch": 0.045636509207365894, + "learning_rate": 3.514024946699842e-06, + "loss": 0.2157, + "step": 226 + }, + { + "epoch": 0.045636509207365894, + "learning_rate": 3.5246681334806177e-06, + "loss": 0.2133, + "step": 228 + }, + { + "epoch": 0.04643714971977582, + "learning_rate": 3.535323966724814e-06, + "loss": 0.0215, + "step": 230 + }, + { + "epoch": 0.04643714971977582, + "learning_rate": 3.5459924256213596e-06, + "loss": 0.028, + "step": 232 + }, + { + "epoch": 0.04723779023218575, + "learning_rate": 3.556673489334522e-06, + "loss": 0.1222, + "step": 234 + }, + { + "epoch": 0.04723779023218575, + "learning_rate": 3.567367137003953e-06, + "loss": 0.2998, + "step": 236 + }, + { + "epoch": 0.04803843074459568, + "learning_rate": 3.5780733477447127e-06, + "loss": 0.6442, + "step": 238 + }, + { + "epoch": 0.04803843074459568, + "learning_rate": 3.588792100647368e-06, + "loss": 0.3422, + "step": 240 + }, + { + "epoch": 0.0488390712570056, + "learning_rate": 3.5995233747779467e-06, + "loss": 0.078, + "step": 242 + }, + { + "epoch": 0.0488390712570056, + "learning_rate": 3.6102671491780393e-06, + "loss": 0.2073, + "step": 244 + }, + { + "epoch": 0.049639711769415534, + "learning_rate": 3.6210234028648216e-06, + "loss": 0.0957, + "step": 246 + }, + { + "epoch": 0.049639711769415534, + "learning_rate": 3.6317921148310965e-06, + "loss": 0.0095, + "step": 248 + }, + { + "epoch": 0.05044035228182546, + "learning_rate": 3.6425732640453235e-06, + "loss": 0.1492, + "step": 250 + }, + { + "epoch": 0.05044035228182546, + "learning_rate": 3.653366829451711e-06, + "loss": 0.0762, + "step": 252 + }, + { + "epoch": 0.051240992794235385, + "learning_rate": 3.6641727899701795e-06, + "loss": 0.209, + "step": 254 + }, + { + "epoch": 0.051240992794235385, + "learning_rate": 3.674991124496452e-06, + "loss": 0.1668, + "step": 256 + }, + { + "epoch": 0.05204163330664532, + "learning_rate": 3.6858218119020884e-06, + "loss": 0.078, + "step": 258 + }, + { + "epoch": 0.05204163330664532, + "learning_rate": 3.696664831034521e-06, + "loss": 0.0654, + "step": 260 + }, + { + "epoch": 0.05284227381905524, + "learning_rate": 3.7075201607170997e-06, + "loss": 0.0909, + "step": 262 + }, + { + "epoch": 0.05284227381905524, + "learning_rate": 3.7183877797491143e-06, + "loss": 0.0376, + "step": 264 + }, + { + "epoch": 0.053642914331465175, + "learning_rate": 3.729267666905899e-06, + "loss": 0.3431, + "step": 266 + }, + { + "epoch": 0.053642914331465175, + "learning_rate": 3.740159800938784e-06, + "loss": 0.0255, + "step": 268 + }, + { + "epoch": 0.0544435548438751, + "learning_rate": 3.751064160575195e-06, + "loss": 0.4538, + "step": 270 + }, + { + "epoch": 0.0544435548438751, + "learning_rate": 3.7619807245186824e-06, + "loss": 0.0347, + "step": 272 + }, + { + "epoch": 0.055244195356285025, + "learning_rate": 3.772909471448959e-06, + "loss": 0.6067, + "step": 274 + }, + { + "epoch": 0.055244195356285025, + "learning_rate": 3.783850380021933e-06, + "loss": 0.0212, + "step": 276 + }, + { + "epoch": 0.05604483586869496, + "learning_rate": 3.794803428869799e-06, + "loss": 0.7026, + "step": 278 + }, + { + "epoch": 0.05604483586869496, + "learning_rate": 3.8057685966010025e-06, + "loss": 0.3088, + "step": 280 + }, + { + "epoch": 0.05684547638110488, + "learning_rate": 3.816745861800334e-06, + "loss": 0.158, + "step": 282 + }, + { + "epoch": 0.05684547638110488, + "learning_rate": 3.827735203028956e-06, + "loss": 0.1093, + "step": 284 + }, + { + "epoch": 0.057646116893514815, + "learning_rate": 3.838736598824446e-06, + "loss": 0.3071, + "step": 286 + }, + { + "epoch": 0.057646116893514815, + "learning_rate": 3.849750027700842e-06, + "loss": 0.0633, + "step": 288 + }, + { + "epoch": 0.05844675740592474, + "learning_rate": 3.860775468148662e-06, + "loss": 0.3126, + "step": 290 + }, + { + "epoch": 0.05844675740592474, + "learning_rate": 3.871812898635011e-06, + "loss": 0.0558, + "step": 292 + }, + { + "epoch": 0.059247397918334666, + "learning_rate": 3.882862297603536e-06, + "loss": 0.6192, + "step": 294 + }, + { + "epoch": 0.059247397918334666, + "learning_rate": 3.8939236434745184e-06, + "loss": 0.1351, + "step": 296 + }, + { + "epoch": 0.0600480384307446, + "learning_rate": 3.904996914644913e-06, + "loss": 0.1579, + "step": 298 + }, + { + "epoch": 0.0600480384307446, + "learning_rate": 3.916082089488379e-06, + "loss": 0.0492, + "step": 300 + }, + { + "epoch": 0.06084867894315452, + "learning_rate": 3.927179146355317e-06, + "loss": 0.5427, + "step": 302 + }, + { + "epoch": 0.06084867894315452, + "learning_rate": 3.938288063572962e-06, + "loss": 0.0958, + "step": 304 + }, + { + "epoch": 0.06164931945556445, + "learning_rate": 3.949408819445345e-06, + "loss": 0.2609, + "step": 306 + }, + { + "epoch": 0.06164931945556445, + "learning_rate": 3.960541392253387e-06, + "loss": 0.27, + "step": 308 + }, + { + "epoch": 0.06244995996797438, + "learning_rate": 3.971685760254933e-06, + "loss": 0.4258, + "step": 310 + }, + { + "epoch": 0.06244995996797438, + "learning_rate": 3.982841901684792e-06, + "loss": 0.0762, + "step": 312 + }, + { + "epoch": 0.0632506004803843, + "learning_rate": 3.994009794754777e-06, + "loss": 0.0854, + "step": 314 + }, + { + "epoch": 0.0632506004803843, + "learning_rate": 4.005189417653737e-06, + "loss": 0.2072, + "step": 316 + }, + { + "epoch": 0.06405124099279423, + "learning_rate": 4.016380748547654e-06, + "loss": 0.1933, + "step": 318 + }, + { + "epoch": 0.06405124099279423, + "learning_rate": 4.027583765579601e-06, + "loss": 0.1992, + "step": 320 + }, + { + "epoch": 0.06485188150520416, + "learning_rate": 4.038798446869847e-06, + "loss": 0.2866, + "step": 322 + }, + { + "epoch": 0.06485188150520416, + "learning_rate": 4.050024770515873e-06, + "loss": 0.0506, + "step": 324 + }, + { + "epoch": 0.0656525220176141, + "learning_rate": 4.061262714592426e-06, + "loss": 0.1326, + "step": 326 + }, + { + "epoch": 0.0656525220176141, + "learning_rate": 4.072512257151546e-06, + "loss": 0.0592, + "step": 328 + }, + { + "epoch": 0.06645316253002402, + "learning_rate": 4.0837733762226584e-06, + "loss": 0.1291, + "step": 330 + }, + { + "epoch": 0.06645316253002402, + "learning_rate": 4.095046049812541e-06, + "loss": 0.2255, + "step": 332 + }, + { + "epoch": 0.06725380304243395, + "learning_rate": 4.106330255905417e-06, + "loss": 0.4503, + "step": 334 + }, + { + "epoch": 0.06725380304243395, + "learning_rate": 4.117625972462988e-06, + "loss": 0.1545, + "step": 336 + }, + { + "epoch": 0.06805444355484387, + "learning_rate": 4.128933177424475e-06, + "loss": 0.0849, + "step": 338 + }, + { + "epoch": 0.06805444355484387, + "learning_rate": 4.1402518487066624e-06, + "loss": 0.005, + "step": 340 + }, + { + "epoch": 0.0688550840672538, + "learning_rate": 4.151581964203924e-06, + "loss": 0.1231, + "step": 342 + }, + { + "epoch": 0.0688550840672538, + "learning_rate": 4.1629235017883285e-06, + "loss": 0.0066, + "step": 344 + }, + { + "epoch": 0.06965572457966374, + "learning_rate": 4.174276439309593e-06, + "loss": 0.2019, + "step": 346 + }, + { + "epoch": 0.06965572457966374, + "learning_rate": 4.1856407545951825e-06, + "loss": 0.0196, + "step": 348 + }, + { + "epoch": 0.07045636509207366, + "learning_rate": 4.197016425450347e-06, + "loss": 0.1949, + "step": 350 + }, + { + "epoch": 0.07045636509207366, + "learning_rate": 4.208403429658151e-06, + "loss": 0.3205, + "step": 352 + }, + { + "epoch": 0.07125700560448359, + "learning_rate": 4.219801744979517e-06, + "loss": 0.098, + "step": 354 + }, + { + "epoch": 0.07125700560448359, + "learning_rate": 4.2312113491533145e-06, + "loss": 0.0923, + "step": 356 + }, + { + "epoch": 0.07205764611689351, + "learning_rate": 4.242632219896328e-06, + "loss": 0.1154, + "step": 358 + }, + { + "epoch": 0.07205764611689351, + "learning_rate": 4.254064334903347e-06, + "loss": 0.0383, + "step": 360 + }, + { + "epoch": 0.07285828662930344, + "learning_rate": 4.2655076718472045e-06, + "loss": 0.2207, + "step": 362 + }, + { + "epoch": 0.07285828662930344, + "learning_rate": 4.276962208378814e-06, + "loss": 0.0514, + "step": 364 + }, + { + "epoch": 0.07365892714171338, + "learning_rate": 4.28842792212722e-06, + "loss": 0.5628, + "step": 366 + }, + { + "epoch": 0.07365892714171338, + "learning_rate": 4.299904790699619e-06, + "loss": 0.3137, + "step": 368 + }, + { + "epoch": 0.0744595676541233, + "learning_rate": 4.3113927916814665e-06, + "loss": 0.3749, + "step": 370 + }, + { + "epoch": 0.0744595676541233, + "learning_rate": 4.3228919026364345e-06, + "loss": 0.2242, + "step": 372 + }, + { + "epoch": 0.07526020816653323, + "learning_rate": 4.33440210110651e-06, + "loss": 0.3419, + "step": 374 + }, + { + "epoch": 0.07526020816653323, + "learning_rate": 4.345923364612024e-06, + "loss": 0.0688, + "step": 376 + }, + { + "epoch": 0.07606084867894315, + "learning_rate": 4.3574556706517035e-06, + "loss": 0.0983, + "step": 378 + }, + { + "epoch": 0.07606084867894315, + "learning_rate": 4.368998996702686e-06, + "loss": 0.0157, + "step": 380 + }, + { + "epoch": 0.07686148919135308, + "learning_rate": 4.380553320220638e-06, + "loss": 0.0847, + "step": 382 + }, + { + "epoch": 0.07686148919135308, + "learning_rate": 4.392118618639698e-06, + "loss": 0.4016, + "step": 384 + }, + { + "epoch": 0.07766212970376302, + "learning_rate": 4.403694869372589e-06, + "loss": 0.138, + "step": 386 + }, + { + "epoch": 0.07766212970376302, + "learning_rate": 4.415282049810643e-06, + "loss": 0.0347, + "step": 388 + }, + { + "epoch": 0.07846277021617294, + "learning_rate": 4.4268801373238454e-06, + "loss": 0.0908, + "step": 390 + }, + { + "epoch": 0.07846277021617294, + "learning_rate": 4.4384891092608795e-06, + "loss": 0.0011, + "step": 392 + }, + { + "epoch": 0.07926341072858287, + "learning_rate": 4.450108942949158e-06, + "loss": 0.5788, + "step": 394 + }, + { + "epoch": 0.07926341072858287, + "learning_rate": 4.461739615694921e-06, + "loss": 0.667, + "step": 396 + }, + { + "epoch": 0.08006405124099279, + "learning_rate": 4.473381104783201e-06, + "loss": 0.167, + "step": 398 + }, + { + "epoch": 0.08006405124099279, + "learning_rate": 4.485033387477915e-06, + "loss": 0.1626, + "step": 400 + }, + { + "epoch": 0.08086469175340272, + "learning_rate": 4.496696441021904e-06, + "loss": 1.0366, + "step": 402 + }, + { + "epoch": 0.08086469175340272, + "learning_rate": 4.5083702426369715e-06, + "loss": 0.3269, + "step": 404 + }, + { + "epoch": 0.08166533226581266, + "learning_rate": 4.520054769523929e-06, + "loss": 0.3985, + "step": 406 + }, + { + "epoch": 0.08166533226581266, + "learning_rate": 4.531749998862628e-06, + "loss": 0.1938, + "step": 408 + }, + { + "epoch": 0.08246597277822258, + "learning_rate": 4.543455907812063e-06, + "loss": 0.3644, + "step": 410 + }, + { + "epoch": 0.08246597277822258, + "learning_rate": 4.555172473510324e-06, + "loss": 0.0716, + "step": 412 + }, + { + "epoch": 0.08326661329063251, + "learning_rate": 4.566899673074706e-06, + "loss": 0.1137, + "step": 414 + }, + { + "epoch": 0.08326661329063251, + "learning_rate": 4.578637483601732e-06, + "loss": 0.4658, + "step": 416 + }, + { + "epoch": 0.08406725380304243, + "learning_rate": 4.590385882167206e-06, + "loss": 0.3881, + "step": 418 + }, + { + "epoch": 0.08406725380304243, + "learning_rate": 4.602144845826234e-06, + "loss": 0.5698, + "step": 420 + }, + { + "epoch": 0.08486789431545236, + "learning_rate": 4.613914351613337e-06, + "loss": 0.1149, + "step": 422 + }, + { + "epoch": 0.08486789431545236, + "learning_rate": 4.625694376542399e-06, + "loss": 0.149, + "step": 424 + }, + { + "epoch": 0.08566853482786228, + "learning_rate": 4.637484897606777e-06, + "loss": 0.4396, + "step": 426 + }, + { + "epoch": 0.08566853482786228, + "learning_rate": 4.649285891779326e-06, + "loss": 0.5493, + "step": 428 + }, + { + "epoch": 0.08646917534027222, + "learning_rate": 4.661097336012451e-06, + "loss": 0.4238, + "step": 430 + }, + { + "epoch": 0.08646917534027222, + "learning_rate": 4.672919207238145e-06, + "loss": 0.4509, + "step": 432 + }, + { + "epoch": 0.08726981585268215, + "learning_rate": 4.684751482368022e-06, + "loss": 0.2188, + "step": 434 + }, + { + "epoch": 0.08726981585268215, + "learning_rate": 4.696594138293421e-06, + "loss": 0.1277, + "step": 436 + }, + { + "epoch": 0.08807045636509207, + "learning_rate": 4.7084471518853656e-06, + "loss": 0.4742, + "step": 438 + }, + { + "epoch": 0.08807045636509207, + "learning_rate": 4.720310499994664e-06, + "loss": 0.3137, + "step": 440 + }, + { + "epoch": 0.088871096877502, + "learning_rate": 4.732184159451937e-06, + "loss": 0.3476, + "step": 442 + }, + { + "epoch": 0.088871096877502, + "learning_rate": 4.744068107067673e-06, + "loss": 0.141, + "step": 444 + }, + { + "epoch": 0.08967173738991192, + "learning_rate": 4.755962319632249e-06, + "loss": 0.3123, + "step": 446 + }, + { + "epoch": 0.08967173738991192, + "learning_rate": 4.767866773916041e-06, + "loss": 0.0884, + "step": 448 + }, + { + "epoch": 0.09047237790232186, + "learning_rate": 4.779781446669376e-06, + "loss": 0.2609, + "step": 450 + }, + { + "epoch": 0.09047237790232186, + "learning_rate": 4.79170631462264e-06, + "loss": 0.2777, + "step": 452 + }, + { + "epoch": 0.09127301841473179, + "learning_rate": 4.8036413544863095e-06, + "loss": 0.3376, + "step": 454 + }, + { + "epoch": 0.09127301841473179, + "learning_rate": 4.81558654295099e-06, + "loss": 0.3037, + "step": 456 + }, + { + "epoch": 0.09207365892714171, + "learning_rate": 4.827541856687471e-06, + "loss": 0.2645, + "step": 458 + }, + { + "epoch": 0.09207365892714171, + "learning_rate": 4.839507272346751e-06, + "loss": 0.2243, + "step": 460 + }, + { + "epoch": 0.09287429943955164, + "learning_rate": 4.8514827665601425e-06, + "loss": 0.1133, + "step": 462 + }, + { + "epoch": 0.09287429943955164, + "learning_rate": 4.863468315939234e-06, + "loss": 0.0677, + "step": 464 + }, + { + "epoch": 0.09367493995196156, + "learning_rate": 4.875463897075985e-06, + "loss": 0.1072, + "step": 466 + }, + { + "epoch": 0.09367493995196156, + "learning_rate": 4.8874694865427676e-06, + "loss": 0.1586, + "step": 468 + }, + { + "epoch": 0.0944755804643715, + "learning_rate": 4.899485060892404e-06, + "loss": 0.2633, + "step": 470 + }, + { + "epoch": 0.0944755804643715, + "learning_rate": 4.911510596658202e-06, + "loss": 0.0976, + "step": 472 + }, + { + "epoch": 0.09527622097678143, + "learning_rate": 4.9235460703540615e-06, + "loss": 0.1016, + "step": 474 + }, + { + "epoch": 0.09527622097678143, + "learning_rate": 4.935591458474425e-06, + "loss": 0.7014, + "step": 476 + }, + { + "epoch": 0.09607686148919135, + "learning_rate": 4.947646737494389e-06, + "loss": 0.5678, + "step": 478 + }, + { + "epoch": 0.09607686148919135, + "learning_rate": 4.959711883869734e-06, + "loss": 0.267, + "step": 480 + }, + { + "epoch": 0.09687750200160128, + "learning_rate": 4.9717868740369645e-06, + "loss": 0.2347, + "step": 482 + }, + { + "epoch": 0.09687750200160128, + "learning_rate": 4.9838716844133665e-06, + "loss": 0.1177, + "step": 484 + }, + { + "epoch": 0.0976781425140112, + "learning_rate": 4.9959662913970254e-06, + "loss": 0.1108, + "step": 486 + }, + { + "epoch": 0.0976781425140112, + "learning_rate": 5.0080706713669435e-06, + "loss": 0.0902, + "step": 488 + }, + { + "epoch": 0.09847878302642114, + "learning_rate": 5.02018480068299e-06, + "loss": 0.1663, + "step": 490 + }, + { + "epoch": 0.09847878302642114, + "learning_rate": 5.032308655686007e-06, + "loss": 0.105, + "step": 492 + }, + { + "epoch": 0.09927942353883107, + "learning_rate": 5.044442212697842e-06, + "loss": 0.2408, + "step": 494 + }, + { + "epoch": 0.09927942353883107, + "learning_rate": 5.056585448021398e-06, + "loss": 0.0514, + "step": 496 + }, + { + "epoch": 0.100080064051241, + "learning_rate": 5.068738337940655e-06, + "loss": 0.2519, + "step": 498 + }, + { + "epoch": 0.100080064051241, + "learning_rate": 5.080900858720789e-06, + "loss": 0.158, + "step": 500 + }, + { + "epoch": 0.10088070456365092, + "learning_rate": 5.093072986608116e-06, + "loss": 0.3834, + "step": 502 + }, + { + "epoch": 0.10088070456365092, + "learning_rate": 5.105254697830208e-06, + "loss": 0.0758, + "step": 504 + }, + { + "epoch": 0.10168134507606084, + "learning_rate": 5.1174459685959175e-06, + "loss": 0.5506, + "step": 506 + }, + { + "epoch": 0.10168134507606084, + "learning_rate": 5.129646775095432e-06, + "loss": 0.2481, + "step": 508 + }, + { + "epoch": 0.10248198558847077, + "learning_rate": 5.141857093500307e-06, + "loss": 0.4852, + "step": 510 + }, + { + "epoch": 0.10248198558847077, + "learning_rate": 5.154076899963514e-06, + "loss": 0.1585, + "step": 512 + }, + { + "epoch": 0.10328262610088071, + "learning_rate": 5.166306170619537e-06, + "loss": 0.1898, + "step": 514 + }, + { + "epoch": 0.10328262610088071, + "learning_rate": 5.178544881584328e-06, + "loss": 0.1211, + "step": 516 + }, + { + "epoch": 0.10408326661329063, + "learning_rate": 5.190793008955421e-06, + "loss": 0.2775, + "step": 518 + }, + { + "epoch": 0.10408326661329063, + "learning_rate": 5.203050528811959e-06, + "loss": 0.0987, + "step": 520 + }, + { + "epoch": 0.10488390712570056, + "learning_rate": 5.215317417214739e-06, + "loss": 0.0789, + "step": 522 + }, + { + "epoch": 0.10488390712570056, + "learning_rate": 5.227593650206246e-06, + "loss": 0.0017, + "step": 524 + }, + { + "epoch": 0.10568454763811048, + "learning_rate": 5.239879203810763e-06, + "loss": 0.0573, + "step": 526 + }, + { + "epoch": 0.10568454763811048, + "learning_rate": 5.2521740540343205e-06, + "loss": 0.0253, + "step": 528 + }, + { + "epoch": 0.10648518815052041, + "learning_rate": 5.264478176864811e-06, + "loss": 0.2421, + "step": 530 + }, + { + "epoch": 0.10648518815052041, + "learning_rate": 5.2767915482720164e-06, + "loss": 0.117, + "step": 532 + }, + { + "epoch": 0.10728582866293035, + "learning_rate": 5.289114144207656e-06, + "loss": 0.1657, + "step": 534 + }, + { + "epoch": 0.10728582866293035, + "learning_rate": 5.3014459406054295e-06, + "loss": 0.3285, + "step": 536 + }, + { + "epoch": 0.10808646917534027, + "learning_rate": 5.313786913381061e-06, + "loss": 0.1668, + "step": 538 + }, + { + "epoch": 0.10808646917534027, + "learning_rate": 5.3261370384323904e-06, + "loss": 0.2319, + "step": 540 + }, + { + "epoch": 0.1088871096877502, + "learning_rate": 5.338496291639341e-06, + "loss": 0.956, + "step": 542 + }, + { + "epoch": 0.1088871096877502, + "learning_rate": 5.350864648864026e-06, + "loss": 0.2241, + "step": 544 + }, + { + "epoch": 0.10968775020016013, + "learning_rate": 5.363242085950773e-06, + "loss": 0.1962, + "step": 546 + }, + { + "epoch": 0.10968775020016013, + "learning_rate": 5.375628578726181e-06, + "loss": 0.5747, + "step": 548 + }, + { + "epoch": 0.11048839071257005, + "learning_rate": 5.3880241029991434e-06, + "loss": 0.232, + "step": 550 + }, + { + "epoch": 0.11048839071257005, + "learning_rate": 5.4004286345609665e-06, + "loss": 0.4421, + "step": 552 + }, + { + "epoch": 0.11128903122497999, + "learning_rate": 5.412842149185316e-06, + "loss": 0.1223, + "step": 554 + }, + { + "epoch": 0.11128903122497999, + "learning_rate": 5.425264622628326e-06, + "loss": 0.1072, + "step": 556 + }, + { + "epoch": 0.11208967173738991, + "learning_rate": 5.437696030628639e-06, + "loss": 0.1282, + "step": 558 + }, + { + "epoch": 0.11208967173738991, + "learning_rate": 5.450136348907444e-06, + "loss": 0.046, + "step": 560 + }, + { + "epoch": 0.11289031224979984, + "learning_rate": 5.462585553168532e-06, + "loss": 0.1456, + "step": 562 + }, + { + "epoch": 0.11289031224979984, + "learning_rate": 5.475043619098321e-06, + "loss": 0.2147, + "step": 564 + }, + { + "epoch": 0.11369095276220977, + "learning_rate": 5.487510522365969e-06, + "loss": 0.2151, + "step": 566 + }, + { + "epoch": 0.11369095276220977, + "learning_rate": 5.499986238623329e-06, + "loss": 0.2098, + "step": 568 + }, + { + "epoch": 0.11449159327461969, + "learning_rate": 5.512470743505057e-06, + "loss": 0.1212, + "step": 570 + }, + { + "epoch": 0.11449159327461969, + "learning_rate": 5.524964012628644e-06, + "loss": 0.1138, + "step": 572 + }, + { + "epoch": 0.11529223378702963, + "learning_rate": 5.537466021594464e-06, + "loss": 0.3512, + "step": 574 + }, + { + "epoch": 0.11529223378702963, + "learning_rate": 5.549976745985809e-06, + "loss": 0.0874, + "step": 576 + }, + { + "epoch": 0.11609287429943956, + "learning_rate": 5.5624961613689934e-06, + "loss": 0.2841, + "step": 578 + }, + { + "epoch": 0.11609287429943956, + "learning_rate": 5.57502424329331e-06, + "loss": 0.0634, + "step": 580 + }, + { + "epoch": 0.11689351481184948, + "learning_rate": 5.5875609672911465e-06, + "loss": 0.1715, + "step": 582 + }, + { + "epoch": 0.11689351481184948, + "learning_rate": 5.6001063088780085e-06, + "loss": 0.684, + "step": 584 + }, + { + "epoch": 0.1176941553242594, + "learning_rate": 5.6126602435525725e-06, + "loss": 0.2229, + "step": 586 + }, + { + "epoch": 0.1176941553242594, + "learning_rate": 5.62522274679673e-06, + "loss": 0.7892, + "step": 588 + }, + { + "epoch": 0.11849479583666933, + "learning_rate": 5.637793794075625e-06, + "loss": 0.1324, + "step": 590 + }, + { + "epoch": 0.11849479583666933, + "learning_rate": 5.650373360837763e-06, + "loss": 0.2225, + "step": 592 + }, + { + "epoch": 0.11929543634907927, + "learning_rate": 5.662961422514961e-06, + "loss": 0.6194, + "step": 594 + }, + { + "epoch": 0.11929543634907927, + "learning_rate": 5.675557954522462e-06, + "loss": 0.0789, + "step": 596 + }, + { + "epoch": 0.1200960768614892, + "learning_rate": 5.688162932258965e-06, + "loss": 0.1673, + "step": 598 + }, + { + "epoch": 0.1200960768614892, + "learning_rate": 5.700776331106674e-06, + "loss": 0.3653, + "step": 600 + }, + { + "epoch": 0.12089671737389912, + "learning_rate": 5.713398126431353e-06, + "loss": 0.3828, + "step": 602 + }, + { + "epoch": 0.12089671737389912, + "learning_rate": 5.726028293582342e-06, + "loss": 0.6325, + "step": 604 + }, + { + "epoch": 0.12169735788630905, + "learning_rate": 5.738666807892684e-06, + "loss": 0.2609, + "step": 606 + }, + { + "epoch": 0.12169735788630905, + "learning_rate": 5.751313644679071e-06, + "loss": 0.0512, + "step": 608 + }, + { + "epoch": 0.12249799839871897, + "learning_rate": 5.763968779241957e-06, + "loss": 0.1663, + "step": 610 + }, + { + "epoch": 0.12249799839871897, + "learning_rate": 5.776632186865589e-06, + "loss": 0.4288, + "step": 612 + }, + { + "epoch": 0.1232986389111289, + "learning_rate": 5.7893038428180584e-06, + "loss": 0.3353, + "step": 614 + }, + { + "epoch": 0.1232986389111289, + "learning_rate": 5.8019837223513295e-06, + "loss": 0.1313, + "step": 616 + }, + { + "epoch": 0.12409927942353884, + "learning_rate": 5.814671800701357e-06, + "loss": 0.3143, + "step": 618 + }, + { + "epoch": 0.12409927942353884, + "learning_rate": 5.827368053088032e-06, + "loss": 0.1862, + "step": 620 + }, + { + "epoch": 0.12489991993594876, + "learning_rate": 5.840072454715297e-06, + "loss": 0.5415, + "step": 622 + }, + { + "epoch": 0.12489991993594876, + "learning_rate": 5.852784980771182e-06, + "loss": 0.7485, + "step": 624 + }, + { + "epoch": 0.1257005604483587, + "learning_rate": 5.865505606427848e-06, + "loss": 0.3478, + "step": 626 + }, + { + "epoch": 0.1257005604483587, + "learning_rate": 5.878234306841637e-06, + "loss": 0.1675, + "step": 628 + }, + { + "epoch": 0.1265012009607686, + "learning_rate": 5.890971057153105e-06, + "loss": 0.2724, + "step": 630 + }, + { + "epoch": 0.1265012009607686, + "learning_rate": 5.903715832487138e-06, + "loss": 0.0508, + "step": 632 + }, + { + "epoch": 0.12730184147317855, + "learning_rate": 5.916468607952892e-06, + "loss": 0.3178, + "step": 634 + }, + { + "epoch": 0.12730184147317855, + "learning_rate": 5.929229358643925e-06, + "loss": 0.1944, + "step": 636 + }, + { + "epoch": 0.12810248198558846, + "learning_rate": 5.941998059638212e-06, + "loss": 0.1959, + "step": 638 + }, + { + "epoch": 0.12810248198558846, + "learning_rate": 5.954774685998206e-06, + "loss": 0.6868, + "step": 640 + }, + { + "epoch": 0.1289031224979984, + "learning_rate": 5.9675592127708585e-06, + "loss": 0.1083, + "step": 642 + }, + { + "epoch": 0.1289031224979984, + "learning_rate": 5.9803516149877475e-06, + "loss": 0.2595, + "step": 644 + }, + { + "epoch": 0.1297037630104083, + "learning_rate": 5.993151867665015e-06, + "loss": 0.5629, + "step": 646 + }, + { + "epoch": 0.1297037630104083, + "learning_rate": 6.005959945803494e-06, + "loss": 0.1435, + "step": 648 + }, + { + "epoch": 0.13050440352281825, + "learning_rate": 6.01877582438873e-06, + "loss": 0.4032, + "step": 650 + }, + { + "epoch": 0.13050440352281825, + "learning_rate": 6.03159947839103e-06, + "loss": 0.0958, + "step": 652 + }, + { + "epoch": 0.1313050440352282, + "learning_rate": 6.0444308827655265e-06, + "loss": 2.0009, + "step": 654 + }, + { + "epoch": 0.1313050440352282, + "learning_rate": 6.057270012452186e-06, + "loss": 0.1283, + "step": 656 + }, + { + "epoch": 0.1321056845476381, + "learning_rate": 6.070116842375947e-06, + "loss": 0.4762, + "step": 658 + }, + { + "epoch": 0.1321056845476381, + "learning_rate": 6.082971347446654e-06, + "loss": 0.1662, + "step": 660 + }, + { + "epoch": 0.13290632506004804, + "learning_rate": 6.095833502559182e-06, + "loss": 0.2811, + "step": 662 + }, + { + "epoch": 0.13290632506004804, + "learning_rate": 6.108703282593461e-06, + "loss": 0.142, + "step": 664 + }, + { + "epoch": 0.13370696557245795, + "learning_rate": 6.121580662414533e-06, + "loss": 0.2816, + "step": 666 + }, + { + "epoch": 0.13370696557245795, + "learning_rate": 6.13446561687258e-06, + "loss": 0.1697, + "step": 668 + }, + { + "epoch": 0.1345076060848679, + "learning_rate": 6.147358120803041e-06, + "loss": 0.2465, + "step": 670 + }, + { + "epoch": 0.1345076060848679, + "learning_rate": 6.160258149026557e-06, + "loss": 0.2944, + "step": 672 + }, + { + "epoch": 0.13530824659727783, + "learning_rate": 6.173165676349095e-06, + "loss": 0.1375, + "step": 674 + }, + { + "epoch": 0.13530824659727783, + "learning_rate": 6.186080677561974e-06, + "loss": 0.1281, + "step": 676 + }, + { + "epoch": 0.13610888710968774, + "learning_rate": 6.1990031274419186e-06, + "loss": 0.3145, + "step": 678 + }, + { + "epoch": 0.13610888710968774, + "learning_rate": 6.2119330007511014e-06, + "loss": 0.5827, + "step": 680 + }, + { + "epoch": 0.13690952762209768, + "learning_rate": 6.224870272237185e-06, + "loss": 0.1274, + "step": 682 + }, + { + "epoch": 0.13690952762209768, + "learning_rate": 6.237814916633431e-06, + "loss": 0.1176, + "step": 684 + }, + { + "epoch": 0.1377101681345076, + "learning_rate": 6.250766908658652e-06, + "loss": 0.232, + "step": 686 + }, + { + "epoch": 0.1377101681345076, + "learning_rate": 6.263726223017326e-06, + "loss": 0.1949, + "step": 688 + }, + { + "epoch": 0.13851080864691753, + "learning_rate": 6.2766928343996314e-06, + "loss": 0.4071, + "step": 690 + }, + { + "epoch": 0.13851080864691753, + "learning_rate": 6.289666717481496e-06, + "loss": 0.1494, + "step": 692 + }, + { + "epoch": 0.13931144915932747, + "learning_rate": 6.3026478469246285e-06, + "loss": 0.237, + "step": 694 + }, + { + "epoch": 0.13931144915932747, + "learning_rate": 6.315636197376634e-06, + "loss": 0.2303, + "step": 696 + }, + { + "epoch": 0.14011208967173738, + "learning_rate": 6.328631743470968e-06, + "loss": 0.3638, + "step": 698 + }, + { + "epoch": 0.14011208967173738, + "learning_rate": 6.341634459827044e-06, + "loss": 0.2009, + "step": 700 + }, + { + "epoch": 0.14091273018414732, + "learning_rate": 6.354644321050279e-06, + "loss": 0.1447, + "step": 702 + }, + { + "epoch": 0.14091273018414732, + "learning_rate": 6.3676613017321305e-06, + "loss": 0.2465, + "step": 704 + }, + { + "epoch": 0.14171337069655723, + "learning_rate": 6.380685376450153e-06, + "loss": 0.1154, + "step": 706 + }, + { + "epoch": 0.14171337069655723, + "learning_rate": 6.393716519768032e-06, + "loss": 0.2026, + "step": 708 + }, + { + "epoch": 0.14251401120896717, + "learning_rate": 6.406754706235692e-06, + "loss": 0.2759, + "step": 710 + }, + { + "epoch": 0.14251401120896717, + "learning_rate": 6.419799910389257e-06, + "loss": 0.2683, + "step": 712 + }, + { + "epoch": 0.1433146517213771, + "learning_rate": 6.432852106751162e-06, + "loss": 0.0986, + "step": 714 + }, + { + "epoch": 0.1433146517213771, + "learning_rate": 6.445911269830183e-06, + "loss": 0.3629, + "step": 716 + }, + { + "epoch": 0.14411529223378702, + "learning_rate": 6.458977374121492e-06, + "loss": 0.4507, + "step": 718 + }, + { + "epoch": 0.14411529223378702, + "learning_rate": 6.472050394106689e-06, + "loss": 0.1346, + "step": 720 + }, + { + "epoch": 0.14491593274619696, + "learning_rate": 6.485130304253915e-06, + "loss": 0.1281, + "step": 722 + }, + { + "epoch": 0.14491593274619696, + "learning_rate": 6.498217079017806e-06, + "loss": 0.3212, + "step": 724 + }, + { + "epoch": 0.14571657325860687, + "learning_rate": 6.511310692839605e-06, + "loss": 0.4745, + "step": 726 + }, + { + "epoch": 0.14571657325860687, + "learning_rate": 6.524411120147204e-06, + "loss": 0.232, + "step": 728 + }, + { + "epoch": 0.1465172137710168, + "learning_rate": 6.537518335355182e-06, + "loss": 0.3634, + "step": 730 + }, + { + "epoch": 0.1465172137710168, + "learning_rate": 6.5506323128648654e-06, + "loss": 0.1944, + "step": 732 + }, + { + "epoch": 0.14731785428342675, + "learning_rate": 6.563753027064355e-06, + "loss": 0.2973, + "step": 734 + }, + { + "epoch": 0.14731785428342675, + "learning_rate": 6.576880452328645e-06, + "loss": 0.2412, + "step": 736 + }, + { + "epoch": 0.14811849479583666, + "learning_rate": 6.590014563019571e-06, + "loss": 0.3632, + "step": 738 + }, + { + "epoch": 0.14811849479583666, + "learning_rate": 6.603155333485934e-06, + "loss": 0.4961, + "step": 740 + }, + { + "epoch": 0.1489191353082466, + "learning_rate": 6.61630273806352e-06, + "loss": 0.2074, + "step": 742 + }, + { + "epoch": 0.1489191353082466, + "learning_rate": 6.6294567510751675e-06, + "loss": 0.2529, + "step": 744 + }, + { + "epoch": 0.14971977582065651, + "learning_rate": 6.642617346830784e-06, + "loss": 0.3805, + "step": 746 + }, + { + "epoch": 0.14971977582065651, + "learning_rate": 6.655784499627476e-06, + "loss": 0.1138, + "step": 748 + }, + { + "epoch": 0.15052041633306645, + "learning_rate": 6.6689581837494925e-06, + "loss": 0.5477, + "step": 750 + }, + { + "epoch": 0.15052041633306645, + "learning_rate": 6.682138373468341e-06, + "loss": 0.5494, + "step": 752 + }, + { + "epoch": 0.1513210568454764, + "learning_rate": 6.695325043042827e-06, + "loss": 0.1011, + "step": 754 + }, + { + "epoch": 0.1513210568454764, + "learning_rate": 6.7085181667191e-06, + "loss": 0.1012, + "step": 756 + }, + { + "epoch": 0.1521216973578863, + "learning_rate": 6.7217177187307e-06, + "loss": 0.1505, + "step": 758 + }, + { + "epoch": 0.1521216973578863, + "learning_rate": 6.734923673298605e-06, + "loss": 0.2461, + "step": 760 + }, + { + "epoch": 0.15292233787029624, + "learning_rate": 6.748136004631327e-06, + "loss": 0.0736, + "step": 762 + }, + { + "epoch": 0.15292233787029624, + "learning_rate": 6.761354686924883e-06, + "loss": 0.0537, + "step": 764 + }, + { + "epoch": 0.15372297838270615, + "learning_rate": 6.774579694362902e-06, + "loss": 0.3171, + "step": 766 + }, + { + "epoch": 0.15372297838270615, + "learning_rate": 6.787811001116654e-06, + "loss": 0.2944, + "step": 768 + }, + { + "epoch": 0.1545236188951161, + "learning_rate": 6.801048581345113e-06, + "loss": 0.4731, + "step": 770 + }, + { + "epoch": 0.1545236188951161, + "learning_rate": 6.8142924091949955e-06, + "loss": 0.215, + "step": 772 + }, + { + "epoch": 0.15532425940752603, + "learning_rate": 6.827542458800804e-06, + "loss": 0.1031, + "step": 774 + }, + { + "epoch": 0.15532425940752603, + "learning_rate": 6.840798704284939e-06, + "loss": 0.0111, + "step": 776 + }, + { + "epoch": 0.15612489991993594, + "learning_rate": 6.854061119757647e-06, + "loss": 0.129, + "step": 778 + }, + { + "epoch": 0.15612489991993594, + "learning_rate": 6.867329679317144e-06, + "loss": 0.3653, + "step": 780 + }, + { + "epoch": 0.15692554043234588, + "learning_rate": 6.880604357049646e-06, + "loss": 0.6752, + "step": 782 + }, + { + "epoch": 0.15692554043234588, + "learning_rate": 6.893885127029419e-06, + "loss": 0.0583, + "step": 784 + }, + { + "epoch": 0.1577261809447558, + "learning_rate": 6.907171963318815e-06, + "loss": 0.2816, + "step": 786 + }, + { + "epoch": 0.1577261809447558, + "learning_rate": 6.920464839968391e-06, + "loss": 0.2771, + "step": 788 + }, + { + "epoch": 0.15852682145716573, + "learning_rate": 6.9337637310168494e-06, + "loss": 0.263, + "step": 790 + }, + { + "epoch": 0.15852682145716573, + "learning_rate": 6.94706861049117e-06, + "loss": 0.052, + "step": 792 + }, + { + "epoch": 0.15932746196957567, + "learning_rate": 6.960379452406636e-06, + "loss": 0.1439, + "step": 794 + }, + { + "epoch": 0.15932746196957567, + "learning_rate": 6.973696230766884e-06, + "loss": 0.0985, + "step": 796 + }, + { + "epoch": 0.16012810248198558, + "learning_rate": 6.9870189195639595e-06, + "loss": 0.6513, + "step": 798 + }, + { + "epoch": 0.16012810248198558, + "learning_rate": 7.000347492778341e-06, + "loss": 0.2266, + "step": 800 + }, + { + "epoch": 0.16092874299439552, + "learning_rate": 7.013681924379073e-06, + "loss": 0.1196, + "step": 802 + }, + { + "epoch": 0.16092874299439552, + "learning_rate": 7.027022188323704e-06, + "loss": 0.0512, + "step": 804 + }, + { + "epoch": 0.16172938350680544, + "learning_rate": 7.040368258558412e-06, + "loss": 0.2521, + "step": 806 + }, + { + "epoch": 0.16172938350680544, + "learning_rate": 7.05372010901803e-06, + "loss": 0.0772, + "step": 808 + }, + { + "epoch": 0.16253002401921537, + "learning_rate": 7.0670777136261035e-06, + "loss": 0.0958, + "step": 810 + }, + { + "epoch": 0.16253002401921537, + "learning_rate": 7.080441046294945e-06, + "loss": 0.4974, + "step": 812 + }, + { + "epoch": 0.1633306645316253, + "learning_rate": 7.093810080925657e-06, + "loss": 0.6324, + "step": 814 + }, + { + "epoch": 0.1633306645316253, + "learning_rate": 7.1071847914082605e-06, + "loss": 0.1765, + "step": 816 + }, + { + "epoch": 0.16413130504403523, + "learning_rate": 7.120565151621638e-06, + "loss": 0.2464, + "step": 818 + }, + { + "epoch": 0.16413130504403523, + "learning_rate": 7.133951135433656e-06, + "loss": 0.0276, + "step": 820 + }, + { + "epoch": 0.16493194555644516, + "learning_rate": 7.1473427167012e-06, + "loss": 0.166, + "step": 822 + }, + { + "epoch": 0.16493194555644516, + "learning_rate": 7.160739869270219e-06, + "loss": 0.7568, + "step": 824 + }, + { + "epoch": 0.16573258606885508, + "learning_rate": 7.1741425669757854e-06, + "loss": 0.5495, + "step": 826 + }, + { + "epoch": 0.16573258606885508, + "learning_rate": 7.18755078364214e-06, + "loss": 0.2303, + "step": 828 + }, + { + "epoch": 0.16653322658126501, + "learning_rate": 7.200964493082727e-06, + "loss": 0.1223, + "step": 830 + }, + { + "epoch": 0.16653322658126501, + "learning_rate": 7.214383669100317e-06, + "loss": 0.1789, + "step": 832 + }, + { + "epoch": 0.16733386709367493, + "learning_rate": 7.227808285486952e-06, + "loss": 0.1132, + "step": 834 + }, + { + "epoch": 0.16733386709367493, + "learning_rate": 7.241238316024064e-06, + "loss": 0.2302, + "step": 836 + }, + { + "epoch": 0.16813450760608487, + "learning_rate": 7.254673734482513e-06, + "loss": 0.1941, + "step": 838 + }, + { + "epoch": 0.16813450760608487, + "learning_rate": 7.268114514622635e-06, + "loss": 0.1538, + "step": 840 + }, + { + "epoch": 0.1689351481184948, + "learning_rate": 7.2815606301942945e-06, + "loss": 0.246, + "step": 842 + }, + { + "epoch": 0.1689351481184948, + "learning_rate": 7.2950120549369204e-06, + "loss": 0.1976, + "step": 844 + }, + { + "epoch": 0.16973578863090472, + "learning_rate": 7.308468762579623e-06, + "loss": 0.3776, + "step": 846 + }, + { + "epoch": 0.16973578863090472, + "learning_rate": 7.321930726841144e-06, + "loss": 0.229, + "step": 848 + }, + { + "epoch": 0.17053642914331466, + "learning_rate": 7.3353979214299765e-06, + "loss": 0.1439, + "step": 850 + }, + { + "epoch": 0.17053642914331466, + "learning_rate": 7.348870320044395e-06, + "loss": 0.2369, + "step": 852 + }, + { + "epoch": 0.17133706965572457, + "learning_rate": 7.362347896372515e-06, + "loss": 0.2018, + "step": 854 + }, + { + "epoch": 0.17133706965572457, + "learning_rate": 7.375830624092336e-06, + "loss": 0.0117, + "step": 856 + }, + { + "epoch": 0.1721377101681345, + "learning_rate": 7.389318476871784e-06, + "loss": 0.2814, + "step": 858 + }, + { + "epoch": 0.1721377101681345, + "learning_rate": 7.402811428368824e-06, + "loss": 0.0235, + "step": 860 + }, + { + "epoch": 0.17293835068054444, + "learning_rate": 7.416309452231411e-06, + "loss": 0.1867, + "step": 862 + }, + { + "epoch": 0.17293835068054444, + "learning_rate": 7.429812522097613e-06, + "loss": 0.2095, + "step": 864 + }, + { + "epoch": 0.17373899119295436, + "learning_rate": 7.443320611595641e-06, + "loss": 0.307, + "step": 866 + }, + { + "epoch": 0.17373899119295436, + "learning_rate": 7.4568336943439055e-06, + "loss": 0.0347, + "step": 868 + }, + { + "epoch": 0.1745396317053643, + "learning_rate": 7.470351743951061e-06, + "loss": 0.1052, + "step": 870 + }, + { + "epoch": 0.1745396317053643, + "learning_rate": 7.4838747340160475e-06, + "loss": 0.0678, + "step": 872 + }, + { + "epoch": 0.1753402722177742, + "learning_rate": 7.497402638128209e-06, + "loss": 0.3855, + "step": 874 + }, + { + "epoch": 0.1753402722177742, + "learning_rate": 7.510935429867233e-06, + "loss": 0.2218, + "step": 876 + }, + { + "epoch": 0.17614091273018415, + "learning_rate": 7.52447308280329e-06, + "loss": 0.2914, + "step": 878 + }, + { + "epoch": 0.17614091273018415, + "learning_rate": 7.538015570497046e-06, + "loss": 0.0428, + "step": 880 + }, + { + "epoch": 0.17694155324259409, + "learning_rate": 7.551562866499732e-06, + "loss": 0.2521, + "step": 882 + }, + { + "epoch": 0.17694155324259409, + "learning_rate": 7.5651149443531846e-06, + "loss": 0.1713, + "step": 884 + }, + { + "epoch": 0.177742193755004, + "learning_rate": 7.578671777589884e-06, + "loss": 0.114, + "step": 886 + }, + { + "epoch": 0.177742193755004, + "learning_rate": 7.592233339733077e-06, + "loss": 0.0543, + "step": 888 + }, + { + "epoch": 0.17854283426741394, + "learning_rate": 7.605799604296721e-06, + "loss": 0.2854, + "step": 890 + }, + { + "epoch": 0.17854283426741394, + "learning_rate": 7.619370544785608e-06, + "loss": 0.4686, + "step": 892 + }, + { + "epoch": 0.17934347477982385, + "learning_rate": 7.632946134695396e-06, + "loss": 0.6396, + "step": 894 + }, + { + "epoch": 0.17934347477982385, + "learning_rate": 7.646526347512665e-06, + "loss": 0.0534, + "step": 896 + }, + { + "epoch": 0.1801441152922338, + "learning_rate": 7.660111156714964e-06, + "loss": 0.6108, + "step": 898 + }, + { + "epoch": 0.1801441152922338, + "learning_rate": 7.67370053577085e-06, + "loss": 0.2915, + "step": 900 + }, + { + "epoch": 0.18094475580464373, + "learning_rate": 7.687294458140006e-06, + "loss": 0.281, + "step": 902 + }, + { + "epoch": 0.18094475580464373, + "learning_rate": 7.70089289727319e-06, + "loss": 0.1215, + "step": 904 + }, + { + "epoch": 0.18174539631705364, + "learning_rate": 7.714495826612353e-06, + "loss": 0.246, + "step": 906 + }, + { + "epoch": 0.18174539631705364, + "learning_rate": 7.728103219590684e-06, + "loss": 0.123, + "step": 908 + }, + { + "epoch": 0.18254603682946358, + "learning_rate": 7.741715049632646e-06, + "loss": 0.1279, + "step": 910 + }, + { + "epoch": 0.18254603682946358, + "learning_rate": 7.755331290154041e-06, + "loss": 0.0398, + "step": 912 + }, + { + "epoch": 0.1833466773418735, + "learning_rate": 7.76895191456204e-06, + "loss": 0.2773, + "step": 914 + }, + { + "epoch": 0.1833466773418735, + "learning_rate": 7.7825768962553e-06, + "loss": 0.0247, + "step": 916 + }, + { + "epoch": 0.18414731785428343, + "learning_rate": 7.796206208623925e-06, + "loss": 0.3029, + "step": 918 + }, + { + "epoch": 0.18414731785428343, + "learning_rate": 7.809839825049565e-06, + "loss": 0.016, + "step": 920 + }, + { + "epoch": 0.18494795836669337, + "learning_rate": 7.82347771890548e-06, + "loss": 0.1394, + "step": 922 + }, + { + "epoch": 0.18494795836669337, + "learning_rate": 7.83711986355656e-06, + "loss": 0.0625, + "step": 924 + }, + { + "epoch": 0.18574859887910328, + "learning_rate": 7.850766232359408e-06, + "loss": 0.2673, + "step": 926 + }, + { + "epoch": 0.18574859887910328, + "learning_rate": 7.864416798662347e-06, + "loss": 0.0296, + "step": 928 + }, + { + "epoch": 0.18654923939151322, + "learning_rate": 7.878071535805564e-06, + "loss": 0.2151, + "step": 930 + }, + { + "epoch": 0.18654923939151322, + "learning_rate": 7.891730417121043e-06, + "loss": 0.0957, + "step": 932 + }, + { + "epoch": 0.18734987990392313, + "learning_rate": 7.90539341593269e-06, + "loss": 0.1187, + "step": 934 + }, + { + "epoch": 0.18734987990392313, + "learning_rate": 7.919060505556376e-06, + "loss": 0.0193, + "step": 936 + }, + { + "epoch": 0.18815052041633307, + "learning_rate": 7.932731659299978e-06, + "loss": 0.2859, + "step": 938 + }, + { + "epoch": 0.18815052041633307, + "learning_rate": 7.946406850463435e-06, + "loss": 0.0515, + "step": 940 + }, + { + "epoch": 0.188951160928743, + "learning_rate": 7.960086052338788e-06, + "loss": 0.3203, + "step": 942 + }, + { + "epoch": 0.188951160928743, + "learning_rate": 7.973769238210291e-06, + "loss": 0.028, + "step": 944 + }, + { + "epoch": 0.18975180144115292, + "learning_rate": 7.987456381354371e-06, + "loss": 0.5219, + "step": 946 + }, + { + "epoch": 0.18975180144115292, + "learning_rate": 8.001147455039737e-06, + "loss": 0.0784, + "step": 948 + }, + { + "epoch": 0.19055244195356286, + "learning_rate": 8.01484243252743e-06, + "loss": 0.4221, + "step": 950 + }, + { + "epoch": 0.19055244195356286, + "learning_rate": 8.028541287070858e-06, + "loss": 0.0357, + "step": 952 + }, + { + "epoch": 0.19135308246597277, + "learning_rate": 8.042243991915866e-06, + "loss": 0.0908, + "step": 954 + }, + { + "epoch": 0.19135308246597277, + "learning_rate": 8.055950520300756e-06, + "loss": 0.0167, + "step": 956 + }, + { + "epoch": 0.1921537229783827, + "learning_rate": 8.069660845456411e-06, + "loss": 0.6893, + "step": 958 + }, + { + "epoch": 0.1921537229783827, + "learning_rate": 8.083374940606256e-06, + "loss": 0.3342, + "step": 960 + }, + { + "epoch": 0.19295436349079265, + "learning_rate": 8.097092778966364e-06, + "loss": 0.5962, + "step": 962 + }, + { + "epoch": 0.19295436349079265, + "learning_rate": 8.110814333745503e-06, + "loss": 0.4965, + "step": 964 + }, + { + "epoch": 0.19375500400320256, + "learning_rate": 8.124539578145176e-06, + "loss": 0.1091, + "step": 966 + }, + { + "epoch": 0.19375500400320256, + "learning_rate": 8.138268485359684e-06, + "loss": 0.0374, + "step": 968 + }, + { + "epoch": 0.1945556445156125, + "learning_rate": 8.152001028576158e-06, + "loss": 0.2603, + "step": 970 + }, + { + "epoch": 0.1945556445156125, + "learning_rate": 8.165737180974676e-06, + "loss": 0.2151, + "step": 972 + }, + { + "epoch": 0.1953562850280224, + "learning_rate": 8.179476915728217e-06, + "loss": 0.4335, + "step": 974 + }, + { + "epoch": 0.1953562850280224, + "learning_rate": 8.193220206002785e-06, + "loss": 0.0152, + "step": 976 + }, + { + "epoch": 0.19615692554043235, + "learning_rate": 8.206967024957432e-06, + "loss": 0.3212, + "step": 978 + }, + { + "epoch": 0.19615692554043235, + "learning_rate": 8.220717345744326e-06, + "loss": 0.079, + "step": 980 + }, + { + "epoch": 0.1969575660528423, + "learning_rate": 8.234471141508773e-06, + "loss": 0.3126, + "step": 982 + }, + { + "epoch": 0.1969575660528423, + "learning_rate": 8.248228385389349e-06, + "loss": 0.145, + "step": 984 + }, + { + "epoch": 0.1977582065652522, + "learning_rate": 8.261989050517841e-06, + "loss": 0.6945, + "step": 986 + }, + { + "epoch": 0.1977582065652522, + "learning_rate": 8.275753110019367e-06, + "loss": 0.2941, + "step": 988 + }, + { + "epoch": 0.19855884707766214, + "learning_rate": 8.289520537012428e-06, + "loss": 0.2412, + "step": 990 + }, + { + "epoch": 0.19855884707766214, + "learning_rate": 8.303291304608936e-06, + "loss": 0.0904, + "step": 992 + }, + { + "epoch": 0.19935948759007205, + "learning_rate": 8.317065385914285e-06, + "loss": 0.5552, + "step": 994 + }, + { + "epoch": 0.19935948759007205, + "learning_rate": 8.330842754027378e-06, + "loss": 0.4889, + "step": 996 + }, + { + "epoch": 0.200160128102482, + "learning_rate": 8.344623382040752e-06, + "loss": 0.1383, + "step": 998 + }, + { + "epoch": 0.200160128102482, + "learning_rate": 8.358407243040524e-06, + "loss": 0.88, + "step": 1000 + }, + { + "epoch": 0.20096076861489193, + "learning_rate": 8.372194310106515e-06, + "loss": 0.1949, + "step": 1002 + }, + { + "epoch": 0.20096076861489193, + "learning_rate": 8.385984556312285e-06, + "loss": 0.1013, + "step": 1004 + }, + { + "epoch": 0.20176140912730184, + "learning_rate": 8.399777954725183e-06, + "loss": 0.158, + "step": 1006 + }, + { + "epoch": 0.20176140912730184, + "learning_rate": 8.413574478406386e-06, + "loss": 0.1358, + "step": 1008 + }, + { + "epoch": 0.20256204963971178, + "learning_rate": 8.427374100411022e-06, + "loss": 0.1601, + "step": 1010 + }, + { + "epoch": 0.20256204963971178, + "learning_rate": 8.441176793788106e-06, + "loss": 0.0809, + "step": 1012 + }, + { + "epoch": 0.2033626901521217, + "learning_rate": 8.454982531580687e-06, + "loss": 0.1624, + "step": 1014 + }, + { + "epoch": 0.2033626901521217, + "learning_rate": 8.468791286825856e-06, + "loss": 0.8361, + "step": 1016 + }, + { + "epoch": 0.20416333066453163, + "learning_rate": 8.482603032554812e-06, + "loss": 0.2148, + "step": 1018 + }, + { + "epoch": 0.20416333066453163, + "learning_rate": 8.496417741792922e-06, + "loss": 0.5845, + "step": 1020 + }, + { + "epoch": 0.20496397117694154, + "learning_rate": 8.510235387559738e-06, + "loss": 0.1581, + "step": 1022 + }, + { + "epoch": 0.20496397117694154, + "learning_rate": 8.524055942869135e-06, + "loss": 0.1794, + "step": 1024 + }, + { + "epoch": 0.20576461168935148, + "learning_rate": 8.537879380729254e-06, + "loss": 0.2084, + "step": 1026 + }, + { + "epoch": 0.20576461168935148, + "learning_rate": 8.551705674142616e-06, + "loss": 0.2956, + "step": 1028 + }, + { + "epoch": 0.20656525220176142, + "learning_rate": 8.565534796106175e-06, + "loss": 0.5345, + "step": 1030 + }, + { + "epoch": 0.20656525220176142, + "learning_rate": 8.579366719611353e-06, + "loss": 0.2775, + "step": 1032 + }, + { + "epoch": 0.20736589271417133, + "learning_rate": 8.593201417644091e-06, + "loss": 0.1895, + "step": 1034 + }, + { + "epoch": 0.20736589271417133, + "learning_rate": 8.607038863184952e-06, + "loss": 0.069, + "step": 1036 + }, + { + "epoch": 0.20816653322658127, + "learning_rate": 8.620879029209093e-06, + "loss": 0.1286, + "step": 1038 + }, + { + "epoch": 0.20816653322658127, + "learning_rate": 8.634721888686368e-06, + "loss": 0.4137, + "step": 1040 + }, + { + "epoch": 0.20896717373899118, + "learning_rate": 8.648567414581372e-06, + "loss": 0.2577, + "step": 1042 + }, + { + "epoch": 0.20896717373899118, + "learning_rate": 8.662415579853495e-06, + "loss": 0.1521, + "step": 1044 + }, + { + "epoch": 0.20976781425140112, + "learning_rate": 8.676266357456968e-06, + "loss": 0.2616, + "step": 1046 + }, + { + "epoch": 0.20976781425140112, + "learning_rate": 8.690119720340907e-06, + "loss": 0.114, + "step": 1048 + }, + { + "epoch": 0.21056845476381106, + "learning_rate": 8.703975641449426e-06, + "loss": 0.1495, + "step": 1050 + }, + { + "epoch": 0.21056845476381106, + "learning_rate": 8.717834093721598e-06, + "loss": 0.6946, + "step": 1052 + }, + { + "epoch": 0.21136909527622097, + "learning_rate": 8.731695050091561e-06, + "loss": 0.1582, + "step": 1054 + }, + { + "epoch": 0.21136909527622097, + "learning_rate": 8.74555848348857e-06, + "loss": 0.0481, + "step": 1056 + }, + { + "epoch": 0.2121697357886309, + "learning_rate": 8.759424366837035e-06, + "loss": 0.1945, + "step": 1058 + }, + { + "epoch": 0.2121697357886309, + "learning_rate": 8.773292673056572e-06, + "loss": 0.0642, + "step": 1060 + }, + { + "epoch": 0.21297037630104082, + "learning_rate": 8.787163375062113e-06, + "loss": 0.3856, + "step": 1062 + }, + { + "epoch": 0.21297037630104082, + "learning_rate": 8.801036445763858e-06, + "loss": 0.2244, + "step": 1064 + }, + { + "epoch": 0.21377101681345076, + "learning_rate": 8.8149118580674e-06, + "loss": 0.0743, + "step": 1066 + }, + { + "epoch": 0.21377101681345076, + "learning_rate": 8.828789584873757e-06, + "loss": 0.0157, + "step": 1068 + }, + { + "epoch": 0.2145716573258607, + "learning_rate": 8.84266959907943e-06, + "loss": 0.3323, + "step": 1070 + }, + { + "epoch": 0.2145716573258607, + "learning_rate": 8.856551873576448e-06, + "loss": 0.0831, + "step": 1072 + }, + { + "epoch": 0.2153722978382706, + "learning_rate": 8.870436381252412e-06, + "loss": 0.3486, + "step": 1074 + }, + { + "epoch": 0.2153722978382706, + "learning_rate": 8.884323094990613e-06, + "loss": 1.0135, + "step": 1076 + }, + { + "epoch": 0.21617293835068055, + "learning_rate": 8.89821198766998e-06, + "loss": 0.2306, + "step": 1078 + }, + { + "epoch": 0.21617293835068055, + "learning_rate": 8.912103032165206e-06, + "loss": 0.4392, + "step": 1080 + }, + { + "epoch": 0.21697357886309046, + "learning_rate": 8.925996201346779e-06, + "loss": 0.2244, + "step": 1082 + }, + { + "epoch": 0.21697357886309046, + "learning_rate": 8.939891468081036e-06, + "loss": 0.4113, + "step": 1084 + }, + { + "epoch": 0.2177742193755004, + "learning_rate": 8.953788805230209e-06, + "loss": 0.2755, + "step": 1086 + }, + { + "epoch": 0.2177742193755004, + "learning_rate": 8.967688185652527e-06, + "loss": 0.0494, + "step": 1088 + }, + { + "epoch": 0.21857485988791034, + "learning_rate": 8.981589582202184e-06, + "loss": 0.1752, + "step": 1090 + }, + { + "epoch": 0.21857485988791034, + "learning_rate": 8.995492967729449e-06, + "loss": 0.1019, + "step": 1092 + }, + { + "epoch": 0.21937550040032025, + "learning_rate": 9.009398315080712e-06, + "loss": 0.1083, + "step": 1094 + }, + { + "epoch": 0.21937550040032025, + "learning_rate": 9.023305597098526e-06, + "loss": 0.2046, + "step": 1096 + }, + { + "epoch": 0.2201761409127302, + "learning_rate": 9.037214786621669e-06, + "loss": 0.1159, + "step": 1098 + }, + { + "epoch": 0.2201761409127302, + "learning_rate": 9.051125856485175e-06, + "loss": 0.1266, + "step": 1100 + }, + { + "epoch": 0.2209767814251401, + "learning_rate": 9.065038779520457e-06, + "loss": 0.381, + "step": 1102 + }, + { + "epoch": 0.2209767814251401, + "learning_rate": 9.078953528555258e-06, + "loss": 0.4569, + "step": 1104 + }, + { + "epoch": 0.22177742193755004, + "learning_rate": 9.092870076413771e-06, + "loss": 0.1823, + "step": 1106 + }, + { + "epoch": 0.22177742193755004, + "learning_rate": 9.106788395916682e-06, + "loss": 0.155, + "step": 1108 + }, + { + "epoch": 0.22257806244995998, + "learning_rate": 9.120708459881203e-06, + "loss": 0.2775, + "step": 1110 + }, + { + "epoch": 0.22257806244995998, + "learning_rate": 9.134630241121135e-06, + "loss": 0.235, + "step": 1112 + }, + { + "epoch": 0.2233787029623699, + "learning_rate": 9.148553712446971e-06, + "loss": 0.0803, + "step": 1114 + }, + { + "epoch": 0.2233787029623699, + "learning_rate": 9.162478846665854e-06, + "loss": 0.3807, + "step": 1116 + }, + { + "epoch": 0.22417934347477983, + "learning_rate": 9.176405616581694e-06, + "loss": 0.3145, + "step": 1118 + }, + { + "epoch": 0.22417934347477983, + "learning_rate": 9.190333994995208e-06, + "loss": 0.4833, + "step": 1120 + }, + { + "epoch": 0.22497998398718974, + "learning_rate": 9.20426395470397e-06, + "loss": 0.3565, + "step": 1122 + }, + { + "epoch": 0.22497998398718974, + "learning_rate": 9.218195468502469e-06, + "loss": 0.222, + "step": 1124 + }, + { + "epoch": 0.22578062449959968, + "learning_rate": 9.232128509182136e-06, + "loss": 0.1821, + "step": 1126 + }, + { + "epoch": 0.22578062449959968, + "learning_rate": 9.24606304953148e-06, + "loss": 0.2193, + "step": 1128 + }, + { + "epoch": 0.22658126501200962, + "learning_rate": 9.259999062336021e-06, + "loss": 0.2552, + "step": 1130 + }, + { + "epoch": 0.22658126501200962, + "learning_rate": 9.273936520378426e-06, + "loss": 0.1217, + "step": 1132 + }, + { + "epoch": 0.22738190552441953, + "learning_rate": 9.287875396438536e-06, + "loss": 0.223, + "step": 1134 + }, + { + "epoch": 0.22738190552441953, + "learning_rate": 9.301815663293426e-06, + "loss": 0.6569, + "step": 1136 + }, + { + "epoch": 0.22818254603682947, + "learning_rate": 9.315757293717432e-06, + "loss": 0.2378, + "step": 1138 + }, + { + "epoch": 0.22818254603682947, + "learning_rate": 9.329700260482286e-06, + "loss": 0.153, + "step": 1140 + }, + { + "epoch": 0.22898318654923938, + "learning_rate": 9.343644536357053e-06, + "loss": 0.4707, + "step": 1142 + }, + { + "epoch": 0.22898318654923938, + "learning_rate": 9.35759009410826e-06, + "loss": 0.1035, + "step": 1144 + }, + { + "epoch": 0.22978382706164932, + "learning_rate": 9.37153690649993e-06, + "loss": 0.2942, + "step": 1146 + }, + { + "epoch": 0.22978382706164932, + "learning_rate": 9.38548494629364e-06, + "loss": 0.252, + "step": 1148 + }, + { + "epoch": 0.23058446757405926, + "learning_rate": 9.39943418624856e-06, + "loss": 0.2149, + "step": 1150 + }, + { + "epoch": 0.23058446757405926, + "learning_rate": 9.41338459912151e-06, + "loss": 0.2158, + "step": 1152 + }, + { + "epoch": 0.23138510808646917, + "learning_rate": 9.427336157667062e-06, + "loss": 0.3848, + "step": 1154 + }, + { + "epoch": 0.23138510808646917, + "learning_rate": 9.441288834637507e-06, + "loss": 0.6956, + "step": 1156 + }, + { + "epoch": 0.2321857485988791, + "learning_rate": 9.45524260278296e-06, + "loss": 0.182, + "step": 1158 + }, + { + "epoch": 0.2321857485988791, + "learning_rate": 9.469197434851414e-06, + "loss": 0.1821, + "step": 1160 + }, + { + "epoch": 0.23298638911128902, + "learning_rate": 9.483153303588777e-06, + "loss": 0.2151, + "step": 1162 + }, + { + "epoch": 0.23298638911128902, + "learning_rate": 9.497110181738935e-06, + "loss": 0.1823, + "step": 1164 + }, + { + "epoch": 0.23378702962369896, + "learning_rate": 9.511068042043785e-06, + "loss": 0.2086, + "step": 1166 + }, + { + "epoch": 0.23378702962369896, + "learning_rate": 9.52502685724336e-06, + "loss": 0.1339, + "step": 1168 + }, + { + "epoch": 0.2345876701361089, + "learning_rate": 9.538986600075773e-06, + "loss": 0.1976, + "step": 1170 + }, + { + "epoch": 0.2345876701361089, + "learning_rate": 9.552947243277342e-06, + "loss": 0.2006, + "step": 1172 + }, + { + "epoch": 0.2353883106485188, + "learning_rate": 9.566908759582633e-06, + "loss": 0.2605, + "step": 1174 + }, + { + "epoch": 0.2353883106485188, + "learning_rate": 9.580871121724498e-06, + "loss": 0.0686, + "step": 1176 + }, + { + "epoch": 0.23618895116092875, + "learning_rate": 9.594834302434123e-06, + "loss": 0.5887, + "step": 1178 + }, + { + "epoch": 0.23618895116092875, + "learning_rate": 9.608798274441153e-06, + "loss": 0.6413, + "step": 1180 + }, + { + "epoch": 0.23698959167333866, + "learning_rate": 9.622763010473628e-06, + "loss": 0.2964, + "step": 1182 + }, + { + "epoch": 0.23698959167333866, + "learning_rate": 9.636728483258116e-06, + "loss": 0.0442, + "step": 1184 + }, + { + "epoch": 0.2377902321857486, + "learning_rate": 9.650694665519747e-06, + "loss": 0.2219, + "step": 1186 + }, + { + "epoch": 0.2377902321857486, + "learning_rate": 9.664661529982263e-06, + "loss": 0.3407, + "step": 1188 + }, + { + "epoch": 0.23859087269815854, + "learning_rate": 9.678629049368077e-06, + "loss": 0.1783, + "step": 1190 + }, + { + "epoch": 0.23859087269815854, + "learning_rate": 9.692597196398302e-06, + "loss": 0.0235, + "step": 1192 + }, + { + "epoch": 0.23939151321056845, + "learning_rate": 9.706565943792879e-06, + "loss": 0.1508, + "step": 1194 + }, + { + "epoch": 0.23939151321056845, + "learning_rate": 9.720535264270526e-06, + "loss": 0.0139, + "step": 1196 + }, + { + "epoch": 0.2401921537229784, + "learning_rate": 9.734505130548855e-06, + "loss": 0.1419, + "step": 1198 + }, + { + "epoch": 0.2401921537229784, + "learning_rate": 9.748475515344416e-06, + "loss": 0.7775, + "step": 1200 + }, + { + "epoch": 0.2409927942353883, + "learning_rate": 9.762446391372746e-06, + "loss": 0.2755, + "step": 1202 + }, + { + "epoch": 0.2409927942353883, + "learning_rate": 9.776417731348403e-06, + "loss": 0.253, + "step": 1204 + }, + { + "epoch": 0.24179343474779824, + "learning_rate": 9.790389507985091e-06, + "loss": 0.508, + "step": 1206 + }, + { + "epoch": 0.24179343474779824, + "learning_rate": 9.80436169399561e-06, + "loss": 0.6494, + "step": 1208 + }, + { + "epoch": 0.24259407526020815, + "learning_rate": 9.81833426209198e-06, + "loss": 0.2596, + "step": 1210 + }, + { + "epoch": 0.24259407526020815, + "learning_rate": 9.832307184985473e-06, + "loss": 0.235, + "step": 1212 + }, + { + "epoch": 0.2433947157726181, + "learning_rate": 9.846280435386668e-06, + "loss": 0.0825, + "step": 1214 + }, + { + "epoch": 0.2433947157726181, + "learning_rate": 9.8602539860055e-06, + "loss": 0.0159, + "step": 1216 + }, + { + "epoch": 0.24419535628502803, + "learning_rate": 9.874227809551307e-06, + "loss": 0.6583, + "step": 1218 + }, + { + "epoch": 0.24419535628502803, + "learning_rate": 9.888201878732946e-06, + "loss": 0.0958, + "step": 1220 + }, + { + "epoch": 0.24499599679743794, + "learning_rate": 9.902176166258738e-06, + "loss": 0.2326, + "step": 1222 + }, + { + "epoch": 0.24499599679743794, + "learning_rate": 9.916150644836596e-06, + "loss": 0.3552, + "step": 1224 + }, + { + "epoch": 0.24579663730984788, + "learning_rate": 9.930125287174061e-06, + "loss": 0.5497, + "step": 1226 + }, + { + "epoch": 0.24579663730984788, + "learning_rate": 9.944100065978354e-06, + "loss": 0.1444, + "step": 1228 + }, + { + "epoch": 0.2465972778222578, + "learning_rate": 9.958074953956413e-06, + "loss": 0.1661, + "step": 1230 + }, + { + "epoch": 0.2465972778222578, + "learning_rate": 9.972049923815011e-06, + "loss": 0.5448, + "step": 1232 + }, + { + "epoch": 0.24739791833466773, + "learning_rate": 9.986024948260714e-06, + "loss": 0.2388, + "step": 1234 + }, + { + "epoch": 0.24739791833466773, + "learning_rate": 9.999999999999996e-06, + "loss": 0.2446, + "step": 1236 + }, + { + "epoch": 0.24819855884707767, + "learning_rate": 1.0013975051739277e-05, + "loss": 0.2243, + "step": 1238 + }, + { + "epoch": 0.24819855884707767, + "learning_rate": 1.0027950076184982e-05, + "loss": 0.3724, + "step": 1240 + }, + { + "epoch": 0.24899919935948758, + "learning_rate": 1.004192504604358e-05, + "loss": 0.1837, + "step": 1242 + }, + { + "epoch": 0.24899919935948758, + "learning_rate": 1.0055899934021637e-05, + "loss": 0.3612, + "step": 1244 + }, + { + "epoch": 0.24979983987189752, + "learning_rate": 1.006987471282593e-05, + "loss": 0.124, + "step": 1246 + }, + { + "epoch": 0.24979983987189752, + "learning_rate": 1.0083849355163397e-05, + "loss": 0.3383, + "step": 1248 + }, + { + "epoch": 0.25060048038430743, + "learning_rate": 1.0097823833741255e-05, + "loss": 0.3006, + "step": 1250 + }, + { + "epoch": 0.25060048038430743, + "learning_rate": 1.0111798121267047e-05, + "loss": 0.3046, + "step": 1252 + }, + { + "epoch": 0.2514011208967174, + "learning_rate": 1.0125772190448686e-05, + "loss": 0.3415, + "step": 1254 + }, + { + "epoch": 0.2514011208967174, + "learning_rate": 1.0139746013994493e-05, + "loss": 0.2615, + "step": 1256 + }, + { + "epoch": 0.2522017614091273, + "learning_rate": 1.0153719564613327e-05, + "loss": 0.0314, + "step": 1258 + }, + { + "epoch": 0.2522017614091273, + "learning_rate": 1.016769281501452e-05, + "loss": 0.0604, + "step": 1260 + }, + { + "epoch": 0.2530024019215372, + "learning_rate": 1.018166573790801e-05, + "loss": 0.315, + "step": 1262 + }, + { + "epoch": 0.2530024019215372, + "learning_rate": 1.0195638306004383e-05, + "loss": 0.0887, + "step": 1264 + }, + { + "epoch": 0.25380304243394713, + "learning_rate": 1.0209610492014904e-05, + "loss": 0.5316, + "step": 1266 + }, + { + "epoch": 0.25380304243394713, + "learning_rate": 1.022358226865159e-05, + "loss": 0.3689, + "step": 1268 + }, + { + "epoch": 0.2546036829463571, + "learning_rate": 1.0237553608627247e-05, + "loss": 0.1677, + "step": 1270 + }, + { + "epoch": 0.2546036829463571, + "learning_rate": 1.0251524484655577e-05, + "loss": 0.3043, + "step": 1272 + }, + { + "epoch": 0.255404323458767, + "learning_rate": 1.0265494869451138e-05, + "loss": 0.2319, + "step": 1274 + }, + { + "epoch": 0.255404323458767, + "learning_rate": 1.0279464735729467e-05, + "loss": 0.2709, + "step": 1276 + }, + { + "epoch": 0.2562049639711769, + "learning_rate": 1.0293434056207114e-05, + "loss": 0.215, + "step": 1278 + }, + { + "epoch": 0.2562049639711769, + "learning_rate": 1.0307402803601691e-05, + "loss": 0.1662, + "step": 1280 + }, + { + "epoch": 0.2570056044835869, + "learning_rate": 1.0321370950631918e-05, + "loss": 0.1603, + "step": 1282 + }, + { + "epoch": 0.2570056044835869, + "learning_rate": 1.033533847001773e-05, + "loss": 0.3312, + "step": 1284 + }, + { + "epoch": 0.2578062449959968, + "learning_rate": 1.0349305334480246e-05, + "loss": 0.3263, + "step": 1286 + }, + { + "epoch": 0.2578062449959968, + "learning_rate": 1.0363271516741877e-05, + "loss": 0.2638, + "step": 1288 + }, + { + "epoch": 0.2586068855084067, + "learning_rate": 1.0377236989526366e-05, + "loss": 0.1641, + "step": 1290 + }, + { + "epoch": 0.2586068855084067, + "learning_rate": 1.039120172555884e-05, + "loss": 0.4289, + "step": 1292 + }, + { + "epoch": 0.2594075260208166, + "learning_rate": 1.0405165697565868e-05, + "loss": 0.0813, + "step": 1294 + }, + { + "epoch": 0.2594075260208166, + "learning_rate": 1.0419128878275495e-05, + "loss": 0.0068, + "step": 1296 + }, + { + "epoch": 0.2602081665332266, + "learning_rate": 1.0433091240417362e-05, + "loss": 0.1943, + "step": 1298 + }, + { + "epoch": 0.2602081665332266, + "learning_rate": 1.0447052756722651e-05, + "loss": 0.1301, + "step": 1300 + }, + { + "epoch": 0.2610088070456365, + "learning_rate": 1.046101339992422e-05, + "loss": 0.1204, + "step": 1302 + }, + { + "epoch": 0.2610088070456365, + "learning_rate": 1.0474973142756632e-05, + "loss": 0.3643, + "step": 1304 + }, + { + "epoch": 0.2618094475580464, + "learning_rate": 1.0488931957956208e-05, + "loss": 0.5458, + "step": 1306 + }, + { + "epoch": 0.2618094475580464, + "learning_rate": 1.0502889818261058e-05, + "loss": 0.0403, + "step": 1308 + }, + { + "epoch": 0.2626100880704564, + "learning_rate": 1.0516846696411216e-05, + "loss": 0.2424, + "step": 1310 + }, + { + "epoch": 0.2626100880704564, + "learning_rate": 1.053080256514858e-05, + "loss": 0.1271, + "step": 1312 + }, + { + "epoch": 0.2634107285828663, + "learning_rate": 1.054475739721703e-05, + "loss": 0.8296, + "step": 1314 + }, + { + "epoch": 0.2634107285828663, + "learning_rate": 1.0558711165362488e-05, + "loss": 0.0591, + "step": 1316 + }, + { + "epoch": 0.2642113690952762, + "learning_rate": 1.0572663842332931e-05, + "loss": 0.142, + "step": 1318 + }, + { + "epoch": 0.2642113690952762, + "learning_rate": 1.0586615400878484e-05, + "loss": 0.1677, + "step": 1320 + }, + { + "epoch": 0.26501200960768617, + "learning_rate": 1.0600565813751433e-05, + "loss": 0.2612, + "step": 1322 + }, + { + "epoch": 0.26501200960768617, + "learning_rate": 1.0614515053706354e-05, + "loss": 0.11, + "step": 1324 + }, + { + "epoch": 0.2658126501200961, + "learning_rate": 1.0628463093500063e-05, + "loss": 0.4709, + "step": 1326 + }, + { + "epoch": 0.2658126501200961, + "learning_rate": 1.0642409905891733e-05, + "loss": 0.1879, + "step": 1328 + }, + { + "epoch": 0.266613290632506, + "learning_rate": 1.065635546364294e-05, + "loss": 0.1199, + "step": 1330 + }, + { + "epoch": 0.266613290632506, + "learning_rate": 1.0670299739517706e-05, + "loss": 0.109, + "step": 1332 + }, + { + "epoch": 0.2674139311449159, + "learning_rate": 1.0684242706282562e-05, + "loss": 0.3833, + "step": 1334 + }, + { + "epoch": 0.2674139311449159, + "learning_rate": 1.0698184336706567e-05, + "loss": 0.2179, + "step": 1336 + }, + { + "epoch": 0.2682145716573259, + "learning_rate": 1.0712124603561457e-05, + "loss": 0.2701, + "step": 1338 + }, + { + "epoch": 0.2682145716573259, + "learning_rate": 1.0726063479621567e-05, + "loss": 0.3134, + "step": 1340 + }, + { + "epoch": 0.2690152121697358, + "learning_rate": 1.0740000937663972e-05, + "loss": 0.3418, + "step": 1342 + }, + { + "epoch": 0.2690152121697358, + "learning_rate": 1.0753936950468513e-05, + "loss": 0.1581, + "step": 1344 + }, + { + "epoch": 0.2698158526821457, + "learning_rate": 1.0767871490817856e-05, + "loss": 0.2163, + "step": 1346 + }, + { + "epoch": 0.2698158526821457, + "learning_rate": 1.0781804531497525e-05, + "loss": 0.138, + "step": 1348 + }, + { + "epoch": 0.27061649319455566, + "learning_rate": 1.0795736045296023e-05, + "loss": 0.3642, + "step": 1350 + }, + { + "epoch": 0.27061649319455566, + "learning_rate": 1.0809666005004787e-05, + "loss": 0.0183, + "step": 1352 + }, + { + "epoch": 0.2714171337069656, + "learning_rate": 1.08235943834183e-05, + "loss": 0.5841, + "step": 1354 + }, + { + "epoch": 0.2714171337069656, + "learning_rate": 1.083752115333414e-05, + "loss": 0.201, + "step": 1356 + }, + { + "epoch": 0.2722177742193755, + "learning_rate": 1.0851446287553022e-05, + "loss": 0.4889, + "step": 1358 + }, + { + "epoch": 0.2722177742193755, + "learning_rate": 1.0865369758878858e-05, + "loss": 0.0572, + "step": 1360 + }, + { + "epoch": 0.27301841473178545, + "learning_rate": 1.087929154011879e-05, + "loss": 0.0833, + "step": 1362 + }, + { + "epoch": 0.27301841473178545, + "learning_rate": 1.0893211604083311e-05, + "loss": 0.0278, + "step": 1364 + }, + { + "epoch": 0.27381905524419536, + "learning_rate": 1.090712992358622e-05, + "loss": 0.1132, + "step": 1366 + }, + { + "epoch": 0.27381905524419536, + "learning_rate": 1.0921046471444737e-05, + "loss": 0.4376, + "step": 1368 + }, + { + "epoch": 0.2746196957566053, + "learning_rate": 1.0934961220479537e-05, + "loss": 0.1689, + "step": 1370 + }, + { + "epoch": 0.2746196957566053, + "learning_rate": 1.0948874143514818e-05, + "loss": 0.8235, + "step": 1372 + }, + { + "epoch": 0.2754203362690152, + "learning_rate": 1.0962785213378325e-05, + "loss": 0.3316, + "step": 1374 + }, + { + "epoch": 0.2754203362690152, + "learning_rate": 1.0976694402901467e-05, + "loss": 0.492, + "step": 1376 + }, + { + "epoch": 0.27622097678142515, + "learning_rate": 1.0990601684919282e-05, + "loss": 0.1865, + "step": 1378 + }, + { + "epoch": 0.27622097678142515, + "learning_rate": 1.1004507032270544e-05, + "loss": 0.3552, + "step": 1380 + }, + { + "epoch": 0.27702161729383507, + "learning_rate": 1.1018410417797809e-05, + "loss": 0.7587, + "step": 1382 + }, + { + "epoch": 0.27702161729383507, + "learning_rate": 1.1032311814347467e-05, + "loss": 0.3838, + "step": 1384 + }, + { + "epoch": 0.277822257806245, + "learning_rate": 1.1046211194769784e-05, + "loss": 0.3149, + "step": 1386 + }, + { + "epoch": 0.277822257806245, + "learning_rate": 1.1060108531918955e-05, + "loss": 0.46, + "step": 1388 + }, + { + "epoch": 0.27862289831865494, + "learning_rate": 1.1074003798653215e-05, + "loss": 0.1758, + "step": 1390 + }, + { + "epoch": 0.27862289831865494, + "learning_rate": 1.1087896967834787e-05, + "loss": 0.0706, + "step": 1392 + }, + { + "epoch": 0.27942353883106485, + "learning_rate": 1.1101788012330013e-05, + "loss": 0.1362, + "step": 1394 + }, + { + "epoch": 0.27942353883106485, + "learning_rate": 1.111567690500938e-05, + "loss": 0.1505, + "step": 1396 + }, + { + "epoch": 0.28022417934347477, + "learning_rate": 1.1129563618747581e-05, + "loss": 0.1617, + "step": 1398 + }, + { + "epoch": 0.28022417934347477, + "learning_rate": 1.1143448126423545e-05, + "loss": 0.1319, + "step": 1400 + }, + { + "epoch": 0.28102481985588473, + "learning_rate": 1.1157330400920563e-05, + "loss": 0.3119, + "step": 1402 + }, + { + "epoch": 0.28102481985588473, + "learning_rate": 1.1171210415126238e-05, + "loss": 0.1945, + "step": 1404 + }, + { + "epoch": 0.28182546036829464, + "learning_rate": 1.1185088141932594e-05, + "loss": 0.363, + "step": 1406 + }, + { + "epoch": 0.28182546036829464, + "learning_rate": 1.1198963554236135e-05, + "loss": 0.1512, + "step": 1408 + }, + { + "epoch": 0.28262610088070456, + "learning_rate": 1.121283662493788e-05, + "loss": 0.6766, + "step": 1410 + }, + { + "epoch": 0.28262610088070456, + "learning_rate": 1.122670732694342e-05, + "loss": 0.3273, + "step": 1412 + }, + { + "epoch": 0.28342674139311447, + "learning_rate": 1.1240575633162958e-05, + "loss": 0.791, + "step": 1414 + }, + { + "epoch": 0.28342674139311447, + "learning_rate": 1.1254441516511425e-05, + "loss": 0.0908, + "step": 1416 + }, + { + "epoch": 0.28422738190552443, + "learning_rate": 1.1268304949908434e-05, + "loss": 0.4785, + "step": 1418 + }, + { + "epoch": 0.28422738190552443, + "learning_rate": 1.1282165906278395e-05, + "loss": 0.1865, + "step": 1420 + }, + { + "epoch": 0.28502802241793435, + "learning_rate": 1.1296024358550565e-05, + "loss": 0.5856, + "step": 1422 + }, + { + "epoch": 0.28502802241793435, + "learning_rate": 1.1309880279659087e-05, + "loss": 0.3426, + "step": 1424 + }, + { + "epoch": 0.28582866293034426, + "learning_rate": 1.1323733642543024e-05, + "loss": 0.2391, + "step": 1426 + }, + { + "epoch": 0.28582866293034426, + "learning_rate": 1.1337584420146496e-05, + "loss": 0.1511, + "step": 1428 + }, + { + "epoch": 0.2866293034427542, + "learning_rate": 1.135143258541862e-05, + "loss": 0.2774, + "step": 1430 + }, + { + "epoch": 0.2866293034427542, + "learning_rate": 1.1365278111313625e-05, + "loss": 0.3206, + "step": 1432 + }, + { + "epoch": 0.28742994395516414, + "learning_rate": 1.13791209707909e-05, + "loss": 0.3635, + "step": 1434 + }, + { + "epoch": 0.28742994395516414, + "learning_rate": 1.1392961136815041e-05, + "loss": 0.2231, + "step": 1436 + }, + { + "epoch": 0.28823058446757405, + "learning_rate": 1.1406798582355902e-05, + "loss": 0.1714, + "step": 1438 + }, + { + "epoch": 0.28823058446757405, + "learning_rate": 1.142063328038864e-05, + "loss": 0.134, + "step": 1440 + }, + { + "epoch": 0.289031224979984, + "learning_rate": 1.1434465203893818e-05, + "loss": 0.2081, + "step": 1442 + }, + { + "epoch": 0.289031224979984, + "learning_rate": 1.1448294325857377e-05, + "loss": 0.2605, + "step": 1444 + }, + { + "epoch": 0.2898318654923939, + "learning_rate": 1.146212061927074e-05, + "loss": 0.2458, + "step": 1446 + }, + { + "epoch": 0.2898318654923939, + "learning_rate": 1.1475944057130856e-05, + "loss": 0.1125, + "step": 1448 + }, + { + "epoch": 0.29063250600480384, + "learning_rate": 1.1489764612440255e-05, + "loss": 0.4706, + "step": 1450 + }, + { + "epoch": 0.29063250600480384, + "learning_rate": 1.150358225820707e-05, + "loss": 0.1494, + "step": 1452 + }, + { + "epoch": 0.29143314651721375, + "learning_rate": 1.151739696744518e-05, + "loss": 0.1354, + "step": 1454 + }, + { + "epoch": 0.29143314651721375, + "learning_rate": 1.1531208713174138e-05, + "loss": 0.2099, + "step": 1456 + }, + { + "epoch": 0.2922337870296237, + "learning_rate": 1.1545017468419307e-05, + "loss": 0.2054, + "step": 1458 + }, + { + "epoch": 0.2922337870296237, + "learning_rate": 1.1558823206211887e-05, + "loss": 0.0943, + "step": 1460 + }, + { + "epoch": 0.2930344275420336, + "learning_rate": 1.1572625899588972e-05, + "loss": 0.3127, + "step": 1462 + }, + { + "epoch": 0.2930344275420336, + "learning_rate": 1.1586425521593607e-05, + "loss": 0.1961, + "step": 1464 + }, + { + "epoch": 0.29383506805444354, + "learning_rate": 1.1600222045274809e-05, + "loss": 0.2268, + "step": 1466 + }, + { + "epoch": 0.29383506805444354, + "learning_rate": 1.1614015443687708e-05, + "loss": 0.0103, + "step": 1468 + }, + { + "epoch": 0.2946357085668535, + "learning_rate": 1.1627805689893478e-05, + "loss": 0.2393, + "step": 1470 + }, + { + "epoch": 0.2946357085668535, + "learning_rate": 1.1641592756959467e-05, + "loss": 0.5504, + "step": 1472 + }, + { + "epoch": 0.2954363490792634, + "learning_rate": 1.1655376617959239e-05, + "loss": 0.7182, + "step": 1474 + }, + { + "epoch": 0.2954363490792634, + "learning_rate": 1.1669157245972616e-05, + "loss": 0.6106, + "step": 1476 + }, + { + "epoch": 0.2962369895916733, + "learning_rate": 1.1682934614085708e-05, + "loss": 0.4883, + "step": 1478 + }, + { + "epoch": 0.2962369895916733, + "learning_rate": 1.1696708695391057e-05, + "loss": 0.2193, + "step": 1480 + }, + { + "epoch": 0.29703763010408324, + "learning_rate": 1.1710479462987565e-05, + "loss": 0.188, + "step": 1482 + }, + { + "epoch": 0.29703763010408324, + "learning_rate": 1.1724246889980626e-05, + "loss": 0.2708, + "step": 1484 + }, + { + "epoch": 0.2978382706164932, + "learning_rate": 1.1738010949482152e-05, + "loss": 0.2946, + "step": 1486 + }, + { + "epoch": 0.2978382706164932, + "learning_rate": 1.1751771614610643e-05, + "loss": 0.4407, + "step": 1488 + }, + { + "epoch": 0.2986389111289031, + "learning_rate": 1.176552885849122e-05, + "loss": 0.7374, + "step": 1490 + }, + { + "epoch": 0.2986389111289031, + "learning_rate": 1.1779282654255668e-05, + "loss": 0.2638, + "step": 1492 + }, + { + "epoch": 0.29943955164131303, + "learning_rate": 1.1793032975042563e-05, + "loss": 0.5854, + "step": 1494 + }, + { + "epoch": 0.29943955164131303, + "learning_rate": 1.180677979399721e-05, + "loss": 0.344, + "step": 1496 + }, + { + "epoch": 0.300240192153723, + "learning_rate": 1.1820523084271775e-05, + "loss": 0.3481, + "step": 1498 + }, + { + "epoch": 0.300240192153723, + "learning_rate": 1.1834262819025317e-05, + "loss": 0.202, + "step": 1500 + }, + { + "epoch": 0.3010408326661329, + "learning_rate": 1.1847998971423835e-05, + "loss": 0.2194, + "step": 1502 + }, + { + "epoch": 0.3010408326661329, + "learning_rate": 1.1861731514640309e-05, + "loss": 0.1423, + "step": 1504 + }, + { + "epoch": 0.3018414731785428, + "learning_rate": 1.1875460421854816e-05, + "loss": 0.5414, + "step": 1506 + }, + { + "epoch": 0.3018414731785428, + "learning_rate": 1.188918566625449e-05, + "loss": 0.2507, + "step": 1508 + }, + { + "epoch": 0.3026421136909528, + "learning_rate": 1.1902907221033629e-05, + "loss": 0.2284, + "step": 1510 + }, + { + "epoch": 0.3026421136909528, + "learning_rate": 1.1916625059393739e-05, + "loss": 0.3953, + "step": 1512 + }, + { + "epoch": 0.3034427542033627, + "learning_rate": 1.1930339154543582e-05, + "loss": 0.1421, + "step": 1514 + }, + { + "epoch": 0.3034427542033627, + "learning_rate": 1.1944049479699241e-05, + "loss": 0.1371, + "step": 1516 + }, + { + "epoch": 0.3042433947157726, + "learning_rate": 1.1957756008084127e-05, + "loss": 0.134, + "step": 1518 + }, + { + "epoch": 0.3042433947157726, + "learning_rate": 1.1971458712929133e-05, + "loss": 0.1444, + "step": 1520 + }, + { + "epoch": 0.3050440352281825, + "learning_rate": 1.1985157567472563e-05, + "loss": 0.3414, + "step": 1522 + }, + { + "epoch": 0.3050440352281825, + "learning_rate": 1.1998852544960256e-05, + "loss": 0.1708, + "step": 1524 + }, + { + "epoch": 0.3058446757405925, + "learning_rate": 1.2012543618645622e-05, + "loss": 0.4088, + "step": 1526 + }, + { + "epoch": 0.3058446757405925, + "learning_rate": 1.2026230761789702e-05, + "loss": 0.5856, + "step": 1528 + }, + { + "epoch": 0.3066453162530024, + "learning_rate": 1.2039913947661205e-05, + "loss": 0.1267, + "step": 1530 + }, + { + "epoch": 0.3066453162530024, + "learning_rate": 1.2053593149536557e-05, + "loss": 0.1749, + "step": 1532 + }, + { + "epoch": 0.3074459567654123, + "learning_rate": 1.2067268340700016e-05, + "loss": 0.171, + "step": 1534 + }, + { + "epoch": 0.3074459567654123, + "learning_rate": 1.2080939494443618e-05, + "loss": 0.5205, + "step": 1536 + }, + { + "epoch": 0.3082465972778223, + "learning_rate": 1.2094606584067304e-05, + "loss": 0.2663, + "step": 1538 + }, + { + "epoch": 0.3082465972778223, + "learning_rate": 1.210826958287895e-05, + "loss": 0.5045, + "step": 1540 + }, + { + "epoch": 0.3090472377902322, + "learning_rate": 1.212192846419443e-05, + "loss": 0.2819, + "step": 1542 + }, + { + "epoch": 0.3090472377902322, + "learning_rate": 1.2135583201337646e-05, + "loss": 0.5187, + "step": 1544 + }, + { + "epoch": 0.3098478783026421, + "learning_rate": 1.2149233767640587e-05, + "loss": 0.1424, + "step": 1546 + }, + { + "epoch": 0.3098478783026421, + "learning_rate": 1.2162880136443434e-05, + "loss": 0.0775, + "step": 1548 + }, + { + "epoch": 0.31064851881505207, + "learning_rate": 1.2176522281094514e-05, + "loss": 0.2619, + "step": 1550 + }, + { + "epoch": 0.31064851881505207, + "learning_rate": 1.2190160174950428e-05, + "loss": 0.3382, + "step": 1552 + }, + { + "epoch": 0.311449159327462, + "learning_rate": 1.220379379137607e-05, + "loss": 0.1415, + "step": 1554 + }, + { + "epoch": 0.311449159327462, + "learning_rate": 1.2217423103744692e-05, + "loss": 0.8941, + "step": 1556 + }, + { + "epoch": 0.3122497998398719, + "learning_rate": 1.2231048085437953e-05, + "loss": 0.3341, + "step": 1558 + }, + { + "epoch": 0.3122497998398719, + "learning_rate": 1.2244668709845952e-05, + "loss": 0.1666, + "step": 1560 + }, + { + "epoch": 0.3130504403522818, + "learning_rate": 1.2258284950367347e-05, + "loss": 0.557, + "step": 1562 + }, + { + "epoch": 0.3130504403522818, + "learning_rate": 1.2271896780409309e-05, + "loss": 0.3639, + "step": 1564 + }, + { + "epoch": 0.31385108086469177, + "learning_rate": 1.228550417338764e-05, + "loss": 0.2868, + "step": 1566 + }, + { + "epoch": 0.31385108086469177, + "learning_rate": 1.2299107102726804e-05, + "loss": 0.4484, + "step": 1568 + }, + { + "epoch": 0.3146517213771017, + "learning_rate": 1.2312705541859985e-05, + "loss": 0.4792, + "step": 1570 + }, + { + "epoch": 0.3146517213771017, + "learning_rate": 1.2326299464229143e-05, + "loss": 0.4259, + "step": 1572 + }, + { + "epoch": 0.3154523618895116, + "learning_rate": 1.2339888843285029e-05, + "loss": 0.2203, + "step": 1574 + }, + { + "epoch": 0.3154523618895116, + "learning_rate": 1.2353473652487329e-05, + "loss": 0.2445, + "step": 1576 + }, + { + "epoch": 0.31625300240192156, + "learning_rate": 1.2367053865304597e-05, + "loss": 0.1356, + "step": 1578 + }, + { + "epoch": 0.31625300240192156, + "learning_rate": 1.2380629455214385e-05, + "loss": 0.1783, + "step": 1580 + }, + { + "epoch": 0.31705364291433147, + "learning_rate": 1.2394200395703273e-05, + "loss": 0.4181, + "step": 1582 + }, + { + "epoch": 0.31705364291433147, + "learning_rate": 1.2407766660266916e-05, + "loss": 0.5074, + "step": 1584 + }, + { + "epoch": 0.3178542834267414, + "learning_rate": 1.2421328222410109e-05, + "loss": 0.2392, + "step": 1586 + }, + { + "epoch": 0.3178542834267414, + "learning_rate": 1.2434885055646808e-05, + "loss": 0.2884, + "step": 1588 + }, + { + "epoch": 0.31865492393915135, + "learning_rate": 1.2448437133500262e-05, + "loss": 0.2466, + "step": 1590 + }, + { + "epoch": 0.31865492393915135, + "learning_rate": 1.2461984429502947e-05, + "loss": 0.2529, + "step": 1592 + }, + { + "epoch": 0.31945556445156126, + "learning_rate": 1.2475526917196703e-05, + "loss": 0.204, + "step": 1594 + }, + { + "epoch": 0.31945556445156126, + "learning_rate": 1.2489064570132761e-05, + "loss": 0.1597, + "step": 1596 + }, + { + "epoch": 0.32025620496397117, + "learning_rate": 1.2502597361871787e-05, + "loss": 0.1865, + "step": 1598 + }, + { + "epoch": 0.32025620496397117, + "learning_rate": 1.2516125265983945e-05, + "loss": 0.5956, + "step": 1600 + }, + { + "epoch": 0.3210568454763811, + "learning_rate": 1.2529648256048931e-05, + "loss": 0.4371, + "step": 1602 + }, + { + "epoch": 0.3210568454763811, + "learning_rate": 1.2543166305656089e-05, + "loss": 0.4267, + "step": 1604 + }, + { + "epoch": 0.32185748598879105, + "learning_rate": 1.2556679388404351e-05, + "loss": 0.1836, + "step": 1606 + }, + { + "epoch": 0.32185748598879105, + "learning_rate": 1.257018747790238e-05, + "loss": 0.107, + "step": 1608 + }, + { + "epoch": 0.32265812650120096, + "learning_rate": 1.2583690547768584e-05, + "loss": 0.2521, + "step": 1610 + }, + { + "epoch": 0.32265812650120096, + "learning_rate": 1.259718857163117e-05, + "loss": 0.0604, + "step": 1612 + }, + { + "epoch": 0.32345876701361087, + "learning_rate": 1.261068152312821e-05, + "loss": 0.4678, + "step": 1614 + }, + { + "epoch": 0.32345876701361087, + "learning_rate": 1.2624169375907657e-05, + "loss": 0.3044, + "step": 1616 + }, + { + "epoch": 0.32425940752602084, + "learning_rate": 1.2637652103627481e-05, + "loss": 0.2477, + "step": 1618 + }, + { + "epoch": 0.32425940752602084, + "learning_rate": 1.2651129679955598e-05, + "loss": 0.1585, + "step": 1620 + }, + { + "epoch": 0.32506004803843075, + "learning_rate": 1.2664602078570017e-05, + "loss": 0.1879, + "step": 1622 + }, + { + "epoch": 0.32506004803843075, + "learning_rate": 1.2678069273158849e-05, + "loss": 0.1581, + "step": 1624 + }, + { + "epoch": 0.32586068855084066, + "learning_rate": 1.2691531237420369e-05, + "loss": 0.352, + "step": 1626 + }, + { + "epoch": 0.32586068855084066, + "learning_rate": 1.2704987945063073e-05, + "loss": 0.2161, + "step": 1628 + }, + { + "epoch": 0.3266613290632506, + "learning_rate": 1.27184393698057e-05, + "loss": 0.2447, + "step": 1630 + }, + { + "epoch": 0.3266613290632506, + "learning_rate": 1.273188548537736e-05, + "loss": 0.1586, + "step": 1632 + }, + { + "epoch": 0.32746196957566054, + "learning_rate": 1.2745326265517481e-05, + "loss": 0.2529, + "step": 1634 + }, + { + "epoch": 0.32746196957566054, + "learning_rate": 1.2758761683975929e-05, + "loss": 0.2019, + "step": 1636 + }, + { + "epoch": 0.32826261008807045, + "learning_rate": 1.277219171451304e-05, + "loss": 0.2159, + "step": 1638 + }, + { + "epoch": 0.32826261008807045, + "learning_rate": 1.2785616330899676e-05, + "loss": 0.347, + "step": 1640 + }, + { + "epoch": 0.32906325060048036, + "learning_rate": 1.2799035506917265e-05, + "loss": 0.2066, + "step": 1642 + }, + { + "epoch": 0.32906325060048036, + "learning_rate": 1.2812449216357855e-05, + "loss": 0.0917, + "step": 1644 + }, + { + "epoch": 0.32986389111289033, + "learning_rate": 1.2825857433024208e-05, + "loss": 0.4873, + "step": 1646 + }, + { + "epoch": 0.32986389111289033, + "learning_rate": 1.2839260130729776e-05, + "loss": 0.151, + "step": 1648 + }, + { + "epoch": 0.33066453162530024, + "learning_rate": 1.2852657283298794e-05, + "loss": 0.4022, + "step": 1650 + }, + { + "epoch": 0.33066453162530024, + "learning_rate": 1.2866048864566336e-05, + "loss": 0.0321, + "step": 1652 + }, + { + "epoch": 0.33146517213771015, + "learning_rate": 1.2879434848378356e-05, + "loss": 0.3497, + "step": 1654 + }, + { + "epoch": 0.33146517213771015, + "learning_rate": 1.2892815208591734e-05, + "loss": 0.4356, + "step": 1656 + }, + { + "epoch": 0.3322658126501201, + "learning_rate": 1.2906189919074336e-05, + "loss": 0.2773, + "step": 1658 + }, + { + "epoch": 0.3322658126501201, + "learning_rate": 1.2919558953705047e-05, + "loss": 0.7321, + "step": 1660 + }, + { + "epoch": 0.33306645316253003, + "learning_rate": 1.293292228637389e-05, + "loss": 0.5336, + "step": 1662 + }, + { + "epoch": 0.33306645316253003, + "learning_rate": 1.2946279890981966e-05, + "loss": 0.1866, + "step": 1664 + }, + { + "epoch": 0.33386709367493994, + "learning_rate": 1.2959631741441583e-05, + "loss": 0.4319, + "step": 1666 + }, + { + "epoch": 0.33386709367493994, + "learning_rate": 1.2972977811676289e-05, + "loss": 0.0908, + "step": 1668 + }, + { + "epoch": 0.33466773418734985, + "learning_rate": 1.298631807562092e-05, + "loss": 0.1757, + "step": 1670 + }, + { + "epoch": 0.33466773418734985, + "learning_rate": 1.2999652507221652e-05, + "loss": 0.1134, + "step": 1672 + }, + { + "epoch": 0.3354683746997598, + "learning_rate": 1.3012981080436036e-05, + "loss": 0.9963, + "step": 1674 + }, + { + "epoch": 0.3354683746997598, + "learning_rate": 1.3026303769233109e-05, + "loss": 0.1905, + "step": 1676 + }, + { + "epoch": 0.33626901521216973, + "learning_rate": 1.3039620547593357e-05, + "loss": 0.1582, + "step": 1678 + }, + { + "epoch": 0.33626901521216973, + "learning_rate": 1.3052931389508822e-05, + "loss": 0.3927, + "step": 1680 + }, + { + "epoch": 0.33706965572457964, + "learning_rate": 1.3066236268983143e-05, + "loss": 0.2775, + "step": 1682 + }, + { + "epoch": 0.33706965572457964, + "learning_rate": 1.3079535160031601e-05, + "loss": 0.3273, + "step": 1684 + }, + { + "epoch": 0.3378702962369896, + "learning_rate": 1.3092828036681178e-05, + "loss": 0.3939, + "step": 1686 + }, + { + "epoch": 0.3378702962369896, + "learning_rate": 1.3106114872970575e-05, + "loss": 0.6645, + "step": 1688 + }, + { + "epoch": 0.3386709367493995, + "learning_rate": 1.3119395642950348e-05, + "loss": 0.5927, + "step": 1690 + }, + { + "epoch": 0.3386709367493995, + "learning_rate": 1.313267032068285e-05, + "loss": 0.1866, + "step": 1692 + }, + { + "epoch": 0.33947157726180943, + "learning_rate": 1.3145938880242346e-05, + "loss": 0.5036, + "step": 1694 + }, + { + "epoch": 0.33947157726180943, + "learning_rate": 1.3159201295715054e-05, + "loss": 0.2308, + "step": 1696 + }, + { + "epoch": 0.3402722177742194, + "learning_rate": 1.3172457541199188e-05, + "loss": 0.1594, + "step": 1698 + }, + { + "epoch": 0.3402722177742194, + "learning_rate": 1.3185707590804997e-05, + "loss": 0.1036, + "step": 1700 + }, + { + "epoch": 0.3410728582866293, + "learning_rate": 1.3198951418654882e-05, + "loss": 0.1588, + "step": 1702 + }, + { + "epoch": 0.3410728582866293, + "learning_rate": 1.321218899888334e-05, + "loss": 0.3016, + "step": 1704 + }, + { + "epoch": 0.3418734987990392, + "learning_rate": 1.322542030563709e-05, + "loss": 0.1278, + "step": 1706 + }, + { + "epoch": 0.3418734987990392, + "learning_rate": 1.3238645313075109e-05, + "loss": 0.198, + "step": 1708 + }, + { + "epoch": 0.34267413931144913, + "learning_rate": 1.3251863995368665e-05, + "loss": 0.3702, + "step": 1710 + }, + { + "epoch": 0.34267413931144913, + "learning_rate": 1.326507632670139e-05, + "loss": 0.3422, + "step": 1712 + }, + { + "epoch": 0.3434747798238591, + "learning_rate": 1.3278282281269293e-05, + "loss": 0.1581, + "step": 1714 + }, + { + "epoch": 0.3434747798238591, + "learning_rate": 1.3291481833280894e-05, + "loss": 0.1753, + "step": 1716 + }, + { + "epoch": 0.344275420336269, + "learning_rate": 1.3304674956957167e-05, + "loss": 0.1498, + "step": 1718 + }, + { + "epoch": 0.344275420336269, + "learning_rate": 1.3317861626531652e-05, + "loss": 0.9579, + "step": 1720 + }, + { + "epoch": 0.3450760608486789, + "learning_rate": 1.3331041816250503e-05, + "loss": 0.3371, + "step": 1722 + }, + { + "epoch": 0.3450760608486789, + "learning_rate": 1.3344215500372517e-05, + "loss": 0.29, + "step": 1724 + }, + { + "epoch": 0.3458767013610889, + "learning_rate": 1.335738265316921e-05, + "loss": 0.1687, + "step": 1726 + }, + { + "epoch": 0.3458767013610889, + "learning_rate": 1.3370543248924826e-05, + "loss": 0.0662, + "step": 1728 + }, + { + "epoch": 0.3466773418734988, + "learning_rate": 1.3383697261936472e-05, + "loss": 0.1648, + "step": 1730 + }, + { + "epoch": 0.3466773418734988, + "learning_rate": 1.3396844666514062e-05, + "loss": 0.1584, + "step": 1732 + }, + { + "epoch": 0.3474779823859087, + "learning_rate": 1.3409985436980422e-05, + "loss": 0.6786, + "step": 1734 + }, + { + "epoch": 0.3474779823859087, + "learning_rate": 1.3423119547671348e-05, + "loss": 0.1425, + "step": 1736 + }, + { + "epoch": 0.3482786228983187, + "learning_rate": 1.3436246972935638e-05, + "loss": 0.1852, + "step": 1738 + }, + { + "epoch": 0.3482786228983187, + "learning_rate": 1.344936768713513e-05, + "loss": 0.1847, + "step": 1740 + }, + { + "epoch": 0.3490792634107286, + "learning_rate": 1.346248166464481e-05, + "loss": 0.3442, + "step": 1742 + }, + { + "epoch": 0.3490792634107286, + "learning_rate": 1.347558887985279e-05, + "loss": 0.2026, + "step": 1744 + }, + { + "epoch": 0.3498799039231385, + "learning_rate": 1.348868930716039e-05, + "loss": 0.1953, + "step": 1746 + }, + { + "epoch": 0.3498799039231385, + "learning_rate": 1.3501782920982189e-05, + "loss": 0.2775, + "step": 1748 + }, + { + "epoch": 0.3506805444355484, + "learning_rate": 1.3514869695746078e-05, + "loss": 0.7842, + "step": 1750 + }, + { + "epoch": 0.3506805444355484, + "learning_rate": 1.3527949605893305e-05, + "loss": 0.1103, + "step": 1752 + }, + { + "epoch": 0.3514811849479584, + "learning_rate": 1.3541022625878501e-05, + "loss": 0.4597, + "step": 1754 + }, + { + "epoch": 0.3514811849479584, + "learning_rate": 1.3554088730169812e-05, + "loss": 0.155, + "step": 1756 + }, + { + "epoch": 0.3522818254603683, + "learning_rate": 1.3567147893248833e-05, + "loss": 0.3697, + "step": 1758 + }, + { + "epoch": 0.3522818254603683, + "learning_rate": 1.3580200089610739e-05, + "loss": 0.3635, + "step": 1760 + }, + { + "epoch": 0.3530824659727782, + "learning_rate": 1.3593245293764303e-05, + "loss": 0.2324, + "step": 1762 + }, + { + "epoch": 0.3530824659727782, + "learning_rate": 1.3606283480231962e-05, + "loss": 0.1206, + "step": 1764 + }, + { + "epoch": 0.35388310648518817, + "learning_rate": 1.361931462354984e-05, + "loss": 0.4571, + "step": 1766 + }, + { + "epoch": 0.35388310648518817, + "learning_rate": 1.3632338698267863e-05, + "loss": 0.2093, + "step": 1768 + }, + { + "epoch": 0.3546837469975981, + "learning_rate": 1.3645355678949715e-05, + "loss": 0.2104, + "step": 1770 + }, + { + "epoch": 0.3546837469975981, + "learning_rate": 1.3658365540172948e-05, + "loss": 0.1509, + "step": 1772 + }, + { + "epoch": 0.355484387510008, + "learning_rate": 1.3671368256529026e-05, + "loss": 0.3641, + "step": 1774 + }, + { + "epoch": 0.355484387510008, + "learning_rate": 1.368436380262336e-05, + "loss": 0.4026, + "step": 1776 + }, + { + "epoch": 0.35628502802241796, + "learning_rate": 1.3697352153075365e-05, + "loss": 0.2054, + "step": 1778 + }, + { + "epoch": 0.35628502802241796, + "learning_rate": 1.3710333282518497e-05, + "loss": 0.3873, + "step": 1780 + }, + { + "epoch": 0.35708566853482787, + "learning_rate": 1.3723307165600361e-05, + "loss": 0.2098, + "step": 1782 + }, + { + "epoch": 0.35708566853482787, + "learning_rate": 1.3736273776982667e-05, + "loss": 0.5413, + "step": 1784 + }, + { + "epoch": 0.3578863090472378, + "learning_rate": 1.3749233091341344e-05, + "loss": 0.6091, + "step": 1786 + }, + { + "epoch": 0.3578863090472378, + "learning_rate": 1.3762185083366562e-05, + "loss": 0.363, + "step": 1788 + }, + { + "epoch": 0.3586869495596477, + "learning_rate": 1.3775129727762808e-05, + "loss": 0.1509, + "step": 1790 + }, + { + "epoch": 0.3586869495596477, + "learning_rate": 1.3788066999248893e-05, + "loss": 0.5257, + "step": 1792 + }, + { + "epoch": 0.35948759007205766, + "learning_rate": 1.3800996872558075e-05, + "loss": 0.1339, + "step": 1794 + }, + { + "epoch": 0.35948759007205766, + "learning_rate": 1.3813919322438018e-05, + "loss": 0.2294, + "step": 1796 + }, + { + "epoch": 0.3602882305844676, + "learning_rate": 1.3826834323650899e-05, + "loss": 0.1752, + "step": 1798 + }, + { + "epoch": 0.3602882305844676, + "learning_rate": 1.3839741850973435e-05, + "loss": 0.2616, + "step": 1800 + }, + { + "epoch": 0.3610888710968775, + "learning_rate": 1.3852641879196952e-05, + "loss": 0.223, + "step": 1802 + }, + { + "epoch": 0.3610888710968775, + "learning_rate": 1.3865534383127413e-05, + "loss": 0.2532, + "step": 1804 + }, + { + "epoch": 0.36188951160928745, + "learning_rate": 1.387841933758546e-05, + "loss": 0.6906, + "step": 1806 + }, + { + "epoch": 0.36188951160928745, + "learning_rate": 1.3891296717406533e-05, + "loss": 0.1677, + "step": 1808 + }, + { + "epoch": 0.36269015212169736, + "learning_rate": 1.3904166497440812e-05, + "loss": 0.382, + "step": 1810 + }, + { + "epoch": 0.36269015212169736, + "learning_rate": 1.391702865255334e-05, + "loss": 0.0419, + "step": 1812 + }, + { + "epoch": 0.3634907926341073, + "learning_rate": 1.3929883157624046e-05, + "loss": 0.2186, + "step": 1814 + }, + { + "epoch": 0.3634907926341073, + "learning_rate": 1.3942729987547808e-05, + "loss": 0.1339, + "step": 1816 + }, + { + "epoch": 0.36429143314651724, + "learning_rate": 1.3955569117234468e-05, + "loss": 0.2884, + "step": 1818 + }, + { + "epoch": 0.36429143314651724, + "learning_rate": 1.3968400521608962e-05, + "loss": 0.4636, + "step": 1820 + }, + { + "epoch": 0.36509207365892715, + "learning_rate": 1.3981224175611265e-05, + "loss": 0.1977, + "step": 1822 + }, + { + "epoch": 0.36509207365892715, + "learning_rate": 1.3994040054196498e-05, + "loss": 0.1427, + "step": 1824 + }, + { + "epoch": 0.36589271417133706, + "learning_rate": 1.4006848132334979e-05, + "loss": 0.2076, + "step": 1826 + }, + { + "epoch": 0.36589271417133706, + "learning_rate": 1.4019648385012245e-05, + "loss": 0.2774, + "step": 1828 + }, + { + "epoch": 0.366693354683747, + "learning_rate": 1.4032440787229135e-05, + "loss": 0.2814, + "step": 1830 + }, + { + "epoch": 0.366693354683747, + "learning_rate": 1.4045225314001789e-05, + "loss": 0.0472, + "step": 1832 + }, + { + "epoch": 0.36749399519615694, + "learning_rate": 1.4058001940361781e-05, + "loss": 0.1134, + "step": 1834 + }, + { + "epoch": 0.36749399519615694, + "learning_rate": 1.4070770641356069e-05, + "loss": 0.1165, + "step": 1836 + }, + { + "epoch": 0.36829463570856685, + "learning_rate": 1.40835313920471e-05, + "loss": 0.5222, + "step": 1838 + }, + { + "epoch": 0.36829463570856685, + "learning_rate": 1.4096284167512856e-05, + "loss": 0.093, + "step": 1840 + }, + { + "epoch": 0.36909527622097676, + "learning_rate": 1.4109028942846888e-05, + "loss": 0.3191, + "step": 1842 + }, + { + "epoch": 0.36909527622097676, + "learning_rate": 1.4121765693158355e-05, + "loss": 0.0727, + "step": 1844 + }, + { + "epoch": 0.36989591673338673, + "learning_rate": 1.4134494393572146e-05, + "loss": 0.4688, + "step": 1846 + }, + { + "epoch": 0.36989591673338673, + "learning_rate": 1.4147215019228813e-05, + "loss": 0.473, + "step": 1848 + }, + { + "epoch": 0.37069655724579664, + "learning_rate": 1.4159927545284697e-05, + "loss": 0.3694, + "step": 1850 + }, + { + "epoch": 0.37069655724579664, + "learning_rate": 1.4172631946911964e-05, + "loss": 0.3039, + "step": 1852 + }, + { + "epoch": 0.37149719775820655, + "learning_rate": 1.4185328199298636e-05, + "loss": 0.5704, + "step": 1854 + }, + { + "epoch": 0.37149719775820655, + "learning_rate": 1.4198016277648665e-05, + "loss": 0.2662, + "step": 1856 + }, + { + "epoch": 0.37229783827061647, + "learning_rate": 1.4210696157181936e-05, + "loss": 0.1665, + "step": 1858 + }, + { + "epoch": 0.37229783827061647, + "learning_rate": 1.4223367813134406e-05, + "loss": 0.3147, + "step": 1860 + }, + { + "epoch": 0.37309847878302643, + "learning_rate": 1.4236031220758037e-05, + "loss": 0.3042, + "step": 1862 + }, + { + "epoch": 0.37309847878302643, + "learning_rate": 1.4248686355320922e-05, + "loss": 0.508, + "step": 1864 + }, + { + "epoch": 0.37389911929543634, + "learning_rate": 1.426133319210731e-05, + "loss": 0.382, + "step": 1866 + }, + { + "epoch": 0.37389911929543634, + "learning_rate": 1.4273971706417653e-05, + "loss": 0.2046, + "step": 1868 + }, + { + "epoch": 0.37469975980784626, + "learning_rate": 1.4286601873568642e-05, + "loss": 0.2612, + "step": 1870 + }, + { + "epoch": 0.37469975980784626, + "learning_rate": 1.429922366889332e-05, + "loss": 0.2237, + "step": 1872 + }, + { + "epoch": 0.3755004003202562, + "learning_rate": 1.431183706774103e-05, + "loss": 0.1515, + "step": 1874 + }, + { + "epoch": 0.3755004003202562, + "learning_rate": 1.4324442045477534e-05, + "loss": 0.1314, + "step": 1876 + }, + { + "epoch": 0.37630104083266613, + "learning_rate": 1.4337038577485035e-05, + "loss": 0.3572, + "step": 1878 + }, + { + "epoch": 0.37630104083266613, + "learning_rate": 1.4349626639162231e-05, + "loss": 0.2457, + "step": 1880 + }, + { + "epoch": 0.37710168134507605, + "learning_rate": 1.436220620592437e-05, + "loss": 0.2285, + "step": 1882 + }, + { + "epoch": 0.37710168134507605, + "learning_rate": 1.4374777253203265e-05, + "loss": 0.1202, + "step": 1884 + }, + { + "epoch": 0.377902321857486, + "learning_rate": 1.4387339756447422e-05, + "loss": 0.1896, + "step": 1886 + }, + { + "epoch": 0.377902321857486, + "learning_rate": 1.4399893691121985e-05, + "loss": 0.2798, + "step": 1888 + }, + { + "epoch": 0.3787029623698959, + "learning_rate": 1.4412439032708848e-05, + "loss": 0.4395, + "step": 1890 + }, + { + "epoch": 0.3787029623698959, + "learning_rate": 1.4424975756706684e-05, + "loss": 0.7018, + "step": 1892 + }, + { + "epoch": 0.37950360288230583, + "learning_rate": 1.4437503838631002e-05, + "loss": 0.1509, + "step": 1894 + }, + { + "epoch": 0.37950360288230583, + "learning_rate": 1.4450023254014185e-05, + "loss": 0.2884, + "step": 1896 + }, + { + "epoch": 0.38030424339471575, + "learning_rate": 1.4462533978405529e-05, + "loss": 0.1681, + "step": 1898 + }, + { + "epoch": 0.38030424339471575, + "learning_rate": 1.4475035987371348e-05, + "loss": 0.6103, + "step": 1900 + }, + { + "epoch": 0.3811048839071257, + "learning_rate": 1.4487529256494937e-05, + "loss": 0.4381, + "step": 1902 + }, + { + "epoch": 0.3811048839071257, + "learning_rate": 1.4500013761376663e-05, + "loss": 0.0341, + "step": 1904 + }, + { + "epoch": 0.3819055244195356, + "learning_rate": 1.4512489477634024e-05, + "loss": 0.2503, + "step": 1906 + }, + { + "epoch": 0.3819055244195356, + "learning_rate": 1.4524956380901674e-05, + "loss": 0.5279, + "step": 1908 + }, + { + "epoch": 0.38270616493194554, + "learning_rate": 1.4537414446831461e-05, + "loss": 0.3645, + "step": 1910 + }, + { + "epoch": 0.38270616493194554, + "learning_rate": 1.454986365109255e-05, + "loss": 0.2444, + "step": 1912 + }, + { + "epoch": 0.3835068054443555, + "learning_rate": 1.4562303969371357e-05, + "loss": 0.6764, + "step": 1914 + }, + { + "epoch": 0.3835068054443555, + "learning_rate": 1.4574735377371669e-05, + "loss": 0.1493, + "step": 1916 + }, + { + "epoch": 0.3843074459567654, + "learning_rate": 1.4587157850814679e-05, + "loss": 0.4251, + "step": 1918 + }, + { + "epoch": 0.3843074459567654, + "learning_rate": 1.4599571365439027e-05, + "loss": 0.5829, + "step": 1920 + }, + { + "epoch": 0.3851080864691753, + "learning_rate": 1.4611975897000849e-05, + "loss": 0.1838, + "step": 1922 + }, + { + "epoch": 0.3851080864691753, + "learning_rate": 1.4624371421273812e-05, + "loss": 0.4492, + "step": 1924 + }, + { + "epoch": 0.3859087269815853, + "learning_rate": 1.463675791404922e-05, + "loss": 0.1978, + "step": 1926 + }, + { + "epoch": 0.3859087269815853, + "learning_rate": 1.4649135351135968e-05, + "loss": 0.2612, + "step": 1928 + }, + { + "epoch": 0.3867093674939952, + "learning_rate": 1.4661503708360652e-05, + "loss": 0.2757, + "step": 1930 + }, + { + "epoch": 0.3867093674939952, + "learning_rate": 1.4673862961567604e-05, + "loss": 0.1295, + "step": 1932 + }, + { + "epoch": 0.3875100080064051, + "learning_rate": 1.4686213086618932e-05, + "loss": 0.1662, + "step": 1934 + }, + { + "epoch": 0.3875100080064051, + "learning_rate": 1.4698554059394563e-05, + "loss": 0.5028, + "step": 1936 + }, + { + "epoch": 0.388310648518815, + "learning_rate": 1.4710885855792338e-05, + "loss": 0.3159, + "step": 1938 + }, + { + "epoch": 0.388310648518815, + "learning_rate": 1.4723208451727977e-05, + "loss": 0.2233, + "step": 1940 + }, + { + "epoch": 0.389111289031225, + "learning_rate": 1.4735521823135184e-05, + "loss": 0.7031, + "step": 1942 + }, + { + "epoch": 0.389111289031225, + "learning_rate": 1.4747825945965675e-05, + "loss": 0.5282, + "step": 1944 + }, + { + "epoch": 0.3899119295436349, + "learning_rate": 1.4760120796189233e-05, + "loss": 0.2355, + "step": 1946 + }, + { + "epoch": 0.3899119295436349, + "learning_rate": 1.4772406349793749e-05, + "loss": 0.2714, + "step": 1948 + }, + { + "epoch": 0.3907125700560448, + "learning_rate": 1.4784682582785254e-05, + "loss": 0.098, + "step": 1950 + }, + { + "epoch": 0.3907125700560448, + "learning_rate": 1.4796949471188033e-05, + "loss": 0.0441, + "step": 1952 + }, + { + "epoch": 0.3915132105684548, + "learning_rate": 1.4809206991044571e-05, + "loss": 0.4596, + "step": 1954 + }, + { + "epoch": 0.3915132105684548, + "learning_rate": 1.4821455118415666e-05, + "loss": 0.48, + "step": 1956 + }, + { + "epoch": 0.3923138510808647, + "learning_rate": 1.4833693829380458e-05, + "loss": 0.602, + "step": 1958 + }, + { + "epoch": 0.3923138510808647, + "learning_rate": 1.4845923100036479e-05, + "loss": 0.2665, + "step": 1960 + }, + { + "epoch": 0.3931144915932746, + "learning_rate": 1.4858142906499686e-05, + "loss": 0.2872, + "step": 1962 + }, + { + "epoch": 0.3931144915932746, + "learning_rate": 1.4870353224904563e-05, + "loss": 0.2957, + "step": 1964 + }, + { + "epoch": 0.3939151321056846, + "learning_rate": 1.4882554031404075e-05, + "loss": 0.3213, + "step": 1966 + }, + { + "epoch": 0.3939151321056846, + "learning_rate": 1.4894745302169786e-05, + "loss": 0.4493, + "step": 1968 + }, + { + "epoch": 0.3947157726180945, + "learning_rate": 1.4906927013391879e-05, + "loss": 0.1186, + "step": 1970 + }, + { + "epoch": 0.3947157726180945, + "learning_rate": 1.4919099141279205e-05, + "loss": 0.0231, + "step": 1972 + }, + { + "epoch": 0.3955164131305044, + "learning_rate": 1.4931261662059338e-05, + "loss": 0.5704, + "step": 1974 + }, + { + "epoch": 0.3955164131305044, + "learning_rate": 1.4943414551978597e-05, + "loss": 0.1527, + "step": 1976 + }, + { + "epoch": 0.3963170536429143, + "learning_rate": 1.4955557787302151e-05, + "loss": 0.2607, + "step": 1978 + }, + { + "epoch": 0.3963170536429143, + "learning_rate": 1.4967691344313988e-05, + "loss": 0.4968, + "step": 1980 + }, + { + "epoch": 0.3971176941553243, + "learning_rate": 1.4979815199317005e-05, + "loss": 0.3033, + "step": 1982 + }, + { + "epoch": 0.3971176941553243, + "learning_rate": 1.499192932863305e-05, + "loss": 0.3226, + "step": 1984 + }, + { + "epoch": 0.3979183346677342, + "learning_rate": 1.5004033708602967e-05, + "loss": 0.2159, + "step": 1986 + }, + { + "epoch": 0.3979183346677342, + "learning_rate": 1.5016128315586626e-05, + "loss": 0.3821, + "step": 1988 + }, + { + "epoch": 0.3987189751801441, + "learning_rate": 1.5028213125963029e-05, + "loss": 0.3776, + "step": 1990 + }, + { + "epoch": 0.3987189751801441, + "learning_rate": 1.5040288116130261e-05, + "loss": 0.3598, + "step": 1992 + }, + { + "epoch": 0.39951961569255406, + "learning_rate": 1.5052353262505603e-05, + "loss": 0.1422, + "step": 1994 + }, + { + "epoch": 0.39951961569255406, + "learning_rate": 1.5064408541525568e-05, + "loss": 0.2193, + "step": 1996 + }, + { + "epoch": 0.400320256204964, + "learning_rate": 1.5076453929645933e-05, + "loss": 0.2446, + "step": 1998 + }, + { + "epoch": 0.400320256204964, + "learning_rate": 1.5088489403341793e-05, + "loss": 0.091, + "step": 2000 + }, + { + "epoch": 0.4011208967173739, + "learning_rate": 1.510051493910759e-05, + "loss": 0.1945, + "step": 2002 + }, + { + "epoch": 0.4011208967173739, + "learning_rate": 1.5112530513457229e-05, + "loss": 0.4264, + "step": 2004 + }, + { + "epoch": 0.40192153722978385, + "learning_rate": 1.512453610292401e-05, + "loss": 0.3213, + "step": 2006 + }, + { + "epoch": 0.40192153722978385, + "learning_rate": 1.513653168406076e-05, + "loss": 0.4588, + "step": 2008 + }, + { + "epoch": 0.40272217774219377, + "learning_rate": 1.514851723343985e-05, + "loss": 0.2394, + "step": 2010 + }, + { + "epoch": 0.40272217774219377, + "learning_rate": 1.5160492727653245e-05, + "loss": 0.2162, + "step": 2012 + }, + { + "epoch": 0.4035228182546037, + "learning_rate": 1.5172458143312522e-05, + "loss": 0.2277, + "step": 2014 + }, + { + "epoch": 0.4035228182546037, + "learning_rate": 1.5184413457049006e-05, + "loss": 0.1239, + "step": 2016 + }, + { + "epoch": 0.4043234587670136, + "learning_rate": 1.5196358645513685e-05, + "loss": 0.3698, + "step": 2018 + }, + { + "epoch": 0.4043234587670136, + "learning_rate": 1.5208293685377354e-05, + "loss": 0.6308, + "step": 2020 + }, + { + "epoch": 0.40512409927942356, + "learning_rate": 1.5220218553330618e-05, + "loss": 0.1513, + "step": 2022 + }, + { + "epoch": 0.40512409927942356, + "learning_rate": 1.5232133226083954e-05, + "loss": 0.1977, + "step": 2024 + }, + { + "epoch": 0.40592473979183347, + "learning_rate": 1.5244037680367744e-05, + "loss": 0.1753, + "step": 2026 + }, + { + "epoch": 0.40592473979183347, + "learning_rate": 1.5255931892932322e-05, + "loss": 0.154, + "step": 2028 + }, + { + "epoch": 0.4067253803042434, + "learning_rate": 1.5267815840548057e-05, + "loss": 0.352, + "step": 2030 + }, + { + "epoch": 0.4067253803042434, + "learning_rate": 1.527968950000533e-05, + "loss": 0.1966, + "step": 2032 + }, + { + "epoch": 0.40752602081665334, + "learning_rate": 1.529155284811463e-05, + "loss": 0.2521, + "step": 2034 + }, + { + "epoch": 0.40752602081665334, + "learning_rate": 1.5303405861706574e-05, + "loss": 0.3046, + "step": 2036 + }, + { + "epoch": 0.40832666132906326, + "learning_rate": 1.5315248517631975e-05, + "loss": 0.6125, + "step": 2038 + }, + { + "epoch": 0.40832666132906326, + "learning_rate": 1.532708079276185e-05, + "loss": 0.0514, + "step": 2040 + }, + { + "epoch": 0.40912730184147317, + "learning_rate": 1.5338902663987544e-05, + "loss": 0.1347, + "step": 2042 + }, + { + "epoch": 0.40912730184147317, + "learning_rate": 1.5350714108220667e-05, + "loss": 0.0644, + "step": 2044 + }, + { + "epoch": 0.4099279423538831, + "learning_rate": 1.5362515102393217e-05, + "loss": 0.7223, + "step": 2046 + }, + { + "epoch": 0.4099279423538831, + "learning_rate": 1.5374305623457594e-05, + "loss": 0.0498, + "step": 2048 + }, + { + "epoch": 0.41072858286629305, + "learning_rate": 1.5386085648386656e-05, + "loss": 0.151, + "step": 2050 + }, + { + "epoch": 0.41072858286629305, + "learning_rate": 1.539785515417376e-05, + "loss": 0.2163, + "step": 2052 + }, + { + "epoch": 0.41152922337870296, + "learning_rate": 1.540961411783279e-05, + "loss": 0.3696, + "step": 2054 + }, + { + "epoch": 0.41152922337870296, + "learning_rate": 1.542136251639826e-05, + "loss": 0.4409, + "step": 2056 + }, + { + "epoch": 0.41232986389111287, + "learning_rate": 1.5433100326925288e-05, + "loss": 0.4636, + "step": 2058 + }, + { + "epoch": 0.41232986389111287, + "learning_rate": 1.5444827526489668e-05, + "loss": 0.4201, + "step": 2060 + }, + { + "epoch": 0.41313050440352284, + "learning_rate": 1.545654409218793e-05, + "loss": 0.1682, + "step": 2062 + }, + { + "epoch": 0.41313050440352284, + "learning_rate": 1.5468250001137368e-05, + "loss": 0.133, + "step": 2064 + }, + { + "epoch": 0.41393114491593275, + "learning_rate": 1.5479945230476066e-05, + "loss": 0.1679, + "step": 2066 + }, + { + "epoch": 0.41393114491593275, + "learning_rate": 1.5491629757363026e-05, + "loss": 0.0951, + "step": 2068 + }, + { + "epoch": 0.41473178542834266, + "learning_rate": 1.550330355897809e-05, + "loss": 0.1947, + "step": 2070 + }, + { + "epoch": 0.41473178542834266, + "learning_rate": 1.551496661252208e-05, + "loss": 0.3252, + "step": 2072 + }, + { + "epoch": 0.4155324259407526, + "learning_rate": 1.5526618895216793e-05, + "loss": 0.3031, + "step": 2074 + }, + { + "epoch": 0.4155324259407526, + "learning_rate": 1.5538260384305073e-05, + "loss": 0.6415, + "step": 2076 + }, + { + "epoch": 0.41633306645316254, + "learning_rate": 1.5549891057050837e-05, + "loss": 0.2067, + "step": 2078 + }, + { + "epoch": 0.41633306645316254, + "learning_rate": 1.5561510890739113e-05, + "loss": 0.0631, + "step": 2080 + }, + { + "epoch": 0.41713370696557245, + "learning_rate": 1.557311986267615e-05, + "loss": 0.5288, + "step": 2082 + }, + { + "epoch": 0.41713370696557245, + "learning_rate": 1.5584717950189353e-05, + "loss": 0.5654, + "step": 2084 + }, + { + "epoch": 0.41793434747798236, + "learning_rate": 1.5596305130627404e-05, + "loss": 0.3229, + "step": 2086 + }, + { + "epoch": 0.41793434747798236, + "learning_rate": 1.5607881381360296e-05, + "loss": 0.2014, + "step": 2088 + }, + { + "epoch": 0.4187349879903923, + "learning_rate": 1.5619446679779357e-05, + "loss": 0.2087, + "step": 2090 + }, + { + "epoch": 0.4187349879903923, + "learning_rate": 1.563100100329731e-05, + "loss": 0.4383, + "step": 2092 + }, + { + "epoch": 0.41953562850280224, + "learning_rate": 1.564254432934829e-05, + "loss": 0.8945, + "step": 2094 + }, + { + "epoch": 0.41953562850280224, + "learning_rate": 1.565407663538797e-05, + "loss": 0.0433, + "step": 2096 + }, + { + "epoch": 0.42033626901521215, + "learning_rate": 1.5665597898893484e-05, + "loss": 0.4326, + "step": 2098 + }, + { + "epoch": 0.42033626901521215, + "learning_rate": 1.567710809736356e-05, + "loss": 0.2864, + "step": 2100 + }, + { + "epoch": 0.4211369095276221, + "learning_rate": 1.568860720831853e-05, + "loss": 0.3651, + "step": 2102 + }, + { + "epoch": 0.4211369095276221, + "learning_rate": 1.5700095209300376e-05, + "loss": 0.1667, + "step": 2104 + }, + { + "epoch": 0.42193755004003203, + "learning_rate": 1.5711572077872774e-05, + "loss": 0.3225, + "step": 2106 + }, + { + "epoch": 0.42193755004003203, + "learning_rate": 1.572303779162118e-05, + "loss": 0.2241, + "step": 2108 + }, + { + "epoch": 0.42273819055244194, + "learning_rate": 1.573449232815279e-05, + "loss": 0.3348, + "step": 2110 + }, + { + "epoch": 0.42273819055244194, + "learning_rate": 1.5745935665096647e-05, + "loss": 0.1496, + "step": 2112 + }, + { + "epoch": 0.4235388310648519, + "learning_rate": 1.5757367780103666e-05, + "loss": 0.2714, + "step": 2114 + }, + { + "epoch": 0.4235388310648519, + "learning_rate": 1.5768788650846677e-05, + "loss": 0.4636, + "step": 2116 + }, + { + "epoch": 0.4243394715772618, + "learning_rate": 1.5780198255020478e-05, + "loss": 0.397, + "step": 2118 + }, + { + "epoch": 0.4243394715772618, + "learning_rate": 1.5791596570341844e-05, + "loss": 0.1761, + "step": 2120 + }, + { + "epoch": 0.42514011208967173, + "learning_rate": 1.580298357454965e-05, + "loss": 0.2564, + "step": 2122 + }, + { + "epoch": 0.42514011208967173, + "learning_rate": 1.581435924540481e-05, + "loss": 0.4445, + "step": 2124 + }, + { + "epoch": 0.42594075260208164, + "learning_rate": 1.5825723560690403e-05, + "loss": 0.238, + "step": 2126 + }, + { + "epoch": 0.42594075260208164, + "learning_rate": 1.5837076498211666e-05, + "loss": 0.3033, + "step": 2128 + }, + { + "epoch": 0.4267413931144916, + "learning_rate": 1.5848418035796068e-05, + "loss": 0.2188, + "step": 2130 + }, + { + "epoch": 0.4267413931144916, + "learning_rate": 1.5859748151293333e-05, + "loss": 0.3723, + "step": 2132 + }, + { + "epoch": 0.4275420336269015, + "learning_rate": 1.587106682257552e-05, + "loss": 0.2064, + "step": 2134 + }, + { + "epoch": 0.4275420336269015, + "learning_rate": 1.5882374027537005e-05, + "loss": 0.226, + "step": 2136 + }, + { + "epoch": 0.42834267413931143, + "learning_rate": 1.5893669744094577e-05, + "loss": 0.1439, + "step": 2138 + }, + { + "epoch": 0.42834267413931143, + "learning_rate": 1.5904953950187455e-05, + "loss": 0.0465, + "step": 2140 + }, + { + "epoch": 0.4291433146517214, + "learning_rate": 1.591622662377734e-05, + "loss": 0.3633, + "step": 2142 + }, + { + "epoch": 0.4291433146517214, + "learning_rate": 1.5927487742848448e-05, + "loss": 0.1977, + "step": 2144 + }, + { + "epoch": 0.4299439551641313, + "learning_rate": 1.5938737285407567e-05, + "loss": 0.3223, + "step": 2146 + }, + { + "epoch": 0.4299439551641313, + "learning_rate": 1.594997522948412e-05, + "loss": 0.8772, + "step": 2148 + }, + { + "epoch": 0.4307445956765412, + "learning_rate": 1.5961201553130148e-05, + "loss": 0.1245, + "step": 2150 + }, + { + "epoch": 0.4307445956765412, + "learning_rate": 1.5972416234420393e-05, + "loss": 0.0522, + "step": 2152 + }, + { + "epoch": 0.4315452361889512, + "learning_rate": 1.598361925145234e-05, + "loss": 0.1581, + "step": 2154 + }, + { + "epoch": 0.4315452361889512, + "learning_rate": 1.599481058234626e-05, + "loss": 0.2856, + "step": 2156 + }, + { + "epoch": 0.4323458767013611, + "learning_rate": 1.6005990205245216e-05, + "loss": 0.415, + "step": 2158 + }, + { + "epoch": 0.4323458767013611, + "learning_rate": 1.60171580983152e-05, + "loss": 0.5578, + "step": 2160 + }, + { + "epoch": 0.433146517213771, + "learning_rate": 1.602831423974506e-05, + "loss": 0.1504, + "step": 2162 + }, + { + "epoch": 0.433146517213771, + "learning_rate": 1.6039458607746607e-05, + "loss": 0.4326, + "step": 2164 + }, + { + "epoch": 0.4339471577261809, + "learning_rate": 1.6050591180554648e-05, + "loss": 0.1494, + "step": 2166 + }, + { + "epoch": 0.4339471577261809, + "learning_rate": 1.606171193642703e-05, + "loss": 0.1186, + "step": 2168 + }, + { + "epoch": 0.4347477982385909, + "learning_rate": 1.6072820853644677e-05, + "loss": 0.1952, + "step": 2170 + }, + { + "epoch": 0.4347477982385909, + "learning_rate": 1.6083917910511616e-05, + "loss": 0.5925, + "step": 2172 + }, + { + "epoch": 0.4355484387510008, + "learning_rate": 1.6095003085355082e-05, + "loss": 0.2538, + "step": 2174 + }, + { + "epoch": 0.4355484387510008, + "learning_rate": 1.6106076356525474e-05, + "loss": 0.343, + "step": 2176 + }, + { + "epoch": 0.4363490792634107, + "learning_rate": 1.611713770239646e-05, + "loss": 0.2065, + "step": 2178 + }, + { + "epoch": 0.4363490792634107, + "learning_rate": 1.6128187101364982e-05, + "loss": 0.5187, + "step": 2180 + }, + { + "epoch": 0.4371497197758207, + "learning_rate": 1.6139224531851332e-05, + "loss": 0.1274, + "step": 2182 + }, + { + "epoch": 0.4371497197758207, + "learning_rate": 1.6150249972299153e-05, + "loss": 0.0868, + "step": 2184 + }, + { + "epoch": 0.4379503602882306, + "learning_rate": 1.616126340117555e-05, + "loss": 0.3844, + "step": 2186 + }, + { + "epoch": 0.4379503602882306, + "learning_rate": 1.617226479697104e-05, + "loss": 0.3451, + "step": 2188 + }, + { + "epoch": 0.4387510008006405, + "learning_rate": 1.618325413819966e-05, + "loss": 0.1991, + "step": 2190 + }, + { + "epoch": 0.4387510008006405, + "learning_rate": 1.6194231403398994e-05, + "loss": 0.0863, + "step": 2192 + }, + { + "epoch": 0.43955164131305047, + "learning_rate": 1.6205196571130194e-05, + "loss": 0.2099, + "step": 2194 + }, + { + "epoch": 0.43955164131305047, + "learning_rate": 1.621614961997806e-05, + "loss": 0.6277, + "step": 2196 + }, + { + "epoch": 0.4403522818254604, + "learning_rate": 1.6227090528551034e-05, + "loss": 0.0712, + "step": 2198 + }, + { + "epoch": 0.4403522818254604, + "learning_rate": 1.6238019275481313e-05, + "loss": 0.0873, + "step": 2200 + }, + { + "epoch": 0.4411529223378703, + "learning_rate": 1.62489358394248e-05, + "loss": 0.256, + "step": 2202 + }, + { + "epoch": 0.4411529223378703, + "learning_rate": 1.6259840199061212e-05, + "loss": 0.0645, + "step": 2204 + }, + { + "epoch": 0.4419535628502802, + "learning_rate": 1.6270732333094095e-05, + "loss": 0.2258, + "step": 2206 + }, + { + "epoch": 0.4419535628502802, + "learning_rate": 1.6281612220250883e-05, + "loss": 0.4092, + "step": 2208 + }, + { + "epoch": 0.44275420336269017, + "learning_rate": 1.6292479839282897e-05, + "loss": 0.2853, + "step": 2210 + }, + { + "epoch": 0.44275420336269017, + "learning_rate": 1.6303335168965474e-05, + "loss": 0.7046, + "step": 2212 + }, + { + "epoch": 0.4435548438751001, + "learning_rate": 1.6314178188097907e-05, + "loss": 0.5072, + "step": 2214 + }, + { + "epoch": 0.4435548438751001, + "learning_rate": 1.6325008875503543e-05, + "loss": 0.2541, + "step": 2216 + }, + { + "epoch": 0.44435548438751, + "learning_rate": 1.6335827210029816e-05, + "loss": 0.6665, + "step": 2218 + }, + { + "epoch": 0.44435548438751, + "learning_rate": 1.6346633170548285e-05, + "loss": 0.5315, + "step": 2220 + }, + { + "epoch": 0.44515612489991996, + "learning_rate": 1.635742673595467e-05, + "loss": 0.545, + "step": 2222 + }, + { + "epoch": 0.44515612489991996, + "learning_rate": 1.6368207885168897e-05, + "loss": 0.0607, + "step": 2224 + }, + { + "epoch": 0.44595676541232987, + "learning_rate": 1.6378976597135173e-05, + "loss": 0.1846, + "step": 2226 + }, + { + "epoch": 0.44595676541232987, + "learning_rate": 1.6389732850821957e-05, + "loss": 0.4225, + "step": 2228 + }, + { + "epoch": 0.4467574059247398, + "learning_rate": 1.640047662522205e-05, + "loss": 0.3091, + "step": 2230 + }, + { + "epoch": 0.4467574059247398, + "learning_rate": 1.641120789935263e-05, + "loss": 0.2481, + "step": 2232 + }, + { + "epoch": 0.4475580464371497, + "learning_rate": 1.6421926652255282e-05, + "loss": 0.2102, + "step": 2234 + }, + { + "epoch": 0.4475580464371497, + "learning_rate": 1.6432632862996042e-05, + "loss": 0.1568, + "step": 2236 + }, + { + "epoch": 0.44835868694955966, + "learning_rate": 1.6443326510665474e-05, + "loss": 0.2707, + "step": 2238 + }, + { + "epoch": 0.44835868694955966, + "learning_rate": 1.6454007574378637e-05, + "loss": 0.0835, + "step": 2240 + }, + { + "epoch": 0.44915932746196957, + "learning_rate": 1.646467603327518e-05, + "loss": 0.219, + "step": 2242 + }, + { + "epoch": 0.44915932746196957, + "learning_rate": 1.6475331866519377e-05, + "loss": 0.2712, + "step": 2244 + }, + { + "epoch": 0.4499599679743795, + "learning_rate": 1.6485975053300154e-05, + "loss": 0.4228, + "step": 2246 + }, + { + "epoch": 0.4499599679743795, + "learning_rate": 1.6496605572831134e-05, + "loss": 0.4714, + "step": 2248 + }, + { + "epoch": 0.45076060848678945, + "learning_rate": 1.650722340435067e-05, + "loss": 0.5088, + "step": 2250 + }, + { + "epoch": 0.45076060848678945, + "learning_rate": 1.6517828527121928e-05, + "loss": 0.1424, + "step": 2252 + }, + { + "epoch": 0.45156124899919936, + "learning_rate": 1.652842092043287e-05, + "loss": 0.1763, + "step": 2254 + }, + { + "epoch": 0.45156124899919936, + "learning_rate": 1.6539000563596318e-05, + "loss": 0.1936, + "step": 2256 + }, + { + "epoch": 0.45236188951160927, + "learning_rate": 1.6549567435950004e-05, + "loss": 0.8804, + "step": 2258 + }, + { + "epoch": 0.45236188951160927, + "learning_rate": 1.6560121516856586e-05, + "loss": 0.5306, + "step": 2260 + }, + { + "epoch": 0.45316253002401924, + "learning_rate": 1.6570662785703713e-05, + "loss": 0.2049, + "step": 2262 + }, + { + "epoch": 0.45316253002401924, + "learning_rate": 1.6581191221904077e-05, + "loss": 0.8036, + "step": 2264 + }, + { + "epoch": 0.45396317053642915, + "learning_rate": 1.6591706804895408e-05, + "loss": 0.2407, + "step": 2266 + }, + { + "epoch": 0.45396317053642915, + "learning_rate": 1.6602209514140542e-05, + "loss": 0.1464, + "step": 2268 + }, + { + "epoch": 0.45476381104883906, + "learning_rate": 1.6612699329127457e-05, + "loss": 0.198, + "step": 2270 + }, + { + "epoch": 0.45476381104883906, + "learning_rate": 1.6623176229369324e-05, + "loss": 0.1756, + "step": 2272 + }, + { + "epoch": 0.455564451561249, + "learning_rate": 1.6633640194404523e-05, + "loss": 0.5481, + "step": 2274 + }, + { + "epoch": 0.455564451561249, + "learning_rate": 1.6644091203796694e-05, + "loss": 0.0634, + "step": 2276 + }, + { + "epoch": 0.45636509207365894, + "learning_rate": 1.6654529237134816e-05, + "loss": 0.1461, + "step": 2278 + }, + { + "epoch": 0.45636509207365894, + "learning_rate": 1.6664954274033168e-05, + "loss": 0.217, + "step": 2280 + }, + { + "epoch": 0.45716573258606885, + "learning_rate": 1.667536629413143e-05, + "loss": 0.1683, + "step": 2282 + }, + { + "epoch": 0.45716573258606885, + "learning_rate": 1.6685765277094695e-05, + "loss": 0.4839, + "step": 2284 + }, + { + "epoch": 0.45796637309847876, + "learning_rate": 1.6696151202613527e-05, + "loss": 0.1511, + "step": 2286 + }, + { + "epoch": 0.45796637309847876, + "learning_rate": 1.6706524050403996e-05, + "loss": 0.5398, + "step": 2288 + }, + { + "epoch": 0.45876701361088873, + "learning_rate": 1.6716883800207685e-05, + "loss": 0.2436, + "step": 2290 + }, + { + "epoch": 0.45876701361088873, + "learning_rate": 1.6727230431791806e-05, + "loss": 0.2422, + "step": 2292 + }, + { + "epoch": 0.45956765412329864, + "learning_rate": 1.673756392494915e-05, + "loss": 0.1677, + "step": 2294 + }, + { + "epoch": 0.45956765412329864, + "learning_rate": 1.674788425949818e-05, + "loss": 0.1576, + "step": 2296 + }, + { + "epoch": 0.46036829463570855, + "learning_rate": 1.6758191415283063e-05, + "loss": 0.4196, + "step": 2298 + }, + { + "epoch": 0.46036829463570855, + "learning_rate": 1.6768485372173696e-05, + "loss": 0.1937, + "step": 2300 + }, + { + "epoch": 0.4611689351481185, + "learning_rate": 1.6778766110065755e-05, + "loss": 0.1729, + "step": 2302 + }, + { + "epoch": 0.4611689351481185, + "learning_rate": 1.6789033608880735e-05, + "loss": 0.2618, + "step": 2304 + }, + { + "epoch": 0.46196957566052843, + "learning_rate": 1.6799287848566e-05, + "loss": 0.5152, + "step": 2306 + }, + { + "epoch": 0.46196957566052843, + "learning_rate": 1.6809528809094798e-05, + "loss": 0.6898, + "step": 2308 + }, + { + "epoch": 0.46277021617293834, + "learning_rate": 1.6819756470466305e-05, + "loss": 0.4229, + "step": 2310 + }, + { + "epoch": 0.46277021617293834, + "learning_rate": 1.6829970812705674e-05, + "loss": 0.2503, + "step": 2312 + }, + { + "epoch": 0.46357085668534825, + "learning_rate": 1.684017181586408e-05, + "loss": 0.1583, + "step": 2314 + }, + { + "epoch": 0.46357085668534825, + "learning_rate": 1.6850359460018733e-05, + "loss": 0.2153, + "step": 2316 + }, + { + "epoch": 0.4643714971977582, + "learning_rate": 1.6860533725272943e-05, + "loss": 0.1977, + "step": 2318 + }, + { + "epoch": 0.4643714971977582, + "learning_rate": 1.6870694591756165e-05, + "loss": 0.1865, + "step": 2320 + }, + { + "epoch": 0.46517213771016813, + "learning_rate": 1.6880842039624e-05, + "loss": 0.1416, + "step": 2322 + }, + { + "epoch": 0.46517213771016813, + "learning_rate": 1.689097604905826e-05, + "loss": 0.3322, + "step": 2324 + }, + { + "epoch": 0.46597277822257804, + "learning_rate": 1.6901096600267e-05, + "loss": 0.202, + "step": 2326 + }, + { + "epoch": 0.46597277822257804, + "learning_rate": 1.6911203673484577e-05, + "loss": 0.206, + "step": 2328 + }, + { + "epoch": 0.466773418734988, + "learning_rate": 1.6921297248971645e-05, + "loss": 0.1342, + "step": 2330 + }, + { + "epoch": 0.466773418734988, + "learning_rate": 1.6931377307015226e-05, + "loss": 0.2286, + "step": 2332 + }, + { + "epoch": 0.4675740592473979, + "learning_rate": 1.6941443827928778e-05, + "loss": 0.3483, + "step": 2334 + }, + { + "epoch": 0.4675740592473979, + "learning_rate": 1.695149679205214e-05, + "loss": 0.0824, + "step": 2336 + }, + { + "epoch": 0.46837469975980783, + "learning_rate": 1.6961536179751672e-05, + "loss": 0.5755, + "step": 2338 + }, + { + "epoch": 0.46837469975980783, + "learning_rate": 1.6971561971420222e-05, + "loss": 0.1356, + "step": 2340 + }, + { + "epoch": 0.4691753402722178, + "learning_rate": 1.6981574147477204e-05, + "loss": 0.3308, + "step": 2342 + }, + { + "epoch": 0.4691753402722178, + "learning_rate": 1.6991572688368628e-05, + "loss": 0.255, + "step": 2344 + }, + { + "epoch": 0.4699759807846277, + "learning_rate": 1.70015575745671e-05, + "loss": 0.8065, + "step": 2346 + }, + { + "epoch": 0.4699759807846277, + "learning_rate": 1.701152878657196e-05, + "loss": 0.3164, + "step": 2348 + }, + { + "epoch": 0.4707766212970376, + "learning_rate": 1.7021486304909196e-05, + "loss": 0.2188, + "step": 2350 + }, + { + "epoch": 0.4707766212970376, + "learning_rate": 1.7031430110131562e-05, + "loss": 0.4395, + "step": 2352 + }, + { + "epoch": 0.47157726180944753, + "learning_rate": 1.7041360182818583e-05, + "loss": 0.1784, + "step": 2354 + }, + { + "epoch": 0.47157726180944753, + "learning_rate": 1.705127650357662e-05, + "loss": 0.246, + "step": 2356 + }, + { + "epoch": 0.4723779023218575, + "learning_rate": 1.7061179053038887e-05, + "loss": 0.6699, + "step": 2358 + }, + { + "epoch": 0.4723779023218575, + "learning_rate": 1.7071067811865467e-05, + "loss": 0.4094, + "step": 2360 + }, + { + "epoch": 0.4731785428342674, + "learning_rate": 1.708094276074343e-05, + "loss": 0.9815, + "step": 2362 + }, + { + "epoch": 0.4731785428342674, + "learning_rate": 1.7090803880386778e-05, + "loss": 0.0879, + "step": 2364 + }, + { + "epoch": 0.4739791833466773, + "learning_rate": 1.7100651151536525e-05, + "loss": 0.1681, + "step": 2366 + }, + { + "epoch": 0.4739791833466773, + "learning_rate": 1.7110484554960738e-05, + "loss": 0.2034, + "step": 2368 + }, + { + "epoch": 0.4747798238590873, + "learning_rate": 1.712030407145457e-05, + "loss": 0.4398, + "step": 2370 + }, + { + "epoch": 0.4747798238590873, + "learning_rate": 1.713010968184029e-05, + "loss": 0.1583, + "step": 2372 + }, + { + "epoch": 0.4755804643714972, + "learning_rate": 1.7139901366967332e-05, + "loss": 0.2618, + "step": 2374 + }, + { + "epoch": 0.4755804643714972, + "learning_rate": 1.7149679107712306e-05, + "loss": 0.0996, + "step": 2376 + }, + { + "epoch": 0.4763811048839071, + "learning_rate": 1.71594428849791e-05, + "loss": 0.3188, + "step": 2378 + }, + { + "epoch": 0.4763811048839071, + "learning_rate": 1.716919267969883e-05, + "loss": 0.3078, + "step": 2380 + }, + { + "epoch": 0.4771817453963171, + "learning_rate": 1.717892847282994e-05, + "loss": 0.298, + "step": 2382 + }, + { + "epoch": 0.4771817453963171, + "learning_rate": 1.7188650245358215e-05, + "loss": 0.4512, + "step": 2384 + }, + { + "epoch": 0.477982385908727, + "learning_rate": 1.7198357978296817e-05, + "loss": 0.2024, + "step": 2386 + }, + { + "epoch": 0.477982385908727, + "learning_rate": 1.7208051652686338e-05, + "loss": 0.1444, + "step": 2388 + }, + { + "epoch": 0.4787830264211369, + "learning_rate": 1.721773124959481e-05, + "loss": 0.3035, + "step": 2390 + }, + { + "epoch": 0.4787830264211369, + "learning_rate": 1.722739675011779e-05, + "loss": 0.1482, + "step": 2392 + }, + { + "epoch": 0.4795836669335468, + "learning_rate": 1.723704813537834e-05, + "loss": 0.2527, + "step": 2394 + }, + { + "epoch": 0.4795836669335468, + "learning_rate": 1.7246685386527095e-05, + "loss": 0.2582, + "step": 2396 + }, + { + "epoch": 0.4803843074459568, + "learning_rate": 1.725630848474229e-05, + "loss": 0.5233, + "step": 2398 + }, + { + "epoch": 0.4803843074459568, + "learning_rate": 1.726591741122981e-05, + "loss": 0.0761, + "step": 2400 + }, + { + "epoch": 0.4811849479583667, + "learning_rate": 1.727551214722321e-05, + "loss": 0.232, + "step": 2402 + }, + { + "epoch": 0.4811849479583667, + "learning_rate": 1.7285092673983753e-05, + "loss": 0.74, + "step": 2404 + }, + { + "epoch": 0.4819855884707766, + "learning_rate": 1.7294658972800488e-05, + "loss": 0.2872, + "step": 2406 + }, + { + "epoch": 0.4819855884707766, + "learning_rate": 1.730421102499021e-05, + "loss": 0.1952, + "step": 2408 + }, + { + "epoch": 0.48278622898318657, + "learning_rate": 1.7313748811897558e-05, + "loss": 0.4487, + "step": 2410 + }, + { + "epoch": 0.48278622898318657, + "learning_rate": 1.7323272314895022e-05, + "loss": 0.1421, + "step": 2412 + }, + { + "epoch": 0.4835868694955965, + "learning_rate": 1.7332781515383003e-05, + "loss": 0.2526, + "step": 2414 + }, + { + "epoch": 0.4835868694955965, + "learning_rate": 1.734227639478982e-05, + "loss": 0.1204, + "step": 2416 + }, + { + "epoch": 0.4843875100080064, + "learning_rate": 1.7351756934571758e-05, + "loss": 0.3345, + "step": 2418 + }, + { + "epoch": 0.4843875100080064, + "learning_rate": 1.736122311621314e-05, + "loss": 0.3129, + "step": 2420 + }, + { + "epoch": 0.4851881505204163, + "learning_rate": 1.7370674921226296e-05, + "loss": 0.4167, + "step": 2422 + }, + { + "epoch": 0.4851881505204163, + "learning_rate": 1.738011233115165e-05, + "loss": 0.4551, + "step": 2424 + }, + { + "epoch": 0.4859887910328263, + "learning_rate": 1.7389535327557733e-05, + "loss": 0.554, + "step": 2426 + }, + { + "epoch": 0.4859887910328263, + "learning_rate": 1.7398943892041227e-05, + "loss": 0.4983, + "step": 2428 + }, + { + "epoch": 0.4867894315452362, + "learning_rate": 1.7408338006227005e-05, + "loss": 0.4439, + "step": 2430 + }, + { + "epoch": 0.4867894315452362, + "learning_rate": 1.7417717651768144e-05, + "loss": 0.1658, + "step": 2432 + }, + { + "epoch": 0.4875900720576461, + "learning_rate": 1.7427082810346018e-05, + "loss": 0.2466, + "step": 2434 + }, + { + "epoch": 0.4875900720576461, + "learning_rate": 1.743643346367026e-05, + "loss": 0.2512, + "step": 2436 + }, + { + "epoch": 0.48839071257005606, + "learning_rate": 1.744576959347884e-05, + "loss": 0.238, + "step": 2438 + }, + { + "epoch": 0.48839071257005606, + "learning_rate": 1.7455091181538087e-05, + "loss": 0.2271, + "step": 2440 + }, + { + "epoch": 0.489191353082466, + "learning_rate": 1.746439820964275e-05, + "loss": 0.1682, + "step": 2442 + }, + { + "epoch": 0.489191353082466, + "learning_rate": 1.7473690659615992e-05, + "loss": 0.1304, + "step": 2444 + }, + { + "epoch": 0.4899919935948759, + "learning_rate": 1.748296851330945e-05, + "loss": 0.205, + "step": 2446 + }, + { + "epoch": 0.4899919935948759, + "learning_rate": 1.74922317526033e-05, + "loss": 0.1145, + "step": 2448 + }, + { + "epoch": 0.49079263410728585, + "learning_rate": 1.7501480359406217e-05, + "loss": 0.1598, + "step": 2450 + }, + { + "epoch": 0.49079263410728585, + "learning_rate": 1.7510714315655474e-05, + "loss": 0.1798, + "step": 2452 + }, + { + "epoch": 0.49159327461969576, + "learning_rate": 1.7519933603316955e-05, + "loss": 0.4794, + "step": 2454 + }, + { + "epoch": 0.49159327461969576, + "learning_rate": 1.752913820438519e-05, + "loss": 0.0358, + "step": 2456 + }, + { + "epoch": 0.4923939151321057, + "learning_rate": 1.7538328100883397e-05, + "loss": 0.5099, + "step": 2458 + }, + { + "epoch": 0.4923939151321057, + "learning_rate": 1.7547503274863495e-05, + "loss": 0.4349, + "step": 2460 + }, + { + "epoch": 0.4931945556445156, + "learning_rate": 1.7556663708406193e-05, + "loss": 0.1685, + "step": 2462 + }, + { + "epoch": 0.4931945556445156, + "learning_rate": 1.756580938362096e-05, + "loss": 0.202, + "step": 2464 + }, + { + "epoch": 0.49399519615692555, + "learning_rate": 1.7574940282646085e-05, + "loss": 0.4309, + "step": 2466 + }, + { + "epoch": 0.49399519615692555, + "learning_rate": 1.758405638764873e-05, + "loss": 0.5622, + "step": 2468 + }, + { + "epoch": 0.49479583666933546, + "learning_rate": 1.7593157680824946e-05, + "loss": 0.1961, + "step": 2470 + }, + { + "epoch": 0.49479583666933546, + "learning_rate": 1.7602244144399693e-05, + "loss": 0.1869, + "step": 2472 + }, + { + "epoch": 0.4955964771817454, + "learning_rate": 1.761131576062694e-05, + "loss": 0.1668, + "step": 2474 + }, + { + "epoch": 0.4955964771817454, + "learning_rate": 1.7620372511789604e-05, + "loss": 0.2749, + "step": 2476 + }, + { + "epoch": 0.49639711769415534, + "learning_rate": 1.7629414380199662e-05, + "loss": 0.2235, + "step": 2478 + }, + { + "epoch": 0.49639711769415534, + "learning_rate": 1.7638441348198147e-05, + "loss": 0.1031, + "step": 2480 + }, + { + "epoch": 0.49719775820656525, + "learning_rate": 1.7647453398155194e-05, + "loss": 0.3745, + "step": 2482 + }, + { + "epoch": 0.49719775820656525, + "learning_rate": 1.7656450512470077e-05, + "loss": 0.1008, + "step": 2484 + }, + { + "epoch": 0.49799839871897517, + "learning_rate": 1.7665432673571218e-05, + "loss": 0.7476, + "step": 2486 + }, + { + "epoch": 0.49799839871897517, + "learning_rate": 1.7674399863916295e-05, + "loss": 1.0966, + "step": 2488 + }, + { + "epoch": 0.49879903923138513, + "learning_rate": 1.768335206599217e-05, + "loss": 0.775, + "step": 2490 + }, + { + "epoch": 0.49879903923138513, + "learning_rate": 1.7692289262315e-05, + "loss": 0.2311, + "step": 2492 + }, + { + "epoch": 0.49959967974379504, + "learning_rate": 1.7701211435430256e-05, + "loss": 0.391, + "step": 2494 + }, + { + "epoch": 0.49959967974379504, + "learning_rate": 1.771011856791273e-05, + "loss": 0.5734, + "step": 2496 + }, + { + "epoch": 0.500400320256205, + "learning_rate": 1.771901064236659e-05, + "loss": 0.2815, + "step": 2498 + }, + { + "epoch": 0.500400320256205, + "learning_rate": 1.7727887641425448e-05, + "loss": 0.2396, + "step": 2500 + }, + { + "epoch": 0.5012009607686149, + "learning_rate": 1.773674954775232e-05, + "loss": 0.1344, + "step": 2502 + }, + { + "epoch": 0.5012009607686149, + "learning_rate": 1.7745596344039712e-05, + "loss": 0.3452, + "step": 2504 + }, + { + "epoch": 0.5020016012810248, + "learning_rate": 1.7754428013009637e-05, + "loss": 0.1867, + "step": 2506 + }, + { + "epoch": 0.5020016012810248, + "learning_rate": 1.7763244537413657e-05, + "loss": 0.3293, + "step": 2508 + }, + { + "epoch": 0.5028022417934348, + "learning_rate": 1.77720459000329e-05, + "loss": 0.2863, + "step": 2510 + }, + { + "epoch": 0.5028022417934348, + "learning_rate": 1.7780832083678116e-05, + "loss": 0.3385, + "step": 2512 + }, + { + "epoch": 0.5036028823058447, + "learning_rate": 1.7789603071189712e-05, + "loss": 0.3413, + "step": 2514 + }, + { + "epoch": 0.5036028823058447, + "learning_rate": 1.7798358845437754e-05, + "loss": 0.3642, + "step": 2516 + }, + { + "epoch": 0.5044035228182546, + "learning_rate": 1.780709938932202e-05, + "loss": 0.2378, + "step": 2518 + }, + { + "epoch": 0.5044035228182546, + "learning_rate": 1.7815824685772035e-05, + "loss": 0.2243, + "step": 2520 + }, + { + "epoch": 0.5052041633306645, + "learning_rate": 1.7824534717747115e-05, + "loss": 0.1958, + "step": 2522 + }, + { + "epoch": 0.5052041633306645, + "learning_rate": 1.7833229468236364e-05, + "loss": 0.4071, + "step": 2524 + }, + { + "epoch": 0.5060048038430744, + "learning_rate": 1.7841908920258767e-05, + "loss": 0.5398, + "step": 2526 + }, + { + "epoch": 0.5060048038430744, + "learning_rate": 1.7850573056863156e-05, + "loss": 0.3774, + "step": 2528 + }, + { + "epoch": 0.5068054443554844, + "learning_rate": 1.7859221861128284e-05, + "loss": 0.1713, + "step": 2530 + }, + { + "epoch": 0.5068054443554844, + "learning_rate": 1.786785531616285e-05, + "loss": 0.535, + "step": 2532 + }, + { + "epoch": 0.5076060848678943, + "learning_rate": 1.7876473405105528e-05, + "loss": 0.3927, + "step": 2534 + }, + { + "epoch": 0.5076060848678943, + "learning_rate": 1.7885076111125004e-05, + "loss": 0.1844, + "step": 2536 + }, + { + "epoch": 0.5084067253803043, + "learning_rate": 1.7893663417419995e-05, + "loss": 0.3155, + "step": 2538 + }, + { + "epoch": 0.5084067253803043, + "learning_rate": 1.790223530721933e-05, + "loss": 0.2506, + "step": 2540 + }, + { + "epoch": 0.5092073658927142, + "learning_rate": 1.791079176378191e-05, + "loss": 0.2709, + "step": 2542 + }, + { + "epoch": 0.5092073658927142, + "learning_rate": 1.791933277039679e-05, + "loss": 0.349, + "step": 2544 + }, + { + "epoch": 0.5100080064051241, + "learning_rate": 1.7927858310383202e-05, + "loss": 0.3345, + "step": 2546 + }, + { + "epoch": 0.5100080064051241, + "learning_rate": 1.7936368367090577e-05, + "loss": 0.1962, + "step": 2548 + }, + { + "epoch": 0.510808646917534, + "learning_rate": 1.794486292389858e-05, + "loss": 0.3233, + "step": 2550 + }, + { + "epoch": 0.510808646917534, + "learning_rate": 1.7953341964217183e-05, + "loss": 0.2564, + "step": 2552 + }, + { + "epoch": 0.5116092874299439, + "learning_rate": 1.7961805471486618e-05, + "loss": 0.2419, + "step": 2554 + }, + { + "epoch": 0.5116092874299439, + "learning_rate": 1.7970253429177477e-05, + "loss": 0.4263, + "step": 2556 + }, + { + "epoch": 0.5124099279423538, + "learning_rate": 1.797868582079072e-05, + "loss": 0.5227, + "step": 2558 + }, + { + "epoch": 0.5124099279423538, + "learning_rate": 1.7987102629857696e-05, + "loss": 0.091, + "step": 2560 + }, + { + "epoch": 0.5132105684547638, + "learning_rate": 1.7995503839940197e-05, + "loss": 0.3153, + "step": 2562 + }, + { + "epoch": 0.5132105684547638, + "learning_rate": 1.800388943463047e-05, + "loss": 0.0395, + "step": 2564 + }, + { + "epoch": 0.5140112089671738, + "learning_rate": 1.8012259397551283e-05, + "loss": 0.1586, + "step": 2566 + }, + { + "epoch": 0.5140112089671738, + "learning_rate": 1.8020613712355912e-05, + "loss": 0.1021, + "step": 2568 + }, + { + "epoch": 0.5148118494795837, + "learning_rate": 1.8028952362728197e-05, + "loss": 0.0406, + "step": 2570 + }, + { + "epoch": 0.5148118494795837, + "learning_rate": 1.803727533238257e-05, + "loss": 0.0778, + "step": 2572 + }, + { + "epoch": 0.5156124899919936, + "learning_rate": 1.804558260506409e-05, + "loss": 0.194, + "step": 2574 + }, + { + "epoch": 0.5156124899919936, + "learning_rate": 1.805387416454847e-05, + "loss": 0.1681, + "step": 2576 + }, + { + "epoch": 0.5164131305044035, + "learning_rate": 1.8062149994642135e-05, + "loss": 0.1868, + "step": 2578 + }, + { + "epoch": 0.5164131305044035, + "learning_rate": 1.8070410079182195e-05, + "loss": 0.0327, + "step": 2580 + }, + { + "epoch": 0.5172137710168134, + "learning_rate": 1.8078654402036526e-05, + "loss": 0.1116, + "step": 2582 + }, + { + "epoch": 0.5172137710168134, + "learning_rate": 1.8086882947103787e-05, + "loss": 0.0425, + "step": 2584 + }, + { + "epoch": 0.5180144115292233, + "learning_rate": 1.8095095698313452e-05, + "loss": 0.1516, + "step": 2586 + }, + { + "epoch": 0.5180144115292233, + "learning_rate": 1.8103292639625842e-05, + "loss": 0.1115, + "step": 2588 + }, + { + "epoch": 0.5188150520416333, + "learning_rate": 1.811147375503214e-05, + "loss": 1.0942, + "step": 2590 + }, + { + "epoch": 0.5188150520416333, + "learning_rate": 1.811963902855447e-05, + "loss": 0.0614, + "step": 2592 + }, + { + "epoch": 0.5196156925540433, + "learning_rate": 1.812778844424587e-05, + "loss": 0.2463, + "step": 2594 + }, + { + "epoch": 0.5196156925540433, + "learning_rate": 1.813592198619035e-05, + "loss": 0.472, + "step": 2596 + }, + { + "epoch": 0.5204163330664532, + "learning_rate": 1.814403963850293e-05, + "loss": 0.1681, + "step": 2598 + }, + { + "epoch": 0.5204163330664532, + "learning_rate": 1.8152141385329658e-05, + "loss": 0.1759, + "step": 2600 + }, + { + "epoch": 0.5212169735788631, + "learning_rate": 1.8160227210847636e-05, + "loss": 0.144, + "step": 2602 + }, + { + "epoch": 0.5212169735788631, + "learning_rate": 1.816829709926509e-05, + "loss": 0.1328, + "step": 2604 + }, + { + "epoch": 0.522017614091273, + "learning_rate": 1.8176351034821345e-05, + "loss": 0.3637, + "step": 2606 + }, + { + "epoch": 0.522017614091273, + "learning_rate": 1.8184389001786895e-05, + "loss": 0.3628, + "step": 2608 + }, + { + "epoch": 0.5228182546036829, + "learning_rate": 1.819241098446341e-05, + "loss": 0.5551, + "step": 2610 + }, + { + "epoch": 0.5228182546036829, + "learning_rate": 1.8200416967183785e-05, + "loss": 0.1671, + "step": 2612 + }, + { + "epoch": 0.5236188951160928, + "learning_rate": 1.8208406934312167e-05, + "loss": 0.3377, + "step": 2614 + }, + { + "epoch": 0.5236188951160928, + "learning_rate": 1.821638087024396e-05, + "loss": 0.1464, + "step": 2616 + }, + { + "epoch": 0.5244195356285029, + "learning_rate": 1.8224338759405917e-05, + "loss": 0.733, + "step": 2618 + }, + { + "epoch": 0.5244195356285029, + "learning_rate": 1.8232280586256097e-05, + "loss": 0.1686, + "step": 2620 + }, + { + "epoch": 0.5252201761409128, + "learning_rate": 1.8240206335283947e-05, + "loss": 0.29, + "step": 2622 + }, + { + "epoch": 0.5252201761409128, + "learning_rate": 1.8248115991010296e-05, + "loss": 0.362, + "step": 2624 + }, + { + "epoch": 0.5260208166533227, + "learning_rate": 1.825600953798743e-05, + "loss": 0.4373, + "step": 2626 + }, + { + "epoch": 0.5260208166533227, + "learning_rate": 1.8263886960799055e-05, + "loss": 0.038, + "step": 2628 + }, + { + "epoch": 0.5268214571657326, + "learning_rate": 1.8271748244060426e-05, + "loss": 0.2377, + "step": 2630 + }, + { + "epoch": 0.5268214571657326, + "learning_rate": 1.8279593372418264e-05, + "loss": 0.2393, + "step": 2632 + }, + { + "epoch": 0.5276220976781425, + "learning_rate": 1.8287422330550878e-05, + "loss": 0.6519, + "step": 2634 + }, + { + "epoch": 0.5276220976781425, + "learning_rate": 1.829523510316813e-05, + "loss": 0.4658, + "step": 2636 + }, + { + "epoch": 0.5284227381905524, + "learning_rate": 1.8303031675011515e-05, + "loss": 0.3502, + "step": 2638 + }, + { + "epoch": 0.5284227381905524, + "learning_rate": 1.8310812030854155e-05, + "loss": 0.3361, + "step": 2640 + }, + { + "epoch": 0.5292233787029623, + "learning_rate": 1.8318576155500838e-05, + "loss": 0.2481, + "step": 2642 + }, + { + "epoch": 0.5292233787029623, + "learning_rate": 1.832632403378808e-05, + "loss": 0.3521, + "step": 2644 + }, + { + "epoch": 0.5300240192153723, + "learning_rate": 1.8334055650584094e-05, + "loss": 0.1969, + "step": 2646 + }, + { + "epoch": 0.5300240192153723, + "learning_rate": 1.834177099078887e-05, + "loss": 0.1755, + "step": 2648 + }, + { + "epoch": 0.5308246597277823, + "learning_rate": 1.8349470039334173e-05, + "loss": 0.5506, + "step": 2650 + }, + { + "epoch": 0.5308246597277823, + "learning_rate": 1.8357152781183606e-05, + "loss": 0.2311, + "step": 2652 + }, + { + "epoch": 0.5316253002401922, + "learning_rate": 1.83648192013326e-05, + "loss": 0.2634, + "step": 2654 + }, + { + "epoch": 0.5316253002401922, + "learning_rate": 1.8372469284808465e-05, + "loss": 0.3706, + "step": 2656 + }, + { + "epoch": 0.5324259407526021, + "learning_rate": 1.8380103016670437e-05, + "loss": 0.5143, + "step": 2658 + }, + { + "epoch": 0.5324259407526021, + "learning_rate": 1.8387720382009665e-05, + "loss": 0.1756, + "step": 2660 + }, + { + "epoch": 0.533226581265012, + "learning_rate": 1.839532136594927e-05, + "loss": 0.2278, + "step": 2662 + }, + { + "epoch": 0.533226581265012, + "learning_rate": 1.840290595364436e-05, + "loss": 0.1966, + "step": 2664 + }, + { + "epoch": 0.5340272217774219, + "learning_rate": 1.8410474130282085e-05, + "loss": 0.5232, + "step": 2666 + }, + { + "epoch": 0.5340272217774219, + "learning_rate": 1.8418025881081606e-05, + "loss": 0.2224, + "step": 2668 + }, + { + "epoch": 0.5348278622898318, + "learning_rate": 1.8425561191294217e-05, + "loss": 0.1512, + "step": 2670 + }, + { + "epoch": 0.5348278622898318, + "learning_rate": 1.8433080046203286e-05, + "loss": 0.2844, + "step": 2672 + }, + { + "epoch": 0.5356285028022418, + "learning_rate": 1.8440582431124325e-05, + "loss": 0.1787, + "step": 2674 + }, + { + "epoch": 0.5356285028022418, + "learning_rate": 1.844806833140501e-05, + "loss": 0.2778, + "step": 2676 + }, + { + "epoch": 0.5364291433146517, + "learning_rate": 1.8455537732425223e-05, + "loss": 0.2382, + "step": 2678 + }, + { + "epoch": 0.5364291433146517, + "learning_rate": 1.8462990619597054e-05, + "loss": 0.1623, + "step": 2680 + }, + { + "epoch": 0.5372297838270617, + "learning_rate": 1.847042697836485e-05, + "loss": 0.2065, + "step": 2682 + }, + { + "epoch": 0.5372297838270617, + "learning_rate": 1.8477846794205258e-05, + "loss": 0.1673, + "step": 2684 + }, + { + "epoch": 0.5380304243394716, + "learning_rate": 1.84852500526272e-05, + "loss": 0.4928, + "step": 2686 + }, + { + "epoch": 0.5380304243394716, + "learning_rate": 1.8492636739171966e-05, + "loss": 0.1676, + "step": 2688 + }, + { + "epoch": 0.5388310648518815, + "learning_rate": 1.8500006839413183e-05, + "loss": 0.1712, + "step": 2690 + }, + { + "epoch": 0.5388310648518815, + "learning_rate": 1.85073603389569e-05, + "loss": 0.0793, + "step": 2692 + }, + { + "epoch": 0.5396317053642914, + "learning_rate": 1.851469722344155e-05, + "loss": 0.1865, + "step": 2694 + }, + { + "epoch": 0.5396317053642914, + "learning_rate": 1.8522017478538067e-05, + "loss": 0.1241, + "step": 2696 + }, + { + "epoch": 0.5404323458767014, + "learning_rate": 1.8529321089949817e-05, + "loss": 1.0657, + "step": 2698 + }, + { + "epoch": 0.5404323458767014, + "learning_rate": 1.8536608043412695e-05, + "loss": 0.3722, + "step": 2700 + }, + { + "epoch": 0.5412329863891113, + "learning_rate": 1.8543878324695122e-05, + "loss": 0.2222, + "step": 2702 + }, + { + "epoch": 0.5412329863891113, + "learning_rate": 1.855113191959808e-05, + "loss": 0.1297, + "step": 2704 + }, + { + "epoch": 0.5420336269015212, + "learning_rate": 1.8558368813955143e-05, + "loss": 0.1843, + "step": 2706 + }, + { + "epoch": 0.5420336269015212, + "learning_rate": 1.856558899363248e-05, + "loss": 0.0395, + "step": 2708 + }, + { + "epoch": 0.5428342674139311, + "learning_rate": 1.857279244452896e-05, + "loss": 0.177, + "step": 2710 + }, + { + "epoch": 0.5428342674139311, + "learning_rate": 1.8579979152576063e-05, + "loss": 0.1148, + "step": 2712 + }, + { + "epoch": 0.5436349079263411, + "learning_rate": 1.8587149103738e-05, + "loss": 0.5083, + "step": 2714 + }, + { + "epoch": 0.5436349079263411, + "learning_rate": 1.85943022840117e-05, + "loss": 0.0513, + "step": 2716 + }, + { + "epoch": 0.544435548438751, + "learning_rate": 1.8601438679426847e-05, + "loss": 0.5416, + "step": 2718 + }, + { + "epoch": 0.544435548438751, + "learning_rate": 1.8608558276045895e-05, + "loss": 0.1071, + "step": 2720 + }, + { + "epoch": 0.5452361889511609, + "learning_rate": 1.8615661059964134e-05, + "loss": 0.3568, + "step": 2722 + }, + { + "epoch": 0.5452361889511609, + "learning_rate": 1.862274701730967e-05, + "loss": 0.2019, + "step": 2724 + }, + { + "epoch": 0.5460368294635709, + "learning_rate": 1.862981613424347e-05, + "loss": 0.2101, + "step": 2726 + }, + { + "epoch": 0.5460368294635709, + "learning_rate": 1.86368683969594e-05, + "loss": 0.1669, + "step": 2728 + }, + { + "epoch": 0.5468374699759808, + "learning_rate": 1.864390379168423e-05, + "loss": 0.3327, + "step": 2730 + }, + { + "epoch": 0.5468374699759808, + "learning_rate": 1.865092230467769e-05, + "loss": 0.4152, + "step": 2732 + }, + { + "epoch": 0.5476381104883907, + "learning_rate": 1.8657923922232464e-05, + "loss": 0.6162, + "step": 2734 + }, + { + "epoch": 0.5476381104883907, + "learning_rate": 1.866490863067425e-05, + "loss": 0.1787, + "step": 2736 + }, + { + "epoch": 0.5484387510008006, + "learning_rate": 1.8671876416361763e-05, + "loss": 0.1753, + "step": 2738 + }, + { + "epoch": 0.5484387510008006, + "learning_rate": 1.8678827265686753e-05, + "loss": 0.1863, + "step": 2740 + }, + { + "epoch": 0.5492393915132106, + "learning_rate": 1.8685761165074073e-05, + "loss": 0.2162, + "step": 2742 + }, + { + "epoch": 0.5492393915132106, + "learning_rate": 1.8692678100981663e-05, + "loss": 0.3516, + "step": 2744 + }, + { + "epoch": 0.5500400320256205, + "learning_rate": 1.869957805990059e-05, + "loss": 0.4707, + "step": 2746 + }, + { + "epoch": 0.5500400320256205, + "learning_rate": 1.87064610283551e-05, + "loss": 0.4434, + "step": 2748 + }, + { + "epoch": 0.5508406725380304, + "learning_rate": 1.87133269929026e-05, + "loss": 0.3514, + "step": 2750 + }, + { + "epoch": 0.5508406725380304, + "learning_rate": 1.8720175940133705e-05, + "loss": 0.4731, + "step": 2752 + }, + { + "epoch": 0.5516413130504404, + "learning_rate": 1.8727007856672285e-05, + "loss": 0.1661, + "step": 2754 + }, + { + "epoch": 0.5516413130504404, + "learning_rate": 1.873382272917545e-05, + "loss": 0.2822, + "step": 2756 + }, + { + "epoch": 0.5524419535628503, + "learning_rate": 1.8740620544333607e-05, + "loss": 0.1664, + "step": 2758 + }, + { + "epoch": 0.5524419535628503, + "learning_rate": 1.8747401288870472e-05, + "loss": 0.6414, + "step": 2760 + }, + { + "epoch": 0.5532425940752602, + "learning_rate": 1.875416494954312e-05, + "loss": 0.1342, + "step": 2762 + }, + { + "epoch": 0.5532425940752602, + "learning_rate": 1.876091151314196e-05, + "loss": 0.256, + "step": 2764 + }, + { + "epoch": 0.5540432345876701, + "learning_rate": 1.8767640966490813e-05, + "loss": 0.6766, + "step": 2766 + }, + { + "epoch": 0.5540432345876701, + "learning_rate": 1.877435329644691e-05, + "loss": 0.0962, + "step": 2768 + }, + { + "epoch": 0.55484387510008, + "learning_rate": 1.878104848990093e-05, + "loss": 0.4713, + "step": 2770 + }, + { + "epoch": 0.55484387510008, + "learning_rate": 1.8787726533776996e-05, + "loss": 0.042, + "step": 2772 + }, + { + "epoch": 0.55564451561249, + "learning_rate": 1.879438741503277e-05, + "loss": 0.6157, + "step": 2774 + }, + { + "epoch": 0.55564451561249, + "learning_rate": 1.8801031120659393e-05, + "loss": 0.3977, + "step": 2776 + }, + { + "epoch": 0.5564451561248999, + "learning_rate": 1.8807657637681563e-05, + "loss": 0.3495, + "step": 2778 + }, + { + "epoch": 0.5564451561248999, + "learning_rate": 1.8814266953157557e-05, + "loss": 0.3486, + "step": 2780 + }, + { + "epoch": 0.5572457966373099, + "learning_rate": 1.8820859054179225e-05, + "loss": 0.3993, + "step": 2782 + }, + { + "epoch": 0.5572457966373099, + "learning_rate": 1.8827433927872066e-05, + "loss": 0.1276, + "step": 2784 + }, + { + "epoch": 0.5580464371497198, + "learning_rate": 1.883399156139519e-05, + "loss": 0.1528, + "step": 2786 + }, + { + "epoch": 0.5580464371497198, + "learning_rate": 1.8840531941941415e-05, + "loss": 0.0514, + "step": 2788 + }, + { + "epoch": 0.5588470776621297, + "learning_rate": 1.8847055056737233e-05, + "loss": 0.1769, + "step": 2790 + }, + { + "epoch": 0.5588470776621297, + "learning_rate": 1.8853560893042854e-05, + "loss": 0.1586, + "step": 2792 + }, + { + "epoch": 0.5596477181745396, + "learning_rate": 1.8860049438152244e-05, + "loss": 0.1765, + "step": 2794 + }, + { + "epoch": 0.5596477181745396, + "learning_rate": 1.8866520679393127e-05, + "loss": 0.27, + "step": 2796 + }, + { + "epoch": 0.5604483586869495, + "learning_rate": 1.8872974604127025e-05, + "loss": 0.1669, + "step": 2798 + }, + { + "epoch": 0.5604483586869495, + "learning_rate": 1.8879411199749303e-05, + "loss": 0.1201, + "step": 2800 + }, + { + "epoch": 0.5612489991993594, + "learning_rate": 1.8885830453689132e-05, + "loss": 0.5391, + "step": 2802 + }, + { + "epoch": 0.5612489991993594, + "learning_rate": 1.889223235340958e-05, + "loss": 0.1809, + "step": 2804 + }, + { + "epoch": 0.5620496397117695, + "learning_rate": 1.889861688640759e-05, + "loss": 0.2612, + "step": 2806 + }, + { + "epoch": 0.5620496397117695, + "learning_rate": 1.8904984040214037e-05, + "loss": 0.5066, + "step": 2808 + }, + { + "epoch": 0.5628502802241794, + "learning_rate": 1.891133380239373e-05, + "loss": 0.2505, + "step": 2810 + }, + { + "epoch": 0.5628502802241794, + "learning_rate": 1.8917666160545436e-05, + "loss": 0.2059, + "step": 2812 + }, + { + "epoch": 0.5636509207365893, + "learning_rate": 1.892398110230194e-05, + "loss": 0.5197, + "step": 2814 + }, + { + "epoch": 0.5636509207365893, + "learning_rate": 1.893027861533002e-05, + "loss": 0.2614, + "step": 2816 + }, + { + "epoch": 0.5644515612489992, + "learning_rate": 1.8936558687330485e-05, + "loss": 0.2859, + "step": 2818 + }, + { + "epoch": 0.5644515612489992, + "learning_rate": 1.894282130603823e-05, + "loss": 0.1955, + "step": 2820 + }, + { + "epoch": 0.5652522017614091, + "learning_rate": 1.8949066459222217e-05, + "loss": 0.4254, + "step": 2822 + }, + { + "epoch": 0.5652522017614091, + "learning_rate": 1.8955294134685528e-05, + "loss": 0.3996, + "step": 2824 + }, + { + "epoch": 0.566052842273819, + "learning_rate": 1.8961504320265382e-05, + "loss": 0.3123, + "step": 2826 + }, + { + "epoch": 0.566052842273819, + "learning_rate": 1.896769700383315e-05, + "loss": 0.5108, + "step": 2828 + }, + { + "epoch": 0.5668534827862289, + "learning_rate": 1.897387217329439e-05, + "loss": 0.2395, + "step": 2830 + }, + { + "epoch": 0.5668534827862289, + "learning_rate": 1.898002981658886e-05, + "loss": 0.1866, + "step": 2832 + }, + { + "epoch": 0.567654123298639, + "learning_rate": 1.8986169921690543e-05, + "loss": 0.224, + "step": 2834 + }, + { + "epoch": 0.567654123298639, + "learning_rate": 1.899229247660769e-05, + "loss": 0.0974, + "step": 2836 + }, + { + "epoch": 0.5684547638110489, + "learning_rate": 1.899839746938281e-05, + "loss": 0.2569, + "step": 2838 + }, + { + "epoch": 0.5684547638110489, + "learning_rate": 1.9004484888092724e-05, + "loss": 0.1579, + "step": 2840 + }, + { + "epoch": 0.5692554043234588, + "learning_rate": 1.9010554720848577e-05, + "loss": 0.3427, + "step": 2842 + }, + { + "epoch": 0.5692554043234588, + "learning_rate": 1.901660695579585e-05, + "loss": 0.0411, + "step": 2844 + }, + { + "epoch": 0.5700560448358687, + "learning_rate": 1.9022641581114392e-05, + "loss": 0.5222, + "step": 2846 + }, + { + "epoch": 0.5700560448358687, + "learning_rate": 1.9028658585018455e-05, + "loss": 0.3942, + "step": 2848 + }, + { + "epoch": 0.5708566853482786, + "learning_rate": 1.9034657955756695e-05, + "loss": 0.2259, + "step": 2850 + }, + { + "epoch": 0.5708566853482786, + "learning_rate": 1.9040639681612212e-05, + "loss": 0.2525, + "step": 2852 + }, + { + "epoch": 0.5716573258606885, + "learning_rate": 1.904660375090257e-05, + "loss": 0.5569, + "step": 2854 + }, + { + "epoch": 0.5716573258606885, + "learning_rate": 1.9052550151979816e-05, + "loss": 0.2262, + "step": 2856 + }, + { + "epoch": 0.5724579663730984, + "learning_rate": 1.905847887323049e-05, + "loss": 0.2463, + "step": 2858 + }, + { + "epoch": 0.5724579663730984, + "learning_rate": 1.9064389903075676e-05, + "loss": 0.2776, + "step": 2860 + }, + { + "epoch": 0.5732586068855084, + "learning_rate": 1.9070283229971007e-05, + "loss": 0.5323, + "step": 2862 + }, + { + "epoch": 0.5732586068855084, + "learning_rate": 1.9076158842406674e-05, + "loss": 0.2051, + "step": 2864 + }, + { + "epoch": 0.5740592473979184, + "learning_rate": 1.9082016728907496e-05, + "loss": 0.9686, + "step": 2866 + }, + { + "epoch": 0.5740592473979184, + "learning_rate": 1.9087856878032886e-05, + "loss": 0.3131, + "step": 2868 + }, + { + "epoch": 0.5748598879103283, + "learning_rate": 1.909367927837691e-05, + "loss": 0.4471, + "step": 2870 + }, + { + "epoch": 0.5748598879103283, + "learning_rate": 1.909948391856829e-05, + "loss": 0.2458, + "step": 2872 + }, + { + "epoch": 0.5756605284227382, + "learning_rate": 1.910527078727044e-05, + "loss": 0.2416, + "step": 2874 + }, + { + "epoch": 0.5756605284227382, + "learning_rate": 1.911103987318148e-05, + "loss": 0.2777, + "step": 2876 + }, + { + "epoch": 0.5764611689351481, + "learning_rate": 1.911679116503425e-05, + "loss": 0.3449, + "step": 2878 + }, + { + "epoch": 0.5764611689351481, + "learning_rate": 1.912252465159637e-05, + "loss": 0.2106, + "step": 2880 + }, + { + "epoch": 0.577261809447558, + "learning_rate": 1.9128240321670208e-05, + "loss": 0.2166, + "step": 2882 + }, + { + "epoch": 0.577261809447558, + "learning_rate": 1.913393816409294e-05, + "loss": 0.3659, + "step": 2884 + }, + { + "epoch": 0.578062449959968, + "learning_rate": 1.913961816773655e-05, + "loss": 0.4227, + "step": 2886 + }, + { + "epoch": 0.578062449959968, + "learning_rate": 1.9145280321507872e-05, + "loss": 0.1546, + "step": 2888 + }, + { + "epoch": 0.5788630904723779, + "learning_rate": 1.9150924614348588e-05, + "loss": 0.1763, + "step": 2890 + }, + { + "epoch": 0.5788630904723779, + "learning_rate": 1.9156551035235288e-05, + "loss": 0.55, + "step": 2892 + }, + { + "epoch": 0.5796637309847879, + "learning_rate": 1.916215957317944e-05, + "loss": 0.6128, + "step": 2894 + }, + { + "epoch": 0.5796637309847879, + "learning_rate": 1.9167750217227454e-05, + "loss": 0.0501, + "step": 2896 + }, + { + "epoch": 0.5804643714971978, + "learning_rate": 1.9173322956460675e-05, + "loss": 0.1496, + "step": 2898 + }, + { + "epoch": 0.5804643714971978, + "learning_rate": 1.9178877779995423e-05, + "loss": 0.0263, + "step": 2900 + }, + { + "epoch": 0.5812650120096077, + "learning_rate": 1.9184414676983006e-05, + "loss": 0.4598, + "step": 2902 + }, + { + "epoch": 0.5812650120096077, + "learning_rate": 1.9189933636609747e-05, + "loss": 0.2697, + "step": 2904 + }, + { + "epoch": 0.5820656525220176, + "learning_rate": 1.9195434648097003e-05, + "loss": 0.4029, + "step": 2906 + }, + { + "epoch": 0.5820656525220176, + "learning_rate": 1.9200917700701173e-05, + "loss": 0.1956, + "step": 2908 + }, + { + "epoch": 0.5828662930344275, + "learning_rate": 1.9206382783713738e-05, + "loss": 0.2976, + "step": 2910 + }, + { + "epoch": 0.5828662930344275, + "learning_rate": 1.9211829886461274e-05, + "loss": 0.1694, + "step": 2912 + }, + { + "epoch": 0.5836669335468375, + "learning_rate": 1.921725899830547e-05, + "loss": 0.3611, + "step": 2914 + }, + { + "epoch": 0.5836669335468375, + "learning_rate": 1.9222670108643146e-05, + "loss": 0.2782, + "step": 2916 + }, + { + "epoch": 0.5844675740592474, + "learning_rate": 1.92280632069063e-05, + "loss": 0.2271, + "step": 2918 + }, + { + "epoch": 0.5844675740592474, + "learning_rate": 1.9233438282562085e-05, + "loss": 0.135, + "step": 2920 + }, + { + "epoch": 0.5852682145716573, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.1275, + "step": 2922 + }, + { + "epoch": 0.5852682145716573, + "learning_rate": 1.924413432409622e-05, + "loss": 0.3228, + "step": 2924 + }, + { + "epoch": 0.5860688550840673, + "learning_rate": 1.924945526908497e-05, + "loss": 0.519, + "step": 2926 + }, + { + "epoch": 0.5860688550840673, + "learning_rate": 1.925475814968719e-05, + "loss": 0.121, + "step": 2928 + }, + { + "epoch": 0.5868694955964772, + "learning_rate": 1.9260042955546237e-05, + "loss": 0.6254, + "step": 2930 + }, + { + "epoch": 0.5868694955964772, + "learning_rate": 1.926530967634078e-05, + "loss": 0.1059, + "step": 2932 + }, + { + "epoch": 0.5876701361088871, + "learning_rate": 1.9270558301784795e-05, + "loss": 0.4969, + "step": 2934 + }, + { + "epoch": 0.5876701361088871, + "learning_rate": 1.9275788821627607e-05, + "loss": 0.1946, + "step": 2936 + }, + { + "epoch": 0.588470776621297, + "learning_rate": 1.9281001225653887e-05, + "loss": 0.3149, + "step": 2938 + }, + { + "epoch": 0.588470776621297, + "learning_rate": 1.9286195503683705e-05, + "loss": 0.4259, + "step": 2940 + }, + { + "epoch": 0.589271417133707, + "learning_rate": 1.9291371645572517e-05, + "loss": 0.4693, + "step": 2942 + }, + { + "epoch": 0.589271417133707, + "learning_rate": 1.9296529641211215e-05, + "loss": 0.0127, + "step": 2944 + }, + { + "epoch": 0.5900720576461169, + "learning_rate": 1.9301669480526115e-05, + "loss": 0.4233, + "step": 2946 + }, + { + "epoch": 0.5900720576461169, + "learning_rate": 1.9306791153479004e-05, + "loss": 0.984, + "step": 2948 + }, + { + "epoch": 0.5908726981585268, + "learning_rate": 1.931189465006714e-05, + "loss": 0.2328, + "step": 2950 + }, + { + "epoch": 0.5908726981585268, + "learning_rate": 1.9316979960323286e-05, + "loss": 0.2282, + "step": 2952 + }, + { + "epoch": 0.5916733386709367, + "learning_rate": 1.9322047074315717e-05, + "loss": 0.6037, + "step": 2954 + }, + { + "epoch": 0.5916733386709367, + "learning_rate": 1.932709598214825e-05, + "loss": 0.144, + "step": 2956 + }, + { + "epoch": 0.5924739791833467, + "learning_rate": 1.9332126673960262e-05, + "loss": 0.2022, + "step": 2958 + }, + { + "epoch": 0.5924739791833467, + "learning_rate": 1.9337139139926707e-05, + "loss": 0.1427, + "step": 2960 + }, + { + "epoch": 0.5932746196957566, + "learning_rate": 1.934213337025812e-05, + "loss": 0.0921, + "step": 2962 + }, + { + "epoch": 0.5932746196957566, + "learning_rate": 1.9347109355200672e-05, + "loss": 0.0734, + "step": 2964 + }, + { + "epoch": 0.5940752602081665, + "learning_rate": 1.9352067085036145e-05, + "loss": 0.1581, + "step": 2966 + }, + { + "epoch": 0.5940752602081665, + "learning_rate": 1.935700655008199e-05, + "loss": 0.1153, + "step": 2968 + }, + { + "epoch": 0.5948759007205765, + "learning_rate": 1.9361927740691327e-05, + "loss": 0.2135, + "step": 2970 + }, + { + "epoch": 0.5948759007205765, + "learning_rate": 1.9366830647252967e-05, + "loss": 0.1675, + "step": 2972 + }, + { + "epoch": 0.5956765412329864, + "learning_rate": 1.937171526019142e-05, + "loss": 0.4079, + "step": 2974 + }, + { + "epoch": 0.5956765412329864, + "learning_rate": 1.9376581569966933e-05, + "loss": 0.1283, + "step": 2976 + }, + { + "epoch": 0.5964771817453963, + "learning_rate": 1.9381429567075504e-05, + "loss": 0.2776, + "step": 2978 + }, + { + "epoch": 0.5964771817453963, + "learning_rate": 1.9386259242048883e-05, + "loss": 0.3433, + "step": 2980 + }, + { + "epoch": 0.5972778222578062, + "learning_rate": 1.93910705854546e-05, + "loss": 0.5167, + "step": 2982 + }, + { + "epoch": 0.5972778222578062, + "learning_rate": 1.939586358789602e-05, + "loss": 0.2453, + "step": 2984 + }, + { + "epoch": 0.5980784627702161, + "learning_rate": 1.9400638240012294e-05, + "loss": 0.2479, + "step": 2986 + }, + { + "epoch": 0.5980784627702161, + "learning_rate": 1.940539453247842e-05, + "loss": 0.1008, + "step": 2988 + }, + { + "epoch": 0.5988791032826261, + "learning_rate": 1.9410132456005262e-05, + "loss": 0.8446, + "step": 2990 + }, + { + "epoch": 0.5988791032826261, + "learning_rate": 1.9414852001339547e-05, + "loss": 0.0862, + "step": 2992 + }, + { + "epoch": 0.5996797437950361, + "learning_rate": 1.9419553159263896e-05, + "loss": 0.2936, + "step": 2994 + }, + { + "epoch": 0.5996797437950361, + "learning_rate": 1.9424235920596863e-05, + "loss": 0.0187, + "step": 2996 + }, + { + "epoch": 0.600480384307446, + "learning_rate": 1.94289002761929e-05, + "loss": 0.3349, + "step": 2998 + }, + { + "epoch": 0.600480384307446, + "learning_rate": 1.9433546216942423e-05, + "loss": 0.0936, + "step": 3000 + }, + { + "epoch": 0.6012810248198559, + "learning_rate": 1.943817373377181e-05, + "loss": 0.3229, + "step": 3002 + }, + { + "epoch": 0.6012810248198559, + "learning_rate": 1.944278281764342e-05, + "loss": 0.1426, + "step": 3004 + }, + { + "epoch": 0.6020816653322658, + "learning_rate": 1.944737345955561e-05, + "loss": 0.2377, + "step": 3006 + }, + { + "epoch": 0.6020816653322658, + "learning_rate": 1.945194565054276e-05, + "loss": 0.1163, + "step": 3008 + }, + { + "epoch": 0.6028823058446757, + "learning_rate": 1.945649938167528e-05, + "loss": 0.1426, + "step": 3010 + }, + { + "epoch": 0.6028823058446757, + "learning_rate": 1.9461034644059637e-05, + "loss": 0.1146, + "step": 3012 + }, + { + "epoch": 0.6036829463570856, + "learning_rate": 1.946555142883836e-05, + "loss": 0.3716, + "step": 3014 + }, + { + "epoch": 0.6036829463570856, + "learning_rate": 1.9470049727190073e-05, + "loss": 0.1954, + "step": 3016 + }, + { + "epoch": 0.6044835868694955, + "learning_rate": 1.9474529530329507e-05, + "loss": 0.4179, + "step": 3018 + }, + { + "epoch": 0.6044835868694955, + "learning_rate": 1.9478990829507504e-05, + "loss": 0.2286, + "step": 3020 + }, + { + "epoch": 0.6052842273819056, + "learning_rate": 1.9483433616011047e-05, + "loss": 0.5044, + "step": 3022 + }, + { + "epoch": 0.6052842273819056, + "learning_rate": 1.948785788116329e-05, + "loss": 0.4513, + "step": 3024 + }, + { + "epoch": 0.6060848678943155, + "learning_rate": 1.9492263616323533e-05, + "loss": 0.3038, + "step": 3026 + }, + { + "epoch": 0.6060848678943155, + "learning_rate": 1.9496650812887286e-05, + "loss": 0.2418, + "step": 3028 + }, + { + "epoch": 0.6068855084067254, + "learning_rate": 1.9501019462286263e-05, + "loss": 0.6364, + "step": 3030 + }, + { + "epoch": 0.6068855084067254, + "learning_rate": 1.9505369555988395e-05, + "loss": 0.0595, + "step": 3032 + }, + { + "epoch": 0.6076861489191353, + "learning_rate": 1.9509701085497842e-05, + "loss": 0.4716, + "step": 3034 + }, + { + "epoch": 0.6076861489191353, + "learning_rate": 1.951401404235505e-05, + "loss": 0.749, + "step": 3036 + }, + { + "epoch": 0.6084867894315452, + "learning_rate": 1.9518308418136718e-05, + "loss": 0.3639, + "step": 3038 + }, + { + "epoch": 0.6084867894315452, + "learning_rate": 1.952258420445583e-05, + "loss": 0.7821, + "step": 3040 + }, + { + "epoch": 0.6092874299439551, + "learning_rate": 1.952684139296169e-05, + "loss": 0.2778, + "step": 3042 + }, + { + "epoch": 0.6092874299439551, + "learning_rate": 1.9531079975339912e-05, + "loss": 0.0734, + "step": 3044 + }, + { + "epoch": 0.610088070456365, + "learning_rate": 1.9535299943312455e-05, + "loss": 0.3615, + "step": 3046 + }, + { + "epoch": 0.610088070456365, + "learning_rate": 1.953950128863762e-05, + "loss": 0.1937, + "step": 3048 + }, + { + "epoch": 0.6108887109687751, + "learning_rate": 1.9543684003110105e-05, + "loss": 0.1667, + "step": 3050 + }, + { + "epoch": 0.6108887109687751, + "learning_rate": 1.9547848078560975e-05, + "loss": 0.261, + "step": 3052 + }, + { + "epoch": 0.611689351481185, + "learning_rate": 1.9551993506857688e-05, + "loss": 0.1349, + "step": 3054 + }, + { + "epoch": 0.611689351481185, + "learning_rate": 1.9556120279904144e-05, + "loss": 0.5711, + "step": 3056 + }, + { + "epoch": 0.6124899919935949, + "learning_rate": 1.9560228389640664e-05, + "loss": 0.2478, + "step": 3058 + }, + { + "epoch": 0.6124899919935949, + "learning_rate": 1.956431782804402e-05, + "loss": 0.1349, + "step": 3060 + }, + { + "epoch": 0.6132906325060048, + "learning_rate": 1.956838858712744e-05, + "loss": 0.2274, + "step": 3062 + }, + { + "epoch": 0.6132906325060048, + "learning_rate": 1.957244065894066e-05, + "loss": 0.1054, + "step": 3064 + }, + { + "epoch": 0.6140912730184147, + "learning_rate": 1.9576474035569892e-05, + "loss": 0.3092, + "step": 3066 + }, + { + "epoch": 0.6140912730184147, + "learning_rate": 1.9580488709137858e-05, + "loss": 0.1817, + "step": 3068 + }, + { + "epoch": 0.6148919135308246, + "learning_rate": 1.9584484671803818e-05, + "loss": 0.2106, + "step": 3070 + }, + { + "epoch": 0.6148919135308246, + "learning_rate": 1.9588461915763566e-05, + "loss": 0.1778, + "step": 3072 + }, + { + "epoch": 0.6156925540432346, + "learning_rate": 1.9592420433249462e-05, + "loss": 0.2018, + "step": 3074 + }, + { + "epoch": 0.6156925540432346, + "learning_rate": 1.9596360216530436e-05, + "loss": 0.5225, + "step": 3076 + }, + { + "epoch": 0.6164931945556446, + "learning_rate": 1.9600281257912e-05, + "loss": 0.1496, + "step": 3078 + }, + { + "epoch": 0.6164931945556446, + "learning_rate": 1.9604183549736283e-05, + "loss": 0.208, + "step": 3080 + }, + { + "epoch": 0.6172938350680545, + "learning_rate": 1.960806708438202e-05, + "loss": 0.2119, + "step": 3082 + }, + { + "epoch": 0.6172938350680545, + "learning_rate": 1.961193185426459e-05, + "loss": 0.5395, + "step": 3084 + }, + { + "epoch": 0.6180944755804644, + "learning_rate": 1.9615777851836003e-05, + "loss": 0.2192, + "step": 3086 + }, + { + "epoch": 0.6180944755804644, + "learning_rate": 1.9619605069584954e-05, + "loss": 0.4713, + "step": 3088 + }, + { + "epoch": 0.6188951160928743, + "learning_rate": 1.962341350003679e-05, + "loss": 0.6812, + "step": 3090 + }, + { + "epoch": 0.6188951160928743, + "learning_rate": 1.9627203135753573e-05, + "loss": 0.1647, + "step": 3092 + }, + { + "epoch": 0.6196957566052842, + "learning_rate": 1.9630973969334068e-05, + "loss": 0.4365, + "step": 3094 + }, + { + "epoch": 0.6196957566052842, + "learning_rate": 1.9634725993413744e-05, + "loss": 0.205, + "step": 3096 + }, + { + "epoch": 0.6204963971176941, + "learning_rate": 1.9638459200664822e-05, + "loss": 0.1851, + "step": 3098 + }, + { + "epoch": 0.6204963971176941, + "learning_rate": 1.9642173583796265e-05, + "loss": 0.1681, + "step": 3100 + }, + { + "epoch": 0.6212970376301041, + "learning_rate": 1.9645869135553806e-05, + "loss": 0.2529, + "step": 3102 + }, + { + "epoch": 0.6212970376301041, + "learning_rate": 1.964954584871995e-05, + "loss": 0.1761, + "step": 3104 + }, + { + "epoch": 0.622097678142514, + "learning_rate": 1.965320371611399e-05, + "loss": 0.2018, + "step": 3106 + }, + { + "epoch": 0.622097678142514, + "learning_rate": 1.9656842730592046e-05, + "loss": 0.0646, + "step": 3108 + }, + { + "epoch": 0.622898318654924, + "learning_rate": 1.9660462885047032e-05, + "loss": 0.1787, + "step": 3110 + }, + { + "epoch": 0.622898318654924, + "learning_rate": 1.966406417240872e-05, + "loss": 0.2307, + "step": 3112 + }, + { + "epoch": 0.6236989591673339, + "learning_rate": 1.9667646585643703e-05, + "loss": 0.5142, + "step": 3114 + }, + { + "epoch": 0.6236989591673339, + "learning_rate": 1.967121011775546e-05, + "loss": 0.0361, + "step": 3116 + }, + { + "epoch": 0.6244995996797438, + "learning_rate": 1.967475476178433e-05, + "loss": 0.4309, + "step": 3118 + }, + { + "epoch": 0.6244995996797438, + "learning_rate": 1.967828051080755e-05, + "loss": 0.2464, + "step": 3120 + }, + { + "epoch": 0.6253002401921537, + "learning_rate": 1.9681787357939254e-05, + "loss": 0.6014, + "step": 3122 + }, + { + "epoch": 0.6253002401921537, + "learning_rate": 1.9685275296330497e-05, + "loss": 0.5234, + "step": 3124 + }, + { + "epoch": 0.6261008807045636, + "learning_rate": 1.968874431916926e-05, + "loss": 0.4965, + "step": 3126 + }, + { + "epoch": 0.6261008807045636, + "learning_rate": 1.969219441968046e-05, + "loss": 0.091, + "step": 3128 + }, + { + "epoch": 0.6269015212169736, + "learning_rate": 1.969562559112598e-05, + "loss": 0.2164, + "step": 3130 + }, + { + "epoch": 0.6269015212169736, + "learning_rate": 1.969903782680467e-05, + "loss": 0.716, + "step": 3132 + }, + { + "epoch": 0.6277021617293835, + "learning_rate": 1.970243112005235e-05, + "loss": 0.2961, + "step": 3134 + }, + { + "epoch": 0.6277021617293835, + "learning_rate": 1.9705805464241856e-05, + "loss": 0.3265, + "step": 3136 + }, + { + "epoch": 0.6285028022417934, + "learning_rate": 1.970916085278302e-05, + "loss": 0.2236, + "step": 3138 + }, + { + "epoch": 0.6285028022417934, + "learning_rate": 1.9712497279122692e-05, + "loss": 0.5319, + "step": 3140 + }, + { + "epoch": 0.6293034427542034, + "learning_rate": 1.9715814736744755e-05, + "loss": 0.1162, + "step": 3142 + }, + { + "epoch": 0.6293034427542034, + "learning_rate": 1.971911321917015e-05, + "loss": 0.0386, + "step": 3144 + }, + { + "epoch": 0.6301040832666133, + "learning_rate": 1.9722392719956864e-05, + "loss": 0.1987, + "step": 3146 + }, + { + "epoch": 0.6301040832666133, + "learning_rate": 1.9725653232699962e-05, + "loss": 0.2876, + "step": 3148 + }, + { + "epoch": 0.6309047237790232, + "learning_rate": 1.9728894751031595e-05, + "loss": 0.3842, + "step": 3150 + }, + { + "epoch": 0.6309047237790232, + "learning_rate": 1.9732117268621005e-05, + "loss": 0.7951, + "step": 3152 + }, + { + "epoch": 0.6317053642914331, + "learning_rate": 1.9735320779174545e-05, + "loss": 0.3069, + "step": 3154 + }, + { + "epoch": 0.6317053642914331, + "learning_rate": 1.9738505276435692e-05, + "loss": 0.1682, + "step": 3156 + }, + { + "epoch": 0.6325060048038431, + "learning_rate": 1.974167075418505e-05, + "loss": 0.2534, + "step": 3158 + }, + { + "epoch": 0.6325060048038431, + "learning_rate": 1.9744817206240377e-05, + "loss": 0.2263, + "step": 3160 + }, + { + "epoch": 0.633306645316253, + "learning_rate": 1.9747944626456577e-05, + "loss": 0.1977, + "step": 3162 + }, + { + "epoch": 0.633306645316253, + "learning_rate": 1.9751053008725736e-05, + "loss": 0.1496, + "step": 3164 + }, + { + "epoch": 0.6341072858286629, + "learning_rate": 1.975414234697712e-05, + "loss": 0.0671, + "step": 3166 + }, + { + "epoch": 0.6341072858286629, + "learning_rate": 1.9757212635177177e-05, + "loss": 0.0523, + "step": 3168 + }, + { + "epoch": 0.6349079263410728, + "learning_rate": 1.9760263867329568e-05, + "loss": 0.1675, + "step": 3170 + }, + { + "epoch": 0.6349079263410728, + "learning_rate": 1.9763296037475174e-05, + "loss": 0.6791, + "step": 3172 + }, + { + "epoch": 0.6357085668534828, + "learning_rate": 1.97663091396921e-05, + "loss": 0.1423, + "step": 3174 + }, + { + "epoch": 0.6357085668534828, + "learning_rate": 1.976930316809569e-05, + "loss": 0.3445, + "step": 3176 + }, + { + "epoch": 0.6365092073658927, + "learning_rate": 1.9772278116838543e-05, + "loss": 0.2066, + "step": 3178 + }, + { + "epoch": 0.6365092073658927, + "learning_rate": 1.9775233980110524e-05, + "loss": 0.1515, + "step": 3180 + }, + { + "epoch": 0.6373098478783027, + "learning_rate": 1.977817075213876e-05, + "loss": 0.4071, + "step": 3182 + }, + { + "epoch": 0.6373098478783027, + "learning_rate": 1.978108842718768e-05, + "loss": 0.2463, + "step": 3184 + }, + { + "epoch": 0.6381104883907126, + "learning_rate": 1.9783986999558994e-05, + "loss": 0.1663, + "step": 3186 + }, + { + "epoch": 0.6381104883907126, + "learning_rate": 1.9786866463591732e-05, + "loss": 0.62, + "step": 3188 + }, + { + "epoch": 0.6389111289031225, + "learning_rate": 1.9789726813662233e-05, + "loss": 0.1948, + "step": 3190 + }, + { + "epoch": 0.6389111289031225, + "learning_rate": 1.9792568044184176e-05, + "loss": 0.4769, + "step": 3192 + }, + { + "epoch": 0.6397117694155324, + "learning_rate": 1.979539014960858e-05, + "loss": 0.1599, + "step": 3194 + }, + { + "epoch": 0.6397117694155324, + "learning_rate": 1.9798193124423804e-05, + "loss": 0.1502, + "step": 3196 + }, + { + "epoch": 0.6405124099279423, + "learning_rate": 1.9800976963155584e-05, + "loss": 0.549, + "step": 3198 + }, + { + "epoch": 0.6405124099279423, + "learning_rate": 1.9803741660367015e-05, + "loss": 0.1827, + "step": 3200 + }, + { + "epoch": 0.6413130504403523, + "learning_rate": 1.980648721065859e-05, + "loss": 0.2668, + "step": 3202 + }, + { + "epoch": 0.6413130504403523, + "learning_rate": 1.9809213608668185e-05, + "loss": 0.2382, + "step": 3204 + }, + { + "epoch": 0.6421136909527622, + "learning_rate": 1.9811920849071092e-05, + "loss": 0.2692, + "step": 3206 + }, + { + "epoch": 0.6421136909527622, + "learning_rate": 1.9814608926580007e-05, + "loss": 0.1033, + "step": 3208 + }, + { + "epoch": 0.6429143314651722, + "learning_rate": 1.9817277835945057e-05, + "loss": 0.5423, + "step": 3210 + }, + { + "epoch": 0.6429143314651722, + "learning_rate": 1.9819927571953807e-05, + "loss": 0.4381, + "step": 3212 + }, + { + "epoch": 0.6437149719775821, + "learning_rate": 1.9822558129431263e-05, + "loss": 0.929, + "step": 3214 + }, + { + "epoch": 0.6437149719775821, + "learning_rate": 1.9825169503239885e-05, + "loss": 0.1673, + "step": 3216 + }, + { + "epoch": 0.644515612489992, + "learning_rate": 1.9827761688279606e-05, + "loss": 0.2194, + "step": 3218 + }, + { + "epoch": 0.644515612489992, + "learning_rate": 1.983033467948784e-05, + "loss": 0.0304, + "step": 3220 + }, + { + "epoch": 0.6453162530024019, + "learning_rate": 1.983288847183947e-05, + "loss": 0.2982, + "step": 3222 + }, + { + "epoch": 0.6453162530024019, + "learning_rate": 1.9835423060346892e-05, + "loss": 0.2106, + "step": 3224 + }, + { + "epoch": 0.6461168935148118, + "learning_rate": 1.9837938440059992e-05, + "loss": 0.4267, + "step": 3226 + }, + { + "epoch": 0.6461168935148118, + "learning_rate": 1.9840434606066182e-05, + "loss": 0.1768, + "step": 3228 + }, + { + "epoch": 0.6469175340272217, + "learning_rate": 1.9842911553490392e-05, + "loss": 0.5597, + "step": 3230 + }, + { + "epoch": 0.6469175340272217, + "learning_rate": 1.9845369277495102e-05, + "loss": 0.1497, + "step": 3232 + }, + { + "epoch": 0.6477181745396317, + "learning_rate": 1.984780777328031e-05, + "loss": 0.2465, + "step": 3234 + }, + { + "epoch": 0.6477181745396317, + "learning_rate": 1.9850227036083592e-05, + "loss": 0.1608, + "step": 3236 + }, + { + "epoch": 0.6485188150520417, + "learning_rate": 1.985262706118007e-05, + "loss": 0.2411, + "step": 3238 + }, + { + "epoch": 0.6485188150520417, + "learning_rate": 1.985500784388244e-05, + "loss": 0.2069, + "step": 3240 + }, + { + "epoch": 0.6493194555644516, + "learning_rate": 1.9857369379540982e-05, + "loss": 0.3757, + "step": 3242 + }, + { + "epoch": 0.6493194555644516, + "learning_rate": 1.985971166354357e-05, + "loss": 0.3775, + "step": 3244 + }, + { + "epoch": 0.6501200960768615, + "learning_rate": 1.986203469131567e-05, + "loss": 0.5834, + "step": 3246 + }, + { + "epoch": 0.6501200960768615, + "learning_rate": 1.9864338458320366e-05, + "loss": 0.5152, + "step": 3248 + }, + { + "epoch": 0.6509207365892714, + "learning_rate": 1.986662296005834e-05, + "loss": 0.5086, + "step": 3250 + }, + { + "epoch": 0.6509207365892714, + "learning_rate": 1.986888819206792e-05, + "loss": 0.5967, + "step": 3252 + }, + { + "epoch": 0.6517213771016813, + "learning_rate": 1.987113414992505e-05, + "loss": 0.4372, + "step": 3254 + }, + { + "epoch": 0.6517213771016813, + "learning_rate": 1.9873360829243323e-05, + "loss": 0.2861, + "step": 3256 + }, + { + "epoch": 0.6525220176140912, + "learning_rate": 1.9875568225674e-05, + "loss": 0.1581, + "step": 3258 + }, + { + "epoch": 0.6525220176140912, + "learning_rate": 1.9877756334905983e-05, + "loss": 0.1872, + "step": 3260 + }, + { + "epoch": 0.6533226581265013, + "learning_rate": 1.9879925152665845e-05, + "loss": 0.5262, + "step": 3262 + }, + { + "epoch": 0.6533226581265013, + "learning_rate": 1.9882074674717836e-05, + "loss": 0.183, + "step": 3264 + }, + { + "epoch": 0.6541232986389112, + "learning_rate": 1.9884204896863895e-05, + "loss": 0.3277, + "step": 3266 + }, + { + "epoch": 0.6541232986389112, + "learning_rate": 1.988631581494365e-05, + "loss": 0.1612, + "step": 3268 + }, + { + "epoch": 0.6549239391513211, + "learning_rate": 1.9888407424834433e-05, + "loss": 0.4648, + "step": 3270 + }, + { + "epoch": 0.6549239391513211, + "learning_rate": 1.989047972245129e-05, + "loss": 0.3959, + "step": 3272 + }, + { + "epoch": 0.655724579663731, + "learning_rate": 1.989253270374697e-05, + "loss": 0.2024, + "step": 3274 + }, + { + "epoch": 0.655724579663731, + "learning_rate": 1.9894566364711965e-05, + "loss": 0.0771, + "step": 3276 + }, + { + "epoch": 0.6565252201761409, + "learning_rate": 1.9896580701374482e-05, + "loss": 0.4497, + "step": 3278 + }, + { + "epoch": 0.6565252201761409, + "learning_rate": 1.989857570980049e-05, + "loss": 0.2759, + "step": 3280 + }, + { + "epoch": 0.6573258606885508, + "learning_rate": 1.9900551386093677e-05, + "loss": 0.1272, + "step": 3282 + }, + { + "epoch": 0.6573258606885508, + "learning_rate": 1.990250772639552e-05, + "loss": 0.3815, + "step": 3284 + }, + { + "epoch": 0.6581265012009607, + "learning_rate": 1.9904444726885236e-05, + "loss": 0.4654, + "step": 3286 + }, + { + "epoch": 0.6581265012009607, + "learning_rate": 1.9906362383779826e-05, + "loss": 0.4346, + "step": 3288 + }, + { + "epoch": 0.6589271417133707, + "learning_rate": 1.990826069333406e-05, + "loss": 0.3647, + "step": 3290 + }, + { + "epoch": 0.6589271417133707, + "learning_rate": 1.99101396518405e-05, + "loss": 0.4385, + "step": 3292 + }, + { + "epoch": 0.6597277822257807, + "learning_rate": 1.99119992556295e-05, + "loss": 0.352, + "step": 3294 + }, + { + "epoch": 0.6597277822257807, + "learning_rate": 1.9913839501069213e-05, + "loss": 0.3491, + "step": 3296 + }, + { + "epoch": 0.6605284227381906, + "learning_rate": 1.99156603845656e-05, + "loss": 0.3054, + "step": 3298 + }, + { + "epoch": 0.6605284227381906, + "learning_rate": 1.9917461902562435e-05, + "loss": 0.2773, + "step": 3300 + }, + { + "epoch": 0.6613290632506005, + "learning_rate": 1.9919244051541315e-05, + "loss": 0.2163, + "step": 3302 + }, + { + "epoch": 0.6613290632506005, + "learning_rate": 1.9921006828021666e-05, + "loss": 0.264, + "step": 3304 + }, + { + "epoch": 0.6621297037630104, + "learning_rate": 1.9922750228560746e-05, + "loss": 0.2482, + "step": 3306 + }, + { + "epoch": 0.6621297037630104, + "learning_rate": 1.9924474249753652e-05, + "loss": 0.696, + "step": 3308 + }, + { + "epoch": 0.6629303442754203, + "learning_rate": 1.9926178888233344e-05, + "loss": 0.1611, + "step": 3310 + }, + { + "epoch": 0.6629303442754203, + "learning_rate": 1.9927864140670615e-05, + "loss": 0.5024, + "step": 3312 + }, + { + "epoch": 0.6637309847878302, + "learning_rate": 1.9929530003774133e-05, + "loss": 0.3944, + "step": 3314 + }, + { + "epoch": 0.6637309847878302, + "learning_rate": 1.9931176474290438e-05, + "loss": 0.2715, + "step": 3316 + }, + { + "epoch": 0.6645316253002402, + "learning_rate": 1.993280354900393e-05, + "loss": 0.4165, + "step": 3318 + }, + { + "epoch": 0.6645316253002402, + "learning_rate": 1.99344112247369e-05, + "loss": 0.4235, + "step": 3320 + }, + { + "epoch": 0.6653322658126501, + "learning_rate": 1.9935999498349518e-05, + "loss": 0.1793, + "step": 3322 + }, + { + "epoch": 0.6653322658126501, + "learning_rate": 1.9937568366739858e-05, + "loss": 0.1954, + "step": 3324 + }, + { + "epoch": 0.6661329063250601, + "learning_rate": 1.9939117826843883e-05, + "loss": 0.1881, + "step": 3326 + }, + { + "epoch": 0.6661329063250601, + "learning_rate": 1.9940647875635463e-05, + "loss": 0.3484, + "step": 3328 + }, + { + "epoch": 0.66693354683747, + "learning_rate": 1.9942158510126384e-05, + "loss": 0.165, + "step": 3330 + }, + { + "epoch": 0.66693354683747, + "learning_rate": 1.9943649727366335e-05, + "loss": 0.2826, + "step": 3332 + }, + { + "epoch": 0.6677341873498799, + "learning_rate": 1.9945121524442944e-05, + "loss": 0.2468, + "step": 3334 + }, + { + "epoch": 0.6677341873498799, + "learning_rate": 1.994657389848176e-05, + "loss": 0.1852, + "step": 3336 + }, + { + "epoch": 0.6685348278622898, + "learning_rate": 1.9948006846646262e-05, + "loss": 0.6371, + "step": 3338 + }, + { + "epoch": 0.6685348278622898, + "learning_rate": 1.994942036613787e-05, + "loss": 0.1481, + "step": 3340 + }, + { + "epoch": 0.6693354683746997, + "learning_rate": 1.9950814454195953e-05, + "loss": 0.1688, + "step": 3342 + }, + { + "epoch": 0.6693354683746997, + "learning_rate": 1.9952189108097825e-05, + "loss": 0.6612, + "step": 3344 + }, + { + "epoch": 0.6701361088871097, + "learning_rate": 1.9953544325158755e-05, + "loss": 0.7063, + "step": 3346 + }, + { + "epoch": 0.6701361088871097, + "learning_rate": 1.995488010273198e-05, + "loss": 0.0452, + "step": 3348 + }, + { + "epoch": 0.6709367493995196, + "learning_rate": 1.9956196438208693e-05, + "loss": 0.5025, + "step": 3350 + }, + { + "epoch": 0.6709367493995196, + "learning_rate": 1.9957493329018064e-05, + "loss": 0.1038, + "step": 3352 + }, + { + "epoch": 0.6717373899119295, + "learning_rate": 1.9958770772627236e-05, + "loss": 0.1697, + "step": 3354 + }, + { + "epoch": 0.6717373899119295, + "learning_rate": 1.9960028766541336e-05, + "loss": 0.1717, + "step": 3356 + }, + { + "epoch": 0.6725380304243395, + "learning_rate": 1.9961267308303473e-05, + "loss": 0.2192, + "step": 3358 + }, + { + "epoch": 0.6725380304243395, + "learning_rate": 1.996248639549475e-05, + "loss": 0.1963, + "step": 3360 + }, + { + "epoch": 0.6733386709367494, + "learning_rate": 1.9963686025734262e-05, + "loss": 0.2069, + "step": 3362 + }, + { + "epoch": 0.6733386709367494, + "learning_rate": 1.9964866196679105e-05, + "loss": 0.2617, + "step": 3364 + }, + { + "epoch": 0.6741393114491593, + "learning_rate": 1.9966026906024377e-05, + "loss": 0.3077, + "step": 3366 + }, + { + "epoch": 0.6741393114491593, + "learning_rate": 1.9967168151503196e-05, + "loss": 0.2767, + "step": 3368 + }, + { + "epoch": 0.6749399519615693, + "learning_rate": 1.9968289930886675e-05, + "loss": 0.2471, + "step": 3370 + }, + { + "epoch": 0.6749399519615693, + "learning_rate": 1.9969392241983957e-05, + "loss": 0.0995, + "step": 3372 + }, + { + "epoch": 0.6757405924739792, + "learning_rate": 1.997047508264221e-05, + "loss": 0.3756, + "step": 3374 + }, + { + "epoch": 0.6757405924739792, + "learning_rate": 1.997153845074662e-05, + "loss": 0.4216, + "step": 3376 + }, + { + "epoch": 0.6765412329863891, + "learning_rate": 1.99725823442204e-05, + "loss": 0.5415, + "step": 3378 + }, + { + "epoch": 0.6765412329863891, + "learning_rate": 1.9973606761024813e-05, + "loss": 0.1883, + "step": 3380 + }, + { + "epoch": 0.677341873498799, + "learning_rate": 1.9974611699159142e-05, + "loss": 0.1124, + "step": 3382 + }, + { + "epoch": 0.677341873498799, + "learning_rate": 1.997559715666073e-05, + "loss": 0.0647, + "step": 3384 + }, + { + "epoch": 0.678142514011209, + "learning_rate": 1.9976563131604945e-05, + "loss": 0.1507, + "step": 3386 + }, + { + "epoch": 0.678142514011209, + "learning_rate": 1.9977509622105233e-05, + "loss": 0.2612, + "step": 3388 + }, + { + "epoch": 0.6789431545236189, + "learning_rate": 1.9978436626313065e-05, + "loss": 0.7532, + "step": 3390 + }, + { + "epoch": 0.6789431545236189, + "learning_rate": 1.9979344142417986e-05, + "loss": 0.3349, + "step": 3392 + }, + { + "epoch": 0.6797437950360288, + "learning_rate": 1.99802321686476e-05, + "loss": 0.5495, + "step": 3394 + }, + { + "epoch": 0.6797437950360288, + "learning_rate": 1.9981100703267567e-05, + "loss": 0.0274, + "step": 3396 + }, + { + "epoch": 0.6805444355484388, + "learning_rate": 1.9981949744581622e-05, + "loss": 0.542, + "step": 3398 + }, + { + "epoch": 0.6805444355484388, + "learning_rate": 1.998277929093157e-05, + "loss": 0.0597, + "step": 3400 + }, + { + "epoch": 0.6813450760608487, + "learning_rate": 1.9983589340697288e-05, + "loss": 0.6511, + "step": 3402 + }, + { + "epoch": 0.6813450760608487, + "learning_rate": 1.998437989229673e-05, + "loss": 0.3231, + "step": 3404 + }, + { + "epoch": 0.6821457165732586, + "learning_rate": 1.998515094418594e-05, + "loss": 0.1761, + "step": 3406 + }, + { + "epoch": 0.6821457165732586, + "learning_rate": 1.9985902494859023e-05, + "loss": 0.239, + "step": 3408 + }, + { + "epoch": 0.6829463570856685, + "learning_rate": 1.99866345428482e-05, + "loss": 0.2069, + "step": 3410 + }, + { + "epoch": 0.6829463570856685, + "learning_rate": 1.998734708672375e-05, + "loss": 0.2792, + "step": 3412 + }, + { + "epoch": 0.6837469975980784, + "learning_rate": 1.998804012509407e-05, + "loss": 0.4495, + "step": 3414 + }, + { + "epoch": 0.6837469975980784, + "learning_rate": 1.9988713656605635e-05, + "loss": 0.5576, + "step": 3416 + }, + { + "epoch": 0.6845476381104884, + "learning_rate": 1.9989367679943025e-05, + "loss": 0.3214, + "step": 3418 + }, + { + "epoch": 0.6845476381104884, + "learning_rate": 1.9990002193828923e-05, + "loss": 0.3858, + "step": 3420 + }, + { + "epoch": 0.6853482786228983, + "learning_rate": 1.9990617197024103e-05, + "loss": 0.4545, + "step": 3422 + }, + { + "epoch": 0.6853482786228983, + "learning_rate": 1.9991212688327456e-05, + "loss": 0.2006, + "step": 3424 + }, + { + "epoch": 0.6861489191353083, + "learning_rate": 1.999178866657597e-05, + "loss": 0.1961, + "step": 3426 + }, + { + "epoch": 0.6861489191353083, + "learning_rate": 1.9992345130644747e-05, + "loss": 0.4886, + "step": 3428 + }, + { + "epoch": 0.6869495596477182, + "learning_rate": 1.999288207944701e-05, + "loss": 0.1273, + "step": 3430 + }, + { + "epoch": 0.6869495596477182, + "learning_rate": 1.999339951193407e-05, + "loss": 0.1237, + "step": 3432 + }, + { + "epoch": 0.6877502001601281, + "learning_rate": 1.999389742709538e-05, + "loss": 0.2531, + "step": 3434 + }, + { + "epoch": 0.6877502001601281, + "learning_rate": 1.9994375823958504e-05, + "loss": 0.1505, + "step": 3436 + }, + { + "epoch": 0.688550840672538, + "learning_rate": 1.9994834701589113e-05, + "loss": 0.5149, + "step": 3438 + }, + { + "epoch": 0.688550840672538, + "learning_rate": 1.9995274059091018e-05, + "loss": 0.2244, + "step": 3440 + }, + { + "epoch": 0.6893514811849479, + "learning_rate": 1.999569389560614e-05, + "loss": 0.4143, + "step": 3442 + }, + { + "epoch": 0.6893514811849479, + "learning_rate": 1.999609421031453e-05, + "loss": 0.2074, + "step": 3444 + }, + { + "epoch": 0.6901521216973578, + "learning_rate": 1.9996475002434365e-05, + "loss": 0.1683, + "step": 3446 + }, + { + "epoch": 0.6901521216973578, + "learning_rate": 1.999683627122195e-05, + "loss": 0.5138, + "step": 3448 + }, + { + "epoch": 0.6909527622097679, + "learning_rate": 1.999717801597172e-05, + "loss": 0.5701, + "step": 3450 + }, + { + "epoch": 0.6909527622097679, + "learning_rate": 1.9997500236016233e-05, + "loss": 0.423, + "step": 3452 + }, + { + "epoch": 0.6917534027221778, + "learning_rate": 1.9997802930726195e-05, + "loss": 0.3856, + "step": 3454 + }, + { + "epoch": 0.6917534027221778, + "learning_rate": 1.9998086099510433e-05, + "loss": 0.2978, + "step": 3456 + }, + { + "epoch": 0.6925540432345877, + "learning_rate": 1.9998349741815916e-05, + "loss": 0.3094, + "step": 3458 + }, + { + "epoch": 0.6925540432345877, + "learning_rate": 1.9998593857127736e-05, + "loss": 0.1577, + "step": 3460 + }, + { + "epoch": 0.6933546837469976, + "learning_rate": 1.999881844496914e-05, + "loss": 0.211, + "step": 3462 + }, + { + "epoch": 0.6933546837469976, + "learning_rate": 1.99990235049015e-05, + "loss": 0.2351, + "step": 3464 + }, + { + "epoch": 0.6941553242594075, + "learning_rate": 1.9999209036524326e-05, + "loss": 0.3274, + "step": 3466 + }, + { + "epoch": 0.6941553242594075, + "learning_rate": 1.9999375039475275e-05, + "loss": 0.3728, + "step": 3468 + }, + { + "epoch": 0.6949559647718174, + "learning_rate": 1.999952151343014e-05, + "loss": 0.1854, + "step": 3470 + }, + { + "epoch": 0.6949559647718174, + "learning_rate": 1.999964845810285e-05, + "loss": 0.4345, + "step": 3472 + }, + { + "epoch": 0.6957566052842273, + "learning_rate": 1.9999755873245484e-05, + "loss": 0.2237, + "step": 3474 + }, + { + "epoch": 0.6957566052842273, + "learning_rate": 1.9999843758648253e-05, + "loss": 0.1767, + "step": 3476 + }, + { + "epoch": 0.6965572457966374, + "learning_rate": 1.999991211413952e-05, + "loss": 0.5157, + "step": 3478 + }, + { + "epoch": 0.6965572457966374, + "learning_rate": 1.999996093958578e-05, + "loss": 0.3625, + "step": 3480 + }, + { + "epoch": 0.6973578863090473, + "learning_rate": 1.9999990234891677e-05, + "loss": 0.4346, + "step": 3482 + }, + { + "epoch": 0.6973578863090473, + "learning_rate": 2e-05, + "loss": 0.1571, + "step": 3484 + }, + { + "epoch": 0.6981585268214572, + "learning_rate": 1.999999023489168e-05, + "loss": 0.1579, + "step": 3486 + }, + { + "epoch": 0.6981585268214572, + "learning_rate": 1.999996093958578e-05, + "loss": 0.1955, + "step": 3488 + }, + { + "epoch": 0.6989591673338671, + "learning_rate": 1.999991211413952e-05, + "loss": 0.1647, + "step": 3490 + }, + { + "epoch": 0.6989591673338671, + "learning_rate": 1.9999843758648253e-05, + "loss": 0.0961, + "step": 3492 + }, + { + "epoch": 0.699759807846277, + "learning_rate": 1.9999755873245484e-05, + "loss": 0.44, + "step": 3494 + }, + { + "epoch": 0.699759807846277, + "learning_rate": 1.999964845810285e-05, + "loss": 0.9857, + "step": 3496 + }, + { + "epoch": 0.7005604483586869, + "learning_rate": 1.999952151343014e-05, + "loss": 0.1852, + "step": 3498 + }, + { + "epoch": 0.7005604483586869, + "learning_rate": 1.9999375039475278e-05, + "loss": 0.3013, + "step": 3500 + }, + { + "epoch": 0.7013610888710968, + "learning_rate": 1.9999209036524326e-05, + "loss": 0.6867, + "step": 3502 + }, + { + "epoch": 0.7013610888710968, + "learning_rate": 1.99990235049015e-05, + "loss": 0.2531, + "step": 3504 + }, + { + "epoch": 0.7021617293835068, + "learning_rate": 1.999881844496914e-05, + "loss": 0.2156, + "step": 3506 + }, + { + "epoch": 0.7021617293835068, + "learning_rate": 1.9998593857127736e-05, + "loss": 0.2191, + "step": 3508 + }, + { + "epoch": 0.7029623698959168, + "learning_rate": 1.9998349741815916e-05, + "loss": 0.282, + "step": 3510 + }, + { + "epoch": 0.7029623698959168, + "learning_rate": 1.9998086099510433e-05, + "loss": 0.3974, + "step": 3512 + }, + { + "epoch": 0.7037630104083267, + "learning_rate": 1.9997802930726195e-05, + "loss": 0.4265, + "step": 3514 + }, + { + "epoch": 0.7037630104083267, + "learning_rate": 1.9997500236016233e-05, + "loss": 0.1428, + "step": 3516 + }, + { + "epoch": 0.7045636509207366, + "learning_rate": 1.999717801597172e-05, + "loss": 0.3773, + "step": 3518 + }, + { + "epoch": 0.7045636509207366, + "learning_rate": 1.999683627122195e-05, + "loss": 0.1995, + "step": 3520 + }, + { + "epoch": 0.7053642914331465, + "learning_rate": 1.9996475002434365e-05, + "loss": 0.4602, + "step": 3522 + }, + { + "epoch": 0.7053642914331465, + "learning_rate": 1.999609421031453e-05, + "loss": 0.2064, + "step": 3524 + }, + { + "epoch": 0.7061649319455564, + "learning_rate": 1.999569389560614e-05, + "loss": 0.1755, + "step": 3526 + }, + { + "epoch": 0.7061649319455564, + "learning_rate": 1.999527405909102e-05, + "loss": 0.5047, + "step": 3528 + }, + { + "epoch": 0.7069655724579663, + "learning_rate": 1.9994834701589113e-05, + "loss": 0.3721, + "step": 3530 + }, + { + "epoch": 0.7069655724579663, + "learning_rate": 1.9994375823958504e-05, + "loss": 0.1685, + "step": 3532 + }, + { + "epoch": 0.7077662129703763, + "learning_rate": 1.9993897427095378e-05, + "loss": 0.1963, + "step": 3534 + }, + { + "epoch": 0.7077662129703763, + "learning_rate": 1.999339951193407e-05, + "loss": 0.3616, + "step": 3536 + }, + { + "epoch": 0.7085668534827863, + "learning_rate": 1.999288207944701e-05, + "loss": 0.4263, + "step": 3538 + }, + { + "epoch": 0.7085668534827863, + "learning_rate": 1.999234513064475e-05, + "loss": 0.1666, + "step": 3540 + }, + { + "epoch": 0.7093674939951962, + "learning_rate": 1.999178866657597e-05, + "loss": 0.5756, + "step": 3542 + }, + { + "epoch": 0.7093674939951962, + "learning_rate": 1.9991212688327456e-05, + "loss": 0.1601, + "step": 3544 + }, + { + "epoch": 0.7101681345076061, + "learning_rate": 1.9990617197024103e-05, + "loss": 0.176, + "step": 3546 + }, + { + "epoch": 0.7101681345076061, + "learning_rate": 1.9990002193828923e-05, + "loss": 0.1501, + "step": 3548 + }, + { + "epoch": 0.710968775020016, + "learning_rate": 1.998936767994303e-05, + "loss": 0.1513, + "step": 3550 + }, + { + "epoch": 0.710968775020016, + "learning_rate": 1.9988713656605635e-05, + "loss": 0.0448, + "step": 3552 + }, + { + "epoch": 0.7117694155324259, + "learning_rate": 1.998804012509407e-05, + "loss": 0.2052, + "step": 3554 + }, + { + "epoch": 0.7117694155324259, + "learning_rate": 1.998734708672375e-05, + "loss": 0.1983, + "step": 3556 + }, + { + "epoch": 0.7125700560448359, + "learning_rate": 1.99866345428482e-05, + "loss": 0.1946, + "step": 3558 + }, + { + "epoch": 0.7125700560448359, + "learning_rate": 1.9985902494859026e-05, + "loss": 0.1585, + "step": 3560 + }, + { + "epoch": 0.7133706965572458, + "learning_rate": 1.998515094418594e-05, + "loss": 0.278, + "step": 3562 + }, + { + "epoch": 0.7133706965572458, + "learning_rate": 1.9984379892296735e-05, + "loss": 0.0863, + "step": 3564 + }, + { + "epoch": 0.7141713370696557, + "learning_rate": 1.9983589340697288e-05, + "loss": 0.5755, + "step": 3566 + }, + { + "epoch": 0.7141713370696557, + "learning_rate": 1.9982779290931572e-05, + "loss": 0.0929, + "step": 3568 + }, + { + "epoch": 0.7149719775820657, + "learning_rate": 1.9981949744581622e-05, + "loss": 0.1784, + "step": 3570 + }, + { + "epoch": 0.7149719775820657, + "learning_rate": 1.9981100703267567e-05, + "loss": 0.0969, + "step": 3572 + }, + { + "epoch": 0.7157726180944756, + "learning_rate": 1.99802321686476e-05, + "loss": 0.229, + "step": 3574 + }, + { + "epoch": 0.7157726180944756, + "learning_rate": 1.997934414241799e-05, + "loss": 0.4519, + "step": 3576 + }, + { + "epoch": 0.7165732586068855, + "learning_rate": 1.9978436626313068e-05, + "loss": 0.9369, + "step": 3578 + }, + { + "epoch": 0.7165732586068855, + "learning_rate": 1.9977509622105236e-05, + "loss": 0.0366, + "step": 3580 + }, + { + "epoch": 0.7173738991192954, + "learning_rate": 1.997656313160495e-05, + "loss": 0.059, + "step": 3582 + }, + { + "epoch": 0.7173738991192954, + "learning_rate": 1.997559715666073e-05, + "loss": 0.0156, + "step": 3584 + }, + { + "epoch": 0.7181745396317054, + "learning_rate": 1.9974611699159142e-05, + "loss": 0.4294, + "step": 3586 + }, + { + "epoch": 0.7181745396317054, + "learning_rate": 1.9973606761024813e-05, + "loss": 0.233, + "step": 3588 + }, + { + "epoch": 0.7189751801441153, + "learning_rate": 1.99725823442204e-05, + "loss": 0.168, + "step": 3590 + }, + { + "epoch": 0.7189751801441153, + "learning_rate": 1.997153845074662e-05, + "loss": 0.0714, + "step": 3592 + }, + { + "epoch": 0.7197758206565252, + "learning_rate": 1.9970475082642212e-05, + "loss": 0.1761, + "step": 3594 + }, + { + "epoch": 0.7197758206565252, + "learning_rate": 1.9969392241983957e-05, + "loss": 0.198, + "step": 3596 + }, + { + "epoch": 0.7205764611689351, + "learning_rate": 1.9968289930886675e-05, + "loss": 0.0076, + "step": 3598 + }, + { + "epoch": 0.7205764611689351, + "learning_rate": 1.9967168151503193e-05, + "loss": 0.0173, + "step": 3600 + }, + { + "epoch": 0.7213771016813451, + "learning_rate": 1.9966026906024377e-05, + "loss": 0.2869, + "step": 3602 + }, + { + "epoch": 0.7213771016813451, + "learning_rate": 1.996486619667911e-05, + "loss": 0.5903, + "step": 3604 + }, + { + "epoch": 0.722177742193755, + "learning_rate": 1.9963686025734262e-05, + "loss": 0.1797, + "step": 3606 + }, + { + "epoch": 0.722177742193755, + "learning_rate": 1.9962486395494753e-05, + "loss": 0.044, + "step": 3608 + }, + { + "epoch": 0.7229783827061649, + "learning_rate": 1.9961267308303473e-05, + "loss": 0.6855, + "step": 3610 + }, + { + "epoch": 0.7229783827061649, + "learning_rate": 1.9960028766541336e-05, + "loss": 0.6294, + "step": 3612 + }, + { + "epoch": 0.7237790232185749, + "learning_rate": 1.9958770772627236e-05, + "loss": 0.3414, + "step": 3614 + }, + { + "epoch": 0.7237790232185749, + "learning_rate": 1.9957493329018064e-05, + "loss": 0.2379, + "step": 3616 + }, + { + "epoch": 0.7245796637309848, + "learning_rate": 1.9956196438208693e-05, + "loss": 0.455, + "step": 3618 + }, + { + "epoch": 0.7245796637309848, + "learning_rate": 1.995488010273198e-05, + "loss": 0.2024, + "step": 3620 + }, + { + "epoch": 0.7253803042433947, + "learning_rate": 1.9953544325158755e-05, + "loss": 0.1667, + "step": 3622 + }, + { + "epoch": 0.7253803042433947, + "learning_rate": 1.9952189108097825e-05, + "loss": 0.471, + "step": 3624 + }, + { + "epoch": 0.7261809447558046, + "learning_rate": 1.9950814454195953e-05, + "loss": 0.3317, + "step": 3626 + }, + { + "epoch": 0.7261809447558046, + "learning_rate": 1.9949420366137873e-05, + "loss": 0.3127, + "step": 3628 + }, + { + "epoch": 0.7269815852682145, + "learning_rate": 1.9948006846646262e-05, + "loss": 0.168, + "step": 3630 + }, + { + "epoch": 0.7269815852682145, + "learning_rate": 1.994657389848176e-05, + "loss": 0.7609, + "step": 3632 + }, + { + "epoch": 0.7277822257806245, + "learning_rate": 1.9945121524442947e-05, + "loss": 0.4163, + "step": 3634 + }, + { + "epoch": 0.7277822257806245, + "learning_rate": 1.994364972736634e-05, + "loss": 0.3349, + "step": 3636 + }, + { + "epoch": 0.7285828662930345, + "learning_rate": 1.9942158510126384e-05, + "loss": 0.5226, + "step": 3638 + }, + { + "epoch": 0.7285828662930345, + "learning_rate": 1.9940647875635466e-05, + "loss": 0.2286, + "step": 3640 + }, + { + "epoch": 0.7293835068054444, + "learning_rate": 1.9939117826843887e-05, + "loss": 0.5597, + "step": 3642 + }, + { + "epoch": 0.7293835068054444, + "learning_rate": 1.993756836673986e-05, + "loss": 0.2156, + "step": 3644 + }, + { + "epoch": 0.7301841473178543, + "learning_rate": 1.9935999498349525e-05, + "loss": 0.3924, + "step": 3646 + }, + { + "epoch": 0.7301841473178543, + "learning_rate": 1.99344112247369e-05, + "loss": 0.5538, + "step": 3648 + }, + { + "epoch": 0.7309847878302642, + "learning_rate": 1.9932803549003932e-05, + "loss": 0.2051, + "step": 3650 + }, + { + "epoch": 0.7309847878302642, + "learning_rate": 1.9931176474290438e-05, + "loss": 0.3971, + "step": 3652 + }, + { + "epoch": 0.7317854283426741, + "learning_rate": 1.9929530003774136e-05, + "loss": 0.1989, + "step": 3654 + }, + { + "epoch": 0.7317854283426741, + "learning_rate": 1.9927864140670618e-05, + "loss": 0.2877, + "step": 3656 + }, + { + "epoch": 0.732586068855084, + "learning_rate": 1.9926178888233344e-05, + "loss": 0.3857, + "step": 3658 + }, + { + "epoch": 0.732586068855084, + "learning_rate": 1.9924474249753656e-05, + "loss": 0.4158, + "step": 3660 + }, + { + "epoch": 0.733386709367494, + "learning_rate": 1.9922750228560746e-05, + "loss": 0.3614, + "step": 3662 + }, + { + "epoch": 0.733386709367494, + "learning_rate": 1.9921006828021666e-05, + "loss": 0.2104, + "step": 3664 + }, + { + "epoch": 0.734187349879904, + "learning_rate": 1.9919244051541315e-05, + "loss": 0.3535, + "step": 3666 + }, + { + "epoch": 0.734187349879904, + "learning_rate": 1.9917461902562435e-05, + "loss": 0.1195, + "step": 3668 + }, + { + "epoch": 0.7349879903923139, + "learning_rate": 1.9915660384565603e-05, + "loss": 0.3613, + "step": 3670 + }, + { + "epoch": 0.7349879903923139, + "learning_rate": 1.9913839501069213e-05, + "loss": 0.166, + "step": 3672 + }, + { + "epoch": 0.7357886309047238, + "learning_rate": 1.9911999255629504e-05, + "loss": 0.1989, + "step": 3674 + }, + { + "epoch": 0.7357886309047238, + "learning_rate": 1.9910139651840497e-05, + "loss": 0.2196, + "step": 3676 + }, + { + "epoch": 0.7365892714171337, + "learning_rate": 1.990826069333406e-05, + "loss": 0.4604, + "step": 3678 + }, + { + "epoch": 0.7365892714171337, + "learning_rate": 1.9906362383779826e-05, + "loss": 0.1984, + "step": 3680 + }, + { + "epoch": 0.7373899119295436, + "learning_rate": 1.9904444726885236e-05, + "loss": 0.1669, + "step": 3682 + }, + { + "epoch": 0.7373899119295436, + "learning_rate": 1.9902507726395524e-05, + "loss": 0.082, + "step": 3684 + }, + { + "epoch": 0.7381905524419535, + "learning_rate": 1.9900551386093677e-05, + "loss": 0.2931, + "step": 3686 + }, + { + "epoch": 0.7381905524419535, + "learning_rate": 1.989857570980049e-05, + "loss": 0.2164, + "step": 3688 + }, + { + "epoch": 0.7389911929543634, + "learning_rate": 1.9896580701374482e-05, + "loss": 0.1678, + "step": 3690 + }, + { + "epoch": 0.7389911929543634, + "learning_rate": 1.9894566364711965e-05, + "loss": 0.0201, + "step": 3692 + }, + { + "epoch": 0.7397918334667735, + "learning_rate": 1.9892532703746977e-05, + "loss": 0.3052, + "step": 3694 + }, + { + "epoch": 0.7397918334667735, + "learning_rate": 1.9890479722451292e-05, + "loss": 0.3427, + "step": 3696 + }, + { + "epoch": 0.7405924739791834, + "learning_rate": 1.9888407424834437e-05, + "loss": 0.4775, + "step": 3698 + }, + { + "epoch": 0.7405924739791834, + "learning_rate": 1.988631581494365e-05, + "loss": 0.4975, + "step": 3700 + }, + { + "epoch": 0.7413931144915933, + "learning_rate": 1.9884204896863895e-05, + "loss": 0.4598, + "step": 3702 + }, + { + "epoch": 0.7413931144915933, + "learning_rate": 1.9882074674717832e-05, + "loss": 0.3838, + "step": 3704 + }, + { + "epoch": 0.7421937550040032, + "learning_rate": 1.9879925152665845e-05, + "loss": 0.1849, + "step": 3706 + }, + { + "epoch": 0.7421937550040032, + "learning_rate": 1.987775633490599e-05, + "loss": 0.2017, + "step": 3708 + }, + { + "epoch": 0.7429943955164131, + "learning_rate": 1.9875568225674005e-05, + "loss": 0.1008, + "step": 3710 + }, + { + "epoch": 0.7429943955164131, + "learning_rate": 1.987336082924333e-05, + "loss": 0.0474, + "step": 3712 + }, + { + "epoch": 0.743795036028823, + "learning_rate": 1.987113414992505e-05, + "loss": 0.1762, + "step": 3714 + }, + { + "epoch": 0.743795036028823, + "learning_rate": 1.986888819206792e-05, + "loss": 0.432, + "step": 3716 + }, + { + "epoch": 0.7445956765412329, + "learning_rate": 1.986662296005834e-05, + "loss": 0.465, + "step": 3718 + }, + { + "epoch": 0.7445956765412329, + "learning_rate": 1.986433845832037e-05, + "loss": 0.0209, + "step": 3720 + }, + { + "epoch": 0.745396317053643, + "learning_rate": 1.9862034691315678e-05, + "loss": 0.2399, + "step": 3722 + }, + { + "epoch": 0.745396317053643, + "learning_rate": 1.9859711663543573e-05, + "loss": 0.1987, + "step": 3724 + }, + { + "epoch": 0.7461969575660529, + "learning_rate": 1.9857369379540985e-05, + "loss": 0.5046, + "step": 3726 + }, + { + "epoch": 0.7461969575660529, + "learning_rate": 1.9855007843882437e-05, + "loss": 0.2266, + "step": 3728 + }, + { + "epoch": 0.7469975980784628, + "learning_rate": 1.985262706118007e-05, + "loss": 0.2417, + "step": 3730 + }, + { + "epoch": 0.7469975980784628, + "learning_rate": 1.9850227036083592e-05, + "loss": 0.1269, + "step": 3732 + }, + { + "epoch": 0.7477982385908727, + "learning_rate": 1.9847807773280314e-05, + "loss": 0.1664, + "step": 3734 + }, + { + "epoch": 0.7477982385908727, + "learning_rate": 1.9845369277495105e-05, + "loss": 0.1948, + "step": 3736 + }, + { + "epoch": 0.7485988791032826, + "learning_rate": 1.9842911553490396e-05, + "loss": 0.3393, + "step": 3738 + }, + { + "epoch": 0.7485988791032826, + "learning_rate": 1.9840434606066186e-05, + "loss": 1.2607, + "step": 3740 + }, + { + "epoch": 0.7493995196156925, + "learning_rate": 1.983793844005999e-05, + "loss": 0.0891, + "step": 3742 + }, + { + "epoch": 0.7493995196156925, + "learning_rate": 1.9835423060346892e-05, + "loss": 0.0716, + "step": 3744 + }, + { + "epoch": 0.7502001601281025, + "learning_rate": 1.9832888471839475e-05, + "loss": 0.3133, + "step": 3746 + }, + { + "epoch": 0.7502001601281025, + "learning_rate": 1.983033467948784e-05, + "loss": 0.3731, + "step": 3748 + }, + { + "epoch": 0.7510008006405124, + "learning_rate": 1.9827761688279613e-05, + "loss": 0.2168, + "step": 3750 + }, + { + "epoch": 0.7510008006405124, + "learning_rate": 1.9825169503239885e-05, + "loss": 0.3082, + "step": 3752 + }, + { + "epoch": 0.7518014411529224, + "learning_rate": 1.9822558129431263e-05, + "loss": 0.5453, + "step": 3754 + }, + { + "epoch": 0.7518014411529224, + "learning_rate": 1.9819927571953804e-05, + "loss": 0.2924, + "step": 3756 + }, + { + "epoch": 0.7526020816653323, + "learning_rate": 1.981727783594506e-05, + "loss": 0.1903, + "step": 3758 + }, + { + "epoch": 0.7526020816653323, + "learning_rate": 1.9814608926580007e-05, + "loss": 0.0273, + "step": 3760 + }, + { + "epoch": 0.7534027221777422, + "learning_rate": 1.9811920849071092e-05, + "loss": 0.2325, + "step": 3762 + }, + { + "epoch": 0.7534027221777422, + "learning_rate": 1.980921360866819e-05, + "loss": 0.4156, + "step": 3764 + }, + { + "epoch": 0.7542033626901521, + "learning_rate": 1.980648721065859e-05, + "loss": 0.3152, + "step": 3766 + }, + { + "epoch": 0.7542033626901521, + "learning_rate": 1.9803741660367018e-05, + "loss": 0.1907, + "step": 3768 + }, + { + "epoch": 0.755004003202562, + "learning_rate": 1.980097696315558e-05, + "loss": 0.3841, + "step": 3770 + }, + { + "epoch": 0.755004003202562, + "learning_rate": 1.9798193124423804e-05, + "loss": 0.0873, + "step": 3772 + }, + { + "epoch": 0.755804643714972, + "learning_rate": 1.979539014960858e-05, + "loss": 0.4258, + "step": 3774 + }, + { + "epoch": 0.755804643714972, + "learning_rate": 1.979256804418418e-05, + "loss": 0.1078, + "step": 3776 + }, + { + "epoch": 0.7566052842273819, + "learning_rate": 1.9789726813662233e-05, + "loss": 0.3728, + "step": 3778 + }, + { + "epoch": 0.7566052842273819, + "learning_rate": 1.978686646359173e-05, + "loss": 0.3641, + "step": 3780 + }, + { + "epoch": 0.7574059247397918, + "learning_rate": 1.9783986999558994e-05, + "loss": 0.5143, + "step": 3782 + }, + { + "epoch": 0.7574059247397918, + "learning_rate": 1.9781088427187677e-05, + "loss": 0.4478, + "step": 3784 + }, + { + "epoch": 0.7582065652522018, + "learning_rate": 1.9778170752138763e-05, + "loss": 0.1869, + "step": 3786 + }, + { + "epoch": 0.7582065652522018, + "learning_rate": 1.9775233980110524e-05, + "loss": 0.064, + "step": 3788 + }, + { + "epoch": 0.7590072057646117, + "learning_rate": 1.9772278116838546e-05, + "loss": 0.1504, + "step": 3790 + }, + { + "epoch": 0.7590072057646117, + "learning_rate": 1.976930316809569e-05, + "loss": 0.0338, + "step": 3792 + }, + { + "epoch": 0.7598078462770216, + "learning_rate": 1.97663091396921e-05, + "loss": 0.4835, + "step": 3794 + }, + { + "epoch": 0.7598078462770216, + "learning_rate": 1.9763296037475177e-05, + "loss": 1.1043, + "step": 3796 + }, + { + "epoch": 0.7606084867894315, + "learning_rate": 1.976026386732957e-05, + "loss": 0.3634, + "step": 3798 + }, + { + "epoch": 0.7606084867894315, + "learning_rate": 1.9757212635177177e-05, + "loss": 0.4011, + "step": 3800 + }, + { + "epoch": 0.7614091273018415, + "learning_rate": 1.9754142346977122e-05, + "loss": 0.1789, + "step": 3802 + }, + { + "epoch": 0.7614091273018415, + "learning_rate": 1.9751053008725736e-05, + "loss": 0.0058, + "step": 3804 + }, + { + "epoch": 0.7622097678142514, + "learning_rate": 1.9747944626456577e-05, + "loss": 0.6204, + "step": 3806 + }, + { + "epoch": 0.7622097678142514, + "learning_rate": 1.9744817206240374e-05, + "loss": 0.2192, + "step": 3808 + }, + { + "epoch": 0.7630104083266613, + "learning_rate": 1.9741670754185054e-05, + "loss": 0.1905, + "step": 3810 + }, + { + "epoch": 0.7630104083266613, + "learning_rate": 1.9738505276435695e-05, + "loss": 0.7769, + "step": 3812 + }, + { + "epoch": 0.7638110488390712, + "learning_rate": 1.9735320779174548e-05, + "loss": 0.571, + "step": 3814 + }, + { + "epoch": 0.7638110488390712, + "learning_rate": 1.9732117268621005e-05, + "loss": 0.5202, + "step": 3816 + }, + { + "epoch": 0.7646116893514812, + "learning_rate": 1.9728894751031595e-05, + "loss": 0.5054, + "step": 3818 + }, + { + "epoch": 0.7646116893514812, + "learning_rate": 1.9725653232699962e-05, + "loss": 0.2107, + "step": 3820 + }, + { + "epoch": 0.7654123298638911, + "learning_rate": 1.972239271995686e-05, + "loss": 0.2162, + "step": 3822 + }, + { + "epoch": 0.7654123298638911, + "learning_rate": 1.9719113219170152e-05, + "loss": 0.6044, + "step": 3824 + }, + { + "epoch": 0.7662129703763011, + "learning_rate": 1.9715814736744758e-05, + "loss": 0.2619, + "step": 3826 + }, + { + "epoch": 0.7662129703763011, + "learning_rate": 1.9712497279122692e-05, + "loss": 0.1986, + "step": 3828 + }, + { + "epoch": 0.767013610888711, + "learning_rate": 1.9709160852783022e-05, + "loss": 0.229, + "step": 3830 + }, + { + "epoch": 0.767013610888711, + "learning_rate": 1.970580546424186e-05, + "loss": 0.4985, + "step": 3832 + }, + { + "epoch": 0.7678142514011209, + "learning_rate": 1.9702431120052352e-05, + "loss": 0.1767, + "step": 3834 + }, + { + "epoch": 0.7678142514011209, + "learning_rate": 1.969903782680467e-05, + "loss": 0.1224, + "step": 3836 + }, + { + "epoch": 0.7686148919135308, + "learning_rate": 1.9695625591125984e-05, + "loss": 0.4602, + "step": 3838 + }, + { + "epoch": 0.7686148919135308, + "learning_rate": 1.9692194419680463e-05, + "loss": 0.3979, + "step": 3840 + }, + { + "epoch": 0.7694155324259407, + "learning_rate": 1.968874431916926e-05, + "loss": 0.3618, + "step": 3842 + }, + { + "epoch": 0.7694155324259407, + "learning_rate": 1.96852752963305e-05, + "loss": 0.1771, + "step": 3844 + }, + { + "epoch": 0.7702161729383507, + "learning_rate": 1.9681787357939257e-05, + "loss": 0.2169, + "step": 3846 + }, + { + "epoch": 0.7702161729383507, + "learning_rate": 1.9678280510807552e-05, + "loss": 0.6681, + "step": 3848 + }, + { + "epoch": 0.7710168134507606, + "learning_rate": 1.9674754761784334e-05, + "loss": 0.1674, + "step": 3850 + }, + { + "epoch": 0.7710168134507606, + "learning_rate": 1.9671210117755462e-05, + "loss": 0.3857, + "step": 3852 + }, + { + "epoch": 0.7718174539631706, + "learning_rate": 1.9667646585643706e-05, + "loss": 0.4017, + "step": 3854 + }, + { + "epoch": 0.7718174539631706, + "learning_rate": 1.966406417240872e-05, + "loss": 0.298, + "step": 3856 + }, + { + "epoch": 0.7726180944755805, + "learning_rate": 1.966046288504704e-05, + "loss": 0.2642, + "step": 3858 + }, + { + "epoch": 0.7726180944755805, + "learning_rate": 1.9656842730592046e-05, + "loss": 0.1519, + "step": 3860 + }, + { + "epoch": 0.7734187349879904, + "learning_rate": 1.965320371611399e-05, + "loss": 0.1718, + "step": 3862 + }, + { + "epoch": 0.7734187349879904, + "learning_rate": 1.964954584871995e-05, + "loss": 0.393, + "step": 3864 + }, + { + "epoch": 0.7742193755004003, + "learning_rate": 1.964586913555381e-05, + "loss": 0.23, + "step": 3866 + }, + { + "epoch": 0.7742193755004003, + "learning_rate": 1.9642173583796265e-05, + "loss": 0.1498, + "step": 3868 + }, + { + "epoch": 0.7750200160128102, + "learning_rate": 1.9638459200664822e-05, + "loss": 0.1593, + "step": 3870 + }, + { + "epoch": 0.7750200160128102, + "learning_rate": 1.9634725993413744e-05, + "loss": 0.2157, + "step": 3872 + }, + { + "epoch": 0.7758206565252201, + "learning_rate": 1.9630973969334068e-05, + "loss": 0.3213, + "step": 3874 + }, + { + "epoch": 0.7758206565252201, + "learning_rate": 1.9627203135753576e-05, + "loss": 0.0939, + "step": 3876 + }, + { + "epoch": 0.77662129703763, + "learning_rate": 1.9623413500036795e-05, + "loss": 0.3834, + "step": 3878 + }, + { + "epoch": 0.77662129703763, + "learning_rate": 1.9619605069584954e-05, + "loss": 0.253, + "step": 3880 + }, + { + "epoch": 0.7774219375500401, + "learning_rate": 1.9615777851836007e-05, + "loss": 0.224, + "step": 3882 + }, + { + "epoch": 0.7774219375500401, + "learning_rate": 1.961193185426459e-05, + "loss": 0.3822, + "step": 3884 + }, + { + "epoch": 0.77822257806245, + "learning_rate": 1.9608067084382025e-05, + "loss": 0.2108, + "step": 3886 + }, + { + "epoch": 0.77822257806245, + "learning_rate": 1.9604183549736287e-05, + "loss": 0.1212, + "step": 3888 + }, + { + "epoch": 0.7790232185748599, + "learning_rate": 1.9600281257912002e-05, + "loss": 0.2463, + "step": 3890 + }, + { + "epoch": 0.7790232185748599, + "learning_rate": 1.959636021653044e-05, + "loss": 0.198, + "step": 3892 + }, + { + "epoch": 0.7798238590872698, + "learning_rate": 1.9592420433249465e-05, + "loss": 0.3772, + "step": 3894 + }, + { + "epoch": 0.7798238590872698, + "learning_rate": 1.958846191576357e-05, + "loss": 0.1502, + "step": 3896 + }, + { + "epoch": 0.7806244995996797, + "learning_rate": 1.958448467180382e-05, + "loss": 0.3887, + "step": 3898 + }, + { + "epoch": 0.7806244995996797, + "learning_rate": 1.958048870913786e-05, + "loss": 1.0372, + "step": 3900 + }, + { + "epoch": 0.7814251401120896, + "learning_rate": 1.9576474035569895e-05, + "loss": 0.2238, + "step": 3902 + }, + { + "epoch": 0.7814251401120896, + "learning_rate": 1.9572440658940667e-05, + "loss": 0.0693, + "step": 3904 + }, + { + "epoch": 0.7822257806244995, + "learning_rate": 1.9568388587127448e-05, + "loss": 0.5662, + "step": 3906 + }, + { + "epoch": 0.7822257806244995, + "learning_rate": 1.9564317828044022e-05, + "loss": 0.333, + "step": 3908 + }, + { + "epoch": 0.7830264211369096, + "learning_rate": 1.9560228389640668e-05, + "loss": 0.4041, + "step": 3910 + }, + { + "epoch": 0.7830264211369096, + "learning_rate": 1.955612027990415e-05, + "loss": 0.2359, + "step": 3912 + }, + { + "epoch": 0.7838270616493195, + "learning_rate": 1.955199350685769e-05, + "loss": 0.4231, + "step": 3914 + }, + { + "epoch": 0.7838270616493195, + "learning_rate": 1.9547848078560982e-05, + "loss": 0.1687, + "step": 3916 + }, + { + "epoch": 0.7846277021617294, + "learning_rate": 1.954368400311011e-05, + "loss": 0.1581, + "step": 3918 + }, + { + "epoch": 0.7846277021617294, + "learning_rate": 1.953950128863763e-05, + "loss": 0.3495, + "step": 3920 + }, + { + "epoch": 0.7854283426741393, + "learning_rate": 1.9535299943312455e-05, + "loss": 0.3632, + "step": 3922 + }, + { + "epoch": 0.7854283426741393, + "learning_rate": 1.9531079975339915e-05, + "loss": 0.2938, + "step": 3924 + }, + { + "epoch": 0.7862289831865492, + "learning_rate": 1.9526841392961694e-05, + "loss": 0.104, + "step": 3926 + }, + { + "epoch": 0.7862289831865492, + "learning_rate": 1.9522584204455835e-05, + "loss": 0.0706, + "step": 3928 + }, + { + "epoch": 0.7870296236989591, + "learning_rate": 1.9518308418136728e-05, + "loss": 0.2953, + "step": 3930 + }, + { + "epoch": 0.7870296236989591, + "learning_rate": 1.9514014042355054e-05, + "loss": 0.2781, + "step": 3932 + }, + { + "epoch": 0.7878302642113691, + "learning_rate": 1.9509701085497852e-05, + "loss": 0.3613, + "step": 3934 + }, + { + "epoch": 0.7878302642113691, + "learning_rate": 1.9505369555988395e-05, + "loss": 0.8307, + "step": 3936 + }, + { + "epoch": 0.7886309047237791, + "learning_rate": 1.9501019462286266e-05, + "loss": 0.2325, + "step": 3938 + }, + { + "epoch": 0.7886309047237791, + "learning_rate": 1.9496650812887293e-05, + "loss": 0.3829, + "step": 3940 + }, + { + "epoch": 0.789431545236189, + "learning_rate": 1.9492263616323536e-05, + "loss": 0.2862, + "step": 3942 + }, + { + "epoch": 0.789431545236189, + "learning_rate": 1.9487857881163295e-05, + "loss": 0.1871, + "step": 3944 + }, + { + "epoch": 0.7902321857485989, + "learning_rate": 1.948343361601105e-05, + "loss": 0.2966, + "step": 3946 + }, + { + "epoch": 0.7902321857485989, + "learning_rate": 1.947899082950751e-05, + "loss": 0.3186, + "step": 3948 + }, + { + "epoch": 0.7910328262610088, + "learning_rate": 1.947452953032951e-05, + "loss": 0.2294, + "step": 3950 + }, + { + "epoch": 0.7910328262610088, + "learning_rate": 1.947004972719008e-05, + "loss": 0.335, + "step": 3952 + }, + { + "epoch": 0.7918334667734187, + "learning_rate": 1.9465551428838363e-05, + "loss": 0.1959, + "step": 3954 + }, + { + "epoch": 0.7918334667734187, + "learning_rate": 1.9461034644059637e-05, + "loss": 0.534, + "step": 3956 + }, + { + "epoch": 0.7926341072858286, + "learning_rate": 1.9456499381675285e-05, + "loss": 0.1668, + "step": 3958 + }, + { + "epoch": 0.7926341072858286, + "learning_rate": 1.945194565054276e-05, + "loss": 0.1361, + "step": 3960 + }, + { + "epoch": 0.7934347477982386, + "learning_rate": 1.9447373459555617e-05, + "loss": 0.2156, + "step": 3962 + }, + { + "epoch": 0.7934347477982386, + "learning_rate": 1.9442782817643425e-05, + "loss": 0.1359, + "step": 3964 + }, + { + "epoch": 0.7942353883106485, + "learning_rate": 1.9438173733771814e-05, + "loss": 0.168, + "step": 3966 + }, + { + "epoch": 0.7942353883106485, + "learning_rate": 1.9433546216942433e-05, + "loss": 0.596, + "step": 3968 + }, + { + "epoch": 0.7950360288230585, + "learning_rate": 1.9428900276192903e-05, + "loss": 0.2781, + "step": 3970 + }, + { + "epoch": 0.7950360288230585, + "learning_rate": 1.942423592059687e-05, + "loss": 0.7508, + "step": 3972 + }, + { + "epoch": 0.7958366693354684, + "learning_rate": 1.94195531592639e-05, + "loss": 0.2635, + "step": 3974 + }, + { + "epoch": 0.7958366693354684, + "learning_rate": 1.941485200133955e-05, + "loss": 0.0822, + "step": 3976 + }, + { + "epoch": 0.7966373098478783, + "learning_rate": 1.9410132456005262e-05, + "loss": 0.2761, + "step": 3978 + }, + { + "epoch": 0.7966373098478783, + "learning_rate": 1.9405394532478422e-05, + "loss": 0.5745, + "step": 3980 + }, + { + "epoch": 0.7974379503602882, + "learning_rate": 1.94006382400123e-05, + "loss": 0.3888, + "step": 3982 + }, + { + "epoch": 0.7974379503602882, + "learning_rate": 1.9395863587896025e-05, + "loss": 0.1216, + "step": 3984 + }, + { + "epoch": 0.7982385908726981, + "learning_rate": 1.939107058545461e-05, + "loss": 0.1513, + "step": 3986 + }, + { + "epoch": 0.7982385908726981, + "learning_rate": 1.938625924204888e-05, + "loss": 0.0319, + "step": 3988 + }, + { + "epoch": 0.7990392313851081, + "learning_rate": 1.9381429567075507e-05, + "loss": 0.503, + "step": 3990 + }, + { + "epoch": 0.7990392313851081, + "learning_rate": 1.937658156996694e-05, + "loss": 0.4072, + "step": 3992 + }, + { + "epoch": 0.799839871897518, + "learning_rate": 1.9371715260191425e-05, + "loss": 0.112, + "step": 3994 + }, + { + "epoch": 0.799839871897518, + "learning_rate": 1.9366830647252977e-05, + "loss": 0.1082, + "step": 3996 + }, + { + "epoch": 0.800640512409928, + "learning_rate": 1.936192774069133e-05, + "loss": 0.3999, + "step": 3998 + }, + { + "epoch": 0.800640512409928, + "learning_rate": 1.9357006550082e-05, + "loss": 0.1527, + "step": 4000 + }, + { + "epoch": 0.8014411529223379, + "learning_rate": 1.9352067085036145e-05, + "loss": 0.3613, + "step": 4002 + }, + { + "epoch": 0.8014411529223379, + "learning_rate": 1.9347109355200676e-05, + "loss": 0.1511, + "step": 4004 + }, + { + "epoch": 0.8022417934347478, + "learning_rate": 1.9342133370258124e-05, + "loss": 0.2964, + "step": 4006 + }, + { + "epoch": 0.8022417934347478, + "learning_rate": 1.933713913992671e-05, + "loss": 0.0088, + "step": 4008 + }, + { + "epoch": 0.8030424339471577, + "learning_rate": 1.9332126673960276e-05, + "loss": 0.4076, + "step": 4010 + }, + { + "epoch": 0.8030424339471577, + "learning_rate": 1.9327095982148255e-05, + "loss": 0.2023, + "step": 4012 + }, + { + "epoch": 0.8038430744595677, + "learning_rate": 1.932204707431572e-05, + "loss": 0.3831, + "step": 4014 + }, + { + "epoch": 0.8038430744595677, + "learning_rate": 1.9316979960323283e-05, + "loss": 0.3714, + "step": 4016 + }, + { + "epoch": 0.8046437149719776, + "learning_rate": 1.9311894650067146e-05, + "loss": 0.6496, + "step": 4018 + }, + { + "epoch": 0.8046437149719776, + "learning_rate": 1.9306791153479017e-05, + "loss": 0.6367, + "step": 4020 + }, + { + "epoch": 0.8054443554843875, + "learning_rate": 1.9301669480526118e-05, + "loss": 0.8116, + "step": 4022 + }, + { + "epoch": 0.8054443554843875, + "learning_rate": 1.9296529641211226e-05, + "loss": 0.7964, + "step": 4024 + }, + { + "epoch": 0.8062449959967974, + "learning_rate": 1.929137164557252e-05, + "loss": 0.1502, + "step": 4026 + }, + { + "epoch": 0.8062449959967974, + "learning_rate": 1.928619550368371e-05, + "loss": 0.2104, + "step": 4028 + }, + { + "epoch": 0.8070456365092074, + "learning_rate": 1.9281001225653883e-05, + "loss": 0.7397, + "step": 4030 + }, + { + "epoch": 0.8070456365092074, + "learning_rate": 1.9275788821627607e-05, + "loss": 0.5979, + "step": 4032 + }, + { + "epoch": 0.8078462770216173, + "learning_rate": 1.9270558301784808e-05, + "loss": 0.3664, + "step": 4034 + }, + { + "epoch": 0.8078462770216173, + "learning_rate": 1.9265309676340783e-05, + "loss": 0.1687, + "step": 4036 + }, + { + "epoch": 0.8086469175340272, + "learning_rate": 1.9260042955546247e-05, + "loss": 0.5936, + "step": 4038 + }, + { + "epoch": 0.8086469175340272, + "learning_rate": 1.9254758149687187e-05, + "loss": 0.1326, + "step": 4040 + }, + { + "epoch": 0.8094475580464372, + "learning_rate": 1.9249455269084972e-05, + "loss": 0.3855, + "step": 4042 + }, + { + "epoch": 0.8094475580464372, + "learning_rate": 1.9244134324096216e-05, + "loss": 0.229, + "step": 4044 + }, + { + "epoch": 0.8102481985588471, + "learning_rate": 1.9238795325112867e-05, + "loss": 0.2195, + "step": 4046 + }, + { + "epoch": 0.8102481985588471, + "learning_rate": 1.9233438282562095e-05, + "loss": 0.4748, + "step": 4048 + }, + { + "epoch": 0.811048839071257, + "learning_rate": 1.9228063206906302e-05, + "loss": 0.2376, + "step": 4050 + }, + { + "epoch": 0.811048839071257, + "learning_rate": 1.9222670108643156e-05, + "loss": 0.2422, + "step": 4052 + }, + { + "epoch": 0.8118494795836669, + "learning_rate": 1.9217258998305464e-05, + "loss": 0.5496, + "step": 4054 + }, + { + "epoch": 0.8118494795836669, + "learning_rate": 1.9211829886461278e-05, + "loss": 0.1845, + "step": 4056 + }, + { + "epoch": 0.8126501200960768, + "learning_rate": 1.9206382783713735e-05, + "loss": 0.1581, + "step": 4058 + }, + { + "epoch": 0.8126501200960768, + "learning_rate": 1.9200917700701176e-05, + "loss": 0.3557, + "step": 4060 + }, + { + "epoch": 0.8134507606084868, + "learning_rate": 1.9195434648097013e-05, + "loss": 0.2271, + "step": 4062 + }, + { + "epoch": 0.8134507606084868, + "learning_rate": 1.918993363660975e-05, + "loss": 0.2764, + "step": 4064 + }, + { + "epoch": 0.8142514011208967, + "learning_rate": 1.9184414676983013e-05, + "loss": 0.1428, + "step": 4066 + }, + { + "epoch": 0.8142514011208967, + "learning_rate": 1.9178877779995416e-05, + "loss": 0.2323, + "step": 4068 + }, + { + "epoch": 0.8150520416333067, + "learning_rate": 1.9173322956460678e-05, + "loss": 0.4734, + "step": 4070 + }, + { + "epoch": 0.8150520416333067, + "learning_rate": 1.916775021722745e-05, + "loss": 0.6129, + "step": 4072 + }, + { + "epoch": 0.8158526821457166, + "learning_rate": 1.9162159573179446e-05, + "loss": 0.2513, + "step": 4074 + }, + { + "epoch": 0.8158526821457166, + "learning_rate": 1.9156551035235298e-05, + "loss": 0.0878, + "step": 4076 + }, + { + "epoch": 0.8166533226581265, + "learning_rate": 1.915092461434859e-05, + "loss": 0.1764, + "step": 4078 + }, + { + "epoch": 0.8166533226581265, + "learning_rate": 1.9145280321507872e-05, + "loss": 0.1586, + "step": 4080 + }, + { + "epoch": 0.8174539631705364, + "learning_rate": 1.9139618167736547e-05, + "loss": 0.3045, + "step": 4082 + }, + { + "epoch": 0.8174539631705364, + "learning_rate": 1.9133938164092942e-05, + "loss": 0.1982, + "step": 4084 + }, + { + "epoch": 0.8182546036829463, + "learning_rate": 1.912824032167022e-05, + "loss": 0.2082, + "step": 4086 + }, + { + "epoch": 0.8182546036829463, + "learning_rate": 1.9122524651596372e-05, + "loss": 0.2981, + "step": 4088 + }, + { + "epoch": 0.8190552441953562, + "learning_rate": 1.911679116503426e-05, + "loss": 0.2617, + "step": 4090 + }, + { + "epoch": 0.8190552441953562, + "learning_rate": 1.9111039873181475e-05, + "loss": 0.2895, + "step": 4092 + }, + { + "epoch": 0.8198558847077662, + "learning_rate": 1.9105270787270446e-05, + "loss": 0.5754, + "step": 4094 + }, + { + "epoch": 0.8198558847077662, + "learning_rate": 1.9099483918568287e-05, + "loss": 0.3637, + "step": 4096 + }, + { + "epoch": 0.8206565252201762, + "learning_rate": 1.9093679278376913e-05, + "loss": 0.2712, + "step": 4098 + }, + { + "epoch": 0.8206565252201762, + "learning_rate": 1.90878568780329e-05, + "loss": 0.3055, + "step": 4100 + }, + { + "epoch": 0.8214571657325861, + "learning_rate": 1.90820167289075e-05, + "loss": 0.1704, + "step": 4102 + }, + { + "epoch": 0.8214571657325861, + "learning_rate": 1.907615884240668e-05, + "loss": 0.1078, + "step": 4104 + }, + { + "epoch": 0.822257806244996, + "learning_rate": 1.9070283229971003e-05, + "loss": 0.1762, + "step": 4106 + }, + { + "epoch": 0.822257806244996, + "learning_rate": 1.9064389903075683e-05, + "loss": 0.2959, + "step": 4108 + }, + { + "epoch": 0.8230584467574059, + "learning_rate": 1.9058478873230487e-05, + "loss": 0.5579, + "step": 4110 + }, + { + "epoch": 0.8230584467574059, + "learning_rate": 1.905255015197982e-05, + "loss": 0.1564, + "step": 4112 + }, + { + "epoch": 0.8238590872698158, + "learning_rate": 1.9046603750902585e-05, + "loss": 0.2448, + "step": 4114 + }, + { + "epoch": 0.8238590872698158, + "learning_rate": 1.9040639681612216e-05, + "loss": 0.1979, + "step": 4116 + }, + { + "epoch": 0.8246597277822257, + "learning_rate": 1.9034657955756702e-05, + "loss": 0.13, + "step": 4118 + }, + { + "epoch": 0.8246597277822257, + "learning_rate": 1.902865858501845e-05, + "loss": 0.1185, + "step": 4120 + }, + { + "epoch": 0.8254603682946358, + "learning_rate": 1.9022641581114396e-05, + "loss": 0.3634, + "step": 4122 + }, + { + "epoch": 0.8254603682946358, + "learning_rate": 1.9016606955795843e-05, + "loss": 0.2813, + "step": 4124 + }, + { + "epoch": 0.8262610088070457, + "learning_rate": 1.901055472084858e-05, + "loss": 0.2416, + "step": 4126 + }, + { + "epoch": 0.8262610088070457, + "learning_rate": 1.9004484888092734e-05, + "loss": 0.2049, + "step": 4128 + }, + { + "epoch": 0.8270616493194556, + "learning_rate": 1.8998397469382812e-05, + "loss": 0.2322, + "step": 4130 + }, + { + "epoch": 0.8270616493194556, + "learning_rate": 1.8992292476607695e-05, + "loss": 0.1342, + "step": 4132 + }, + { + "epoch": 0.8278622898318655, + "learning_rate": 1.898616992169054e-05, + "loss": 0.2779, + "step": 4134 + }, + { + "epoch": 0.8278622898318655, + "learning_rate": 1.8980029816588863e-05, + "loss": 0.1142, + "step": 4136 + }, + { + "epoch": 0.8286629303442754, + "learning_rate": 1.89738721732944e-05, + "loss": 0.1825, + "step": 4138 + }, + { + "epoch": 0.8286629303442754, + "learning_rate": 1.8967697003833156e-05, + "loss": 0.1075, + "step": 4140 + }, + { + "epoch": 0.8294635708566853, + "learning_rate": 1.8961504320265392e-05, + "loss": 0.6788, + "step": 4142 + }, + { + "epoch": 0.8294635708566853, + "learning_rate": 1.8955294134685528e-05, + "loss": 0.0285, + "step": 4144 + }, + { + "epoch": 0.8302642113690952, + "learning_rate": 1.8949066459222224e-05, + "loss": 0.1704, + "step": 4146 + }, + { + "epoch": 0.8302642113690952, + "learning_rate": 1.8942821306038227e-05, + "loss": 0.0975, + "step": 4148 + }, + { + "epoch": 0.8310648518815053, + "learning_rate": 1.8936558687330492e-05, + "loss": 0.3483, + "step": 4150 + }, + { + "epoch": 0.8310648518815053, + "learning_rate": 1.893027861533003e-05, + "loss": 0.0197, + "step": 4152 + }, + { + "epoch": 0.8318654923939152, + "learning_rate": 1.8923981102301944e-05, + "loss": 0.5074, + "step": 4154 + }, + { + "epoch": 0.8318654923939152, + "learning_rate": 1.891766616054545e-05, + "loss": 0.017, + "step": 4156 + }, + { + "epoch": 0.8326661329063251, + "learning_rate": 1.8911333802393725e-05, + "loss": 0.3443, + "step": 4158 + }, + { + "epoch": 0.8326661329063251, + "learning_rate": 1.8904984040214043e-05, + "loss": 1.0473, + "step": 4160 + }, + { + "epoch": 0.833466773418735, + "learning_rate": 1.8898616886407588e-05, + "loss": 0.0773, + "step": 4162 + }, + { + "epoch": 0.833466773418735, + "learning_rate": 1.8892232353409582e-05, + "loss": 0.0464, + "step": 4164 + }, + { + "epoch": 0.8342674139311449, + "learning_rate": 1.8885830453689146e-05, + "loss": 0.176, + "step": 4166 + }, + { + "epoch": 0.8342674139311449, + "learning_rate": 1.8879411199749306e-05, + "loss": 0.0313, + "step": 4168 + }, + { + "epoch": 0.8350680544435548, + "learning_rate": 1.8872974604127038e-05, + "loss": 0.3711, + "step": 4170 + }, + { + "epoch": 0.8350680544435548, + "learning_rate": 1.8866520679393124e-05, + "loss": 0.6589, + "step": 4172 + }, + { + "epoch": 0.8358686949559647, + "learning_rate": 1.8860049438152247e-05, + "loss": 0.2154, + "step": 4174 + }, + { + "epoch": 0.8358686949559647, + "learning_rate": 1.885356089304285e-05, + "loss": 0.2633, + "step": 4176 + }, + { + "epoch": 0.8366693354683747, + "learning_rate": 1.8847055056737236e-05, + "loss": 0.5028, + "step": 4178 + }, + { + "epoch": 0.8366693354683747, + "learning_rate": 1.884053194194143e-05, + "loss": 0.3358, + "step": 4180 + }, + { + "epoch": 0.8374699759807847, + "learning_rate": 1.8833991561395194e-05, + "loss": 0.2323, + "step": 4182 + }, + { + "epoch": 0.8374699759807847, + "learning_rate": 1.882743392787207e-05, + "loss": 0.5659, + "step": 4184 + }, + { + "epoch": 0.8382706164931946, + "learning_rate": 1.8820859054179225e-05, + "loss": 0.0664, + "step": 4186 + }, + { + "epoch": 0.8382706164931946, + "learning_rate": 1.881426695315756e-05, + "loss": 0.0416, + "step": 4188 + }, + { + "epoch": 0.8390712570056045, + "learning_rate": 1.8807657637681577e-05, + "loss": 0.2099, + "step": 4190 + }, + { + "epoch": 0.8390712570056045, + "learning_rate": 1.8801031120659396e-05, + "loss": 0.1769, + "step": 4192 + }, + { + "epoch": 0.8398718975180144, + "learning_rate": 1.8794387415032783e-05, + "loss": 0.2049, + "step": 4194 + }, + { + "epoch": 0.8398718975180144, + "learning_rate": 1.8787726533777003e-05, + "loss": 0.5707, + "step": 4196 + }, + { + "epoch": 0.8406725380304243, + "learning_rate": 1.8781048489900936e-05, + "loss": 0.3429, + "step": 4198 + }, + { + "epoch": 0.8406725380304243, + "learning_rate": 1.877435329644691e-05, + "loss": 0.5196, + "step": 4200 + }, + { + "epoch": 0.8414731785428343, + "learning_rate": 1.876764096649082e-05, + "loss": 0.2776, + "step": 4202 + }, + { + "epoch": 0.8414731785428343, + "learning_rate": 1.8760911513141974e-05, + "loss": 0.1203, + "step": 4204 + }, + { + "epoch": 0.8422738190552442, + "learning_rate": 1.8754164949543123e-05, + "loss": 0.4077, + "step": 4206 + }, + { + "epoch": 0.8422738190552442, + "learning_rate": 1.8747401288870482e-05, + "loss": 0.629, + "step": 4208 + }, + { + "epoch": 0.8430744595676541, + "learning_rate": 1.8740620544333604e-05, + "loss": 0.0998, + "step": 4210 + }, + { + "epoch": 0.8430744595676541, + "learning_rate": 1.8733822729175455e-05, + "loss": 0.0205, + "step": 4212 + }, + { + "epoch": 0.8438751000800641, + "learning_rate": 1.872700785667228e-05, + "loss": 0.1761, + "step": 4214 + }, + { + "epoch": 0.8438751000800641, + "learning_rate": 1.8720175940133712e-05, + "loss": 0.4468, + "step": 4216 + }, + { + "epoch": 0.844675740592474, + "learning_rate": 1.8713326992902612e-05, + "loss": 0.2239, + "step": 4218 + }, + { + "epoch": 0.844675740592474, + "learning_rate": 1.8706461028355107e-05, + "loss": 0.1588, + "step": 4220 + }, + { + "epoch": 0.8454763811048839, + "learning_rate": 1.8699578059900604e-05, + "loss": 0.8129, + "step": 4222 + }, + { + "epoch": 0.8454763811048839, + "learning_rate": 1.8692678100981663e-05, + "loss": 0.0581, + "step": 4224 + }, + { + "epoch": 0.8462770216172938, + "learning_rate": 1.868576116507408e-05, + "loss": 0.2173, + "step": 4226 + }, + { + "epoch": 0.8462770216172938, + "learning_rate": 1.8678827265686753e-05, + "loss": 0.0585, + "step": 4228 + }, + { + "epoch": 0.8470776621297038, + "learning_rate": 1.8671876416361767e-05, + "loss": 0.2385, + "step": 4230 + }, + { + "epoch": 0.8470776621297038, + "learning_rate": 1.8664908630674264e-05, + "loss": 0.6322, + "step": 4232 + }, + { + "epoch": 0.8478783026421137, + "learning_rate": 1.8657923922232467e-05, + "loss": 0.3056, + "step": 4234 + }, + { + "epoch": 0.8478783026421137, + "learning_rate": 1.86509223046777e-05, + "loss": 0.3098, + "step": 4236 + }, + { + "epoch": 0.8486789431545236, + "learning_rate": 1.8643903791684228e-05, + "loss": 0.8131, + "step": 4238 + }, + { + "epoch": 0.8486789431545236, + "learning_rate": 1.8636868396959406e-05, + "loss": 0.4263, + "step": 4240 + }, + { + "epoch": 0.8494795836669335, + "learning_rate": 1.8629816134243466e-05, + "loss": 0.5489, + "step": 4242 + }, + { + "epoch": 0.8494795836669335, + "learning_rate": 1.8622747017309676e-05, + "loss": 0.4834, + "step": 4244 + }, + { + "epoch": 0.8502802241793435, + "learning_rate": 1.8615661059964148e-05, + "loss": 0.3243, + "step": 4246 + }, + { + "epoch": 0.8502802241793435, + "learning_rate": 1.8608558276045898e-05, + "loss": 0.1349, + "step": 4248 + }, + { + "epoch": 0.8510808646917534, + "learning_rate": 1.860143867942685e-05, + "loss": 0.4736, + "step": 4250 + }, + { + "epoch": 0.8510808646917534, + "learning_rate": 1.8594302284011697e-05, + "loss": 0.1684, + "step": 4252 + }, + { + "epoch": 0.8518815052041633, + "learning_rate": 1.8587149103738006e-05, + "loss": 0.2282, + "step": 4254 + }, + { + "epoch": 0.8518815052041633, + "learning_rate": 1.8579979152576076e-05, + "loss": 0.187, + "step": 4256 + }, + { + "epoch": 0.8526821457165733, + "learning_rate": 1.8572792444528963e-05, + "loss": 0.3356, + "step": 4258 + }, + { + "epoch": 0.8526821457165733, + "learning_rate": 1.8565588993632498e-05, + "loss": 0.1199, + "step": 4260 + }, + { + "epoch": 0.8534827862289832, + "learning_rate": 1.8558368813955136e-05, + "loss": 0.4171, + "step": 4262 + }, + { + "epoch": 0.8534827862289832, + "learning_rate": 1.8551131919598084e-05, + "loss": 0.1853, + "step": 4264 + }, + { + "epoch": 0.8542834267413931, + "learning_rate": 1.854387832469512e-05, + "loss": 0.2303, + "step": 4266 + }, + { + "epoch": 0.8542834267413931, + "learning_rate": 1.8536608043412702e-05, + "loss": 0.3686, + "step": 4268 + }, + { + "epoch": 0.855084067253803, + "learning_rate": 1.8529321089949833e-05, + "loss": 0.7221, + "step": 4270 + }, + { + "epoch": 0.855084067253803, + "learning_rate": 1.852201747853807e-05, + "loss": 0.2138, + "step": 4272 + }, + { + "epoch": 0.855884707766213, + "learning_rate": 1.8514697223441565e-05, + "loss": 0.2426, + "step": 4274 + }, + { + "epoch": 0.855884707766213, + "learning_rate": 1.8507360338956896e-05, + "loss": 0.6411, + "step": 4276 + }, + { + "epoch": 0.8566853482786229, + "learning_rate": 1.850000683941319e-05, + "loss": 0.291, + "step": 4278 + }, + { + "epoch": 0.8566853482786229, + "learning_rate": 1.849263673917196e-05, + "loss": 0.0517, + "step": 4280 + }, + { + "epoch": 0.8574859887910328, + "learning_rate": 1.8485250052627205e-05, + "loss": 0.217, + "step": 4282 + }, + { + "epoch": 0.8574859887910328, + "learning_rate": 1.847784679420527e-05, + "loss": 0.1541, + "step": 4284 + }, + { + "epoch": 0.8582866293034428, + "learning_rate": 1.8470426978364857e-05, + "loss": 0.1623, + "step": 4286 + }, + { + "epoch": 0.8582866293034428, + "learning_rate": 1.846299061959706e-05, + "loss": 0.3731, + "step": 4288 + }, + { + "epoch": 0.8590872698158527, + "learning_rate": 1.845553773242522e-05, + "loss": 0.2323, + "step": 4290 + }, + { + "epoch": 0.8590872698158527, + "learning_rate": 1.8448068331405018e-05, + "loss": 0.2715, + "step": 4292 + }, + { + "epoch": 0.8598879103282626, + "learning_rate": 1.8440582431124322e-05, + "loss": 0.6596, + "step": 4294 + }, + { + "epoch": 0.8598879103282626, + "learning_rate": 1.8433080046203293e-05, + "loss": 0.2975, + "step": 4296 + }, + { + "epoch": 0.8606885508406725, + "learning_rate": 1.842556119129423e-05, + "loss": 0.1853, + "step": 4298 + }, + { + "epoch": 0.8606885508406725, + "learning_rate": 1.841802588108161e-05, + "loss": 0.0966, + "step": 4300 + }, + { + "epoch": 0.8614891913530824, + "learning_rate": 1.841047413028209e-05, + "loss": 0.159, + "step": 4302 + }, + { + "epoch": 0.8614891913530824, + "learning_rate": 1.8402905953644356e-05, + "loss": 0.0753, + "step": 4304 + }, + { + "epoch": 0.8622898318654924, + "learning_rate": 1.8395321365949273e-05, + "loss": 0.5291, + "step": 4306 + }, + { + "epoch": 0.8622898318654924, + "learning_rate": 1.838772038200968e-05, + "loss": 0.1877, + "step": 4308 + }, + { + "epoch": 0.8630904723779024, + "learning_rate": 1.838010301667044e-05, + "loss": 0.1699, + "step": 4310 + }, + { + "epoch": 0.8630904723779024, + "learning_rate": 1.837246928480848e-05, + "loss": 0.0826, + "step": 4312 + }, + { + "epoch": 0.8638911128903123, + "learning_rate": 1.8364819201332596e-05, + "loss": 0.2274, + "step": 4314 + }, + { + "epoch": 0.8638911128903123, + "learning_rate": 1.8357152781183613e-05, + "loss": 0.3852, + "step": 4316 + }, + { + "epoch": 0.8646917534027222, + "learning_rate": 1.834947003933417e-05, + "loss": 0.532, + "step": 4318 + }, + { + "epoch": 0.8646917534027222, + "learning_rate": 1.8341770990788874e-05, + "loss": 0.073, + "step": 4320 + }, + { + "epoch": 0.8654923939151321, + "learning_rate": 1.8334055650584107e-05, + "loss": 0.2157, + "step": 4322 + }, + { + "epoch": 0.8654923939151321, + "learning_rate": 1.8326324033788087e-05, + "loss": 0.0201, + "step": 4324 + }, + { + "epoch": 0.866293034427542, + "learning_rate": 1.8318576155500855e-05, + "loss": 0.2419, + "step": 4326 + }, + { + "epoch": 0.866293034427542, + "learning_rate": 1.831081203085415e-05, + "loss": 0.0429, + "step": 4328 + }, + { + "epoch": 0.8670936749399519, + "learning_rate": 1.830303167501152e-05, + "loss": 0.4884, + "step": 4330 + }, + { + "epoch": 0.8670936749399519, + "learning_rate": 1.8295235103168128e-05, + "loss": 0.0053, + "step": 4332 + }, + { + "epoch": 0.8678943154523618, + "learning_rate": 1.8287422330550885e-05, + "loss": 0.6323, + "step": 4334 + }, + { + "epoch": 0.8678943154523618, + "learning_rate": 1.8279593372418284e-05, + "loss": 0.0937, + "step": 4336 + }, + { + "epoch": 0.8686949559647719, + "learning_rate": 1.827174824406043e-05, + "loss": 0.3153, + "step": 4338 + }, + { + "epoch": 0.8686949559647719, + "learning_rate": 1.8263886960799072e-05, + "loss": 0.0579, + "step": 4340 + }, + { + "epoch": 0.8694955964771818, + "learning_rate": 1.8256009537987424e-05, + "loss": 0.2198, + "step": 4342 + }, + { + "epoch": 0.8694955964771818, + "learning_rate": 1.8248115991010303e-05, + "loss": 0.3042, + "step": 4344 + }, + { + "epoch": 0.8702962369895917, + "learning_rate": 1.8240206335283943e-05, + "loss": 0.207, + "step": 4346 + }, + { + "epoch": 0.8702962369895917, + "learning_rate": 1.8232280586256104e-05, + "loss": 0.3416, + "step": 4348 + }, + { + "epoch": 0.8710968775020016, + "learning_rate": 1.8224338759405934e-05, + "loss": 0.9246, + "step": 4350 + }, + { + "epoch": 0.8710968775020016, + "learning_rate": 1.8216380870243963e-05, + "loss": 0.0775, + "step": 4352 + }, + { + "epoch": 0.8718975180144115, + "learning_rate": 1.820840693431217e-05, + "loss": 0.2935, + "step": 4354 + }, + { + "epoch": 0.8718975180144115, + "learning_rate": 1.820041696718378e-05, + "loss": 0.6678, + "step": 4356 + }, + { + "epoch": 0.8726981585268214, + "learning_rate": 1.8192410984463416e-05, + "loss": 0.1903, + "step": 4358 + }, + { + "epoch": 0.8726981585268214, + "learning_rate": 1.8184389001786912e-05, + "loss": 0.6652, + "step": 4360 + }, + { + "epoch": 0.8734987990392313, + "learning_rate": 1.8176351034821352e-05, + "loss": 0.2199, + "step": 4362 + }, + { + "epoch": 0.8734987990392313, + "learning_rate": 1.8168297099265108e-05, + "loss": 0.1135, + "step": 4364 + }, + { + "epoch": 0.8742994395516414, + "learning_rate": 1.8160227210847642e-05, + "loss": 0.2087, + "step": 4366 + }, + { + "epoch": 0.8742994395516414, + "learning_rate": 1.815214138532966e-05, + "loss": 0.4475, + "step": 4368 + }, + { + "epoch": 0.8751000800640513, + "learning_rate": 1.8144039638502927e-05, + "loss": 0.1587, + "step": 4370 + }, + { + "epoch": 0.8751000800640513, + "learning_rate": 1.8135921986190358e-05, + "loss": 0.0433, + "step": 4372 + }, + { + "epoch": 0.8759007205764612, + "learning_rate": 1.8127788444245884e-05, + "loss": 0.6798, + "step": 4374 + }, + { + "epoch": 0.8759007205764612, + "learning_rate": 1.8119639028554475e-05, + "loss": 0.6588, + "step": 4376 + }, + { + "epoch": 0.8767013610888711, + "learning_rate": 1.8111473755032152e-05, + "loss": 0.4657, + "step": 4378 + }, + { + "epoch": 0.8767013610888711, + "learning_rate": 1.8103292639625835e-05, + "loss": 0.3322, + "step": 4380 + }, + { + "epoch": 0.877502001601281, + "learning_rate": 1.8095095698313456e-05, + "loss": 0.211, + "step": 4382 + }, + { + "epoch": 0.877502001601281, + "learning_rate": 1.808688294710378e-05, + "loss": 0.189, + "step": 4384 + }, + { + "epoch": 0.8783026421136909, + "learning_rate": 1.807865440203653e-05, + "loss": 0.8527, + "step": 4386 + }, + { + "epoch": 0.8783026421136909, + "learning_rate": 1.807041007918221e-05, + "loss": 0.5408, + "step": 4388 + }, + { + "epoch": 0.8791032826261009, + "learning_rate": 1.806214999464214e-05, + "loss": 0.1502, + "step": 4390 + }, + { + "epoch": 0.8791032826261009, + "learning_rate": 1.805387416454849e-05, + "loss": 0.5762, + "step": 4392 + }, + { + "epoch": 0.8799039231385108, + "learning_rate": 1.8045582605064087e-05, + "loss": 0.2893, + "step": 4394 + }, + { + "epoch": 0.8799039231385108, + "learning_rate": 1.8037275332382575e-05, + "loss": 0.376, + "step": 4396 + }, + { + "epoch": 0.8807045636509208, + "learning_rate": 1.802895236272819e-05, + "loss": 0.3249, + "step": 4398 + }, + { + "epoch": 0.8807045636509208, + "learning_rate": 1.802061371235592e-05, + "loss": 0.3931, + "step": 4400 + }, + { + "epoch": 0.8815052041633307, + "learning_rate": 1.80122593975513e-05, + "loss": 0.2533, + "step": 4402 + }, + { + "epoch": 0.8815052041633307, + "learning_rate": 1.8003889434630476e-05, + "loss": 0.2072, + "step": 4404 + }, + { + "epoch": 0.8823058446757406, + "learning_rate": 1.7995503839940204e-05, + "loss": 0.1587, + "step": 4406 + }, + { + "epoch": 0.8823058446757406, + "learning_rate": 1.7987102629857692e-05, + "loss": 0.3521, + "step": 4408 + }, + { + "epoch": 0.8831064851881505, + "learning_rate": 1.7978685820790725e-05, + "loss": 0.3734, + "step": 4410 + }, + { + "epoch": 0.8831064851881505, + "learning_rate": 1.7970253429177494e-05, + "loss": 0.2753, + "step": 4412 + }, + { + "epoch": 0.8839071257005604, + "learning_rate": 1.796180547148662e-05, + "loss": 0.1587, + "step": 4414 + }, + { + "epoch": 0.8839071257005604, + "learning_rate": 1.7953341964217196e-05, + "loss": 0.4716, + "step": 4416 + }, + { + "epoch": 0.8847077662129704, + "learning_rate": 1.7944862923898586e-05, + "loss": 0.3863, + "step": 4418 + }, + { + "epoch": 0.8847077662129704, + "learning_rate": 1.7936368367090583e-05, + "loss": 0.1854, + "step": 4420 + }, + { + "epoch": 0.8855084067253803, + "learning_rate": 1.7927858310383196e-05, + "loss": 0.3268, + "step": 4422 + }, + { + "epoch": 0.8855084067253803, + "learning_rate": 1.7919332770396798e-05, + "loss": 0.1508, + "step": 4424 + }, + { + "epoch": 0.8863090472377902, + "learning_rate": 1.7910791763781928e-05, + "loss": 0.1719, + "step": 4426 + }, + { + "epoch": 0.8863090472377902, + "learning_rate": 1.7902235307219336e-05, + "loss": 0.1763, + "step": 4428 + }, + { + "epoch": 0.8871096877502002, + "learning_rate": 1.789366341742001e-05, + "loss": 0.1335, + "step": 4430 + }, + { + "epoch": 0.8871096877502002, + "learning_rate": 1.7885076111125e-05, + "loss": 0.0577, + "step": 4432 + }, + { + "epoch": 0.8879103282626101, + "learning_rate": 1.7876473405105535e-05, + "loss": 0.3524, + "step": 4434 + }, + { + "epoch": 0.8879103282626101, + "learning_rate": 1.7867855316162846e-05, + "loss": 0.6029, + "step": 4436 + }, + { + "epoch": 0.88871096877502, + "learning_rate": 1.785922186112829e-05, + "loss": 0.3894, + "step": 4438 + }, + { + "epoch": 0.88871096877502, + "learning_rate": 1.7850573056863173e-05, + "loss": 0.3637, + "step": 4440 + }, + { + "epoch": 0.8895116092874299, + "learning_rate": 1.7841908920258774e-05, + "loss": 0.176, + "step": 4442 + }, + { + "epoch": 0.8895116092874299, + "learning_rate": 1.783322946823638e-05, + "loss": 0.2878, + "step": 4444 + }, + { + "epoch": 0.8903122497998399, + "learning_rate": 1.782453471774711e-05, + "loss": 0.1513, + "step": 4446 + }, + { + "epoch": 0.8903122497998399, + "learning_rate": 1.7815824685772042e-05, + "loss": 0.4897, + "step": 4448 + }, + { + "epoch": 0.8911128903122498, + "learning_rate": 1.7807099389322013e-05, + "loss": 0.3776, + "step": 4450 + }, + { + "epoch": 0.8911128903122498, + "learning_rate": 1.779835884543776e-05, + "loss": 0.2187, + "step": 4452 + }, + { + "epoch": 0.8919135308246597, + "learning_rate": 1.7789603071189733e-05, + "loss": 0.2069, + "step": 4454 + }, + { + "epoch": 0.8919135308246597, + "learning_rate": 1.7780832083678122e-05, + "loss": 0.1773, + "step": 4456 + }, + { + "epoch": 0.8927141713370697, + "learning_rate": 1.7772045900032912e-05, + "loss": 0.1587, + "step": 4458 + }, + { + "epoch": 0.8927141713370697, + "learning_rate": 1.776324453741365e-05, + "loss": 0.438, + "step": 4460 + }, + { + "epoch": 0.8935148118494796, + "learning_rate": 1.7754428013009644e-05, + "loss": 0.4262, + "step": 4462 + }, + { + "epoch": 0.8935148118494796, + "learning_rate": 1.774559634403971e-05, + "loss": 0.122, + "step": 4464 + }, + { + "epoch": 0.8943154523618895, + "learning_rate": 1.7736749547752327e-05, + "loss": 0.2068, + "step": 4466 + }, + { + "epoch": 0.8943154523618895, + "learning_rate": 1.7727887641425465e-05, + "loss": 0.5509, + "step": 4468 + }, + { + "epoch": 0.8951160928742994, + "learning_rate": 1.7719010642366597e-05, + "loss": 0.1901, + "step": 4470 + }, + { + "epoch": 0.8951160928742994, + "learning_rate": 1.7710118567912732e-05, + "loss": 0.418, + "step": 4472 + }, + { + "epoch": 0.8959167333867094, + "learning_rate": 1.770121143543025e-05, + "loss": 0.4342, + "step": 4474 + }, + { + "epoch": 0.8959167333867094, + "learning_rate": 1.7692289262315008e-05, + "loss": 0.4755, + "step": 4476 + }, + { + "epoch": 0.8967173738991193, + "learning_rate": 1.7683352065992174e-05, + "loss": 0.3034, + "step": 4478 + }, + { + "epoch": 0.8967173738991193, + "learning_rate": 1.7674399863916298e-05, + "loss": 0.2379, + "step": 4480 + }, + { + "epoch": 0.8975180144115292, + "learning_rate": 1.7665432673571238e-05, + "loss": 0.2379, + "step": 4482 + }, + { + "epoch": 0.8975180144115292, + "learning_rate": 1.765645051247007e-05, + "loss": 0.1511, + "step": 4484 + }, + { + "epoch": 0.8983186549239391, + "learning_rate": 1.7647453398155204e-05, + "loss": 0.2849, + "step": 4486 + }, + { + "epoch": 0.8983186549239391, + "learning_rate": 1.7638441348198144e-05, + "loss": 0.0123, + "step": 4488 + }, + { + "epoch": 0.899119295436349, + "learning_rate": 1.7629414380199672e-05, + "loss": 0.1866, + "step": 4490 + }, + { + "epoch": 0.899119295436349, + "learning_rate": 1.762037251178961e-05, + "loss": 0.4598, + "step": 4492 + }, + { + "epoch": 0.899919935948759, + "learning_rate": 1.7611315760626943e-05, + "loss": 0.1496, + "step": 4494 + }, + { + "epoch": 0.899919935948759, + "learning_rate": 1.7602244144399713e-05, + "loss": 0.0817, + "step": 4496 + }, + { + "epoch": 0.900720576461169, + "learning_rate": 1.7593157680824943e-05, + "loss": 0.4548, + "step": 4498 + }, + { + "epoch": 0.900720576461169, + "learning_rate": 1.7584056387648738e-05, + "loss": 0.1285, + "step": 4500 + }, + { + "epoch": 0.9015212169735789, + "learning_rate": 1.757494028264608e-05, + "loss": 0.2961, + "step": 4502 + }, + { + "epoch": 0.9015212169735789, + "learning_rate": 1.7565809383620966e-05, + "loss": 0.1537, + "step": 4504 + }, + { + "epoch": 0.9023218574859888, + "learning_rate": 1.7556663708406203e-05, + "loss": 0.2138, + "step": 4506 + }, + { + "epoch": 0.9023218574859888, + "learning_rate": 1.7547503274863502e-05, + "loss": 0.13, + "step": 4508 + }, + { + "epoch": 0.9031224979983987, + "learning_rate": 1.7538328100883404e-05, + "loss": 0.4411, + "step": 4510 + }, + { + "epoch": 0.9031224979983987, + "learning_rate": 1.7529138204385186e-05, + "loss": 0.1029, + "step": 4512 + }, + { + "epoch": 0.9039231385108086, + "learning_rate": 1.7519933603316962e-05, + "loss": 0.4227, + "step": 4514 + }, + { + "epoch": 0.9039231385108086, + "learning_rate": 1.7510714315655467e-05, + "loss": 0.6255, + "step": 4516 + }, + { + "epoch": 0.9047237790232185, + "learning_rate": 1.750148035940622e-05, + "loss": 0.1786, + "step": 4518 + }, + { + "epoch": 0.9047237790232185, + "learning_rate": 1.7492231752603305e-05, + "loss": 0.426, + "step": 4520 + }, + { + "epoch": 0.9055244195356285, + "learning_rate": 1.7482968513309458e-05, + "loss": 0.5727, + "step": 4522 + }, + { + "epoch": 0.9055244195356285, + "learning_rate": 1.7473690659616e-05, + "loss": 0.0447, + "step": 4524 + }, + { + "epoch": 0.9063250600480385, + "learning_rate": 1.7464398209642744e-05, + "loss": 0.333, + "step": 4526 + }, + { + "epoch": 0.9063250600480385, + "learning_rate": 1.7455091181538094e-05, + "loss": 0.5703, + "step": 4528 + }, + { + "epoch": 0.9071257005604484, + "learning_rate": 1.7445769593478842e-05, + "loss": 0.2161, + "step": 4530 + }, + { + "epoch": 0.9071257005604484, + "learning_rate": 1.743643346367027e-05, + "loss": 0.2706, + "step": 4532 + }, + { + "epoch": 0.9079263410728583, + "learning_rate": 1.7427082810346024e-05, + "loss": 0.1665, + "step": 4534 + }, + { + "epoch": 0.9079263410728583, + "learning_rate": 1.741771765176815e-05, + "loss": 0.2637, + "step": 4536 + }, + { + "epoch": 0.9087269815852682, + "learning_rate": 1.740833800622701e-05, + "loss": 0.3771, + "step": 4538 + }, + { + "epoch": 0.9087269815852682, + "learning_rate": 1.739894389204122e-05, + "loss": 0.0067, + "step": 4540 + }, + { + "epoch": 0.9095276220976781, + "learning_rate": 1.738953532755774e-05, + "loss": 0.3853, + "step": 4542 + }, + { + "epoch": 0.9095276220976781, + "learning_rate": 1.7380112331151657e-05, + "loss": 0.2051, + "step": 4544 + }, + { + "epoch": 0.910328262610088, + "learning_rate": 1.7370674921226306e-05, + "loss": 0.1955, + "step": 4546 + }, + { + "epoch": 0.910328262610088, + "learning_rate": 1.7361223116213146e-05, + "loss": 0.2711, + "step": 4548 + }, + { + "epoch": 0.911128903122498, + "learning_rate": 1.7351756934571764e-05, + "loss": 0.2293, + "step": 4550 + }, + { + "epoch": 0.911128903122498, + "learning_rate": 1.7342276394789825e-05, + "loss": 0.1236, + "step": 4552 + }, + { + "epoch": 0.911929543634908, + "learning_rate": 1.7332781515382996e-05, + "loss": 0.51, + "step": 4554 + }, + { + "epoch": 0.911929543634908, + "learning_rate": 1.732327231489503e-05, + "loss": 0.7844, + "step": 4556 + }, + { + "epoch": 0.9127301841473179, + "learning_rate": 1.7313748811897564e-05, + "loss": 0.2334, + "step": 4558 + }, + { + "epoch": 0.9127301841473179, + "learning_rate": 1.7304211024990216e-05, + "loss": 0.2295, + "step": 4560 + }, + { + "epoch": 0.9135308246597278, + "learning_rate": 1.7294658972800495e-05, + "loss": 0.1496, + "step": 4562 + }, + { + "epoch": 0.9135308246597278, + "learning_rate": 1.728509267398376e-05, + "loss": 0.0667, + "step": 4564 + }, + { + "epoch": 0.9143314651721377, + "learning_rate": 1.727551214722322e-05, + "loss": 0.3487, + "step": 4566 + }, + { + "epoch": 0.9143314651721377, + "learning_rate": 1.7265917411229803e-05, + "loss": 0.0767, + "step": 4568 + }, + { + "epoch": 0.9151321056845476, + "learning_rate": 1.72563084847423e-05, + "loss": 0.3192, + "step": 4570 + }, + { + "epoch": 0.9151321056845476, + "learning_rate": 1.7246685386527105e-05, + "loss": 0.3517, + "step": 4572 + }, + { + "epoch": 0.9159327461969575, + "learning_rate": 1.723704813537835e-05, + "loss": 0.2294, + "step": 4574 + }, + { + "epoch": 0.9159327461969575, + "learning_rate": 1.7227396750117802e-05, + "loss": 0.0792, + "step": 4576 + }, + { + "epoch": 0.9167333867093675, + "learning_rate": 1.7217731249594817e-05, + "loss": 0.6158, + "step": 4578 + }, + { + "epoch": 0.9167333867093675, + "learning_rate": 1.7208051652686348e-05, + "loss": 0.1384, + "step": 4580 + }, + { + "epoch": 0.9175340272217775, + "learning_rate": 1.7198357978296827e-05, + "loss": 0.4153, + "step": 4582 + }, + { + "epoch": 0.9175340272217775, + "learning_rate": 1.718865024535822e-05, + "loss": 0.0014, + "step": 4584 + }, + { + "epoch": 0.9183346677341874, + "learning_rate": 1.717892847282995e-05, + "loss": 0.2328, + "step": 4586 + }, + { + "epoch": 0.9183346677341874, + "learning_rate": 1.716919267969884e-05, + "loss": 0.3754, + "step": 4588 + }, + { + "epoch": 0.9191353082465973, + "learning_rate": 1.715944288497911e-05, + "loss": 0.1695, + "step": 4590 + }, + { + "epoch": 0.9191353082465973, + "learning_rate": 1.7149679107712317e-05, + "loss": 0.2707, + "step": 4592 + }, + { + "epoch": 0.9199359487590072, + "learning_rate": 1.713990136696734e-05, + "loss": 0.3555, + "step": 4594 + }, + { + "epoch": 0.9199359487590072, + "learning_rate": 1.7130109681840298e-05, + "loss": 0.4731, + "step": 4596 + }, + { + "epoch": 0.9207365892714171, + "learning_rate": 1.7120304071454578e-05, + "loss": 0.0322, + "step": 4598 + }, + { + "epoch": 0.9207365892714171, + "learning_rate": 1.711048455496075e-05, + "loss": 0.0551, + "step": 4600 + }, + { + "epoch": 0.921537229783827, + "learning_rate": 1.7100651151536532e-05, + "loss": 0.8615, + "step": 4602 + }, + { + "epoch": 0.921537229783827, + "learning_rate": 1.7090803880386784e-05, + "loss": 0.1954, + "step": 4604 + }, + { + "epoch": 0.922337870296237, + "learning_rate": 1.708094276074344e-05, + "loss": 0.2265, + "step": 4606 + }, + { + "epoch": 0.922337870296237, + "learning_rate": 1.7071067811865474e-05, + "loss": 0.1821, + "step": 4608 + }, + { + "epoch": 0.923138510808647, + "learning_rate": 1.7061179053038894e-05, + "loss": 0.5494, + "step": 4610 + }, + { + "epoch": 0.923138510808647, + "learning_rate": 1.705127650357663e-05, + "loss": 0.1867, + "step": 4612 + }, + { + "epoch": 0.9239391513210569, + "learning_rate": 1.704136018281859e-05, + "loss": 0.1756, + "step": 4614 + }, + { + "epoch": 0.9239391513210569, + "learning_rate": 1.7031430110131566e-05, + "loss": 0.6573, + "step": 4616 + }, + { + "epoch": 0.9247397918334668, + "learning_rate": 1.7021486304909202e-05, + "loss": 0.1342, + "step": 4618 + }, + { + "epoch": 0.9247397918334668, + "learning_rate": 1.701152878657197e-05, + "loss": 0.1762, + "step": 4620 + }, + { + "epoch": 0.9255404323458767, + "learning_rate": 1.700155757456711e-05, + "loss": 0.1346, + "step": 4622 + }, + { + "epoch": 0.9255404323458767, + "learning_rate": 1.699157268836863e-05, + "loss": 0.2233, + "step": 4624 + }, + { + "epoch": 0.9263410728582866, + "learning_rate": 1.6981574147477214e-05, + "loss": 0.1991, + "step": 4626 + }, + { + "epoch": 0.9263410728582866, + "learning_rate": 1.697156197142023e-05, + "loss": 0.0719, + "step": 4628 + }, + { + "epoch": 0.9271417133706965, + "learning_rate": 1.696153617975168e-05, + "loss": 0.2535, + "step": 4630 + }, + { + "epoch": 0.9271417133706965, + "learning_rate": 1.6951496792052148e-05, + "loss": 0.0063, + "step": 4632 + }, + { + "epoch": 0.9279423538831065, + "learning_rate": 1.694144382792878e-05, + "loss": 0.2162, + "step": 4634 + }, + { + "epoch": 0.9279423538831065, + "learning_rate": 1.6931377307015236e-05, + "loss": 0.1016, + "step": 4636 + }, + { + "epoch": 0.9287429943955164, + "learning_rate": 1.6921297248971652e-05, + "loss": 0.3514, + "step": 4638 + }, + { + "epoch": 0.9287429943955164, + "learning_rate": 1.6911203673484583e-05, + "loss": 0.1953, + "step": 4640 + }, + { + "epoch": 0.9295436349079264, + "learning_rate": 1.690109660026701e-05, + "loss": 0.3386, + "step": 4642 + }, + { + "epoch": 0.9295436349079264, + "learning_rate": 1.6890976049058267e-05, + "loss": 0.0428, + "step": 4644 + }, + { + "epoch": 0.9303442754203363, + "learning_rate": 1.688084203962401e-05, + "loss": 0.0296, + "step": 4646 + }, + { + "epoch": 0.9303442754203363, + "learning_rate": 1.687069459175619e-05, + "loss": 0.1098, + "step": 4648 + }, + { + "epoch": 0.9311449159327462, + "learning_rate": 1.6860533725272953e-05, + "loss": 0.4078, + "step": 4650 + }, + { + "epoch": 0.9311449159327462, + "learning_rate": 1.6850359460018744e-05, + "loss": 0.3788, + "step": 4652 + }, + { + "epoch": 0.9319455564451561, + "learning_rate": 1.6840171815864085e-05, + "loss": 0.4026, + "step": 4654 + }, + { + "epoch": 0.9319455564451561, + "learning_rate": 1.682997081270568e-05, + "loss": 0.0298, + "step": 4656 + }, + { + "epoch": 0.932746196957566, + "learning_rate": 1.681975647046631e-05, + "loss": 0.3147, + "step": 4658 + }, + { + "epoch": 0.932746196957566, + "learning_rate": 1.6809528809094805e-05, + "loss": 0.3, + "step": 4660 + }, + { + "epoch": 0.933546837469976, + "learning_rate": 1.6799287848566024e-05, + "loss": 0.6517, + "step": 4662 + }, + { + "epoch": 0.933546837469976, + "learning_rate": 1.6789033608880742e-05, + "loss": 0.238, + "step": 4664 + }, + { + "epoch": 0.9343474779823859, + "learning_rate": 1.6778766110065765e-05, + "loss": 0.1606, + "step": 4666 + }, + { + "epoch": 0.9343474779823859, + "learning_rate": 1.67684853721737e-05, + "loss": 0.4368, + "step": 4668 + }, + { + "epoch": 0.9351481184947958, + "learning_rate": 1.6758191415283066e-05, + "loss": 0.3007, + "step": 4670 + }, + { + "epoch": 0.9351481184947958, + "learning_rate": 1.6747884259498185e-05, + "loss": 0.0737, + "step": 4672 + }, + { + "epoch": 0.9359487590072058, + "learning_rate": 1.673756392494916e-05, + "loss": 0.5567, + "step": 4674 + }, + { + "epoch": 0.9359487590072058, + "learning_rate": 1.6727230431791826e-05, + "loss": 0.1951, + "step": 4676 + }, + { + "epoch": 0.9367493995196157, + "learning_rate": 1.671688380020769e-05, + "loss": 0.202, + "step": 4678 + }, + { + "epoch": 0.9367493995196157, + "learning_rate": 1.6706524050404006e-05, + "loss": 0.2464, + "step": 4680 + }, + { + "epoch": 0.9375500400320256, + "learning_rate": 1.6696151202613537e-05, + "loss": 0.6446, + "step": 4682 + }, + { + "epoch": 0.9375500400320256, + "learning_rate": 1.6685765277094702e-05, + "loss": 0.2138, + "step": 4684 + }, + { + "epoch": 0.9383506805444356, + "learning_rate": 1.6675366294131432e-05, + "loss": 0.0839, + "step": 4686 + }, + { + "epoch": 0.9383506805444356, + "learning_rate": 1.6664954274033175e-05, + "loss": 0.0096, + "step": 4688 + }, + { + "epoch": 0.9391513210568455, + "learning_rate": 1.6654529237134833e-05, + "loss": 0.1432, + "step": 4690 + }, + { + "epoch": 0.9391513210568455, + "learning_rate": 1.66440912037967e-05, + "loss": 0.0993, + "step": 4692 + }, + { + "epoch": 0.9399519615692554, + "learning_rate": 1.663364019440453e-05, + "loss": 0.5957, + "step": 4694 + }, + { + "epoch": 0.9399519615692554, + "learning_rate": 1.662317622936933e-05, + "loss": 0.1596, + "step": 4696 + }, + { + "epoch": 0.9407526020816653, + "learning_rate": 1.6612699329127467e-05, + "loss": 0.4715, + "step": 4698 + }, + { + "epoch": 0.9407526020816653, + "learning_rate": 1.6602209514140562e-05, + "loss": 0.7418, + "step": 4700 + }, + { + "epoch": 0.9415532425940752, + "learning_rate": 1.6591706804895415e-05, + "loss": 0.2233, + "step": 4702 + }, + { + "epoch": 0.9415532425940752, + "learning_rate": 1.6581191221904098e-05, + "loss": 0.1497, + "step": 4704 + }, + { + "epoch": 0.9423538831064852, + "learning_rate": 1.6570662785703716e-05, + "loss": 0.1512, + "step": 4706 + }, + { + "epoch": 0.9423538831064852, + "learning_rate": 1.6560121516856592e-05, + "loss": 0.1666, + "step": 4708 + }, + { + "epoch": 0.9431545236188951, + "learning_rate": 1.654956743595001e-05, + "loss": 0.1528, + "step": 4710 + }, + { + "epoch": 0.9431545236188951, + "learning_rate": 1.6539000563596328e-05, + "loss": 0.228, + "step": 4712 + }, + { + "epoch": 0.9439551641313051, + "learning_rate": 1.6528420920432893e-05, + "loss": 0.3298, + "step": 4714 + }, + { + "epoch": 0.9439551641313051, + "learning_rate": 1.651782852712194e-05, + "loss": 0.0243, + "step": 4716 + }, + { + "epoch": 0.944755804643715, + "learning_rate": 1.6507223404350686e-05, + "loss": 0.635, + "step": 4718 + }, + { + "epoch": 0.944755804643715, + "learning_rate": 1.6496605572831127e-05, + "loss": 0.1585, + "step": 4720 + }, + { + "epoch": 0.9455564451561249, + "learning_rate": 1.648597505330016e-05, + "loss": 0.4467, + "step": 4722 + }, + { + "epoch": 0.9455564451561249, + "learning_rate": 1.6475331866519387e-05, + "loss": 0.0793, + "step": 4724 + }, + { + "epoch": 0.9463570856685348, + "learning_rate": 1.6464676033275187e-05, + "loss": 0.2946, + "step": 4726 + }, + { + "epoch": 0.9463570856685348, + "learning_rate": 1.6454007574378657e-05, + "loss": 0.0438, + "step": 4728 + }, + { + "epoch": 0.9471577261809447, + "learning_rate": 1.644332651066548e-05, + "loss": 0.5859, + "step": 4730 + }, + { + "epoch": 0.9471577261809447, + "learning_rate": 1.6432632862996062e-05, + "loss": 0.2021, + "step": 4732 + }, + { + "epoch": 0.9479583666933546, + "learning_rate": 1.6421926652255275e-05, + "loss": 0.1627, + "step": 4734 + }, + { + "epoch": 0.9479583666933546, + "learning_rate": 1.6411207899352633e-05, + "loss": 0.3356, + "step": 4736 + }, + { + "epoch": 0.9487590072057646, + "learning_rate": 1.6400476625222057e-05, + "loss": 0.3043, + "step": 4738 + }, + { + "epoch": 0.9487590072057646, + "learning_rate": 1.6389732850821964e-05, + "loss": 0.256, + "step": 4740 + }, + { + "epoch": 0.9495596477181746, + "learning_rate": 1.6378976597135193e-05, + "loss": 0.3328, + "step": 4742 + }, + { + "epoch": 0.9495596477181746, + "learning_rate": 1.6368207885168904e-05, + "loss": 0.0391, + "step": 4744 + }, + { + "epoch": 0.9503602882305845, + "learning_rate": 1.635742673595468e-05, + "loss": 0.67, + "step": 4746 + }, + { + "epoch": 0.9503602882305845, + "learning_rate": 1.6346633170548275e-05, + "loss": 0.378, + "step": 4748 + }, + { + "epoch": 0.9511609287429944, + "learning_rate": 1.6335827210029823e-05, + "loss": 0.089, + "step": 4750 + }, + { + "epoch": 0.9511609287429944, + "learning_rate": 1.6325008875503563e-05, + "loss": 0.0366, + "step": 4752 + }, + { + "epoch": 0.9519615692554043, + "learning_rate": 1.6314178188097917e-05, + "loss": 0.4012, + "step": 4754 + }, + { + "epoch": 0.9519615692554043, + "learning_rate": 1.6303335168965495e-05, + "loss": 0.1774, + "step": 4756 + }, + { + "epoch": 0.9527622097678142, + "learning_rate": 1.6292479839282904e-05, + "loss": 0.2536, + "step": 4758 + }, + { + "epoch": 0.9527622097678142, + "learning_rate": 1.628161222025089e-05, + "loss": 0.2885, + "step": 4760 + }, + { + "epoch": 0.9535628502802241, + "learning_rate": 1.627073233309409e-05, + "loss": 0.2917, + "step": 4762 + }, + { + "epoch": 0.9535628502802241, + "learning_rate": 1.625984019906122e-05, + "loss": 0.0923, + "step": 4764 + }, + { + "epoch": 0.9543634907926342, + "learning_rate": 1.624893583942482e-05, + "loss": 0.3349, + "step": 4766 + }, + { + "epoch": 0.9543634907926342, + "learning_rate": 1.623801927548132e-05, + "loss": 0.0433, + "step": 4768 + }, + { + "epoch": 0.9551641313050441, + "learning_rate": 1.6227090528551058e-05, + "loss": 0.1419, + "step": 4770 + }, + { + "epoch": 0.9551641313050441, + "learning_rate": 1.6216149619978057e-05, + "loss": 0.1378, + "step": 4772 + }, + { + "epoch": 0.955964771817454, + "learning_rate": 1.6205196571130204e-05, + "loss": 0.2879, + "step": 4774 + }, + { + "epoch": 0.955964771817454, + "learning_rate": 1.6194231403398987e-05, + "loss": 0.1541, + "step": 4776 + }, + { + "epoch": 0.9567654123298639, + "learning_rate": 1.618325413819967e-05, + "loss": 0.411, + "step": 4778 + }, + { + "epoch": 0.9567654123298639, + "learning_rate": 1.6172264796971063e-05, + "loss": 0.0572, + "step": 4780 + }, + { + "epoch": 0.9575660528422738, + "learning_rate": 1.6161263401175555e-05, + "loss": 0.5453, + "step": 4782 + }, + { + "epoch": 0.9575660528422738, + "learning_rate": 1.6150249972299173e-05, + "loss": 0.1949, + "step": 4784 + }, + { + "epoch": 0.9583666933546837, + "learning_rate": 1.613922453185133e-05, + "loss": 0.7139, + "step": 4786 + }, + { + "epoch": 0.9583666933546837, + "learning_rate": 1.612818710136499e-05, + "loss": 0.4795, + "step": 4788 + }, + { + "epoch": 0.9591673338670936, + "learning_rate": 1.6117137702396454e-05, + "loss": 0.5209, + "step": 4790 + }, + { + "epoch": 0.9591673338670936, + "learning_rate": 1.6106076356525484e-05, + "loss": 0.4714, + "step": 4792 + }, + { + "epoch": 0.9599679743795037, + "learning_rate": 1.6095003085355103e-05, + "loss": 0.3584, + "step": 4794 + }, + { + "epoch": 0.9599679743795037, + "learning_rate": 1.6083917910511623e-05, + "loss": 0.3463, + "step": 4796 + }, + { + "epoch": 0.9607686148919136, + "learning_rate": 1.6072820853644688e-05, + "loss": 0.7911, + "step": 4798 + }, + { + "epoch": 0.9607686148919136, + "learning_rate": 1.6061711936427028e-05, + "loss": 0.2156, + "step": 4800 + }, + { + "epoch": 0.9615692554043235, + "learning_rate": 1.6050591180554658e-05, + "loss": 0.183, + "step": 4802 + }, + { + "epoch": 0.9615692554043235, + "learning_rate": 1.60394586077466e-05, + "loss": 0.1345, + "step": 4804 + }, + { + "epoch": 0.9623698959167334, + "learning_rate": 1.6028314239745068e-05, + "loss": 0.1276, + "step": 4806 + }, + { + "epoch": 0.9623698959167334, + "learning_rate": 1.6017158098315224e-05, + "loss": 0.2641, + "step": 4808 + }, + { + "epoch": 0.9631705364291433, + "learning_rate": 1.6005990205245226e-05, + "loss": 0.4003, + "step": 4810 + }, + { + "epoch": 0.9631705364291433, + "learning_rate": 1.5994810582346266e-05, + "loss": 0.0812, + "step": 4812 + }, + { + "epoch": 0.9639711769415532, + "learning_rate": 1.5983619251452334e-05, + "loss": 0.1702, + "step": 4814 + }, + { + "epoch": 0.9639711769415532, + "learning_rate": 1.5972416234420404e-05, + "loss": 0.1665, + "step": 4816 + }, + { + "epoch": 0.9647718174539631, + "learning_rate": 1.596120155313017e-05, + "loss": 0.2824, + "step": 4818 + }, + { + "epoch": 0.9647718174539631, + "learning_rate": 1.594997522948413e-05, + "loss": 0.0134, + "step": 4820 + }, + { + "epoch": 0.9655724579663731, + "learning_rate": 1.593873728540759e-05, + "loss": 0.1138, + "step": 4822 + }, + { + "epoch": 0.9655724579663731, + "learning_rate": 1.592748774284844e-05, + "loss": 0.1343, + "step": 4824 + }, + { + "epoch": 0.966373098478783, + "learning_rate": 1.5916226623777346e-05, + "loss": 0.2533, + "step": 4826 + }, + { + "epoch": 0.966373098478783, + "learning_rate": 1.5904953950187448e-05, + "loss": 0.1226, + "step": 4828 + }, + { + "epoch": 0.967173738991193, + "learning_rate": 1.5893669744094587e-05, + "loss": 0.1512, + "step": 4830 + }, + { + "epoch": 0.967173738991193, + "learning_rate": 1.588237402753703e-05, + "loss": 0.2555, + "step": 4832 + }, + { + "epoch": 0.9679743795036029, + "learning_rate": 1.5871066822575526e-05, + "loss": 0.2378, + "step": 4834 + }, + { + "epoch": 0.9679743795036029, + "learning_rate": 1.5859748151293354e-05, + "loss": 0.7478, + "step": 4836 + }, + { + "epoch": 0.9687750200160128, + "learning_rate": 1.5848418035796064e-05, + "loss": 0.152, + "step": 4838 + }, + { + "epoch": 0.9687750200160128, + "learning_rate": 1.5837076498211673e-05, + "loss": 0.0552, + "step": 4840 + }, + { + "epoch": 0.9695756605284227, + "learning_rate": 1.5825723560690396e-05, + "loss": 0.4747, + "step": 4842 + }, + { + "epoch": 0.9695756605284227, + "learning_rate": 1.581435924540482e-05, + "loss": 0.2445, + "step": 4844 + }, + { + "epoch": 0.9703763010408326, + "learning_rate": 1.580298357454967e-05, + "loss": 0.3226, + "step": 4846 + }, + { + "epoch": 0.9703763010408326, + "learning_rate": 1.579159657034185e-05, + "loss": 0.0665, + "step": 4848 + }, + { + "epoch": 0.9711769415532426, + "learning_rate": 1.5780198255020485e-05, + "loss": 0.2761, + "step": 4850 + }, + { + "epoch": 0.9711769415532426, + "learning_rate": 1.5768788650846674e-05, + "loss": 0.007, + "step": 4852 + }, + { + "epoch": 0.9719775820656525, + "learning_rate": 1.5757367780103672e-05, + "loss": 0.1608, + "step": 4854 + }, + { + "epoch": 0.9719775820656525, + "learning_rate": 1.574593566509664e-05, + "loss": 0.1789, + "step": 4856 + }, + { + "epoch": 0.9727782225780625, + "learning_rate": 1.5734492328152796e-05, + "loss": 0.0919, + "step": 4858 + }, + { + "epoch": 0.9727782225780625, + "learning_rate": 1.5723037791621203e-05, + "loss": 0.0056, + "step": 4860 + }, + { + "epoch": 0.9735788630904724, + "learning_rate": 1.5711572077872784e-05, + "loss": 0.1635, + "step": 4862 + }, + { + "epoch": 0.9735788630904724, + "learning_rate": 1.5700095209300386e-05, + "loss": 0.0626, + "step": 4864 + }, + { + "epoch": 0.9743795036028823, + "learning_rate": 1.568860720831852e-05, + "loss": 0.5397, + "step": 4866 + }, + { + "epoch": 0.9743795036028823, + "learning_rate": 1.5677108097363565e-05, + "loss": 0.0692, + "step": 4868 + }, + { + "epoch": 0.9751801441152922, + "learning_rate": 1.5665597898893508e-05, + "loss": 0.1807, + "step": 4870 + }, + { + "epoch": 0.9751801441152922, + "learning_rate": 1.5654076635387976e-05, + "loss": 0.029, + "step": 4872 + }, + { + "epoch": 0.9759807846277022, + "learning_rate": 1.5642544329348316e-05, + "loss": 0.3771, + "step": 4874 + }, + { + "epoch": 0.9759807846277022, + "learning_rate": 1.5631001003297302e-05, + "loss": 0.1662, + "step": 4876 + }, + { + "epoch": 0.9767814251401121, + "learning_rate": 1.5619446679779367e-05, + "loss": 0.5357, + "step": 4878 + }, + { + "epoch": 0.9767814251401121, + "learning_rate": 1.560788138136029e-05, + "loss": 0.5486, + "step": 4880 + }, + { + "epoch": 0.977582065652522, + "learning_rate": 1.5596305130627414e-05, + "loss": 0.2382, + "step": 4882 + }, + { + "epoch": 0.977582065652522, + "learning_rate": 1.5584717950189373e-05, + "loss": 0.1727, + "step": 4884 + }, + { + "epoch": 0.978382706164932, + "learning_rate": 1.5573119862676155e-05, + "loss": 0.1686, + "step": 4886 + }, + { + "epoch": 0.978382706164932, + "learning_rate": 1.5561510890739137e-05, + "loss": 0.0837, + "step": 4888 + }, + { + "epoch": 0.9791833466773419, + "learning_rate": 1.554989105705083e-05, + "loss": 0.1311, + "step": 4890 + }, + { + "epoch": 0.9791833466773419, + "learning_rate": 1.5538260384305083e-05, + "loss": 0.0826, + "step": 4892 + }, + { + "epoch": 0.9799839871897518, + "learning_rate": 1.5526618895216786e-05, + "loss": 0.1105, + "step": 4894 + }, + { + "epoch": 0.9799839871897518, + "learning_rate": 1.5514966612522088e-05, + "loss": 0.0088, + "step": 4896 + }, + { + "epoch": 0.9807846277021617, + "learning_rate": 1.5503303558978112e-05, + "loss": 0.5397, + "step": 4898 + }, + { + "epoch": 0.9807846277021617, + "learning_rate": 1.5491629757363033e-05, + "loss": 0.6078, + "step": 4900 + }, + { + "epoch": 0.9815852682145717, + "learning_rate": 1.547994523047609e-05, + "loss": 0.1693, + "step": 4902 + }, + { + "epoch": 0.9815852682145717, + "learning_rate": 1.546825000113736e-05, + "loss": 0.1275, + "step": 4904 + }, + { + "epoch": 0.9823859087269816, + "learning_rate": 1.545654409218794e-05, + "loss": 0.6113, + "step": 4906 + }, + { + "epoch": 0.9823859087269816, + "learning_rate": 1.544482752648966e-05, + "loss": 0.2083, + "step": 4908 + }, + { + "epoch": 0.9831865492393915, + "learning_rate": 1.5433100326925298e-05, + "loss": 0.0843, + "step": 4910 + }, + { + "epoch": 0.9831865492393915, + "learning_rate": 1.5421362516398285e-05, + "loss": 0.0569, + "step": 4912 + }, + { + "epoch": 0.9839871897518014, + "learning_rate": 1.5409614117832797e-05, + "loss": 0.4227, + "step": 4914 + }, + { + "epoch": 0.9839871897518014, + "learning_rate": 1.539785515417377e-05, + "loss": 0.0934, + "step": 4916 + }, + { + "epoch": 0.9847878302642114, + "learning_rate": 1.538608564838665e-05, + "loss": 0.153, + "step": 4918 + }, + { + "epoch": 0.9847878302642114, + "learning_rate": 1.5374305623457605e-05, + "loss": 0.0904, + "step": 4920 + }, + { + "epoch": 0.9855884707766213, + "learning_rate": 1.5362515102393244e-05, + "loss": 0.1268, + "step": 4922 + }, + { + "epoch": 0.9855884707766213, + "learning_rate": 1.5350714108220677e-05, + "loss": 0.5589, + "step": 4924 + }, + { + "epoch": 0.9863891112890312, + "learning_rate": 1.5338902663987564e-05, + "loss": 0.1444, + "step": 4926 + }, + { + "epoch": 0.9863891112890312, + "learning_rate": 1.532708079276186e-05, + "loss": 0.1909, + "step": 4928 + }, + { + "epoch": 0.9871897518014412, + "learning_rate": 1.531524851763198e-05, + "loss": 0.1268, + "step": 4930 + }, + { + "epoch": 0.9871897518014412, + "learning_rate": 1.5303405861706567e-05, + "loss": 0.1504, + "step": 4932 + }, + { + "epoch": 0.9879903923138511, + "learning_rate": 1.529155284811464e-05, + "loss": 0.1542, + "step": 4934 + }, + { + "epoch": 0.9879903923138511, + "learning_rate": 1.5279689500005353e-05, + "loss": 0.0221, + "step": 4936 + }, + { + "epoch": 0.988791032826261, + "learning_rate": 1.5267815840548067e-05, + "loss": 0.4188, + "step": 4938 + }, + { + "epoch": 0.988791032826261, + "learning_rate": 1.5255931892932344e-05, + "loss": 0.3513, + "step": 4940 + }, + { + "epoch": 0.9895916733386709, + "learning_rate": 1.5244037680367739e-05, + "loss": 0.1664, + "step": 4942 + }, + { + "epoch": 0.9895916733386709, + "learning_rate": 1.5232133226083962e-05, + "loss": 0.1242, + "step": 4944 + }, + { + "epoch": 0.9903923138510808, + "learning_rate": 1.522021855333061e-05, + "loss": 0.1204, + "step": 4946 + }, + { + "epoch": 0.9903923138510808, + "learning_rate": 1.5208293685377362e-05, + "loss": 0.2119, + "step": 4948 + }, + { + "epoch": 0.9911929543634908, + "learning_rate": 1.519635864551371e-05, + "loss": 0.1162, + "step": 4950 + }, + { + "epoch": 0.9911929543634908, + "learning_rate": 1.5184413457049014e-05, + "loss": 0.0457, + "step": 4952 + }, + { + "epoch": 0.9919935948759008, + "learning_rate": 1.5172458143312548e-05, + "loss": 0.2387, + "step": 4954 + }, + { + "epoch": 0.9919935948759008, + "learning_rate": 1.5160492727653238e-05, + "loss": 0.034, + "step": 4956 + }, + { + "epoch": 0.9927942353883107, + "learning_rate": 1.5148517233439858e-05, + "loss": 0.2563, + "step": 4958 + }, + { + "epoch": 0.9927942353883107, + "learning_rate": 1.5136531684060753e-05, + "loss": 0.2303, + "step": 4960 + }, + { + "epoch": 0.9935948759007206, + "learning_rate": 1.512453610292402e-05, + "loss": 0.3777, + "step": 4962 + }, + { + "epoch": 0.9935948759007206, + "learning_rate": 1.5112530513457251e-05, + "loss": 0.0183, + "step": 4964 + }, + { + "epoch": 0.9943955164131305, + "learning_rate": 1.5100514939107598e-05, + "loss": 0.1849, + "step": 4966 + }, + { + "epoch": 0.9943955164131305, + "learning_rate": 1.50884894033418e-05, + "loss": 0.0986, + "step": 4968 + }, + { + "epoch": 0.9951961569255404, + "learning_rate": 1.5076453929645927e-05, + "loss": 0.697, + "step": 4970 + }, + { + "epoch": 0.9951961569255404, + "learning_rate": 1.5064408541525578e-05, + "loss": 0.0108, + "step": 4972 + }, + { + "epoch": 0.9959967974379503, + "learning_rate": 1.505235326250563e-05, + "loss": 0.4589, + "step": 4974 + }, + { + "epoch": 0.9959967974379503, + "learning_rate": 1.504028811613027e-05, + "loss": 0.0098, + "step": 4976 + }, + { + "epoch": 0.9967974379503602, + "learning_rate": 1.5028213125963054e-05, + "loss": 0.3555, + "step": 4978 + }, + { + "epoch": 0.9967974379503602, + "learning_rate": 1.5016128315586636e-05, + "loss": 0.2235, + "step": 4980 + }, + { + "epoch": 0.9975980784627703, + "learning_rate": 1.5004033708602977e-05, + "loss": 0.1274, + "step": 4982 + }, + { + "epoch": 0.9975980784627703, + "learning_rate": 1.4991929328633043e-05, + "loss": 0.0136, + "step": 4984 + }, + { + "epoch": 0.9983987189751802, + "learning_rate": 1.4979815199317011e-05, + "loss": 0.9098, + "step": 4986 + }, + { + "epoch": 0.9983987189751802, + "learning_rate": 1.4967691344314012e-05, + "loss": 0.1939, + "step": 4988 + }, + { + "epoch": 0.9991993594875901, + "learning_rate": 1.495555778730216e-05, + "loss": 0.6106, + "step": 4990 + }, + { + "epoch": 0.9991993594875901, + "learning_rate": 1.4943414551978622e-05, + "loss": 0.017, + "step": 4992 + }, + { + "epoch": 1.0, + "learning_rate": 1.4931261662059333e-05, + "loss": 0.1969, + "step": 4994 + }, + { + "epoch": 1.0, + "learning_rate": 1.4919099141279214e-05, + "loss": 0.7851, + "step": 4996 + }, + { + "epoch": 1.0, + "step": 4996, + "total_flos": 2.927145290039296e+16, + "train_loss": 0.27889094776088064, + "train_runtime": 8503.2779, + "train_samples_per_second": 2.35, + "train_steps_per_second": 0.588 + } + ], + "logging_steps": 2, + "max_steps": 4996, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": {}, + "total_flos": 2.927145290039296e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round0.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round0.pth new file mode 100644 index 0000000000000000000000000000000000000000..4929dcb1326f376d0a13570f2ce34ce154591bef --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1429b8638f8f26d39670c2c450d26e04ee5e6aacd66d17bcbeb21b4bbed2dd21 +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round1.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round1.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a86b1642ac9562118e2b50ee561acd5e307e618 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f433e41c0774412e2d54624d75e19252ea86c1417a4ab460e9092764d81bb2c +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round2.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1117fbe79c6733118e13c14e174db95b3f81b67 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c858add8c9c9bc9679ef6ea731c6e25961adf98717bd17aba1b4a143012839b +size 639793378 diff --git a/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round3.pth b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round3.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4db193800c1d3a9c89a75437e6ba7485f96c043 --- /dev/null +++ b/client_states_v9_NEURIPS_DISJOINT_Memonly_LORA_llava_lr2e-5_bs1_gradacc32_iter0_125_infoBatch_scenario12_new_10000_random0_25_seed1/server_model_round3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3b7595208c3e5400a1cf7e2264a15968c0770649c7ef2df76f2045cda7c2e8 +size 639793378