|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990762978015888, |
|
"eval_steps": 400, |
|
"global_step": 507, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001970564689943962, |
|
"grad_norm": 3.539861305637653, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -0.03196336328983307, |
|
"logits/rejected": -0.15967734158039093, |
|
"logps/chosen": -99.96153259277344, |
|
"logps/rejected": -93.94828033447266, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00985282344971981, |
|
"grad_norm": 3.74004975366523, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": -0.042198292911052704, |
|
"logits/rejected": -0.34676456451416016, |
|
"logps/chosen": -112.20402526855469, |
|
"logps/rejected": -101.837646484375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.421875, |
|
"rewards/chosen": 0.0011955354129895568, |
|
"rewards/margins": 0.0004524323157966137, |
|
"rewards/rejected": 0.0007431029807776213, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01970564689943962, |
|
"grad_norm": 3.5769285168326417, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -0.16411139070987701, |
|
"logits/rejected": -0.29173845052719116, |
|
"logps/chosen": -94.1719741821289, |
|
"logps/rejected": -96.02313232421875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 4.2173640395049006e-05, |
|
"rewards/margins": -0.001043520518578589, |
|
"rewards/rejected": 0.001085694064386189, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02955847034915943, |
|
"grad_norm": 3.3653465070579673, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -0.07717452943325043, |
|
"logits/rejected": -0.3865337371826172, |
|
"logps/chosen": -100.953125, |
|
"logps/rejected": -92.61229705810547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.00025451680994592607, |
|
"rewards/margins": -0.0001873960136435926, |
|
"rewards/rejected": -6.712078902637586e-05, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03941129379887924, |
|
"grad_norm": 3.373649973394512, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -0.0971444696187973, |
|
"logits/rejected": -0.35578858852386475, |
|
"logps/chosen": -106.3487319946289, |
|
"logps/rejected": -102.8326416015625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0003346280718687922, |
|
"rewards/margins": 0.0001382694172207266, |
|
"rewards/rejected": -0.00047289757640101016, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.049264117248599054, |
|
"grad_norm": 3.224755352064536, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": -0.10794611275196075, |
|
"logits/rejected": -0.29162880778312683, |
|
"logps/chosen": -99.07095336914062, |
|
"logps/rejected": -95.20055389404297, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.00042314338497817516, |
|
"rewards/margins": 0.0012360246619209647, |
|
"rewards/rejected": -0.0008128813351504505, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05911694069831886, |
|
"grad_norm": 3.444342352448875, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -0.12300117313861847, |
|
"logits/rejected": -0.27830368280410767, |
|
"logps/chosen": -105.83805847167969, |
|
"logps/rejected": -104.1891860961914, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.002045437227934599, |
|
"rewards/margins": 0.004656647797673941, |
|
"rewards/rejected": -0.002611211035400629, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06896976414803867, |
|
"grad_norm": 3.5299163808469496, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": -0.03614411875605583, |
|
"logits/rejected": -0.3109976053237915, |
|
"logps/chosen": -99.59745788574219, |
|
"logps/rejected": -98.72537231445312, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.000885780609678477, |
|
"rewards/margins": 0.0061457473784685135, |
|
"rewards/rejected": -0.005259966477751732, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07882258759775848, |
|
"grad_norm": 3.9275316344127242, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -0.08717192709445953, |
|
"logits/rejected": -0.30741095542907715, |
|
"logps/chosen": -99.77064514160156, |
|
"logps/rejected": -96.07014465332031, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0018755150958895683, |
|
"rewards/margins": 0.007984376512467861, |
|
"rewards/rejected": -0.00985989160835743, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0886754110474783, |
|
"grad_norm": 3.5883285534109493, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -0.032524555921554565, |
|
"logits/rejected": -0.3046508729457855, |
|
"logps/chosen": -96.41732788085938, |
|
"logps/rejected": -95.6390380859375, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.005956724286079407, |
|
"rewards/margins": 0.01741139218211174, |
|
"rewards/rejected": -0.023368116468191147, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09852823449719811, |
|
"grad_norm": 3.3794927211727392, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -0.12240082025527954, |
|
"logits/rejected": -0.28499796986579895, |
|
"logps/chosen": -106.6746597290039, |
|
"logps/rejected": -104.4252700805664, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.017990436404943466, |
|
"rewards/margins": 0.024439355358481407, |
|
"rewards/rejected": -0.042429789900779724, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10838105794691791, |
|
"grad_norm": 3.8844805866458234, |
|
"learning_rate": 4.999050767562379e-07, |
|
"logits/chosen": -0.06140371039509773, |
|
"logits/rejected": -0.36449694633483887, |
|
"logps/chosen": -112.74903869628906, |
|
"logps/rejected": -107.06253814697266, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.048891425132751465, |
|
"rewards/margins": 0.04517052322626114, |
|
"rewards/rejected": -0.0940619483590126, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11823388139663772, |
|
"grad_norm": 4.246075688346676, |
|
"learning_rate": 4.99519574616467e-07, |
|
"logits/chosen": -0.08420858532190323, |
|
"logits/rejected": -0.2296031415462494, |
|
"logps/chosen": -106.52708435058594, |
|
"logps/rejected": -114.71268463134766, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.0871507003903389, |
|
"rewards/margins": 0.07720854133367538, |
|
"rewards/rejected": -0.16435924172401428, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.12808670484635754, |
|
"grad_norm": 4.078056870628684, |
|
"learning_rate": 4.988380179235842e-07, |
|
"logits/chosen": -0.04726668819785118, |
|
"logits/rejected": -0.2177656590938568, |
|
"logps/chosen": -117.36442565917969, |
|
"logps/rejected": -122.31159973144531, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15060417354106903, |
|
"rewards/margins": 0.0838489979505539, |
|
"rewards/rejected": -0.23445317149162292, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13793952829607734, |
|
"grad_norm": 3.820719733352528, |
|
"learning_rate": 4.978612153434526e-07, |
|
"logits/chosen": -0.05979006737470627, |
|
"logits/rejected": -0.19735023379325867, |
|
"logps/chosen": -124.60560607910156, |
|
"logps/rejected": -155.1808624267578, |
|
"loss": 0.6318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2229953557252884, |
|
"rewards/margins": 0.31397417187690735, |
|
"rewards/rejected": -0.5369695425033569, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.14779235174579716, |
|
"grad_norm": 5.031693747430946, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": -0.059552647173404694, |
|
"logits/rejected": -0.19415248930454254, |
|
"logps/chosen": -136.63735961914062, |
|
"logps/rejected": -191.6803741455078, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.35170239210128784, |
|
"rewards/margins": 0.49619174003601074, |
|
"rewards/rejected": -0.8478941917419434, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15764517519551696, |
|
"grad_norm": 5.160412651014448, |
|
"learning_rate": 4.950268573535011e-07, |
|
"logits/chosen": -0.0038110867608338594, |
|
"logits/rejected": -0.15630409121513367, |
|
"logps/chosen": -147.3404083251953, |
|
"logps/rejected": -173.90728759765625, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.4520920217037201, |
|
"rewards/margins": 0.27063217759132385, |
|
"rewards/rejected": -0.7227243185043335, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16749799864523676, |
|
"grad_norm": 6.145536461291785, |
|
"learning_rate": 4.93172664904641e-07, |
|
"logits/chosen": 0.01297803781926632, |
|
"logits/rejected": -0.1908242255449295, |
|
"logps/chosen": -167.2682647705078, |
|
"logps/rejected": -198.43177795410156, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.589021623134613, |
|
"rewards/margins": 0.3708820641040802, |
|
"rewards/rejected": -0.9599035978317261, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.1773508220949566, |
|
"grad_norm": 60.6627948159201, |
|
"learning_rate": 4.910299485003033e-07, |
|
"logits/chosen": -0.007486692164093256, |
|
"logits/rejected": -0.18025372922420502, |
|
"logps/chosen": -183.25140380859375, |
|
"logps/rejected": -288.912109375, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8218961954116821, |
|
"rewards/margins": 1.0762311220169067, |
|
"rewards/rejected": -1.8981273174285889, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1872036455446764, |
|
"grad_norm": 6.619891594866407, |
|
"learning_rate": 4.886012504698769e-07, |
|
"logits/chosen": -0.00334315188229084, |
|
"logits/rejected": -0.3066111207008362, |
|
"logps/chosen": -222.06753540039062, |
|
"logps/rejected": -272.4456481933594, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1969377994537354, |
|
"rewards/margins": 0.5773923397064209, |
|
"rewards/rejected": -1.7743301391601562, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19705646899439622, |
|
"grad_norm": 6.740434195327616, |
|
"learning_rate": 4.858894524594652e-07, |
|
"logits/chosen": -0.1177954450249672, |
|
"logits/rejected": -0.3193029761314392, |
|
"logps/chosen": -216.84396362304688, |
|
"logps/rejected": -431.8846740722656, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.120563268661499, |
|
"rewards/margins": 2.212986707687378, |
|
"rewards/rejected": -3.333550214767456, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20690929244411602, |
|
"grad_norm": 7.617693374089276, |
|
"learning_rate": 4.828977720128198e-07, |
|
"logits/chosen": -0.1505376100540161, |
|
"logits/rejected": -0.3399549126625061, |
|
"logps/chosen": -235.60617065429688, |
|
"logps/rejected": -358.7390441894531, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.3088951110839844, |
|
"rewards/margins": 1.3302855491638184, |
|
"rewards/rejected": -2.639180898666382, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21676211589383582, |
|
"grad_norm": 7.105788785064987, |
|
"learning_rate": 4.796297587537285e-07, |
|
"logits/chosen": -0.1506040096282959, |
|
"logits/rejected": -0.3250656723976135, |
|
"logps/chosen": -257.82464599609375, |
|
"logps/rejected": -416.75958251953125, |
|
"loss": 0.4453, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5325710773468018, |
|
"rewards/margins": 1.6099077463150024, |
|
"rewards/rejected": -3.1424789428710938, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22661493934355564, |
|
"grad_norm": 8.60769452133117, |
|
"learning_rate": 4.760892901743944e-07, |
|
"logits/chosen": -0.13159573078155518, |
|
"logits/rejected": -0.3428110182285309, |
|
"logps/chosen": -300.0444030761719, |
|
"logps/rejected": -514.811279296875, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -1.9326972961425781, |
|
"rewards/margins": 2.217697858810425, |
|
"rewards/rejected": -4.150395393371582, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23646776279327544, |
|
"grad_norm": 10.209138794878942, |
|
"learning_rate": 4.7228056703479626e-07, |
|
"logits/chosen": -0.16759036481380463, |
|
"logits/rejected": -0.3990747332572937, |
|
"logps/chosen": -299.72705078125, |
|
"logps/rejected": -433.87823486328125, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -1.9569202661514282, |
|
"rewards/margins": 1.37308669090271, |
|
"rewards/rejected": -3.3300068378448486, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24632058624299527, |
|
"grad_norm": 28.609743111268017, |
|
"learning_rate": 4.6820810837849535e-07, |
|
"logits/chosen": -0.20535437762737274, |
|
"logits/rejected": -0.4542999267578125, |
|
"logps/chosen": -307.52362060546875, |
|
"logps/rejected": -520.55322265625, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.0457329750061035, |
|
"rewards/margins": 2.181645393371582, |
|
"rewards/rejected": -4.227377891540527, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.25617340969271507, |
|
"grad_norm": 11.650217681846684, |
|
"learning_rate": 4.63876746170797e-07, |
|
"logits/chosen": -0.23113389313220978, |
|
"logits/rejected": -0.47989240288734436, |
|
"logps/chosen": -344.0136413574219, |
|
"logps/rejected": -512.1621704101562, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.392538547515869, |
|
"rewards/margins": 1.7528173923492432, |
|
"rewards/rejected": -4.145355701446533, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2660262331424349, |
|
"grad_norm": 12.280394789341818, |
|
"learning_rate": 4.592916195656321e-07, |
|
"logits/chosen": -0.2849624454975128, |
|
"logits/rejected": -0.4641537070274353, |
|
"logps/chosen": -360.0565490722656, |
|
"logps/rejected": -593.3041381835938, |
|
"loss": 0.4143, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.587618112564087, |
|
"rewards/margins": 2.348435401916504, |
|
"rewards/rejected": -4.936053276062012, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27587905659215467, |
|
"grad_norm": 11.781977035196428, |
|
"learning_rate": 4.544581688079602e-07, |
|
"logits/chosen": -0.27669793367385864, |
|
"logits/rejected": -0.4249224066734314, |
|
"logps/chosen": -346.13238525390625, |
|
"logps/rejected": -542.8922119140625, |
|
"loss": 0.3764, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.43330454826355, |
|
"rewards/margins": 1.9707410335540771, |
|
"rewards/rejected": -4.404046058654785, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2857318800418745, |
|
"grad_norm": 12.725521218797944, |
|
"learning_rate": 4.493821287789272e-07, |
|
"logits/chosen": -0.29486262798309326, |
|
"logits/rejected": -0.45644649863243103, |
|
"logps/chosen": -350.2372131347656, |
|
"logps/rejected": -533.0991821289062, |
|
"loss": 0.3874, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.510737657546997, |
|
"rewards/margins": 1.8067106008529663, |
|
"rewards/rejected": -4.317447662353516, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2955847034915943, |
|
"grad_norm": 10.47948759187309, |
|
"learning_rate": 4.4406952219143934e-07, |
|
"logits/chosen": -0.20662228763103485, |
|
"logits/rejected": -0.47787055373191833, |
|
"logps/chosen": -331.3938903808594, |
|
"logps/rejected": -496.76922607421875, |
|
"loss": 0.3901, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2830259799957275, |
|
"rewards/margins": 1.7167097330093384, |
|
"rewards/rejected": -3.9997353553771973, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3054375269413141, |
|
"grad_norm": 9.243554014229469, |
|
"learning_rate": 4.38526652444224e-07, |
|
"logits/chosen": -0.23520083725452423, |
|
"logits/rejected": -0.45836538076400757, |
|
"logps/chosen": -338.83935546875, |
|
"logps/rejected": -600.7113037109375, |
|
"loss": 0.3776, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.335677146911621, |
|
"rewards/margins": 2.6365208625793457, |
|
"rewards/rejected": -4.972198009490967, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3152903503910339, |
|
"grad_norm": 16.302237845544973, |
|
"learning_rate": 4.3276009614285824e-07, |
|
"logits/chosen": -0.27397865056991577, |
|
"logits/rejected": -0.4509497582912445, |
|
"logps/chosen": -340.3817443847656, |
|
"logps/rejected": -519.3179321289062, |
|
"loss": 0.373, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.389813184738159, |
|
"rewards/margins": 1.820207953453064, |
|
"rewards/rejected": -4.210021018981934, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32514317384075375, |
|
"grad_norm": 12.790316412360088, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -0.3117635250091553, |
|
"logits/rejected": -0.5069926977157593, |
|
"logps/chosen": -408.1647033691406, |
|
"logps/rejected": -684.1016845703125, |
|
"loss": 0.3554, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.9762721061706543, |
|
"rewards/margins": 2.816100597381592, |
|
"rewards/rejected": -5.792372703552246, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3349959972904735, |
|
"grad_norm": 12.163515409448873, |
|
"learning_rate": 4.2058354920054043e-07, |
|
"logits/chosen": -0.32501062750816345, |
|
"logits/rejected": -0.4444299340248108, |
|
"logps/chosen": -392.1680603027344, |
|
"logps/rejected": -582.3343505859375, |
|
"loss": 0.3309, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.916874885559082, |
|
"rewards/margins": 1.9237887859344482, |
|
"rewards/rejected": -4.840663909912109, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34484882074019335, |
|
"grad_norm": 16.509396763840595, |
|
"learning_rate": 4.141880060119336e-07, |
|
"logits/chosen": -0.30287298560142517, |
|
"logits/rejected": -0.5002428293228149, |
|
"logps/chosen": -392.53900146484375, |
|
"logps/rejected": -621.8858642578125, |
|
"loss": 0.3799, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.818398952484131, |
|
"rewards/margins": 2.3253607749938965, |
|
"rewards/rejected": -5.143759250640869, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3547016441899132, |
|
"grad_norm": 19.861599589919685, |
|
"learning_rate": 4.0759765403198877e-07, |
|
"logits/chosen": -0.2992296814918518, |
|
"logits/rejected": -0.46128687262535095, |
|
"logps/chosen": -369.04058837890625, |
|
"logps/rejected": -594.5716552734375, |
|
"loss": 0.3318, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -2.6274733543395996, |
|
"rewards/margins": 2.262349843978882, |
|
"rewards/rejected": -4.889822959899902, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.364554467639633, |
|
"grad_norm": 20.974452355570566, |
|
"learning_rate": 4.008203127021797e-07, |
|
"logits/chosen": -0.25085026025772095, |
|
"logits/rejected": -0.46279406547546387, |
|
"logps/chosen": -358.7597351074219, |
|
"logps/rejected": -571.667724609375, |
|
"loss": 0.3218, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.5963218212127686, |
|
"rewards/margins": 2.143275022506714, |
|
"rewards/rejected": -4.739596843719482, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3744072910893528, |
|
"grad_norm": 14.073373162388602, |
|
"learning_rate": 3.9386402332652754e-07, |
|
"logits/chosen": -0.23608064651489258, |
|
"logits/rejected": -0.5011879205703735, |
|
"logps/chosen": -423.06573486328125, |
|
"logps/rejected": -701.5946044921875, |
|
"loss": 0.3171, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.2071080207824707, |
|
"rewards/margins": 2.7721924781799316, |
|
"rewards/rejected": -5.979300498962402, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3842601145390726, |
|
"grad_norm": 22.462855346029198, |
|
"learning_rate": 3.867370395306068e-07, |
|
"logits/chosen": -0.21024306118488312, |
|
"logits/rejected": -0.48892831802368164, |
|
"logps/chosen": -421.2330627441406, |
|
"logps/rejected": -660.6759643554688, |
|
"loss": 0.3264, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.2450497150421143, |
|
"rewards/margins": 2.429110527038574, |
|
"rewards/rejected": -5.674160003662109, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.39411293798879243, |
|
"grad_norm": 12.932519297904385, |
|
"learning_rate": 3.794478174686328e-07, |
|
"logits/chosen": -0.29106825590133667, |
|
"logits/rejected": -0.4926213324069977, |
|
"logps/chosen": -411.04840087890625, |
|
"logps/rejected": -645.6761474609375, |
|
"loss": 0.344, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.0619587898254395, |
|
"rewards/margins": 2.4115443229675293, |
|
"rewards/rejected": -5.473503112792969, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4039657614385122, |
|
"grad_norm": 12.885808088365131, |
|
"learning_rate": 3.720050057902495e-07, |
|
"logits/chosen": -0.28087863326072693, |
|
"logits/rejected": -0.5519760847091675, |
|
"logps/chosen": -393.69659423828125, |
|
"logps/rejected": -613.2288208007812, |
|
"loss": 0.3363, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.869983196258545, |
|
"rewards/margins": 2.265076160430908, |
|
"rewards/rejected": -5.135059356689453, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.41381858488823203, |
|
"grad_norm": 16.468484087079414, |
|
"learning_rate": 3.644174353789204e-07, |
|
"logits/chosen": -0.3173820376396179, |
|
"logits/rejected": -0.41476958990097046, |
|
"logps/chosen": -439.092529296875, |
|
"logps/rejected": -734.161865234375, |
|
"loss": 0.2861, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.3481502532958984, |
|
"rewards/margins": 2.985670566558838, |
|
"rewards/rejected": -6.3338212966918945, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.42367140833795186, |
|
"grad_norm": 22.743564527030898, |
|
"learning_rate": 3.566941088741009e-07, |
|
"logits/chosen": -0.32451528310775757, |
|
"logits/rejected": -0.4603727459907532, |
|
"logps/chosen": -506.1102600097656, |
|
"logps/rejected": -843.5612182617188, |
|
"loss": 0.3019, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -4.0731306076049805, |
|
"rewards/margins": 3.3347747325897217, |
|
"rewards/rejected": -7.407905578613281, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.43352423178767163, |
|
"grad_norm": 13.898026449525894, |
|
"learning_rate": 3.488441899896217e-07, |
|
"logits/chosen": -0.36527490615844727, |
|
"logits/rejected": -0.5059664845466614, |
|
"logps/chosen": -498.8099670410156, |
|
"logps/rejected": -819.1517333984375, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.9226620197296143, |
|
"rewards/margins": 3.1661388874053955, |
|
"rewards/rejected": -7.088801383972168, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.44337705523739146, |
|
"grad_norm": 13.52248766735538, |
|
"learning_rate": 3.408769926409574e-07, |
|
"logits/chosen": -0.2923319637775421, |
|
"logits/rejected": -0.5522831082344055, |
|
"logps/chosen": -440.209716796875, |
|
"logps/rejected": -757.4591064453125, |
|
"loss": 0.2934, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.353994369506836, |
|
"rewards/margins": 3.2045319080352783, |
|
"rewards/rejected": -6.558526039123535, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4532298786871113, |
|
"grad_norm": 25.763440445028106, |
|
"learning_rate": 3.3280196989428263e-07, |
|
"logits/chosen": -0.28175559639930725, |
|
"logits/rejected": -0.5395274758338928, |
|
"logps/chosen": -408.2548828125, |
|
"logps/rejected": -689.34326171875, |
|
"loss": 0.3182, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.0639290809631348, |
|
"rewards/margins": 2.8385584354400635, |
|
"rewards/rejected": -5.902487754821777, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.46308270213683106, |
|
"grad_norm": 25.454226685247495, |
|
"learning_rate": 3.2462870275042367e-07, |
|
"logits/chosen": -0.3556864261627197, |
|
"logits/rejected": -0.5125774145126343, |
|
"logps/chosen": -393.869140625, |
|
"logps/rejected": -721.3883666992188, |
|
"loss": 0.2842, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.9227001667022705, |
|
"rewards/margins": 3.2367324829101562, |
|
"rewards/rejected": -6.159432888031006, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4729355255865509, |
|
"grad_norm": 17.441738861595127, |
|
"learning_rate": 3.1636688877701806e-07, |
|
"logits/chosen": -0.2837878167629242, |
|
"logits/rejected": -0.4791291654109955, |
|
"logps/chosen": -416.69989013671875, |
|
"logps/rejected": -730.2517700195312, |
|
"loss": 0.2738, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.118884563446045, |
|
"rewards/margins": 3.163923978805542, |
|
"rewards/rejected": -6.282808303833008, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4827883490362707, |
|
"grad_norm": 15.400566409989613, |
|
"learning_rate": 3.080263306023669e-07, |
|
"logits/chosen": -0.30386677384376526, |
|
"logits/rejected": -0.5350344777107239, |
|
"logps/chosen": -433.35760498046875, |
|
"logps/rejected": -742.5108642578125, |
|
"loss": 0.2983, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.27720308303833, |
|
"rewards/margins": 3.1466774940490723, |
|
"rewards/rejected": -6.423880100250244, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.49264117248599054, |
|
"grad_norm": 13.243347184925653, |
|
"learning_rate": 2.996169242846328e-07, |
|
"logits/chosen": -0.2507760524749756, |
|
"logits/rejected": -0.5307763814926147, |
|
"logps/chosen": -420.93218994140625, |
|
"logps/rejected": -727.508056640625, |
|
"loss": 0.2864, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.2063565254211426, |
|
"rewards/margins": 3.135401725769043, |
|
"rewards/rejected": -6.3417582511901855, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5024939959357103, |
|
"grad_norm": 15.905893971492043, |
|
"learning_rate": 2.911486475701835e-07, |
|
"logits/chosen": -0.3323180675506592, |
|
"logits/rejected": -0.500321090221405, |
|
"logps/chosen": -417.96533203125, |
|
"logps/rejected": -669.329345703125, |
|
"loss": 0.3038, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.0756640434265137, |
|
"rewards/margins": 2.5683434009552, |
|
"rewards/rejected": -5.644008159637451, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5123468193854301, |
|
"grad_norm": 15.678554296764496, |
|
"learning_rate": 2.826315480550129e-07, |
|
"logits/chosen": -0.2549038529396057, |
|
"logits/rejected": -0.5205506682395935, |
|
"logps/chosen": -397.9513244628906, |
|
"logps/rejected": -706.6505126953125, |
|
"loss": 0.2709, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.9337005615234375, |
|
"rewards/margins": 3.1359124183654785, |
|
"rewards/rejected": -6.069613456726074, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.52219964283515, |
|
"grad_norm": 13.183965337617314, |
|
"learning_rate": 2.740757312632854e-07, |
|
"logits/chosen": -0.3502804636955261, |
|
"logits/rejected": -0.47828468680381775, |
|
"logps/chosen": -458.28009033203125, |
|
"logps/rejected": -756.2743530273438, |
|
"loss": 0.3101, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.547351837158203, |
|
"rewards/margins": 3.0040035247802734, |
|
"rewards/rejected": -6.551354885101318, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5320524662848698, |
|
"grad_norm": 14.251066041226494, |
|
"learning_rate": 2.654913486571487e-07, |
|
"logits/chosen": -0.29742032289505005, |
|
"logits/rejected": -0.512765109539032, |
|
"logps/chosen": -433.20281982421875, |
|
"logps/rejected": -742.1527099609375, |
|
"loss": 0.269, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.3142356872558594, |
|
"rewards/margins": 3.1091339588165283, |
|
"rewards/rejected": -6.423369407653809, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5419052897345896, |
|
"grad_norm": 15.970456495692506, |
|
"learning_rate": 2.5688858559204053e-07, |
|
"logits/chosen": -0.29328054189682007, |
|
"logits/rejected": -0.5653128027915955, |
|
"logps/chosen": -457.0283203125, |
|
"logps/rejected": -802.5032958984375, |
|
"loss": 0.2619, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.5228774547576904, |
|
"rewards/margins": 3.504716396331787, |
|
"rewards/rejected": -7.027594566345215, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5517581131843093, |
|
"grad_norm": 30.315717109626856, |
|
"learning_rate": 2.4827764923178246e-07, |
|
"logits/chosen": -0.2728544771671295, |
|
"logits/rejected": -0.5340480208396912, |
|
"logps/chosen": -508.76715087890625, |
|
"logps/rejected": -814.9278564453125, |
|
"loss": 0.3282, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.023861408233643, |
|
"rewards/margins": 3.080327272415161, |
|
"rewards/rejected": -7.104189395904541, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5616109366340292, |
|
"grad_norm": 12.181715177068432, |
|
"learning_rate": 2.3966875643779667e-07, |
|
"logits/chosen": -0.39192137122154236, |
|
"logits/rejected": -0.48601895570755005, |
|
"logps/chosen": -509.3650817871094, |
|
"logps/rejected": -919.6329956054688, |
|
"loss": 0.2728, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.043977737426758, |
|
"rewards/margins": 4.052145957946777, |
|
"rewards/rejected": -8.096123695373535, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.571463760083749, |
|
"grad_norm": 22.18619533848792, |
|
"learning_rate": 2.3107212164681774e-07, |
|
"logits/chosen": -0.2627003788948059, |
|
"logits/rejected": -0.512112557888031, |
|
"logps/chosen": -514.5509033203125, |
|
"logps/rejected": -777.1829833984375, |
|
"loss": 0.286, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.147472381591797, |
|
"rewards/margins": 2.6902387142181396, |
|
"rewards/rejected": -6.837711334228516, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5813165835334688, |
|
"grad_norm": 20.397352654188396, |
|
"learning_rate": 2.2249794475148019e-07, |
|
"logits/chosen": -0.2886350750923157, |
|
"logits/rejected": -0.533146858215332, |
|
"logps/chosen": -465.264892578125, |
|
"logps/rejected": -807.4385986328125, |
|
"loss": 0.2676, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.6474337577819824, |
|
"rewards/margins": 3.4217605590820312, |
|
"rewards/rejected": -7.0691938400268555, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5911694069831886, |
|
"grad_norm": 12.525676548560202, |
|
"learning_rate": 2.1395639899816332e-07, |
|
"logits/chosen": -0.35506710410118103, |
|
"logits/rejected": -0.4460170865058899, |
|
"logps/chosen": -424.7347106933594, |
|
"logps/rejected": -713.3164672851562, |
|
"loss": 0.2296, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.2564120292663574, |
|
"rewards/margins": 2.842578411102295, |
|
"rewards/rejected": -6.098990440368652, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6010222304329085, |
|
"grad_norm": 16.20564210489265, |
|
"learning_rate": 2.0545761891645177e-07, |
|
"logits/chosen": -0.2978189289569855, |
|
"logits/rejected": -0.5122548937797546, |
|
"logps/chosen": -471.35693359375, |
|
"logps/rejected": -824.7840576171875, |
|
"loss": 0.2482, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.6659018993377686, |
|
"rewards/margins": 3.483975887298584, |
|
"rewards/rejected": -7.14987850189209, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6108750538826282, |
|
"grad_norm": 16.59602026766405, |
|
"learning_rate": 1.9701168829453305e-07, |
|
"logits/chosen": -0.2841472625732422, |
|
"logits/rejected": -0.49612703919410706, |
|
"logps/chosen": -475.79559326171875, |
|
"logps/rejected": -832.6185302734375, |
|
"loss": 0.2832, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.748015880584717, |
|
"rewards/margins": 3.5668914318084717, |
|
"rewards/rejected": -7.314908027648926, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.620727877332348, |
|
"grad_norm": 14.834560333592078, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": -0.2684577405452728, |
|
"logits/rejected": -0.5325924158096313, |
|
"logps/chosen": -491.4776306152344, |
|
"logps/rejected": -837.3449096679688, |
|
"loss": 0.2599, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.8538050651550293, |
|
"rewards/margins": 3.5408568382263184, |
|
"rewards/rejected": -7.394662380218506, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6305807007820678, |
|
"grad_norm": 11.160506827485138, |
|
"learning_rate": 1.8031838516385422e-07, |
|
"logits/chosen": -0.2939426898956299, |
|
"logits/rejected": -0.4827597141265869, |
|
"logps/chosen": -524.61865234375, |
|
"logps/rejected": -873.7683715820312, |
|
"loss": 0.294, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.23654842376709, |
|
"rewards/margins": 3.4801859855651855, |
|
"rewards/rejected": -7.716734409332275, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6404335242317877, |
|
"grad_norm": 25.16794488629472, |
|
"learning_rate": 1.7209081923101472e-07, |
|
"logits/chosen": -0.37951114773750305, |
|
"logits/rejected": -0.5442458391189575, |
|
"logps/chosen": -484.8412170410156, |
|
"logps/rejected": -843.31494140625, |
|
"loss": 0.2438, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.8335394859313965, |
|
"rewards/margins": 3.537074565887451, |
|
"rewards/rejected": -7.370614528656006, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6502863476815075, |
|
"grad_norm": 15.675332902369199, |
|
"learning_rate": 1.639556924093404e-07, |
|
"logits/chosen": -0.31534910202026367, |
|
"logits/rejected": -0.6055206656455994, |
|
"logps/chosen": -469.186767578125, |
|
"logps/rejected": -785.3204345703125, |
|
"loss": 0.2862, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.6316142082214355, |
|
"rewards/margins": 3.245419979095459, |
|
"rewards/rejected": -6.8770341873168945, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6601391711312273, |
|
"grad_norm": 12.550466940425805, |
|
"learning_rate": 1.5592265701304114e-07, |
|
"logits/chosen": -0.309912770986557, |
|
"logits/rejected": -0.580001950263977, |
|
"logps/chosen": -455.7469177246094, |
|
"logps/rejected": -795.7465209960938, |
|
"loss": 0.2543, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.550823211669922, |
|
"rewards/margins": 3.412391185760498, |
|
"rewards/rejected": -6.9632134437561035, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.669991994580947, |
|
"grad_norm": 14.379670239758621, |
|
"learning_rate": 1.4800124422502334e-07, |
|
"logits/chosen": -0.3434782028198242, |
|
"logits/rejected": -0.5037192106246948, |
|
"logps/chosen": -427.98486328125, |
|
"logps/rejected": -760.0518798828125, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.309481143951416, |
|
"rewards/margins": 3.3087706565856934, |
|
"rewards/rejected": -6.618251800537109, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6798448180306669, |
|
"grad_norm": 16.948885121756778, |
|
"learning_rate": 1.4020085278815743e-07, |
|
"logits/chosen": -0.3305511176586151, |
|
"logits/rejected": -0.510036289691925, |
|
"logps/chosen": -473.0335998535156, |
|
"logps/rejected": -838.19970703125, |
|
"loss": 0.2595, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.7052788734436035, |
|
"rewards/margins": 3.6906940937042236, |
|
"rewards/rejected": -7.39597225189209, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6896976414803867, |
|
"grad_norm": 13.672982554566653, |
|
"learning_rate": 1.3253073785368545e-07, |
|
"logits/chosen": -0.3359353244304657, |
|
"logits/rejected": -0.599565863609314, |
|
"logps/chosen": -450.7706604003906, |
|
"logps/rejected": -766.9132080078125, |
|
"loss": 0.2378, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.4409427642822266, |
|
"rewards/margins": 3.247152328491211, |
|
"rewards/rejected": -6.6880950927734375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6995504649301065, |
|
"grad_norm": 17.198800285063513, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -0.3707168996334076, |
|
"logits/rejected": -0.5359587669372559, |
|
"logps/chosen": -492.1787109375, |
|
"logps/rejected": -895.4451904296875, |
|
"loss": 0.266, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.8479511737823486, |
|
"rewards/margins": 4.02894926071167, |
|
"rewards/rejected": -7.876900672912598, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7094032883798264, |
|
"grad_norm": 19.550687307980805, |
|
"learning_rate": 1.1761757443482285e-07, |
|
"logits/chosen": -0.2943348288536072, |
|
"logits/rejected": -0.6027869582176208, |
|
"logps/chosen": -481.8168029785156, |
|
"logps/rejected": -860.5421142578125, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.7917988300323486, |
|
"rewards/margins": 3.795625686645508, |
|
"rewards/rejected": -7.587424278259277, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7192561118295462, |
|
"grad_norm": 19.931316668144966, |
|
"learning_rate": 1.1039222039359644e-07, |
|
"logits/chosen": -0.32548245787620544, |
|
"logits/rejected": -0.5251120328903198, |
|
"logps/chosen": -466.42352294921875, |
|
"logps/rejected": -837.3107299804688, |
|
"loss": 0.2503, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.647482395172119, |
|
"rewards/margins": 3.7225258350372314, |
|
"rewards/rejected": -7.370007514953613, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.729108935279266, |
|
"grad_norm": 27.992034585729865, |
|
"learning_rate": 1.0333251074666608e-07, |
|
"logits/chosen": -0.26019373536109924, |
|
"logits/rejected": -0.5685330629348755, |
|
"logps/chosen": -508.7705078125, |
|
"logps/rejected": -834.8313598632812, |
|
"loss": 0.3009, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.032122611999512, |
|
"rewards/margins": 3.357773542404175, |
|
"rewards/rejected": -7.389896392822266, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7389617587289857, |
|
"grad_norm": 11.391526012410443, |
|
"learning_rate": 9.644682182758304e-08, |
|
"logits/chosen": -0.36888641119003296, |
|
"logits/rejected": -0.544438362121582, |
|
"logps/chosen": -501.13922119140625, |
|
"logps/rejected": -866.8273315429688, |
|
"loss": 0.267, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.018528938293457, |
|
"rewards/margins": 3.660531997680664, |
|
"rewards/rejected": -7.679060459136963, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7488145821787056, |
|
"grad_norm": 15.381753957706216, |
|
"learning_rate": 8.974332349459992e-08, |
|
"logits/chosen": -0.3592904210090637, |
|
"logits/rejected": -0.5230213403701782, |
|
"logps/chosen": -498.76397705078125, |
|
"logps/rejected": -873.2203369140625, |
|
"loss": 0.2913, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.937748432159424, |
|
"rewards/margins": 3.7432987689971924, |
|
"rewards/rejected": -7.681046962738037, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7586674056284254, |
|
"grad_norm": 12.706615805349994, |
|
"learning_rate": 8.322996943714672e-08, |
|
"logits/chosen": -0.3497922718524933, |
|
"logits/rejected": -0.5577541589736938, |
|
"logps/chosen": -512.2010498046875, |
|
"logps/rejected": -885.4027099609375, |
|
"loss": 0.2356, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.10722017288208, |
|
"rewards/margins": 3.7454257011413574, |
|
"rewards/rejected": -7.8526458740234375, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7685202290781452, |
|
"grad_norm": 14.360275587084523, |
|
"learning_rate": 7.691448773879256e-08, |
|
"logits/chosen": -0.3780885934829712, |
|
"logits/rejected": -0.5432588458061218, |
|
"logps/chosen": -449.843505859375, |
|
"logps/rejected": -840.5940551757812, |
|
"loss": 0.2795, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.502520799636841, |
|
"rewards/margins": 3.937730073928833, |
|
"rewards/rejected": -7.440250396728516, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.778373052527865, |
|
"grad_norm": 15.407447239164402, |
|
"learning_rate": 7.080437170788722e-08, |
|
"logits/chosen": -0.38400599360466003, |
|
"logits/rejected": -0.5888391733169556, |
|
"logps/chosen": -466.9847717285156, |
|
"logps/rejected": -768.0460205078125, |
|
"loss": 0.2824, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.600980281829834, |
|
"rewards/margins": 3.0612690448760986, |
|
"rewards/rejected": -6.6622490882873535, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7882258759775849, |
|
"grad_norm": 11.960557567104859, |
|
"learning_rate": 6.490687098676332e-08, |
|
"logits/chosen": -0.4111432433128357, |
|
"logits/rejected": -0.6113660335540771, |
|
"logps/chosen": -448.69500732421875, |
|
"logps/rejected": -851.4993896484375, |
|
"loss": 0.2753, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.424553394317627, |
|
"rewards/margins": 4.026993751525879, |
|
"rewards/rejected": -7.451546669006348, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7882258759775849, |
|
"eval_logits/chosen": -1.0704303979873657, |
|
"eval_logits/rejected": -0.814034640789032, |
|
"eval_logps/chosen": -502.4184875488281, |
|
"eval_logps/rejected": -705.5203857421875, |
|
"eval_loss": 0.7013445496559143, |
|
"eval_rewards/accuracies": 0.7059999704360962, |
|
"eval_rewards/chosen": -4.094460487365723, |
|
"eval_rewards/margins": 1.768728494644165, |
|
"eval_rewards/rejected": -5.863188743591309, |
|
"eval_runtime": 197.0588, |
|
"eval_samples_per_second": 10.144, |
|
"eval_steps_per_second": 1.269, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7980786994273046, |
|
"grad_norm": 16.569681590429596, |
|
"learning_rate": 5.9228982950048414e-08, |
|
"logits/chosen": -0.3508976995944977, |
|
"logits/rejected": -0.6730154752731323, |
|
"logps/chosen": -446.61688232421875, |
|
"logps/rejected": -841.1275634765625, |
|
"loss": 0.2751, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.3948426246643066, |
|
"rewards/margins": 4.001449108123779, |
|
"rewards/rejected": -7.396292209625244, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8079315228770244, |
|
"grad_norm": 30.74667877593499, |
|
"learning_rate": 5.3777444402291345e-08, |
|
"logits/chosen": -0.44255560636520386, |
|
"logits/rejected": -0.5289443135261536, |
|
"logps/chosen": -484.78363037109375, |
|
"logps/rejected": -847.515625, |
|
"loss": 0.2925, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.900998592376709, |
|
"rewards/margins": 3.6134960651397705, |
|
"rewards/rejected": -7.5144944190979, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8177843463267442, |
|
"grad_norm": 23.687222476192307, |
|
"learning_rate": 4.855872358475546e-08, |
|
"logits/chosen": -0.3782200217247009, |
|
"logits/rejected": -0.6156097650527954, |
|
"logps/chosen": -456.51220703125, |
|
"logps/rejected": -778.0684814453125, |
|
"loss": 0.266, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -3.4957103729248047, |
|
"rewards/margins": 3.254422664642334, |
|
"rewards/rejected": -6.750133514404297, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8276371697764641, |
|
"grad_norm": 19.649935116061837, |
|
"learning_rate": 4.357901250086107e-08, |
|
"logits/chosen": -0.42209166288375854, |
|
"logits/rejected": -0.5967798233032227, |
|
"logps/chosen": -440.5074157714844, |
|
"logps/rejected": -706.137451171875, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.3461577892303467, |
|
"rewards/margins": 2.7582011222839355, |
|
"rewards/rejected": -6.104359149932861, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8374899932261839, |
|
"grad_norm": 20.031137924167396, |
|
"learning_rate": 3.884421956938377e-08, |
|
"logits/chosen": -0.3795103430747986, |
|
"logits/rejected": -0.5693720579147339, |
|
"logps/chosen": -415.31976318359375, |
|
"logps/rejected": -812.3355712890625, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.1916797161102295, |
|
"rewards/margins": 3.95634126663208, |
|
"rewards/rejected": -7.1480207443237305, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8473428166759037, |
|
"grad_norm": 13.471904858040782, |
|
"learning_rate": 3.435996261412591e-08, |
|
"logits/chosen": -0.37054741382598877, |
|
"logits/rejected": -0.5658844113349915, |
|
"logps/chosen": -443.5189514160156, |
|
"logps/rejected": -789.6598510742188, |
|
"loss": 0.2174, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.431079864501953, |
|
"rewards/margins": 3.494525909423828, |
|
"rewards/rejected": -6.925606727600098, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8571956401256235, |
|
"grad_norm": 41.43328285681965, |
|
"learning_rate": 3.013156219837776e-08, |
|
"logits/chosen": -0.42507949471473694, |
|
"logits/rejected": -0.6270584464073181, |
|
"logps/chosen": -437.37841796875, |
|
"logps/rejected": -766.0833129882812, |
|
"loss": 0.2904, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.347297191619873, |
|
"rewards/margins": 3.312955856323242, |
|
"rewards/rejected": -6.660252571105957, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8670484635753433, |
|
"grad_norm": 14.214226148262764, |
|
"learning_rate": 2.6164035312078447e-08, |
|
"logits/chosen": -0.35758644342422485, |
|
"logits/rejected": -0.5952532291412354, |
|
"logps/chosen": -442.4962463378906, |
|
"logps/rejected": -813.2142944335938, |
|
"loss": 0.2391, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.3745460510253906, |
|
"rewards/margins": 3.7038657665252686, |
|
"rewards/rejected": -7.0784125328063965, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8769012870250631, |
|
"grad_norm": 12.722193098483684, |
|
"learning_rate": 2.2462089419165776e-08, |
|
"logits/chosen": -0.3699408173561096, |
|
"logits/rejected": -0.6010391116142273, |
|
"logps/chosen": -450.82745361328125, |
|
"logps/rejected": -841.6207885742188, |
|
"loss": 0.2671, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.4586853981018066, |
|
"rewards/margins": 3.931617259979248, |
|
"rewards/rejected": -7.3903021812438965, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8867541104747829, |
|
"grad_norm": 34.89251514032782, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -0.42120829224586487, |
|
"logits/rejected": -0.5930547118186951, |
|
"logps/chosen": -482.32049560546875, |
|
"logps/rejected": -877.3797607421875, |
|
"loss": 0.2528, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.762233018875122, |
|
"rewards/margins": 3.9991488456726074, |
|
"rewards/rejected": -7.761382102966309, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8966069339245027, |
|
"grad_norm": 11.63474736568058, |
|
"learning_rate": 1.5872189700736337e-08, |
|
"logits/chosen": -0.3359231948852539, |
|
"logits/rejected": -0.580034613609314, |
|
"logps/chosen": -476.1517028808594, |
|
"logps/rejected": -840.4141845703125, |
|
"loss": 0.2568, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.7410130500793457, |
|
"rewards/margins": 3.678678512573242, |
|
"rewards/rejected": -7.419691562652588, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9064597573742226, |
|
"grad_norm": 17.200657346282192, |
|
"learning_rate": 1.2992054780085692e-08, |
|
"logits/chosen": -0.42435139417648315, |
|
"logits/rejected": -0.5707032084465027, |
|
"logps/chosen": -480.90673828125, |
|
"logps/rejected": -864.6921997070312, |
|
"loss": 0.2681, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.753577470779419, |
|
"rewards/margins": 3.8412506580352783, |
|
"rewards/rejected": -7.594828128814697, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9163125808239424, |
|
"grad_norm": 15.168319053661703, |
|
"learning_rate": 1.0393129385436823e-08, |
|
"logits/chosen": -0.41962409019470215, |
|
"logits/rejected": -0.5582268238067627, |
|
"logps/chosen": -480.22650146484375, |
|
"logps/rejected": -835.0930786132812, |
|
"loss": 0.2639, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.7359073162078857, |
|
"rewards/margins": 3.5815021991729736, |
|
"rewards/rejected": -7.317408561706543, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9261654042736621, |
|
"grad_norm": 14.145988270109239, |
|
"learning_rate": 8.078497137373242e-09, |
|
"logits/chosen": -0.3046739399433136, |
|
"logits/rejected": -0.6777099967002869, |
|
"logps/chosen": -452.22406005859375, |
|
"logps/rejected": -792.261474609375, |
|
"loss": 0.2466, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.451488494873047, |
|
"rewards/margins": 3.4896602630615234, |
|
"rewards/rejected": -6.941148281097412, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.936018227723382, |
|
"grad_norm": 17.26395488122693, |
|
"learning_rate": 6.0509043431410945e-09, |
|
"logits/chosen": -0.47448891401290894, |
|
"logits/rejected": -0.5780371427536011, |
|
"logps/chosen": -473.33148193359375, |
|
"logps/rejected": -845.0810546875, |
|
"loss": 0.2735, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.652672290802002, |
|
"rewards/margins": 3.7301712036132812, |
|
"rewards/rejected": -7.382843971252441, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9458710511731018, |
|
"grad_norm": 13.502447902766075, |
|
"learning_rate": 4.312756738160145e-09, |
|
"logits/chosen": -0.4307557940483093, |
|
"logits/rejected": -0.609841525554657, |
|
"logps/chosen": -455.236572265625, |
|
"logps/rejected": -856.6760864257812, |
|
"loss": 0.2483, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.540134906768799, |
|
"rewards/margins": 3.9815449714660645, |
|
"rewards/rejected": -7.521679878234863, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9557238746228216, |
|
"grad_norm": 14.00663655974014, |
|
"learning_rate": 2.8661166316229223e-09, |
|
"logits/chosen": -0.34750238060951233, |
|
"logits/rejected": -0.5667217969894409, |
|
"logps/chosen": -460.013671875, |
|
"logps/rejected": -754.5597534179688, |
|
"loss": 0.2729, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.5511062145233154, |
|
"rewards/margins": 2.9699506759643555, |
|
"rewards/rejected": -6.52105712890625, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9655766980725414, |
|
"grad_norm": 14.399934375372629, |
|
"learning_rate": 1.7127004595681727e-09, |
|
"logits/chosen": -0.37273770570755005, |
|
"logits/rejected": -0.6115278005599976, |
|
"logps/chosen": -470.6495056152344, |
|
"logps/rejected": -818.9450073242188, |
|
"loss": 0.2724, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.666602611541748, |
|
"rewards/margins": 3.5529751777648926, |
|
"rewards/rejected": -7.219576835632324, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9754295215222613, |
|
"grad_norm": 13.760376876126982, |
|
"learning_rate": 8.538767483325383e-10, |
|
"logits/chosen": -0.3895708918571472, |
|
"logits/rejected": -0.6080259084701538, |
|
"logps/chosen": -463.2498474121094, |
|
"logps/rejected": -805.6424560546875, |
|
"loss": 0.2489, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.570629119873047, |
|
"rewards/margins": 3.4926178455352783, |
|
"rewards/rejected": -7.063246726989746, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9852823449719811, |
|
"grad_norm": 23.653347787632313, |
|
"learning_rate": 2.9066449079634404e-10, |
|
"logits/chosen": -0.37257179617881775, |
|
"logits/rejected": -0.5408270955085754, |
|
"logps/chosen": -501.0938415527344, |
|
"logps/rejected": -795.1988525390625, |
|
"loss": 0.2689, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.9710209369659424, |
|
"rewards/margins": 2.9601473808288574, |
|
"rewards/rejected": -6.931168556213379, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9951351684217008, |
|
"grad_norm": 14.616427588956885, |
|
"learning_rate": 2.3731937350224273e-11, |
|
"logits/chosen": -0.4466114044189453, |
|
"logits/rejected": -0.6212998032569885, |
|
"logps/chosen": -427.31536865234375, |
|
"logps/rejected": -899.2130126953125, |
|
"loss": 0.268, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.2273106575012207, |
|
"rewards/margins": 4.719491481781006, |
|
"rewards/rejected": -7.946801662445068, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9990762978015888, |
|
"step": 507, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3691282767280789, |
|
"train_runtime": 28319.5283, |
|
"train_samples_per_second": 2.294, |
|
"train_steps_per_second": 0.018 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 507, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|