|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9808917197452229, |
|
"eval_steps": 500, |
|
"global_step": 132, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 341.7267630440848, |
|
"epoch": 0.0074309978768577496, |
|
"grad_norm": 0.08086328746974374, |
|
"learning_rate": 0.0, |
|
"loss": -0.0006, |
|
"num_tokens": 3159359.0, |
|
"reward": 0.5170068130606696, |
|
"reward_std": 0.29308582487560453, |
|
"rewards/acc_reward_func": 0.5170068102223533, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"epoch": 0.014861995753715499, |
|
"grad_norm": 0.08087477809734007, |
|
"learning_rate": 7.142857142857142e-08, |
|
"loss": -0.0006, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001838355625901992, |
|
"epoch": 0.022292993630573247, |
|
"grad_norm": 0.07945717618543717, |
|
"learning_rate": 1.4285714285714285e-07, |
|
"loss": -0.0006, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001572738398127036, |
|
"epoch": 0.029723991507430998, |
|
"grad_norm": 0.0826048402223115, |
|
"learning_rate": 2.1428571428571426e-07, |
|
"loss": -0.0006, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 344.3208690824963, |
|
"epoch": 0.037154989384288746, |
|
"grad_norm": 0.07979158138671237, |
|
"learning_rate": 2.857142857142857e-07, |
|
"loss": -0.0028, |
|
"num_tokens": 6223398.0, |
|
"reward": 0.5000000081601597, |
|
"reward_std": 0.30536195990585147, |
|
"rewards/acc_reward_func": 0.5000000081601597, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001808156685867635, |
|
"epoch": 0.044585987261146494, |
|
"grad_norm": 0.08024143057101281, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"loss": -0.0028, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio": 0.00017867709487161067, |
|
"epoch": 0.05201698513800425, |
|
"grad_norm": 0.08005899196723062, |
|
"learning_rate": 4.285714285714285e-07, |
|
"loss": -0.0028, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio": 0.00019149975127047148, |
|
"epoch": 0.059447983014861996, |
|
"grad_norm": 0.0796285692450267, |
|
"learning_rate": 5e-07, |
|
"loss": -0.0028, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 336.19728597005206, |
|
"epoch": 0.06687898089171974, |
|
"grad_norm": 0.0809301416262032, |
|
"learning_rate": 5.714285714285714e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 9136668.0, |
|
"reward": 0.49546485855465844, |
|
"reward_std": 0.3260552350963865, |
|
"rewards/acc_reward_func": 0.49546485855465844, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio": 0.00021897819568125887, |
|
"epoch": 0.07430997876857749, |
|
"grad_norm": 0.08109016776097855, |
|
"learning_rate": 6.428571428571429e-07, |
|
"loss": 0.0007, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.00020755232045693056, |
|
"epoch": 0.08174097664543524, |
|
"grad_norm": 0.08087779783507208, |
|
"learning_rate": 7.142857142857143e-07, |
|
"loss": 0.0007, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio": 0.00024463120367034294, |
|
"epoch": 0.08917197452229299, |
|
"grad_norm": 0.08067403590429052, |
|
"learning_rate": 7.857142857142856e-07, |
|
"loss": 0.0005, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 339.7154235839844, |
|
"epoch": 0.09660297239915075, |
|
"grad_norm": 0.0793808960257417, |
|
"learning_rate": 8.57142857142857e-07, |
|
"loss": -0.0056, |
|
"num_tokens": 12045721.0, |
|
"reward": 0.5907029594693866, |
|
"reward_std": 0.3145202554407574, |
|
"rewards/acc_reward_func": 0.5907029566310701, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio": 0.00019346000789526095, |
|
"epoch": 0.1040339702760085, |
|
"grad_norm": 0.07996587273324629, |
|
"learning_rate": 9.285714285714285e-07, |
|
"loss": -0.0056, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002197972238978504, |
|
"epoch": 0.11146496815286625, |
|
"grad_norm": 0.07844004513920859, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0057, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002786624475750917, |
|
"epoch": 0.11889596602972399, |
|
"grad_norm": 0.07872371102981156, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0058, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 343.09864443824404, |
|
"epoch": 0.12632696390658174, |
|
"grad_norm": 0.06834766428561097, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"num_tokens": 15049348.0, |
|
"reward": 0.5997732579708099, |
|
"reward_std": 0.2505793724031675, |
|
"rewards/acc_reward_func": 0.5997732522941771, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio": 0.00017599576330255893, |
|
"epoch": 0.1337579617834395, |
|
"grad_norm": 0.06874704664521386, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio": 0.0003469426301307976, |
|
"epoch": 0.14118895966029724, |
|
"grad_norm": 0.06727013413601377, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0002, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio": 0.000489247930956827, |
|
"epoch": 0.14861995753715498, |
|
"grad_norm": 0.06670999385159201, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0001, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 350.2063555036272, |
|
"epoch": 0.15605095541401273, |
|
"grad_norm": 0.06650745108644616, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0001, |
|
"num_tokens": 18191946.0, |
|
"reward": 0.6530612352348509, |
|
"reward_std": 0.24400150775909424, |
|
"rewards/acc_reward_func": 0.6530612295582181, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013373352599696124, |
|
"epoch": 0.16348195329087048, |
|
"grad_norm": 0.06586079666953687, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0001, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001875036643815249, |
|
"epoch": 0.17091295116772823, |
|
"grad_norm": 0.06545454216834161, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0002, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio": 0.0003731138775557546, |
|
"epoch": 0.17834394904458598, |
|
"grad_norm": 0.06428724973892952, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0003, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 352.0895749046689, |
|
"epoch": 0.18577494692144372, |
|
"grad_norm": 0.06446210353107223, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0037, |
|
"num_tokens": 21159481.0, |
|
"reward": 0.5736961549236661, |
|
"reward_std": 0.2319913577465784, |
|
"rewards/acc_reward_func": 0.5736961492470333, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001779778396580479, |
|
"epoch": 0.1932059447983015, |
|
"grad_norm": 0.06458451199765729, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0038, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio": 0.00031768538021770796, |
|
"epoch": 0.20063694267515925, |
|
"grad_norm": 0.06403775486903479, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0039, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio": 0.0004978579201547074, |
|
"epoch": 0.208067940552017, |
|
"grad_norm": 0.06384970547978061, |
|
"learning_rate": 1e-06, |
|
"loss": -0.004, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 353.5793733142671, |
|
"epoch": 0.21549893842887474, |
|
"grad_norm": 0.07091531570599886, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0007, |
|
"num_tokens": 24284078.0, |
|
"reward": 0.5782313063031151, |
|
"reward_std": 0.27138930842989967, |
|
"rewards/acc_reward_func": 0.5782313006264823, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio": 0.00023922019664453166, |
|
"epoch": 0.2229299363057325, |
|
"grad_norm": 0.07207676160001003, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0008, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.00028778909801899103, |
|
"epoch": 0.23036093418259024, |
|
"grad_norm": 0.08311384778120834, |
|
"learning_rate": 1e-06, |
|
"loss": -0.001, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio": 0.0005288766468376187, |
|
"epoch": 0.23779193205944799, |
|
"grad_norm": 0.07065550775183596, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0012, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 374.4387802850632, |
|
"epoch": 0.24522292993630573, |
|
"grad_norm": 0.0689708415519406, |
|
"learning_rate": 1e-06, |
|
"loss": 0.003, |
|
"num_tokens": 27446489.0, |
|
"reward": 0.585034022728602, |
|
"reward_std": 0.2801268689689182, |
|
"rewards/acc_reward_func": 0.5850340198902857, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio": 0.00021597234375630726, |
|
"epoch": 0.2526539278131635, |
|
"grad_norm": 0.06897131457392458, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0029, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio": 0.0003070946047609184, |
|
"epoch": 0.26008492569002123, |
|
"grad_norm": 0.0674766183603089, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0028, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0005742841105010095, |
|
"epoch": 0.267515923566879, |
|
"grad_norm": 0.06745873238088902, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0026, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 379.7687145414807, |
|
"epoch": 0.2749469214437367, |
|
"grad_norm": 0.061592227582679786, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0005, |
|
"num_tokens": 30450929.0, |
|
"reward": 0.596371889823959, |
|
"reward_std": 0.2418635892016547, |
|
"rewards/acc_reward_func": 0.5963718841473261, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013849885224425678, |
|
"epoch": 0.2823779193205945, |
|
"grad_norm": 0.06152688747932998, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio": 0.00022306023882785148, |
|
"epoch": 0.2898089171974522, |
|
"grad_norm": 0.06152633134972845, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0003, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio": 0.00031142526720595056, |
|
"epoch": 0.29723991507430997, |
|
"grad_norm": 0.0626993089929395, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 390.8605521065848, |
|
"epoch": 0.3046709129511677, |
|
"grad_norm": 0.0629758366767815, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0036, |
|
"num_tokens": 33596816.0, |
|
"reward": 0.636054433527447, |
|
"reward_std": 0.25603189283893224, |
|
"rewards/acc_reward_func": 0.6360544250124976, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio": 0.00010988019805933748, |
|
"epoch": 0.31210191082802546, |
|
"grad_norm": 0.06378608109398301, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0035, |
|
"step": 42 |
|
}, |
|
{ |
|
"clip_ratio": 0.00025688905103985843, |
|
"epoch": 0.3195329087048832, |
|
"grad_norm": 0.06325047212115749, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0034, |
|
"step": 43 |
|
}, |
|
{ |
|
"clip_ratio": 0.00039352324078901715, |
|
"epoch": 0.32696390658174096, |
|
"grad_norm": 0.06227219214519032, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0032, |
|
"step": 44 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 380.9455857049851, |
|
"epoch": 0.3343949044585987, |
|
"grad_norm": 0.06916759874959395, |
|
"learning_rate": 1e-06, |
|
"loss": 0.005, |
|
"num_tokens": 36816368.0, |
|
"reward": 0.5861678095090956, |
|
"reward_std": 0.2711287704961641, |
|
"rewards/acc_reward_func": 0.5861678066707793, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013988154855074493, |
|
"epoch": 0.34182590233545646, |
|
"grad_norm": 0.06865178228849304, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0049, |
|
"step": 46 |
|
}, |
|
{ |
|
"clip_ratio": 0.00035448711389021595, |
|
"epoch": 0.3492569002123142, |
|
"grad_norm": 0.0674984372138465, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0047, |
|
"step": 47 |
|
}, |
|
{ |
|
"clip_ratio": 0.000534125243402308, |
|
"epoch": 0.35668789808917195, |
|
"grad_norm": 0.06713323621764225, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0045, |
|
"step": 48 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 366.5612284342448, |
|
"epoch": 0.3641188959660297, |
|
"grad_norm": 0.0628565554992099, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0007, |
|
"num_tokens": 39788825.0, |
|
"reward": 0.6405895806494213, |
|
"reward_std": 0.23035428832684243, |
|
"rewards/acc_reward_func": 0.6405895692961556, |
|
"step": 49 |
|
}, |
|
{ |
|
"clip_ratio": 0.00017204703319640387, |
|
"epoch": 0.37154989384288745, |
|
"grad_norm": 0.060413657444765324, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0008, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.00028196911201424274, |
|
"epoch": 0.37898089171974525, |
|
"grad_norm": 0.060029825820231635, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0009, |
|
"step": 51 |
|
}, |
|
{ |
|
"clip_ratio": 0.0005388078487677765, |
|
"epoch": 0.386411889596603, |
|
"grad_norm": 0.05859097094352694, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0011, |
|
"step": 52 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 370.1700730096726, |
|
"epoch": 0.39384288747346075, |
|
"grad_norm": 0.062489857926374105, |
|
"learning_rate": 1e-06, |
|
"loss": -0.002, |
|
"num_tokens": 42977507.0, |
|
"reward": 0.6337868599664598, |
|
"reward_std": 0.2415066155649367, |
|
"rewards/acc_reward_func": 0.6337868571281433, |
|
"step": 53 |
|
}, |
|
{ |
|
"clip_ratio": 0.00015676757123271403, |
|
"epoch": 0.4012738853503185, |
|
"grad_norm": 0.062323008512970825, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0021, |
|
"step": 54 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002369500718833435, |
|
"epoch": 0.40870488322717624, |
|
"grad_norm": 0.06229025216025021, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0023, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.00043342224342354926, |
|
"epoch": 0.416135881104034, |
|
"grad_norm": 0.061363769616563854, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0025, |
|
"step": 56 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 377.649664015997, |
|
"epoch": 0.42356687898089174, |
|
"grad_norm": 0.060693382900306965, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0069, |
|
"num_tokens": 45992960.0, |
|
"reward": 0.6213152082193465, |
|
"reward_std": 0.19496617785521916, |
|
"rewards/acc_reward_func": 0.6213151997043973, |
|
"step": 57 |
|
}, |
|
{ |
|
"clip_ratio": 0.00011746683992983197, |
|
"epoch": 0.4309978768577495, |
|
"grad_norm": 0.057906127015709595, |
|
"learning_rate": 1e-06, |
|
"loss": -0.007, |
|
"step": 58 |
|
}, |
|
{ |
|
"clip_ratio": 0.00015394135230703147, |
|
"epoch": 0.43842887473460723, |
|
"grad_norm": 0.05770813992767556, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0071, |
|
"step": 59 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002990371741318432, |
|
"epoch": 0.445859872611465, |
|
"grad_norm": 0.057169748017192656, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0073, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 385.07256789434524, |
|
"epoch": 0.45329087048832273, |
|
"grad_norm": 0.06503664456534936, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0007, |
|
"num_tokens": 48962556.0, |
|
"reward": 0.6394557903210322, |
|
"reward_std": 0.2468004703876518, |
|
"rewards/acc_reward_func": 0.6394557846443993, |
|
"step": 61 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001142212568083778, |
|
"epoch": 0.4607218683651805, |
|
"grad_norm": 0.07260193604541537, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0006, |
|
"step": 62 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002252710536205476, |
|
"epoch": 0.4681528662420382, |
|
"grad_norm": 0.06540521622742039, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"step": 63 |
|
}, |
|
{ |
|
"clip_ratio": 0.00048679712857674096, |
|
"epoch": 0.47558386411889597, |
|
"grad_norm": 0.06392595201565687, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0001, |
|
"step": 64 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 369.952386765253, |
|
"epoch": 0.4830148619957537, |
|
"grad_norm": 0.0631263236426717, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0013, |
|
"num_tokens": 52081410.0, |
|
"reward": 0.5975056723469779, |
|
"reward_std": 0.24312191580732664, |
|
"rewards/acc_reward_func": 0.5975056723469779, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001545405653554813, |
|
"epoch": 0.49044585987261147, |
|
"grad_norm": 0.06318597960704911, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0012, |
|
"step": 66 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002261997837021703, |
|
"epoch": 0.4978768577494692, |
|
"grad_norm": 0.0629106951108031, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0011, |
|
"step": 67 |
|
}, |
|
{ |
|
"clip_ratio": 0.00041743737771563855, |
|
"epoch": 0.505307855626327, |
|
"grad_norm": 0.0632657220715682, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0008, |
|
"step": 68 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 364.53175281343005, |
|
"epoch": 0.5127388535031847, |
|
"grad_norm": 0.06136012395478247, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0006, |
|
"num_tokens": 55109647.0, |
|
"reward": 0.6020408258551643, |
|
"reward_std": 0.2137400745635941, |
|
"rewards/acc_reward_func": 0.6020408201785314, |
|
"step": 69 |
|
}, |
|
{ |
|
"clip_ratio": 0.00010235635393958849, |
|
"epoch": 0.5201698513800425, |
|
"grad_norm": 0.06243874921426781, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0007, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.00016079150147907924, |
|
"epoch": 0.5276008492569002, |
|
"grad_norm": 0.06340802042111414, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0008, |
|
"step": 71 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002864431884344889, |
|
"epoch": 0.535031847133758, |
|
"grad_norm": 0.060899777207080064, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0011, |
|
"step": 72 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 372.2551051548549, |
|
"epoch": 0.5424628450106157, |
|
"grad_norm": 0.06521913849953302, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0014, |
|
"num_tokens": 58184698.0, |
|
"reward": 0.5793650916644505, |
|
"reward_std": 0.23960900750188602, |
|
"rewards/acc_reward_func": 0.5793650888261341, |
|
"step": 73 |
|
}, |
|
{ |
|
"clip_ratio": 0.00015685576034316217, |
|
"epoch": 0.5498938428874734, |
|
"grad_norm": 0.0650407164221492, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0013, |
|
"step": 74 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002204552958054202, |
|
"epoch": 0.5573248407643312, |
|
"grad_norm": 0.06495775379721254, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0011, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0004874699469447868, |
|
"epoch": 0.564755838641189, |
|
"grad_norm": 0.06475707873807389, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0009, |
|
"step": 76 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 383.8061305454799, |
|
"epoch": 0.5721868365180467, |
|
"grad_norm": 0.06331746668722547, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0031, |
|
"num_tokens": 61126891.0, |
|
"reward": 0.6473922999132247, |
|
"reward_std": 0.2356209299039273, |
|
"rewards/acc_reward_func": 0.6473922942365918, |
|
"step": 77 |
|
}, |
|
{ |
|
"clip_ratio": 0.00015557293539002006, |
|
"epoch": 0.5796178343949044, |
|
"grad_norm": 0.06358059963704424, |
|
"learning_rate": 1e-06, |
|
"loss": 0.003, |
|
"step": 78 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002651862686477779, |
|
"epoch": 0.5870488322717622, |
|
"grad_norm": 0.06242581352023444, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0028, |
|
"step": 79 |
|
}, |
|
{ |
|
"clip_ratio": 0.0004893070893428687, |
|
"epoch": 0.5944798301486199, |
|
"grad_norm": 0.0609064485591393, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0026, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 350.4886692592076, |
|
"epoch": 0.6019108280254777, |
|
"grad_norm": 0.06314986431723936, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0029, |
|
"num_tokens": 64278222.0, |
|
"reward": 0.6802721172571182, |
|
"reward_std": 0.22581943603498594, |
|
"rewards/acc_reward_func": 0.6802721059038526, |
|
"step": 81 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001433593038416889, |
|
"epoch": 0.6093418259023354, |
|
"grad_norm": 0.06290949597812766, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0028, |
|
"step": 82 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002783082295320041, |
|
"epoch": 0.6167728237791932, |
|
"grad_norm": 0.06287850239253823, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0026, |
|
"step": 83 |
|
}, |
|
{ |
|
"clip_ratio": 0.0005158363062600117, |
|
"epoch": 0.6242038216560509, |
|
"grad_norm": 0.06275939403952029, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0024, |
|
"step": 84 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 359.168942406064, |
|
"epoch": 0.6316348195329087, |
|
"grad_norm": 0.07881948540281701, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0073, |
|
"num_tokens": 67289105.0, |
|
"reward": 0.6598639573369708, |
|
"reward_std": 0.24390507791013943, |
|
"rewards/acc_reward_func": 0.659863951660338, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013236766764228896, |
|
"epoch": 0.6390658174097664, |
|
"grad_norm": 0.0653084592600677, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0074, |
|
"step": 86 |
|
}, |
|
{ |
|
"clip_ratio": 0.00017881063554557928, |
|
"epoch": 0.6464968152866242, |
|
"grad_norm": 0.06524851177812671, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0077, |
|
"step": 87 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002778974991261272, |
|
"epoch": 0.6539278131634819, |
|
"grad_norm": 0.06451115804588689, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0079, |
|
"step": 88 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 357.64512852260043, |
|
"epoch": 0.6613588110403397, |
|
"grad_norm": 0.06630152283194597, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"num_tokens": 70432648.0, |
|
"reward": 0.6712018279802232, |
|
"reward_std": 0.24537649963583266, |
|
"rewards/acc_reward_func": 0.6712018194652739, |
|
"step": 89 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001349434973062238, |
|
"epoch": 0.6687898089171974, |
|
"grad_norm": 0.06559670392653466, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0001, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002572811865233927, |
|
"epoch": 0.6762208067940552, |
|
"grad_norm": 0.06520494024347283, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0004, |
|
"step": 91 |
|
}, |
|
{ |
|
"clip_ratio": 0.0007034737652810734, |
|
"epoch": 0.6836518046709129, |
|
"grad_norm": 0.06315058857400183, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0006, |
|
"step": 92 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 353.4535217285156, |
|
"epoch": 0.6910828025477707, |
|
"grad_norm": 0.05958332825443139, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0038, |
|
"num_tokens": 73419374.0, |
|
"reward": 0.6451247235139211, |
|
"reward_std": 0.20120730180115926, |
|
"rewards/acc_reward_func": 0.6451247235139211, |
|
"step": 93 |
|
}, |
|
{ |
|
"clip_ratio": 0.00012057262124255344, |
|
"epoch": 0.6985138004246284, |
|
"grad_norm": 0.060331181041075654, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0037, |
|
"step": 94 |
|
}, |
|
{ |
|
"clip_ratio": 0.00019830298922551308, |
|
"epoch": 0.7059447983014862, |
|
"grad_norm": 0.059953496435375425, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0035, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio": 0.0004421481661709203, |
|
"epoch": 0.7133757961783439, |
|
"grad_norm": 0.059328760204792295, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0032, |
|
"step": 96 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 351.6281244187128, |
|
"epoch": 0.7208067940552016, |
|
"grad_norm": 0.06476127865871668, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0009, |
|
"num_tokens": 76658560.0, |
|
"reward": 0.6360544292699724, |
|
"reward_std": 0.21411728167108127, |
|
"rewards/acc_reward_func": 0.6360544264316559, |
|
"step": 97 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001179320069717332, |
|
"epoch": 0.7282377919320594, |
|
"grad_norm": 0.06488361533451398, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0008, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio": 0.00018475095047116547, |
|
"epoch": 0.7356687898089171, |
|
"grad_norm": 0.06472419917345307, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0006, |
|
"step": 99 |
|
}, |
|
{ |
|
"clip_ratio": 0.000287527184853042, |
|
"epoch": 0.7430997876857749, |
|
"grad_norm": 0.0639335874652328, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0003, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 345.3061276390439, |
|
"epoch": 0.7505307855626328, |
|
"grad_norm": 0.06715329563723746, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0025, |
|
"num_tokens": 79817980.0, |
|
"reward": 0.624716560045878, |
|
"reward_std": 0.24032448941753023, |
|
"rewards/acc_reward_func": 0.6247165515309289, |
|
"step": 101 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013254733875371692, |
|
"epoch": 0.7579617834394905, |
|
"grad_norm": 0.06714369960521104, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0026, |
|
"step": 102 |
|
}, |
|
{ |
|
"clip_ratio": 0.00020292648419161283, |
|
"epoch": 0.7653927813163482, |
|
"grad_norm": 0.06738540272745804, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0029, |
|
"step": 103 |
|
}, |
|
{ |
|
"clip_ratio": 0.00046159424389424243, |
|
"epoch": 0.772823779193206, |
|
"grad_norm": 0.06683308540913054, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0032, |
|
"step": 104 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 344.98299589611236, |
|
"epoch": 0.7802547770700637, |
|
"grad_norm": 0.06213033347416386, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0061, |
|
"num_tokens": 82797505.0, |
|
"reward": 0.7063492139180502, |
|
"reward_std": 0.18706076290635837, |
|
"rewards/acc_reward_func": 0.7063492082414173, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio": 9.041415337595113e-05, |
|
"epoch": 0.7876857749469215, |
|
"grad_norm": 0.06177639834948758, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0061, |
|
"step": 106 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001607231185646794, |
|
"epoch": 0.7951167728237792, |
|
"grad_norm": 0.06109816753456417, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0059, |
|
"step": 107 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002942822845527969, |
|
"epoch": 0.802547770700637, |
|
"grad_norm": 0.05990890192520818, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0056, |
|
"step": 108 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 331.62925792875745, |
|
"epoch": 0.8099787685774947, |
|
"grad_norm": 0.0705484973603166, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0006, |
|
"num_tokens": 85780678.0, |
|
"reward": 0.6575963837759835, |
|
"reward_std": 0.22970955535059884, |
|
"rewards/acc_reward_func": 0.6575963809376671, |
|
"step": 109 |
|
}, |
|
{ |
|
"clip_ratio": 0.00013627276140531258, |
|
"epoch": 0.8174097664543525, |
|
"grad_norm": 0.07009285424608136, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0007, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0002046585505013354, |
|
"epoch": 0.8248407643312102, |
|
"grad_norm": 0.0700883025067068, |
|
"learning_rate": 1e-06, |
|
"loss": -0.001, |
|
"step": 111 |
|
}, |
|
{ |
|
"clip_ratio": 0.0005059108720853969, |
|
"epoch": 0.832271762208068, |
|
"grad_norm": 0.06888801243376852, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0013, |
|
"step": 112 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 322.01588076636904, |
|
"epoch": 0.8397027600849257, |
|
"grad_norm": 0.06536166070879086, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0023, |
|
"num_tokens": 88940424.0, |
|
"reward": 0.742630402247111, |
|
"reward_std": 0.18359530522000223, |
|
"rewards/acc_reward_func": 0.7426303908938453, |
|
"step": 113 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001239214994018853, |
|
"epoch": 0.8471337579617835, |
|
"grad_norm": 0.06527450868376303, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0022, |
|
"step": 114 |
|
}, |
|
{ |
|
"clip_ratio": 0.00020105073227092536, |
|
"epoch": 0.8545647558386412, |
|
"grad_norm": 0.06460779643206258, |
|
"learning_rate": 1e-06, |
|
"loss": 0.002, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio": 0.00045002524607947895, |
|
"epoch": 0.861995753715499, |
|
"grad_norm": 0.06392727815491918, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0017, |
|
"step": 116 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 311.251706077939, |
|
"epoch": 0.8694267515923567, |
|
"grad_norm": 0.07018788315451174, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0027, |
|
"num_tokens": 91757766.0, |
|
"reward": 0.6519274456160409, |
|
"reward_std": 0.20784893667414076, |
|
"rewards/acc_reward_func": 0.6519274342627752, |
|
"step": 117 |
|
}, |
|
{ |
|
"clip_ratio": 0.000157653278403727, |
|
"epoch": 0.8768577494692145, |
|
"grad_norm": 0.0690532669370979, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0028, |
|
"step": 118 |
|
}, |
|
{ |
|
"clip_ratio": 0.00033690567943267524, |
|
"epoch": 0.8842887473460722, |
|
"grad_norm": 0.06835107752563815, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0031, |
|
"step": 119 |
|
}, |
|
{ |
|
"clip_ratio": 0.000745917763283831, |
|
"epoch": 0.89171974522293, |
|
"grad_norm": 0.06660313306459224, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0034, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 324.4727943057106, |
|
"epoch": 0.8991507430997877, |
|
"grad_norm": 0.06970834157267367, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0038, |
|
"num_tokens": 94686801.0, |
|
"reward": 0.6337868613856179, |
|
"reward_std": 0.19493895627203442, |
|
"rewards/acc_reward_func": 0.6337868528706687, |
|
"step": 121 |
|
}, |
|
{ |
|
"clip_ratio": 0.00011362713467817576, |
|
"epoch": 0.9065817409766455, |
|
"grad_norm": 0.06999963278535759, |
|
"learning_rate": 1e-06, |
|
"loss": -0.004, |
|
"step": 122 |
|
}, |
|
{ |
|
"clip_ratio": 0.00021538332068649608, |
|
"epoch": 0.9140127388535032, |
|
"grad_norm": 0.06972968350918567, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0043, |
|
"step": 123 |
|
}, |
|
{ |
|
"clip_ratio": 0.00044560064478511255, |
|
"epoch": 0.921443736730361, |
|
"grad_norm": 0.06882950080744477, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0046, |
|
"step": 124 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 325.98413231259303, |
|
"epoch": 0.9288747346072187, |
|
"grad_norm": 0.07128773867618274, |
|
"learning_rate": 1e-06, |
|
"loss": 0.001, |
|
"num_tokens": 97858699.0, |
|
"reward": 0.6553288144724709, |
|
"reward_std": 0.20388960643183618, |
|
"rewards/acc_reward_func": 0.6553288059575217, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio": 0.0001051227392010679, |
|
"epoch": 0.9363057324840764, |
|
"grad_norm": 0.07130043132021105, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0009, |
|
"step": 126 |
|
}, |
|
{ |
|
"clip_ratio": 0.00021789341259309802, |
|
"epoch": 0.9437367303609342, |
|
"grad_norm": 0.08439067946992163, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0006, |
|
"step": 127 |
|
}, |
|
{ |
|
"clip_ratio": 0.00045677864164601835, |
|
"epoch": 0.9511677282377919, |
|
"grad_norm": 0.0729135580640395, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0002, |
|
"step": 128 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 330.68141392299106, |
|
"epoch": 0.9585987261146497, |
|
"grad_norm": 0.07242470586815654, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0012, |
|
"num_tokens": 100697360.0, |
|
"reward": 0.6746031840642294, |
|
"reward_std": 0.21919804066419601, |
|
"rewards/acc_reward_func": 0.6746031812259129, |
|
"step": 129 |
|
}, |
|
{ |
|
"clip_ratio": 0.00016543883005700385, |
|
"epoch": 0.9660297239915074, |
|
"grad_norm": 0.07263658018223751, |
|
"learning_rate": 1e-06, |
|
"loss": 0.001, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio": 0.00029576754535637085, |
|
"epoch": 0.9734607218683652, |
|
"grad_norm": 0.07191823196569669, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0007, |
|
"step": 131 |
|
}, |
|
{ |
|
"clip_ratio": 0.0006197429174790159, |
|
"epoch": 0.9808917197452229, |
|
"grad_norm": 0.07137316493917, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0004, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.9808917197452229, |
|
"step": 132, |
|
"total_flos": 0.0, |
|
"train_loss": -0.00028816385895769406, |
|
"train_runtime": 58537.3897, |
|
"train_samples_per_second": 0.084, |
|
"train_steps_per_second": 0.002 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 134, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|