|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002948765204570586, |
|
"eval_steps": 500, |
|
"global_step": 12, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 576.9375, |
|
"epoch": 0.0002457304337142155, |
|
"grad_norm": 0.0, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 12535.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/reward_func": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 879.0625, |
|
"epoch": 0.000491460867428431, |
|
"grad_norm": 0.11151622953359844, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0776, |
|
"num_tokens": 27624.0, |
|
"reward": 0.125, |
|
"reward_std": 0.2314550280570984, |
|
"rewards/reward_func": 0.125, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 923.75, |
|
"epoch": 0.0007371913011426465, |
|
"grad_norm": 1.7342617962764825e-06, |
|
"kl": 0.0003814697265625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 43892.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/reward_func": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 682.9375, |
|
"epoch": 0.000982921734856862, |
|
"grad_norm": 0.1605198642986145, |
|
"kl": 0.00042629241943359375, |
|
"learning_rate": 5e-07, |
|
"loss": -0.1736, |
|
"num_tokens": 56019.0, |
|
"reward": 0.1875, |
|
"reward_std": 0.408231720328331, |
|
"rewards/reward_func": 0.1875, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 729.5, |
|
"epoch": 0.0012286521685710775, |
|
"grad_norm": 0.15191784640809727, |
|
"kl": 0.00035190582275390625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.1212, |
|
"num_tokens": 69587.0, |
|
"reward": 0.0625, |
|
"reward_std": 0.1767766922712326, |
|
"rewards/reward_func": 0.0625, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 680.3125, |
|
"epoch": 0.001474382602285293, |
|
"grad_norm": 0.00010216089929885942, |
|
"kl": 0.0011739730834960938, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 82064.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/reward_func": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 723.1875, |
|
"epoch": 0.0017201130359995086, |
|
"grad_norm": 0.053916157377856225, |
|
"kl": 0.00029850006103515625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0843, |
|
"num_tokens": 96387.0, |
|
"reward": 0.0625, |
|
"reward_std": 0.1767766922712326, |
|
"rewards/reward_func": 0.0625, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 646.1875, |
|
"epoch": 0.001965843469713724, |
|
"grad_norm": 0.16676206881832856, |
|
"kl": 0.0003566741943359375, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0941, |
|
"num_tokens": 108918.0, |
|
"reward": 0.25, |
|
"reward_std": 0.4355512708425522, |
|
"rewards/reward_func": 0.25, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 605.875, |
|
"epoch": 0.0022115739034279398, |
|
"grad_norm": 1.5988321121918276e-06, |
|
"kl": 0.0003833770751953125, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 120428.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/reward_func": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 551.4375, |
|
"epoch": 0.002457304337142155, |
|
"grad_norm": 0.16315342243657902, |
|
"kl": 0.00034236907958984375, |
|
"learning_rate": 5e-07, |
|
"loss": -0.0335, |
|
"num_tokens": 130291.0, |
|
"reward": 0.0625, |
|
"reward_std": 0.1767766922712326, |
|
"rewards/reward_func": 0.0625, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 757.3125, |
|
"epoch": 0.0027030347708563705, |
|
"grad_norm": 1.5633163558139792e-06, |
|
"kl": 0.00029850006103515625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 149048.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/reward_func": 0.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 766.375, |
|
"epoch": 0.002948765204570586, |
|
"grad_norm": 0.08173214068073498, |
|
"kl": 0.00022268295288085938, |
|
"learning_rate": 5e-07, |
|
"loss": 0.1401, |
|
"num_tokens": 162550.0, |
|
"reward": 0.125, |
|
"reward_std": 0.2314550280570984, |
|
"rewards/reward_func": 0.125, |
|
"step": 12 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 4069, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 12, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|