|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.517241379310345, |
|
"eval_steps": 16, |
|
"global_step": 160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.034482758620689655, |
|
"eval_loss": 11.93175220489502, |
|
"eval_runtime": 0.8699, |
|
"eval_samples_per_second": 173.589, |
|
"eval_steps_per_second": 6.898, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 0.04096066579222679, |
|
"learning_rate": 9.995728791936504e-05, |
|
"loss": 11.9309, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5517241379310345, |
|
"eval_loss": 11.927915573120117, |
|
"eval_runtime": 0.9134, |
|
"eval_samples_per_second": 165.318, |
|
"eval_steps_per_second": 6.569, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 0.10107108950614929, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 11.9282, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 0.04157465696334839, |
|
"learning_rate": 9.491954909459895e-05, |
|
"loss": 11.9236, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.103448275862069, |
|
"eval_loss": 11.921222686767578, |
|
"eval_runtime": 0.939, |
|
"eval_samples_per_second": 160.809, |
|
"eval_steps_per_second": 6.39, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 0.025179261341691017, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 11.9214, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.6551724137931034, |
|
"eval_loss": 11.91930103302002, |
|
"eval_runtime": 0.8997, |
|
"eval_samples_per_second": 167.834, |
|
"eval_steps_per_second": 6.669, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 0.024184564128518105, |
|
"learning_rate": 8.231496189304704e-05, |
|
"loss": 11.9204, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 0.029018325731158257, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 11.9198, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.206896551724138, |
|
"eval_loss": 11.917800903320312, |
|
"eval_runtime": 0.8209, |
|
"eval_samples_per_second": 183.936, |
|
"eval_steps_per_second": 7.309, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 0.025336025282740593, |
|
"learning_rate": 6.426681121245527e-05, |
|
"loss": 11.919, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 0.027750149369239807, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 11.9188, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"eval_loss": 11.916128158569336, |
|
"eval_runtime": 0.8846, |
|
"eval_samples_per_second": 170.691, |
|
"eval_steps_per_second": 6.782, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.103448275862069, |
|
"grad_norm": 0.017773684114217758, |
|
"learning_rate": 4.381536843653262e-05, |
|
"loss": 11.9181, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.310344827586207, |
|
"eval_loss": 11.915090560913086, |
|
"eval_runtime": 0.9023, |
|
"eval_samples_per_second": 167.348, |
|
"eval_steps_per_second": 6.65, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.4482758620689653, |
|
"grad_norm": 0.02540409192442894, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 11.9176, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.793103448275862, |
|
"grad_norm": 0.022573214024305344, |
|
"learning_rate": 2.4405747545519963e-05, |
|
"loss": 11.9175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.862068965517241, |
|
"eval_loss": 11.91474723815918, |
|
"eval_runtime": 0.8995, |
|
"eval_samples_per_second": 167.874, |
|
"eval_steps_per_second": 6.67, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 0.02882819063961506, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 11.9174, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.413793103448276, |
|
"eval_loss": 11.914271354675293, |
|
"eval_runtime": 0.8796, |
|
"eval_samples_per_second": 171.676, |
|
"eval_steps_per_second": 6.822, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 4.482758620689655, |
|
"grad_norm": 0.025010688230395317, |
|
"learning_rate": 9.307564136490254e-06, |
|
"loss": 11.9173, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.827586206896552, |
|
"grad_norm": 0.025940613821148872, |
|
"learning_rate": 4.2113336672471245e-06, |
|
"loss": 11.917, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.9655172413793105, |
|
"eval_loss": 11.914034843444824, |
|
"eval_runtime": 0.8301, |
|
"eval_samples_per_second": 181.898, |
|
"eval_steps_per_second": 7.228, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 5.172413793103448, |
|
"grad_norm": 0.020017573609948158, |
|
"learning_rate": 1.064157733632276e-06, |
|
"loss": 11.9172, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.517241379310345, |
|
"grad_norm": 0.02289649285376072, |
|
"learning_rate": 0.0, |
|
"loss": 11.9169, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.517241379310345, |
|
"eval_loss": 11.914515495300293, |
|
"eval_runtime": 0.8607, |
|
"eval_samples_per_second": 175.434, |
|
"eval_steps_per_second": 6.971, |
|
"step": 160 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 40, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2590003691520.0, |
|
"train_batch_size": 7, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|