test-repo / last-checkpoint /trainer_state.json
SystemAdmin123's picture
Training in progress, step 160, checkpoint
e61e32d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.517241379310345,
"eval_steps": 16,
"global_step": 160,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.034482758620689655,
"eval_loss": 11.93175220489502,
"eval_runtime": 0.8699,
"eval_samples_per_second": 173.589,
"eval_steps_per_second": 6.898,
"step": 1
},
{
"epoch": 0.3448275862068966,
"grad_norm": 0.04096066579222679,
"learning_rate": 9.995728791936504e-05,
"loss": 11.9309,
"step": 10
},
{
"epoch": 0.5517241379310345,
"eval_loss": 11.927915573120117,
"eval_runtime": 0.9134,
"eval_samples_per_second": 165.318,
"eval_steps_per_second": 6.569,
"step": 16
},
{
"epoch": 0.6896551724137931,
"grad_norm": 0.10107108950614929,
"learning_rate": 9.847001329696653e-05,
"loss": 11.9282,
"step": 20
},
{
"epoch": 1.0344827586206897,
"grad_norm": 0.04157465696334839,
"learning_rate": 9.491954909459895e-05,
"loss": 11.9236,
"step": 30
},
{
"epoch": 1.103448275862069,
"eval_loss": 11.921222686767578,
"eval_runtime": 0.939,
"eval_samples_per_second": 160.809,
"eval_steps_per_second": 6.39,
"step": 32
},
{
"epoch": 1.3793103448275863,
"grad_norm": 0.025179261341691017,
"learning_rate": 8.945702546981969e-05,
"loss": 11.9214,
"step": 40
},
{
"epoch": 1.6551724137931034,
"eval_loss": 11.91930103302002,
"eval_runtime": 0.8997,
"eval_samples_per_second": 167.834,
"eval_steps_per_second": 6.669,
"step": 48
},
{
"epoch": 1.7241379310344827,
"grad_norm": 0.024184564128518105,
"learning_rate": 8.231496189304704e-05,
"loss": 11.9204,
"step": 50
},
{
"epoch": 2.0689655172413794,
"grad_norm": 0.029018325731158257,
"learning_rate": 7.379736965185368e-05,
"loss": 11.9198,
"step": 60
},
{
"epoch": 2.206896551724138,
"eval_loss": 11.917800903320312,
"eval_runtime": 0.8209,
"eval_samples_per_second": 183.936,
"eval_steps_per_second": 7.309,
"step": 64
},
{
"epoch": 2.413793103448276,
"grad_norm": 0.025336025282740593,
"learning_rate": 6.426681121245527e-05,
"loss": 11.919,
"step": 70
},
{
"epoch": 2.7586206896551726,
"grad_norm": 0.027750149369239807,
"learning_rate": 5.4128967273616625e-05,
"loss": 11.9188,
"step": 80
},
{
"epoch": 2.7586206896551726,
"eval_loss": 11.916128158569336,
"eval_runtime": 0.8846,
"eval_samples_per_second": 170.691,
"eval_steps_per_second": 6.782,
"step": 80
},
{
"epoch": 3.103448275862069,
"grad_norm": 0.017773684114217758,
"learning_rate": 4.381536843653262e-05,
"loss": 11.9181,
"step": 90
},
{
"epoch": 3.310344827586207,
"eval_loss": 11.915090560913086,
"eval_runtime": 0.9023,
"eval_samples_per_second": 167.348,
"eval_steps_per_second": 6.65,
"step": 96
},
{
"epoch": 3.4482758620689653,
"grad_norm": 0.02540409192442894,
"learning_rate": 3.3765026539765834e-05,
"loss": 11.9176,
"step": 100
},
{
"epoch": 3.793103448275862,
"grad_norm": 0.022573214024305344,
"learning_rate": 2.4405747545519963e-05,
"loss": 11.9175,
"step": 110
},
{
"epoch": 3.862068965517241,
"eval_loss": 11.91474723815918,
"eval_runtime": 0.8995,
"eval_samples_per_second": 167.874,
"eval_steps_per_second": 6.67,
"step": 112
},
{
"epoch": 4.137931034482759,
"grad_norm": 0.02882819063961506,
"learning_rate": 1.6135921418712956e-05,
"loss": 11.9174,
"step": 120
},
{
"epoch": 4.413793103448276,
"eval_loss": 11.914271354675293,
"eval_runtime": 0.8796,
"eval_samples_per_second": 171.676,
"eval_steps_per_second": 6.822,
"step": 128
},
{
"epoch": 4.482758620689655,
"grad_norm": 0.025010688230395317,
"learning_rate": 9.307564136490254e-06,
"loss": 11.9173,
"step": 130
},
{
"epoch": 4.827586206896552,
"grad_norm": 0.025940613821148872,
"learning_rate": 4.2113336672471245e-06,
"loss": 11.917,
"step": 140
},
{
"epoch": 4.9655172413793105,
"eval_loss": 11.914034843444824,
"eval_runtime": 0.8301,
"eval_samples_per_second": 181.898,
"eval_steps_per_second": 7.228,
"step": 144
},
{
"epoch": 5.172413793103448,
"grad_norm": 0.020017573609948158,
"learning_rate": 1.064157733632276e-06,
"loss": 11.9172,
"step": 150
},
{
"epoch": 5.517241379310345,
"grad_norm": 0.02289649285376072,
"learning_rate": 0.0,
"loss": 11.9169,
"step": 160
},
{
"epoch": 5.517241379310345,
"eval_loss": 11.914515495300293,
"eval_runtime": 0.8607,
"eval_samples_per_second": 175.434,
"eval_steps_per_second": 6.971,
"step": 160
}
],
"logging_steps": 10,
"max_steps": 160,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 40,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2590003691520.0,
"train_batch_size": 7,
"trial_name": null,
"trial_params": null
}