{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9912023460410557,
  "eval_steps": 10,
  "global_step": 340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05865102639296188,
      "eval_loss": 0.38869303464889526,
      "eval_runtime": 33.6966,
      "eval_samples_per_second": 22.465,
      "eval_steps_per_second": 5.639,
      "step": 10
    },
    {
      "epoch": 0.11730205278592376,
      "grad_norm": 1.1477874212327048,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.4096,
      "step": 20
    },
    {
      "epoch": 0.11730205278592376,
      "eval_loss": 0.33711880445480347,
      "eval_runtime": 32.8785,
      "eval_samples_per_second": 23.024,
      "eval_steps_per_second": 5.779,
      "step": 20
    },
    {
      "epoch": 0.17595307917888564,
      "eval_loss": 0.2933129668235779,
      "eval_runtime": 32.8713,
      "eval_samples_per_second": 23.029,
      "eval_steps_per_second": 5.78,
      "step": 30
    },
    {
      "epoch": 0.23460410557184752,
      "grad_norm": 1.121589060878037,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.3048,
      "step": 40
    },
    {
      "epoch": 0.23460410557184752,
      "eval_loss": 0.25960347056388855,
      "eval_runtime": 32.9112,
      "eval_samples_per_second": 23.001,
      "eval_steps_per_second": 5.773,
      "step": 40
    },
    {
      "epoch": 0.2932551319648094,
      "eval_loss": 0.24025067687034607,
      "eval_runtime": 32.9388,
      "eval_samples_per_second": 22.982,
      "eval_steps_per_second": 5.768,
      "step": 50
    },
    {
      "epoch": 0.3519061583577713,
      "grad_norm": 1.0677324544458529,
      "learning_rate": 9.990516643685222e-06,
      "loss": 0.2471,
      "step": 60
    },
    {
      "epoch": 0.3519061583577713,
      "eval_loss": 0.228533536195755,
      "eval_runtime": 32.9199,
      "eval_samples_per_second": 22.995,
      "eval_steps_per_second": 5.772,
      "step": 60
    },
    {
      "epoch": 0.41055718475073316,
      "eval_loss": 0.21884050965309143,
      "eval_runtime": 32.9261,
      "eval_samples_per_second": 22.991,
      "eval_steps_per_second": 5.77,
      "step": 70
    },
    {
      "epoch": 0.46920821114369504,
      "grad_norm": 0.8221117546113363,
      "learning_rate": 9.901828808578846e-06,
      "loss": 0.2281,
      "step": 80
    },
    {
      "epoch": 0.46920821114369504,
      "eval_loss": 0.21087835729122162,
      "eval_runtime": 32.9118,
      "eval_samples_per_second": 23.001,
      "eval_steps_per_second": 5.773,
      "step": 80
    },
    {
      "epoch": 0.5278592375366569,
      "eval_loss": 0.2049574851989746,
      "eval_runtime": 33.0853,
      "eval_samples_per_second": 22.88,
      "eval_steps_per_second": 5.743,
      "step": 90
    },
    {
      "epoch": 0.5865102639296188,
      "grad_norm": 0.7863445026416022,
      "learning_rate": 9.721431493385322e-06,
      "loss": 0.2073,
      "step": 100
    },
    {
      "epoch": 0.5865102639296188,
      "eval_loss": 0.20104646682739258,
      "eval_runtime": 33.0368,
      "eval_samples_per_second": 22.914,
      "eval_steps_per_second": 5.751,
      "step": 100
    },
    {
      "epoch": 0.6451612903225806,
      "eval_loss": 0.19683966040611267,
      "eval_runtime": 32.895,
      "eval_samples_per_second": 23.013,
      "eval_steps_per_second": 5.776,
      "step": 110
    },
    {
      "epoch": 0.7038123167155426,
      "grad_norm": 0.7460954708103601,
      "learning_rate": 9.452699794345583e-06,
      "loss": 0.1911,
      "step": 120
    },
    {
      "epoch": 0.7038123167155426,
      "eval_loss": 0.1945473700761795,
      "eval_runtime": 32.8944,
      "eval_samples_per_second": 23.013,
      "eval_steps_per_second": 5.776,
      "step": 120
    },
    {
      "epoch": 0.7624633431085044,
      "eval_loss": 0.1912163645029068,
      "eval_runtime": 32.9495,
      "eval_samples_per_second": 22.975,
      "eval_steps_per_second": 5.766,
      "step": 130
    },
    {
      "epoch": 0.8211143695014663,
      "grad_norm": 0.7504426793167469,
      "learning_rate": 9.100661476680379e-06,
      "loss": 0.1876,
      "step": 140
    },
    {
      "epoch": 0.8211143695014663,
      "eval_loss": 0.18776217103004456,
      "eval_runtime": 32.9203,
      "eval_samples_per_second": 22.995,
      "eval_steps_per_second": 5.772,
      "step": 140
    },
    {
      "epoch": 0.8797653958944281,
      "eval_loss": 0.18520714342594147,
      "eval_runtime": 32.8973,
      "eval_samples_per_second": 23.011,
      "eval_steps_per_second": 5.776,
      "step": 150
    },
    {
      "epoch": 0.9384164222873901,
      "grad_norm": 0.7748459339656144,
      "learning_rate": 8.671902908935942e-06,
      "loss": 0.1887,
      "step": 160
    },
    {
      "epoch": 0.9384164222873901,
      "eval_loss": 0.18254177272319794,
      "eval_runtime": 32.8779,
      "eval_samples_per_second": 23.025,
      "eval_steps_per_second": 5.779,
      "step": 160
    },
    {
      "epoch": 0.9970674486803519,
      "eval_loss": 0.18051140010356903,
      "eval_runtime": 32.9923,
      "eval_samples_per_second": 22.945,
      "eval_steps_per_second": 5.759,
      "step": 170
    },
    {
      "epoch": 1.0527859237536656,
      "grad_norm": 0.9218927864042982,
      "learning_rate": 8.174445837049614e-06,
      "loss": 0.1553,
      "step": 180
    },
    {
      "epoch": 1.0527859237536656,
      "eval_loss": 0.18640676140785217,
      "eval_runtime": 32.9183,
      "eval_samples_per_second": 22.996,
      "eval_steps_per_second": 5.772,
      "step": 180
    },
    {
      "epoch": 1.1114369501466275,
      "eval_loss": 0.18077336251735687,
      "eval_runtime": 32.8676,
      "eval_samples_per_second": 23.032,
      "eval_steps_per_second": 5.781,
      "step": 190
    },
    {
      "epoch": 1.1700879765395895,
      "grad_norm": 0.9109052804191333,
      "learning_rate": 7.617597303598754e-06,
      "loss": 0.1332,
      "step": 200
    },
    {
      "epoch": 1.1700879765395895,
      "eval_loss": 0.1823471635580063,
      "eval_runtime": 32.9625,
      "eval_samples_per_second": 22.965,
      "eval_steps_per_second": 5.764,
      "step": 200
    },
    {
      "epoch": 1.2287390029325513,
      "eval_loss": 0.17943565547466278,
      "eval_runtime": 32.9683,
      "eval_samples_per_second": 22.961,
      "eval_steps_per_second": 5.763,
      "step": 210
    },
    {
      "epoch": 1.2873900293255132,
      "grad_norm": 0.8287150136550924,
      "learning_rate": 7.011775520129363e-06,
      "loss": 0.1349,
      "step": 220
    },
    {
      "epoch": 1.2873900293255132,
      "eval_loss": 0.17819999158382416,
      "eval_runtime": 32.9652,
      "eval_samples_per_second": 22.964,
      "eval_steps_per_second": 5.764,
      "step": 220
    },
    {
      "epoch": 1.3460410557184752,
      "eval_loss": 0.1771620512008667,
      "eval_runtime": 32.8691,
      "eval_samples_per_second": 23.031,
      "eval_steps_per_second": 5.781,
      "step": 230
    },
    {
      "epoch": 1.404692082111437,
      "grad_norm": 0.7173372889400553,
      "learning_rate": 6.368314950360416e-06,
      "loss": 0.1333,
      "step": 240
    },
    {
      "epoch": 1.404692082111437,
      "eval_loss": 0.17452633380889893,
      "eval_runtime": 32.8914,
      "eval_samples_per_second": 23.015,
      "eval_steps_per_second": 5.777,
      "step": 240
    },
    {
      "epoch": 1.4633431085043989,
      "eval_loss": 0.17504557967185974,
      "eval_runtime": 32.8301,
      "eval_samples_per_second": 23.058,
      "eval_steps_per_second": 5.787,
      "step": 250
    },
    {
      "epoch": 1.5219941348973607,
      "grad_norm": 0.874094982171954,
      "learning_rate": 5.699254251008524e-06,
      "loss": 0.1338,
      "step": 260
    },
    {
      "epoch": 1.5219941348973607,
      "eval_loss": 0.17338429391384125,
      "eval_runtime": 32.8983,
      "eval_samples_per_second": 23.01,
      "eval_steps_per_second": 5.775,
      "step": 260
    },
    {
      "epoch": 1.5806451612903225,
      "eval_loss": 0.171478271484375,
      "eval_runtime": 32.8244,
      "eval_samples_per_second": 23.062,
      "eval_steps_per_second": 5.788,
      "step": 270
    },
    {
      "epoch": 1.6392961876832843,
      "grad_norm": 0.7979095239427625,
      "learning_rate": 5.017111037698477e-06,
      "loss": 0.1267,
      "step": 280
    },
    {
      "epoch": 1.6392961876832843,
      "eval_loss": 0.17220577597618103,
      "eval_runtime": 32.7725,
      "eval_samples_per_second": 23.099,
      "eval_steps_per_second": 5.798,
      "step": 280
    },
    {
      "epoch": 1.6979472140762464,
      "eval_loss": 0.16855686902999878,
      "eval_runtime": 32.8059,
      "eval_samples_per_second": 23.075,
      "eval_steps_per_second": 5.792,
      "step": 290
    },
    {
      "epoch": 1.7565982404692082,
      "grad_norm": 0.705233520985163,
      "learning_rate": 4.334647689917734e-06,
      "loss": 0.1317,
      "step": 300
    },
    {
      "epoch": 1.7565982404692082,
      "eval_loss": 0.1680660992860794,
      "eval_runtime": 32.9544,
      "eval_samples_per_second": 22.971,
      "eval_steps_per_second": 5.766,
      "step": 300
    },
    {
      "epoch": 1.8152492668621703,
      "eval_loss": 0.16704507172107697,
      "eval_runtime": 32.8838,
      "eval_samples_per_second": 23.02,
      "eval_steps_per_second": 5.778,
      "step": 310
    },
    {
      "epoch": 1.873900293255132,
      "grad_norm": 0.7535701160217406,
      "learning_rate": 3.6646325766256423e-06,
      "loss": 0.1251,
      "step": 320
    },
    {
      "epoch": 1.873900293255132,
      "eval_loss": 0.16622412204742432,
      "eval_runtime": 32.8893,
      "eval_samples_per_second": 23.017,
      "eval_steps_per_second": 5.777,
      "step": 320
    },
    {
      "epoch": 1.932551319648094,
      "eval_loss": 0.16477644443511963,
      "eval_runtime": 33.006,
      "eval_samples_per_second": 22.935,
      "eval_steps_per_second": 5.757,
      "step": 330
    },
    {
      "epoch": 1.9912023460410557,
      "grad_norm": 0.7373647805377234,
      "learning_rate": 3.019601169804216e-06,
      "loss": 0.1186,
      "step": 340
    },
    {
      "epoch": 1.9912023460410557,
      "eval_loss": 0.1639869511127472,
      "eval_runtime": 32.9932,
      "eval_samples_per_second": 22.944,
      "eval_steps_per_second": 5.759,
      "step": 340
    }
  ],
  "logging_steps": 20,
  "max_steps": 510,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 20,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 31189214167040.0,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null
}