End of training

Browse files

Files changed (5) hide show

README.md +2 -1
all_results.json +8 -0
train_results.json +8 -0
trainer_state.json +952 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: d1_code_long_paragraphs_0.3k
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # d1_code_long_paragraphs_0.3k
-This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on an unknown dataset.
 ## Model description

 base_model: Qwen/Qwen2.5-7B-Instruct
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: d1_code_long_paragraphs_0.3k
 # d1_code_long_paragraphs_0.3k
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mlfoundations-dev/d1_code_long_paragraphs_0.3k dataset.
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 13.0,
+    "total_flos": 8.969562657652736e+16,
+    "train_loss": 0.4498058209052453,
+    "train_runtime": 2844.3624,
+    "train_samples_per_second": 1.444,
+    "train_steps_per_second": 0.046
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 13.0,
+    "total_flos": 8.969562657652736e+16,
+    "train_loss": 0.4498058209052453,
+    "train_runtime": 2844.3624,
+    "train_samples_per_second": 1.444,
+    "train_steps_per_second": 0.046
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,952 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 13.0,
+  "eval_steps": 500,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.1,
+      "grad_norm": 6.355769085572866,
+      "learning_rate": 7.692307692307694e-07,
+      "loss": 1.0538,
+      "step": 1
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 6.725796217074084,
+      "learning_rate": 1.5384615384615387e-06,
+      "loss": 1.0695,
+      "step": 2
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 6.459256519255048,
+      "learning_rate": 2.307692307692308e-06,
+      "loss": 1.0423,
+      "step": 3
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 5.999082264966995,
+      "learning_rate": 3.0769230769230774e-06,
+      "loss": 1.0264,
+      "step": 4
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 4.525029282049904,
+      "learning_rate": 3.846153846153847e-06,
+      "loss": 0.9858,
+      "step": 5
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 3.385668428730269,
+      "learning_rate": 4.615384615384616e-06,
+      "loss": 0.9473,
+      "step": 6
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 2.593967410267031,
+      "learning_rate": 5.384615384615385e-06,
+      "loss": 0.9616,
+      "step": 7
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 3.4423930422863394,
+      "learning_rate": 6.153846153846155e-06,
+      "loss": 0.9537,
+      "step": 8
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 4.2939943877977385,
+      "learning_rate": 6.923076923076923e-06,
+      "loss": 0.9712,
+      "step": 9
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 4.134153068892652,
+      "learning_rate": 7.692307692307694e-06,
+      "loss": 0.9329,
+      "step": 10
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 3.4101997913922175,
+      "learning_rate": 8.461538461538462e-06,
+      "loss": 0.9116,
+      "step": 11
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.4831682884643413,
+      "learning_rate": 9.230769230769232e-06,
+      "loss": 0.8906,
+      "step": 12
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 2.641980934804885,
+      "learning_rate": 1e-05,
+      "loss": 0.888,
+      "step": 13
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 1.7861730738789814,
+      "learning_rate": 9.998197638354428e-06,
+      "loss": 0.8401,
+      "step": 14
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 1.5706261318322143,
+      "learning_rate": 9.992791852820709e-06,
+      "loss": 0.8405,
+      "step": 15
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 1.3190250051338637,
+      "learning_rate": 9.983786540671052e-06,
+      "loss": 0.7853,
+      "step": 16
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 1.1657856245313856,
+      "learning_rate": 9.971188194237141e-06,
+      "loss": 0.809,
+      "step": 17
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 1.0936925998213487,
+      "learning_rate": 9.955005896229543e-06,
+      "loss": 0.7964,
+      "step": 18
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 1.1025321139904043,
+      "learning_rate": 9.935251313189564e-06,
+      "loss": 0.7674,
+      "step": 19
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.9637286919181353,
+      "learning_rate": 9.911938687078324e-06,
+      "loss": 0.7592,
+      "step": 20
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 0.7965546052760744,
+      "learning_rate": 9.885084825009085e-06,
+      "loss": 0.7301,
+      "step": 21
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 0.9345045295927191,
+      "learning_rate": 9.854709087130261e-06,
+      "loss": 0.7319,
+      "step": 22
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 0.827207962931123,
+      "learning_rate": 9.820833372667813e-06,
+      "loss": 0.7022,
+      "step": 23
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.9353036355923889,
+      "learning_rate": 9.783482104137127e-06,
+      "loss": 0.7054,
+      "step": 24
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.8388906024912361,
+      "learning_rate": 9.742682209735727e-06,
+      "loss": 0.6994,
+      "step": 25
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 1.0256190155173306,
+      "learning_rate": 9.698463103929542e-06,
+      "loss": 0.7086,
+      "step": 26
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 0.9561285472798247,
+      "learning_rate": 9.650856666246693e-06,
+      "loss": 0.7106,
+      "step": 27
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 0.813973955699604,
+      "learning_rate": 9.599897218294122e-06,
+      "loss": 0.6605,
+      "step": 28
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 0.9184475533414902,
+      "learning_rate": 9.54562149901362e-06,
+      "loss": 0.6821,
+      "step": 29
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.8718822822105823,
+      "learning_rate": 9.488068638195072e-06,
+      "loss": 0.6721,
+      "step": 30
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 0.7142973011327313,
+      "learning_rate": 9.427280128266049e-06,
+      "loss": 0.6355,
+      "step": 31
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 0.7095705979851111,
+      "learning_rate": 9.363299794378072e-06,
+      "loss": 0.6443,
+      "step": 32
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 0.6168686019002501,
+      "learning_rate": 9.296173762811084e-06,
+      "loss": 0.6372,
+      "step": 33
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 0.7282282683402107,
+      "learning_rate": 9.225950427718974e-06,
+      "loss": 0.5841,
+      "step": 34
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 0.7295397144841858,
+      "learning_rate": 9.152680416240059e-06,
+      "loss": 0.6316,
+      "step": 35
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 0.6732055967428483,
+      "learning_rate": 9.076416551997721e-06,
+      "loss": 0.627,
+      "step": 36
+    },
+    {
+      "epoch": 3.7,
+      "grad_norm": 0.6528840869757524,
+      "learning_rate": 8.997213817017508e-06,
+      "loss": 0.5588,
+      "step": 37
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 0.6222655223189079,
+      "learning_rate": 8.915129312088112e-06,
+      "loss": 0.5578,
+      "step": 38
+    },
+    {
+      "epoch": 3.9,
+      "grad_norm": 0.7349520019461258,
+      "learning_rate": 8.83022221559489e-06,
+      "loss": 0.5864,
+      "step": 39
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.6896190181121432,
+      "learning_rate": 8.742553740855507e-06,
+      "loss": 0.5926,
+      "step": 40
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 0.7123273822187205,
+      "learning_rate": 8.652187091988516e-06,
+      "loss": 0.5602,
+      "step": 41
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.670018566825503,
+      "learning_rate": 8.559187418346703e-06,
+      "loss": 0.5391,
+      "step": 42
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 0.6706421454390135,
+      "learning_rate": 8.463621767547998e-06,
+      "loss": 0.5395,
+      "step": 43
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 0.6338543455646916,
+      "learning_rate": 8.36555903713785e-06,
+      "loss": 0.5163,
+      "step": 44
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 0.658926893392716,
+      "learning_rate": 8.265069924917925e-06,
+      "loss": 0.5291,
+      "step": 45
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 0.6994883784824381,
+      "learning_rate": 8.162226877976886e-06,
+      "loss": 0.5015,
+      "step": 46
+    },
+    {
+      "epoch": 4.7,
+      "grad_norm": 0.6942846700993004,
+      "learning_rate": 8.057104040460062e-06,
+      "loss": 0.4983,
+      "step": 47
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 0.6827714015671297,
+      "learning_rate": 7.949777200115617e-06,
+      "loss": 0.5043,
+      "step": 48
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 0.6347734217894093,
+      "learning_rate": 7.84032373365578e-06,
+      "loss": 0.5499,
+      "step": 49
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 0.7225122670017611,
+      "learning_rate": 7.728822550972523e-06,
+      "loss": 0.487,
+      "step": 50
+    },
+    {
+      "epoch": 5.1,
+      "grad_norm": 0.6640326473766751,
+      "learning_rate": 7.615354038247889e-06,
+      "loss": 0.4486,
+      "step": 51
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 0.6245036549599394,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.4577,
+      "step": 52
+    },
+    {
+      "epoch": 5.3,
+      "grad_norm": 0.799949180993253,
+      "learning_rate": 7.382843600106539e-06,
+      "loss": 0.4712,
+      "step": 53
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 0.6272988073596274,
+      "learning_rate": 7.263969301848188e-06,
+      "loss": 0.4452,
+      "step": 54
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.8123590971594787,
+      "learning_rate": 7.143462807015271e-06,
+      "loss": 0.4339,
+      "step": 55
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.6902047358374217,
+      "learning_rate": 7.021410994121525e-06,
+      "loss": 0.4274,
+      "step": 56
+    },
+    {
+      "epoch": 5.7,
+      "grad_norm": 0.6451724904881029,
+      "learning_rate": 6.897901855769483e-06,
+      "loss": 0.4314,
+      "step": 57
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 0.7810229654484437,
+      "learning_rate": 6.773024435212678e-06,
+      "loss": 0.4373,
+      "step": 58
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": 0.6851866802377342,
+      "learning_rate": 6.646868762160399e-06,
+      "loss": 0.4397,
+      "step": 59
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.5659145554154605,
+      "learning_rate": 6.519525787871235e-06,
+      "loss": 0.4499,
+      "step": 60
+    },
+    {
+      "epoch": 6.1,
+      "grad_norm": 0.8733716275115586,
+      "learning_rate": 6.391087319582264e-06,
+      "loss": 0.4084,
+      "step": 61
+    },
+    {
+      "epoch": 6.2,
+      "grad_norm": 0.5673193273908216,
+      "learning_rate": 6.261645954321109e-06,
+      "loss": 0.3811,
+      "step": 62
+    },
+    {
+      "epoch": 6.3,
+      "grad_norm": 1.082939657326654,
+      "learning_rate": 6.131295012148613e-06,
+      "loss": 0.4065,
+      "step": 63
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 0.7293073710983305,
+      "learning_rate": 6.000128468880223e-06,
+      "loss": 0.3793,
+      "step": 64
+    },
+    {
+      "epoch": 6.5,
+      "grad_norm": 0.8026538763909412,
+      "learning_rate": 5.8682408883346535e-06,
+      "loss": 0.3871,
+      "step": 65
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 0.8307824369586567,
+      "learning_rate": 5.735727354158581e-06,
+      "loss": 0.3209,
+      "step": 66
+    },
+    {
+      "epoch": 6.7,
+      "grad_norm": 0.8391055191788069,
+      "learning_rate": 5.6026834012766155e-06,
+      "loss": 0.3437,
+      "step": 67
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.700258127801532,
+      "learning_rate": 5.469204947015897e-06,
+      "loss": 0.3581,
+      "step": 68
+    },
+    {
+      "epoch": 6.9,
+      "grad_norm": 0.729209063683552,
+      "learning_rate": 5.335388221955012e-06,
+      "loss": 0.3967,
+      "step": 69
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.6477712484608066,
+      "learning_rate": 5.201329700547077e-06,
+      "loss": 0.3498,
+      "step": 70
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 0.8240736558496558,
+      "learning_rate": 5.067126031566988e-06,
+      "loss": 0.3236,
+      "step": 71
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 0.7866852557309202,
+      "learning_rate": 4.932873968433014e-06,
+      "loss": 0.2949,
+      "step": 72
+    },
+    {
+      "epoch": 7.3,
+      "grad_norm": 0.7666470015059026,
+      "learning_rate": 4.798670299452926e-06,
+      "loss": 0.2926,
+      "step": 73
+    },
+    {
+      "epoch": 7.4,
+      "grad_norm": 0.751803291907828,
+      "learning_rate": 4.664611778044988e-06,
+      "loss": 0.3526,
+      "step": 74
+    },
+    {
+      "epoch": 7.5,
+      "grad_norm": 0.9441667657147972,
+      "learning_rate": 4.530795052984104e-06,
+      "loss": 0.3286,
+      "step": 75
+    },
+    {
+      "epoch": 7.6,
+      "grad_norm": 0.6614312063551343,
+      "learning_rate": 4.397316598723385e-06,
+      "loss": 0.2884,
+      "step": 76
+    },
+    {
+      "epoch": 7.7,
+      "grad_norm": 0.6782553591922619,
+      "learning_rate": 4.264272645841419e-06,
+      "loss": 0.3176,
+      "step": 77
+    },
+    {
+      "epoch": 7.8,
+      "grad_norm": 0.8020010115543638,
+      "learning_rate": 4.131759111665349e-06,
+      "loss": 0.3128,
+      "step": 78
+    },
+    {
+      "epoch": 7.9,
+      "grad_norm": 0.6047840534546456,
+      "learning_rate": 3.999871531119779e-06,
+      "loss": 0.3264,
+      "step": 79
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.7006317191840805,
+      "learning_rate": 3.86870498785139e-06,
+      "loss": 0.2925,
+      "step": 80
+    },
+    {
+      "epoch": 8.1,
+      "grad_norm": 0.8175785828483745,
+      "learning_rate": 3.7383540456788915e-06,
+      "loss": 0.2609,
+      "step": 81
+    },
+    {
+      "epoch": 8.2,
+      "grad_norm": 0.6252429936534438,
+      "learning_rate": 3.6089126804177373e-06,
+      "loss": 0.2625,
+      "step": 82
+    },
+    {
+      "epoch": 8.3,
+      "grad_norm": 0.9757027403168241,
+      "learning_rate": 3.480474212128766e-06,
+      "loss": 0.2625,
+      "step": 83
+    },
+    {
+      "epoch": 8.4,
+      "grad_norm": 0.787826173262127,
+      "learning_rate": 3.3531312378396026e-06,
+      "loss": 0.2508,
+      "step": 84
+    },
+    {
+      "epoch": 8.5,
+      "grad_norm": 0.6119860664769008,
+      "learning_rate": 3.226975564787322e-06,
+      "loss": 0.271,
+      "step": 85
+    },
+    {
+      "epoch": 8.6,
+      "grad_norm": 0.7120003182441815,
+      "learning_rate": 3.1020981442305187e-06,
+      "loss": 0.2599,
+      "step": 86
+    },
+    {
+      "epoch": 8.7,
+      "grad_norm": 0.6035509962137439,
+      "learning_rate": 2.978589005878476e-06,
+      "loss": 0.259,
+      "step": 87
+    },
+    {
+      "epoch": 8.8,
+      "grad_norm": 0.6189259053460058,
+      "learning_rate": 2.8565371929847286e-06,
+      "loss": 0.2786,
+      "step": 88
+    },
+    {
+      "epoch": 8.9,
+      "grad_norm": 0.6364448262362423,
+      "learning_rate": 2.736030698151815e-06,
+      "loss": 0.2535,
+      "step": 89
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 0.5779075096186825,
+      "learning_rate": 2.6171563998934605e-06,
+      "loss": 0.2686,
+      "step": 90
+    },
+    {
+      "epoch": 9.1,
+      "grad_norm": 0.8187626507294279,
+      "learning_rate": 2.5000000000000015e-06,
+      "loss": 0.2543,
+      "step": 91
+    },
+    {
+      "epoch": 9.2,
+      "grad_norm": 0.6290212293797883,
+      "learning_rate": 2.384645961752113e-06,
+      "loss": 0.224,
+      "step": 92
+    },
+    {
+      "epoch": 9.3,
+      "grad_norm": 0.5105484741650185,
+      "learning_rate": 2.2711774490274767e-06,
+      "loss": 0.243,
+      "step": 93
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 0.6525111719625585,
+      "learning_rate": 2.159676266344222e-06,
+      "loss": 0.1934,
+      "step": 94
+    },
+    {
+      "epoch": 9.5,
+      "grad_norm": 0.8135620179358877,
+      "learning_rate": 2.050222799884387e-06,
+      "loss": 0.2434,
+      "step": 95
+    },
+    {
+      "epoch": 9.6,
+      "grad_norm": 0.6136430854939934,
+      "learning_rate": 1.942895959539939e-06,
+      "loss": 0.2389,
+      "step": 96
+    },
+    {
+      "epoch": 9.7,
+      "grad_norm": 0.5621277796125248,
+      "learning_rate": 1.8377731220231144e-06,
+      "loss": 0.2246,
+      "step": 97
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.6153238847848149,
+      "learning_rate": 1.7349300750820758e-06,
+      "loss": 0.1852,
+      "step": 98
+    },
+    {
+      "epoch": 9.9,
+      "grad_norm": 0.5749792494703895,
+      "learning_rate": 1.6344409628621482e-06,
+      "loss": 0.215,
+      "step": 99
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.5490782621363017,
+      "learning_rate": 1.5363782324520033e-06,
+      "loss": 0.2478,
+      "step": 100
+    },
+    {
+      "epoch": 10.1,
+      "grad_norm": 0.5370472408719152,
+      "learning_rate": 1.4408125816532981e-06,
+      "loss": 0.1738,
+      "step": 101
+    },
+    {
+      "epoch": 10.2,
+      "grad_norm": 0.5103995369294385,
+      "learning_rate": 1.347812908011485e-06,
+      "loss": 0.2316,
+      "step": 102
+    },
+    {
+      "epoch": 10.3,
+      "grad_norm": 0.5384926824814891,
+      "learning_rate": 1.257446259144494e-06,
+      "loss": 0.2121,
+      "step": 103
+    },
+    {
+      "epoch": 10.4,
+      "grad_norm": 0.5436679337039739,
+      "learning_rate": 1.1697777844051105e-06,
+      "loss": 0.1843,
+      "step": 104
+    },
+    {
+      "epoch": 10.5,
+      "grad_norm": 0.5938010326332525,
+      "learning_rate": 1.0848706879118893e-06,
+      "loss": 0.2388,
+      "step": 105
+    },
+    {
+      "epoch": 10.6,
+      "grad_norm": 0.5929265334853338,
+      "learning_rate": 1.0027861829824953e-06,
+      "loss": 0.2056,
+      "step": 106
+    },
+    {
+      "epoch": 10.7,
+      "grad_norm": 0.47011857774401083,
+      "learning_rate": 9.235834480022788e-07,
+      "loss": 0.2002,
+      "step": 107
+    },
+    {
+      "epoch": 10.8,
+      "grad_norm": 0.49385701661144776,
+      "learning_rate": 8.473195837599419e-07,
+      "loss": 0.1744,
+      "step": 108
+    },
+    {
+      "epoch": 10.9,
+      "grad_norm": 0.506010826084403,
+      "learning_rate": 7.740495722810271e-07,
+      "loss": 0.1999,
+      "step": 109
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 0.5074223422486763,
+      "learning_rate": 7.03826237188916e-07,
+      "loss": 0.1918,
+      "step": 110
+    },
+    {
+      "epoch": 11.1,
+      "grad_norm": 0.5371363986037742,
+      "learning_rate": 6.367002056219285e-07,
+      "loss": 0.1796,
+      "step": 111
+    },
+    {
+      "epoch": 11.2,
+      "grad_norm": 0.5210587656041868,
+      "learning_rate": 5.727198717339511e-07,
+      "loss": 0.1738,
+      "step": 112
+    },
+    {
+      "epoch": 11.3,
+      "grad_norm": 0.47561484117135805,
+      "learning_rate": 5.119313618049309e-07,
+      "loss": 0.2078,
+      "step": 113
+    },
+    {
+      "epoch": 11.4,
+      "grad_norm": 0.46168861486892365,
+      "learning_rate": 4.54378500986381e-07,
+      "loss": 0.2093,
+      "step": 114
+    },
+    {
+      "epoch": 11.5,
+      "grad_norm": 0.5524412647638766,
+      "learning_rate": 4.001027817058789e-07,
+      "loss": 0.2172,
+      "step": 115
+    },
+    {
+      "epoch": 11.6,
+      "grad_norm": 0.45594210195911783,
+      "learning_rate": 3.49143333753309e-07,
+      "loss": 0.1675,
+      "step": 116
+    },
+    {
+      "epoch": 11.7,
+      "grad_norm": 0.5516466886946935,
+      "learning_rate": 3.015368960704584e-07,
+      "loss": 0.1739,
+      "step": 117
+    },
+    {
+      "epoch": 11.8,
+      "grad_norm": 0.5231450036431963,
+      "learning_rate": 2.573177902642726e-07,
+      "loss": 0.187,
+      "step": 118
+    },
+    {
+      "epoch": 11.9,
+      "grad_norm": 0.5363110524794331,
+      "learning_rate": 2.1651789586287442e-07,
+      "loss": 0.1757,
+      "step": 119
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.519753376130612,
+      "learning_rate": 1.7916662733218848e-07,
+      "loss": 0.2041,
+      "step": 120
+    },
+    {
+      "epoch": 12.1,
+      "grad_norm": 0.43503074772379297,
+      "learning_rate": 1.4529091286973994e-07,
+      "loss": 0.1679,
+      "step": 121
+    },
+    {
+      "epoch": 12.2,
+      "grad_norm": 0.43985553309723496,
+      "learning_rate": 1.1491517499091498e-07,
+      "loss": 0.1927,
+      "step": 122
+    },
+    {
+      "epoch": 12.3,
+      "grad_norm": 0.4299185889135424,
+      "learning_rate": 8.80613129216762e-08,
+      "loss": 0.1882,
+      "step": 123
+    },
+    {
+      "epoch": 12.4,
+      "grad_norm": 0.4115820901227382,
+      "learning_rate": 6.474868681043578e-08,
+      "loss": 0.161,
+      "step": 124
+    },
+    {
+      "epoch": 12.5,
+      "grad_norm": 0.43869984814330887,
+      "learning_rate": 4.499410377045765e-08,
+      "loss": 0.1831,
+      "step": 125
+    },
+    {
+      "epoch": 12.6,
+      "grad_norm": 0.42332984157222775,
+      "learning_rate": 2.8811805762860578e-08,
+      "loss": 0.1807,
+      "step": 126
+    },
+    {
+      "epoch": 12.7,
+      "grad_norm": 0.4292928319812678,
+      "learning_rate": 1.6213459328950355e-08,
+      "loss": 0.2008,
+      "step": 127
+    },
+    {
+      "epoch": 12.8,
+      "grad_norm": 0.4296689410354326,
+      "learning_rate": 7.2081471792911914e-09,
+      "loss": 0.1804,
+      "step": 128
+    },
+    {
+      "epoch": 12.9,
+      "grad_norm": 0.429020498516221,
+      "learning_rate": 1.8023616455731253e-09,
+      "loss": 0.1886,
+      "step": 129
+    },
+    {
+      "epoch": 13.0,
+      "grad_norm": 0.4428955411596461,
+      "learning_rate": 0.0,
+      "loss": 0.2065,
+      "step": 130
+    },
+    {
+      "epoch": 13.0,
+      "step": 130,
+      "total_flos": 8.969562657652736e+16,
+      "train_loss": 0.4498058209052453,
+      "train_runtime": 2844.3624,
+      "train_samples_per_second": 1.444,
+      "train_steps_per_second": 0.046
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 130,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 13,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.969562657652736e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_loss.png ADDED Viewed