SystemAdmin123 committed on Feb 6

Commit

48e7a7d

verified ·

1 Parent(s): 1b73c15

Training in progress, step 280, checkpoint

Browse files

Files changed (19) hide show

last-checkpoint/config.json +21 -21
last-checkpoint/generation_config.json +3 -3
last-checkpoint/model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/special_tokens_map.json +7 -7
last-checkpoint/tokenizer.json +2 -2
last-checkpoint/tokenizer.model +2 -2
last-checkpoint/tokenizer_config.json +20 -10
last-checkpoint/trainer_state.json +68 -82
last-checkpoint/training_args.bin +1 -1

last-checkpoint/config.json CHANGED Viewed

@@ -1,31 +1,31 @@
 {
-  "_name_or_path": "facebook/opt-125m",
-  "_remove_final_layer_norm": false,
-  "activation_dropout": 0.0,
-  "activation_function": "relu",
   "architectures": [
-    "OPTForCausalLM"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 2,
-  "do_layer_norm_before": true,
-  "dropout": 0.1,
-  "enable_bias": true,
   "eos_token_id": 2,
-  "ffn_dim": 3072,
-  "hidden_size": 768,
-  "init_std": 0.02,
-  "layer_norm_elementwise_affine": true,
-  "layerdrop": 0.0,
   "max_position_embeddings": 2048,
-  "model_type": "opt",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "prefix": "</s>",
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
   "use_cache": false,
-  "vocab_size": 50265,
-  "word_embed_proj_dim": 768
 }

 {
+  "_name_or_path": "trl-internal-testing/tiny-random-LlamaForCausalLM",
   "architectures": [
+    "LlamaForCausalLM"
   ],
+  "attention_bias": false,
   "attention_dropout": 0.0,
+  "bos_token_id": 0,
   "eos_token_id": 2,
+  "head_dim": 4,
+  "hidden_act": "silu",
+  "hidden_size": 16,
+  "initializer_range": 0.02,
+  "intermediate_size": 64,
   "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "pad_token_id": -1,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
   "use_cache": false,
+  "vocab_size": 32000
 }

last-checkpoint/generation_config.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 2,
   "do_sample": true,
-  "eos_token_id": 2,
-  "pad_token_id": 1,
   "transformers_version": "4.48.1"
 }

 {
   "_from_model_config": true,
+  "bos_token_id": 0,
   "do_sample": true,
+  "eos_token_id": 1,
+  "pad_token_id": 2,
   "transformers_version": "4.48.1"
 }

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3bf0e039882cb8534a21049a9eae422969efeb941e895f8a4c7ef11b6251311
-size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:b574796a04bdd5ba75f557b1e3a82835e37b6cbdc5653fbe0523f7ca548452fd
+size 2066752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d31cb99531247a407bd0ab189e70fe76932ae7ac80a92e7fff91acd93dd51fbf
-size 255266042

 version https://git-lfs.github.com/spec/v1
+oid sha256:e060cb2727f49ca568161340577be6a7238338c1d2ddeb535174896912bad9a8
+size 2162798

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4244e02b14075f292321e288260592ee7b67125cf684ebd26eaf0581f4594ece
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e165ab94e4c39d599db6d95416fe6b20ee12e2262e929ee099151db7ce619380
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8573e19fc09fe735b88c17bc4cb10c60d4f6d20c487d2a98e972328ce7086f7d
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9cbc25705f026b6aace4fa255cf880974da196014e31a032948b334d2ca7867
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7dd8d2332c4242fe782ac2b06dc65f086fe275dc54070dd8b1b6b55db6a523d2
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:80e9bf66df8c1928f5af3bf47317ecd1b0184ef7e9f158d6f951cc904ae9cbf3
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:039287d0fcebe2b584486f1236bd450eff963573bbcd49b6a7a20f79dcade161
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcd1ed78c2e5a24c53b901fdcd13d9f77e900b35f51f226710a851a3df6e162f
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b8b2b090ec98f40aee120da35bf56ce68af7ba61c9707550f5308abf642032b
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea83adac914f4574ef740ff11ec2d9bdb09c46e3947a15a096778da1586b49ee
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e37b7ea641f0a2b6ef3119bbb4eb22d8a507e41ea7a48aef62baedf6a95dbe96
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b13ecf7497db0407aadb3d8df50c07202876652d5684788d8266bbef04783351
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c57f7fefc0efd086a6225620bf4308282949e9ddfe6d9fae28dcc0b5a48f321
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a48ddde4793935f0588ab76c54160e3cba7ef48fcd34c2871252358b28649d52
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6b0003f5f4e356cedf7334396fc85ced7e03d5dd1f3501d809d58d22e801c14
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:99eb96a4a039cb5447a1119d95efed52c77ed0a4b17aaf96b194ac2390ed4870
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0985d0257c892696fb7285dad69becde2a1197c31dbd94a987186cfae751de11
 size 1064

last-checkpoint/special_tokens_map.json CHANGED Viewed

@@ -1,29 +1,29 @@
 {
   "bos_token": {
-    "content": "</s>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<pad>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
-    "content": "</s>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

 {
   "bos_token": {
+    "content": "<s>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
     "content": "</s>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<unk>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
+    "content": "<unk>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a80800503667fe0bd639ad10c33879f747ba1582f369b06abb21f3f65d5ad3b
-size 3558658

 version https://git-lfs.github.com/spec/v1
+oid sha256:d606f46a8aa6f29f0a0abdec7c3ffddefc9f9bfe26919532d209a0a850e25029
+size 3619013

last-checkpoint/tokenizer.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
-size 493443

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -1,11 +1,20 @@
 {
   "add_bos_token": true,
-  "add_prefix_space": false,
   "added_tokens_decoder": {
     "1": {
-      "content": "<pad>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -13,21 +22,22 @@
     "2": {
       "content": "</s>",
       "lstrip": false,
-      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
-  "bos_token": "</s>",
   "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "errors": "replace",
   "extra_special_tokens": {},
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
-  "tokenizer_class": "GPT2Tokenizer",
-  "unk_token": "</s>",
   "use_fast": true
 }

 {
   "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "1": {
+      "content": "<s>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
@@ -13,21 +22,22 @@
     "2": {
       "content": "</s>",
       "lstrip": false,
+      "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
+  "bos_token": "<s>",
   "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "extra_special_tokens": {},
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "<unk>",
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": true,
   "use_fast": true
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,238 +1,224 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 100.0,
   "eval_steps": 200,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.3333333333333333,
-      "eval_loss": 3.4279379844665527,
-      "eval_runtime": 2.4852,
-      "eval_samples_per_second": 603.968,
-      "eval_steps_per_second": 2.414,
       "step": 1
     },
     {
       "epoch": 3.3333333333333335,
-      "grad_norm": 3.203125,
       "learning_rate": 0.00019945218953682734,
-      "loss": 6.2462,
       "step": 10
     },
     {
       "epoch": 6.666666666666667,
-      "grad_norm": 2.3125,
       "learning_rate": 0.00019781476007338058,
-      "loss": 5.4442,
       "step": 20
     },
     {
       "epoch": 10.0,
-      "grad_norm": 4.0625,
       "learning_rate": 0.00019510565162951537,
-      "loss": 5.126,
       "step": 30
     },
     {
       "epoch": 13.333333333333334,
-      "grad_norm": 4.5,
       "learning_rate": 0.0001913545457642601,
-      "loss": 4.8032,
       "step": 40
     },
     {
       "epoch": 16.666666666666668,
-      "grad_norm": 3.78125,
       "learning_rate": 0.00018660254037844388,
-      "loss": 4.5229,
       "step": 50
     },
     {
       "epoch": 20.0,
-      "grad_norm": 3.40625,
       "learning_rate": 0.00018090169943749476,
-      "loss": 4.2677,
       "step": 60
     },
     {
       "epoch": 23.333333333333332,
-      "grad_norm": 3.6875,
       "learning_rate": 0.00017431448254773944,
-      "loss": 4.1141,
       "step": 70
     },
     {
       "epoch": 26.666666666666668,
-      "grad_norm": 3.21875,
       "learning_rate": 0.00016691306063588583,
-      "loss": 3.9424,
       "step": 80
     },
     {
       "epoch": 30.0,
-      "grad_norm": 2.953125,
       "learning_rate": 0.00015877852522924732,
-      "loss": 3.7671,
       "step": 90
     },
     {
       "epoch": 33.333333333333336,
-      "grad_norm": 2.890625,
       "learning_rate": 0.00015000000000000001,
-      "loss": 3.6368,
       "step": 100
     },
     {
       "epoch": 36.666666666666664,
-      "grad_norm": 3.328125,
       "learning_rate": 0.00014067366430758004,
-      "loss": 3.5125,
       "step": 110
     },
     {
       "epoch": 40.0,
-      "grad_norm": 2.859375,
       "learning_rate": 0.00013090169943749476,
-      "loss": 3.3889,
       "step": 120
     },
     {
       "epoch": 43.333333333333336,
-      "grad_norm": 2.78125,
       "learning_rate": 0.00012079116908177593,
-      "loss": 3.2976,
       "step": 130
     },
     {
       "epoch": 46.666666666666664,
-      "grad_norm": 3.59375,
       "learning_rate": 0.00011045284632676536,
-      "loss": 3.2194,
       "step": 140
     },
     {
       "epoch": 50.0,
-      "grad_norm": 2.609375,
       "learning_rate": 0.0001,
-      "loss": 3.1415,
       "step": 150
     },
     {
       "epoch": 53.333333333333336,
-      "grad_norm": 2.703125,
       "learning_rate": 8.954715367323468e-05,
-      "loss": 3.0864,
       "step": 160
     },
     {
       "epoch": 56.666666666666664,
-      "grad_norm": 2.921875,
       "learning_rate": 7.920883091822408e-05,
-      "loss": 3.0292,
       "step": 170
     },
     {
       "epoch": 60.0,
-      "grad_norm": 2.703125,
       "learning_rate": 6.909830056250527e-05,
-      "loss": 3.0033,
       "step": 180
     },
     {
       "epoch": 63.333333333333336,
-      "grad_norm": 2.5,
       "learning_rate": 5.9326335692419995e-05,
-      "loss": 2.9631,
       "step": 190
     },
     {
       "epoch": 66.66666666666667,
-      "grad_norm": 1.9453125,
       "learning_rate": 5.000000000000002e-05,
-      "loss": 2.9294,
       "step": 200
     },
     {
       "epoch": 66.66666666666667,
-      "eval_loss": 3.0881452560424805,
-      "eval_runtime": 2.6366,
-      "eval_samples_per_second": 569.285,
-      "eval_steps_per_second": 2.276,
       "step": 200
     },
     {
       "epoch": 70.0,
-      "grad_norm": 2.234375,
       "learning_rate": 4.12214747707527e-05,
-      "loss": 2.8995,
       "step": 210
     },
     {
       "epoch": 73.33333333333333,
-      "grad_norm": 1.8125,
       "learning_rate": 3.308693936411421e-05,
-      "loss": 2.8788,
       "step": 220
     },
     {
       "epoch": 76.66666666666667,
-      "grad_norm": 1.3046875,
       "learning_rate": 2.5685517452260567e-05,
-      "loss": 2.8623,
       "step": 230
     },
     {
       "epoch": 80.0,
-      "grad_norm": 1.34375,
       "learning_rate": 1.9098300562505266e-05,
-      "loss": 2.8459,
       "step": 240
     },
     {
       "epoch": 83.33333333333333,
-      "grad_norm": 1.421875,
       "learning_rate": 1.339745962155613e-05,
-      "loss": 2.8431,
       "step": 250
     },
     {
       "epoch": 86.66666666666667,
-      "grad_norm": 1.421875,
       "learning_rate": 8.645454235739903e-06,
-      "loss": 2.8316,
       "step": 260
     },
     {
       "epoch": 90.0,
-      "grad_norm": 1.203125,
       "learning_rate": 4.8943483704846475e-06,
-      "loss": 2.8333,
       "step": 270
     },
     {
       "epoch": 93.33333333333333,
-      "grad_norm": 1.0703125,
       "learning_rate": 2.1852399266194314e-06,
-      "loss": 2.8327,
       "step": 280
-    },
-    {
-      "epoch": 96.66666666666667,
-      "grad_norm": 1.1640625,
-      "learning_rate": 5.478104631726711e-07,
-      "loss": 2.8307,
-      "step": 290
-    },
-    {
-      "epoch": 100.0,
-      "grad_norm": 1.1796875,
-      "learning_rate": 0.0,
-      "loss": 2.8357,
-      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -247,12 +233,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.774796614362726e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 93.33333333333333,
   "eval_steps": 200,
+  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.3333333333333333,
+      "eval_loss": 10.375551223754883,
+      "eval_runtime": 0.9886,
+      "eval_samples_per_second": 1518.364,
+      "eval_steps_per_second": 6.069,
       "step": 1
     },
     {
       "epoch": 3.3333333333333335,
+      "grad_norm": 0.1015625,
       "learning_rate": 0.00019945218953682734,
+      "loss": 10.3718,
       "step": 10
     },
     {
       "epoch": 6.666666666666667,
+      "grad_norm": 0.162109375,
       "learning_rate": 0.00019781476007338058,
+      "loss": 10.3553,
       "step": 20
     },
     {
       "epoch": 10.0,
+      "grad_norm": 0.291015625,
       "learning_rate": 0.00019510565162951537,
+      "loss": 10.3251,
       "step": 30
     },
     {
       "epoch": 13.333333333333334,
+      "grad_norm": 0.330078125,
       "learning_rate": 0.0001913545457642601,
+      "loss": 10.2723,
       "step": 40
     },
     {
       "epoch": 16.666666666666668,
+      "grad_norm": 0.328125,
       "learning_rate": 0.00018660254037844388,
+      "loss": 10.2096,
       "step": 50
     },
     {
       "epoch": 20.0,
+      "grad_norm": 0.328125,
       "learning_rate": 0.00018090169943749476,
+      "loss": 10.1499,
       "step": 60
     },
     {
       "epoch": 23.333333333333332,
+      "grad_norm": 0.326171875,
       "learning_rate": 0.00017431448254773944,
+      "loss": 10.0935,
       "step": 70
     },
     {
       "epoch": 26.666666666666668,
+      "grad_norm": 0.333984375,
       "learning_rate": 0.00016691306063588583,
+      "loss": 10.0398,
       "step": 80
     },
     {
       "epoch": 30.0,
+      "grad_norm": 0.337890625,
       "learning_rate": 0.00015877852522924732,
+      "loss": 9.9895,
       "step": 90
     },
     {
       "epoch": 33.333333333333336,
+      "grad_norm": 0.341796875,
       "learning_rate": 0.00015000000000000001,
+      "loss": 9.9424,
       "step": 100
     },
     {
       "epoch": 36.666666666666664,
+      "grad_norm": 0.34375,
       "learning_rate": 0.00014067366430758004,
+      "loss": 9.8995,
       "step": 110
     },
     {
       "epoch": 40.0,
+      "grad_norm": 0.345703125,
       "learning_rate": 0.00013090169943749476,
+      "loss": 9.859,
       "step": 120
     },
     {
       "epoch": 43.333333333333336,
+      "grad_norm": 0.34765625,
       "learning_rate": 0.00012079116908177593,
+      "loss": 9.8216,
       "step": 130
     },
     {
       "epoch": 46.666666666666664,
+      "grad_norm": 0.3515625,
       "learning_rate": 0.00011045284632676536,
+      "loss": 9.7872,
       "step": 140
     },
     {
       "epoch": 50.0,
+      "grad_norm": 0.357421875,
       "learning_rate": 0.0001,
+      "loss": 9.7569,
       "step": 150
     },
     {
       "epoch": 53.333333333333336,
+      "grad_norm": 0.35546875,
       "learning_rate": 8.954715367323468e-05,
+      "loss": 9.7325,
       "step": 160
     },
     {
       "epoch": 56.666666666666664,
+      "grad_norm": 0.359375,
       "learning_rate": 7.920883091822408e-05,
+      "loss": 9.712,
       "step": 170
     },
     {
       "epoch": 60.0,
+      "grad_norm": 0.359375,
       "learning_rate": 6.909830056250527e-05,
+      "loss": 9.697,
       "step": 180
     },
     {
       "epoch": 63.333333333333336,
+      "grad_norm": 0.361328125,
       "learning_rate": 5.9326335692419995e-05,
+      "loss": 9.6841,
       "step": 190
     },
     {
       "epoch": 66.66666666666667,
+      "grad_norm": 0.361328125,
       "learning_rate": 5.000000000000002e-05,
+      "loss": 9.6746,
       "step": 200
     },
     {
       "epoch": 66.66666666666667,
+      "eval_loss": 9.681697845458984,
+      "eval_runtime": 0.8666,
+      "eval_samples_per_second": 1732.155,
+      "eval_steps_per_second": 6.924,
       "step": 200
     },
     {
       "epoch": 70.0,
+      "grad_norm": 0.36328125,
       "learning_rate": 4.12214747707527e-05,
+      "loss": 9.6678,
       "step": 210
     },
     {
       "epoch": 73.33333333333333,
+      "grad_norm": 0.36328125,
       "learning_rate": 3.308693936411421e-05,
+      "loss": 9.6641,
       "step": 220
     },
     {
       "epoch": 76.66666666666667,
+      "grad_norm": 0.36328125,
       "learning_rate": 2.5685517452260567e-05,
+      "loss": 9.6616,
       "step": 230
     },
     {
       "epoch": 80.0,
+      "grad_norm": 0.36328125,
       "learning_rate": 1.9098300562505266e-05,
+      "loss": 9.6605,
       "step": 240
     },
     {
       "epoch": 83.33333333333333,
+      "grad_norm": 0.365234375,
       "learning_rate": 1.339745962155613e-05,
+      "loss": 9.6596,
       "step": 250
     },
     {
       "epoch": 86.66666666666667,
+      "grad_norm": 0.36328125,
       "learning_rate": 8.645454235739903e-06,
+      "loss": 9.6597,
       "step": 260
     },
     {
       "epoch": 90.0,
+      "grad_norm": 0.36328125,
       "learning_rate": 4.8943483704846475e-06,
+      "loss": 9.6595,
       "step": 270
     },
     {
       "epoch": 93.33333333333333,
+      "grad_norm": 0.361328125,
       "learning_rate": 2.1852399266194314e-06,
+      "loss": 9.6595,
       "step": 280
     }
   ],
   "logging_steps": 10,
@@ -247,12 +233,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 458257575444480.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d65002d728f63d399f76fb4dbdab764ee962cbce3847fa016dc718e88306dd24
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5805d33db3bd7bbd9181584d8158e2b8616e3f18d56ec44b12ac78d4f859a79
 size 6840