End of training

Browse files

Files changed (4) hide show

README.md +3 -1
all_results.json +6 -6
train_results.json +6 -6
trainer_state.json +88 -10

README.md CHANGED Viewed

@@ -2,6 +2,8 @@
 base_model: mock_training_run/llama_configs/config.json
 tags:
 - generated_from_trainer
 model-index:
 - name: llama-wikitext
   results: []
@@ -12,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
 # llama-wikitext
-This model is a fine-tuned version of [mock_training_run/llama_configs/config.json](https://huggingface.co/mock_training_run/llama_configs/config.json) on the None dataset.
 ## Model description

 base_model: mock_training_run/llama_configs/config.json
 tags:
 - generated_from_trainer
+datasets:
+- wikitext
 model-index:
 - name: llama-wikitext
   results: []
 # llama-wikitext
+This model is a fine-tuned version of [mock_training_run/llama_configs/config.json](https://huggingface.co/mock_training_run/llama_configs/config.json) on the wikitext wikitext-103-v1 dataset.
 ## Model description

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.76,
-    "train_loss": 10.722885131835938,
-    "train_runtime": 23.7585,
-    "train_samples": 1343,
-    "train_samples_per_second": 56.527,
-    "train_steps_per_second": 0.084
 }

 {
+    "epoch": 1.0,
+    "train_loss": 5.974732427886038,
+    "train_runtime": 1543.433,
+    "train_samples": 67643,
+    "train_samples_per_second": 43.826,
+    "train_steps_per_second": 0.086
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 0.76,
-    "train_loss": 10.722885131835938,
-    "train_runtime": 23.7585,
-    "train_samples": 1343,
-    "train_samples_per_second": 56.527,
-    "train_steps_per_second": 0.084
 }

 {
+    "epoch": 1.0,
+    "train_loss": 5.974732427886038,
+    "train_runtime": 1543.433,
+    "train_samples": 67643,
+    "train_samples_per_second": 43.826,
+    "train_steps_per_second": 0.086
 }

trainer_state.json CHANGED Viewed

@@ -1,28 +1,106 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7619047619047619,
   "eval_steps": 100.0,
-  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.76,
-      "step": 2,
-      "total_flos": 4916124793700352.0,
-      "train_loss": 10.722885131835938,
-      "train_runtime": 23.7585,
-      "train_samples_per_second": 56.527,
-      "train_steps_per_second": 0.084
     }
   ],
   "logging_steps": 10,
-  "max_steps": 2,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 4916124793700352.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9990539262062441,
   "eval_steps": 100.0,
+  "global_step": 132,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.00029279999999999996,
+      "loss": 9.0737,
+      "step": 10
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.0002688,
+      "loss": 7.1389,
+      "step": 20
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002448,
+      "loss": 6.6649,
+      "step": 30
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00022079999999999997,
+      "loss": 6.3274,
+      "step": 40
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00019679999999999999,
+      "loss": 6.0455,
+      "step": 50
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00017279999999999997,
+      "loss": 5.7722,
+      "step": 60
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.00014879999999999998,
+      "loss": 5.6097,
+      "step": 70
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 0.00012479999999999997,
+      "loss": 5.4476,
+      "step": 80
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0001008,
+      "loss": 5.3298,
+      "step": 90
+    },
     {
       "epoch": 0.76,
+      "learning_rate": 7.68e-05,
+      "loss": 5.2202,
+      "step": 100
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 5.279999999999999e-05,
+      "loss": 5.1345,
+      "step": 110
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 2.88e-05,
+      "loss": 5.067,
+      "step": 120
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 4.8e-06,
+      "loss": 5.0283,
+      "step": 130
+    },
+    {
+      "epoch": 1.0,
+      "step": 132,
+      "total_flos": 3.244642363842232e+17,
+      "train_loss": 5.974732427886038,
+      "train_runtime": 1543.433,
+      "train_samples_per_second": 43.826,
+      "train_steps_per_second": 0.086
     }
   ],
   "logging_steps": 10,
+  "max_steps": 132,
   "num_train_epochs": 1,
   "save_steps": 100,
+  "total_flos": 3.244642363842232e+17,
   "trial_name": null,
   "trial_params": null
 }