osbm committed on May 14, 2024

Commit

d66234e

verified ·

1 Parent(s): a417fba

Upload 19 files

Browse files

Files changed (19) hide show

README.md +21 -0
adapter_config.json +21 -0
adapter_model.bin +3 -0
checkpoint-300/README.md +21 -0
checkpoint-300/adapter_config.json +21 -0
checkpoint-300/adapter_model.bin +3 -0
checkpoint-300/optimizer.pt +3 -0
checkpoint-300/rng_state.pth +3 -0
checkpoint-300/scheduler.pt +3 -0
checkpoint-300/trainer_state.json +103 -0
checkpoint-300/training_args.bin +3 -0
checkpoint-350/README.md +21 -0
checkpoint-350/adapter_config.json +21 -0
checkpoint-350/adapter_model.bin +3 -0
checkpoint-350/optimizer.pt +3 -0
checkpoint-350/rng_state.pth +3 -0
checkpoint-350/scheduler.pt +3 -0
checkpoint-350/trainer_state.json +117 -0
checkpoint-350/training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: QuantizationMethod.BITS_AND_BYTES
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.5.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "huggyllama/llama-13b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89d68f68c57d2285a9842467dd248325fb228d9b6e738064665722a44ad36140
+size 26272202

checkpoint-300/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: QuantizationMethod.BITS_AND_BYTES
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.5.0

checkpoint-300/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "huggyllama/llama-13b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-300/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:208852bcd4c4d2e15939df532804de08a937f6aa6984d7a05cfdd0c06aea1b58
+size 26272202

checkpoint-300/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:72497e178149970851e1fef8c420af9e8947d73b6f5e58a02a84ffe012367801
+size 52563258

checkpoint-300/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:875a034eff9c5d594adbe7a3c1a892b41caa763dfe833b6b76500f340b834ef1
+size 14244

checkpoint-300/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8b1de87f0ba3d95cc748fb3c6441e806561235c4d56857c438a531c9bda5144
+size 1064

checkpoint-300/trainer_state.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.4423337856173677,
+  "eval_steps": 50,
+  "global_step": 300,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.000980951231800518,
+      "loss": 1.7983,
+      "step": 50
+    },
+    {
+      "epoch": 0.41,
+      "eval_loss": 1.6800895929336548,
+      "eval_runtime": 79.4837,
+      "eval_samples_per_second": 10.304,
+      "eval_steps_per_second": 1.032,
+      "step": 50
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 0.0008643535534997409,
+      "loss": 1.648,
+      "step": 100
+    },
+    {
+      "epoch": 0.81,
+      "eval_loss": 1.6579192876815796,
+      "eval_runtime": 80.5139,
+      "eval_samples_per_second": 10.172,
+      "eval_steps_per_second": 1.018,
+      "step": 100
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 0.000666265691928808,
+      "loss": 1.6202,
+      "step": 150
+    },
+    {
+      "epoch": 1.22,
+      "eval_loss": 1.6476719379425049,
+      "eval_runtime": 81.2124,
+      "eval_samples_per_second": 10.085,
+      "eval_steps_per_second": 1.01,
+      "step": 150
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.00043099136249808665,
+      "loss": 1.5998,
+      "step": 200
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.6405467987060547,
+      "eval_runtime": 80.4615,
+      "eval_samples_per_second": 10.179,
+      "eval_steps_per_second": 1.019,
+      "step": 200
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 0.00021115129043425184,
+      "loss": 1.5781,
+      "step": 250
+    },
+    {
+      "epoch": 2.04,
+      "eval_loss": 1.6351187229156494,
+      "eval_runtime": 79.4429,
+      "eval_samples_per_second": 10.309,
+      "eval_steps_per_second": 1.032,
+      "step": 250
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 5.591422293498632e-05,
+      "loss": 1.5429,
+      "step": 300
+    },
+    {
+      "epoch": 2.44,
+      "eval_loss": 1.6372405290603638,
+      "eval_runtime": 80.2927,
+      "eval_samples_per_second": 10.2,
+      "eval_steps_per_second": 1.021,
+      "step": 300
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 350,
+  "num_train_epochs": 3,
+  "save_steps": 50,
+  "total_flos": 8.421150573099418e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-300/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da97d3940bba432b5b7b3e63fb89cd8284ed694adc54fd644c3d3775609b9ebd
+size 4408

checkpoint-350/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: QuantizationMethod.BITS_AND_BYTES
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.5.0

checkpoint-350/adapter_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "huggyllama/llama-13b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

checkpoint-350/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89d68f68c57d2285a9842467dd248325fb228d9b6e738064665722a44ad36140
+size 26272202

checkpoint-350/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:085ff8fe880f9bfbdc06c00129e893c6c1903227d9da354da01d7a868dd81e0f
+size 52563258

checkpoint-350/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11680d9405f4b2dda071b535e466403b2d487e48706b90da952a3b982a054b1f
+size 14244

checkpoint-350/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6dd94ce3103f26f5db9c664dceaf453ad6516fd669277d7c50dc5cef40087e2
+size 1064

checkpoint-350/trainer_state.json ADDED Viewed

	@@ -0,0 +1,117 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.8493894165535956,
+  "eval_steps": 50,
+  "global_step": 350,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.000980951231800518,
+      "loss": 1.7983,
+      "step": 50
+    },
+    {
+      "epoch": 0.41,
+      "eval_loss": 1.6800895929336548,
+      "eval_runtime": 79.4837,
+      "eval_samples_per_second": 10.304,
+      "eval_steps_per_second": 1.032,
+      "step": 50
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 0.0008643535534997409,
+      "loss": 1.648,
+      "step": 100
+    },
+    {
+      "epoch": 0.81,
+      "eval_loss": 1.6579192876815796,
+      "eval_runtime": 80.5139,
+      "eval_samples_per_second": 10.172,
+      "eval_steps_per_second": 1.018,
+      "step": 100
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 0.000666265691928808,
+      "loss": 1.6202,
+      "step": 150
+    },
+    {
+      "epoch": 1.22,
+      "eval_loss": 1.6476719379425049,
+      "eval_runtime": 81.2124,
+      "eval_samples_per_second": 10.085,
+      "eval_steps_per_second": 1.01,
+      "step": 150
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 0.00043099136249808665,
+      "loss": 1.5998,
+      "step": 200
+    },
+    {
+      "epoch": 1.63,
+      "eval_loss": 1.6405467987060547,
+      "eval_runtime": 80.4615,
+      "eval_samples_per_second": 10.179,
+      "eval_steps_per_second": 1.019,
+      "step": 200
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 0.00021115129043425184,
+      "loss": 1.5781,
+      "step": 250
+    },
+    {
+      "epoch": 2.04,
+      "eval_loss": 1.6351187229156494,
+      "eval_runtime": 79.4429,
+      "eval_samples_per_second": 10.309,
+      "eval_steps_per_second": 1.032,
+      "step": 250
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 5.591422293498632e-05,
+      "loss": 1.5429,
+      "step": 300
+    },
+    {
+      "epoch": 2.44,
+      "eval_loss": 1.6372405290603638,
+      "eval_runtime": 80.2927,
+      "eval_samples_per_second": 10.2,
+      "eval_steps_per_second": 1.021,
+      "step": 300
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 0.0,
+      "loss": 1.5488,
+      "step": 350
+    },
+    {
+      "epoch": 2.85,
+      "eval_loss": 1.6368159055709839,
+      "eval_runtime": 80.9154,
+      "eval_samples_per_second": 10.122,
+      "eval_steps_per_second": 1.013,
+      "step": 350
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 350,
+  "num_train_epochs": 3,
+  "save_steps": 50,
+  "total_flos": 9.826726868965786e+17,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-350/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da97d3940bba432b5b7b3e63fb89cd8284ed694adc54fd644c3d3775609b9ebd
+size 4408