edpowers committed on
Commit
bdfb628
·
verified ·
1 Parent(s): b027ace

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - alignment-handbook
6
  - generated_from_trainer
7
  - trl
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
+ - text-generation
6
  - alignment-handbook
7
  - generated_from_trainer
8
  - trl
adapter_config.json CHANGED
@@ -20,11 +20,11 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
 
24
  "o_proj",
25
  "q_proj",
26
- "v_proj",
27
- "k_proj"
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "k_proj",
25
  "o_proj",
26
  "q_proj",
27
+ "gate_proj"
 
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68fa615f2e2431c2a4128c632f3e5b71376ad7a9a3fbb213177c8693f0e6db6d
3
  size 616639296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb6d6addf9a5964eb8913cf4d298675cb9044b190b16c9f90bc3a3143890a304
3
  size 616639296
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 5.9171597633136095,
3
  "eval_loss": 1.1831614971160889,
4
- "eval_runtime": 5.5457,
5
  "eval_samples": 169,
6
- "eval_samples_per_second": 16.049,
7
- "eval_steps_per_second": 2.164,
8
- "total_flos": 1.75885655212032e+17,
9
- "train_loss": 0.2793775268793106,
10
- "train_runtime": 1218.5958,
11
  "train_samples": 1346,
12
- "train_samples_per_second": 3.282,
13
- "train_steps_per_second": 0.821
14
  }
 
1
  {
2
+ "epoch": 5.923076923076923,
3
  "eval_loss": 1.1831614971160889,
4
+ "eval_runtime": 5.4341,
5
  "eval_samples": 169,
6
+ "eval_samples_per_second": 16.378,
7
+ "eval_steps_per_second": 2.208,
8
+ "total_flos": 1.7606154086724403e+17,
9
+ "train_loss": 4.579017792905604e-05,
10
+ "train_runtime": 1.6348,
11
  "train_samples": 1346,
12
+ "train_samples_per_second": 2446.747,
13
+ "train_steps_per_second": 611.687
14
  }
config.json CHANGED
@@ -4,8 +4,8 @@
4
  "MistralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 1,
8
- "eos_token_id": 2,
9
  "hidden_act": "silu",
10
  "hidden_size": 4096,
11
  "initializer_range": 0.02,
@@ -15,11 +15,12 @@
15
  "num_attention_heads": 32,
16
  "num_hidden_layers": 32,
17
  "num_key_value_heads": 8,
 
18
  "rms_norm_eps": 1e-05,
19
  "rope_theta": 1000000.0,
20
  "sliding_window": null,
21
  "tie_word_embeddings": false,
22
- "torch_dtype": "float32",
23
  "transformers_version": "4.40.0",
24
  "use_cache": true,
25
  "vocab_size": 32002
 
4
  "MistralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 32000,
8
+ "eos_token_id": 32001,
9
  "hidden_act": "silu",
10
  "hidden_size": 4096,
11
  "initializer_range": 0.02,
 
15
  "num_attention_heads": 32,
16
  "num_hidden_layers": 32,
17
  "num_key_value_heads": 8,
18
+ "pad_token_id": 32001,
19
  "rms_norm_eps": 1e-05,
20
  "rope_theta": 1000000.0,
21
  "sliding_window": null,
22
  "tie_word_embeddings": false,
23
+ "torch_dtype": "float16",
24
  "transformers_version": "4.40.0",
25
  "use_cache": true,
26
  "vocab_size": 32002
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.9171597633136095,
3
  "eval_loss": 1.1831614971160889,
4
- "eval_runtime": 5.5457,
5
  "eval_samples": 169,
6
- "eval_samples_per_second": 16.049,
7
- "eval_steps_per_second": 2.164
8
  }
 
1
  {
2
+ "epoch": 5.923076923076923,
3
  "eval_loss": 1.1831614971160889,
4
+ "eval_runtime": 5.4341,
5
  "eval_samples": 169,
6
+ "eval_samples_per_second": 16.378,
7
+ "eval_steps_per_second": 2.208
8
  }
special_tokens_map.json CHANGED
@@ -1,19 +1,23 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": "</s>",
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<|im_end|>",
21
  "unk_token": {
22
  "content": "<unk>",
23
  "lstrip": false,
tokenizer.json CHANGED
@@ -32,21 +32,21 @@
32
  },
33
  {
34
  "id": 32000,
35
- "content": "<new_token1>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": true,
40
- "special": false
41
  },
42
  {
43
  "id": 32001,
44
- "content": "<new_token2>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": true,
49
- "special": false
50
  }
51
  ],
52
  "normalizer": {
 
32
  },
33
  {
34
  "id": 32000,
35
+ "content": "<|im_start|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  },
42
  {
43
  "id": 32001,
44
+ "content": "<|im_end|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": {
tokenizer_config.json CHANGED
@@ -27,30 +27,33 @@
27
  "special": true
28
  },
29
  "32000": {
30
- "content": "<new_token1>",
31
  "lstrip": false,
32
- "normalized": true,
33
  "rstrip": false,
34
  "single_word": false,
35
- "special": false
36
  },
37
  "32001": {
38
- "content": "<new_token2>",
39
  "lstrip": false,
40
- "normalized": true,
41
  "rstrip": false,
42
  "single_word": false,
43
- "special": false
44
  }
45
  },
46
- "additional_special_tokens": [],
47
- "bos_token": "<s>",
48
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
 
 
 
49
  "clean_up_tokenization_spaces": false,
50
- "eos_token": "</s>",
51
  "legacy": true,
52
  "model_max_length": 1000000000000000019884624838656,
53
- "pad_token": "</s>",
54
  "sp_model_kwargs": {},
55
  "spaces_between_special_tokens": false,
56
  "tokenizer_class": "LlamaTokenizer",
 
27
  "special": true
28
  },
29
  "32000": {
30
+ "content": "<|im_start|>",
31
  "lstrip": false,
32
+ "normalized": false,
33
  "rstrip": false,
34
  "single_word": false,
35
+ "special": true
36
  },
37
  "32001": {
38
+ "content": "<|im_end|>",
39
  "lstrip": false,
40
+ "normalized": false,
41
  "rstrip": false,
42
  "single_word": false,
43
+ "special": true
44
  }
45
  },
46
+ "additional_special_tokens": [
47
+ "<|im_start|>",
48
+ "<|im_end|>"
49
+ ],
50
+ "bos_token": "<|im_start|>",
51
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
52
  "clean_up_tokenization_spaces": false,
53
+ "eos_token": "<|im_end|>",
54
  "legacy": true,
55
  "model_max_length": 1000000000000000019884624838656,
56
+ "pad_token": "<|im_end|>",
57
  "sp_model_kwargs": {},
58
  "spaces_between_special_tokens": false,
59
  "tokenizer_class": "LlamaTokenizer",
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.9171597633136095,
3
- "total_flos": 1.75885655212032e+17,
4
- "train_loss": 0.2793775268793106,
5
- "train_runtime": 1218.5958,
6
  "train_samples": 1346,
7
- "train_samples_per_second": 3.282,
8
- "train_steps_per_second": 0.821
9
  }
 
1
  {
2
+ "epoch": 5.923076923076923,
3
+ "total_flos": 1.7606154086724403e+17,
4
+ "train_loss": 4.579017792905604e-05,
5
+ "train_runtime": 1.6348,
6
  "train_samples": 1346,
7
+ "train_samples_per_second": 2446.747,
8
+ "train_steps_per_second": 611.687
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.9171597633136095,
5
  "eval_steps": 25,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -609,13 +609,13 @@
609
  "step": 1000
610
  },
611
  {
612
- "epoch": 5.9171597633136095,
613
- "step": 1000,
614
- "total_flos": 1.75885655212032e+17,
615
- "train_loss": 0.2793775268793106,
616
- "train_runtime": 1218.5958,
617
- "train_samples_per_second": 3.282,
618
- "train_steps_per_second": 0.821
619
  }
620
  ],
621
  "logging_steps": 25,
@@ -623,7 +623,7 @@
623
  "num_input_tokens_seen": 0,
624
  "num_train_epochs": 6,
625
  "save_steps": 25,
626
- "total_flos": 1.75885655212032e+17,
627
  "train_batch_size": 1,
628
  "trial_name": null,
629
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.923076923076923,
5
  "eval_steps": 25,
6
+ "global_step": 1001,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
609
  "step": 1000
610
  },
611
  {
612
+ "epoch": 5.923076923076923,
613
+ "step": 1001,
614
+ "total_flos": 1.7606154086724403e+17,
615
+ "train_loss": 4.579017792905604e-05,
616
+ "train_runtime": 1.6348,
617
+ "train_samples_per_second": 2446.747,
618
+ "train_steps_per_second": 611.687
619
  }
620
  ],
621
  "logging_steps": 25,
 
623
  "num_input_tokens_seen": 0,
624
  "num_train_epochs": 6,
625
  "save_steps": 25,
626
+ "total_flos": 1.7606154086724403e+17,
627
  "train_batch_size": 1,
628
  "trial_name": null,
629
  "trial_params": null