SystemAdmin123 committed (verified)
Commit 311c564 · Parent: e9fb6d1

Training in progress, step 40

adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "JackFram/llama-68m",
+  "base_model_name_or_path": "peft-internal-testing/tiny-dummy-qwen2",
   "bias": "none",
   "eva_config": null,
   "exclude_modules": null,
@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
+    "v_proj",
+    "o_proj",
     "gate_proj",
+    "q_proj",
     "down_proj",
-    "o_proj",
-    "up_proj",
     "k_proj",
-    "v_proj"
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c65441d6870d9a0279debad33f958407443b27e597864c7d0e071f8707f2cd6c
-size 18091656
+oid sha256:159277feec320a7592284f9e3696651e378c75cb98a9616d5fc9a41efc0eec40
+size 183784
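
The checkpoint shrinks from ~18 MB to ~180 KB because the base is now the tiny test Qwen2 described in config.json below: with hidden_size 8, two layers, and rank 128 over the seven projections, the adapter holds roughly 45k float32 parameters, which matches the new size. A sketch for inspecting a local copy of such a file, assuming the safetensors package is installed:

    # Sketch: listing the tensors inside a downloaded adapter checkpoint.
    # "adapter_model.safetensors" is assumed to be a local copy of this file.
    from safetensors import safe_open

    with safe_open("adapter_model.safetensors", framework="pt") as f:
        total = 0
        for name in f.keys():
            tensor = f.get_tensor(name)
            total += tensor.numel()
            print(f"{name}: {tuple(tensor.shape)}")
    print("total parameters:", total)
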
added_tokens.json CHANGED
@@ -1,4 +1,5 @@
 {
-  "<|im_end|>": 32000,
-  "<|im_start|>": 32001
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
 }
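
The ChatML token ids move from the extended-Llama range (32000+) to Qwen2's reserved range (151643+). A quick check, assuming transformers is installed and the Hub repo is reachable:

    # Sketch: verifying the added-token ids against the new tokenizer.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("peft-internal-testing/tiny-dummy-qwen2")
    for token in ("<|endoftext|>", "<|im_start|>", "<|im_end|>"):
        print(token, tok.convert_tokens_to_ids(token))
    # expected ids: 151643, 151644, 151645
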
axolotl_config.yaml CHANGED
@@ -1,5 +1,6 @@
-base_model: unsloth/OpenHermes-2.5-Mistral-7B
-batch_size: 32
+adapter: lora
+base_model: peft-internal-testing/tiny-dummy-qwen2
+batch_size: 64
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
@@ -15,36 +16,42 @@ datasets:
   system_prompt: ''
 device_map: auto
 eval_sample_packing: false
-eval_steps: 200
+eval_steps: 0.1
 flash_attention: true
 gradient_checkpointing: true
 group_by_length: true
 hub_model_id: SystemAdmin123/test-repo
 hub_strategy: checkpoint
-learning_rate: 0.0002
+learning_rate: 0.0001
 logging_steps: 10
+lora_alpha: 256
+lora_dropout: 0.1
+lora_r: 128
+lora_target_linear: true
 lr_scheduler: cosine
-max_steps: 10000
-micro_batch_size: 2
+max_steps: 160.0
+micro_batch_size: 7
 model_type: AutoModelForCausalLM
-num_epochs: 100
+num_epochs: 10000
 optimizer: adamw_bnb_8bit
 output_dir: /root/.sn56/axolotl/tmp/test-repo
 pad_to_sequence_len: true
 resize_token_embeddings_to_32x: false
 sample_packing: true
-save_steps: 200
+save_steps: 40
 save_total_limit: 1
 sequence_len: 2048
-tokenizer_type: LlamaTokenizerFast
+tokenizer_type: Qwen2TokenizerFast
 torch_dtype: bf16
 training_args_kwargs:
+  disable_tqdm: true
   hub_private_repo: true
+  save_only_model: true
 trust_remote_code: true
-val_set_size: 0.1
+val_set_size: 0.01
 wandb_entity: ''
 wandb_mode: online
-wandb_name: unsloth/OpenHermes-2.5-Mistral-7B-argilla/databricks-dolly-15k-curated-en
+wandb_name: peft-internal-testing/tiny-dummy-qwen2-argilla/databricks-dolly-15k-curated-en
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: default
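
The new run is a LoRA recipe (adapter: lora plus the lora_* keys) against the tiny test model, and save_steps: 40 is what produces the "step 40" checkpoint named in the commit message. Two values look suspect: max_steps: 160.0 is a float where an integer step count is expected, and batch_size: 64 is not evenly divisible by micro_batch_size: 7. A rough sketch of the bookkeeping, under the assumption that batch_size is the effective global batch and gradient accumulation is derived from it:

    # Sketch: rough training bookkeeping for this config. Assumes
    # grad_accum = batch_size // micro_batch_size, which rounds down
    # because 64 is not divisible by 7.
    batch_size = 64        # from axolotl_config.yaml
    micro_batch_size = 7
    save_steps = 40
    max_steps = 160

    grad_accum = batch_size // micro_batch_size        # 9
    sequences_per_step = micro_batch_size * grad_accum  # 63, not 64
    print("sequences seen by step 40:", sequences_per_step * save_steps)
    print("checkpoints over the run:", max_steps // save_steps)
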
config.json CHANGED
@@ -1,28 +1,29 @@
 {
-  "_name_or_path": "unsloth/OpenHermes-2.5-Mistral-7B",
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "peft-internal-testing/tiny-dummy-qwen2",
   "architectures": [
-    "MistralForCausalLM"
+    "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
-  "bos_token_id": 1,
-  "eos_token_id": 32000,
-  "head_dim": 128,
+  "eos_token_id": 151643,
   "hidden_act": "silu",
-  "hidden_size": 4096,
+  "hidden_size": 8,
   "initializer_range": 0.02,
-  "intermediate_size": 14336,
+  "intermediate_size": 32,
   "max_position_embeddings": 32768,
-  "model_type": "mistral",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 8,
-  "pad_token_id": 0,
-  "rms_norm_eps": 1e-05,
-  "rope_theta": 10000.0,
-  "sliding_window": 4096,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.48.1",
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
   "use_cache": false,
-  "vocab_size": 32002
+  "use_sliding_window": false,
+  "vocab_size": 151936
 }
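
The replacement base is a deliberately tiny Qwen2 used for integration tests: two layers, hidden size 8, four attention heads, with tied embeddings over the full 151,936-token vocabulary, so nearly all of its weight sits in the embedding table. A sketch to confirm the scale, assuming transformers is installed and the Hub is reachable:

    # Sketch: loading the tiny base model and counting its parameters.
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "peft-internal-testing/tiny-dummy-qwen2"
    )
    print(model.config.hidden_size, model.config.num_hidden_layers)  # 8 2
    print("parameters:", sum(p.numel() for p in model.parameters()))
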
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,27 +1,17 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
   "eos_token": {
-    "content": "<|im_end|>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04222cd76979c181cd3f72c3bf6982fe2a09d9f4b8f23d82902efde18f1d0668
-size 3506125
+oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
+size 11418266
tokenizer_config.json CHANGED
@@ -1,64 +1,45 @@
 {
-  "add_bos_token": true,
-  "add_eos_token": false,
-  "add_prefix_space": true,
+  "add_prefix_space": false,
   "added_tokens_decoder": {
-    "0": {
-      "content": "<unk>",
+    "151643": {
+      "content": "<|endoftext|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "2": {
-      "content": "</s>",
+    "151644": {
+      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "32000": {
+    "151645": {
       "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "32001": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
-  "additional_special_tokens": [],
-  "bos_token": "<s>",
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "bos_token": null,
   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|im_end|>",
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
   "extra_special_tokens": {},
-  "legacy": true,
   "model_max_length": 32768,
-  "pad_token": "<unk>",
-  "padding_side": "right",
-  "sp_model_kwargs": {},
-  "spaces_between_special_tokens": false,
-  "tokenizer_class": "LlamaTokenizer",
-  "trust_remote_code": false,
-  "unk_token": "<unk>",
-  "use_default_system_prompt": true,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null,
   "use_fast": true
 }
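
The chat_template itself (standard ChatML) is identical on both sides; what changes is everything around it, from the Llama/sentencepiece special tokens (<s>, </s>, <unk>) to Qwen2's byte-level BPE with <|endoftext|> serving as both eos and pad. A sketch of rendering a prompt through the template, assuming transformers is installed:

    # Sketch: rendering a ChatML prompt with the new tokenizer.
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("peft-internal-testing/tiny-dummy-qwen2")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    prompt = tok.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    print(prompt)  # <|im_start|>system\n...<|im_end|>\n<|im_start|>assistant\n
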
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef3eb8b3b2a8acd873a0906e2cee7771ee216bb2230a5edc2d984d58604ec971
-size 6776
+oid sha256:37476a182ec272b4a6a6254cda6d0ce55ff7d3844823bf9121f7eccf202f4723
+size 6648
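
training_args.bin is the pickled TrainingArguments object the Hugging Face Trainer writes next to each checkpoint; the small size delta just reflects the changed hyperparameters. A sketch for inspecting a local copy (it is a pickle, so only load files you trust; recent torch also requires weights_only=False for non-tensor payloads):

    # Sketch: inspecting a downloaded training_args.bin.
    # Pickle deserialization can run arbitrary code: only load trusted files.
    import torch

    args = torch.load("training_args.bin", weights_only=False)
    print(type(args).__name__)  # TrainingArguments
    print(args.learning_rate, args.save_steps, args.max_steps)
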
vocab.json CHANGED
The diff for this file is too large to render. See raw diff