SystemAdmin123 committed on
Commit d11fbdc · verified · 1 Parent(s): a92ca8b

Training in progress, step 400

added_tokens.json CHANGED
@@ -1,5 +1,13 @@
 {
-  "<|endoftext|>": 151643,
-  "<|im_end|>": 151645,
-  "<|im_start|>": 151644
+  "<|assistant|>": 32001,
+  "<|endoftext|>": 32000,
+  "<|end|>": 32007,
+  "<|placeholder1|>": 32002,
+  "<|placeholder2|>": 32003,
+  "<|placeholder3|>": 32004,
+  "<|placeholder4|>": 32005,
+  "<|placeholder5|>": 32008,
+  "<|placeholder6|>": 32009,
+  "<|system|>": 32006,
+  "<|user|>": 32010
 }
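
Note: the old file carried Qwen-style chat markers (<|im_start|>/<|im_end|> at ids above 151k), while the new one lists the Phi-3 chat tokens appended after the 32000-entry SentencePiece base vocabulary. A minimal sketch to confirm the ids resolve as listed, assuming this commit is checked out locally at ./checkpoint and transformers is installed:

# Sketch (assumptions: checkpoint cloned to ./checkpoint, transformers installed).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")

# added_tokens.json entries sit on top of the 32000-entry base vocab.
for token in ["<|endoftext|>", "<|assistant|>", "<|system|>", "<|end|>", "<|user|>"]:
    print(token, "->", tok.convert_tokens_to_ids(token))
# expected ids: 32000, 32001, 32006, 32007, 32010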
axolotl_config.yaml CHANGED
@@ -1,4 +1,4 @@
-base_model: EleutherAI/pythia-70m-deduped
+base_model: Xenova/tiny-random-Phi3ForCausalLM
 batch_size: 32
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
@@ -36,15 +36,13 @@ sample_packing: false
 save_steps: 400
 save_total_limit: 1
 sequence_len: 2048
-special_tokens:
-  pad_token: <|endoftext|>
-tokenizer_type: GPTNeoXTokenizerFast
+tokenizer_type: LlamaTokenizerFast
 torch_dtype: bf16
 trust_remote_code: true
 val_set_size: 0.1
 wandb_entity: ''
 wandb_mode: online
-wandb_name: EleutherAI/pythia-70m-deduped-argilla/databricks-dolly-15k-curated-en
+wandb_name: Xenova/tiny-random-Phi3ForCausalLM-argilla/databricks-dolly-15k-curated-en
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: default
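
The config changes amount to swapping the base model from Pythia-70M to the tiny random Phi-3 test model, dropping the now-unneeded pad_token override, and switching the tokenizer class to match. A hedged sketch for sanity-checking the fields axolotl consumes (file name assumed):

# Sketch (assumptions: PyYAML installed, file named axolotl_config.yaml).
import yaml

with open("axolotl_config.yaml") as f:
    cfg = yaml.safe_load(f)

assert cfg["base_model"] == "Xenova/tiny-random-Phi3ForCausalLM"
assert cfg["tokenizer_type"] == "LlamaTokenizerFast"  # Phi-3 uses a Llama-style tokenizer
assert "special_tokens" not in cfg  # Phi-3 already defines its own pad token
print(cfg["base_model"], cfg["sequence_len"], cfg["save_steps"])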
config.json CHANGED
@@ -1,32 +1,31 @@
 {
-  "_name_or_path": "EleutherAI/pythia-70m-deduped",
+  "_name_or_path": "Xenova/tiny-random-Phi3ForCausalLM",
   "architectures": [
-    "GPTNeoXForCausalLM"
+    "Phi3ForCausalLM"
   ],
-  "attention_bias": true,
   "attention_dropout": 0.0,
-  "bos_token_id": 0,
-  "classifier_dropout": 0.1,
-  "eos_token_id": 0,
-  "hidden_act": "gelu",
-  "hidden_dropout": 0.0,
-  "hidden_size": 512,
+  "bos_token_id": 1,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 32000,
+  "hidden_act": "silu",
+  "hidden_size": 32,
   "initializer_range": 0.02,
-  "intermediate_size": 2048,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 2048,
-  "model_type": "gpt_neox",
-  "num_attention_heads": 8,
-  "num_hidden_layers": 6,
-  "partial_rotary_factor": 0.25,
+  "intermediate_size": 64,
+  "max_position_embeddings": 4096,
+  "model_type": "phi3",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "original_max_position_embeddings": 4096,
+  "pad_token_id": 32000,
+  "resid_pdrop": 0.0,
+  "rms_norm_eps": 1e-05,
   "rope_scaling": null,
-  "rope_theta": 10000,
-  "rotary_emb_base": 10000,
-  "rotary_pct": 0.25,
+  "rope_theta": 10000.0,
+  "sliding_window": 2047,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.48.1",
   "use_cache": false,
-  "use_parallel_residual": true,
-  "vocab_size": 50278
+  "vocab_size": 32011
 }
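
With hidden_size 32, two layers, and a 32011-entry vocabulary, this is a smoke-test model: the untied embedding and LM-head matrices, 2 x 32011 x 32 ≈ 2.05M weights, dominate the roughly 0.02M weights in the transformer blocks. A sketch that rebuilds the architecture from config.json and counts parameters, assuming a transformers version with Phi-3 support:

# Sketch (assumptions: transformers with Phi-3 support, config.json in cwd).
from transformers import Phi3Config, Phi3ForCausalLM

cfg = Phi3Config.from_json_file("config.json")
model = Phi3ForCausalLM(cfg)  # randomly initialized; no weights loaded

n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.2f}M parameters")  # ~2.07M; embeddings dominate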
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87af69528d3a9f5f337cc3e1ceb9b547a6ef7f97a229fb620e469e88f32de172
-size 140808752
+oid sha256:9887d0a75dcfd98e56a4c42489abac39f5f9adfc1e32e149aaacb6e76bd402cd
+size 4140280
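
The weight file shrinks from ~140.8 MB (Pythia-70M in bf16) to ~4.1 MB, consistent with the new config: at two bytes per bf16 parameter, 4,140,280 bytes is roughly 2.07M parameters, matching the estimate above. A sketch of that cross-check, assuming git lfs pull has materialized the real file and the safetensors package is available:

# Sketch (assumptions: `git lfs pull` done, safetensors + torch installed).
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    total = sum(f.get_tensor(key).numel() for key in f.keys())

print(total)      # ~2.07M parameters
print(total * 2)  # bf16 bytes; close to the 4,140,280 LFS size above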
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
 {
   "bos_token": {
-    "content": "<|endoftext|>",
+    "content": "<s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
@@ -21,7 +21,7 @@
     "single_word": false
   },
   "unk_token": {
-    "content": "<|endoftext|>",
+    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c95aac127870725c031582253de8b2d45c9e3e009d893f89bec47a9b26f974aa
-size 3564484
+oid sha256:b013fe7282b7984bc14b1c64c2a70dd06b652a969810fbba6217f4ac70339f44
+size 3621089
tokenizer_config.json CHANGED
@@ -1,10 +1,10 @@
 {
-  "add_bos_token": false,
+  "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
-      "content": "<|endoftext|>",
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -12,214 +12,122 @@
       "special": true
     },
     "1": {
-      "content": "<|padding|>",
+      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "50254": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50255": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50256": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50257": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50258": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50259": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50260": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50261": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50262": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50263": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50264": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50265": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50266": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50267": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50268": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50269": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50270": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50271": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50272": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50273": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50274": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50275": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50276": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50277": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "32000": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<|assistant|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<|placeholder1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<|placeholder2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<|placeholder3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<|placeholder4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<|system|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<|end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<|placeholder5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<|placeholder6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<|user|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
   },
-  "bos_token": "<|endoftext|>",
-  "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}",
+  "bos_token": "<s>",
+  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|endoftext|>",
   "extra_special_tokens": {},
-  "model_max_length": 1000000000000000019884624838656,
+  "legacy": true,
+  "model_max_length": 4096,
   "pad_token": "<|endoftext|>",
-  "tokenizer_class": "GPTNeoXTokenizer",
-  "unk_token": "<|endoftext|>",
+  "padding_side": "left",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
   "use_fast": true
 }
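
The replaced chat_template is the Phi-3 turn format: {{ bos_token }} once at the start, then <|user|>\n...<|end|>\n<|assistant|>\n per user turn and ...<|end|>\n per assistant turn, instead of the old Alpaca-style Instruction/Response template. A sketch rendering it through apply_chat_template (local checkout assumed):

# Sketch (assumption: checkpoint cloned to ./checkpoint).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")
messages = [
    {"role": "user", "content": "What is the capital of France?"},
    {"role": "assistant", "content": "Paris."},
]
print(tok.apply_chat_template(messages, tokenize=False))
# <s><|user|>
# What is the capital of France?<|end|>
# <|assistant|>
# Paris.<|end|>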
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cb39bdcd9027f9839c24593aa51e9c2c6db52de8ff1d60ef6eace2d38a2b7bf
+oid sha256:8ffd459aa83253f73e371e129574dd0434ee79c2c18eb103b0e4428a34062eb2
 size 6840
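
training_args.bin is the torch-pickled TrainingArguments object the Hugging Face Trainer saves next to each checkpoint; an unchanged size with a new hash is expected when only a few scalar fields differ between runs. A sketch for inspecting it, noting that unpickling is only safe for files from trusted sources:

# Sketch (assumptions: file from a trusted source, torch installed).
import torch

# TrainingArguments is a pickled Python object, so weights_only must be off.
args = torch.load("training_args.bin", weights_only=False)
print(args.output_dir, args.save_steps, args.per_device_train_batch_size)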