minpeter committed on
Commit
e236499
·
verified ·
1 Parent(s): 5b04ab9

Training in progress, step 1000

Browse files
config.json CHANGED
@@ -4,20 +4,19 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 200000,
8
- "eos_token_id": 201136,
9
- "head_dim": 128,
10
  "hidden_act": "silu",
11
- "hidden_size": 1024,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 2752,
14
  "max_position_embeddings": 2048,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
- "num_attention_heads": 8,
18
- "num_hidden_layers": 12,
19
- "num_key_value_heads": 8,
20
- "pad_token_id": 200018,
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-06,
23
  "rope_scaling": null,
@@ -26,5 +25,5 @@
26
  "torch_dtype": "float32",
27
  "transformers_version": "4.51.3",
28
  "use_cache": false,
29
- "vocab_size": 201145
30
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 128256,
8
+ "eos_token_id": 128257,
9
+ "head_dim": 64,
10
  "hidden_act": "silu",
11
+ "hidden_size": 576,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
  "max_position_embeddings": 2048,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
+ "num_attention_heads": 9,
18
+ "num_hidden_layers": 30,
19
+ "num_key_value_heads": 3,
 
20
  "pretraining_tp": 1,
21
  "rms_norm_eps": 1e-06,
22
  "rope_scaling": null,
 
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.51.3",
27
  "use_cache": false,
28
+ "vocab_size": 128268
29
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e198627cafd4909f26df24b47da1232b6a3ade8aa35236ad2a5215236a13162
3
- size 2255020440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0a8f14efc2f0a49f278cf4e8cff1666bcf2acf6cad8d1c11de34c006346398
3
+ size 1015903256
special_tokens_map.json CHANGED
@@ -2,6 +2,7 @@
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
  "<|im_end|>",
 
5
  "<tools>",
6
  "</tools>",
7
  "<tool_call>",
@@ -12,7 +13,7 @@
12
  "</think>"
13
  ],
14
  "bos_token": {
15
- "content": "<|begin_of_text|>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
@@ -25,11 +26,5 @@
25
  "rstrip": false,
26
  "single_word": false
27
  },
28
- "pad_token": {
29
- "content": "<|finetune_right_pad|>",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false
34
- }
35
  }
 
2
  "additional_special_tokens": [
3
  "<|im_start|>",
4
  "<|im_end|>",
5
+ "<|finetune_pad|>",
6
  "<tools>",
7
  "</tools>",
8
  "<tool_call>",
 
13
  "</think>"
14
  ],
15
  "bos_token": {
16
+ "content": "<|im_start|>",
17
  "lstrip": false,
18
  "normalized": false,
19
  "rstrip": false,
 
26
  "rstrip": false,
27
  "single_word": false
28
  },
29
+ "pad_token": "<finetune_pad>"
 
 
 
 
 
 
30
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be73bc5046bc7cc697ed444d8ebd4e4b8d0452b72fcc921521f075b303071d78
3
- size 27950454
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3ed6c2fb0cdfe4d06a4e33d02ba6c0a99b0afa85d0f8a8a50527aed2d1767d
3
+ size 17212221
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9de1c3be660bcc9e2ce5fe5d97a2e4bb4a46f3f275ef49426b9abd7097f4349
3
- size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc85aa028e904531eb3094cca8ea7d5c1b0f768d5faeeb4854ca98a752ec3db5
3
+ size 6904