sadkins65 committed
Commit 82b42dd · verified · 1 Parent(s): 9ac5210

Upload folder using huggingface_hub

Files changed (4)
  1. config.json +7 -6
  2. model.safetensors +2 -2
  3. recipe.yaml +2 -1
  4. tokenizer.json +1 -6
config.json CHANGED
@@ -17,7 +17,10 @@
   "num_hidden_layers": 22,
   "num_key_value_heads": 4,
   "pretraining_tp": 1,
-  "quantization_config": {
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sparseml_quantization_config": {
     "config_groups": {
       "group_0": {
         "input_activations": {
@@ -47,16 +50,14 @@
       }
     },
     "format": "fakequant",
-    "global_compression_ratio": 1.4416825558107582,
+    "global_compression_ratio": 1.4375595368095078,
     "ignore": [
-      "model.layers.1.mlp.down_proj"
+      "model.layers.0.mlp.down_proj",
+      "lm_head"
     ],
     "quant_method": "sparseml",
     "quantization_status": "frozen"
   },
-  "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.39.3",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3656a76cd6d5aa60503e2f9a7ad23309f2616026772517a401a3bec00ecdb1b2
-size 4400288268
+oid sha256:3639b1bcb6a20c4ec6c2d27a85d2524d2599015058c5f57e533f4fe88d6ea21b
+size 4400287914
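
The pointer above is a Git LFS stub rather than the weights themselves; the actual file is identified by its SHA-256 and size. A minimal sketch for verifying a downloaded copy against the new pointer, assuming model.safetensors sits in the working directory (the path is hypothetical):

import hashlib

EXPECTED_OID = "3639b1bcb6a20c4ec6c2d27a85d2524d2599015058c5f57e533f4fe88d6ea21b"
EXPECTED_SIZE = 4400287914

# Stream the file so the ~4.4 GB checkpoint is never held in memory at once.
sha = hashlib.sha256()
size = 0
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
        size += len(chunk)

assert sha.hexdigest() == EXPECTED_OID and size == EXPECTED_SIZE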
recipe.yaml CHANGED
@@ -1,9 +1,10 @@
 test_stage:
   quant_modifiers:
     vLLMQuantizationModifier:
-      ignore: [model.layers.1.mlp.down_proj]
+      ignore: [lm_head, model.layers.0.mlp.down_proj]
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
           input_activations: {num_bits: 8, type: int, symmetric: false, strategy: tensor}
+          output_activations: null
           targets: [Linear]
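
The updated recipe applies symmetric int8 per-tensor weight quantization and asymmetric int8 per-tensor input-activation quantization to Linear modules, skips lm_head and model.layers.0.mlp.down_proj, and leaves output activations unquantized. A minimal sketch for reading those settings back, assuming the updated recipe.yaml is available locally and PyYAML is installed (both assumptions):

import yaml

with open("recipe.yaml") as f:
    recipe = yaml.safe_load(f)

modifier = recipe["test_stage"]["quant_modifiers"]["vLLMQuantizationModifier"]
print(modifier["ignore"])  # ['lm_head', 'model.layers.0.mlp.down_proj']

group_0 = modifier["config_groups"]["group_0"]
print(group_0["targets"])             # ['Linear']
print(group_0["weights"])             # int8, symmetric, per-tensor
print(group_0["output_activations"])  # None -> outputs stay unquantized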
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
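
The tokenizer change drops the hard-coded truncation rule (LongestFirst, max_length 512) in favour of no default truncation. A minimal sketch for confirming the new setting, assuming the updated tokenizer.json is available locally (the path is hypothetical):

import json

with open("tokenizer.json") as f:
    tok = json.load(f)

print(tok["truncation"])  # None after this commit (was a 512-token LongestFirst rule)
print(tok["padding"])     # None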