# Copyright (c) 2025 NVIDIA CORPORATION.
# Licensed under the MIT license.

# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
# LICENSE is in incl_licenses directory.

class QuantizationConfig:
    """Tag-based registry of Transformer quantization sites.

    Each attribute maps the name of a quantizable site (linear layer,
    activation tensor, GELU, LayerNorm, residual add, or elementwise
    multiply) to the set of tags it carries. Tags such as "all", "mlp",
    "attn", "layernorm", "residual", "gelu", "linear", and "te_like"
    group related sites so that quantization can be selected per group.
    """

    def __init__(self):
        # Linear (weight) quantization sites: the seven projection
        # matrices of a standard Transformer block.
        self.qlinear_config = {
            "mlp_gate": {"all", "linear"},
            "mlp_up": {"all", "linear"},
            "mlp_down": {"all", "linear"},
            "attn_proj": {"all", "linear"},
            "attn_q": {"all", "linear"},
            "attn_k": {"all", "linear"},
            "attn_v": {"all", "linear"},
        }
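        # Activation quantization sites, keyed by where the tensor sits in
        # the block (LayerNorm in/out, residual add inputs, attention
        # q/k/v in/out, MLP gate/up/down). "te_like" presumably marks the
        # subset a Transformer-Engine-style recipe would quantize, and the
        # _re/_fx suffixes appear to distinguish the two branches of each
        # residual op; both readings are inferred from the naming.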
        self.qact_config = {
            "mul_act_in1": {"all", "gelu"},
            "mul_act_in2": {"all", "gelu", "te_like"},
            "mul_act_out": {"all", "gelu", "te_like"},
            "mlp_act_sum": {"all", "mlp", "te_like"},
            "mlp_act_gate": {"all", "mlp", "te_like"},
            "mlp_act_up": {"all", "mlp", "te_like"},
            "mlp_act_in": {"all", "mlp", "te_like"},
            "mlp_act_out": {"all", "mlp"},
            "ln_attn_in": {"all", "layernorm"},
            "ln_mlp_in": {"all", "layernorm"},
            "ln_attn_out": {"all", "layernorm", "te_like"},
            "ln_mlp_out": {"all", "layernorm", "te_like"},
            "add_attn_in_re": {"all", "residual"},
            "add_attn_in_fx": {"all", "residual", "te_like"},
            "add_mlp_in_re": {"all", "residual"},
            "add_mlp_in_fx": {"all", "residual", "te_like"},
            "re_attn_out_re": {"all", "residual"},
            "re_attn_out_fx": {"all", "residual"},
            "re_mlp_out_re": {"all", "residual"},
            "re_mlp_out_fx": {"all", "residual"},
            "attn_qkv_sum": {"all", "attn", "te_like"},
            "attn_q_in": {"all", "attn", "te_like"},
            "attn_k_in": {"all", "attn", "te_like"},
            "attn_v_in": {"all", "attn", "te_like"},
            "attn_q_out": {"all", "attn", "te_like"},
            "attn_k_out": {"all", "attn", "te_like"},
            "attn_v_out": {"all", "attn", "te_like"},
            "attn_proj_in": {"all", "attn", "te_like"},
        }

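        # Per-operator quantization sites: the GELU, the two LayerNorms,
        # the two residual adds, and the gated-MLP elementwise multiply.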
        self.qgelu_config = {"mlp_gelu": {"all", "gelu"}}

        self.qlayernorm_config = {"ln_attn": {"all", "layernorm"}, "ln_mlp": {"all", "layernorm"}}

        self.qadd_config = {"add_attn": {"all", "residual"}, "add_mlp": {"all", "residual"}}

        self.qmul_config = {
            "mul_act": {"all", "gelu"},
        }


qconfig = QuantizationConfig()
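

# A minimal usage sketch (illustrative; select_by_tag is a hypothetical
# helper, not part of the original file): the tag sets above make it easy
# to pull out every site belonging to a given group.
def select_by_tag(config, tag):
    """Return the names of all sites whose tag set contains `tag`."""
    return sorted(name for name, tags in config.items() if tag in tags)


# For example, assuming "te_like" marks Transformer-Engine-style sites:
#   select_by_tag(qconfig.qact_config, "te_like")
#   -> ["add_attn_in_fx", "add_mlp_in_fx", "attn_k_in", ...]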