# Copyright (c) 2025 NVIDIA CORPORATION.
# Licensed under the MIT license.

# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
# LICENSE is in incl_licenses directory.

from dataclasses import dataclass

from transformers import PretrainedConfig


@dataclass
class QuantizationConfig:
    # Whether to quantize the model ("true"/"false"); a string flag rather than a bool.
    quantize_model: str = "false"
    # Use symmetric quantization.
    symm: bool = True
    # Small constant guarding against division by zero when computing scales.
    epsilon: float = 1e-10
    # FP8 formats for the forward pass: activations, weights, and outputs.
    fabit: str = "E4M3"
    fwbit: str = "E4M3"
    fobit: str = "E4M3"
    # FP8 formats for the backward pass; E5M2 trades precision for the wider
    # dynamic range that gradients need.
    babit: str = "E5M2"
    bwbit: str = "E5M2"
    bobit: str = "E5M2"
    # Quantization granularity/choice selector; "none" disables it.
    qchoice: str = "none"
    # Per-group quantization group size; -1 means no grouping.
    group_size: int = -1
    pad_to_multiple_of: int = 0
    weight_memory_efficient: bool = True

    # Legacy block-wise quantization parameters.
    row_blocksize: int = -1
    col_blocksize: int = -1

    # Explicit constructor (when __init__ is already defined, @dataclass does not
    # generate one) so that extra keys in a serialized config dict are accepted
    # via **kwargs and silently ignored.
    def __init__(
        self,
        quantize_model: str = "false",
        symm: bool = True,
        epsilon: float = 1e-10,
        fabit: str = "E4M3",
        fwbit: str = "E4M3",
        fobit: str = "E4M3",
        babit: str = "E5M2",
        bwbit: str = "E5M2",
        bobit: str = "E5M2",
        qchoice: str = "none",
        group_size: int = -1,
        pad_to_multiple_of: int = 0,
        weight_memory_efficient: bool = True,
        row_blocksize: int = -1,
        col_blocksize: int = -1,
        **kwargs,
    ):
        super().__init__()
        self.quantize_model = quantize_model
        self.symm = symm
        self.epsilon = epsilon
        self.fabit = fabit
        self.fwbit = fwbit
        self.fobit = fobit
        self.babit = babit
        self.bwbit = bwbit
        self.bobit = bobit
        self.qchoice = qchoice
        self.group_size = group_size
        self.pad_to_multiple_of = pad_to_multiple_of
        self.weight_memory_efficient = weight_memory_efficient

        self.row_blocksize = row_blocksize
        self.col_blocksize = col_blocksize
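

# Illustrative usage sketch (not part of the upstream file): constructing the
# config directly and from a plain dict with extra keys, which the **kwargs
# catch-all silently ignores. The "qchoice" and "group_size" values below are
# arbitrary examples, not values prescribed by the upstream code.
if __name__ == "__main__":
    # Defaults: E4M3 forward, E5M2 backward, quantization disabled.
    default_cfg = QuantizationConfig()
    print(default_cfg.quantize_model, default_cfg.fabit, default_cfg.babit)

    # Loading from a dict, e.g. a "quantization_config" entry in a model
    # config JSON; the unknown "some_extra_key" is dropped by **kwargs.
    raw = {
        "quantize_model": "true",
        "qchoice": "all",  # hypothetical value for illustration
        "group_size": 128,
        "some_extra_key": "ignored",
    }
    cfg = QuantizationConfig(**raw)
    print(cfg.quantize_model, cfg.qchoice, cfg.group_size)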