Spaces:

nvidia
/

audio-flamingo-3

Running on A100

Upload 225 files

174ae06 verified 5 days ago

1.84 kB

	# Copyright (c) 2025 NVIDIA CORPORATION.
	# Licensed under the MIT license.

	# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
	# LICENSE is in incl_licenses directory.

	from dataclasses import dataclass

	from transformers import PretrainedConfig


	@dataclass
	class QuantizationConfig:
	    """Settings container for model quantization.

	    Every field has a default, so ``QuantizationConfig()`` is valid.

	    The class defines its own ``__init__``, so ``@dataclass`` does not
	    generate one; the decorator still supplies ``__repr__`` and ``__eq__``
	    based on the annotated fields below. The hand-written ``__init__``
	    mirrors the field defaults and, unlike a generated one, accepts and
	    ignores arbitrary extra keyword arguments.
	    """

	    # Stored as a string rather than a bool; the default is the literal
	    # "false". NOTE(review): the set of accepted values is not visible
	    # here - confirm against the code that reads this field.
	    quantize_model: str = "false"
	    symm: bool = True
	    epsilon: float = 1e-10
	    # NOTE(review): the six "*bit" fields presumably name the number format
	    # used for forward (f*) / backward (b*) activations, weights and
	    # outputs - inferred from the names only, confirm with the consumers.
	    fabit: str = "E4M3"
	    fwbit: str = "E4M3"
	    fobit: str = "E4M3"
	    babit: str = "E5M2"
	    bwbit: str = "E5M2"
	    bobit: str = "E5M2"
	    qchoice: str = "none"
	    # NOTE(review): -1 looks like a "not set" sentinel - confirm.
	    group_size: int = -1
	    pad_to_multiple_of: int = 0
	    weight_memory_efficient: bool = True

	    # Legacy
	    row_blocksize: int = -1
	    col_blocksize: int = -1

	    def __init__(
	        self,
	        quantize_model: str = "false",
	        symm: bool = True,
	        epsilon: float = 1e-10,
	        fabit: str = "E4M3",
	        fwbit: str = "E4M3",
	        fobit: str = "E4M3",
	        babit: str = "E5M2",
	        bwbit: str = "E5M2",
	        bobit: str = "E5M2",
	        qchoice: str = "none",
	        group_size: int = -1,
	        pad_to_multiple_of: int = 0,
	        weight_memory_efficient: bool = True,
	        row_blocksize: int = -1,
	        col_blocksize: int = -1,
	        **kwargs,
	    ) -> None:
	        """Store each argument on the instance under the same name.

	        The defaults match the class-level field defaults. Any additional
	        keyword arguments are accepted through ``**kwargs`` and discarded:
	        they are neither stored nor validated.
	        """
	        super().__init__()
	        self.quantize_model = quantize_model
	        self.symm = symm
	        self.epsilon = epsilon
	        self.fabit = fabit
	        self.fwbit = fwbit
	        self.fobit = fobit
	        self.babit = babit
	        self.bwbit = bwbit
	        self.bobit = bobit
	        self.qchoice = qchoice
	        self.group_size = group_size
	        self.pad_to_multiple_of = pad_to_multiple_of
	        self.weight_memory_efficient = weight_memory_efficient

	        # Legacy
	        self.row_blocksize = row_blocksize
	        self.col_blocksize = col_blocksize