Spaces:

nvidia
/

audio-flamingo-3

Running on A100

App Files Files Community

audio-flamingo-3 / llava /model /quantization /QMul.py

SreyanG-NVIDIA

Upload 225 files

174ae06 verified 3 days ago

raw

history blame contribute delete

2.16 kB

	# Copyright (c) 2025 NVIDIA CORPORATION.
	# Licensed under the MIT license.

	# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
	# LICENSE is in incl_licenses directory.

	import torch
	import torch.nn as nn
	from torch.autograd.function import Function, InplaceFunction

	try:
	from .QAct import QAct_FPin, QAct_FPout
	from .Qconfig import qconfig
	from .QFunction import *
	from .utils import *

	except:
	from Qconfig import qconfig
	from utils import *
	from QFunction import *
	from .QAct import QAct_FPin, QAct_FPout

	import os
	from copy import deepcopy

	import matplotlib.pyplot as plt


	class QMul(nn.Module):
	    """Element-wise product of two inputs, each supplied as a (tensor, scale) pair.

	    Each operand is passed, together with its scale, through its own
	    ``QAct_FPout`` module; the two results are multiplied element-wise and the
	    product is passed through a ``QAct_FPin`` module, which returns a
	    ``(Qoutput, Oscale)`` pair.

	    NOTE(review): judging by the names, ``QAct_FPout`` dequantizes to floating
	    point and ``QAct_FPin`` re-quantizes -- confirm against QAct.py.
	    """

	    def __init__(self, args=None, layer_type=""):
	        """Build the input/output activation wrappers for one multiply site.

	        Args:
	            args: Configuration object. This constructor reads ``qchoice``,
	                ``fabit``, ``babit``, ``symm``, ``row_blocksize`` and
	                ``col_blocksize`` from it. A deep copy is kept on ``self.args``;
	                the original object is forwarded to the sub-modules.
	            layer_type: Non-empty key that must be present in
	                ``qconfig.qmul_config``.

	        Raises:
	            AssertionError: If ``layer_type`` is empty or is not a key of
	                ``qconfig.qmul_config``.
	        """
	        super().__init__()
	        self.args = deepcopy(args)
	        self.layer_type = layer_type
	        assert layer_type != "", "layer_type is not defined"
	        # The message names the table that is actually being checked.
	        assert layer_type in qconfig.qmul_config, f"{layer_type} not in qmul_config"

	        self.apply_quantize = list_has_common_element(args.qchoice, qconfig.qmul_config[layer_type])

	        # NOTE(review): ``Ubit`` is never assigned in this class, so if
	        # ``args.fabit`` / ``args.babit`` is falsy the fallback lookup below is
	        # expected to raise AttributeError -- confirm the intended default.
	        self.fbit = self.args.fabit if self.args.fabit else self.Ubit
	        self.bbit = self.args.babit if self.args.babit else self.Ubit

	        quantize_flag = format_string_with_condition(
	            layer_type,
	            {"apply": self.apply_quantize},
	            self.args.symm,
	            self.fbit,
	            self.bbit,
	            {"row": self.args.row_blocksize, "col": self.args.col_blocksize},
	        )

	        # Emits the configuration summary once per constructed layer.
	        print(quantize_flag)

	        self.Mul_in1 = QAct_FPout(args, layer_type=layer_type + "_in1")
	        self.Mul_in2 = QAct_FPout(args, layer_type=layer_type + "_in2")
	        self.Mul_out = QAct_FPin(args, layer_type=layer_type + "_out")

	    def forward(self, Qinput1, Qinput2, Iscale1, Iscale2):
	        """Multiply the two operands and return the wrapped product.

	        Args:
	            Qinput1: First operand tensor.
	            Qinput2: Second operand tensor.
	            Iscale1: Scale passed alongside ``Qinput1`` to ``Mul_in1``.
	            Iscale2: Scale passed alongside ``Qinput2`` to ``Mul_in2``.

	        Returns:
	            The ``(Qoutput, Oscale)`` pair produced by ``Mul_out``.
	        """
	        # input shape is (Batch Size, Sequence Length, Hidden Size)
	        input1 = self.Mul_in1(Qinput1, Iscale1)
	        input2 = self.Mul_in2(Qinput2, Iscale2)
	        output_fp = input1 * input2
	        Qoutput, Oscale = self.Mul_out(output_fp)
	        return Qoutput, Oscale


	# Script entry point: when this file is executed directly, load a saved
	# tensor dump from a path relative to the current working directory.
	if __name__ == "__main__":
	# NOTE(review): the loaded object is bound to ``Sum`` and not used in the
	# lines visible here. torch.load may unpickle arbitrary objects depending
	# on ``weights_only`` -- only point this at trusted files.
	Sum = torch.load("tensor/QAct_nan_epoch16.pt")