# Copyright (c) 2025 NVIDIA CORPORATION.
# Licensed under the MIT license.
# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
# LICENSE is in incl_licenses directory.

from .language_model.llava_llama import LlavaLlamaConfig, LlavaLlamaModel
# FP8-related code, development in progress (PI: Ligeng Zhu, Haochen Xi)
# NOTE: VLM + LLM
# from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel
# NOTE: Linear -> fp8, similar to Transformer Engine
# from .language_model.qllama import QLlamaConfig, QLlamaForCausalLM, QLlamaModel
# NOTE: Linear + Activation -> fp8, Haochen's ICLR version
# from .language_model.qmemllama import QMemLlamaConfig, QMemLlamaForCausalLM, QMemLlamaModel
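
# Below is a minimal sketch (illustrative only, not the COAT implementation) of
# the "simulated fp8" linear referenced in the NOTEs above: weights are
# round-tripped through float8_e4m3fn so fp8 precision loss is modeled while
# the matmul itself still runs in the original dtype. The function name and
# the per-tensor scaling scheme are assumptions.
import torch
import torch.nn.functional as F


def simulated_fp8_linear(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # Per-tensor scale so the largest weight maps to e4m3's max normal value (448).
    scale = weight.abs().max().clamp(min=1e-12) / 448.0
    w_fp8 = (weight / scale).to(torch.float8_e4m3fn)  # quantize to fp8
    w_sim = w_fp8.to(weight.dtype) * scale            # dequantize back
    return F.linear(x, w_sim)
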
""" | |
TODO: | |
linear(weights): | |
simulated fp8: done | |
real fp8: in-progress (code already implmented) | |
activation: | |
simulated fp8: done | |
real fp8: in-progress (still coding) | |
optimizers: | |
current VILA: bf16 | |
simulated fp8: done | |
real fp8 + fsdp (single node): done | |
real fp8 + fsdp (multiple node): in-progress | |
1. linear fp8 | |
2. activation fp8 | |
3. fp8 infernce example (load directly from a fp8 and fwd) | |
4. bind fp8 related configs to QLlamaConfig {"coat_fp8_args": {}} | |
""" | |
from .language_model.fp8linearqwen2 import FP8LinearQwen2Config, FP8LinearQwen2Model
from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel
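
# A minimal sketch of TODO item 4 above: binding the fp8 options to the config
# as a nested dict. Kept commented out like the QLlama imports above, since
# that code path is still in progress. Assumes standard Hugging Face
# from_pretrained semantics; the checkpoint path and the keys inside
# coat_fp8_args are hypothetical placeholders, not the real schema.
#
# config = QLlamaConfig.from_pretrained("path/to/checkpoint")
# config.coat_fp8_args = {"quant_bits": 8, "group_size": 128}  # hypothetical keys
# model = QLlamaForCausalLM(config)  # model whose Linear layers read coat_fp8_args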