# SreyanG-NVIDIA's picture
# Upload 225 files
# 174ae06 verified
# Copyright (c) 2025 NVIDIA CORPORATION.
# Licensed under the MIT license.
# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
# LICENSE is in incl_licenses directory.
from .language_model.llava_llama import LlavaLlamaConfig, LlavaLlamaModel
# FP8 related comments, development in progress (PI: ligeng zhu, haochen xi)
# NOTE: VLM + LLM
# from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel
# NOTE: Linear -> fp8, similar to transformer engine
# from .language_model.qllama import QLlamaConfig, QLlamaForCausalLM, QLlamaModel
# NOTE: Linear + Activation -> fp8, haochen's iclr version
# from .language_model.qmemllama import QMemLlamaConfig, QMemLlamaForCausalLM, QMemLlamaModel
"""
TODO:
linear(weights):
simulated fp8: done
        real fp8: in-progress (code already implemented)
activation:
simulated fp8: done
real fp8: in-progress (still coding)
optimizers:
current VILA: bf16
simulated fp8: done
real fp8 + fsdp (single node): done
real fp8 + fsdp (multiple node): in-progress
1. linear fp8
2. activation fp8
3. fp8 inference example (load directly from a fp8 and fwd)
4. bind fp8 related configs to QLlamaConfig {"coat_fp8_args": {}}
"""
from .language_model.fp8linearqwen2 import FP8LinearQwen2Config, FP8LinearQwen2Model
from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel