# Copyright (c) 2025 NVIDIA CORPORATION.
# Licensed under the MIT license.
# Adapted from https://github.com/NVlabs/VILA/tree/main under the Apache 2.0 license.
# LICENSE is in incl_licenses directory.

from .language_model.llava_llama import LlavaLlamaConfig, LlavaLlamaModel
# FP8-related code, development in progress (PI: Ligeng Zhu, Haochen Xi)
# NOTE: VLM + LLM
# from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel
# NOTE: Linear -> fp8, similar to Transformer Engine
# from .language_model.qllama import QLlamaConfig, QLlamaForCausalLM, QLlamaModel
# NOTE: Linear + Activation -> fp8, Haochen's ICLR version
# from .language_model.qmemllama import QMemLlamaConfig, QMemLlamaForCausalLM, QMemLlamaModel
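
# Below is a minimal sketch (illustrative only, not the COAT implementation) of
# the "simulated fp8" linear referenced in the NOTEs above: weights are
# round-tripped through float8_e4m3fn so fp8 precision loss is modeled while
# the matmul itself still runs in the original dtype. The function name and
# the per-tensor scaling scheme are assumptions.
import torch
import torch.nn.functional as F


def simulated_fp8_linear(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    # Per-tensor scale so the largest weight maps to e4m3's max normal value (448).
    scale = weight.abs().max().clamp(min=1e-12) / 448.0
    w_fp8 = (weight / scale).to(torch.float8_e4m3fn)  # quantize to fp8
    w_sim = w_fp8.to(weight.dtype) * scale            # dequantize back
    return F.linear(x, w_sim)
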
""" | |
TODO: | |
linear(weights): | |
simulated fp8: done | |
real fp8: in-progress (code already implmented) | |
activation: | |
simulated fp8: done | |
real fp8: in-progress (still coding) | |
optimizers: | |
current VILA: bf16 | |
simulated fp8: done | |
real fp8 + fsdp (single node): done | |
real fp8 + fsdp (multiple node): in-progress | |
1. linear fp8 | |
2. activation fp8 | |
3. fp8 infernce example (load directly from a fp8 and fwd) | |
4. bind fp8 related configs to QLlamaConfig {"coat_fp8_args": {}} | |
""" | |
from .language_model.fp8linearqwen2 import FP8LinearQwen2Config, FP8LinearQwen2Model
from .language_model.qllava_qllama import QLlavaLlamaConfig, QLlavaLlamaModel
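
# A minimal sketch of TODO item 4 above: binding the fp8 options to the config
# as a nested dict. Kept commented out like the QLlama imports above, since
# that code path is still in progress. Assumes standard Hugging Face
# from_pretrained semantics; the checkpoint path and the keys inside
# coat_fp8_args are hypothetical placeholders, not the real schema.
#
# config = QLlamaConfig.from_pretrained("path/to/checkpoint")
# config.coat_fp8_args = {"quant_bits": 8, "group_size": 128}  # hypothetical keys
# model = QLlamaForCausalLM(config)  # model whose Linear layers read coat_fp8_args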