"""Smoke tests for Cosmos-Embed1 including Transformer Engine support.""" |
|
import os |
|
import pytest |
import torch |
from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer |
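
# Path to a local Cosmos-Embed1 checkpoint; override via the
# COSMOS_EMBED1_MODEL_PATH environment variable. Defaults to the current
# directory, so the tests can run from inside the model repo.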
MODEL_PATH = os.environ.get("COSMOS_EMBED1_MODEL_PATH", ".") |
|
|
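# Added guard, consistent with the TE tests below: skip cleanly on CPU-only
# machines instead of failing on the .to("cuda") call.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")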
def test_smoke() -> None: |
    """Original smoke test for basic functionality."""
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    with torch.no_grad():
        # Text-only embeddings.
        text_inputs = preprocess(text=["a cat", "a dog"]).to("cuda", dtype=torch.bfloat16)
        text_out = model.get_text_embeddings(**text_inputs)
        assert text_out.text_proj.shape == (2, 768)

        # Video-only embeddings: 2 clips x 8 frames x 3x224x224, random pixel values.
        video_inputs = preprocess(videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224))).to(
            "cuda", dtype=torch.bfloat16
        )
        video_out = model.get_video_embeddings(**video_inputs)
        assert video_out.visual_proj.shape == (2, 768)

        # Joint text+video forward pass at the higher 448x448 resolution.
        text_video_inputs = preprocess(
            text=["a cat", "a dog"],
            videos=torch.randint(0, 255, size=(2, 8, 3, 448, 448)),
        ).to("cuda", dtype=torch.bfloat16)
        text_video_out = model(**text_video_inputs)
        assert text_video_out.text_proj.shape == text_video_out.visual_proj.shape == (2, 768)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    assert len(tokenizer) == 30523

    del model
    torch.cuda.empty_cache()
|
|
def test_transformer_engine_available(): |
    """Test if Transformer Engine is available."""
    pytest.importorskip("transformer_engine.pytorch")
|
|
def test_load_standard_model(): |
    """Test loading the standard (non-TE) model."""
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16)
    assert model.transformer_engine is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    del model
    torch.cuda.empty_cache()
|
|
def test_load_transformer_engine_model(): |
    """Test loading model with Transformer Engine enabled."""
    pytest.importorskip("transformer_engine.pytorch")

    # Enable TE kernels but keep FP8 off; FP8 is exercised separately below.
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = False

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)
    assert model.transformer_engine is True
    assert model.use_fp8 is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    del model
    torch.cuda.empty_cache()
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test") |
def test_transformer_engine_inference(): |
    """Test that TE-model inference matches the standard model's output shapes."""
    pytest.importorskip("transformer_engine.pytorch")

    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    text_inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)

    # Reference embeddings from the standard (non-TE) model.
    model_standard = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16).to(
        "cuda"
    )
    with torch.no_grad():
        text_out_std = model_standard.get_text_embeddings(**text_inputs)

    # Free GPU memory before loading the second model.
    del model_standard
    torch.cuda.empty_cache()

    # Same inputs through the TE-enabled model (FP8 off).
    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = False

    model_te = AutoModel.from_pretrained(
        MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16
    ).to("cuda")
    with torch.no_grad():
        text_out_te = model_te.get_text_embeddings(**text_inputs)

    assert text_out_std.text_proj.shape == text_out_te.text_proj.shape
    assert text_out_std.text_proj.shape == (1, 768)
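
    # Added sanity check (assumption): with FP8 disabled, the TE path should
    # closely track the standard bf16 path, so compare embedding directions
    # with a deliberately loose tolerance rather than exact equality.
    cos = torch.nn.functional.cosine_similarity(
        text_out_std.text_proj.float(), text_out_te.text_proj.float(), dim=-1
    )
    assert cos.min().item() > 0.99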

    del model_te
    torch.cuda.empty_cache()
|
|
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test") |
def test_transformer_engine_fp8(): |
    """Test loading model with Transformer Engine + FP8 (requires substantial GPU memory)."""
    pytest.importorskip("transformer_engine.pytorch")

    torch.cuda.empty_cache()

    config = AutoConfig.from_pretrained(MODEL_PATH, trust_remote_code=True)
    config.transformer_engine = True
    config.use_fp8 = True

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)
    assert model.transformer_engine is True
    assert model.use_fp8 is True

    del model
    torch.cuda.empty_cache()
|
|
def test_transformer_engine_config_validation(): |
    """Test configuration validation for Transformer Engine."""
    from modeling_vit import VisionTransformer

    # use_fp8=True without transformer_engine=True is an invalid combination
    # and must be rejected at construction time. The import sits outside the
    # raises block so an ImportError is not mistaken for the expected ValueError.
    with pytest.raises(ValueError, match="transformer_engine.*must be enabled.*use_fp8"):
        VisionTransformer(transformer_engine=False, use_fp8=True)