# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Smoke tests for Cosmos-Embed1 including Transformer Engine support."""

import os

import pytest
import torch
from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer

# Test model path: override via the COSMOS_EMBED1_MODEL_PATH environment
# variable; defaults to the current directory.
MODEL_PATH = os.environ.get("COSMOS_EMBED1_MODEL_PATH", ".")
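
# Example invocation (a sketch; assumes this test file ships inside the model
# snapshot, so the default "." works when pytest is run from the repo root):
#
#   COSMOS_EMBED1_MODEL_PATH=/path/to/cosmos-embed1 pytest -v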


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_smoke() -> None:
    """Original smoke test for basic functionality."""

    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    with torch.no_grad():
        text_inputs = preprocess(text=["a cat", "a dog"]).to("cuda", dtype=torch.bfloat16)
        text_out = model.get_text_embeddings(**text_inputs)
        assert text_out.text_proj.shape == (2, 768)

        video_inputs = preprocess(videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224))).to(
            "cuda", dtype=torch.bfloat16
        )
        video_out = model.get_video_embeddings(**video_inputs)
        assert video_out.visual_proj.shape == (2, 768)

        text_video_inputs = preprocess(
            text=["a cat", "a dog"],
            videos=torch.randint(0, 255, size=(2, 8, 3, 448, 448)),
        ).to("cuda", dtype=torch.bfloat16)
        text_video_out = model(**text_video_inputs)
        assert text_video_out.text_proj.shape == text_video_out.visual_proj.shape == (2, 768)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    assert len(tokenizer) == 30523

    # Clean up GPU memory after test
    del model
    torch.cuda.empty_cache()
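

# Hedged addition (not part of the original suite): the smoke test produces
# text and video projections but never compares them. This sketch assumes
# text_proj and visual_proj live in a shared CLIP-style embedding space and
# only checks the shape of the resulting similarity matrix, since random
# videos carry no semantic signal worth asserting against.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_text_video_similarity_matrix() -> None:
    """Build a 2x2 text-video cosine-similarity matrix from the projections."""
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    with torch.no_grad():
        inputs = preprocess(
            text=["a cat", "a dog"],
            videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224)),
        ).to("cuda", dtype=torch.bfloat16)
        out = model(**inputs)
        text = torch.nn.functional.normalize(out.text_proj.float(), dim=-1)
        video = torch.nn.functional.normalize(out.visual_proj.float(), dim=-1)
        similarity = text @ video.T  # rows: text prompts, cols: videos

    assert similarity.shape == (2, 2)

    # Clean up GPU memory after test
    del model
    torch.cuda.empty_cache()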


def test_transformer_engine_available() -> None:
    """Test if Transformer Engine is available (skips when it cannot be imported)."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")


def test_load_standard_model() -> None:
    """Test loading the standard (non-TE) model."""
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16)
    assert model.transformer_engine is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Clean up
    del model
    torch.cuda.empty_cache()


def test_load_transformer_engine_model() -> None:
    """Test loading model with Transformer Engine enabled."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")

    # Load config and enable transformer engine
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = False  # Start with FP8 disabled for basic test

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Clean up
    del model
    torch.cuda.empty_cache()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_inference() -> None:
    """Test inference with Transformer Engine model."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")

    # Test text embeddings with standard model first
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    text_inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)

    # Load standard model, run inference, then clean up
    model_standard = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16).to(
        "cuda"
    )

    with torch.no_grad():
        text_out_std = model_standard.get_text_embeddings(**text_inputs)

    # Clean up standard model before loading TE model
    del model_standard
    torch.cuda.empty_cache()

    # Now load TE model and run inference
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = False

    model_te = AutoModel.from_pretrained(
        MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16
    ).to("cuda")

    with torch.no_grad():
        text_out_te = model_te.get_text_embeddings(**text_inputs)

    # Check shapes match
    assert text_out_std.text_proj.shape == text_out_te.text_proj.shape
    assert text_out_std.text_proj.shape == (1, 768)
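
    # Optional numerical sanity check (a sketch; the 0.99 floor is an assumed
    # tolerance -- bf16 rounding plus TE kernel differences shift values, and
    # the actual drift for this model has not been measured here):
    # cos = torch.nn.functional.cosine_similarity(
    #     text_out_std.text_proj.float(), text_out_te.text_proj.float(), dim=-1
    # )
    # assert cos.min() > 0.99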

    # Clean up GPU memory
    del model_te
    torch.cuda.empty_cache()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_fp8() -> None:
    """Test loading model with Transformer Engine + FP8 (requires substantial GPU memory)."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping FP8 tests")

    # Clear memory before this memory-intensive test
    torch.cuda.empty_cache()

    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = True

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is True
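
    # Inference sketch (assumptions: the model applies FP8 autocast internally
    # when config.use_fp8 is True, and the GPU actually supports FP8, i.e.
    # Hopper/Ada or newer -- neither assumption is verified by this test):
    # preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    # inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)
    # with torch.no_grad():
    #     out = model.to("cuda").get_text_embeddings(**inputs)
    # assert out.text_proj.shape == (1, 768)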

    # Clean up
    del model
    torch.cuda.empty_cache()


def test_transformer_engine_config_validation() -> None:
    """Test configuration validation for Transformer Engine."""
    # Import outside the raises block so an ImportError cannot be mistaken
    # for the expected ValueError. Note: modeling_vit must be importable,
    # i.e. the tests are run from inside the model directory.
    from modeling_vit import VisionTransformer

    # use_fp8=True without transformer_engine=True must be rejected.
    with pytest.raises(ValueError, match="transformer_engine.*must be enabled.*use_fp8"):
        VisionTransformer(transformer_engine=False, use_fp8=True)
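

# Convenience entry point (an addition; the original suite relies on pytest
# discovery): lets `python <this file>` run the tests directly.
if __name__ == "__main__":
    import sys

    sys.exit(pytest.main([__file__, "-v"]))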