# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Smoke tests for Cosmos-Embed1 including Transformer Engine support."""

import os

import pytest
import torch
from transformers import AutoConfig, AutoModel, AutoProcessor, AutoTokenizer

# Test model path: override via the COSMOS_EMBED1_MODEL_PATH environment
# variable; defaults to the current directory.
MODEL_PATH = os.environ.get("COSMOS_EMBED1_MODEL_PATH", ".")
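
# Example invocation (a sketch; assumes this test file ships inside the model
# snapshot, so the default "." works when pytest is run from the repo root):
#
#   COSMOS_EMBED1_MODEL_PATH=/path/to/cosmos-embed1 pytest -v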


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_smoke() -> None:
    """Original smoke test for basic functionality."""

    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    with torch.no_grad():
        text_inputs = preprocess(text=["a cat", "a dog"]).to("cuda", dtype=torch.bfloat16)
        text_out = model.get_text_embeddings(**text_inputs)
        assert text_out.text_proj.shape == (2, 768)

        video_inputs = preprocess(videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224))).to(
            "cuda", dtype=torch.bfloat16
        )
        video_out = model.get_video_embeddings(**video_inputs)
        assert video_out.visual_proj.shape == (2, 768)

        text_video_inputs = preprocess(
            text=["a cat", "a dog"],
            videos=torch.randint(0, 255, size=(2, 8, 3, 448, 448)),
        ).to("cuda", dtype=torch.bfloat16)
        text_video_out = model(**text_video_inputs)
        assert text_video_out.text_proj.shape == text_video_out.visual_proj.shape == (2, 768)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    assert len(tokenizer) == 30523

    # Clean up GPU memory after test
    del model
    torch.cuda.empty_cache()
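

# Hedged addition (not part of the original suite): the smoke test produces
# text and video projections but never compares them. This sketch assumes
# text_proj and visual_proj live in a shared CLIP-style embedding space and
# only checks the shape of the resulting similarity matrix, since random
# videos carry no semantic signal worth asserting against.
@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_text_video_similarity_matrix() -> None:
    """Build a 2x2 text-video cosine-similarity matrix from the projections."""
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to("cuda", dtype=torch.bfloat16)

    with torch.no_grad():
        inputs = preprocess(
            text=["a cat", "a dog"],
            videos=torch.randint(0, 255, size=(2, 8, 3, 224, 224)),
        ).to("cuda", dtype=torch.bfloat16)
        out = model(**inputs)
        text = torch.nn.functional.normalize(out.text_proj.float(), dim=-1)
        video = torch.nn.functional.normalize(out.visual_proj.float(), dim=-1)
        similarity = text @ video.T  # rows: text prompts, cols: videos

    assert similarity.shape == (2, 2)

    # Clean up GPU memory after test
    del model
    torch.cuda.empty_cache()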


def test_transformer_engine_available() -> None:
    """Test if Transformer Engine is available (skips when it cannot be imported)."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")


def test_load_standard_model() -> None:
    """Test loading the standard (non-TE) model."""
    model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16)
    assert model.transformer_engine is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Clean up
    del model
    torch.cuda.empty_cache()


def test_load_transformer_engine_model() -> None:
    """Test loading model with Transformer Engine enabled."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")

    # Load config and enable transformer engine
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = False  # Start with FP8 disabled for basic test

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is False
    assert hasattr(model, "visual_encoder")
    assert hasattr(model, "qformer")

    # Clean up
    del model
    torch.cuda.empty_cache()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_inference() -> None:
    """Test inference with Transformer Engine model."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping TE tests")

    # Test text embeddings with standard model first
    preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    text_inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)

    # Load standard model, run inference, then clean up
    model_standard = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True, torch_dtype=torch.bfloat16).to(
        "cuda"
    )

    with torch.no_grad():
        text_out_std = model_standard.get_text_embeddings(**text_inputs)

    # Clean up standard model before loading TE model
    del model_standard
    torch.cuda.empty_cache()

    # Now load TE model and run inference
    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = False

    model_te = AutoModel.from_pretrained(
        MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16
    ).to("cuda")

    with torch.no_grad():
        text_out_te = model_te.get_text_embeddings(**text_inputs)

    # Check shapes match
    assert text_out_std.text_proj.shape == text_out_te.text_proj.shape
    assert text_out_std.text_proj.shape == (1, 768)
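
    # Optional numerical sanity check (a sketch; the 0.99 floor is an assumed
    # tolerance -- bf16 rounding plus TE kernel differences shift values, and
    # the actual drift for this model has not been measured here):
    # cos = torch.nn.functional.cosine_similarity(
    #     text_out_std.text_proj.float(), text_out_te.text_proj.float(), dim=-1
    # )
    # assert cos.min() > 0.99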

    # Clean up GPU memory
    del model_te
    torch.cuda.empty_cache()


@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available, skipping GPU test")
def test_transformer_engine_fp8() -> None:
    """Test loading model with Transformer Engine + FP8 (requires substantial GPU memory)."""
    pytest.importorskip("transformer_engine.pytorch", reason="Transformer Engine not available, skipping FP8 tests")

    # Clear memory before this memory-intensive test
    torch.cuda.empty_cache()

    config = AutoConfig.from_pretrained(MODEL_PATH)
    config.transformer_engine = True
    config.use_fp8 = True

    model = AutoModel.from_pretrained(MODEL_PATH, config=config, trust_remote_code=True, torch_dtype=torch.bfloat16)

    assert model.transformer_engine is True
    assert model.use_fp8 is True
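
    # Inference sketch (assumptions: the model applies FP8 autocast internally
    # when config.use_fp8 is True, and the GPU actually supports FP8, i.e.
    # Hopper/Ada or newer -- neither assumption is verified by this test):
    # preprocess = AutoProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True)
    # inputs = preprocess(text=["a cat"]).to("cuda", dtype=torch.bfloat16)
    # with torch.no_grad():
    #     out = model.to("cuda").get_text_embeddings(**inputs)
    # assert out.text_proj.shape == (1, 768)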

    # Clean up
    del model
    torch.cuda.empty_cache()


def test_transformer_engine_config_validation() -> None:
    """Test configuration validation for Transformer Engine."""
    # Import outside the raises block so an ImportError cannot be mistaken
    # for the expected ValueError. Note: modeling_vit must be importable,
    # i.e. the tests are run from inside the model directory.
    from modeling_vit import VisionTransformer

    # use_fp8=True without transformer_engine=True must be rejected.
    with pytest.raises(ValueError, match="transformer_engine.*must be enabled.*use_fp8"):
        VisionTransformer(transformer_engine=False, use_fp8=True)
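

# Convenience entry point (an addition; the original suite relies on pytest
# discovery): lets `python <this file>` run the tests directly.
if __name__ == "__main__":
    import sys

    sys.exit(pytest.main([__file__, "-v"]))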