# Build recipe for the 'cosmos-predict1' image: CUDA/cuDNN base, system
# packages, Miniconda, the project Conda environment, PyTorch, and the
# source-built Transformer Engine / Apex extensions.

# Base image that ships the CUDA 12.4.1 development toolchain with cuDNN.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Suppress interactive apt prompts during the build only; an ARG is visible to
# every RUN in this stage but is not persisted into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Installation prefix for Miniconda; referenced by later build steps.
ENV CONDA_DIR=/opt/conda

WORKDIR /app

# Install system dependencies (one package per line, sorted for diffability),
# drop the apt lists in the same layer, then enable Git LFS.
RUN apt-get update -y && apt-get install -qqy \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        git-lfs \
        libbz2-dev \
        libffi-dev \
        libgl1-mesa-glx \
        libglib2.0-0 \
        liblzma-dev \
        libmagickwand-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsm6 \
        libsqlite3-dev \
        libssl-dev \
        libxext6 \
        libxml2-dev \
        libxmlsec1-dev \
        llvm \
        make \
        rsync \
        tk-dev \
        wget \
        xz-utils \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install Miniconda into $CONDA_DIR and configure it.
# NOTE(review): the installer is the unpinned "latest" build and is not
# checksum-verified; consider pinning a release for reproducible builds.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    /bin/bash miniconda.sh -b -p $CONDA_DIR && \
    rm miniconda.sh && \
    export PATH=$CONDA_DIR/bin:$PATH && \
    conda clean --all --yes && \
    conda config --set auto_activate_base false && \
    conda config --add channels conda-forge

# Put Conda's base environment on PATH for all following layers.
ENV PATH=$CONDA_DIR/bin:$PATH

# Accept the Conda Terms of Service for the default Anaconda channels.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r

# Copy the build context into the working directory (/app).
# NOTE(review): every later layer is rebuilt whenever any file in the context
# changes; copying only cosmos-predict1.yaml first would improve caching if
# the environment file does not reference other local files - confirm.
COPY . /app

# Create the 'cosmos-predict1' environment from the YAML file and remove the
# downloaded package cache in the same layer so it does not stay in the image.
RUN conda env create -f cosmos-predict1.yaml && \
    conda clean --all --yes

# Make 'cosmos-predict1' the default environment for later layers and runtime.
ENV CONDA_DEFAULT_ENV=cosmos-predict1 \
    PATH=$CONDA_DIR/envs/cosmos-predict1/bin:$PATH

# Install pinned PyTorch, TorchVision and TorchAudio wheels from the cu121 index.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    pip install --no-cache-dir \
        torch==2.3.1 \
        torchvision==0.18.1 \
        torchaudio==2.3.1 \
        --index-url https://download.pytorch.org/whl/cu121

# Locate cudnn.h under the environment's site-packages/nvidia tree and expose
# it in the environment's include directory (symlink, falling back to a copy)
# so the Transformer Engine build can find it. The build aborts if the header
# is missing.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    CUDNN_HEADER_PATH=$(find "$CONDA_PREFIX/lib/python3.10/site-packages/nvidia/" -name "cudnn.h" | head -n 1) && \
    if [ -f "$CUDNN_HEADER_PATH" ]; then \
        echo "Found cudnn.h at: $CUDNN_HEADER_PATH"; \
        mkdir -p "$CONDA_PREFIX/include" && \
        ln -sf "$CUDNN_HEADER_PATH" "$CONDA_PREFIX/include/cudnn.h" || \
        cp "$CUDNN_HEADER_PATH" "$CONDA_PREFIX/include/cudnn.h"; \
    else \
        echo "Error: cudnn.h not found in any expected location within Conda environment. This will likely cause compilation failures."; \
        exit 1; \
    fi

# Symlink the remaining NVIDIA headers from site-packages into the environment
# include directories (Transformer Engine build workaround from INSTALL.md).
# Linking is best-effort, so each `|| true` is scoped to its own `ln` with
# braces: a failure to source conda.sh or to activate the environment still
# fails the build instead of being swallowed and leaving CONDA_PREFIX empty.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    { ln -sf "$CONDA_PREFIX"/lib/python3.10/site-packages/nvidia/*/include/* "$CONDA_PREFIX"/include/ || true; } && \
    { ln -sf "$CONDA_PREFIX"/lib/python3.10/site-packages/nvidia/*/include/* "$CONDA_PREFIX"/include/python3.10 || true; }

# Build and install Transformer Engine. CUDA_HOME is pointed at the Conda
# environment prefix for the duration of the pip command.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    CUDA_HOME=$CONDA_PREFIX pip install --no-cache-dir --no-build-isolation "transformer-engine[pytorch]==1.12.0"

# Build and install Apex with its C++ and CUDA extensions.
# NOTE(review): the clone tracks the default branch head; pin a commit or tag
# for reproducible builds.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    git clone https://github.com/NVIDIA/apex /app/apex && \
    CUDA_HOME=$CONDA_PREFIX pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" /app/apex

# Install MoGe from its Git repository.
RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
    conda activate cosmos-predict1 && \
    pip install --no-cache-dir git+https://github.com/microsoft/MoGe.git

# Make the start.sh script executable.
RUN chmod +x /app/start.sh

# Verification: report the Python version on PATH and list the Conda
# environments (a single layer instead of three).
RUN echo "Verifying Python and Conda installations..." && \
    python --version && \
    conda env list

# Verification: PyTorch / CUDA check executed inside the environment.
RUN echo "Verifying PyTorch and CUDA availability..."
RUN conda run -n cosmos-predict1 python <