# syntax=docker/dockerfile:1
# The directive above must remain the very first line of the file. It selects
# a Dockerfile frontend that supports the heredoc RUN syntax used by the
# verification step later in this file.

# Start from a clean NVIDIA CUDA base image.
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04
# Suppress interactive apt/debconf prompts during the build. Declared as ARG
# rather than ENV so the setting applies to RUN steps only and is not baked
# into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Miniconda install prefix, and its bin directory prepended to PATH.
# These are deliberately two ENV instructions: within a single ENV instruction
# $CONDA_DIR would be substituted with the value it had *before* that
# instruction (i.e. empty), producing a broken PATH.
ENV CONDA_DIR=/opt/conda
ENV PATH=$CONDA_DIR/bin:$PATH

# Working directory for the instructions that follow and for the container.
WORKDIR /app
# Install essential system dependencies required for Miniconda and general
# build tools. Packages are listed one per line, alphabetically, to keep diffs
# small. The apt lists are removed in the same layer so they never reach the
# image.
#
# ca-certificates is listed explicitly because wget only "recommends" it and
# --no-install-recommends would otherwise leave HTTPS downloads dependent on
# whatever the base image happens to ship.
#
# Add any other system libraries that might be needed by transformer_engine or
# other deep learning libs here (for example libstdc++6, libgomp1), though
# these are usually covered by the base image or build-essential.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git \
        libgl1-mesa-glx \
        libglib2.0-0 \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install Miniconda into $CONDA_DIR in batch mode (-b, no prompts), delete the
# installer in the same layer, then configure conda: do not auto-activate the
# base environment and add conda-forge as a channel.
# NOTE(review): "Miniconda3-latest" is unpinned, so rebuilds may pick up a
# different conda release; consider pinning a specific installer version and
# verifying its checksum.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    /bin/bash miniconda.sh -b -p "$CONDA_DIR" && \
    rm miniconda.sh && \
    conda clean --all --yes && \
    conda config --set auto_activate_base false && \
    conda config --add channels conda-forge
# Accept the Conda Terms of Service for the default Anaconda channels so that
# later non-interactive conda commands are not blocked by a ToS prompt.
# conda.sh is sourced first to load conda's shell integration into this RUN's
# shell.
RUN . "$CONDA_DIR/etc/profile.d/conda.sh" && \
    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
    conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
# Copy all local project files into the container's working directory (/app).
# NOTE(review): because the whole build context is copied before the
# environment is created, any source change invalidates the cached environment
# layer. If cosmos-predict1.yaml does not reference other project files,
# copying only that file first would improve cache reuse.
COPY . /app

# Create the Conda environment from the provided YAML file. The later steps
# expect this file to define an environment named 'cosmos-predict1'.
# The package cache is purged in the same layer so downloaded archives do not
# inflate the image.
RUN conda env create -f cosmos-predict1.yaml && \
    conda clean --all --yes
# Make 'cosmos-predict1' the default environment: record its name and put its
# bin directory first on PATH so plain `python` / `pip` resolve to it.
# Both variables can share one ENV instruction because $CONDA_DIR and $PATH
# are substituted with the values set by earlier instructions.
ENV CONDA_DEFAULT_ENV=cosmos-predict1 \
    PATH=$CONDA_DIR/envs/cosmos-predict1/bin:$PATH
# Install pinned PyTorch, TorchVision and TorchAudio wheels with pip inside
# the 'cosmos-predict1' environment, pulling them from the PyTorch CUDA 12.1
# (cu121) wheel index. --no-cache-dir keeps pip's download cache out of the
# layer.
RUN . "$CONDA_DIR/etc/profile.d/conda.sh" && \
    conda activate cosmos-predict1 && \
    pip install --no-cache-dir \
        torch==2.3.1 \
        torchvision==0.18.1 \
        torchaudio==2.3.1 \
        --index-url https://download.pytorch.org/whl/cu121
# --- Install Transformer Engine separately, after PyTorch is in place ---
# Installing it in its own step, after the PyTorch install, is intended to let
# it find the CUDA/cuDNN libraries it needs.
# The requirement is quoted so the shell cannot treat "[pytorch]" as a glob
# pattern.
RUN . "$CONDA_DIR/etc/profile.d/conda.sh" && \
    conda activate cosmos-predict1 && \
    pip install --no-cache-dir "transformer-engine[pytorch]==1.12.0"
# --- End Transformer Engine ---
# --- Verification Steps ---
# A RUN instruction that exits non-zero aborts the build, so these steps act
# as checks by themselves. No follow-up test of "$?" is needed: every RUN
# starts a fresh shell in which "$?" is always 0, so such a test can never
# fire.
RUN echo "Verifying Python and Conda installations..." && \
    python --version && \
    conda env list

# Import torch inside the 'cosmos-predict1' environment and report its CUDA
# status. --no-capture-output makes `conda run` pass this heredoc through on
# stdin and stream the output straight to the build log.
# NOTE(review): GPUs are normally not exposed during `docker build`, so
# "CUDA Available: False" is presumably expected here; the step mainly proves
# that torch imports cleanly.
RUN conda run --no-capture-output -n cosmos-predict1 python <<EOF
print('Verifying PyTorch and CUDA availability...')
import torch
print('PyTorch Version: ' + torch.__version__)
print('CUDA Available: ' + str(torch.cuda.is_available()))
if torch.cuda.is_available():
    print('CUDA Device Name: ' + torch.cuda.get_device_name(0))
else:
    print('CUDA Device Name: N/A')
EOF
# --- End Verification Steps ---
# Make the start.sh script executable.
RUN chmod +x /app/start.sh

# Default command when the container starts. Exec (JSON-array) form is used so
# start.sh runs directly rather than under an intermediate `/bin/sh -c`.
CMD ["/app/start.sh"]