elungky committed on
Commit
193cb9f
·
1 Parent(s): 9679875

Fix 'conda: not found' during Miniconda installation by updating PATH within the RUN command

Browse files
Files changed (1) hide show
  1. Dockerfile +21 -4
Dockerfile CHANGED
@@ -25,10 +25,12 @@ RUN apt-get update -y && apt-get install -qqy \
25
  && rm -rf /var/lib/apt/lists/* \
26
  && git lfs install # Initialize LFS system-wide
27
 
28
- # Install Miniconda (retain our existing approach)
29
  RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
30
  /bin/bash miniconda.sh -b -p $CONDA_DIR && \
31
  rm miniconda.sh && \
 
 
32
  conda clean --all --yes && \
33
  conda config --set auto_activate_base false && \
34
  conda config --add channels conda-forge
@@ -44,7 +46,7 @@ COPY . /app
44
  # Create the Conda environment named 'cosmos-predict1' using the provided YAML file.
45
  RUN conda env create -f cosmos-predict1.yaml
46
 
47
- # Set the default Conda environment to be activated and update PATH
48
  ENV CONDA_DEFAULT_ENV=cosmos-predict1
49
  ENV PATH=$CONDA_DIR/envs/cosmos-predict1/bin:$PATH
50
 
@@ -58,8 +60,6 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
58
  --index-url https://download.pytorch.org/whl/cu121
59
 
60
  # IMPORTANT: Symlink fix for Transformer Engine compilation.
61
- # The `nvidia/cuda` base images place CUDA libraries and headers in /usr/local/cuda.
62
- # We need to ensure that the build system can find cuDNN headers.
63
  ENV CONDA_PREFIX_FIX=/usr/local/cuda
64
  RUN ln -sf $CONDA_PREFIX_FIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX_FIX/include/ || true && \
65
  ln -sf $CONDA_PREFIX_FIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX_FIX/include/python3.10 || true
@@ -83,5 +83,22 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
83
  # Make the start.sh script executable.
84
  RUN chmod +x /app/start.sh
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  # Set the default command to run when the container starts.
87
  CMD ["/app/start.sh"]
 
25
  && rm -rf /var/lib/apt/lists/* \
26
  && git lfs install # Initialize LFS system-wide
27
 
28
+ # Install Miniconda (REVISED: Add $CONDA_DIR/bin to PATH within this RUN command)
29
  RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
30
  /bin/bash miniconda.sh -b -p $CONDA_DIR && \
31
  rm miniconda.sh && \
32
+ # Add Conda to PATH for subsequent commands in this RUN layer
33
+ export PATH=$CONDA_DIR/bin:$PATH && \
34
  conda clean --all --yes && \
35
  conda config --set auto_activate_base false && \
36
  conda config --add channels conda-forge
 
46
  # Create the Conda environment named 'cosmos-predict1' using the provided YAML file.
47
  RUN conda env create -f cosmos-predict1.yaml
48
 
49
+ # Set the default Conda environment to be activated and update PATH (for subsequent layers and runtime)
50
  ENV CONDA_DEFAULT_ENV=cosmos-predict1
51
  ENV PATH=$CONDA_DIR/envs/cosmos-predict1/bin:$PATH
52
 
 
60
  --index-url https://download.pytorch.org/whl/cu121
61
 
62
  # IMPORTANT: Symlink fix for Transformer Engine compilation.
 
 
63
  ENV CONDA_PREFIX_FIX=/usr/local/cuda
64
  RUN ln -sf $CONDA_PREFIX_FIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX_FIX/include/ || true && \
65
  ln -sf $CONDA_PREFIX_FIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX_FIX/include/python3.10 || true
 
83
  # Make the start.sh script executable.
84
  RUN chmod +x /app/start.sh
85
 
86
+ # --- Verification Steps ---
87
+ RUN echo "Verifying Python and Conda installations..."
88
+ RUN python --version
89
+ RUN conda env list
90
+ RUN echo "Verifying PyTorch and CUDA availability..."
91
+ RUN conda run -n cosmos-predict1 python <<EOF
92
+ import torch
93
+ print('PyTorch Version: ' + torch.__version__)
94
+ print('CUDA Available: ' + str(torch.cuda.is_available()))
95
+ if torch.cuda.is_available():
96
+ print('CUDA Device Name: ' + torch.cuda.get_device_name(0))
97
+ else:
98
+ print('CUDA Device Name: N/A')
99
+ EOF
100
+ RUN [ $? -eq 0 ] || echo "PyTorch verification failed. Check dependencies in cosmos-predict1.yaml."
101
+ # --- End Verification Steps ---
102
+
103
  # Set the default command to run when the container starts.
104
  CMD ["/app/start.sh"]