elungky committed on
Commit
5fa8a70
·
1 Parent(s): a1c20fc

Configure Dockerfile with provided cosmos-predict1.yaml and install pip deps

Browse files
Files changed (3) hide show
  1. Dockerfile +61 -6
  2. cosmos-predict1.yaml +16 -16
  3. start.sh +8 -46
Dockerfile CHANGED
@@ -1,15 +1,70 @@
1
- FROM elungky/gen3c:latest
 
 
 
2
 
 
 
 
 
 
 
 
 
3
  WORKDIR /app
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  COPY . /app
6
 
7
- # Install dependencies specific to the GUI/inference server
8
- # This assumes 'gui/requirements.txt' is directly under the '/app' directory after COPY.
9
- RUN pip install -r gui/requirements.txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- # Ensure start.sh is executable
12
  RUN chmod +x /app/start.sh
13
 
14
- # Use start.sh as the entrypoint for your Space
 
15
  CMD ["/app/start.sh"]
 
1
# syntax=docker/dockerfile:1

# Start from a clean NVIDIA CUDA base image.
# 12.4.0-devel-ubuntu22.04 aligns with the cuda=12.4 pins in cosmos-predict1.yaml.
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04

# Build-time only: suppress interactive apt prompts without leaking the
# setting into the runtime environment (ARG, not ENV).
ARG DEBIAN_FRONTEND=noninteractive

# Conda installation prefix. Exported as ENV (not ARG) on purpose: start.sh
# sources $CONDA_DIR/etc/profile.d/conda.sh at container start.
ENV CONDA_DIR=/opt/conda
ENV PATH=$CONDA_DIR/bin:$PATH

# All subsequent commands run from /app.
WORKDIR /app

# System dependencies: wget (Miniconda download), git, build-essential for
# compiling native extensions, and the Mesa GL runtime that many ML/vision
# packages link against. apt lists are removed in the SAME layer so they
# never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libgl1-mesa-glx \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda:
#  - batch install (-b) into $CONDA_DIR, remove the installer in the same layer
#  - clean conda caches to keep the layer small
#  - do not auto-activate 'base' (start.sh activates the project env itself)
#  - add conda-forge for broader package availability
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
    /bin/bash miniconda.sh -b -p "$CONDA_DIR" && \
    rm miniconda.sh && \
    conda clean --all --yes && \
    conda config --set auto_activate_base false && \
    conda config --add channels conda-forge

# Copy ONLY what the environment build needs first, so editing application
# source does not invalidate the (very slow) conda env creation layer below.
COPY cosmos-predict1.yaml /app/
COPY gui/requirements.txt /app/gui/requirements.txt

# Create the 'cosmos-predict1' environment (Python, PyTorch, CUDA and the pip
# dependencies listed in the YAML), then drop conda's package cache.
RUN conda env create -f cosmos-predict1.yaml && conda clean --all --yes

# Make the new environment the default at build time and at runtime: its bin
# dir is prepended to PATH so python/pip/uvicorn resolve into it.
ENV CONDA_DEFAULT_ENV=cosmos-predict1
ENV PATH=$CONDA_DIR/envs/cosmos-predict1/bin:$PATH

# Now bring in the rest of the project sources.
COPY . /app

# --- Verification (single layer; optional but useful for debugging) ---
# NOTE: the python -c payload is single-quoted for the shell and uses plain
# double-quoted Python strings. The previous version nested single quotes
# inside a single-quoted f-string ('N/A'), which is a SyntaxError on
# Python 3.10 (quote reuse in f-strings only arrived in 3.12).
# torch.cuda.is_available() is expected to be False at build time (no GPU);
# only the successful import is meaningful here.
RUN python --version && \
    conda env list && \
    conda run -n cosmos-predict1 python -c 'import torch; print("PyTorch Version:", torch.__version__); print("CUDA Available:", torch.cuda.is_available())' \
    || echo "PyTorch verification failed. Check dependencies in cosmos-predict1.yaml."
# --- End Verification ---

# Make the start.sh script executable.
RUN chmod +x /app/start.sh

# start.sh activates the conda env and exec's the FastAPI inference server.
CMD ["/app/start.sh"]
cosmos-predict1.yaml CHANGED
@@ -1,22 +1,9 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
-
16
- # conda env create --file cosmos-predict1.yaml
17
  name: cosmos-predict1
18
  channels:
 
 
19
  - conda-forge
 
20
  dependencies:
21
  - python=3.10
22
  - pip=25.0
@@ -27,3 +14,16 @@ dependencies:
27
  - cuda=12.4
28
  - cuda-nvcc=12.4
29
  - cuda-toolkit=12.4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  name: cosmos-predict1
2
  channels:
3
+ - pytorch
4
+ - nvidia
5
  - conda-forge
6
+ - defaults
7
  dependencies:
8
  - python=3.10
9
  - pip=25.0
 
14
  - cuda=12.4
15
  - cuda-nvcc=12.4
16
  - cuda-toolkit=12.4
17
# PyTorch stack from the 'pytorch' channel.
# NOTE(review): the previous pin '2.5.0a0' is a pre-release/NGC-container style
# tag that is not published on the pytorch conda channel; the released
# conda build is 2.5.0.
- pytorch=2.5.0
- torchvision
- torchaudio
# Keep the PyTorch CUDA build aligned with the cuda=12.4 packages pinned
# above (the previous value, 12.1, mixed two CUDA toolkits in one env).
- pytorch-cuda=12.4
# pip dependencies installed into the env after the conda solve.
- pip:
  # Plain relative path, resolved from the directory containing this YAML;
  # the 'file:' prefix is not valid together with '-r'.
  - -r gui/requirements.txt
  - uvicorn[standard]
  - fastapi
  # Add any other crucial top-level pip dependencies here if your project needs them
  # Example: - accelerate
  # Example: - transformers
start.sh CHANGED
@@ -1,61 +1,23 @@
1
  #!/bin/bash
2
- set -eux # Keep this for detailed logging
3
 
4
  export CUDA_VISIBLE_DEVICES="0"
5
  export CUDA_HOME="/usr/local/cuda"
6
 
7
- echo "Attempting to locate and activate Conda environment..."
8
-
9
- # Add a common Conda binary path to PATH (keeping this just in case, but it's failing)
10
- export PATH="/opt/conda/bin:$PATH"
11
-
12
- # --- DIAGNOSTIC STEPS: List contents of common Conda installation directories ---
13
- echo "Listing contents of /opt/:"
14
- ls -la /opt/ || echo "ls /opt/ failed or directory not found."
15
- echo "Listing contents of /usr/local/:"
16
- ls -la /usr/local/ || echo "ls /usr/local/ failed or directory not found."
17
- echo "Listing contents of /root/ (if accessible):"
18
- ls -la /root/ || echo "ls /root/ failed or directory not found (may be permission denied)."
19
- echo "--- END DIAGNOSTIC ---"
20
-
21
- # 1. Try to find the 'conda' executable in the system's PATH
22
- CONDA_EXEC=$(which conda)
23
-
24
- if [ -z "$CONDA_EXEC" ]; then
25
- echo "ERROR: 'conda' executable still not found in PATH."
26
- echo "This strongly suggests Conda is either not installed in the base image 'elungky/gen3c:latest', or it's in a highly unusual and non-standard location."
27
- exit 1 # Exit here, no point in continuing if conda isn't found
28
- fi
29
-
30
- # The following lines will only execute if CONDA_EXEC is not empty
31
- echo "Found 'conda' executable at: $CONDA_EXEC"
32
-
33
- # 2. Derive the base Conda installation path from the executable's location.
34
- CONDA_BASE_PATH=$(dirname $(dirname "$CONDA_EXEC"))
35
- echo "Derived Conda base path: $CONDA_BASE_PATH"
36
-
37
- # 3. Construct the path to conda.sh script based on the derived base path
38
- CONDA_SH_PATH="$CONDA_BASE_PATH/etc/profile.d/conda.sh"
39
-
40
- if [ -f "$CONDA_SH_PATH" ]; then
41
- echo "Found conda.sh at: $CONDA_SH_PATH"
42
- source "$CONDA_SH_PATH" || { echo "ERROR: Failed to source conda.sh script at $CONDA_SH_PATH. Check permissions."; exit 1; }
43
- else
44
- echo "ERROR: conda.sh not found at expected location derived from 'conda' executable: $CONDA_SH_PATH"
45
- exit 1
46
- fi
47
-
48
- echo "Conda environment initialized successfully."
49
 
50
  # Activate the specific conda environment
51
- conda activate cosmos-predict1 || { echo "ERROR: Failed to activate conda environment 'cosmos-predict1'. Ensure it exists and is accessible for this user."; exit 1; }
 
 
52
 
53
- # Set PYTHONPATH after conda activation, as conda might adjust PATH/PYTHONPATH internally.
54
  export PYTHONPATH="/app:/app/gui/api"
55
 
56
  echo "Starting GEN3C FastAPI inference server..."
57
 
58
- export GEN3C_CKPT_PATH="/app/checkpoints"
59
  export GEN3C_GPU_COUNT=1
60
 
 
61
  exec uvicorn gui.api.server:app --host 0.0.0.0 --port 7860 --proxy-headers
 
1
#!/bin/bash
# Launch script for the GEN3C FastAPI inference server.
# -e: abort on any error, -u: treat unset variables as errors,
# -x: echo each command for debugging in the container logs.
set -eux

# Pin the server to the first GPU and point tooling at the CUDA install.
export CUDA_VISIBLE_DEVICES="0"
export CUDA_HOME="/usr/local/cuda"

echo "Activating Conda environment and starting server..."

# CONDA_DIR is exported by the Dockerfile; with 'set -u' an unset value would
# kill the script before any useful error, so default to the conventional
# path so the script also works outside that image.
CONDA_DIR="${CONDA_DIR:-/opt/conda}"

# Activate the specific conda environment (path quoted in case of spaces).
source "$CONDA_DIR/etc/profile.d/conda.sh"
conda activate cosmos-predict1

# Set PYTHONPATH after activation: conda activate may adjust PATH/PYTHONPATH
# internally, and the project modules must win.
export PYTHONPATH="/app:/app/gui/api"

echo "Starting GEN3C FastAPI inference server..."

export GEN3C_CKPT_PATH="/app/checkpoints"   # model checkpoint location inside the image
export GEN3C_GPU_COUNT=1

# 'exec' replaces this shell so uvicorn becomes PID 1 and receives
# SIGTERM/SIGINT directly from the container runtime.
exec uvicorn gui.api.server:app --host 0.0.0.0 --port 7860 --proxy-headers