elungky committed on
Commit
fb926f5
·
1 Parent(s): 598f651

Final Dockerfile syntax correction: ensure chmod +x is a standalone RUN command

Browse files
Files changed (1) hide show
  1. Dockerfile +23 -4
Dockerfile CHANGED
@@ -49,16 +49,35 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
49
  torchaudio==2.3.1 \
50
  --index-url https://download.pytorch.org/whl/cu121
51
 
52
- # Install Transformer Engine using the pre-built wheel
53
  # Ensure the filename matches your actual wheel file.
54
- COPY ./transformer_engine.whl /tmp/
 
 
55
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
56
  conda activate cosmos-predict1 && \
57
- pip install --no-cache-dir /tmp/transformer_engine.whl
58
 
59
  # Make the start.sh script executable.
60
- # THIS MUST BE A SEPARATE RUN COMMAND.
61
  RUN chmod +x /app/start.sh
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # Set the default command to run when the container starts.
64
  CMD ["/app/start.sh"]
 
49
  torchaudio==2.3.1 \
50
  --index-url https://download.pytorch.org/whl/cu121
51
 
52
+ # Copy the pre-built Transformer Engine wheel into the container
53
  # Ensure the filename matches your actual wheel file.
54
+ COPY ./transformer_engine_torch-1.12.0+cu121-cp310-cp310-linux_x86_64.whl /tmp/
55
+
56
+ # Install Transformer Engine using the pre-built wheel
57
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
58
  conda activate cosmos-predict1 && \
59
+ pip install --no-cache-dir /tmp/transformer_engine_torch-1.12.0+cu121-cp310-cp310-linux_x86_64.whl
60
 
61
  # Make the start.sh script executable.
62
+ # THIS IS A STANDALONE RUN COMMAND.
63
  RUN chmod +x /app/start.sh
64
 
65
+ # --- Verification Steps ---
66
+ RUN echo "Verifying Python and Conda installations..."
67
+ RUN python --version
68
+ RUN conda env list
69
+ RUN echo "Verifying PyTorch and CUDA availability..."
70
+ RUN conda run -n cosmos-predict1 python <<EOF
71
+ import torch  # fails this RUN (and the build) if PyTorch is not importable in the env
72
+ print('PyTorch Version: ' + torch.__version__)
73
+ print('CUDA Available: ' + str(torch.cuda.is_available()))
74
+ if torch.cuda.is_available():  # NOTE(review): likely False at build time unless the builder exposes a GPU - confirm
75
+     print('CUDA Device Name: ' + torch.cuda.get_device_name(0))
76
+ else:
77
+     print('CUDA Device Name: N/A')
78
+ EOF
79
+ RUN conda run -n cosmos-predict1 python -c "import torch" || { echo "PyTorch verification failed. Check dependencies in cosmos-predict1.yaml."; exit 1; }
80
+ # --- End Verification Steps ---
81
+
82
  # Set the default command to run when the container starts.
83
  CMD ["/app/start.sh"]