elungky committed on
Commit
54bda79
·
1 Parent(s): 62d1e04

Attempt to fix Exit code 137 (OOM) by using --no-build-isolation for transformer-engine

Browse files
Files changed (1) hide show
  1. Dockerfile +2 -1
Dockerfile CHANGED
@@ -61,7 +61,8 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
61
  # Install Transformer Engine separately after PyTorch and cuDNN are in place and headers are linked.
62
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
63
  conda activate cosmos-predict1 && \
64
- pip install --no-cache-dir transformer-engine[pytorch]==1.12.0
 
65
 
66
  # --- Verification Steps ---
67
  RUN echo "Verifying Python and Conda installations..."
 
61
  # Install Transformer Engine separately after PyTorch and cuDNN are in place and headers are linked.
62
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
63
  conda activate cosmos-predict1 && \
64
+ #pip install transformer-engine[pytorch]==1.12.0
65
+ pip install --no-cache-dir --no-build-isolation transformer-engine[pytorch]==1.12.0
66
 
67
  # --- Verification Steps ---
68
  RUN echo "Verifying Python and Conda installations..."