elungky committed on
Commit
106eac9
·
1 Parent(s): 46cbf58

Removed header symlinks before transformer-engine install

Browse files
Files changed (1) hide show
  1. Dockerfile +0 -27
Dockerfile CHANGED
@@ -49,37 +49,10 @@ RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
49
  torchaudio==2.3.1 \
50
  --index-url https://download.pytorch.org/whl/cu121
51
 
52
- # --- NEW SECTION: Patch Transformer Engine linking issues by creating symlinks for headers ---
53
- # These commands must run AFTER conda env creation and BEFORE transformer-engine pip install.
54
- RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
55
- conda activate cosmos-predict1 && \
56
- echo "Creating symlinks for NVIDIA headers..." && \
57
- ln -sf $CONDA_PREFIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/ && \
58
- ln -sf $CONDA_PREFIX/lib/python3.10/site-packages/nvidia/*/include/* $CONDA_PREFIX/include/python3.10/
59
- # --- END NEW SECTION ---
60
-
61
  # Install Transformer Engine separately after PyTorch and cuDNN are in place and headers are linked.
62
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
63
  conda activate cosmos-predict1 && \
64
  pip install transformer-engine[pytorch]==1.12.0
65
- #pip install --no-cache-dir --no-build-isolation transformer-engine[pytorch]==1.12.0
66
-
67
- # --- Verification Steps ---
68
- RUN echo "Verifying Python and Conda installations..."
69
- RUN python --version
70
- RUN conda env list
71
- RUN echo "Verifying PyTorch and CUDA availability..."
72
- RUN conda run -n cosmos-predict1 python <<EOF
73
- import torch
74
- print('PyTorch Version: ' + torch.__version__)
75
- print('CUDA Available: ' + str(torch.cuda.is_available()))
76
- if torch.cuda.is_available():
77
- print('CUDA Device Name: ' + torch.cuda.get_device_name(0))
78
- else:
79
- print('CUDA Device Name: N/A')
80
- EOF
81
- RUN [ $? -eq 0 ] || echo "PyTorch verification failed. Check dependencies in cosmos-predict1.yaml."
82
- # --- End Verification Steps ---
83
 
84
  # Make the start.sh script executable.
85
  RUN chmod +x /app/start.sh
 
49
  torchaudio==2.3.1 \
50
  --index-url https://download.pytorch.org/whl/cu121
51
 
 
 
 
 
 
 
 
 
 
52
  # Install Transformer Engine separately after PyTorch and cuDNN are in place and headers are linked.
53
  RUN . $CONDA_DIR/etc/profile.d/conda.sh && \
54
  conda activate cosmos-predict1 && \
55
  pip install transformer-engine[pytorch]==1.12.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Make the start.sh script executable.
58
  RUN chmod +x /app/start.sh