jbilcke-hf HF Staff committed on
Commit
e6e8290
·
1 Parent(s): e7fb669

meh, software rendering.. let's try EGL again

Browse files
Files changed (4) hide show
  1. Dockerfile +37 -6
  2. init_gpu.py +159 -0
  3. legacy/Dockerfile +126 -0
  4. start_server.sh +3 -0
Dockerfile CHANGED
@@ -11,7 +11,7 @@ ENV PYOPENGL_PLATFORM="egl"
11
  ENV NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility,video
12
 
13
  # Remove any third-party apt sources to avoid issues with expiring keys.
14
- # Install some basic utilities
15
  RUN rm -f /etc/apt/sources.list.d/*.list && \
16
  apt-get update && apt-get install -y --no-install-recommends \
17
  curl \
@@ -28,14 +28,41 @@ RUN rm -f /etc/apt/sources.list.d/*.list && \
28
  nano \
29
  bzip2 \
30
  libx11-6 \
31
- # if the next line does not work, we can also try this: libegl1-mesa-dev
32
- libegl1 \
33
- libgles2 \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  build-essential \
35
  libsndfile-dev \
36
  software-properties-common \
37
  && rm -rf /var/lib/apt/lists/*
38
- RUN echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_nvidia.so.0"}}' >> /usr/share/glvnd/egl_vendor.d/10_nvidia.json
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  RUN add-apt-repository ppa:flexiondotorg/nvtop && \
41
  apt-get upgrade -y && \
@@ -96,6 +123,10 @@ RUN mkdir /data && chown user:user /data
96
 
97
  RUN mkdir /data/samples && chown user:user /data/samples
98
 
 
 
 
 
99
  #######################################
100
  # End root user section
101
  #######################################
@@ -123,4 +154,4 @@ ENV PYTHONUNBUFFERED=1 \
123
  SYSTEM=spaces \
124
  SHELL=/bin/bash
125
 
126
- CMD ["./start_server.sh"]
 
11
  ENV NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility,video
12
 
13
  # Remove any third-party apt sources to avoid issues with expiring keys.
14
+ # Install some basic utilities AND critical EGL/Mesa packages
15
  RUN rm -f /etc/apt/sources.list.d/*.list && \
16
  apt-get update && apt-get install -y --no-install-recommends \
17
  curl \
 
28
  nano \
29
  bzip2 \
30
  libx11-6 \
31
+ # EGL and Mesa packages
32
+ libegl1-mesa \
33
+ libegl1-mesa-dev \
34
+ libgl1-mesa-glx \
35
+ libgl1-mesa-dri \
36
+ libgles2-mesa \
37
+ libgles2-mesa-dev \
38
+ mesa-utils \
39
+ mesa-utils-extra \
40
+ # more OpenGL stuff
41
+ libglfw3 \
42
+ libglfw3-dev \
43
+ freeglut3-dev \
44
+ # X11 packages: xvfb (virtual framebuffer X server) and x11-utils (X11 diagnostic tools)
45
+ xvfb \
46
+ x11-utils \
47
+ # Build tools
48
  build-essential \
49
  libsndfile-dev \
50
  software-properties-common \
51
  && rm -rf /var/lib/apt/lists/*
52
+
53
+ # NVIDIA EGL vendor config stuff
54
+ RUN mkdir -p /usr/share/glvnd/egl_vendor.d && \
55
+ echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_nvidia.so.0"}}' > /usr/share/glvnd/egl_vendor.d/10_nvidia.json
56
+
57
+ RUN echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_mesa.so.0"}}' > /usr/share/glvnd/egl_vendor.d/50_mesa.json
58
+
59
+ # Set up library paths for EGL
60
+ ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
61
+
62
+ # more EGL environment variables
63
+ ENV EGL_PLATFORM=device
64
+ ENV MESA_GL_VERSION_OVERRIDE=4.5
65
+ ENV MESA_GLSL_VERSION_OVERRIDE=450
66
 
67
  RUN add-apt-repository ppa:flexiondotorg/nvtop && \
68
  apt-get upgrade -y && \
 
123
 
124
  RUN mkdir /data/samples && chown user:user /data/samples
125
 
126
+ # Make sure the /dev/dri directory exists (this creates no device nodes itself)
127
+ RUN mkdir -p /dev/dri && \
128
+ chmod 755 /dev/dri
129
+
130
  #######################################
131
  # End root user section
132
  #######################################
 
154
  SYSTEM=spaces \
155
  SHELL=/bin/bash
156
 
157
+ CMD ["./start_server.sh"]
init_gpu.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ GPU initialization script for MuJoCo EGL rendering in containerized environments.
4
+ This should be run before starting the notebook to ensure GPU is properly set up.
5
+ """
6
+
7
+ import os
8
+ import subprocess
9
+ import sys
10
+
11
def check_nvidia_driver():
    """Check whether the NVIDIA driver is reachable through ``nvidia-smi``.

    Runs ``nvidia-smi`` without arguments and prints a one-line status. On
    success, the third line of its output (presumably the driver info line)
    is echoed when the output is long enough to contain it.

    Returns:
        bool: True if ``nvidia-smi`` exited with status 0; False if it
        exited with a non-zero status, was not found, or could not be
        executed.
    """
    try:
        result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    except FileNotFoundError:
        print("✗ nvidia-smi not found")
        return False
    except OSError as e:
        # e.g. PermissionError when the binary exists but cannot be executed
        print(f"✗ nvidia-smi could not be executed: {e}")
        return False

    if result.returncode != 0:
        print("✗ NVIDIA driver not accessible")
        return False

    print("✓ NVIDIA driver accessible")
    output_lines = result.stdout.split('\n')
    # Guard the index: a short or empty output must not crash the check.
    if len(output_lines) > 2:
        print(output_lines[2])  # Driver info line
    return True
25
+
26
def check_egl_libs():
    """Try to load the shared libraries used for EGL rendering.

    Attempts to load ``libEGL.so.1``, ``libGL.so.1`` and
    ``libEGL_nvidia.so.0`` with ``ctypes.CDLL`` and prints one status line
    per library. Every library is tried, even after an earlier failure.

    Returns:
        bool: True if every library loaded, False if at least one failed.
    """
    import ctypes

    libs_to_check = (
        'libEGL.so.1',
        'libGL.so.1',
        'libEGL_nvidia.so.0',
    )

    all_loaded = True
    for lib in libs_to_check:
        try:
            ctypes.CDLL(lib)
        except OSError as e:
            print(f"✗ Failed to load {lib}: {e}")
            all_loaded = False
        else:
            print(f"✓ {lib} loaded successfully")
    return all_loaded
41
+
42
def test_egl_device():
    """Check that an EGL display can be obtained and initialized.

    Uses the ``OpenGL.EGL`` bindings to get the default display, initialize
    it, print the reported EGL version, and terminate the display again.

    Returns:
        bool: True if EGL was initialized; False if the display could not
        be obtained or initialized, or if any step raised (including the
        ``OpenGL`` package failing to import).
    """
    try:
        import ctypes
        from OpenGL import EGL

        display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
        if display == EGL.EGL_NO_DISPLAY:
            print("✗ Failed to get EGL display")
            return False

        # eglInitialize reports the version through EGLint pointers. EGLint
        # is a 32-bit int, so use c_int; c_long is 64-bit on LP64 Linux and
        # does not match the expected pointer type.
        major = ctypes.c_int()
        minor = ctypes.c_int()
        if not EGL.eglInitialize(display, ctypes.byref(major), ctypes.byref(minor)):
            print("✗ Failed to initialize EGL")
            return False

        print(f"✓ EGL initialized successfully (version {major.value}.{minor.value})")

        # Release the display connection opened by eglInitialize.
        EGL.eglTerminate(display)
        return True

    except Exception as e:
        # Diagnostic script: report any failure instead of crashing.
        print(f"✗ EGL test failed: {e}")
        return False
71
+
72
def test_mujoco_rendering():
    """Check that MuJoCo can create an offscreen renderer.

    Builds a minimal model (a single box geom) from an XML string and tries
    to construct a 320x240 ``mujoco.Renderer`` for it. The renderer is
    released again once creation has succeeded.

    Returns:
        bool: True if the renderer was created; False if ``mujoco`` is not
        installed, the renderer could not be created, or any other step
        raised.
    """
    try:
        import mujoco

        # Create a simple model
        xml = """
        <mujoco>
          <worldbody>
            <body>
              <geom type="box" size="1 1 1"/>
            </body>
          </worldbody>
        </mujoco>
        """

        model = mujoco.MjModel.from_xml_string(xml)

        # Try to create a renderer (this is where EGL issues usually surface)
        try:
            renderer = mujoco.Renderer(model, height=240, width=320)
        except Exception as e:
            print(f"✗ MuJoCo renderer creation failed: {e}")
            return False

        print("✓ MuJoCo renderer created successfully")

        # Free the rendering context instead of leaking it. close() is
        # looked up defensively in case the installed version lacks it.
        close = getattr(renderer, "close", None)
        if callable(close):
            close()
        return True

    except ImportError:
        print("✗ MuJoCo not installed")
        return False
    except Exception as e:
        print(f"✗ MuJoCo test failed: {e}")
        return False
105
+
106
def main():
    """Run every GPU check in order and print a pass/fail summary.

    Forces ``MUJOCO_GL``, ``PYOPENGL_PLATFORM`` and ``EGL_PLATFORM`` in this
    process's environment, runs the driver, library, EGL and MuJoCo checks
    (a check that raises is recorded as failed), then prints one summary
    line per check.

    Returns:
        int: 0 if every check passed, 1 otherwise.
    """
    separator = "=" * 50
    print("🔧 Initializing GPU for MuJoCo rendering...")
    print(separator)

    # Set environment variables
    forced_env = {
        'MUJOCO_GL': 'egl',
        'PYOPENGL_PLATFORM': 'egl',
        'EGL_PLATFORM': 'device',
    }
    os.environ.update(forced_env)

    print("Environment variables set:")
    for key in forced_env:
        print(f" {key}: {os.environ.get(key)}")
    print()

    # Run checks
    checks = [
        ("NVIDIA Driver", check_nvidia_driver),
        ("EGL Libraries", lambda: check_egl_libs() or True),  # Always continue
        ("EGL Device", test_egl_device),
        ("MuJoCo Rendering", test_mujoco_rendering),
    ]

    outcomes = []
    for label, run_check in checks:
        print(f"Checking {label}...")
        try:
            outcome = run_check()
        except Exception as e:
            print(f"✗ {label} check failed with exception: {e}")
            outcome = False
        outcomes.append((label, outcome))
        print()

    # Summary
    print(separator)
    print("🔍 Summary:")
    for name, passed in outcomes:
        status = "✓ PASS" if passed else "✗ FAIL"
        print(f" {name}: {status}")

    if all(passed for _, passed in outcomes):
        print("\n🎉 All checks passed! GPU rendering should work.")
        return 0

    print("\n⚠️ Some checks failed. GPU rendering may not work properly.")
    return 1


if __name__ == "__main__":
    sys.exit(main())
legacy/Dockerfile ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM nvidia/cuda:12.8.1-devel-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive \
    TZ=Europe/Paris

# for headless GPU rendering
ENV MUJOCO_GL="egl"
ENV PYOPENGL_PLATFORM="egl"

# requirements for headless GPU rendering, see also https://github.com/mmatl/pyrender/issues/149
ENV NVIDIA_DRIVER_CAPABILITIES=compute,graphics,utility,video

# Remove any third-party apt sources to avoid issues with expiring keys.
# Install some basic utilities
RUN rm -f /etc/apt/sources.list.d/*.list && \
    apt-get update && apt-get install -y --no-install-recommends \
    curl \
    ca-certificates \
    sudo \
    git \
    wget \
    procps \
    git-lfs \
    zip \
    unzip \
    htop \
    vim \
    nano \
    bzip2 \
    libx11-6 \
    # if the next line does not work, we can also try this: libegl1-mesa-dev
    libegl1 \
    libgles2 \
    build-essential \
    libsndfile-dev \
    software-properties-common \
    && rm -rf /var/lib/apt/lists/*

# Register the NVIDIA EGL vendor library with glvnd.
# Create the directory first and overwrite (>) rather than append (>>):
# appending to an already existing file would produce invalid JSON.
RUN mkdir -p /usr/share/glvnd/egl_vendor.d && \
    echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_nvidia.so.0"}}' > /usr/share/glvnd/egl_vendor.d/10_nvidia.json

RUN add-apt-repository ppa:flexiondotorg/nvtop && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends nvtop

RUN curl -fsSL https://deb.nodesource.com/setup_22.x | sudo bash - && \
    apt-get install -y nodejs && \
    npm install -g configurable-http-proxy

# Create a working directory
WORKDIR /app

# Create a non-root user and switch to it
RUN adduser --disabled-password --gecos '' --shell /bin/bash user \
    && chown -R user:user /app
RUN echo "user ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-user
USER user

# All users can use /home/user as their home directory
ENV HOME=/home/user
RUN mkdir $HOME/.cache $HOME/.config \
    && chmod -R 777 $HOME

# Set up the Conda environment
ENV CONDA_AUTO_UPDATE_CONDA=false \
    PATH=$HOME/miniconda/bin:$PATH
RUN curl -sLo ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-py313_25.5.1-0-Linux-x86_64.sh \
    && chmod +x ~/miniconda.sh \
    && ~/miniconda.sh -b -p ~/miniconda \
    && rm ~/miniconda.sh \
    && conda clean -ya

ENV CONDA_OVERRIDE_CUDA="12.8"

# Make sure that Jax and cuSPARSE are properly installed for CUDA 12.x
RUN conda install nvidia/label/cuda-12.8.1::cuda-toolkit nvidia/label/cuda-12.8.1::libcusparse
RUN conda install "jaxlib=*=*cuda*" jax -c conda-forge

WORKDIR $HOME/app

#######################################
# Start root user section
#######################################

USER root

# User Debian packages
## Security warning : Potential user code executed as root (build time)
RUN --mount=target=/root/packages.txt,source=packages.txt \
    apt-get update && \
    xargs -r -a /root/packages.txt apt-get install -y --no-install-recommends \
    && rm -rf /var/lib/apt/lists/*

RUN --mount=target=/root/on_startup.sh,source=on_startup.sh,readwrite \
    bash /root/on_startup.sh

RUN mkdir /data && chown user:user /data

RUN mkdir /data/samples && chown user:user /data/samples

#######################################
# End root user section
#######################################

USER user

# Python packages
RUN --mount=target=requirements.txt,source=requirements.txt \
    pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

RUN chmod +x start_server.sh

COPY --chown=user login.html /home/user/miniconda/lib/python3.13/site-packages/jupyter_server/templates/login.html

COPY --chown=user samples/ /data/samples/

ENV PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces \
    SHELL=/bin/bash

CMD ["./start_server.sh"]
start_server.sh CHANGED
@@ -3,6 +3,9 @@ JUPYTER_TOKEN="${JUPYTER_TOKEN:=huggingface}"
3
 
4
  NOTEBOOK_DIR="/data"
5
 
 
 
 
6
  # this will download stuff used by Mujoco (the collection of models)
7
  python init_mujoco.py
8
 
 
3
 
4
  NOTEBOOK_DIR="/data"
5
 
6
+ # perform checks on the GPU configuration
7
+ python init_gpu.py
8
+
9
  # this will download stuff used by Mujoco (the collection of models)
10
  python init_mujoco.py
11