jbilcke-hf HF Staff committed on
Commit
9b8c4e3
·
1 Parent(s): e6e8290
Files changed (3) hide show
  1. Dockerfile +12 -2
  2. init_gpu.py +86 -34
  3. start_server.sh +10 -0
Dockerfile CHANGED
@@ -50,19 +50,29 @@ RUN rm -f /etc/apt/sources.list.d/*.list && \
50
  software-properties-common \
51
  && rm -rf /var/lib/apt/lists/*
52
 
53
- # NVIDIA EGL vendor config stuff
54
  RUN mkdir -p /usr/share/glvnd/egl_vendor.d && \
55
  echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_nvidia.so.0"}}' > /usr/share/glvnd/egl_vendor.d/10_nvidia.json
56
 
 
57
  RUN echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_mesa.so.0"}}' > /usr/share/glvnd/egl_vendor.d/50_mesa.json
58
 
 
 
 
 
 
 
 
 
59
  # Set up library paths for EGL
60
- ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
61
 
62
  # more EGL environment variables
63
  ENV EGL_PLATFORM=device
64
  ENV MESA_GL_VERSION_OVERRIDE=4.5
65
  ENV MESA_GLSL_VERSION_OVERRIDE=450
 
66
 
67
  RUN add-apt-repository ppa:flexiondotorg/nvtop && \
68
  apt-get upgrade -y && \
 
50
  software-properties-common \
51
  && rm -rf /var/lib/apt/lists/*
52
 
53
+ # Create the NVIDIA EGL vendor config
54
  RUN mkdir -p /usr/share/glvnd/egl_vendor.d && \
55
  echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_nvidia.so.0"}}' > /usr/share/glvnd/egl_vendor.d/10_nvidia.json
56
 
57
+ # Create additional EGL configs that might be needed
58
  RUN echo '{"file_format_version": "1.0.0", "ICD": {"library_path": "libEGL_mesa.so.0"}}' > /usr/share/glvnd/egl_vendor.d/50_mesa.json
59
 
60
+ # Create symlinks for NVIDIA EGL libraries if they exist in different locations
61
+ RUN if [ -f /usr/local/cuda/lib64/libEGL_nvidia.so.0 ]; then \
62
+ ln -sf /usr/local/cuda/lib64/libEGL_nvidia.so.0 /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.0; \
63
+ fi && \
64
+ if [ -f /usr/local/nvidia/lib64/libEGL_nvidia.so.0 ]; then \
65
+ ln -sf /usr/local/nvidia/lib64/libEGL_nvidia.so.0 /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.0; \
66
+ fi
67
+
68
  # Set up library paths for EGL
69
+ ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}"
70
 
71
  # more EGL environment variables
72
  ENV EGL_PLATFORM=device
73
  ENV MESA_GL_VERSION_OVERRIDE=4.5
74
  ENV MESA_GLSL_VERSION_OVERRIDE=450
75
+ ENV __EGL_VENDOR_LIBRARY_DIRS=/usr/share/glvnd/egl_vendor.d
76
 
77
  RUN add-apt-repository ppa:flexiondotorg/nvtop && \
78
  apt-get upgrade -y && \
init_gpu.py CHANGED
@@ -40,37 +40,62 @@ def check_egl_libs():
40
  print(f"✗ Failed to load {lib}: {e}")
41
 
42
  def test_egl_device():
43
- """Test EGL device creation."""
44
  try:
45
- # Try to create an EGL display
46
  from OpenGL import EGL
47
  import ctypes
48
 
49
- # Get EGL display
50
- display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
51
- if display == EGL.EGL_NO_DISPLAY:
52
- print("✗ Failed to get EGL display")
53
- return False
54
-
55
- # Initialize EGL
56
- major = ctypes.c_long()
57
- minor = ctypes.c_long()
58
- if not EGL.eglInitialize(display, ctypes.byref(major), ctypes.byref(minor)):
59
- print("✗ Failed to initialize EGL")
60
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- print(f"✓ EGL initialized successfully (version {major.value}.{minor.value})")
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- # Clean up
65
- EGL.eglTerminate(display)
66
- return True
67
 
68
  except Exception as e:
69
  print(f"✗ EGL test failed: {e}")
70
  return False
71
 
72
  def test_mujoco_rendering():
73
- """Test MuJoCo rendering capability."""
74
  try:
75
  import mujoco
76
 
@@ -87,14 +112,38 @@ def test_mujoco_rendering():
87
 
88
  model = mujoco.MjModel.from_xml_string(xml)
89
 
90
- # Try to create a renderer (this is where EGL issues usually surface)
91
- try:
92
- renderer = mujoco.Renderer(model, height=240, width=320)
93
- print("✓ MuJoCo renderer created successfully")
94
- return True
95
- except Exception as e:
96
- print(f"✗ MuJoCo renderer creation failed: {e}")
97
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  except ImportError:
100
  print("✗ MuJoCo not installed")
@@ -111,7 +160,7 @@ def main():
111
  # Set environment variables
112
  os.environ['MUJOCO_GL'] = 'egl'
113
  os.environ['PYOPENGL_PLATFORM'] = 'egl'
114
- os.environ['EGL_PLATFORM'] = 'device'
115
 
116
  print("Environment variables set:")
117
  print(f" MUJOCO_GL: {os.environ.get('MUJOCO_GL')}")
@@ -141,18 +190,21 @@ def main():
141
  # Summary
142
  print("=" * 50)
143
  print("🔍 Summary:")
144
- all_passed = True
 
 
 
145
  for name, passed in results:
146
  status = "✓ PASS" if passed else "✗ FAIL"
147
  print(f" {name}: {status}")
148
- if not passed:
149
- all_passed = False
150
 
151
- if all_passed:
152
- print("\n🎉 All checks passed! GPU rendering should work.")
 
153
  return 0
154
  else:
155
- print("\n⚠️ Some checks failed. GPU rendering may not work properly.")
 
156
  return 1
157
 
158
  if __name__ == "__main__":
 
40
  print(f"✗ Failed to load {lib}: {e}")
41
 
42
  def test_egl_device():
43
+ """Test EGL device creation with multiple approaches."""
44
  try:
 
45
  from OpenGL import EGL
46
  import ctypes
47
 
48
+ # Method 1: Try platform device display (preferred for headless)
49
+ try:
50
+ display = EGL.eglGetPlatformDisplay(EGL.EGL_PLATFORM_DEVICE_EXT,
51
+ EGL.EGL_DEFAULT_DISPLAY,
52
+ None)
53
+ if display != EGL.EGL_NO_DISPLAY:
54
+ major = ctypes.c_long()
55
+ minor = ctypes.c_long()
56
+ if EGL.eglInitialize(display, ctypes.byref(major), ctypes.byref(minor)):
57
+ print(f"✓ EGL platform device initialized (version {major.value}.{minor.value})")
58
+ EGL.eglTerminate(display)
59
+ return True
60
+ except Exception as e:
61
+ print(f" Platform device method failed: {e}")
62
+
63
+ # Method 2: Try default display
64
+ try:
65
+ display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
66
+ if display != EGL.EGL_NO_DISPLAY:
67
+ major = ctypes.c_long()
68
+ minor = ctypes.c_long()
69
+ if EGL.eglInitialize(display, ctypes.byref(major), ctypes.byref(minor)):
70
+ print(f"✓ EGL default display initialized (version {major.value}.{minor.value})")
71
+ EGL.eglTerminate(display)
72
+ return True
73
+ except Exception as e:
74
+ print(f" Default display method failed: {e}")
75
 
76
+ # Method 3: Try surfaceless context (what MuJoCo likely uses)
77
+ try:
78
+ os.environ['EGL_PLATFORM'] = 'surfaceless'
79
+ display = EGL.eglGetDisplay(EGL.EGL_DEFAULT_DISPLAY)
80
+ if display != EGL.EGL_NO_DISPLAY:
81
+ major = ctypes.c_long()
82
+ minor = ctypes.c_long()
83
+ if EGL.eglInitialize(display, ctypes.byref(major), ctypes.byref(minor)):
84
+ print(f"✓ EGL surfaceless display initialized (version {major.value}.{minor.value})")
85
+ EGL.eglTerminate(display)
86
+ return True
87
+ except Exception as e:
88
+ print(f" Surfaceless method failed: {e}")
89
 
90
+ print("✗ All EGL initialization methods failed")
91
+ return False
 
92
 
93
  except Exception as e:
94
  print(f"✗ EGL test failed: {e}")
95
  return False
96
 
97
  def test_mujoco_rendering():
98
+ """Test MuJoCo rendering capability with different approaches."""
99
  try:
100
  import mujoco
101
 
 
112
 
113
  model = mujoco.MjModel.from_xml_string(xml)
114
 
115
+ # Try different rendering approaches
116
+ approaches = [
117
+ ("Small resolution", {"height": 64, "width": 64}),
118
+ ("Default resolution", {"height": 240, "width": 320}),
119
+ ("Large resolution", {"height": 480, "width": 640}),
120
+ ]
121
+
122
+ for name, kwargs in approaches:
123
+ try:
124
+ renderer = mujoco.Renderer(model, **kwargs)
125
+ data = mujoco.MjData(model)
126
+ renderer.update_scene(data)
127
+ pixels = renderer.render()
128
+ print(f" ✓ {name} ({kwargs['width']}x{kwargs['height']}): SUCCESS")
129
+ print(f" Image shape: {pixels.shape}, dtype: {pixels.dtype}")
130
+
131
+ # Test if we got actual rendered content (not all zeros)
132
+ if pixels.max() > 0:
133
+ print(f" ✓ Non-zero pixels detected (max value: {pixels.max()})")
134
+ else:
135
+ print(f" ⚠️ All pixels are zero - may indicate rendering issue")
136
+
137
+ # Clean up
138
+ del renderer
139
+ return True
140
+
141
+ except Exception as e:
142
+ print(f" ✗ {name}: {e}")
143
+ continue
144
+
145
+ print("✗ All MuJoCo rendering approaches failed")
146
+ return False
147
 
148
  except ImportError:
149
  print("✗ MuJoCo not installed")
 
160
  # Set environment variables
161
  os.environ['MUJOCO_GL'] = 'egl'
162
  os.environ['PYOPENGL_PLATFORM'] = 'egl'
163
+ os.environ['EGL_PLATFORM'] = 'surfaceless' # Better for headless
164
 
165
  print("Environment variables set:")
166
  print(f" MUJOCO_GL: {os.environ.get('MUJOCO_GL')}")
 
190
  # Summary
191
  print("=" * 50)
192
  print("🔍 Summary:")
193
+
194
+ # Check if the critical test (MuJoCo) passed
195
+ mujoco_passed = any(name == "MuJoCo Rendering" and passed for name, passed in results)
196
+
197
  for name, passed in results:
198
  status = "✓ PASS" if passed else "✗ FAIL"
199
  print(f" {name}: {status}")
 
 
200
 
201
+ if mujoco_passed:
202
+ print("\n🎉 MuJoCo rendering works! The notebook should work even if some EGL tests fail.")
203
+ print("💡 Note: EGL device tests may fail but MuJoCo can still render successfully.")
204
  return 0
205
  else:
206
+ print("\n⚠️ MuJoCo rendering failed. GPU rendering will not work properly.")
207
+ print("💡 Try checking the container GPU configuration or driver compatibility.")
208
  return 1
209
 
210
  if __name__ == "__main__":
start_server.sh CHANGED
@@ -3,6 +3,16 @@ JUPYTER_TOKEN="${JUPYTER_TOKEN:=huggingface}"
3
 
4
  NOTEBOOK_DIR="/data"
5
 
 
 
 
 
 
 
 
 
 
 
6
  # perform checks on the GPU configuration
7
  python init_gpu.py
8
 
 
3
 
4
  NOTEBOOK_DIR="/data"
5
 
6
+ # Set up NVIDIA EGL library links at runtime (in case they're mounted differently)
7
+ echo "🔗 Setting up NVIDIA EGL library links..."
8
+ for nvidia_lib_dir in /usr/local/nvidia/lib64 /usr/local/cuda/lib64 /usr/lib/nvidia; do
9
+ if [ -f "$nvidia_lib_dir/libEGL_nvidia.so.0" ]; then
10
+ echo "Found NVIDIA EGL library at $nvidia_lib_dir/libEGL_nvidia.so.0"
11
+ ln -sf "$nvidia_lib_dir/libEGL_nvidia.so.0" /usr/lib/x86_64-linux-gnu/libEGL_nvidia.so.0
12
+ break
13
+ fi
14
+ done
15
+
16
  # perform checks on the GPU configuration
17
  python init_gpu.py
18