{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "MpkYHwCqk7W-"
   },
   "source": [
    "![MuJoCo banner](https://raw.githubusercontent.com/google-deepmind/mujoco/main/banner.png)\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "xBSdkbmGN2K-"
   },
   "source": [
    "### Copyright notice"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_UbO9uhtBSX5"
   },
   "source": [
    "> <p><small><small>Copyright 2025 DeepMind Technologies Limited.</small></small></p>\n",
    "> <p><small><small>Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at <a href=\"http://www.apache.org/licenses/LICENSE-2.0\">http://www.apache.org/licenses/LICENSE-2.0</a>.</small></small></p>\n",
    "> <p><small><small>Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.</small></small></p>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "dNIJkb_FM2Ux"
   },
   "source": [
    "# Locomotion in The Playground! <a href=\"https://colab.research.google.com/github/google-deepmind/mujoco_playground/blob/main/learning/notebooks/locomotion.ipynb\"  target=\"_blank\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" width=\"140\" align=\"center\"/></a>\n",
    "\n",
    "In this notebook, we'll walk through a few locomotion environments available in MuJoCo Playground.\n",
    "\n",
    "You can totally run this notebook on Hugging Face!\n",
    "\n",
    "**I recommend using the <a href=\"https://huggingface.co/spaces/jbilcke-hf/train-robots-with-mujoco\" target=\"_blank\">following Space template</a> with an NVIDIA GPU.**\n",
    "\n",
    "It will make things easier for you, as most modules are already pre-installed (so the installation steps will be super fast 😎)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "cellView": "form",
    "id": "Xqo7pyX-n72M"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting jax[cuda12]\n",
      "  Downloading jax-0.6.2-py3-none-any.whl.metadata (13 kB)\n",
      "Collecting jaxlib<=0.6.2,>=0.6.2 (from jax[cuda12])\n",
      "  Downloading jaxlib-0.6.2-cp313-cp313-manylinux2014_x86_64.whl.metadata (1.3 kB)\n",
      "Collecting ml_dtypes>=0.5.0 (from jax[cuda12])\n",
      "  Downloading ml_dtypes-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n",
      "Collecting numpy>=1.26 (from jax[cuda12])\n",
      "  Downloading numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl.metadata (62 kB)\n",
      "Collecting opt_einsum (from jax[cuda12])\n",
      "  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n",
      "Collecting scipy>=1.12 (from jax[cuda12])\n",
      "  Downloading scipy-1.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)\n",
      "Collecting jax-cuda12-plugin<=0.6.2,>=0.6.2 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading jax_cuda12_plugin-0.6.2-cp313-cp313-manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting jax-cuda12-pjrt==0.6.2 (from jax-cuda12-plugin<=0.6.2,>=0.6.2->jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading jax_cuda12_pjrt-0.6.2-py3-none-manylinux2014_x86_64.whl.metadata (579 bytes)\n",
      "Collecting nvidia-cublas-cu12>=12.1.3.1 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting nvidia-cuda-cupti-cu12>=12.1.105 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cuda_cupti_cu12-12.9.79-py3-none-manylinux_2_25_x86_64.whl.metadata (1.8 kB)\n",
      "Collecting nvidia-cuda-nvcc-cu12>=12.6.85 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cuda_nvcc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting nvidia-cuda-runtime-cu12>=12.1.105 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting nvidia-cudnn-cu12<10.0,>=9.8 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl.metadata (1.8 kB)\n",
      "Collecting nvidia-cufft-cu12>=11.0.2.54 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cufft_cu12-11.4.1.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)\n",
      "Collecting nvidia-cusolver-cu12>=11.4.5.107 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cusolver_cu12-11.7.5.82-py3-none-manylinux_2_27_x86_64.whl.metadata (1.9 kB)\n",
      "Collecting nvidia-cusparse-cu12>=12.1.0.106 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cusparse_cu12-12.5.10.65-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.8 kB)\n",
      "Collecting nvidia-nccl-cu12>=2.18.1 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)\n",
      "Collecting nvidia-nvjitlink-cu12>=12.1.105 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting nvidia-cuda-nvrtc-cu12>=12.1.55 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)\n",
      "Collecting nvidia-nvshmem-cu12>=3.2.5 (from jax-cuda12-plugin[with-cuda]<=0.6.2,>=0.6.2; extra == \"cuda12\"->jax[cuda12])\n",
      "  Downloading nvidia_nvshmem_cu12-3.3.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.1 kB)\n",
      "Downloading jax-0.6.2-py3-none-any.whl (2.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m161.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading jax_cuda12_plugin-0.6.2-cp313-cp313-manylinux2014_x86_64.whl (15.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m167.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading jax_cuda12_pjrt-0.6.2-py3-none-manylinux2014_x86_64.whl (125.3 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.3/125.3 MB\u001b[0m \u001b[31m246.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading jaxlib-0.6.2-cp313-cp313-manylinux2014_x86_64.whl (89.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.9/89.9 MB\u001b[0m \u001b[31m221.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl (706.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m706.8/706.8 MB\u001b[0m \u001b[31m71.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading ml_dtypes-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.7/4.7 MB\u001b[0m \u001b[31m284.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl (16.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.6/16.6 MB\u001b[0m \u001b[31m213.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl (581.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m581.2/581.2 MB\u001b[0m \u001b[31m87.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.9.79-py3-none-manylinux_2_25_x86_64.whl (10.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.8/10.8 MB\u001b[0m \u001b[31m259.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_nvcc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (40.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.5/40.5 MB\u001b[0m \u001b[31m236.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (89.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.6/89.6 MB\u001b[0m \u001b[31m226.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m220.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cufft_cu12-11.4.1.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (200.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.9/200.9 MB\u001b[0m \u001b[31m199.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cusolver_cu12-11.7.5.82-py3-none-manylinux_2_27_x86_64.whl (338.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m338.1/338.1 MB\u001b[0m \u001b[31m155.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_cusparse_cu12-12.5.10.65-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (366.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m366.5/366.5 MB\u001b[0m \u001b[31m144.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (322.3 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.3/322.3 MB\u001b[0m \u001b[31m169.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (39.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m39.7/39.7 MB\u001b[0m \u001b[31m267.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading nvidia_nvshmem_cu12-3.3.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (124.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.6/124.6 MB\u001b[0m \u001b[31m198.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading scipy-1.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.1 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.1/35.1 MB\u001b[0m \u001b[31m203.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n",
      "Installing collected packages: jax-cuda12-pjrt, opt_einsum, nvidia-nvshmem-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-nvcc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, jax-cuda12-plugin, scipy, nvidia-cusparse-cu12, nvidia-cufft-cu12, nvidia-cudnn-cu12, ml_dtypes, nvidia-cusolver-cu12, jaxlib, jax\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20/20\u001b[0m [jax]32m19/20\u001b[0m [jax]ib]cusolver-cu12]2]2]\n",
      "\u001b[1A\u001b[2KSuccessfully installed jax-0.6.2 jax-cuda12-pjrt-0.6.2 jax-cuda12-plugin-0.6.2 jaxlib-0.6.2 ml_dtypes-0.5.1 numpy-2.3.1 nvidia-cublas-cu12-12.9.1.4 nvidia-cuda-cupti-cu12-12.9.79 nvidia-cuda-nvcc-cu12-12.9.86 nvidia-cuda-nvrtc-cu12-12.9.86 nvidia-cuda-runtime-cu12-12.9.79 nvidia-cudnn-cu12-9.10.2.21 nvidia-cufft-cu12-11.4.1.4 nvidia-cusolver-cu12-11.7.5.82 nvidia-cusparse-cu12-12.5.10.65 nvidia-nccl-cu12-2.27.5 nvidia-nvjitlink-cu12-12.9.86 nvidia-nvshmem-cu12-3.3.9 opt_einsum-3.4.0 scipy-1.16.0\n",
      "Collecting mujoco\n",
      "  Downloading mujoco-3.3.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (44 kB)\n",
      "Collecting absl-py (from mujoco)\n",
      "  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)\n",
      "Collecting etils[epath] (from mujoco)\n",
      "  Downloading etils-1.12.2-py3-none-any.whl.metadata (6.5 kB)\n",
      "Collecting glfw (from mujoco)\n",
      "  Downloading glfw-2.9.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38.p39.p310.p311.p312.p313-none-manylinux_2_28_x86_64.whl.metadata (5.4 kB)\n",
      "Requirement already satisfied: numpy in /home/user/miniconda/lib/python3.13/site-packages (from mujoco) (2.3.1)\n",
      "Collecting pyopengl (from mujoco)\n",
      "  Downloading PyOpenGL-3.1.9-py3-none-any.whl.metadata (3.3 kB)\n",
      "Collecting fsspec (from etils[epath]->mujoco)\n",
      "  Downloading fsspec-2025.5.1-py3-none-any.whl.metadata (11 kB)\n",
      "Collecting importlib_resources (from etils[epath]->mujoco)\n",
      "  Downloading importlib_resources-6.5.2-py3-none-any.whl.metadata (3.9 kB)\n",
      "Requirement already satisfied: typing_extensions in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco) (4.12.2)\n",
      "Collecting zipp (from etils[epath]->mujoco)\n",
      "  Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)\n",
      "Downloading mujoco-3.3.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m178.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading absl_py-2.3.0-py3-none-any.whl (135 kB)\n",
      "Downloading etils-1.12.2-py3-none-any.whl (167 kB)\n",
      "Downloading fsspec-2025.5.1-py3-none-any.whl (199 kB)\n",
      "Downloading glfw-2.9.0-py2.py27.py3.py30.py31.py32.py33.py34.py35.py36.py37.py38.p39.p310.p311.p312.p313-none-manylinux_2_28_x86_64.whl (243 kB)\n",
      "Downloading importlib_resources-6.5.2-py3-none-any.whl (37 kB)\n",
      "Downloading PyOpenGL-3.1.9-py3-none-any.whl (3.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.2/3.2 MB\u001b[0m \u001b[31m228.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading zipp-3.23.0-py3-none-any.whl (10 kB)\n",
      "Installing collected packages: pyopengl, glfw, zipp, importlib_resources, fsspec, etils, absl-py, mujoco\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8/8\u001b[0m [mujoco]2m7/8\u001b[0m [mujoco]]\n",
      "\u001b[1A\u001b[2KSuccessfully installed absl-py-2.3.0 etils-1.12.2 fsspec-2025.5.1 glfw-2.9.0 importlib_resources-6.5.2 mujoco-3.3.3 pyopengl-3.1.9 zipp-3.23.0\n",
      "Collecting mujoco_mjx\n",
      "  Downloading mujoco_mjx-3.3.3-py3-none-any.whl.metadata (3.4 kB)\n",
      "Requirement already satisfied: absl-py in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (2.3.0)\n",
      "Requirement already satisfied: etils[epath] in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (1.12.2)\n",
      "Requirement already satisfied: jax in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (0.6.2)\n",
      "Requirement already satisfied: jaxlib in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (0.6.2)\n",
      "Requirement already satisfied: mujoco>=3.3.3.dev0 in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (3.3.3)\n",
      "Requirement already satisfied: scipy in /home/user/miniconda/lib/python3.13/site-packages (from mujoco_mjx) (1.16.0)\n",
      "Collecting trimesh (from mujoco_mjx)\n",
      "  Downloading trimesh-4.6.13-py3-none-any.whl.metadata (18 kB)\n",
      "Requirement already satisfied: glfw in /home/user/miniconda/lib/python3.13/site-packages (from mujoco>=3.3.3.dev0->mujoco_mjx) (2.9.0)\n",
      "Requirement already satisfied: numpy in /home/user/miniconda/lib/python3.13/site-packages (from mujoco>=3.3.3.dev0->mujoco_mjx) (2.3.1)\n",
      "Requirement already satisfied: pyopengl in /home/user/miniconda/lib/python3.13/site-packages (from mujoco>=3.3.3.dev0->mujoco_mjx) (3.1.9)\n",
      "Requirement already satisfied: fsspec in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco_mjx) (2025.5.1)\n",
      "Requirement already satisfied: importlib_resources in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco_mjx) (6.5.2)\n",
      "Requirement already satisfied: typing_extensions in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco_mjx) (4.12.2)\n",
      "Requirement already satisfied: zipp in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco_mjx) (3.23.0)\n",
      "Requirement already satisfied: ml_dtypes>=0.5.0 in /home/user/miniconda/lib/python3.13/site-packages (from jax->mujoco_mjx) (0.5.1)\n",
      "Requirement already satisfied: opt_einsum in /home/user/miniconda/lib/python3.13/site-packages (from jax->mujoco_mjx) (3.4.0)\n",
      "Downloading mujoco_mjx-3.3.3-py3-none-any.whl (6.7 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.7/6.7 MB\u001b[0m \u001b[31m138.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading trimesh-4.6.13-py3-none-any.whl (712 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m712.4/712.4 kB\u001b[0m \u001b[31m124.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: trimesh, mujoco_mjx\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [mujoco_mjx]2\u001b[0m [mujoco_mjx]\n",
      "\u001b[1A\u001b[2KSuccessfully installed mujoco_mjx-3.3.3 trimesh-4.6.13\n",
      "Collecting brax\n",
      "  Downloading brax-0.12.4-py3-none-any.whl.metadata (20 kB)\n",
      "Requirement already satisfied: absl-py in /home/user/miniconda/lib/python3.13/site-packages (from brax) (2.3.0)\n",
      "Requirement already satisfied: etils in /home/user/miniconda/lib/python3.13/site-packages (from brax) (1.12.2)\n",
      "Collecting flask (from brax)\n",
      "  Downloading flask-3.1.1-py3-none-any.whl.metadata (3.0 kB)\n",
      "Collecting flask-cors (from brax)\n",
      "  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)\n",
      "Collecting flax (from brax)\n",
      "  Downloading flax-0.10.6-py3-none-any.whl.metadata (11 kB)\n",
      "Requirement already satisfied: jax>=0.4.6 in /home/user/miniconda/lib/python3.13/site-packages (from brax) (0.6.2)\n",
      "Requirement already satisfied: jaxlib>=0.4.6 in /home/user/miniconda/lib/python3.13/site-packages (from brax) (0.6.2)\n",
      "Collecting jaxopt (from brax)\n",
      "  Downloading jaxopt-0.8.5-py3-none-any.whl.metadata (3.3 kB)\n",
      "Requirement already satisfied: jinja2 in /home/user/miniconda/lib/python3.13/site-packages (from brax) (3.1.6)\n",
      "Collecting ml-collections (from brax)\n",
      "  Downloading ml_collections-1.1.0-py3-none-any.whl.metadata (22 kB)\n",
      "Requirement already satisfied: mujoco in /home/user/miniconda/lib/python3.13/site-packages (from brax) (3.3.3)\n",
      "Requirement already satisfied: mujoco-mjx in /home/user/miniconda/lib/python3.13/site-packages (from brax) (3.3.3)\n",
      "Requirement already satisfied: numpy in /home/user/miniconda/lib/python3.13/site-packages (from brax) (2.3.1)\n",
      "Collecting optax (from brax)\n",
      "  Downloading optax-0.2.5-py3-none-any.whl.metadata (7.5 kB)\n",
      "Collecting orbax-checkpoint (from brax)\n",
      "  Downloading orbax_checkpoint-0.11.17-py3-none-any.whl.metadata (2.2 kB)\n",
      "Collecting pillow (from brax)\n",
      "  Downloading pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)\n",
      "Requirement already satisfied: scipy in /home/user/miniconda/lib/python3.13/site-packages (from brax) (1.16.0)\n",
      "Collecting tensorboardx (from brax)\n",
      "  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)\n",
      "Requirement already satisfied: trimesh in /home/user/miniconda/lib/python3.13/site-packages (from brax) (4.6.13)\n",
      "Requirement already satisfied: typing-extensions in /home/user/miniconda/lib/python3.13/site-packages (from brax) (4.12.2)\n",
      "Requirement already satisfied: ml_dtypes>=0.5.0 in /home/user/miniconda/lib/python3.13/site-packages (from jax>=0.4.6->brax) (0.5.1)\n",
      "Requirement already satisfied: opt_einsum in /home/user/miniconda/lib/python3.13/site-packages (from jax>=0.4.6->brax) (3.4.0)\n",
      "Collecting blinker>=1.9.0 (from flask->brax)\n",
      "  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)\n",
      "Collecting click>=8.1.3 (from flask->brax)\n",
      "  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n",
      "Collecting itsdangerous>=2.2.0 (from flask->brax)\n",
      "  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)\n",
      "Requirement already satisfied: markupsafe>=2.1.1 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax) (3.0.2)\n",
      "Collecting werkzeug>=3.1.0 (from flask->brax)\n",
      "  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n",
      "Collecting msgpack (from flax->brax)\n",
      "  Downloading msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n",
      "Collecting tensorstore (from flax->brax)\n",
      "  Downloading tensorstore-0.1.75-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n",
      "Requirement already satisfied: rich>=11.1 in /home/user/miniconda/lib/python3.13/site-packages (from flax->brax) (13.9.4)\n",
      "Requirement already satisfied: PyYAML>=5.4.1 in /home/user/miniconda/lib/python3.13/site-packages (from flax->brax) (6.0.2)\n",
      "Collecting treescope>=0.1.7 (from flax->brax)\n",
      "  Downloading treescope-0.1.9-py3-none-any.whl.metadata (6.6 kB)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /home/user/miniconda/lib/python3.13/site-packages (from rich>=11.1->flax->brax) (2.2.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/user/miniconda/lib/python3.13/site-packages (from rich>=11.1->flax->brax) (2.19.1)\n",
      "Requirement already satisfied: mdurl~=0.1 in /home/user/miniconda/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich>=11.1->flax->brax) (0.1.0)\n",
      "Requirement already satisfied: glfw in /home/user/miniconda/lib/python3.13/site-packages (from mujoco->brax) (2.9.0)\n",
      "Requirement already satisfied: pyopengl in /home/user/miniconda/lib/python3.13/site-packages (from mujoco->brax) (3.1.9)\n",
      "Requirement already satisfied: fsspec in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco->brax) (2025.5.1)\n",
      "Requirement already satisfied: importlib_resources in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco->brax) (6.5.2)\n",
      "Requirement already satisfied: zipp in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco->brax) (3.23.0)\n",
      "Collecting chex>=0.1.87 (from optax->brax)\n",
      "  Downloading chex-0.1.89-py3-none-any.whl.metadata (17 kB)\n",
      "Requirement already satisfied: setuptools in /home/user/miniconda/lib/python3.13/site-packages (from chex>=0.1.87->optax->brax) (78.1.1)\n",
      "Collecting toolz>=0.9.0 (from chex>=0.1.87->optax->brax)\n",
      "  Downloading toolz-1.0.0-py3-none-any.whl.metadata (5.1 kB)\n",
      "Requirement already satisfied: nest_asyncio in /home/user/miniconda/lib/python3.13/site-packages (from orbax-checkpoint->brax) (1.6.0)\n",
      "Collecting protobuf (from orbax-checkpoint->brax)\n",
      "  Downloading protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)\n",
      "Collecting humanize (from orbax-checkpoint->brax)\n",
      "  Downloading humanize-4.12.3-py3-none-any.whl.metadata (7.8 kB)\n",
      "Collecting simplejson>=3.16.0 (from orbax-checkpoint->brax)\n",
      "  Downloading simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.3 kB)\n",
      "Requirement already satisfied: packaging in /home/user/miniconda/lib/python3.13/site-packages (from tensorboardx->brax) (24.2)\n",
      "Downloading brax-0.12.4-py3-none-any.whl (341 kB)\n",
      "Downloading flask-3.1.1-py3-none-any.whl (103 kB)\n",
      "Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)\n",
      "Downloading click-8.2.1-py3-none-any.whl (102 kB)\n",
      "Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n",
      "Downloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n",
      "Downloading flask_cors-6.0.1-py3-none-any.whl (13 kB)\n",
      "Downloading flax-0.10.6-py3-none-any.whl (447 kB)\n",
      "Downloading treescope-0.1.9-py3-none-any.whl (182 kB)\n",
      "Downloading jaxopt-0.8.5-py3-none-any.whl (172 kB)\n",
      "Downloading ml_collections-1.1.0-py3-none-any.whl (76 kB)\n",
      "Downloading msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (423 kB)\n",
      "Downloading optax-0.2.5-py3-none-any.whl (354 kB)\n",
      "Downloading chex-0.1.89-py3-none-any.whl (99 kB)\n",
      "Downloading toolz-1.0.0-py3-none-any.whl (56 kB)\n",
      "Downloading orbax_checkpoint-0.11.17-py3-none-any.whl (479 kB)\n",
      "Downloading simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (152 kB)\n",
      "Downloading tensorstore-0.1.75-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.8 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.8/18.8 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hDownloading humanize-4.12.3-py3-none-any.whl (128 kB)\n",
      "Downloading pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m40.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading protobuf-6.31.1-cp39-abi3-manylinux2014_x86_64.whl (321 kB)\n",
      "Downloading tensorboardx-2.6.4-py3-none-any.whl (87 kB)\n",
      "Installing collected packages: werkzeug, treescope, toolz, simplejson, protobuf, pillow, msgpack, ml-collections, itsdangerous, humanize, click, blinker, tensorstore, tensorboardx, flask, flask-cors, orbax-checkpoint, jaxopt, chex, optax, flax, brax\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m22/22\u001b[0m [brax]2m21/22\u001b[0m [brax]]]heckpoint]\n",
      "\u001b[1A\u001b[2KSuccessfully installed blinker-1.9.0 brax-0.12.4 chex-0.1.89 click-8.2.1 flask-3.1.1 flask-cors-6.0.1 flax-0.10.6 humanize-4.12.3 itsdangerous-2.2.0 jaxopt-0.8.5 ml-collections-1.1.0 msgpack-1.1.1 optax-0.2.5 orbax-checkpoint-0.11.17 pillow-11.3.0 protobuf-6.31.1 simplejson-3.20.1 tensorboardx-2.6.4 tensorstore-0.1.75 toolz-1.0.0 treescope-0.1.9 werkzeug-3.1.3\n",
      "Collecting mediapy\n",
      "  Downloading mediapy-1.2.4-py3-none-any.whl.metadata (4.8 kB)\n",
      "Requirement already satisfied: ipython in /home/user/miniconda/lib/python3.13/site-packages (from mediapy) (9.4.0)\n",
      "Collecting matplotlib (from mediapy)\n",
      "  Downloading matplotlib-3.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
      "Requirement already satisfied: numpy in /home/user/miniconda/lib/python3.13/site-packages (from mediapy) (2.3.1)\n",
      "Requirement already satisfied: Pillow in /home/user/miniconda/lib/python3.13/site-packages (from mediapy) (11.3.0)\n",
      "Requirement already satisfied: decorator in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (5.2.1)\n",
      "Requirement already satisfied: ipython-pygments-lexers in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (1.1.1)\n",
      "Requirement already satisfied: jedi>=0.16 in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (0.19.2)\n",
      "Requirement already satisfied: matplotlib-inline in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (0.1.7)\n",
      "Requirement already satisfied: pexpect>4.3 in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (4.9.0)\n",
      "Requirement already satisfied: prompt_toolkit<3.1.0,>=3.0.41 in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (3.0.51)\n",
      "Requirement already satisfied: pygments>=2.4.0 in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (2.19.1)\n",
      "Requirement already satisfied: stack_data in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (0.6.3)\n",
      "Requirement already satisfied: traitlets>=5.13.0 in /home/user/miniconda/lib/python3.13/site-packages (from ipython->mediapy) (5.14.3)\n",
      "Requirement already satisfied: wcwidth in /home/user/miniconda/lib/python3.13/site-packages (from prompt_toolkit<3.1.0,>=3.0.41->ipython->mediapy) (0.2.13)\n",
      "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /home/user/miniconda/lib/python3.13/site-packages (from jedi>=0.16->ipython->mediapy) (0.8.4)\n",
      "Requirement already satisfied: ptyprocess>=0.5 in /home/user/miniconda/lib/python3.13/site-packages (from pexpect>4.3->ipython->mediapy) (0.7.0)\n",
      "Collecting contourpy>=1.0.1 (from matplotlib->mediapy)\n",
      "  Downloading contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)\n",
      "Collecting cycler>=0.10 (from matplotlib->mediapy)\n",
      "  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
      "Collecting fonttools>=4.22.0 (from matplotlib->mediapy)\n",
      "  Downloading fonttools-4.58.4-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (106 kB)\n",
      "Collecting kiwisolver>=1.3.1 (from matplotlib->mediapy)\n",
      "  Downloading kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)\n",
      "Requirement already satisfied: packaging>=20.0 in /home/user/miniconda/lib/python3.13/site-packages (from matplotlib->mediapy) (24.2)\n",
      "Collecting pyparsing>=2.3.1 (from matplotlib->mediapy)\n",
      "  Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n",
      "Requirement already satisfied: python-dateutil>=2.7 in /home/user/miniconda/lib/python3.13/site-packages (from matplotlib->mediapy) (2.9.0.post0)\n",
      "Requirement already satisfied: six>=1.5 in /home/user/miniconda/lib/python3.13/site-packages (from python-dateutil>=2.7->matplotlib->mediapy) (1.17.0)\n",
      "Requirement already satisfied: executing>=1.2.0 in /home/user/miniconda/lib/python3.13/site-packages (from stack_data->ipython->mediapy) (2.2.0)\n",
      "Requirement already satisfied: asttokens>=2.1.0 in /home/user/miniconda/lib/python3.13/site-packages (from stack_data->ipython->mediapy) (3.0.0)\n",
      "Requirement already satisfied: pure-eval in /home/user/miniconda/lib/python3.13/site-packages (from stack_data->ipython->mediapy) (0.2.3)\n",
      "Downloading mediapy-1.2.4-py3-none-any.whl (26 kB)\n",
      "Downloading matplotlib-3.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.6/8.6 MB\u001b[0m \u001b[31m49.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (322 kB)\n",
      "Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
      "Downloading fonttools-4.58.4-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl (4.9 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m34.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.5/1.5 MB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading pyparsing-3.2.3-py3-none-any.whl (111 kB)\n",
      "Installing collected packages: pyparsing, kiwisolver, fonttools, cycler, contourpy, matplotlib, mediapy\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7/7\u001b[0m [mediapy]m6/7\u001b[0m [mediapy]ib]\n",
      "\u001b[1A\u001b[2KSuccessfully installed contourpy-1.3.2 cycler-0.12.1 fonttools-4.58.4 kiwisolver-1.4.8 matplotlib-3.10.3 mediapy-1.2.4 pyparsing-3.2.3\n"
     ]
    }
   ],
   "source": [
    "#@title Install pre-requisites\n",
    "# Use the %pip magic rather than !pip so the packages are installed into the\n",
    "# environment of the running kernel, not whichever `pip` is first on the shell\n",
    "# PATH.\n",
    "%pip install \"jax[cuda12]\"\n",
    "%pip install mujoco\n",
    "%pip install mujoco_mjx\n",
    "%pip install brax\n",
    "%pip install mediapy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "cellView": "form",
    "id": "IbZxYDxzoz5R"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tue Jul  1 12:12:35 2025       \n",
      "+-----------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 570.158.01             Driver Version: 570.158.01     CUDA Version: 12.8     |\n",
      "|-----------------------------------------+------------------------+----------------------+\n",
      "| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n",
      "|                                         |                        |               MIG M. |\n",
      "|=========================================+========================+======================|\n",
      "|   0  NVIDIA L40S                    On  |   00000000:30:00.0 Off |                    0 |\n",
      "| N/A   38C    P8             36W /  350W |       0MiB /  46068MiB |      0%      Default |\n",
      "|                                         |                        |                  N/A |\n",
      "+-----------------------------------------+------------------------+----------------------+\n",
      "                                                                                         \n",
      "+-----------------------------------------------------------------------------------------+\n",
      "| Processes:                                                                              |\n",
      "|  GPU   GI   CI              PID   Type   Process name                        GPU Memory |\n",
      "|        ID   ID                                                               Usage      |\n",
      "|=========================================================================================|\n",
      "|  No running processes found                                                             |\n",
      "+-----------------------------------------------------------------------------------------+\n",
      "Setting environment variable to use GPU rendering:\n",
      "env: MUJOCO_GL=egl\n",
      "Checking that the installation succeeded:\n",
      "Installation successful.\n"
     ]
    }
   ],
   "source": [
    "# @title Check if MuJoCo installation was successful\n",
    "\n",
    "import distutils.util\n",
    "import os\n",
    "import subprocess\n",
    "\n",
    "# A non-zero exit status from `nvidia-smi` is treated as \"no usable GPU\".\n",
    "gpu_probe = subprocess.run('nvidia-smi')\n",
    "if gpu_probe.returncode != 0:\n",
    "  raise RuntimeError(\n",
    "      'Cannot communicate with GPU. Make sure you are using a GPU Colab '\n",
    "      'runtime. Go to the Runtime menu and select Choose runtime type.'\n",
    "  )\n",
    "\n",
    "# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.\n",
    "# This is usually installed as part of an Nvidia driver package, but the Colab\n",
    "# kernel doesn't install its driver via APT, and as a result the ICD is missing.\n",
    "# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)\n",
    "NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'\n",
    "# NOTE(review): the write below is commented out, so this cell currently only\n",
    "# defines the path and never creates the ICD file. Presumably it was disabled\n",
    "# because the target runtime cannot write under /usr/share -- confirm, then\n",
    "# either restore the block or remove it.\n",
    "#if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):\n",
    "#  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:\n",
    "#    f.write(\"\"\"{\n",
    "#    \"file_format_version\" : \"1.0.0\",\n",
    "#    \"ICD\" : {\n",
    "#        \"library_path\" : \"libEGL_nvidia.so.0\"\n",
    "#    }\n",
    "#}\n",
    "#\"\"\")\n",
    "\n",
    "# Configure MuJoCo to use the EGL rendering backend (requires GPU)\n",
    "print('Setting environment variable to use GPU rendering:')\n",
    "%env MUJOCO_GL=egl\n",
    "\n",
    "# Smoke test: import MuJoCo and compile a minimal empty model.\n",
    "try:\n",
    "  print('Checking that the installation succeeded:')\n",
    "  import mujoco\n",
    "\n",
    "  mujoco.MjModel.from_xml_string('<mujoco/>')\n",
    "except Exception as e:\n",
    "  # Raise the actionable hint as the primary error and chain the original\n",
    "  # failure as its cause. The previous `raise e from RuntimeError(...)` had the\n",
    "  # two swapped, reporting the hint as the cause of the real failure.\n",
    "  raise RuntimeError(\n",
    "      'Something went wrong during installation. Check the shell output above '\n",
    "      'for more information.\\n'\n",
    "      'If using a hosted Colab runtime, make sure you enable GPU acceleration '\n",
    "      'by going to the Runtime menu and selecting \"Choose runtime type\".'\n",
    "  ) from e\n",
    "\n",
    "print('Installation successful.')\n",
    "\n",
    "# Tell XLA to use Triton GEMM, this improves steps/sec by ~30% on some GPUs\n",
    "xla_flags = os.environ.get('XLA_FLAGS', '')\n",
    "xla_flags += ' --xla_gpu_triton_gemm_any=True'\n",
    "os.environ['XLA_FLAGS'] = xla_flags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "cellView": "form",
    "id": "T5f4w3Kq2X14"
   },
   "outputs": [],
   "source": [
    "# @title Import packages for plotting and creating graphics\n",
    "import json\n",
    "import itertools\n",
    "import time\n",
    "from typing import Callable, List, NamedTuple, Optional, Union\n",
    "import numpy as np\n",
    "\n",
    "# Graphics and plotting.\n",
    "#print(\"Installing mediapy:\")\n",
    "#!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)\n",
    "#!pip install -q mediapy\n",
    "import mediapy as media\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# More legible printing from numpy.\n",
    "np.set_printoptions(precision=3, suppress=True, linewidth=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "cellView": "form",
    "id": "ObF1UXrkb0Nd"
   },
   "outputs": [],
   "source": [
    "# @title Import MuJoCo, MJX, and Brax\n",
    "from datetime import datetime\n",
    "import functools\n",
    "import os\n",
    "from typing import Any, Dict, Sequence, Tuple, Union\n",
    "from brax import base\n",
    "from brax import envs\n",
    "from brax import math\n",
    "from brax.base import Base, Motion, Transform\n",
    "from brax.base import State as PipelineState\n",
    "from brax.envs.base import Env, PipelineEnv, State\n",
    "from brax.io import html, mjcf, model\n",
    "from brax.mjx.base import State as MjxState\n",
    "from brax.training.agents.ppo import networks as ppo_networks\n",
    "from brax.training.agents.ppo import train as ppo\n",
    "from brax.training.agents.sac import networks as sac_networks\n",
    "from brax.training.agents.sac import train as sac\n",
    "from etils import epath\n",
    "from flax import struct\n",
    "from flax.training import orbax_utils\n",
    "from IPython.display import HTML, clear_output\n",
    "import jax\n",
    "from jax import numpy as jp\n",
    "from matplotlib import pyplot as plt\n",
    "import mediapy as media\n",
    "from ml_collections import config_dict\n",
    "import mujoco\n",
    "from mujoco import mjx\n",
    "import numpy as np\n",
    "from orbax import checkpoint as ocp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "cellView": "form",
    "id": "UoTLSx4cFRdy"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting playground\n",
      "  Downloading playground-0.0.5-py3-none-any.whl.metadata (8.7 kB)\n",
      "Requirement already satisfied: brax>=0.12.1 in /home/user/miniconda/lib/python3.13/site-packages (from playground) (0.12.4)\n",
      "Requirement already satisfied: etils in /home/user/miniconda/lib/python3.13/site-packages (from playground) (1.12.2)\n",
      "Requirement already satisfied: flax in /home/user/miniconda/lib/python3.13/site-packages (from playground) (0.10.6)\n",
      "Requirement already satisfied: jax in /home/user/miniconda/lib/python3.13/site-packages (from playground) (0.6.2)\n",
      "Collecting lxml (from playground)\n",
      "  Downloading lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.6 kB)\n",
      "Requirement already satisfied: ml-collections in /home/user/miniconda/lib/python3.13/site-packages (from playground) (1.1.0)\n",
      "Requirement already satisfied: mujoco-mjx>=3.2.7 in /home/user/miniconda/lib/python3.13/site-packages (from playground) (3.3.3)\n",
      "Requirement already satisfied: mujoco>=3.2.7 in /home/user/miniconda/lib/python3.13/site-packages (from playground) (3.3.3)\n",
      "Requirement already satisfied: tqdm in /home/user/miniconda/lib/python3.13/site-packages (from playground) (4.67.1)\n",
      "Requirement already satisfied: absl-py in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (2.3.0)\n",
      "Requirement already satisfied: flask in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (3.1.1)\n",
      "Requirement already satisfied: flask-cors in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (6.0.1)\n",
      "Requirement already satisfied: jaxlib>=0.4.6 in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (0.6.2)\n",
      "Requirement already satisfied: jaxopt in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (0.8.5)\n",
      "Requirement already satisfied: jinja2 in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (3.1.6)\n",
      "Requirement already satisfied: numpy in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (2.3.1)\n",
      "Requirement already satisfied: optax in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (0.2.5)\n",
      "Requirement already satisfied: orbax-checkpoint in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (0.11.17)\n",
      "Requirement already satisfied: pillow in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (11.3.0)\n",
      "Requirement already satisfied: scipy in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (1.16.0)\n",
      "Requirement already satisfied: tensorboardx in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (2.6.4)\n",
      "Requirement already satisfied: trimesh in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (4.6.13)\n",
      "Requirement already satisfied: typing-extensions in /home/user/miniconda/lib/python3.13/site-packages (from brax>=0.12.1->playground) (4.12.2)\n",
      "Requirement already satisfied: ml_dtypes>=0.5.0 in /home/user/miniconda/lib/python3.13/site-packages (from jax->playground) (0.5.1)\n",
      "Requirement already satisfied: opt_einsum in /home/user/miniconda/lib/python3.13/site-packages (from jax->playground) (3.4.0)\n",
      "Requirement already satisfied: glfw in /home/user/miniconda/lib/python3.13/site-packages (from mujoco>=3.2.7->playground) (2.9.0)\n",
      "Requirement already satisfied: pyopengl in /home/user/miniconda/lib/python3.13/site-packages (from mujoco>=3.2.7->playground) (3.1.9)\n",
      "Requirement already satisfied: fsspec in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco>=3.2.7->playground) (2025.5.1)\n",
      "Requirement already satisfied: importlib_resources in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco>=3.2.7->playground) (6.5.2)\n",
      "Requirement already satisfied: zipp in /home/user/miniconda/lib/python3.13/site-packages (from etils[epath]->mujoco>=3.2.7->playground) (3.23.0)\n",
      "Requirement already satisfied: blinker>=1.9.0 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax>=0.12.1->playground) (1.9.0)\n",
      "Requirement already satisfied: click>=8.1.3 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax>=0.12.1->playground) (8.2.1)\n",
      "Requirement already satisfied: itsdangerous>=2.2.0 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax>=0.12.1->playground) (2.2.0)\n",
      "Requirement already satisfied: markupsafe>=2.1.1 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax>=0.12.1->playground) (3.0.2)\n",
      "Requirement already satisfied: werkzeug>=3.1.0 in /home/user/miniconda/lib/python3.13/site-packages (from flask->brax>=0.12.1->playground) (3.1.3)\n",
      "Requirement already satisfied: msgpack in /home/user/miniconda/lib/python3.13/site-packages (from flax->playground) (1.1.1)\n",
      "Requirement already satisfied: tensorstore in /home/user/miniconda/lib/python3.13/site-packages (from flax->playground) (0.1.75)\n",
      "Requirement already satisfied: rich>=11.1 in /home/user/miniconda/lib/python3.13/site-packages (from flax->playground) (13.9.4)\n",
      "Requirement already satisfied: PyYAML>=5.4.1 in /home/user/miniconda/lib/python3.13/site-packages (from flax->playground) (6.0.2)\n",
      "Requirement already satisfied: treescope>=0.1.7 in /home/user/miniconda/lib/python3.13/site-packages (from flax->playground) (0.1.9)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /home/user/miniconda/lib/python3.13/site-packages (from rich>=11.1->flax->playground) (2.2.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /home/user/miniconda/lib/python3.13/site-packages (from rich>=11.1->flax->playground) (2.19.1)\n",
      "Requirement already satisfied: mdurl~=0.1 in /home/user/miniconda/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich>=11.1->flax->playground) (0.1.0)\n",
      "Requirement already satisfied: chex>=0.1.87 in /home/user/miniconda/lib/python3.13/site-packages (from optax->brax>=0.12.1->playground) (0.1.89)\n",
      "Requirement already satisfied: setuptools in /home/user/miniconda/lib/python3.13/site-packages (from chex>=0.1.87->optax->brax>=0.12.1->playground) (78.1.1)\n",
      "Requirement already satisfied: toolz>=0.9.0 in /home/user/miniconda/lib/python3.13/site-packages (from chex>=0.1.87->optax->brax>=0.12.1->playground) (1.0.0)\n",
      "Requirement already satisfied: nest_asyncio in /home/user/miniconda/lib/python3.13/site-packages (from orbax-checkpoint->brax>=0.12.1->playground) (1.6.0)\n",
      "Requirement already satisfied: protobuf in /home/user/miniconda/lib/python3.13/site-packages (from orbax-checkpoint->brax>=0.12.1->playground) (6.31.1)\n",
      "Requirement already satisfied: humanize in /home/user/miniconda/lib/python3.13/site-packages (from orbax-checkpoint->brax>=0.12.1->playground) (4.12.3)\n",
      "Requirement already satisfied: simplejson>=3.16.0 in /home/user/miniconda/lib/python3.13/site-packages (from orbax-checkpoint->brax>=0.12.1->playground) (3.20.1)\n",
      "Requirement already satisfied: packaging in /home/user/miniconda/lib/python3.13/site-packages (from tensorboardx->brax>=0.12.1->playground) (24.2)\n",
      "Downloading playground-0.0.5-py3-none-any.whl (7.4 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m88.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (5.2 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.2/5.2 MB\u001b[0m \u001b[31m163.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: lxml, playground\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2/2\u001b[0m [playground]2\u001b[0m [playground]\n",
      "\u001b[1A\u001b[2KSuccessfully installed lxml-6.0.0 playground-0.0.5\n"
     ]
    }
   ],
   "source": [
    "#@title Install MuJoCo Playground\n",
    "# %pip (not !pip) installs into the environment of the running kernel.\n",
    "%pip install playground"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "cellView": "form",
    "id": "gYm2h7m8w3Nv"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mujoco_menagerie not found. Downloading...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Cloning mujoco_menagerie: ██████████| 100/100 [00:13<00:00]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking out commit 14ceccf557cc47240202f2354d684eca58ff8de4\n",
      "Successfully downloaded mujoco_menagerie\n"
     ]
    }
   ],
   "source": [
    "#@title Import The Playground\n",
    "\n",
    "from mujoco_playground import wrapper\n",
    "from mujoco_playground import registry"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "LcibXbyKt4FI"
   },
   "source": [
    "# Locomotion\n",
    "\n",
    "MuJoCo Playground contains a host of quadrupedal and bipedal environments. Run the cell below to list all of them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "ox0Gze9Ct5AM"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('ApolloJoystickFlatTerrain',\n",
       " 'BarkourJoystick',\n",
       " 'BerkeleyHumanoidJoystickFlatTerrain',\n",
       " 'BerkeleyHumanoidJoystickRoughTerrain',\n",
       " 'G1JoystickFlatTerrain',\n",
       " 'G1JoystickRoughTerrain',\n",
       " 'Go1JoystickFlatTerrain',\n",
       " 'Go1JoystickRoughTerrain',\n",
       " 'Go1Getup',\n",
       " 'Go1Handstand',\n",
       " 'Go1Footstand',\n",
       " 'H1InplaceGaitTracking',\n",
       " 'H1JoystickGaitTracking',\n",
       " 'Op3Joystick',\n",
       " 'SpotFlatTerrainJoystick',\n",
       " 'SpotGetup',\n",
       " 'SpotJoystickGaitTracking',\n",
       " 'T1JoystickFlatTerrain',\n",
       " 'T1JoystickRoughTerrain')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the names of the locomotion environments known to the registry.\n",
    "registry.locomotion.ALL_ENVS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "_R01tjWfI-i6"
   },
   "source": [
    "# Quadrupedal\n",
    "\n",
    "Let's jump right into quadrupedal locomotion! While we have environments available for the Google Barkour and Boston Dynamics Spot robots, the Unitree Go1 environment contains the most trainable policies that were transferred onto the real robot. We'll go right ahead and show a few policies using the Unitree Go1!\n",
    "\n",
    "First, let's train a joystick policy, which tracks linear and yaw velocity commands."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "kPJeoQeEJBSA"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:2025-07-01 11:54:03,909:jax._src.xla_bridge:794: An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n",
      "WARNING:jax._src.xla_bridge:An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.\n"
     ]
    }
   ],
   "source": [
    "# Load the Go1 joystick environment by name, plus the default config that the\n",
    "# registry provides for it.\n",
    "env_name = 'Go1JoystickFlatTerrain'\n",
    "env = registry.load(env_name)\n",
    "env_cfg = registry.get_default_config(env_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6n9UT9N1wR5K"
   },
   "outputs": [],
   "source": [
    "# Display the default environment config fetched from the registry.\n",
    "env_cfg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Thm7nZueM4cz"
   },
   "source": [
    "## Joystick\n",
    "\n",
    "Let's train the joystick policy and visualize rollouts:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "B9T_UVZYLDdM"
   },
   "outputs": [],
   "source": [
    "from mujoco_playground.config import locomotion_params\n",
    "# Fetch the Brax PPO config for this environment and display it. Its entries\n",
    "# are later forwarded as keyword arguments to `ppo.train`.\n",
    "ppo_params = locomotion_params.brax_ppo_config(env_name)\n",
    "ppo_params"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Aefr2OS01D9g"
   },
   "source": [
    "Domain randomization was used to make the policy robust to sim-to-real transfer. Certain environments in the Playground have domain randomization functions implemented. They're available in the registry and can be passed directly to brax RL algorithms. The [domain randomization](https://github.com/google-deepmind/mujoco_playground/blob/main/mujoco_playground/_src/locomotion/go1/randomize.py) function randomizes over friction, armature, center of mass of the torso, and link masses, amongst other simulation parameters."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "UVA4Bn681DZT"
   },
   "outputs": [],
   "source": [
    "# Display the domain randomization function registered for this environment.\n",
    "registry.get_domain_randomizer(env_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "vBEEQyY6M5OC"
   },
   "source": [
    "### Train\n",
    "\n",
    "The policy takes 7 minutes to train on an RTX 4090."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XKFzyP7wM5OD"
   },
   "outputs": [],
   "source": [
    "# Learning-curve history shared with `progress`.\n",
    "x_data = []\n",
    "y_data = []\n",
    "y_dataerr = []\n",
    "# Seeded with the time this cell runs; `progress` appends one entry per call.\n",
    "times = [datetime.now()]\n",
    "\n",
    "\n",
    "def progress(num_steps, metrics):\n",
    "  \"\"\"Record the latest evaluation result and redraw the learning curve.\n",
    "\n",
    "  Args:\n",
    "    num_steps: Step count plotted on the x-axis.\n",
    "    metrics: Mapping that provides `eval/episode_reward` and\n",
    "      `eval/episode_reward_std`.\n",
    "  \"\"\"\n",
    "  clear_output(wait=True)\n",
    "\n",
    "  times.append(datetime.now())\n",
    "  x_data.append(num_steps)\n",
    "  y_data.append(metrics[\"eval/episode_reward\"])\n",
    "  y_dataerr.append(metrics[\"eval/episode_reward_std\"])\n",
    "\n",
    "  # Draw on the current axes; the x-range leaves 25% headroom past the budget.\n",
    "  ax = plt.gca()\n",
    "  ax.set_xlim([0, ppo_params[\"num_timesteps\"] * 1.25])\n",
    "  ax.set_xlabel(\"# environment steps\")\n",
    "  ax.set_ylabel(\"reward per episode\")\n",
    "  ax.set_title(f\"y={y_data[-1]:.3f}\")\n",
    "  ax.errorbar(x_data, y_data, yerr=y_dataerr, color=\"blue\")\n",
    "\n",
    "  display(ax.figure)\n",
    "\n",
    "# Domain randomizer for this environment; passed to PPO as `randomization_fn`.\n",
    "randomizer = registry.get_domain_randomizer(env_name)\n",
    "\n",
    "# Work on a plain-dict copy so the `network_factory` entry can be split out.\n",
    "ppo_training_params = dict(ppo_params)\n",
    "network_factory = ppo_networks.make_ppo_networks\n",
    "if \"network_factory\" in ppo_training_params:\n",
    "  # The config entry is unpacked as keyword arguments for the network builder,\n",
    "  # so bind them here and drop the entry from the `ppo.train` kwargs.\n",
    "  network_factory = functools.partial(\n",
    "      ppo_networks.make_ppo_networks,\n",
    "      **ppo_training_params.pop(\"network_factory\"),\n",
    "  )\n",
    "\n",
    "train_fn = functools.partial(\n",
    "    ppo.train,\n",
    "    network_factory=network_factory,\n",
    "    randomization_fn=randomizer,\n",
    "    progress_fn=progress,\n",
    "    **ppo_training_params,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FGrlulWbM5OD"
   },
   "outputs": [],
   "source": [
    "# Run training. The call returns a factory for inference functions, the\n",
    "# trained parameters, and a metrics object.\n",
    "make_inference_fn, params, metrics = train_fn(\n",
    "    environment=env,\n",
    "    eval_env=registry.load(env_name, config=env_cfg),\n",
    "    wrap_env_fn=wrapper.wrap_for_brax_training,\n",
    ")\n",
    "# `times[0]` was stamped when the setup cell ran and every `progress` call\n",
    "# appends another timestamp, so the first interval is reported as jit time and\n",
    "# the rest as training time.\n",
    "print(f\"time to jit: {times[1] - times[0]}\")\n",
    "print(f\"time to train: {times[-1] - times[1]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "AUxSNhq3UqmC"
   },
   "source": [
    "Let's roll out and render the resulting policy!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RBM89g5A2Yoi"
   },
   "outputs": [],
   "source": [
    "# Enable perturbation in the eval env.\n",
    "# The config must be fully edited before `registry.load` consumes it below.\n",
    "env_cfg = registry.get_default_config(env_name)\n",
    "env_cfg.pert_config.enable = True\n",
    "env_cfg.pert_config.velocity_kick = [3.0, 6.0]\n",
    "env_cfg.pert_config.kick_wait_times = [5.0, 15.0]\n",
    "# Override `command_config.a` (presumably the per-axis command amplitude --\n",
    "# confirm); the plotting cell uses these three values as symmetric y-limits.\n",
    "env_cfg.command_config.a = [1.5, 0.8, 2*jp.pi]\n",
    "eval_env = registry.load(env_name, config=env_cfg)\n",
    "# Ranges read by `sample_pert` in the rollout cell, which overwrites the\n",
    "# perturbation stored in `state.info` with values drawn from them.\n",
    "velocity_kick_range = [0.0, 0.0]  # Disable velocity kick.\n",
    "kick_duration_range = [0.05, 0.2]\n",
    "\n",
    "# JIT-compile reset, step and the deterministic policy for the rollout.\n",
    "jit_reset = jax.jit(eval_env.reset)\n",
    "jit_step = jax.jit(eval_env.step)\n",
    "jit_inference_fn = jax.jit(make_inference_fn(params, deterministic=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "C_1CY9xDoUKw"
   },
   "outputs": [],
   "source": [
    "#@title Rollout and Render\n",
    "from mujoco_playground._src.gait import draw_joystick_command\n",
    "\n",
    "# Commanded velocities; packed into `command` and re-applied to `state.info`\n",
    "# on every step of the rollout below. Units are not shown here -- presumably\n",
    "# m/s for x/y and rad/s for yaw; confirm against the environment.\n",
    "x_vel = 0.0  #@param {type: \"number\"}\n",
    "y_vel = 0.0  #@param {type: \"number\"}\n",
    "yaw_vel = 3.14  #@param {type: \"number\"}\n",
    "\n",
    "\n",
    "def sample_pert(rng):\n",
    "  \"\"\"Sample a perturbation and store it in the global `state.info`.\n",
    "\n",
    "  Draws a magnitude from `velocity_kick_range` and a duration in seconds from\n",
    "  `kick_duration_range`, converts the duration to a rounded number of\n",
    "  `eval_env.dt` steps, and writes all three values into `state.info`.\n",
    "\n",
    "  Args:\n",
    "    rng: JAX PRNG key; it is split three ways here.\n",
    "\n",
    "  Returns:\n",
    "    The carry-over key from the split, for the caller to keep using.\n",
    "  \"\"\"\n",
    "  next_rng, magnitude_key, duration_key = jax.random.split(rng, 3)\n",
    "\n",
    "  magnitude = jax.random.uniform(\n",
    "      magnitude_key,\n",
    "      minval=velocity_kick_range[0],\n",
    "      maxval=velocity_kick_range[1],\n",
    "  )\n",
    "  seconds = jax.random.uniform(\n",
    "      duration_key,\n",
    "      minval=kick_duration_range[0],\n",
    "      maxval=kick_duration_range[1],\n",
    "  )\n",
    "\n",
    "  # Note: this mutates the notebook-level `state`, not an argument.\n",
    "  state.info.update({\n",
    "      \"pert_mag\": magnitude,\n",
    "      \"pert_duration\": jp.round(seconds / eval_env.dt).astype(jp.int32),\n",
    "      \"pert_duration_seconds\": seconds,\n",
    "  })\n",
    "  return next_rng\n",
    "\n",
    "\n",
    "# Fixed seed so the rollout is reproducible.\n",
    "rng = jax.random.PRNGKey(0)\n",
    "# States and per-step scene callbacks consumed by the renderer below.\n",
    "rollout = []\n",
    "modify_scene_fns = []\n",
    "\n",
    "# Per-step logs; `swing_peak`, `linvel` and `angvel` are plotted in a later cell.\n",
    "swing_peak = []\n",
    "rewards = []\n",
    "linvel = []\n",
    "angvel = []\n",
    "track = []\n",
    "foot_vel = []\n",
    "rews = []\n",
    "contact = []\n",
    "command = jp.array([x_vel, y_vel, yaw_vel])\n",
    "\n",
    "state = jit_reset(rng)\n",
    "# While the step counter is below the next-perturbation threshold, overwrite the\n",
    "# perturbation stored in `state.info` with one drawn by `sample_pert`.\n",
    "if state.info[\"steps_since_last_pert\"] < state.info[\"steps_until_next_pert\"]:\n",
    "  rng = sample_pert(rng)\n",
    "# Force the fixed command instead of whatever reset placed in `state.info`.\n",
    "state.info[\"command\"] = command\n",
    "for i in range(env_cfg.episode_length):\n",
    "  if state.info[\"steps_since_last_pert\"] < state.info[\"steps_until_next_pert\"]:\n",
    "    rng = sample_pert(rng)\n",
    "  act_rng, rng = jax.random.split(rng)\n",
    "  ctrl, _ = jit_inference_fn(state.obs, act_rng)\n",
    "  state = jit_step(state, ctrl)\n",
    "  # Re-apply the fixed command on the state returned by the step.\n",
    "  state.info[\"command\"] = command\n",
    "  rews.append(\n",
    "      {k: v for k, v in state.metrics.items() if k.startswith(\"reward/\")}\n",
    "  )\n",
    "  rollout.append(state)\n",
    "  swing_peak.append(state.info[\"swing_peak\"])\n",
    "  # Same entries as `rews`, with the 7-character `reward/` prefix stripped.\n",
    "  rewards.append(\n",
    "      {k[7:]: v for k, v in state.metrics.items() if k.startswith(\"reward/\")}\n",
    "  )\n",
    "  # NOTE(review): the helpers below are called on `env` (the training env), not\n",
    "  # `eval_env`, whose config was modified -- confirm this is intended.\n",
    "  linvel.append(env.get_global_linvel(state.data))\n",
    "  angvel.append(env.get_gyro(state.data))\n",
    "  track.append(\n",
    "      env._reward_tracking_lin_vel(\n",
    "          state.info[\"command\"], env.get_local_linvel(state.data)\n",
    "      )\n",
    "  )\n",
    "\n",
    "  feet_vel = state.data.sensordata[env._foot_linvel_sensor_adr]\n",
    "  vel_xy = feet_vel[..., :2]\n",
    "  # NOTE(review): this is the square root of the xy speed, not the speed itself;\n",
    "  # presumably it mirrors a reward term in the environment -- confirm.\n",
    "  vel_norm = jp.sqrt(jp.linalg.norm(vel_xy, axis=-1))\n",
    "  foot_vel.append(vel_norm)\n",
    "\n",
    "  contact.append(state.info[\"last_contact\"])\n",
    "\n",
    "  # Anchor for the command overlay: the torso position raised by 0.2 along z.\n",
    "  xyz = np.array(state.data.xpos[env._torso_body_id])\n",
    "  xyz += np.array([0, 0, 0.2])\n",
    "  # Heading angle derived from the first row of the torso's `xmat` entry.\n",
    "  x_axis = state.data.xmat[env._torso_body_id, 0]\n",
    "  yaw = -np.arctan2(x_axis[1], x_axis[0])\n",
    "  # Bind this step's values now; the renderer invokes the callback later.\n",
    "  modify_scene_fns.append(\n",
    "      functools.partial(\n",
    "          draw_joystick_command,\n",
    "          cmd=state.info[\"command\"],\n",
    "          xyz=xyz,\n",
    "          theta=yaw,\n",
    "          scl=abs(state.info[\"command\"][0])\n",
    "          / env_cfg.command_config.a[0],\n",
    "      )\n",
    "  )\n",
    "\n",
    "\n",
    "# Keep every `render_every`-th state and divide the frame rate by the same\n",
    "# factor so the video length still matches the simulated time.\n",
    "render_every = 2\n",
    "traj = rollout[::render_every]\n",
    "mod_fns = modify_scene_fns[::render_every]\n",
    "fps = 1.0 / eval_env.dt / render_every\n",
    "\n",
    "# Visualization options: show geom group 2, hide group 3, and draw contact\n",
    "# points and perturbation forces without transparency.\n",
    "scene_option = mujoco.MjvOption()\n",
    "scene_option.geomgroup[2] = True\n",
    "scene_option.geomgroup[3] = False\n",
    "for vis_flag, enabled in (\n",
    "    (mujoco.mjtVisFlag.mjVIS_CONTACTPOINT, True),\n",
    "    (mujoco.mjtVisFlag.mjVIS_TRANSPARENT, False),\n",
    "    (mujoco.mjtVisFlag.mjVIS_PERTFORCE, True),\n",
    "):\n",
    "  scene_option.flags[vis_flag] = enabled\n",
    "\n",
    "frames = eval_env.render(\n",
    "    traj,\n",
    "    camera=\"track\",\n",
    "    width=640,\n",
    "    height=480,\n",
    "    scene_option=scene_option,\n",
    "    modify_scene_fns=mod_fns,\n",
    ")\n",
    "media.show_video(frames, fps=fps, loop=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1QHdoJ2r30En"
   },
   "source": [
    "Let's visualize each foot's swing height and compare the measured linear and angular velocities with the commanded values. This is useful for debugging how well the policy follows the commands!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "gyyynm3ozEet"
   },
   "outputs": [],
   "source": [
    "#@title Plot each foot in a 2x2 grid.\n",
    "\n",
    "swing_peak = jp.array(swing_peak)\n",
    "names = [\"FR\", \"FL\", \"RR\", \"RL\"]\n",
    "colors = [\"r\", \"g\", \"b\", \"y\"]\n",
    "fig, axs = plt.subplots(2, 2)\n",
    "for i, ax in enumerate(axs.flat):\n",
    "  ax.plot(swing_peak[:, i], color=colors[i])\n",
    "  ax.set_ylim([0, env_cfg.reward_config.max_foot_height * 1.25])\n",
    "  ax.axhline(env_cfg.reward_config.max_foot_height, color=\"k\", linestyle=\"--\")\n",
    "  ax.set_title(names[i])\n",
    "  ax.set_xlabel(\"time\")\n",
    "  ax.set_ylabel(\"height\")\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "linvel_x = jp.array(linvel)[:, 0]\n",
    "linvel_y = jp.array(linvel)[:, 1]\n",
    "angvel_yaw = jp.array(angvel)[:, 2]\n",
    "\n",
    "# Plot whether velocity is within the command range.\n",
    "linvel_x = jp.convolve(linvel_x, jp.ones(10) / 10, mode=\"same\")\n",
    "linvel_y = jp.convolve(linvel_y, jp.ones(10) / 10, mode=\"same\")\n",
    "angvel_yaw = jp.convolve(angvel_yaw, jp.ones(10) / 10, mode=\"same\")\n",
    "\n",
    "fig, axes = plt.subplots(3, 1, figsize=(10, 10))\n",
    "axes[0].plot(linvel_x)\n",
    "axes[1].plot(linvel_y)\n",
    "axes[2].plot(angvel_yaw)\n",
    "\n",
    "axes[0].set_ylim(\n",
    "    -env_cfg.command_config.a[0], env_cfg.command_config.a[0]\n",
    ")\n",
    "axes[1].set_ylim(\n",
    "    -env_cfg.command_config.a[1], env_cfg.command_config.a[1]\n",
    ")\n",
    "axes[2].set_ylim(\n",
    "    -env_cfg.command_config.a[2], env_cfg.command_config.a[2]\n",
    ")\n",
    "\n",
    "for i, ax in enumerate(axes):\n",
    "  ax.axhline(state.info[\"command\"][i], color=\"red\", linestyle=\"--\")\n",
    "\n",
    "labels = [\"dx\", \"dy\", \"dyaw\"]\n",
    "for i, ax in enumerate(axes):\n",
    "  ax.set_ylabel(labels[i])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "t1QAHuYBQBbl"
   },
   "source": [
    "Now let's visualize what it looks like to slowly increase linear velocity commands."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "Q0EuQiVlzh5u"
   },
   "outputs": [],
   "source": [
    "#@title Slowly increase linvel commands\n",
    "\n",
    "rng = jax.random.PRNGKey(0)\n",
    "rollout = []\n",
    "modify_scene_fns = []\n",
    "swing_peak = []\n",
    "linvel = []\n",
    "angvel = []\n",
    "\n",
    "x = -0.25\n",
    "command = jp.array([x, 0, 0])\n",
    "state = jit_reset(rng)\n",
    "for i in range(1_400):\n",
    "  # Increase the forward velocity by 0.25 m/s every 200 steps.\n",
    "  if i % 200 == 0:\n",
    "    x += 0.25\n",
    "    print(f\"Setting x to {x}\")\n",
    "    command = jp.array([x, 0, 0])\n",
    "  state.info[\"command\"] = command\n",
    "  if state.info[\"steps_since_last_pert\"] < state.info[\"steps_until_next_pert\"]:\n",
    "    rng = sample_pert(rng)\n",
    "  act_rng, rng = jax.random.split(rng)\n",
    "  ctrl, _ = jit_inference_fn(state.obs, act_rng)\n",
    "  state = jit_step(state, ctrl)\n",
    "  rollout.append(state)\n",
    "  swing_peak.append(state.info[\"swing_peak\"])\n",
    "  linvel.append(env.get_global_linvel(state.data))\n",
    "  angvel.append(env.get_gyro(state.data))\n",
    "  xyz = np.array(state.data.xpos[env._torso_body_id])\n",
    "  xyz += np.array([0, 0, 0.2])\n",
    "  x_axis = state.data.xmat[env._torso_body_id, 0]\n",
    "  yaw = -np.arctan2(x_axis[1], x_axis[0])\n",
    "  modify_scene_fns.append(\n",
    "      functools.partial(\n",
    "          draw_joystick_command,\n",
    "          cmd=command,\n",
    "          xyz=xyz,\n",
    "          theta=yaw,\n",
    "          scl=abs(command[0]) / env_cfg.command_config.a[0],\n",
    "      )\n",
    "  )\n",
    "\n",
    "\n",
    "# Plot each foot in a 2x2 grid.\n",
    "swing_peak = jp.array(swing_peak)\n",
    "names = [\"FR\", \"FL\", \"RR\", \"RL\"]\n",
    "colors = [\"r\", \"g\", \"b\", \"y\"]\n",
    "fig, axs = plt.subplots(2, 2)\n",
    "for i, ax in enumerate(axs.flat):\n",
    "  ax.plot(swing_peak[:, i], color=colors[i])\n",
    "  ax.set_ylim([0, env_cfg.reward_config.max_foot_height * 1.25])\n",
    "  ax.axhline(env_cfg.reward_config.max_foot_height, color=\"k\", linestyle=\"--\")\n",
    "  ax.set_title(names[i])\n",
    "  ax.set_xlabel(\"time\")\n",
    "  ax.set_ylabel(\"height\")\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "linvel_x = jp.array(linvel)[:, 0]\n",
    "linvel_y = jp.array(linvel)[:, 1]\n",
    "angvel_yaw = jp.array(angvel)[:, 2]\n",
    "\n",
    "# Plot whether velocity is within the command range.\n",
    "linvel_x = jp.convolve(linvel_x, jp.ones(10) / 10, mode=\"same\")\n",
    "linvel_y = jp.convolve(linvel_y, jp.ones(10) / 10, mode=\"same\")\n",
    "angvel_yaw = jp.convolve(angvel_yaw, jp.ones(10) / 10, mode=\"same\")\n",
    "\n",
    "fig, axes = plt.subplots(3, 1, figsize=(10, 10))\n",
    "axes[0].plot(linvel_x)\n",
    "axes[1].plot(linvel_y)\n",
    "axes[2].plot(angvel_yaw)\n",
    "\n",
    "axes[0].set_ylim(\n",
    "    -env_cfg.command_config.a[0], env_cfg.command_config.a[0]\n",
    ")\n",
    "axes[1].set_ylim(\n",
    "    -env_cfg.command_config.a[1], env_cfg.command_config.a[1]\n",
    ")\n",
    "axes[2].set_ylim(\n",
    "    -env_cfg.command_config.a[2], env_cfg.command_config.a[2]\n",
    ")\n",
    "\n",
    "for i, ax in enumerate(axes):\n",
    "  ax.axhline(state.info[\"command\"][i], color=\"red\", linestyle=\"--\")\n",
    "\n",
    "labels = [\"dx\", \"dy\", \"dyaw\"]\n",
    "for i, ax in enumerate(axes):\n",
    "  ax.set_ylabel(labels[i])\n",
    "\n",
    "\n",
    "render_every = 2\n",
    "fps = 1.0 / eval_env.dt / render_every\n",
    "print(f\"fps: {fps}\")\n",
    "\n",
    "traj = rollout[::render_every]\n",
    "mod_fns = modify_scene_fns[::render_every]\n",
    "assert len(traj) == len(mod_fns)\n",
    "\n",
    "scene_option = mujoco.MjvOption()\n",
    "scene_option.geomgroup[2] = True\n",
    "scene_option.geomgroup[3] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_PERTFORCE] = True\n",
    "\n",
    "frames = eval_env.render(\n",
    "    traj,\n",
    "    camera=\"track\",\n",
    "    height=480,\n",
    "    width=640,\n",
    "    modify_scene_fns=mod_fns,\n",
    "    scene_option=scene_option,\n",
    ")\n",
    "media.show_video(frames, fps=fps, loop=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "0RHZvXgmzrEJ"
   },
   "source": [
    "## Handstand\n",
    "\n",
    "Additional policies are available for the Unitree Go1, such as fall-recovery, handstand, and footstand policies. We'll use the handstand policy as an opportunity to demonstrate finetuning policies from prior checkpoints. This will allow us to quickly iterate on training curriculums by modifying the environment config between runs.\n",
    "\n",
    "For the Go1 handstand policy, we'll first train with the default configuration, and then add an energy penalty to make the policy smoother and more likely to transfer onto the robot."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RYriZOAxzEk_"
   },
   "outputs": [],
   "source": [
    "from mujoco_playground.config import locomotion_params\n",
    "\n",
    "env_name = 'Go1Handstand'\n",
    "env = registry.load(env_name)\n",
    "env_cfg = registry.get_default_config(env_name)\n",
    "ppo_params = locomotion_params.brax_ppo_config(env_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3nB5ugbdS5kk"
   },
   "source": [
    "Let's create a checkpoint directory and then train a policy with checkpointing."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "EyEDpHisS7eO"
   },
   "outputs": [],
   "source": [
    "ckpt_path = epath.Path(\"checkpoints\").resolve() / env_name\n",
    "ckpt_path.mkdir(parents=True, exist_ok=True)\n",
    "print(f\"{ckpt_path}\")\n",
    "\n",
    "with open(ckpt_path / \"config.json\", \"w\") as fp:\n",
    "  json.dump(env_cfg.to_dict(), fp, indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "lCRUYofXSNGT"
   },
   "outputs": [],
   "source": [
    "#@title Training fn definition\n",
    "x_data, y_data, y_dataerr = [], [], []\n",
    "times = [datetime.now()]\n",
    "\n",
    "\n",
    "def policy_params_fn(current_step, make_policy, params):\n",
    "  del make_policy  # Unused.\n",
    "  orbax_checkpointer = ocp.PyTreeCheckpointer()\n",
    "  save_args = orbax_utils.save_args_from_target(params)\n",
    "  path = ckpt_path / f\"{current_step}\"\n",
    "  orbax_checkpointer.save(path, params, force=True, save_args=save_args)\n",
    "\n",
    "\n",
    "def progress(num_steps, metrics):\n",
    "  clear_output(wait=True)\n",
    "\n",
    "  times.append(datetime.now())\n",
    "  x_data.append(num_steps)\n",
    "  y_data.append(metrics[\"eval/episode_reward\"])\n",
    "  y_dataerr.append(metrics[\"eval/episode_reward_std\"])\n",
    "\n",
    "  plt.xlim([0, ppo_params[\"num_timesteps\"] * 1.25])\n",
    "  plt.xlabel(\"# environment steps\")\n",
    "  plt.ylabel(\"reward per episode\")\n",
    "  plt.title(f\"y={y_data[-1]:.3f}\")\n",
    "  plt.errorbar(x_data, y_data, yerr=y_dataerr, color=\"blue\")\n",
    "\n",
    "  display(plt.gcf())\n",
    "\n",
    "randomizer = registry.get_domain_randomizer(env_name)\n",
    "ppo_training_params = dict(ppo_params)\n",
    "network_factory = ppo_networks.make_ppo_networks\n",
    "if \"network_factory\" in ppo_params:\n",
    "  del ppo_training_params[\"network_factory\"]\n",
    "  network_factory = functools.partial(\n",
    "      ppo_networks.make_ppo_networks,\n",
    "      **ppo_params.network_factory\n",
    "  )\n",
    "\n",
    "train_fn = functools.partial(\n",
    "    ppo.train, **dict(ppo_training_params),\n",
    "    network_factory=network_factory,\n",
    "    randomization_fn=randomizer,\n",
    "    progress_fn=progress,\n",
    "    policy_params_fn=policy_params_fn,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "A1oK80x1anPp"
   },
   "source": [
    "The initial policy takes 8 minutes to train on an RTX 4090."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MY6P3abhSNGU"
   },
   "outputs": [],
   "source": [
    "make_inference_fn, params, metrics = train_fn(\n",
    "    environment=registry.load(env_name, config=env_cfg),\n",
    "    eval_env=registry.load(env_name, config=env_cfg),\n",
    "    wrap_env_fn=wrapper.wrap_for_brax_training,\n",
    ")\n",
    "print(f\"time to jit: {times[1] - times[0]}\")\n",
    "print(f\"time to train: {times[-1] - times[1]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4s6PkZ4GWV4Z"
   },
   "source": [
    "Let's visualize the current policy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "WiWOtc_6WbcX"
   },
   "outputs": [],
   "source": [
    "#@title Rollout and Render\n",
    "inference_fn = make_inference_fn(params, deterministic=True)\n",
    "jit_inference_fn = jax.jit(inference_fn)\n",
    "\n",
    "eval_env = registry.load(env_name, config=env_cfg)\n",
    "jit_reset = jax.jit(eval_env.reset)\n",
    "jit_step = jax.jit(eval_env.step)\n",
    "\n",
    "rng = jax.random.PRNGKey(12345)\n",
    "rollout = []\n",
    "rewards = []\n",
    "torso_height = []\n",
    "actions = []\n",
    "torques = []\n",
    "power = []\n",
    "qfrc_constraint = []\n",
    "qvels = []\n",
    "power1 = []\n",
    "power2 = []\n",
    "for _ in range(10):\n",
    "  rng, reset_rng = jax.random.split(rng)\n",
    "  state = jit_reset(reset_rng)\n",
    "  for i in range(env_cfg.episode_length // 2):\n",
    "    act_rng, rng = jax.random.split(rng)\n",
    "    ctrl, _ = jit_inference_fn(state.obs, act_rng)\n",
    "    actions.append(ctrl)\n",
    "    state = jit_step(state, ctrl)\n",
    "    rollout.append(state)\n",
    "    rewards.append(\n",
    "        {k[7:]: v for k, v in state.metrics.items() if k.startswith(\"reward/\")}\n",
    "    )\n",
    "    torso_height.append(state.data.qpos[2])\n",
    "    torques.append(state.data.actuator_force)\n",
    "    qvel = state.data.qvel[6:]\n",
    "    power.append(jp.sum(jp.abs(qvel * state.data.actuator_force)))\n",
    "    qfrc_constraint.append(jp.linalg.norm(state.data.qfrc_constraint[6:]))\n",
    "    qvels.append(jp.max(jp.abs(qvel)))\n",
    "    frc = state.data.actuator_force\n",
    "    qvel = state.data.qvel[6:]\n",
    "    power1.append(jp.sum(frc * qvel))\n",
    "    power2.append(jp.sum(jp.abs(frc * qvel)))\n",
    "\n",
    "\n",
    "render_every = 2\n",
    "fps = 1.0 / eval_env.dt / render_every\n",
    "traj = rollout[::render_every]\n",
    "\n",
    "scene_option = mujoco.MjvOption()\n",
    "scene_option.geomgroup[2] = True\n",
    "scene_option.geomgroup[3] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = False\n",
    "\n",
    "frames = eval_env.render(\n",
    "    traj, camera=\"side\", scene_option=scene_option, height=480, width=640\n",
    ")\n",
    "media.show_video(frames, fps=fps, loop=False)\n",
    "\n",
    "power = jp.array(power1)\n",
    "print(f\"Max power: {jp.max(power)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "v5p0Z3PPSRik"
   },
   "source": [
    "Notice that the above policy looks jittery and is unlikely to transfer to the robot. The max power output is also quite high.\n",
    "\n",
    "The handstand policy used for sim-to-real deployment was trained using a curriculum on `energy_termination_threshold`, `energy`, and `dof_acc`, which are config values that penalize high torques and high power output. Let's finetune the above policy with a decreased `energy_termination_threshold`, as well as non-zero values for the `energy` and `dof_acc` rewards, to get a smoother policy."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hrjoVL-_WN-r"
   },
   "source": [
    "### Finetune the previous checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jTxAySRSSu96"
   },
   "outputs": [],
   "source": [
    "env_cfg = registry.get_default_config(env_name)\n",
    "env_cfg.energy_termination_threshold = 400  # lower energy termination threshold\n",
    "env_cfg.reward_config.energy = -0.003  # non-zero negative `energy` reward\n",
    "env_cfg.reward_config.dof_acc = -2.5e-7  # non-zero negative `dof_acc` reward\n",
    "\n",
    "FINETUNE_PATH = epath.Path(ckpt_path)\n",
    "latest_ckpts = list(FINETUNE_PATH.glob(\"*\"))\n",
    "latest_ckpts = [ckpt for ckpt in latest_ckpts if ckpt.is_dir()]\n",
    "latest_ckpts.sort(key=lambda x: int(x.name))\n",
    "latest_ckpt = latest_ckpts[-1]\n",
    "restore_checkpoint_path = latest_ckpt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_M5IqOR6z4bV"
   },
   "outputs": [],
   "source": [
    "x_data, y_data, y_dataerr = [], [], []\n",
    "times = [datetime.now()]\n",
    "\n",
    "make_inference_fn, params, metrics = train_fn(\n",
    "    environment=registry.load(env_name, config=env_cfg),\n",
    "    eval_env=registry.load(env_name, config=env_cfg),\n",
    "    wrap_env_fn=wrapper.wrap_for_brax_training,\n",
    "    restore_checkpoint_path=restore_checkpoint_path,  # restore from the checkpoint!\n",
    "    seed=1,\n",
    ")\n",
    "print(f\"time to jit: {times[1] - times[0]}\")\n",
    "print(f\"time to train: {times[-1] - times[1]}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "tzG8eY2lz4dk"
   },
   "outputs": [],
   "source": [
    "#@title Rollout and Render Finetune Policy\n",
    "inference_fn = make_inference_fn(params, deterministic=True)\n",
    "jit_inference_fn = jax.jit(inference_fn)\n",
    "\n",
    "eval_env = registry.load(env_name, config=env_cfg)\n",
    "jit_reset = jax.jit(eval_env.reset)\n",
    "jit_step = jax.jit(eval_env.step)\n",
    "\n",
    "rng = jax.random.PRNGKey(12345)\n",
    "rollout = []\n",
    "rewards = []\n",
    "torso_height = []\n",
    "actions = []\n",
    "torques = []\n",
    "power = []\n",
    "qfrc_constraint = []\n",
    "qvels = []\n",
    "power1 = []\n",
    "power2 = []\n",
    "for _ in range(10):\n",
    "  rng, reset_rng = jax.random.split(rng)\n",
    "  state = jit_reset(reset_rng)\n",
    "  for i in range(env_cfg.episode_length // 2):\n",
    "    act_rng, rng = jax.random.split(rng)\n",
    "    ctrl, _ = jit_inference_fn(state.obs, act_rng)\n",
    "    actions.append(ctrl)\n",
    "    state = jit_step(state, ctrl)\n",
    "    rollout.append(state)\n",
    "    rewards.append(\n",
    "        {k[7:]: v for k, v in state.metrics.items() if k.startswith(\"reward/\")}\n",
    "    )\n",
    "    torso_height.append(state.data.qpos[2])\n",
    "    torques.append(state.data.actuator_force)\n",
    "    qvel = state.data.qvel[6:]\n",
    "    power.append(jp.sum(jp.abs(qvel * state.data.actuator_force)))\n",
    "    qfrc_constraint.append(jp.linalg.norm(state.data.qfrc_constraint[6:]))\n",
    "    qvels.append(jp.max(jp.abs(qvel)))\n",
    "    frc = state.data.actuator_force\n",
    "    qvel = state.data.qvel[6:]\n",
    "    power1.append(jp.sum(frc * qvel))\n",
    "    power2.append(jp.sum(jp.abs(frc * qvel)))\n",
    "\n",
    "\n",
    "render_every = 2\n",
    "fps = 1.0 / eval_env.dt / render_every\n",
    "traj = rollout[::render_every]\n",
    "\n",
    "scene_option = mujoco.MjvOption()\n",
    "scene_option.geomgroup[2] = True\n",
    "scene_option.geomgroup[3] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = False\n",
    "\n",
    "frames = eval_env.render(\n",
    "    traj, camera=\"side\", scene_option=scene_option, height=480, width=640\n",
    ")\n",
    "media.show_video(frames, fps=fps, loop=False)\n",
    "\n",
    "power = jp.array(power1)\n",
    "print(f\"Max power: {jp.max(power)}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "yCyibqGMiAca"
   },
   "source": [
    "The final policy should exhibit smoother behavior and have less power output! Feel free to finetune the policy some more using different reward terms to get the best behavior."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "26o77FfWXvVp"
   },
   "source": [
    "# Bipedal\n",
    "\n",
    "MuJoCo Playground also comes with a host of bipedal environments, such as the Berkeley Humanoid and the Unitree G1/H1. Let's demonstrate a joystick policy on the Berkeley Humanoid. The initial policy takes 17 minutes to train on an RTX 4090."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ESNd18FUanPt"
   },
   "outputs": [],
   "source": [
    "env_name = 'BerkeleyHumanoidJoystickFlatTerrain'\n",
    "env = registry.load(env_name)\n",
    "env_cfg = registry.get_default_config(env_name)\n",
    "ppo_params = locomotion_params.brax_ppo_config(env_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nibLoRu8anPt"
   },
   "outputs": [],
   "source": [
    "x_data, y_data, y_dataerr = [], [], []\n",
    "times = [datetime.now()]\n",
    "\n",
    "randomizer = registry.get_domain_randomizer(env_name)\n",
    "ppo_training_params = dict(ppo_params)\n",
    "network_factory = ppo_networks.make_ppo_networks\n",
    "if \"network_factory\" in ppo_params:\n",
    "  del ppo_training_params[\"network_factory\"]\n",
    "  network_factory = functools.partial(\n",
    "      ppo_networks.make_ppo_networks,\n",
    "      **ppo_params.network_factory\n",
    "  )\n",
    "\n",
    "train_fn = functools.partial(\n",
    "    ppo.train, **dict(ppo_training_params),\n",
    "    network_factory=network_factory,\n",
    "    randomization_fn=randomizer,\n",
    "    progress_fn=progress\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "16dqomv0anPt"
   },
   "outputs": [],
   "source": [
    "make_inference_fn, params, metrics = train_fn(\n",
    "    environment=env,\n",
    "    eval_env=registry.load(env_name, config=env_cfg),\n",
    "    wrap_env_fn=wrapper.wrap_for_brax_training,\n",
    ")\n",
    "print(f\"time to jit: {times[1] - times[0]}\")\n",
    "print(f\"time to train: {times[-1] - times[1]}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "cellView": "form",
    "id": "sBHDF-JFanPt"
   },
   "outputs": [],
   "source": [
    "#@title Rollout and Render\n",
    "from mujoco_playground._src.gait import draw_joystick_command\n",
    "\n",
    "env = registry.load(env_name)\n",
    "eval_env = registry.load(env_name)\n",
    "jit_reset = jax.jit(eval_env.reset)\n",
    "jit_step = jax.jit(eval_env.step)\n",
    "jit_inference_fn = jax.jit(make_inference_fn(params, deterministic=True))\n",
    "\n",
    "rng = jax.random.PRNGKey(1)\n",
    "\n",
    "rollout = []\n",
    "modify_scene_fns = []\n",
    "\n",
    "x_vel = 1.0  #@param {type: \"number\"}\n",
    "y_vel = 0.0  #@param {type: \"number\"}\n",
    "yaw_vel = 0.0  #@param {type: \"number\"}\n",
    "command = jp.array([x_vel, y_vel, yaw_vel])\n",
    "\n",
    "phase_dt = 2 * jp.pi * eval_env.dt * 1.5\n",
    "phase = jp.array([0, jp.pi])\n",
    "\n",
    "for j in range(1):\n",
    "  print(f\"episode {j}\")\n",
    "  state = jit_reset(rng)\n",
    "  state.info[\"phase_dt\"] = phase_dt\n",
    "  state.info[\"phase\"] = phase\n",
    "  for i in range(env_cfg.episode_length):\n",
    "    act_rng, rng = jax.random.split(rng)\n",
    "    ctrl, _ = jit_inference_fn(state.obs, act_rng)\n",
    "    state = jit_step(state, ctrl)\n",
    "    if state.done:\n",
    "      break\n",
    "    state.info[\"command\"] = command\n",
    "    rollout.append(state)\n",
    "\n",
    "    xyz = np.array(state.data.xpos[eval_env.mj_model.body(\"torso\").id])\n",
    "    xyz += np.array([0, 0.0, 0])\n",
    "    x_axis = state.data.xmat[eval_env._torso_body_id, 0]\n",
    "    yaw = -np.arctan2(x_axis[1], x_axis[0])\n",
    "    modify_scene_fns.append(\n",
    "        functools.partial(\n",
    "            draw_joystick_command,\n",
    "            cmd=state.info[\"command\"],\n",
    "            xyz=xyz,\n",
    "            theta=yaw,\n",
    "            scl=np.linalg.norm(state.info[\"command\"]),\n",
    "        )\n",
    "    )\n",
    "\n",
    "render_every = 1\n",
    "fps = 1.0 / eval_env.dt / render_every\n",
    "print(f\"fps: {fps}\")\n",
    "traj = rollout[::render_every]\n",
    "mod_fns = modify_scene_fns[::render_every]\n",
    "\n",
    "scene_option = mujoco.MjvOption()\n",
    "scene_option.geomgroup[2] = True\n",
    "scene_option.geomgroup[3] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = False\n",
    "scene_option.flags[mujoco.mjtVisFlag.mjVIS_PERTFORCE] = False\n",
    "\n",
    "frames = eval_env.render(\n",
    "    traj,\n",
    "    camera=\"track\",\n",
    "    scene_option=scene_option,\n",
    "    width=640*2,\n",
    "    height=480,\n",
    "    modify_scene_fns=mod_fns,\n",
    ")\n",
    "media.show_video(frames, fps=fps, loop=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "CBtrAqns35sI"
   },
   "source": [
    "🙌 Hasta la vista!"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "A100",
   "machine_shape": "hm",
   "private_outputs": true,
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}