{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Z97NsWqYepIS"
      },
      "source": [
        "## **Step 1: Setting Up ExecuTorch**\n",
        "\n",
        "*   If using a Google colab notebook then please get a High-RAM instance to run this notebook.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "cAsZThj3dFo7"
      },
      "outputs": [],
      "source": [
        "! touch /content/executorch; rm -rf /content/executorch\n",
        "! git clone https://github.com/pytorch/executorch ; cd /content/executorch; git submodule sync ; git submodule update --init"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "J58Rbuptspfj"
      },
      "outputs": [],
      "source": [
        "import sys\n",
        "# This is a workaround for now\n",
        "!mkdir -p /usr/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/torchgen/packaged/ATen/native/\n",
        "!cp /usr/local/lib/python{sys.version_info.major}.{sys.version_info.minor}/dist-packages/torchgen/packaged/ATen/native/* /usr/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages/torchgen/packaged/ATen/native/"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "N1WgIQyqd5ra"
      },
      "outputs": [],
      "source": [
        "import sysconfig; lib_path = sysconfig.get_paths()[\"purelib\"]\n",
        "! cd /content/executorch; CMAKE_PREFIX_PATH={lib_path} EXECUTORCH_BUILD_XNNPACK=ON bash ./install_executorch.sh"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "xEcuuYbIfE3L"
      },
      "outputs": [],
      "source": [
        "!cd /content/executorch; examples/models/llama/install_requirements.sh"
      ]
    },
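    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Optional: a quick sanity check that the install steps above made the `executorch` Python package available in this environment. It only imports `executorch.exir` and prints where it was loaded from.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Sanity check: the install step above should make executorch importable.\n",
        "from executorch import exir\n",
        "print(\"executorch.exir loaded from:\", exir.__file__)"
      ]
    },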
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "UX-ZS052fP6D"
      },
      "source": [
        "## **Step 2. Download DeepSeek-R1-Distill-Llama-8B models**"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "BsL1yxiUfi01"
      },
      "outputs": [],
      "source": [
        "!huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Llama-8B --local-dir /content/models/DeepSeek-R1-Distill-Llama-8B --local-dir-use-symlinks False"
      ]
    },
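    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Optional: list the downloaded files to confirm the two safetensors shards referenced in the conversion step below are present. The path matches the `--local-dir` used above.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# List the downloaded model files and their sizes; the conversion step below\n",
        "# expects the two safetensors shards plus the tokenizer/config files here.\n",
        "import os\n",
        "model_dir = \"/content/models/DeepSeek-R1-Distill-Llama-8B\"\n",
        "for name in sorted(os.listdir(model_dir)):\n",
        "    size_mb = os.path.getsize(os.path.join(model_dir, name)) / 1e6\n",
        "    print(f\"{name:60s} {size_mb:10.1f} MB\")"
      ]
    },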
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6QE-6XPWr4j9"
      },
      "source": [
        "## **Step 3: Export to ExecuTorch**"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install torchtune"
      ],
      "metadata": {
        "id": "YrQLa1ST-uCP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from torchtune.models import convert_weights\n",
        "from torchtune.training import FullModelHFCheckpointer\n",
        "import torch\n",
        "\n",
        "# Convert from safetensors to TorchTune. Suppose the model has been downloaded from Hugging Face\n",
        "checkpointer = FullModelHFCheckpointer(\n",
        "    checkpoint_dir='/content/models/DeepSeek-R1-Distill-Llama-8B',\n",
        "    checkpoint_files=['model-00001-of-000002.safetensors', 'model-00002-of-000002.safetensors'],\n",
        "    output_dir='/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/' ,\n",
        "    model_type='LLAMA3' # or other types that TorchTune supports\n",
        ")\n",
        "\n",
        "print(\"loading checkpoint\")\n",
        "sd = checkpointer.load_checkpoint()\n",
        "\n",
        "# Convert from TorchTune to Meta (PyTorch native)\n",
        "sd = convert_weights.tune_to_meta(sd['model'])\n",
        "\n",
        "print(\"saving checkpoint\")\n",
        "torch.save(sd, \"/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth\")"
      ],
      "metadata": {
        "id": "Zphh3FVu-2Wa"
      },
      "execution_count": null,
      "outputs": []
    },
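    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Optional: confirm that the Meta-format checkpoint was written before moving on. The path matches the `output_dir` used in the cell above.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Check that the converted checkpoint exists and report its size.\n",
        "import os\n",
        "ckpt = \"/tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth\"\n",
        "print(ckpt, \"exists:\", os.path.exists(ckpt))\n",
        "if os.path.exists(ckpt):\n",
        "    print(f\"size: {os.path.getsize(ckpt) / 1e9:.2f} GB\")"
      ]
    },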
    {
      "cell_type": "code",
      "source": [
        "# Download https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct/blob/main/original/params.json and place it in /tmp/params.json"
      ],
      "metadata": {
        "id": "UBWLe4Gu_OTK"
      },
      "execution_count": null,
      "outputs": []
    },
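    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "One way to fetch `params.json` programmatically is sketched below. It assumes you have been granted access to the gated `meta-llama/Llama-3.1-8B-Instruct` repo and are logged in to Hugging Face (e.g. via `huggingface-cli login`); otherwise download the file manually as described above.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Sketch: fetch params.json via huggingface_hub and copy it to /tmp/params.json.\n",
        "# Requires access to the gated meta-llama repo and a valid HF token.\n",
        "import shutil\n",
        "from huggingface_hub import hf_hub_download\n",
        "\n",
        "params_path = hf_hub_download(\n",
        "    repo_id=\"meta-llama/Llama-3.1-8B-Instruct\",\n",
        "    filename=\"original/params.json\",\n",
        ")\n",
        "shutil.copy(params_path, \"/tmp/params.json\")\n",
        "print(\"copied params.json to /tmp/params.json\")"
      ]
    },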
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": true,
        "id": "hGkyrU5lnNop"
      },
      "outputs": [],
      "source": [
        "!cd /content/executorch; python -m examples.models.llama.export_llama \\\n",
        "    --checkpoint /tmp/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/checkpoint.pth \\\n",
        "\t-p /tmp/params.json \\\n",
        "\t-kv \\\n",
        "\t--use_sdpa_with_kv_cache \\\n",
        "\t-X \\\n",
        "\t-qmode 8da4w \\\n",
        "\t--group_size 128 \\\n",
        "\t-d fp16 \\\n",
        "\t--metadata '{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}' \\\n",
        "\t--embedding-quantize 4,32 \\\n",
        "\t--output_name=\"DeepSeek-R1-Distill-Llama-8B.pte\""
      ]
    }
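    ,
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Optional: confirm that the exported `.pte` file was produced. The path below assumes `export_llama` writes its output into the `/content/executorch` directory it was run from.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Check that the exported ExecuTorch program exists and report its size.\n",
        "# The output location is an assumption based on the working directory of the export command.\n",
        "import os\n",
        "pte_path = \"/content/executorch/DeepSeek-R1-Distill-Llama-8B.pte\"\n",
        "print(pte_path, \"exists:\", os.path.exists(pte_path))\n",
        "if os.path.exists(pte_path):\n",
        "    print(f\"size: {os.path.getsize(pte_path) / 1e9:.2f} GB\")"
      ]
    }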
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}