{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "C_YSfsRILGPG",
        "tags": [],
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "107ed765-da2b-4d6e-e562-43c5573d8566"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "fatal: destination path 'multi_model_phi_3' already exists and is not an empty directory.\n"
          ]
        }
      ],
      "source": [
        "!git clone https://github.com/AkashDataScience/multi_model_phi_3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CBVAhJBULs5R",
        "tags": [],
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "3843bc68-eac9-45aa-a061-b284ae3ddefd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/multi_model_phi_3\n"
          ]
        }
      ],
      "source": [
        "%cd multi_model_phi_3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "75koL8tzLxKS",
        "tags": [],
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "7c5217f4-e70b-4d6d-bda3-65e94878b0e5"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting clip@ git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1 (from -r requirements.txt (line 2))\n",
            "  Using cached clip-1.0-py3-none-any.whl\n",
            "Requirement already satisfied: bitsandbytes==0.43.3 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (0.43.3)\n",
            "Requirement already satisfied: colorama==0.4.6 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 3)) (0.4.6)\n",
            "Requirement already satisfied: datasets==3.0.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 4)) (3.0.0)\n",
            "Requirement already satisfied: dill==0.3.8 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (0.3.8)\n",
            "Requirement already satisfied: multiprocess==0.70.16 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 6)) (0.70.16)\n",
            "Requirement already satisfied: numpy==1.26.4 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 7)) (1.26.4)\n",
            "Requirement already satisfied: pandas==2.2.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 8)) (2.2.2)\n",
            "Requirement already satisfied: peft==0.12.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 9)) (0.12.0)\n",
            "Requirement already satisfied: shtab==1.7.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 10)) (1.7.1)\n",
            "Requirement already satisfied: tokenizers==0.19.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 11)) (0.19.1)\n",
            "Requirement already satisfied: torch==2.4.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 12)) (2.4.1+cu121)\n",
            "Requirement already satisfied: torchvision==0.19.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 13)) (0.19.1+cu121)\n",
            "Requirement already satisfied: tqdm==4.66.5 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 14)) (4.66.5)\n",
            "Requirement already satisfied: transformers==4.44.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 15)) (4.44.2)\n",
            "Requirement already satisfied: treelib==1.7.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 16)) (1.7.0)\n",
            "Requirement already satisfied: trl==0.10.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 17)) (0.10.1)\n",
            "Requirement already satisfied: typing_extensions==4.12.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 18)) (4.12.2)\n",
            "Requirement already satisfied: tyro==0.8.10 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 19)) (0.8.10)\n",
            "Requirement already satisfied: tzdata==2024.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 20)) (2024.1)\n",
            "Requirement already satisfied: urllib3==2.2.3 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 21)) (2.2.3)\n",
            "Requirement already satisfied: wcwidth==0.2.13 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 22)) (0.2.13)\n",
            "Requirement already satisfied: xxhash==3.5.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 23)) (3.5.0)\n",
            "Requirement already satisfied: yarl==1.11.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 24)) (1.11.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (3.16.1)\n",
            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (16.1.0)\n",
            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (2.32.3)\n",
            "Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets==3.0.0->-r requirements.txt (line 4)) (2024.6.1)\n",
            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (3.10.5)\n",
            "Requirement already satisfied: huggingface-hub>=0.22.0 in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (0.24.7)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (24.1)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets==3.0.0->-r requirements.txt (line 4)) (6.0.2)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2->-r requirements.txt (line 8)) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.2.2->-r requirements.txt (line 8)) (2024.2)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.12.0->-r requirements.txt (line 9)) (5.9.5)\n",
            "Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.12.0->-r requirements.txt (line 9)) (0.34.2)\n",
            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft==0.12.0->-r requirements.txt (line 9)) (0.4.5)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.4.1->-r requirements.txt (line 12)) (1.13.3)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.4.1->-r requirements.txt (line 12)) (3.3)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.4.1->-r requirements.txt (line 12)) (3.1.4)\n",
            "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.19.1->-r requirements.txt (line 13)) (10.4.0)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.44.2->-r requirements.txt (line 15)) (2024.9.11)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from treelib==1.7.0->-r requirements.txt (line 16)) (1.16.0)\n",
            "Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro==0.8.10->-r requirements.txt (line 19)) (0.16)\n",
            "Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro==0.8.10->-r requirements.txt (line 19)) (13.9.1)\n",
            "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.10/dist-packages (from yarl==1.11.1->-r requirements.txt (line 24)) (3.10)\n",
            "Requirement already satisfied: multidict>=4.0 in /usr/local/lib/python3.10/dist-packages (from yarl==1.11.1->-r requirements.txt (line 24)) (6.1.0)\n",
            "Requirement already satisfied: ftfy in /usr/local/lib/python3.10/dist-packages (from clip@ git+https://github.com/openai/CLIP.git@dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1->-r requirements.txt (line 2)) (6.2.3)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==3.0.0->-r requirements.txt (line 4)) (2.4.3)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==3.0.0->-r requirements.txt (line 4)) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==3.0.0->-r requirements.txt (line 4)) (24.2.0)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==3.0.0->-r requirements.txt (line 4)) (1.4.1)\n",
            "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets==3.0.0->-r requirements.txt (line 4)) (4.0.3)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets==3.0.0->-r requirements.txt (line 4)) (3.3.2)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets==3.0.0->-r requirements.txt (line 4)) (2024.8.30)\n",
            "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro==0.8.10->-r requirements.txt (line 19)) (3.0.0)\n",
            "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro==0.8.10->-r requirements.txt (line 19)) (2.18.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.4.1->-r requirements.txt (line 12)) (2.1.5)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.4.1->-r requirements.txt (line 12)) (1.3.0)\n",
            "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro==0.8.10->-r requirements.txt (line 19)) (0.1.2)\n"
          ]
        }
      ],
      "source": [
        "!pip install -r requirements.txt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QauI2fQjWWTg",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "fa8e0f93-d988-4108-93f7-15a7281a1b21"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "/content/multi_model_phi_3/image_finetuning/finetuning\n"
          ]
        }
      ],
      "source": [
        "%cd image_finetuning/finetuning"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!wget -c https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/llava_instruct_150k.json"
      ],
      "metadata": {
        "id": "koXJ8mCciYYn",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "ad30c91c-59f7-48c3-c6c9-7cd824dbdaaf"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2024-10-10 15:16:24--  https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/llava_instruct_150k.json\n",
            "Resolving huggingface.co (huggingface.co)... 3.165.160.12, 3.165.160.59, 3.165.160.11, ...\n",
            "Connecting to huggingface.co (huggingface.co)|3.165.160.12|:443... connected.\n",
            "HTTP request sent, awaiting response... 302 Found\n",
            "Location: https://cdn-lfs.hf.co/repos/4d/41/4d41ea1e2709f0e68e9e361e4218192b9620c5a3f2cb8055bc625942b6cd3039/6b68bc5ca2bfd8a71119af0e8454929668ccda6a334955ccc95d114fc8d082fa?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27llava_instruct_150k.json%3B+filename%3D%22llava_instruct_150k.json%22%3B&response-content-type=application%2Fjson&Expires=1728832584&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyODgzMjU4NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy80ZC80MS80ZDQxZWExZTI3MDlmMGU2OGU5ZTM2MWU0MjE4MTkyYjk2MjBjNWEzZjJjYjgwNTViYzYyNTk0MmI2Y2QzMDM5LzZiNjhiYzVjYTJiZmQ4YTcxMTE5YWYwZTg0NTQ5Mjk2NjhjY2RhNmEzMzQ5NTVjY2M5NWQxMTRmYzhkMDgyZmE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qJnJlc3BvbnNlLWNvbnRlbnQtdHlwZT0qIn1dfQ__&Signature=hSNSHtz4qcHoAKL%7EBQFjBgq04GmcG2H-ajjYJrixr%7EHufuWwWQMy5AcuLKkDmolFgE8M82AnKQ08idN5ZvzJcgcoyt4QLWmrwLFRMnkORPQNFAoZk9FKvkthxfpdIdLtTZoPb6BqMg5l4SeggvOSC5q8%7EtfC5ASQMw%7ExqIqSGPTo9yIb-CfLXyE3Ceef8E7MIfW8s796ZpgilPx1zhl4cx8s2DyieL84KckvhYxf2Lc5MRBZnUdl0sUuvHBlC7SCr5lB2v-W1veTiqwur9fSpQ4uawD1BApft-zlSA84DnjssFWqhBa-T49X5-P2fGLmwAPcyVlUT17%7EvhHc-reAJg__&Key-Pair-Id=K3RPWS32NSSJCE [following]\n",
            "--2024-10-10 15:16:25--  https://cdn-lfs.hf.co/repos/4d/41/4d41ea1e2709f0e68e9e361e4218192b9620c5a3f2cb8055bc625942b6cd3039/6b68bc5ca2bfd8a71119af0e8454929668ccda6a334955ccc95d114fc8d082fa?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27llava_instruct_150k.json%3B+filename%3D%22llava_instruct_150k.json%22%3B&response-content-type=application%2Fjson&Expires=1728832584&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyODgzMjU4NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9yZXBvcy80ZC80MS80ZDQxZWExZTI3MDlmMGU2OGU5ZTM2MWU0MjE4MTkyYjk2MjBjNWEzZjJjYjgwNTViYzYyNTk0MmI2Y2QzMDM5LzZiNjhiYzVjYTJiZmQ4YTcxMTE5YWYwZTg0NTQ5Mjk2NjhjY2RhNmEzMzQ5NTVjY2M5NWQxMTRmYzhkMDgyZmE%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qJnJlc3BvbnNlLWNvbnRlbnQtdHlwZT0qIn1dfQ__&Signature=hSNSHtz4qcHoAKL%7EBQFjBgq04GmcG2H-ajjYJrixr%7EHufuWwWQMy5AcuLKkDmolFgE8M82AnKQ08idN5ZvzJcgcoyt4QLWmrwLFRMnkORPQNFAoZk9FKvkthxfpdIdLtTZoPb6BqMg5l4SeggvOSC5q8%7EtfC5ASQMw%7ExqIqSGPTo9yIb-CfLXyE3Ceef8E7MIfW8s796ZpgilPx1zhl4cx8s2DyieL84KckvhYxf2Lc5MRBZnUdl0sUuvHBlC7SCr5lB2v-W1veTiqwur9fSpQ4uawD1BApft-zlSA84DnjssFWqhBa-T49X5-P2fGLmwAPcyVlUT17%7EvhHc-reAJg__&Key-Pair-Id=K3RPWS32NSSJCE\n",
            "Resolving cdn-lfs.hf.co (cdn-lfs.hf.co)... 18.172.170.21, 18.172.170.29, 18.172.170.5, ...\n",
            "Connecting to cdn-lfs.hf.co (cdn-lfs.hf.co)|18.172.170.21|:443... connected.\n",
            "HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable\n",
            "\n",
            "    The file is already fully retrieved; nothing to do.\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "B325mAHNtJCB",
        "tags": [],
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "f8108b8c120d4f49ac2b0fa38d6213c3",
            "868fd8ed1fb5432ab6d6761b4e4ce17d",
            "f75b8c1cb15a4dc9871b28a286dd3b82",
            "a08098910db44feabc38e65bf4a55379",
            "13be5acd97ab44babec61cedcf5b2a3a",
            "239ab0a871684811ae7a3e16daa8991a",
            "4ca7774f57454d74bd1b7c9445030038",
            "557ecfce51574b8db9236bdc8d0bd555",
            "bc0e09ab397f42879b8c874eb10e6a2b",
            "8dec270eb8c649649f6b95ddde159a0f",
            "f9d65533a8fd4310b1466713c22d8255"
          ]
        },
        "outputId": "1a2eb33f-62b2-4f96-d3f5-365869a0aa0b"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1 distributed training: True, 16-bits training: False\n",
            "INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
            "_n_gpu=1,\n",
            "accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},\n",
            "adafactor=False,\n",
            "adam_beta1=0.9,\n",
            "adam_beta2=0.999,\n",
            "adam_epsilon=1e-08,\n",
            "auto_find_batch_size=False,\n",
            "batch_eval_metrics=False,\n",
            "bf16=True,\n",
            "bf16_full_eval=False,\n",
            "data_seed=None,\n",
            "dataloader_drop_last=False,\n",
            "dataloader_num_workers=0,\n",
            "dataloader_persistent_workers=False,\n",
            "dataloader_pin_memory=True,\n",
            "dataloader_prefetch_factor=None,\n",
            "ddp_backend=None,\n",
            "ddp_broadcast_buffers=None,\n",
            "ddp_bucket_cap_mb=None,\n",
            "ddp_find_unused_parameters=None,\n",
            "ddp_timeout=1800,\n",
            "debug=[],\n",
            "deepspeed=None,\n",
            "disable_tqdm=False,\n",
            "dispatch_batches=None,\n",
            "do_eval=False,\n",
            "do_predict=False,\n",
            "do_train=False,\n",
            "eval_accumulation_steps=None,\n",
            "eval_delay=0,\n",
            "eval_do_concat_batches=True,\n",
            "eval_on_start=False,\n",
            "eval_steps=None,\n",
            "eval_strategy=no,\n",
            "eval_use_gather_object=False,\n",
            "evaluation_strategy=None,\n",
            "fp16=False,\n",
            "fp16_backend=auto,\n",
            "fp16_full_eval=False,\n",
            "fp16_opt_level=O1,\n",
            "fsdp=[],\n",
            "fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},\n",
            "fsdp_min_num_params=0,\n",
            "fsdp_transformer_layer_cls_to_wrap=None,\n",
            "full_determinism=False,\n",
            "gradient_accumulation_steps=1,\n",
            "gradient_checkpointing=True,\n",
            "gradient_checkpointing_kwargs={'use_reentrant': False},\n",
            "greater_is_better=None,\n",
            "group_by_length=False,\n",
            "half_precision_backend=auto,\n",
            "hub_always_push=False,\n",
            "hub_model_id=None,\n",
            "hub_private_repo=False,\n",
            "hub_strategy=every_save,\n",
            "hub_token=<HUB_TOKEN>,\n",
            "ignore_data_skip=False,\n",
            "include_inputs_for_metrics=False,\n",
            "include_num_input_tokens_seen=False,\n",
            "include_tokens_per_second=False,\n",
            "jit_mode_eval=False,\n",
            "label_names=None,\n",
            "label_smoothing_factor=0.0,\n",
            "learning_rate=5e-06,\n",
            "length_column_name=length,\n",
            "load_best_model_at_end=False,\n",
            "local_rank=0,\n",
            "log_level=info,\n",
            "log_level_replica=warning,\n",
            "log_on_each_node=True,\n",
            "logging_dir=./checkpoint_dir/runs/Oct10_15-16-37_33ba61f47fc9,\n",
            "logging_first_step=False,\n",
            "logging_nan_inf_filter=True,\n",
            "logging_steps=20,\n",
            "logging_strategy=steps,\n",
            "lr_scheduler_kwargs={},\n",
            "lr_scheduler_type=cosine,\n",
            "max_grad_norm=1.0,\n",
            "max_steps=60,\n",
            "metric_for_best_model=None,\n",
            "mp_parameters=,\n",
            "neftune_noise_alpha=None,\n",
            "no_cuda=False,\n",
            "num_train_epochs=1,\n",
            "optim=adamw_torch,\n",
            "optim_args=None,\n",
            "optim_target_modules=None,\n",
            "output_dir=./checkpoint_dir,\n",
            "overwrite_output_dir=True,\n",
            "past_index=-1,\n",
            "per_device_eval_batch_size=4,\n",
            "per_device_train_batch_size=4,\n",
            "prediction_loss_only=False,\n",
            "push_to_hub=False,\n",
            "push_to_hub_model_id=None,\n",
            "push_to_hub_organization=None,\n",
            "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
            "ray_scope=last,\n",
            "remove_unused_columns=False,\n",
            "report_to=['tensorboard'],\n",
            "restore_callback_states_from_checkpoint=False,\n",
            "resume_from_checkpoint=None,\n",
            "run_name=./checkpoint_dir,\n",
            "save_on_each_node=False,\n",
            "save_only_model=False,\n",
            "save_safetensors=True,\n",
            "save_steps=60,\n",
            "save_strategy=steps,\n",
            "save_total_limit=1,\n",
            "seed=0,\n",
            "skip_memory_metrics=True,\n",
            "split_batches=None,\n",
            "tf32=None,\n",
            "torch_compile=False,\n",
            "torch_compile_backend=None,\n",
            "torch_compile_mode=None,\n",
            "torch_empty_cache_steps=None,\n",
            "torchdynamo=None,\n",
            "tpu_metrics_debug=False,\n",
            "tpu_num_cores=None,\n",
            "use_cpu=False,\n",
            "use_ipex=False,\n",
            "use_legacy_prediction_loop=False,\n",
            "use_mps_device=False,\n",
            "warmup_ratio=0.2,\n",
            "warmup_steps=0,\n",
            "weight_decay=0.0,\n",
            ")\n",
            "INFO:__main__:PEFT parameters LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=16, target_modules={'qkv_proj', 'o_proj'}, lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False))\n",
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n",
            "[INFO|configuration_utils.py:733] 2024-10-10 15:16:39,682 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/config.json\n",
            "[INFO|configuration_utils.py:733] 2024-10-10 15:16:39,856 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/config.json\n",
            "[INFO|configuration_utils.py:800] 2024-10-10 15:16:39,858 >> Model config Phi3Config {\n",
            "  \"_name_or_path\": \"microsoft/phi-3-mini-4k-instruct\",\n",
            "  \"architectures\": [\n",
            "    \"Phi3ForCausalLM\"\n",
            "  ],\n",
            "  \"attention_bias\": false,\n",
            "  \"attention_dropout\": 0.0,\n",
            "  \"auto_map\": {\n",
            "    \"AutoConfig\": \"microsoft/phi-3-mini-4k-instruct--configuration_phi3.Phi3Config\",\n",
            "    \"AutoModelForCausalLM\": \"microsoft/phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM\"\n",
            "  },\n",
            "  \"bos_token_id\": 1,\n",
            "  \"embd_pdrop\": 0.0,\n",
            "  \"eos_token_id\": 32000,\n",
            "  \"hidden_act\": \"silu\",\n",
            "  \"hidden_size\": 3072,\n",
            "  \"initializer_range\": 0.02,\n",
            "  \"intermediate_size\": 8192,\n",
            "  \"max_position_embeddings\": 4096,\n",
            "  \"model_type\": \"phi3\",\n",
            "  \"num_attention_heads\": 32,\n",
            "  \"num_hidden_layers\": 32,\n",
            "  \"num_key_value_heads\": 32,\n",
            "  \"original_max_position_embeddings\": 4096,\n",
            "  \"pad_token_id\": 32000,\n",
            "  \"resid_pdrop\": 0.0,\n",
            "  \"rms_norm_eps\": 1e-05,\n",
            "  \"rope_scaling\": null,\n",
            "  \"rope_theta\": 10000.0,\n",
            "  \"sliding_window\": 2047,\n",
            "  \"tie_word_embeddings\": false,\n",
            "  \"torch_dtype\": \"bfloat16\",\n",
            "  \"transformers_version\": \"4.44.2\",\n",
            "  \"use_cache\": false,\n",
            "  \"vocab_size\": 32064\n",
            "}\n",
            "\n",
            "WARNING:transformers_modules.microsoft.phi-3-mini-4k-instruct.0a67737cc96d2554230f90338b163bc6380a2a85.modeling_phi3:`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.\n",
            "WARNING:transformers_modules.microsoft.phi-3-mini-4k-instruct.0a67737cc96d2554230f90338b163bc6380a2a85.modeling_phi3:Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.\n",
            "[INFO|modeling_utils.py:3678] 2024-10-10 15:16:40,221 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/model.safetensors.index.json\n",
            "[INFO|modeling_utils.py:1606] 2024-10-10 15:16:40,225 >> Instantiating Phi3ForCausalLM model under default dtype torch.bfloat16.\n",
            "[INFO|configuration_utils.py:1038] 2024-10-10 15:16:40,228 >> Generate config GenerationConfig {\n",
            "  \"bos_token_id\": 1,\n",
            "  \"eos_token_id\": 32000,\n",
            "  \"pad_token_id\": 32000,\n",
            "  \"use_cache\": false\n",
            "}\n",
            "\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f8108b8c120d4f49ac2b0fa38d6213c3"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[INFO|modeling_utils.py:4507] 2024-10-10 15:17:11,062 >> All model checkpoint weights were used when initializing Phi3ForCausalLM.\n",
            "\n",
            "[INFO|modeling_utils.py:4515] 2024-10-10 15:17:11,070 >> All the weights of Phi3ForCausalLM were initialized from the model checkpoint at microsoft/phi-3-mini-4k-instruct.\n",
            "If your task is similar to the task the model of the checkpoint was trained on, you can already use Phi3ForCausalLM for predictions without further training.\n",
            "[INFO|configuration_utils.py:993] 2024-10-10 15:17:11,251 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/generation_config.json\n",
            "[INFO|configuration_utils.py:1038] 2024-10-10 15:17:11,253 >> Generate config GenerationConfig {\n",
            "  \"bos_token_id\": 1,\n",
            "  \"eos_token_id\": [\n",
            "    32000,\n",
            "    32001,\n",
            "    32007\n",
            "  ],\n",
            "  \"pad_token_id\": 32000\n",
            "}\n",
            "\n",
            "[INFO|tokenization_utils_base.py:2269] 2024-10-10 15:17:11,768 >> loading file tokenizer.model from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer.model\n",
            "[INFO|tokenization_utils_base.py:2269] 2024-10-10 15:17:11,769 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer.json\n",
            "[INFO|tokenization_utils_base.py:2269] 2024-10-10 15:17:11,771 >> loading file added_tokens.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/added_tokens.json\n",
            "[INFO|tokenization_utils_base.py:2269] 2024-10-10 15:17:11,772 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/special_tokens_map.json\n",
            "[INFO|tokenization_utils_base.py:2269] 2024-10-10 15:17:11,775 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--microsoft--phi-3-mini-4k-instruct/snapshots/0a67737cc96d2554230f90338b163bc6380a2a85/tokenizer_config.json\n",
            "[INFO|tokenization_utils_base.py:2513] 2024-10-10 15:17:11,857 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
            "/content/multi_model_phi_3/image_finetuning/finetuning/model.py:39: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
            "  self.projections.load_state_dict(torch.load(projection_path, map_location=device), strict=False)\n",
            "Using custom data configuration default-559b28e319de0343\n",
            "INFO:datasets.builder:Using custom data configuration default-559b28e319de0343\n",
            "Loading Dataset Infos from /usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json\n",
            "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.10/dist-packages/datasets/packaged_modules/json\n",
            "Overwrite dataset info from restored data version if exists.\n",
            "INFO:datasets.builder:Overwrite dataset info from restored data version if exists.\n",
            "Loading Dataset info from /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092\n",
            "INFO:datasets.info:Loading Dataset info from /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092\n",
            "Found cached dataset json (/root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092)\n",
            "INFO:datasets.builder:Found cached dataset json (/root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092)\n",
            "Loading Dataset info from /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092\n",
            "INFO:datasets.info:Loading Dataset info from /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092\n",
            "Process #0 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00000_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #0 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00000_of_00010.arrow\n",
            "Process #1 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00001_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #1 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00001_of_00010.arrow\n",
            "Process #2 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00002_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #2 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00002_of_00010.arrow\n",
            "Process #3 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00003_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #3 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00003_of_00010.arrow\n",
            "Process #4 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00004_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #4 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00004_of_00010.arrow\n",
            "Process #5 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00005_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #5 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00005_of_00010.arrow\n",
            "Process #6 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00006_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #6 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00006_of_00010.arrow\n",
            "Process #7 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00007_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #7 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00007_of_00010.arrow\n",
            "Process #8 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00008_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #8 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00008_of_00010.arrow\n",
            "Process #9 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00009_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Process #9 will write at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_00009_of_00010.arrow\n",
            "Loading cached processed dataset at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_*_of_00010.arrow\n",
            "INFO:datasets.arrow_dataset:Loading cached processed dataset at /root/.cache/huggingface/datasets/json/default-559b28e319de0343/0.0.0/f4e89e8750d5d5ffbef2c078bf0ddfedef29dc2faff52a6255cf513c05eb1092/cache-6111b26d09859c8f_*_of_00010.arrow\n",
            "Concatenating 10 shards\n",
            "INFO:datasets.arrow_dataset:Concatenating 10 shards\n",
            "/content/multi_model_phi_3/image_finetuning/finetuning/dataset.py:10: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
            "  self.image_embeddings = torch.load('clip_embeddings.pt')\n",
            "[WARNING|trainer.py:598] 2024-10-10 15:17:18,521 >> max_steps is given, it will override any value given in num_train_epochs\n",
            "[INFO|trainer.py:648] 2024-10-10 15:17:18,522 >> Using auto half precision backend\n",
            "[INFO|trainer.py:2134] 2024-10-10 15:17:19,563 >> ***** Running training *****\n",
            "[INFO|trainer.py:2135] 2024-10-10 15:17:19,565 >>   Num examples = 141,941\n",
            "[INFO|trainer.py:2136] 2024-10-10 15:17:19,570 >>   Num Epochs = 1\n",
            "[INFO|trainer.py:2137] 2024-10-10 15:17:19,571 >>   Instantaneous batch size per device = 4\n",
            "[INFO|trainer.py:2140] 2024-10-10 15:17:19,573 >>   Total train batch size (w. parallel, distributed & accumulation) = 4\n",
            "[INFO|trainer.py:2141] 2024-10-10 15:17:19,574 >>   Gradient Accumulation steps = 1\n",
            "[INFO|trainer.py:2142] 2024-10-10 15:17:19,576 >>   Total optimization steps = 60\n",
            "[INFO|trainer.py:2143] 2024-10-10 15:17:19,580 >>   Number of trainable parameters = 124,302,336\n",
            "WARNING:transformers_modules.microsoft.phi-3-mini-4k-instruct.0a67737cc96d2554230f90338b163bc6380a2a85.modeling_phi3:You are not running the flash-attention implementation, expect numerical differences.\n",
            "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:1399: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
            "  with device_autocast_ctx, torch.cpu.amp.autocast(**cpu_autocast_kwargs), recompute_context:  # type: ignore[attr-defined]\n",
            "[WARNING|modeling_utils.py:1264] 2024-10-10 15:17:29,290 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='60' max='60' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [60/60 15:44, Epoch 0/1]\n",
              "    </div>\n",
              "    <table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              " <tr style=\"text-align: left;\">\n",
              "      <th>Step</th>\n",
              "      <th>Training Loss</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <td>20</td>\n",
              "      <td>9.531900</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>40</td>\n",
              "      <td>10.267400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <td>60</td>\n",
              "      <td>9.545700</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table><p>"
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[INFO|trainer.py:3503] 2024-10-10 15:32:51,845 >> Saving model checkpoint to ./checkpoint_dir/checkpoint-60\n",
            "[INFO|configuration_utils.py:472] 2024-10-10 15:32:51,849 >> Configuration saved in ./checkpoint_dir/checkpoint-60/config.json\n",
            "[INFO|modeling_utils.py:2799] 2024-10-10 15:33:11,817 >> Model weights saved in ./checkpoint_dir/checkpoint-60/model.safetensors\n",
            "[INFO|tokenization_utils_base.py:2684] 2024-10-10 15:33:11,827 >> tokenizer config file saved in ./checkpoint_dir/checkpoint-60/tokenizer_config.json\n",
            "[INFO|tokenization_utils_base.py:2693] 2024-10-10 15:33:11,830 >> Special tokens file saved in ./checkpoint_dir/checkpoint-60/special_tokens_map.json\n",
            "[INFO|trainer.py:2394] 2024-10-10 15:33:13,911 >> \n",
            "\n",
            "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
            "\n",
            "\n",
            "[INFO|trainer.py:3819] 2024-10-10 15:33:13,928 >> \n",
            "***** Running Evaluation *****\n",
            "[INFO|trainer.py:3821] 2024-10-10 15:33:13,931 >>   Num examples = 15771\n",
            "[INFO|trainer.py:3824] 2024-10-10 15:33:13,933 >>   Batch size = 4\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "***** train metrics *****\n",
            "  epoch                    =     0.0017\n",
            "  total_flos               =        0GF\n",
            "  train_loss               =     9.7817\n",
            "  train_runtime            = 0:15:54.33\n",
            "  train_samples_per_second =      0.251\n",
            "  train_steps_per_second   =      0.063\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.HTML object>"
            ],
            "text/html": [
              "\n",
              "    <div>\n",
              "      \n",
              "      <progress value='1158' max='3943' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
              "      [1158/3943 1:48:54 < 4:22:10, 0.18 it/s]\n",
              "    </div>\n",
              "    "
            ]
          },
          "metadata": {}
        }
      ],
      "source": [
        "%run finetune.py"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.13"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "f8108b8c120d4f49ac2b0fa38d6213c3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_868fd8ed1fb5432ab6d6761b4e4ce17d",
              "IPY_MODEL_f75b8c1cb15a4dc9871b28a286dd3b82",
              "IPY_MODEL_a08098910db44feabc38e65bf4a55379"
            ],
            "layout": "IPY_MODEL_13be5acd97ab44babec61cedcf5b2a3a"
          }
        },
        "868fd8ed1fb5432ab6d6761b4e4ce17d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_239ab0a871684811ae7a3e16daa8991a",
            "placeholder": "​",
            "style": "IPY_MODEL_4ca7774f57454d74bd1b7c9445030038",
            "value": "Loading checkpoint shards: 100%"
          }
        },
        "f75b8c1cb15a4dc9871b28a286dd3b82": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_557ecfce51574b8db9236bdc8d0bd555",
            "max": 2,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_bc0e09ab397f42879b8c874eb10e6a2b",
            "value": 2
          }
        },
        "a08098910db44feabc38e65bf4a55379": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8dec270eb8c649649f6b95ddde159a0f",
            "placeholder": "​",
            "style": "IPY_MODEL_f9d65533a8fd4310b1466713c22d8255",
            "value": " 2/2 [00:30&lt;00:00, 14.75s/it]"
          }
        },
        "13be5acd97ab44babec61cedcf5b2a3a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "239ab0a871684811ae7a3e16daa8991a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4ca7774f57454d74bd1b7c9445030038": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "557ecfce51574b8db9236bdc8d0bd555": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "bc0e09ab397f42879b8c874eb10e6a2b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8dec270eb8c649649f6b95ddde159a0f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f9d65533a8fd4310b1466713c22d8255": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}