File size: 19,037 Bytes

525fbd4

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "from graphviz import Digraph\n",
        "\n",
        "def trace(root):\n",
        "    #Builds a set of all nodes and edges in a graph\n",
        "    nodes, edges = set(), set()\n",
        "    def build(v):\n",
        "        if v not in nodes:\n",
        "            nodes.add(v)\n",
        "            for child in v._prev:\n",
        "                edges.add((child, v))\n",
        "                build(child)\n",
        "    build(root)\n",
        "    return nodes, edges\n",
        "\n",
        "def draw_dot(root):\n",
        "    dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n",
        "\n",
        "    nodes, edges = trace(root)\n",
        "    for n in nodes:\n",
        "        uid = str(id(n))\n",
        "        #For any value in the graph, create a rectangular ('record') node for it\n",
        "        dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n",
        "        if n._op:\n",
        "            #If this value is a result of some operation, then create an op node for it\n",
        "            dot.node(name = uid + n._op, label=n._op)\n",
        "            #and connect this node to it\n",
        "            dot.edge(uid + n._op, uid)\n",
        "\n",
        "    for n1, n2 in edges:\n",
        "        #Connect n1 to the node of n2\n",
        "        dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n",
        "\n",
        "    return dot"
      ],
      "metadata": {
        "id": "T0rN8d146jvF"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import math"
      ],
      "metadata": {
        "id": "JlYxBvFK0AjA"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class Value:\n",
        "\n",
        "    def __init__(self, data, _children=(), _op='', label=''):\n",
        "        self.data = data\n",
        "        self.grad = 0.0\n",
        "        self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n",
        "        self._prev = set(_children)\n",
        "        self._op = _op\n",
        "        self.label = label\n",
        "\n",
        "\n",
        "    def __repr__(self):\n",
        "        return f\"Value(data={self.data})\"\n",
        "\n",
        "    def __add__(self, other):\n",
        "        other = other if isinstance(other, Value) else Value(other)\n",
        "        out = Value(self.data + other.data, (self, other), '+')\n",
        "\n",
        "        def backward():\n",
        "          self.grad += 1.0 * out.grad\n",
        "          other.grad += 1.0 * out.grad\n",
        "\n",
        "        out._backward = backward\n",
        "        return out\n",
        "\n",
        "    def __radd__(self, other): #here\n",
        "        return self + other\n",
        "\n",
        "    def __mul__(self, other):\n",
        "        other = other if isinstance(other, Value) else Value(other)\n",
        "        out = Value(self.data * other.data, (self, other), '*')\n",
        "\n",
        "        def backward():\n",
        "          self.grad += other.data * out.grad\n",
        "          other.grad += self.data * out.grad\n",
        "        out._backward = backward\n",
        "        return out\n",
        "\n",
        "    def __rmul__(self, other):   #other * self\n",
        "        return self * other\n",
        "\n",
        "    def __truediv__(self, other):  #self/other\n",
        "        return self * other**-1\n",
        "\n",
        "    def __neg__(self):\n",
        "        return self * -1\n",
        "\n",
        "    def __sub__(self, other):  #self - other\n",
        "        return self + (-other)\n",
        "\n",
        "    def __pow__(self, other):\n",
        "        assert isinstance(other, (int, float)), \"only supporting int/float powers for now\"\n",
        "        out = Value(self.data ** other, (self, ), f\"**{other}\")\n",
        "\n",
        "        def backward():\n",
        "          self.grad += (other * (self.data ** (other - 1))) * out.grad\n",
        "\n",
        "        out._backward = backward\n",
        "        return out\n",
        "\n",
        "    def tanh(self):\n",
        "        x = self.data\n",
        "        t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n",
        "        out = Value(t, (self, ), 'tanh')\n",
        "\n",
        "        def backward():\n",
        "          self.grad += 1 - (t**2) * out.grad\n",
        "\n",
        "        out._backward = backward\n",
        "        return out\n",
        "\n",
        "    def exp(self):\n",
        "        x = self.data\n",
        "        out = Value(math.exp(x), (self, ), 'exp')   #We merged t and out, into just out\n",
        "\n",
        "        def backward():\n",
        "            self.grad += out.data * out.grad\n",
        "\n",
        "        out._backward = backward\n",
        "        return out\n",
        "\n",
        "    def backward(self):\n",
        "\n",
        "      topo = []\n",
        "      visited = set()\n",
        "      def build_topo(v):\n",
        "        if v not in visited:\n",
        "          visited.add(v)\n",
        "          for child in v._prev:\n",
        "            build_topo(child)\n",
        "          topo.append(v)\n",
        "\n",
        "      build_topo(self)\n",
        "\n",
        "      self.grad = 1.0\n",
        "      for node in reversed(topo):\n",
        "        node._backward()"
      ],
      "metadata": {
        "id": "tA0zbyEwFbD5"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "---------------"
      ],
      "metadata": {
        "id": "m9hy05zbxhLP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import random"
      ],
      "metadata": {
        "id": "gu3tnJu1Wti5"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class Neuron:\n",
        "    def __init__(self, nin):\n",
        "        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]\n",
        "        self.b = Value(random.uniform(-1, 1))\n",
        "\n",
        "    def __call__(self, x):\n",
        "        act = sum((wi * xi for wi, xi in zip(self.w, x)), self.b)\n",
        "        out = act.tanh()\n",
        "        return out\n",
        "\n",
        "    def parameters(self):\n",
        "        return self.w + [self.b]\n",
        "\n",
        "class Layer:\n",
        "    def __init__(self, nin, nout):\n",
        "        self.neurons = [Neuron(nin) for _ in range(nout)]\n",
        "\n",
        "    def __call__(self, x):\n",
        "        outs = [n(x) for n in self.neurons]\n",
        "        return outs[0] if len(outs) == 1 else outs\n",
        "\n",
        "    def parameters(self):\n",
        "        return [p for n in self.neurons for p in n.parameters()]\n",
        "\n",
        "        # Alternative way of writing the above return function:\n",
        "        # parameters = []\n",
        "        # for n in self.neurons:\n",
        "        #   p = n.parameters()\n",
        "        #   parameters.extend(p)\n",
        "\n",
        "class MLP:\n",
        "    def __init__(self, nin, nouts):\n",
        "        sz = [nin] + nouts\n",
        "        self.layers = [Layer(sz[i], sz[i + 1]) for i in range(len(nouts))]\n",
        "\n",
        "    def __call__(self, x):\n",
        "        for layer in self.layers:\n",
        "            x = layer(x)\n",
        "        return x\n",
        "\n",
        "    def parameters(self):\n",
        "        return [p for l in self.layers for p in l.parameters()]"
      ],
      "metadata": {
        "id": "HmEO8Gi1KN_m"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "-------------"
      ],
      "metadata": {
        "id": "VHG86ZRipV_g"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we are trying to slighly nudge the value in order to reduce the loss"
      ],
      "metadata": {
        "id": "4P4QTecRpfJy"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "So this essentially adds as an **update function**"
      ],
      "metadata": {
        "id": "VoV7xT_Ip60A"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "for p in n.parameters():\n",
        "  p.data += -0.01 * p.grad #The negative sign is to convert any negative value to positive. Therefore increasing the value of the data, therefore decresing the loss"
      ],
      "metadata": {
        "id": "9GQoQUYEpMRP"
      },
      "execution_count": 12,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "------------------"
      ],
      "metadata": {
        "id": "TwdGr8Awqam4"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "------------"
      ],
      "metadata": {
        "id": "ux4ZrKc_riiA"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Now we follow three steps: Forward pass -> Backward pass -> Update"
      ],
      "metadata": {
        "id": "dyoqzuslp-kP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "x = [2.0, 3.0, -1.0]\n",
        "n = MLP(3, [4, 4, 1])\n",
        "n(x)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SYoQlzlMrelv",
        "outputId": "3ce2cbfa-fec9-4618-cd27-8ed9d28bcf5b"
      },
      "execution_count": 35,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=0.33215137965743546)"
            ]
          },
          "metadata": {},
          "execution_count": 35
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "xs = [\n",
        "    [2.0, 3.0, -1.0],\n",
        "    [3.0, -1.0, 0.5],\n",
        "    [0.5, 1.0, 1.0],\n",
        "    [1.0, 1.0, -1.0]\n",
        "]\n",
        "\n",
        "ys = [1.0, -1.0, -1.0, 1.0] #output we want"
      ],
      "metadata": {
        "id": "wRwCzkhequ5C"
      },
      "execution_count": 36,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#forward pass\n",
        "ypred = [n(x) for x in xs]\n",
        "loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))\n",
        "loss"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Bxbev90VqFnG",
        "outputId": "52407404-8787-4e29-c07b-31063bea7111"
      },
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=5.767047506521353)"
            ]
          },
          "metadata": {},
          "execution_count": 54
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#backward pass\n",
        "loss.backward()"
      ],
      "metadata": {
        "id": "swKzizdIqJQf"
      },
      "execution_count": 55,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#update\n",
        "for p in n.parameters():\n",
        "  p.data += -0.01 * p.grad"
      ],
      "metadata": {
        "id": "BtOE6keaqLse"
      },
      "execution_count": 56,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#check the prediction\n",
        "ypred"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "O5iGQsw3qY-S",
        "outputId": "2b14907c-d35f-4c4d-e503-d954e6f74435"
      },
      "execution_count": 57,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Value(data=-0.25151630590655727),\n",
              " Value(data=0.42164884655021817),\n",
              " Value(data=-0.09631033350969018),\n",
              " Value(data=-0.16748189979649136)]"
            ]
          },
          "metadata": {},
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "-----------"
      ],
      "metadata": {
        "id": "qvIdNB-LsBFt"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "-------------"
      ],
      "metadata": {
        "id": "XR8TNjCDsKfb"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Putting the entire process together in a single function"
      ],
      "metadata": {
        "id": "jUvrBdh9sLPt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#Initialize the neural net\n",
        "x = [2.0, 3.0, -1.0]\n",
        "n = MLP(3, [4, 4, 1])\n",
        "n(x)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8wJ2E5Vsshho",
        "outputId": "416ad55f-351d-4e98-f1ff-7108a2a32f65"
      },
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=0.9135198339971514)"
            ]
          },
          "metadata": {},
          "execution_count": 58
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#Data definition\n",
        "xs = [\n",
        "    [2.0, 3.0, -1.0],\n",
        "    [3.0, -1.0, 0.5],\n",
        "    [0.5, 1.0, 1.0],\n",
        "    [1.0, 1.0, -1.0]\n",
        "]\n",
        "\n",
        "ys = [1.0, -1.0, -1.0, 1.0] #output we want"
      ],
      "metadata": {
        "id": "qqZYLdOVrQ2i"
      },
      "execution_count": 59,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "for k in range(10):\n",
        "\n",
        "  #forward pass\n",
        "  ypred = [n(x) for x in xs]\n",
        "  loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))\n",
        "\n",
        "  #backward pass\n",
        "  for p in n.parameters():\n",
        "    p.grad = 0.0 #This is because after one round of update, we need to reset the value of the grads so that it can calculate and store the grad value of the updated loss function (i.e. The loss value that was improved after gradient descent). If we don't do this, the previous value of grad gets increamented with the new value during each back propagation (each time backward is called)\n",
        "  loss.backward()\n",
        "\n",
        "  #update\n",
        "  #THIS HERE, WHAT WE ARE DOING IS 'GRADIENT DESCENT'. WE ARE NUDGING THE INPUT VALUES BY A LITTLE BIT\n",
        "  for p in n.parameters():\n",
        "    p.data += -0.04 * p.grad\n",
        "\n",
        "  print(k, loss.data) #Printing the current number/iteration number plus how much loss"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "S3ffWvfDsS88",
        "outputId": "d3b74c8c-2d0d-4b8b-e31e-f1081138a321"
      },
      "execution_count": 92,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0 7.6021312440956095\n",
            "1 8.0\n",
            "2 6.398187062451399\n",
            "3 7.999999999997639\n",
            "4 8.0\n",
            "5 7.999964084143684\n",
            "6 8.0\n",
            "7 8.0\n",
            "8 7.999999961266539\n",
            "9 8.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "ypred"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JoKOXxUWtB7K",
        "outputId": "3420fc62-2352-47fc-d0e5-6547f3748ca5"
      },
      "execution_count": 93,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Value(data=-1.0), Value(data=-1.0), Value(data=-1.0), Value(data=-1.0)]"
            ]
          },
          "metadata": {},
          "execution_count": 93
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "loss"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "iyZ2e7FvwT5H",
        "outputId": "5666e89c-6a29-486b-d3c8-290c29635124"
      },
      "execution_count": 94,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=8.0)"
            ]
          },
          "metadata": {},
          "execution_count": 94
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "If the loss was reduced, then you can `n.parameters` to see what were the values into the NN that caused to get the desired target outputs"
      ],
      "metadata": {
        "id": "JLcsDJkhwsVS"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "--------------------------"
      ],
      "metadata": {
        "id": "ubTHHuwzvNNh"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Okay so the predicted output didn't exactly come as expected 🥲 (The first and last value weren't supposed to be negative lol) \\\n",
        "\\\n",
        "But that was the idea of how we **train a neural net**!"
      ],
      "metadata": {
        "id": "xFwHw6IVvOKb"
      }
    }
  ]
}