File size: 15,442 Bytes

525fbd4

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "from graphviz import Digraph\n",
        "\n",
        "def trace(root):\n",
        "    \"\"\"Collect every node and edge reachable from ``root``.\n",
        "\n",
        "    The graph is walked by following each node's ``_prev`` collection.\n",
        "\n",
        "    Args:\n",
        "        root: Hashable object with an iterable ``_prev`` attribute; every\n",
        "            object reachable through ``_prev`` must provide one as well.\n",
        "\n",
        "    Returns:\n",
        "        ``(nodes, edges)``: ``nodes`` is the set of all reachable objects\n",
        "        (``root`` included) and ``edges`` is a set of ``(child, parent)`` pairs.\n",
        "    \"\"\"\n",
        "    nodes, edges = set(), set()\n",
        "    # An explicit stack replaces recursion so that long chains of operations\n",
        "    # cannot exceed Python's recursion limit.\n",
        "    pending = [root]\n",
        "    while pending:\n",
        "        v = pending.pop()\n",
        "        if v in nodes:\n",
        "            continue\n",
        "        nodes.add(v)\n",
        "        for child in v._prev:\n",
        "            edges.add((child, v))\n",
        "            pending.append(child)\n",
        "    return nodes, edges\n",
        "\n",
        "def draw_dot(root):\n",
        "    \"\"\"Build a graphviz ``Digraph`` of the computation graph ending at ``root``.\n",
        "\n",
        "    Every value found by ``trace(root)`` becomes a record-shaped node showing\n",
        "    its label, data and grad. A value with a non-empty ``_op`` additionally\n",
        "    gets a small node for that operation, placed between it and its operands.\n",
        "\n",
        "    Returns:\n",
        "        The populated ``Digraph`` (SVG format, laid out left to right).\n",
        "    \"\"\"\n",
        "    graph = Digraph(format='svg', graph_attr={'rankdir': 'LR'})  # LR == Left to Right\n",
        "\n",
        "    nodes, edges = trace(root)\n",
        "    for value in nodes:\n",
        "        value_id = str(id(value))\n",
        "        # Rectangular ('record') node describing the value itself\n",
        "        record = \"{ %s | data %.4f | grad %.4f }\" % (value.label, value.data, value.grad)\n",
        "        graph.node(name=value_id, label=record, shape='record')\n",
        "        if not value._op:\n",
        "            continue\n",
        "        # The value came out of an operation: add a node for the operation\n",
        "        # and point it at the value it produced\n",
        "        op_id = value_id + value._op\n",
        "        graph.node(name=op_id, label=value._op)\n",
        "        graph.edge(op_id, value_id)\n",
        "\n",
        "    for child, parent in edges:\n",
        "        # Each operand feeds the operation node of the value it helped produce\n",
        "        graph.edge(str(id(child)), str(id(parent)) + parent._op)\n",
        "\n",
        "    return graph"
      ],
      "metadata": {
        "id": "T0rN8d146jvF"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import math"
      ],
      "metadata": {
        "id": "JlYxBvFK0AjA"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class Value:\n",
        "    \"\"\"A scalar that remembers how it was computed so gradients can be backpropagated.\n",
        "\n",
        "    Attributes:\n",
        "        data: The wrapped number.\n",
        "        grad: Accumulated derivative of the output w.r.t. this value (starts at 0.0).\n",
        "        label: Optional human-readable name.\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, data, _children=(), _op='', label=''):\n",
        "        self.data = data\n",
        "        self.grad = 0.0\n",
        "        # No-op by default (leaf values). Every operation installs a closure here\n",
        "        # that pushes the result's gradient back onto its operands.\n",
        "        self._backward = lambda: None\n",
        "        self._prev = set(_children)  # operands this value was computed from\n",
        "        self._op = _op  # symbol of the producing operation, '' for leaves\n",
        "        self.label = label\n",
        "\n",
        "    def __repr__(self):\n",
        "        return f\"Value(data={self.data})\"\n",
        "\n",
        "    def __add__(self, other):\n",
        "        \"\"\"Return ``self + other``; a plain number is wrapped in a Value first.\"\"\"\n",
        "        other = other if isinstance(other, Value) else Value(other)\n",
        "        out = Value(self.data + other.data, (self, other), '+')\n",
        "\n",
        "        def _backward():\n",
        "            # Addition passes the upstream gradient through unchanged.\n",
        "            self.grad += 1.0 * out.grad\n",
        "            other.grad += 1.0 * out.grad\n",
        "\n",
        "        out._backward = _backward\n",
        "        return out\n",
        "\n",
        "    def __radd__(self, other):  # other + self\n",
        "        return self + other\n",
        "\n",
        "    def __mul__(self, other):\n",
        "        \"\"\"Return ``self * other``; a plain number is wrapped in a Value first.\"\"\"\n",
        "        other = other if isinstance(other, Value) else Value(other)\n",
        "        out = Value(self.data * other.data, (self, other), '*')\n",
        "\n",
        "        def _backward():\n",
        "            # Product rule: each factor receives the other factor's value.\n",
        "            self.grad += other.data * out.grad\n",
        "            other.grad += self.data * out.grad\n",
        "\n",
        "        out._backward = _backward\n",
        "        return out\n",
        "\n",
        "    def __rmul__(self, other):  # other * self\n",
        "        return self * other\n",
        "\n",
        "    def __truediv__(self, other):  # self / other\n",
        "        return self * other**-1\n",
        "\n",
        "    def __rtruediv__(self, other):  # other / self\n",
        "        return other * self**-1\n",
        "\n",
        "    def __neg__(self):\n",
        "        return self * -1\n",
        "\n",
        "    def __sub__(self, other):  # self - other\n",
        "        return self + (-other)\n",
        "\n",
        "    def __rsub__(self, other):  # other - self\n",
        "        return other + (-self)\n",
        "\n",
        "    def __pow__(self, other):\n",
        "        \"\"\"Return ``self ** other`` for a constant int/float exponent.\n",
        "\n",
        "        Raises:\n",
        "            AssertionError: If ``other`` is not an int or float.\n",
        "        \"\"\"\n",
        "        assert isinstance(other, (int, float)), \"only supporting int/float powers for now\"\n",
        "        out = Value(self.data ** other, (self, ), f\"**{other}\")\n",
        "\n",
        "        def _backward():\n",
        "            # Power rule: d/dx x**n = n * x**(n-1)\n",
        "            self.grad += (other * (self.data ** (other - 1))) * out.grad\n",
        "\n",
        "        out._backward = _backward\n",
        "        return out\n",
        "\n",
        "    def tanh(self):\n",
        "        \"\"\"Return ``tanh(self)`` as a new Value.\"\"\"\n",
        "        # math.tanh stays finite for large |x|, whereas the exp-based formula\n",
        "        # (e^{2x} - 1) / (e^{2x} + 1) overflows.\n",
        "        t = math.tanh(self.data)\n",
        "        out = Value(t, (self, ), 'tanh')\n",
        "\n",
        "        def _backward():\n",
        "            # d/dx tanh(x) = 1 - tanh(x)^2; the whole local derivative must be\n",
        "            # scaled by the upstream gradient (chain rule).\n",
        "            self.grad += (1 - t**2) * out.grad\n",
        "\n",
        "        out._backward = _backward\n",
        "        return out\n",
        "\n",
        "    def exp(self):\n",
        "        \"\"\"Return ``e ** self`` as a new Value.\"\"\"\n",
        "        x = self.data\n",
        "        out = Value(math.exp(x), (self, ), 'exp')\n",
        "\n",
        "        def _backward():\n",
        "            # d/dx e^x = e^x, which is exactly out.data\n",
        "            self.grad += out.data * out.grad\n",
        "\n",
        "        out._backward = _backward\n",
        "        return out\n",
        "\n",
        "    def backward(self):\n",
        "        \"\"\"Backpropagate from this value through everything it was computed from.\n",
        "\n",
        "        Sets ``self.grad`` to 1.0 and then runs each node's ``_backward`` in\n",
        "        reverse topological order. Gradients are accumulated with ``+=`` and are\n",
        "        not reset here, so calling this twice adds to the existing ``grad`` values.\n",
        "        \"\"\"\n",
        "        # Iterative post-order depth-first walk: a node is appended only after all\n",
        "        # of its operands. An explicit stack is used instead of recursion so that\n",
        "        # long chains of operations cannot exceed Python's recursion limit.\n",
        "        topo = []\n",
        "        visited = {self}\n",
        "        stack = [(self, iter(self._prev))]\n",
        "        while stack:\n",
        "            node, children = stack[-1]\n",
        "            for child in children:\n",
        "                if child not in visited:\n",
        "                    visited.add(child)\n",
        "                    stack.append((child, iter(child._prev)))\n",
        "                    break\n",
        "            else:\n",
        "                # All operands of this node are already handled.\n",
        "                topo.append(node)\n",
        "                stack.pop()\n",
        "\n",
        "        self.grad = 1.0\n",
        "        for node in reversed(topo):\n",
        "            node._backward()"
      ],
      "metadata": {
        "id": "tA0zbyEwFbD5"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "---------------"
      ],
      "metadata": {
        "id": "m9hy05zbxhLP"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import random"
      ],
      "metadata": {
        "id": "gu3tnJu1Wti5"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class Neuron:\n",
        "\t\"\"\"A single tanh neuron holding ``nin`` weights and one bias.\"\"\"\n",
        "\n",
        "\tdef __init__(self, nin):\n",
        "\t\t# Weights first, then the bias, each drawn uniformly from [-1, 1]\n",
        "\t\tself.w = []\n",
        "\t\tfor _ in range(nin):\n",
        "\t\t\tself.w.append(Value(random.uniform(-1, 1)))\n",
        "\t\tself.b = Value(random.uniform(-1, 1))\n",
        "\n",
        "\tdef __call__(self, x):\n",
        "\t\t\"\"\"Return tanh(w . x + b) for the input sequence ``x``.\"\"\"\n",
        "\t\t# Start from the bias and add one weighted input at a time\n",
        "\t\tpre_activation = self.b\n",
        "\t\tfor weight, feature in zip(self.w, x):\n",
        "\t\t\tpre_activation = pre_activation + weight * feature\n",
        "\t\treturn pre_activation.tanh()\n",
        "\n",
        "class Layer:\n",
        "\t\"\"\"``nout`` neurons that each read the same ``nin`` inputs.\"\"\"\n",
        "\n",
        "\tdef __init__(self, nin, nout):\n",
        "\t\tself.neurons = [Neuron(nin) for _ in range(nout)]\n",
        "\n",
        "\tdef __call__(self, x):\n",
        "\t\t\"\"\"Evaluate every neuron on ``x``.\"\"\"\n",
        "\t\tactivations = [neuron(x) for neuron in self.neurons]\n",
        "\t\t# A one-neuron layer hands back the bare value instead of a 1-element list\n",
        "\t\tif len(activations) == 1:\n",
        "\t\t\treturn activations[0]\n",
        "\t\treturn activations\n",
        "\n",
        "class MLP:\n",
        "\t\"\"\"A multi-layer perceptron: a chain of ``Layer`` objects applied in order.\"\"\"\n",
        "\n",
        "\tdef __init__(self, nin, nouts):\n",
        "\t\t\"\"\"Build the layers.\n",
        "\n",
        "\t\tArgs:\n",
        "\t\t\tnin: Number of inputs to the first layer.\n",
        "\t\t\tnouts: Iterable with the number of neurons of each layer, in order.\n",
        "\t\t\"\"\"\n",
        "\t\t# list() lets callers pass a tuple (or any iterable) of layer sizes\n",
        "\t\tsz = [nin] + list(nouts)\n",
        "\t\tself.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]\n",
        "\n",
        "\tdef __call__(self, x):\n",
        "\t\t\"\"\"Feed ``x`` through every layer and return the last layer's output.\"\"\"\n",
        "\t\tfor layer in self.layers:\n",
        "\t\t\t# A one-neuron layer returns a bare value rather than a list; wrap it\n",
        "\t\t\t# again so that the next layer can still iterate over its inputs.\n",
        "\t\t\tif not hasattr(x, '__iter__'):\n",
        "\t\t\t\tx = [x]\n",
        "\t\t\tx = layer(x)\n",
        "\t\treturn x"
      ],
      "metadata": {
        "id": "aCXXYNg_W680"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# One sample with three input features\n",
        "x = [2.0, 3.0, -1.0]\n",
        "# 3 inputs -> two hidden layers of 4 neurons -> 1 output neuron\n",
        "n = MLP(nin=3, nouts=[4, 4, 1])\n",
        "n(x)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "aG9pKV_RXsO8",
        "outputId": "e6f183b9-896b-458f-9322-e91bc79e9da2",
        "collapsed": true
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=-0.33393070997191954)"
            ]
          },
          "metadata": {},
          "execution_count": 23
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "-----------"
      ],
      "metadata": {
        "id": "6DemdSsv_abu"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Next, we expose the trainable **parameters** of the MLP. Each level collects them from the level below it: Neuron -> Layer -> MLP."
      ],
      "metadata": {
        "id": "rhKQgN2LKBf9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "class Neuron:\n",
        "    \"\"\"A single tanh neuron holding ``nin`` weights and one bias.\"\"\"\n",
        "\n",
        "    def __init__(self, nin):\n",
        "        # Weights first, then the bias, each drawn uniformly from [-1, 1]\n",
        "        self.w = []\n",
        "        for _ in range(nin):\n",
        "            self.w.append(Value(random.uniform(-1, 1)))\n",
        "        self.b = Value(random.uniform(-1, 1))\n",
        "\n",
        "    def __call__(self, x):\n",
        "        \"\"\"Return tanh(w . x + b) for the input sequence ``x``.\"\"\"\n",
        "        # Start from the bias and add one weighted input at a time\n",
        "        pre_activation = self.b\n",
        "        for weight, feature in zip(self.w, x):\n",
        "            pre_activation = pre_activation + weight * feature\n",
        "        return pre_activation.tanh()\n",
        "\n",
        "    def parameters(self):\n",
        "        \"\"\"Return a new list with all weights followed by the bias.\"\"\"\n",
        "        return [*self.w, self.b]\n",
        "\n",
        "class Layer:\n",
        "    \"\"\"``nout`` neurons that each read the same ``nin`` inputs.\"\"\"\n",
        "\n",
        "    def __init__(self, nin, nout):\n",
        "        self.neurons = [Neuron(nin) for _ in range(nout)]\n",
        "\n",
        "    def __call__(self, x):\n",
        "        \"\"\"Evaluate every neuron on ``x``.\"\"\"\n",
        "        activations = [neuron(x) for neuron in self.neurons]\n",
        "        # A one-neuron layer hands back the bare value instead of a 1-element list\n",
        "        if len(activations) == 1:\n",
        "            return activations[0]\n",
        "        return activations\n",
        "\n",
        "    def parameters(self):\n",
        "        \"\"\"Return the parameters of every neuron as one flat list.\"\"\"\n",
        "        # Equivalent one-liner:\n",
        "        #   return [p for n in self.neurons for p in n.parameters()]\n",
        "        collected = []\n",
        "        for neuron in self.neurons:\n",
        "            collected.extend(neuron.parameters())\n",
        "        return collected\n",
        "\n",
        "class MLP:\n",
        "    \"\"\"A multi-layer perceptron: a chain of ``Layer`` objects applied in order.\"\"\"\n",
        "\n",
        "    def __init__(self, nin, nouts):\n",
        "        \"\"\"Build the layers.\n",
        "\n",
        "        Args:\n",
        "            nin: Number of inputs to the first layer.\n",
        "            nouts: Iterable with the number of neurons of each layer, in order.\n",
        "        \"\"\"\n",
        "        # list() lets callers pass a tuple (or any iterable) of layer sizes\n",
        "        sz = [nin] + list(nouts)\n",
        "        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sz, sz[1:])]\n",
        "\n",
        "    def __call__(self, x):\n",
        "        \"\"\"Feed ``x`` through every layer and return the last layer's output.\"\"\"\n",
        "        for layer in self.layers:\n",
        "            # A one-neuron layer returns a bare value rather than a list; wrap it\n",
        "            # again so that the next layer can still iterate over its inputs.\n",
        "            if not hasattr(x, '__iter__'):\n",
        "                x = [x]\n",
        "            x = layer(x)\n",
        "        return x\n",
        "\n",
        "    def parameters(self):\n",
        "        \"\"\"Return the parameters of every layer, in layer order, as one flat list.\"\"\"\n",
        "        return [p for layer in self.layers for p in layer.parameters()]"
      ],
      "metadata": {
        "id": "HmEO8Gi1KN_m"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# One sample with three input features\n",
        "x = [2.0, 3.0, -1.0]\n",
        "# 3 inputs -> two hidden layers of 4 neurons -> 1 output neuron\n",
        "n = MLP(nin=3, nouts=[4, 4, 1])\n",
        "n(x)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "e2VaJPFdMVUs",
        "outputId": "0a229e8c-2084-4037-e808-cc27cb3fd2ca"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "Value(data=0.7625252102576119)"
            ]
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "n.parameters()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "cfOp08kYNmDX",
        "outputId": "fe98dfd7-0e2b-4dd7-fb08-6f4cf60161ff"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "[Value(data=0.31785584973173164),\n",
              " Value(data=0.2998372553774835),\n",
              " Value(data=-0.8029008199517247),\n",
              " Value(data=-0.39340060142531286),\n",
              " Value(data=0.23322412084873956),\n",
              " Value(data=0.29891813550514534),\n",
              " Value(data=-0.5314862907700675),\n",
              " Value(data=0.19661072911432642),\n",
              " Value(data=0.9142418954398666),\n",
              " Value(data=0.041208786424172805),\n",
              " Value(data=-0.23983634992214187),\n",
              " Value(data=-0.593538786941121),\n",
              " Value(data=0.39482399486723296),\n",
              " Value(data=-0.9880306400643504),\n",
              " Value(data=-0.8097855189886964),\n",
              " Value(data=0.4629484174790124),\n",
              " Value(data=0.31168805444961634),\n",
              " Value(data=-0.9828138115624934),\n",
              " Value(data=0.5221437252554255),\n",
              " Value(data=-0.19703997468926882),\n",
              " Value(data=-0.5504279057638468),\n",
              " Value(data=-0.8365261779265616),\n",
              " Value(data=-0.22783861276612227),\n",
              " Value(data=0.5666981389300718),\n",
              " Value(data=-0.06415010714317604),\n",
              " Value(data=0.845414529622897),\n",
              " Value(data=0.4793425135418725),\n",
              " Value(data=-0.38321354069020086),\n",
              " Value(data=-0.10963021731006206),\n",
              " Value(data=0.14485994942129898),\n",
              " Value(data=-0.19028270981146433),\n",
              " Value(data=0.5148204886483112),\n",
              " Value(data=-0.8559156650791364),\n",
              " Value(data=0.3778416962066449),\n",
              " Value(data=0.09608787032156774),\n",
              " Value(data=-0.8288362456839788),\n",
              " Value(data=0.5641592956285757),\n",
              " Value(data=0.13764114112689052),\n",
              " Value(data=-0.19625087652731277),\n",
              " Value(data=-0.6117936229921406),\n",
              " Value(data=0.7546009612155813)]"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "These are all the trainable parameters of the network: the weights and biases of every neuron. (The inputs `x` are not parameters.)"
      ],
      "metadata": {
        "id": "W0hGhhMaNozj"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "len(n.parameters())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "itFmD8hFNnph",
        "outputId": "f43eee99-5831-4708-f203-518ddf7011e5"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "41"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    }
  ]
}