diff --git "a/x12_creating_a_loss_function.ipynb" "b/x12_creating_a_loss_function.ipynb"
new file mode 100644
--- /dev/null
+++ "b/x12_creating_a_loss_function.ipynb"
@@ -0,0 +1,715 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "One small change as been done in our Value object. Please note that \\\n",
+ "\\\n",
+ "The __radd__() functionality has been added."
+ ],
+ "metadata": {
+ "id": "VqE2CCj9HMUh"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from graphviz import Digraph\n",
+ "\n",
+ "def trace(root):\n",
+ " #Builds a set of all nodes and edges in a graph\n",
+ " nodes, edges = set(), set()\n",
+ " def build(v):\n",
+ " if v not in nodes:\n",
+ " nodes.add(v)\n",
+ " for child in v._prev:\n",
+ " edges.add((child, v))\n",
+ " build(child)\n",
+ " build(root)\n",
+ " return nodes, edges\n",
+ "\n",
+ "def draw_dot(root):\n",
+ " dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n",
+ "\n",
+ " nodes, edges = trace(root)\n",
+ " for n in nodes:\n",
+ " uid = str(id(n))\n",
+ " #For any value in the graph, create a rectangular ('record') node for it\n",
+ " dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n",
+ " if n._op:\n",
+ " #If this value is a result of some operation, then create an op node for it\n",
+ " dot.node(name = uid + n._op, label=n._op)\n",
+ " #and connect this node to it\n",
+ " dot.edge(uid + n._op, uid)\n",
+ "\n",
+ " for n1, n2 in edges:\n",
+ " #Connect n1 to the node of n2\n",
+ " dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n",
+ "\n",
+ " return dot"
+ ],
+ "metadata": {
+ "id": "T0rN8d146jvF"
+ },
+ "execution_count": 1,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import math"
+ ],
+ "metadata": {
+ "id": "JlYxBvFK0AjA"
+ },
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class Value:\n",
+ "\n",
+ " def __init__(self, data, _children=(), _op='', label=''):\n",
+ " self.data = data\n",
+ " self.grad = 0.0\n",
+ " self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n",
+ " self._prev = set(_children)\n",
+ " self._op = _op\n",
+ " self.label = label\n",
+ "\n",
+ "\n",
+ " def __repr__(self):\n",
+ " return f\"Value(data={self.data})\"\n",
+ "\n",
+ " def __add__(self, other):\n",
+ " other = other if isinstance(other, Value) else Value(other)\n",
+ " out = Value(self.data + other.data, (self, other), '+')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad += 1.0 * out.grad\n",
+ " other.grad += 1.0 * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def __radd__(self, other): #here\n",
+ " return self + other\n",
+ "\n",
+ " def __mul__(self, other):\n",
+ " other = other if isinstance(other, Value) else Value(other)\n",
+ " out = Value(self.data * other.data, (self, other), '*')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad += other.data * out.grad\n",
+ " other.grad += self.data * out.grad\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def __rmul__(self, other): #other * self\n",
+ " return self * other\n",
+ "\n",
+ " def __truediv__(self, other): #self/other\n",
+ " return self * other**-1\n",
+ "\n",
+ " def __neg__(self):\n",
+ " return self * -1\n",
+ "\n",
+ " def __sub__(self, other): #self - other\n",
+ " return self + (-other)\n",
+ "\n",
+ " def __pow__(self, other):\n",
+ " assert isinstance(other, (int, float)), \"only supporting int/float powers for now\"\n",
+ " out = Value(self.data ** other, (self, ), f\"**{other}\")\n",
+ "\n",
+ " def backward():\n",
+ " self.grad += (other * (self.data ** (other - 1))) * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def tanh(self):\n",
+ " x = self.data\n",
+ " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n",
+ " out = Value(t, (self, ), 'tanh')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad += 1 - (t**2) * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def exp(self):\n",
+ " x = self.data\n",
+ " out = Value(math.exp(x), (self, ), 'exp') #We merged t and out, into just out\n",
+ "\n",
+ " def backward():\n",
+ " self.grad += out.data * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def backward(self):\n",
+ "\n",
+ " topo = []\n",
+ " visited = set()\n",
+ " def build_topo(v):\n",
+ " if v not in visited:\n",
+ " visited.add(v)\n",
+ " for child in v._prev:\n",
+ " build_topo(child)\n",
+ " topo.append(v)\n",
+ "\n",
+ " build_topo(self)\n",
+ "\n",
+ " self.grad = 1.0\n",
+ " for node in reversed(topo):\n",
+ " node._backward()"
+ ],
+ "metadata": {
+ "id": "tA0zbyEwFbD5"
+ },
+ "execution_count": 20,
+ "outputs": []
+ },
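+ {
+ "cell_type": "markdown",
+ "source": [
+ "Quick sanity check of that `__radd__` change (a minimal sketch, just for illustration): Python falls back to `__radd__` when the left operand is a plain number, and sum() relies on this because it starts from the int 0."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Without __radd__, 1 + Value(2.0) would raise a TypeError,\n",
+ "#because int.__add__ does not know about Value\n",
+ "print(1 + Value(2.0)) #resolved via Value.__radd__\n",
+ "print(sum([Value(1.0), Value(2.0)])) #sum starts from 0, so it needs __radd__ too"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },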
+ {
+ "cell_type": "markdown",
+ "source": [
+ "---------------"
+ ],
+ "metadata": {
+ "id": "m9hy05zbxhLP"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import random"
+ ],
+ "metadata": {
+ "id": "gu3tnJu1Wti5"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class Neuron:\n",
+ "\tdef __init__(self, nin):\n",
+ "\t\tself.w = [ Value(random.uniform(-1,1)) for _ in range(nin) ]\n",
+ "\t\tself.b = Value(random.uniform(-1,1))\n",
+ "\n",
+ "\tdef __call__(self, x):\n",
+ "\t\t# (w*x)+b\n",
+ "\t\tact = sum( (wi*xi for wi,xi in zip(self.w, x)), self.b )\n",
+ "\t\tout = act.tanh()\n",
+ "\t\treturn out\n",
+ "\n",
+ "class Layer:\n",
+ "\tdef __init__(self, nin, nout):\n",
+ "\t\tself.neurons = [Neuron(nin) for _ in range(nout)]\n",
+ "\n",
+ "\tdef __call__(self, x):\n",
+ "\t\touts = [n(x) for n in self.neurons]\n",
+ "\t\treturn outs[0] if len(outs)==1 else outs #The New added line for making the output better\n",
+ "\n",
+ "class MLP:\n",
+ "\tdef __init__(self, nin, nouts):\n",
+ "\t\tsz = [nin] + nouts\n",
+ "\t\tself.layers = [ Layer(sz[i], sz[i+1]) for i in range(len(nouts)) ]\n",
+ "\n",
+ "\tdef __call__(self, x):\n",
+ "\t\tfor layer in self.layers:\n",
+ "\t\t\tx = layer(x)\n",
+ "\t\treturn x"
+ ],
+ "metadata": {
+ "id": "aCXXYNg_W680"
+ },
+ "execution_count": 21,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "x = [2.0, 3.0, -1.0]\n",
+ "n = MLP(3, [4, 4, 1])\n",
+ "n(x)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "aG9pKV_RXsO8",
+ "outputId": "e6f183b9-896b-458f-9322-e91bc79e9da2",
+ "collapsed": true
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Value(data=-0.33393070997191954)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "-----------"
+ ],
+ "metadata": {
+ "id": "6DemdSsv_abu"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, we are creating a very simple dataset. Where we feed a list of values which we would like the NN to take as input.\\\n",
+ "\\\n",
+ "Along with the list of desired output targets. \\\n",
+ "So whenever we give the values xs into the NN, we want the output values to be those in ys respectively. \\\n",
+ "\n",
+ " \n",
+ "\n",
+ "It's almost like a simple Binary Classification. It needs to be either 1.0 or -1.0 in our example."
+ ],
+ "metadata": {
+ "id": "0VKhjeOJ_tBX"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "xs = [\n",
+ " [2.0, 3.0, -1.0],\n",
+ " [3.0, -1.0, 0.5],\n",
+ " [0.5, 1.0, 1.0],\n",
+ " [1.0, 1.0, -1.0]\n",
+ "]\n",
+ "\n",
+ "ys = [1.0, -1.0, -1.0, 1.0] #desired targets"
+ ],
+ "metadata": {
+ "id": "r4RHL9XJ_bR1"
+ },
+ "execution_count": 24,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, ys is what WE WANT. \\\n",
+ "\\\n",
+ "Lets see what our NN thinks of those inputs"
+ ],
+ "metadata": {
+ "id": "FrOXcQ1dAqm-"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "ypred = [n(x) for x in xs]\n",
+ "ypred"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_cmYUJJfAv-K",
+ "outputId": "a398d163-6d57-4246-b88f-ada6868eba14"
+ },
+ "execution_count": 25,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[Value(data=-0.33393070997191954),\n",
+ " Value(data=-0.7996605801165794),\n",
+ " Value(data=-0.053910703703307694),\n",
+ " Value(data=-0.5691658715750736)]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 25
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "-------"
+ ],
+ "metadata": {
+ "id": "2I42qlkcByTl"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So we can see how the outputs we have received are different from what we need \\\n",
+ "\\\n",
+ "first and fourth one needs to be increased slighly to reach our desired value. \\\n",
+ "\\\n",
+ "And the second, third one needs to be decreased."
+ ],
+ "metadata": {
+ "id": "9efbBNZpBzKl"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "-------"
+ ],
+ "metadata": {
+ "id": "fesk81HFDO12"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So now how do train/tune the NN or how do tune the weights in order to get our desired output.\n",
+ "\n",
+ " \n",
+ "\n",
+ "The trick in NN is to calculate a single value that contains the sum of the total performance of your NN. **And that will be the loss value.**\n"
+ ],
+ "metadata": {
+ "id": "EMRMR3XjCMH9"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "----------"
+ ],
+ "metadata": {
+ "id": "904P0NpRDQCG"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So this loss, will give us an intuition on how well the NN is performing. \\\n",
+ "\\\n",
+ "Right now in our example, it is not performing very well, as the values are way off. **Therefore the loss will be high and we need to look to minimize the loss.**"
+ ],
+ "metadata": {
+ "id": "867tWreLC7Y_"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "-------"
+ ],
+ "metadata": {
+ "id": "O1xe4_bmDT-2"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So in this particular situation/example, we are going to do/**calculate the Mean Squared Error Loss.**"
+ ],
+ "metadata": {
+ "id": "_VojDymsDUfG"
+ }
+ },
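+ {
+ "cell_type": "markdown",
+ "source": [
+ "Concretely, the single loss number we are about to compute is \\\n",
+ "\\\n",
+ "$\\text{loss} = \\sum_i (\\text{yout}_i - \\text{ygt}_i)^2$ \\\n",
+ "\\\n",
+ "(Note: we just sum the squared errors without dividing by the number of examples; for minimization this behaves the same as taking the mean.)"
+ ],
+ "metadata": {}
+ },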
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So first we take the 'y ground truth (ygt)' and then 'y output (yout)'. We will be pairing them together in the loop"
+ ],
+ "metadata": {
+ "id": "7B7UTG8aDp8x"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "for ygt, yout in zip(ys, ypred)"
+ ],
+ "metadata": {
+ "id": "moXVkKr-BGqI"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "And then we will be subtracting each of those values and then squaring them"
+ ],
+ "metadata": {
+ "id": "UvoyS8BCEUjm"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now that will give us the loss for each of those individual values"
+ ],
+ "metadata": {
+ "id": "eBWM11c8EXAP"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "[(yout - ygt)**2 for ygt, yout in zip(ys, ypred)]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AnpXU6bJEYfO",
+ "outputId": "0f3c339d-ca49-4e2d-e6dc-9bc8237b2545"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[Value(data=0.2551293266642757),\n",
+ " Value(data=1.5689343597801393),\n",
+ " Value(data=1.942823557477381),\n",
+ " Value(data=0.2264329556998548)]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So depending on how off your pred value is from the expected one, the higher th value will be \\\n",
+ "\\\n",
+ "That expression is written in such a way that, only when the yout and ygt are close to each other, it will become 0. Therefore no loss in that case \\\n",
+ "\\\n",
+ "So the aim is to make the loss as small as possible"
+ ],
+ "metadata": {
+ "id": "Mgvz23RuFHpj"
+ }
+ },
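+ {
+ "cell_type": "markdown",
+ "source": [
+ "A tiny illustration with made-up numbers (not from our network): against a target of 1.0, a nearby prediction contributes almost nothing, while a far-away one dominates the loss."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#hypothetical predictions against a target of 1.0\n",
+ "print((0.9 - 1.0)**2) #close to the target -> tiny squared error (~0.01)\n",
+ "print((-0.8 - 1.0)**2) #far from the target -> large squared error (~3.24)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },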
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, as mentioned before. **The final loss will just be the sum of all those numbers.**"
+ ],
+ "metadata": {
+ "id": "PeERIbSWFcvl"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "loss = sum((yout - ygt)**2 for ygt, yout in zip(ys, ypred))\n",
+ "loss"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NNfDMAtYE0yH",
+ "outputId": "c933dfc4-e652-411b-c3c5-f6eb0f3cdb02"
+ },
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Value(data=5.176873511248545)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 26
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "*Note: There was an error here where int could not be added to a Value. So I have made the modification in the Value object to handle that :)*"
+ ],
+ "metadata": {
+ "id": "GNcROiuRHaf6"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "loss.backward()"
+ ],
+ "metadata": {
+ "id": "f7idyOIkHskE"
+ },
+ "execution_count": 27,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Something cool really happended when you ran that last cell"
+ ],
+ "metadata": {
+ "id": "9FInP559Hxis"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "n.layers[0].neurons[0].w[0]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YkgbHFOhHudX",
+ "outputId": "2f069084-b36d-4db5-cb60-3b0b99190795"
+ },
+ "execution_count": 28,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Value(data=-0.22585628440403194)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 28
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "n.layers[0].neurons[0].w[0].grad"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "67yRXhpfH7Q8",
+ "outputId": "0b1dbda2-4352-4dd8-fab8-4eccd83a7a56"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "3.080379043409595"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "So now we have extracted the value and its grad value for one particular neuron!"
+ ],
+ "metadata": {
+ "id": "scA8Gt25INvq"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "-------"
+ ],
+ "metadata": {
+ "id": "olS1rGzhIT_4"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Lets look at the graph of the loss. That will give us a bigger graph, as if you would notice in the mean squared error expression that we wrote, we are passing each of the neuron to it."
+ ],
+ "metadata": {
+ "id": "Add_rhPMIUv5"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(loss)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "yrSJVJzmH-EV",
+ "outputId": "b9e13986-3d9f-437c-d8aa-6d0902e275c2"
+ },
+ "execution_count": 30,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 30
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "PHEEWWWW THAT WAS AWESOME LMAO"
+ ],
+ "metadata": {
+ "id": "OFEugNaYJCHN"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We have like 4 different foward pass and finally calculating the loss \\\n",
+ "\\\n",
+ "Not only that, we have also backpropagated throughout the entire graph!"
+ ],
+ "metadata": {
+ "id": "TNW8N6UnJNQ-"
+ }
+ }
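+ ,
+ {
+ "cell_type": "markdown",
+ "source": [
+ "As a closing sketch of what these gradients are for (illustrative only; the 0.01 step size is an arbitrary choice, not something tuned here): nudging each parameter against its gradient is what will decrease the loss."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#illustrative gradient-descent nudge on the one weight we inspected above\n",
+ "w = n.layers[0].neurons[0].w[0]\n",
+ "w.data += -0.01 * w.grad #step against the gradient, so the loss should decrease slightly"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ }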
+ ]
+}
\ No newline at end of file