diff --git "a/7_0_backward_func_each_operation.ipynb" "b/7_0_backward_func_each_operation.ipynb"
new file mode 100644
--- /dev/null
+++ "b/7_0_backward_func_each_operation.ipynb"
@@ -0,0 +1,521 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "source": [
+ "from graphviz import Digraph\n",
+ "\n",
+ "def trace(root):\n",
+ " #Builds a set of all nodes and edges in a graph\n",
+ " nodes, edges = set(), set()\n",
+ " def build(v):\n",
+ " if v not in nodes:\n",
+ " nodes.add(v)\n",
+ " for child in v._prev:\n",
+ " edges.add((child, v))\n",
+ " build(child)\n",
+ " build(root)\n",
+ " return nodes, edges\n",
+ "\n",
+ "def draw_dot(root):\n",
+ " dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n",
+ "\n",
+ " nodes, edges = trace(root)\n",
+ " for n in nodes:\n",
+ " uid = str(id(n))\n",
+ " #For any value in the graph, create a rectangular ('record') node for it\n",
+ " dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n",
+ " if n._op:\n",
+ " #If this value is a result of some operation, then create an op node for it\n",
+ " dot.node(name = uid + n._op, label=n._op)\n",
+ " #and connect this node to it\n",
+ " dot.edge(uid + n._op, uid)\n",
+ "\n",
+ " for n1, n2 in edges:\n",
+ " #Connect n1 to the node of n2\n",
+ " dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n",
+ "\n",
+ " return dot"
+ ],
+ "metadata": {
+ "id": "T0rN8d146jvF"
+ },
+ "execution_count": 1,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import math"
+ ],
+ "metadata": {
+ "id": "JlYxBvFK0AjA"
+ },
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "class Value:\n",
+ "\n",
+ " def __init__(self, data, _children=(), _op='', label=''):\n",
+ " self.data = data\n",
+ " self.grad = 0.0\n",
+ " self._prev = set(_children)\n",
+ " self._op = _op\n",
+ " self.label = label\n",
+ "\n",
+ "\n",
+ " def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n",
+ " return f\"Value(data={self.data})\"\n",
+ "\n",
+ " def __add__(self, other):\n",
+ " out = Value(self.data + other.data, (self, other), '+')\n",
+ "\n",
+ " return out\n",
+ "\n",
+ " def __mul__(self, other):\n",
+ " out = Value(self.data * other.data, (self, other), '*')\n",
+ "\n",
+ " return out\n",
+ "\n",
+ " def tanh(self):\n",
+ " x = self.data\n",
+ " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n",
+ " out = Value(t, (self, ), 'tanh')\n",
+ "\n",
+ " return out"
+ ],
+ "metadata": {
+ "id": "iMW-UQKm1Con"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#What the graph looks like right now, the grad values are set to 0\n",
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "j8zlrUnLz8F4",
+ "outputId": "9ea436d3-3701-4bb8-9fad-7dd9e14cbbe9"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "----------"
+ ],
+ "metadata": {
+ "id": "2uE_QgHkyovt"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We'll be adding a 'backward' function to our Value object and then implement its functionality in each of the operation function. \\\n",
+ "\\\n",
+ "We are basically converting everything we did manually to calculate the gradients in each operation to code :)"
+ ],
+ "metadata": {
+ "id": "o2ob02X8ypoE"
+ }
+ },
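+ {
+ "cell_type": "markdown",
+ "source": [
+ "For example, for multiplication $o = a \\cdot b$ the local derivatives are $\\partial o/\\partial a = b$ and $\\partial o/\\partial b = a$, so the chain rule gives `a.grad = b.data * o.grad` and `b.grad = a.data * o.grad` - this is exactly what the `backward` closure inside `__mul__` below does"
+ ],
+ "metadata": {}
+ },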
+ {
+ "cell_type": "code",
+ "source": [
+ "class Value:\n",
+ "\n",
+ " def __init__(self, data, _children=(), _op='', label=''):\n",
+ " self.data = data\n",
+ " self.grad = 0.0\n",
+ " self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n",
+ " self._prev = set(_children)\n",
+ " self._op = _op\n",
+ " self.label = label\n",
+ "\n",
+ "\n",
+ " def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n",
+ " return f\"Value(data={self.data})\"\n",
+ "\n",
+ " def __add__(self, other):\n",
+ " out = Value(self.data + other.data, (self, other), '+')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad = 1.0 * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n",
+ " other.grad = 1.0 * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def __mul__(self, other):\n",
+ " out = Value(self.data * other.data, (self, other), '*')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad = other.data * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n",
+ " other.grad = self.data * out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out\n",
+ "\n",
+ " def tanh(self):\n",
+ " x = self.data\n",
+ " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n",
+ " out = Value(t, (self, ), 'tanh')\n",
+ "\n",
+ " def backward():\n",
+ " self.grad = 1 - (t**2) * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n",
+ "\n",
+ " out._backward = backward\n",
+ " return out"
+ ],
+ "metadata": {
+ "id": "4XPxg_t3wl35"
+ },
+ "execution_count": 29,
+ "outputs": []
+ },
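+ {
+ "cell_type": "markdown",
+ "source": [
+ "A quick sanity check (an extra cell, not part of the main walkthrough): we can compare the hand-coded tanh derivative against a numerical estimate using central differences. The names `v`, `t`, `h` and `numerical` below are just illustrative"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#Compare our hand-coded tanh backward against a central-difference estimate\n",
+ "v = Value(0.5, label='v')\n",
+ "t = v.tanh(); t.grad = 1.0 #seed the output gradient, just like we do for o below\n",
+ "t._backward()\n",
+ "\n",
+ "h = 1e-6\n",
+ "numerical = (math.tanh(0.5 + h) - math.tanh(0.5 - h)) / (2*h)\n",
+ "print(v.grad, numerical) #both should be ~0.7864 (= 1 - tanh(0.5)**2)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },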
+ {
+ "cell_type": "code",
+ "source": [
+ "#Inputs x1, x2 of the neuron\n",
+ "x1 = Value(2.0, label='x1')\n",
+ "x2 = Value(0.0, label='x2')\n",
+ "\n",
+ "#Weights w1, w2 of the neuron - The synaptic values\n",
+ "w1 = Value(-3.0, label='w1')\n",
+ "w2 = Value(1.0, label='w2')\n",
+ "\n",
+ "#The bias of the neuron\n",
+ "b = Value(6.8813735870195432, label='b')\n",
+ "\n",
+ "x1w1 = x1*w1; x1w1.label = 'x1*w1'\n",
+ "x2w2 = x2*w2; x2w2.label = 'x2*w2'\n",
+ "\n",
+ "#The summation\n",
+ "x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'\n",
+ "\n",
+ "#n is basically the cell body, but without the activation function\n",
+ "n = x1w1x2w2 + b; n.label = 'n'\n",
+ "\n",
+ "#Now we pass n to the activation function\n",
+ "o = n.tanh(); o.label = 'o'"
+ ],
+ "metadata": {
+ "id": "S3HaLbW_zvne"
+ },
+ "execution_count": 32,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, we call the '_backward' function that we have made one by one in order (backwards through the equation/graph) \\\n",
+ "\\\n",
+ "But before we could do that, we have to first set the value of o.grad to 1.0 \\\n",
+ "As notice in the Value object code that, we have initialised it to 0 \\\n",
+ "\\\n",
+ "Therefore, we'll start by adding o.grad to 1.0 and then we'll call the '_backward' function rest of them one by one"
+ ],
+ "metadata": {
+ "id": "F5uk5VVf1S8W"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "o.grad = 1.0"
+ ],
+ "metadata": {
+ "id": "ldcILQ3y1RoO"
+ },
+ "execution_count": 33,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "2nAmCY0E15Lx",
+ "outputId": "275d6787-e01f-413a-d6a9-a187ac16ece2"
+ },
+ "execution_count": 34,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 34
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now. we go with o"
+ ],
+ "metadata": {
+ "id": "yAFb_M9Z191o"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "o._backward()"
+ ],
+ "metadata": {
+ "id": "Q0RbjS7_17iq"
+ },
+ "execution_count": 35,
+ "outputs": []
+ },
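+ {
+ "cell_type": "markdown",
+ "source": [
+ "A quick check of this step (extra cell): since $o = \\tanh(n)$ and we set o.grad to 1.0, we expect n.grad to be $1 - o^2 \\approx 0.5$"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#do/dn = 1 - tanh(n)**2 = 1 - o.data**2\n",
+ "print(n.grad, 1 - o.data**2) #both should be ~0.5"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },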
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "o3mJmjwE2AlD",
+ "outputId": "5173f640-b103-41a6-bac1-504a602e9b60"
+ },
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 36
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, from n"
+ ],
+ "metadata": {
+ "id": "EgCQYtk_2nF0"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "n._backward()"
+ ],
+ "metadata": {
+ "id": "iPvOJyA-2C11"
+ },
+ "execution_count": 37,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "a56spKYG2skK",
+ "outputId": "03fee540-36b3-498b-cef7-98422aedbb74"
+ },
+ "execution_count": 38,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 38
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, b is a leaf node so we just leave it there (Another reason why _backward was initiallised to None function, it's because it won't be called for leaf nodes. Therefore we set it to None for them) \\\n",
+ "\\\n",
+ "We'll continue with x1w1x1w2"
+ ],
+ "metadata": {
+ "id": "ZoUTucmy2yDk"
+ }
+ },
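+ {
+ "cell_type": "code",
+ "source": [
+ "#Just to demonstrate (extra cell): calling _backward on a leaf like b is harmless -\n",
+ "#it is still the default no-op lambda, so b.grad stays at the 0.5 that n's backward set\n",
+ "b._backward()\n",
+ "print(b.grad)"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },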
+ {
+ "cell_type": "code",
+ "source": [
+ "x1w1x2w2._backward()"
+ ],
+ "metadata": {
+ "id": "8lo99I7A2vPS"
+ },
+ "execution_count": 39,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "D_PYbqNS3HHp",
+ "outputId": "83e5ad18-2cd7-4a49-d157-70bea42bbac1"
+ },
+ "execution_count": 40,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 40
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now finally, to finding the gradient of the intial values"
+ ],
+ "metadata": {
+ "id": "W3fQZPPZ3Mg_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "x1w1._backward()\n",
+ "x2w2._backward()"
+ ],
+ "metadata": {
+ "id": "n0OkYLRn3KUS"
+ },
+ "execution_count": 41,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "draw_dot(o)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 322
+ },
+ "id": "uLfNK3633c66",
+ "outputId": "660c35e3-a91b-4bc9-9596-a17cae4998fb"
+ },
+ "execution_count": 42,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "image/svg+xml": "\n\n\n\n\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 42
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "----------------"
+ ],
+ "metadata": {
+ "id": "w1B0CDnz4O_X"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### **Final output^**"
+ ],
+ "metadata": {
+ "id": "p43Gt7B54dDp"
+ }
+ },
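+ {
+ "cell_type": "markdown",
+ "source": [
+ "As a final check (an extra cell, not in the original walkthrough), we can print the input gradients and compare them with the values the chain rule predicts, e.g. $\\partial o/\\partial x_1 = w_1 \\cdot (1 - o^2) = -3.0 \\cdot 0.5 = -1.5$"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#do/dn = 1 - o**2 ~ 0.5; each input's grad is its partner value times that\n",
+ "print(x1.grad) #expect w1.data * 0.5 = -1.5\n",
+ "print(w1.grad) #expect x1.data * 0.5 = 1.0\n",
+ "print(x2.grad) #expect w2.data * 0.5 = 0.5\n",
+ "print(w2.grad) #expect x2.data * 0.5 = 0.0"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },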
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Hence, we have not only verified the manual backpropagation calculation that we did, but also created funtions directly for each of them!"
+ ],
+ "metadata": {
+ "id": "n63PmnLT4QCs"
+ }
+ }
+ ]
+}
\ No newline at end of file