{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "import math" ], "metadata": { "id": "JlYxBvFK0AjA" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "class Value:\n", "\n", " def __init__(self, data, _children=(), _op='', label=''):\n", " self.data = data\n", " self.grad = 0.0\n", " self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n", " self._prev = set(_children)\n", " self._op = _op\n", " self.label = label\n", "\n", "\n", " def __repr__(self):\n", " return f\"Value(data={self.data})\"\n", "\n", " def __add__(self, other):\n", " other = other if isinstance(other, Value) else Value(other)\n", " out = Value(self.data + other.data, (self, other), '+')\n", "\n", " def backward():\n", " self.grad += 1.0 * out.grad\n", " other.grad += 1.0 * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def __mul__(self, other):\n", " other = other if isinstance(other, Value) else Value(other)\n", " out = Value(self.data * other.data, (self, other), '*')\n", "\n", " def backward():\n", " self.grad += other.data * out.grad\n", " other.grad += self.data * out.grad\n", " out._backward = backward\n", " return out\n", "\n", " def __rmul__(self, other): #other * self\n", " return self * other\n", "\n", " def __truediv__(self, other): #self/other\n", " return self * other**-1\n", "\n", " def __neg__(self):\n", " return self * -1\n", "\n", " def __sub__(self, other): #self - other\n", " return self + (-other)\n", "\n", " def __pow__(self, other):\n", " assert isinstance(other, (int, float)), \"only supporting int/float powers for now\"\n", " out = Value(self.data ** other, (self, ), f\"**{other}\")\n", "\n", " def backward():\n", " self.grad += (other * (self.data ** (other - 1))) * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def tanh(self):\n", " x = self.data\n", " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n", " out = Value(t, (self, ), 'tanh')\n", "\n", " def backward():\n", " self.grad += 1 - (t**2) * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def exp(self):\n", " x = self.data\n", " out = Value(math.exp(x), (self, ), 'exp') #We merged t and out, into just out\n", "\n", " def backward():\n", " self.grad += out.data * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def backward(self):\n", "\n", " topo = []\n", " visited = set()\n", " def build_topo(v):\n", " if v not in visited:\n", " visited.add(v)\n", " for child in v._prev:\n", " build_topo(child)\n", " topo.append(v)\n", "\n", " build_topo(self)\n", "\n", " self.grad = 1.0\n", " for node in reversed(topo):\n", " node._backward()" ], "metadata": { "id": "tA0zbyEwFbD5" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "x1 = Value(2.0, label='x1')\n", "x2 = Value(0.0, label='x2')\n", "\n", "w1 = Value(-3.0, label='w1')\n", "w2 = Value(1.0, label='w2')\n", "\n", "b = Value(6.8813735870195432, label='b')\n", "\n", "x1w1 = x1*w1; x1w1.label = 'x1*w1'\n", "x2w2 = x2*w2; x2w2.label = 'x2*w2'\n", "\n", "x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'\n", "\n", "n = x1w1x2w2 + b; n.label = 'n'\n", "\n", "#o = n.tanh(); o.label = 'o'\n", "\n", "#Spliting up of the tanh function\n", "\n", "e = (2*n).exp()\n", "o = (e - 1) / (e + 1)\n", "o.label = 'o'" ], "metadata": { "id": "Ounbj2XwHSZ1" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "--------------" ], "metadata": { "id": "mBTYZa8wzbi0" } }, { "cell_type": "markdown", "source": [ "### **Now we'll how the same thing can be written in PyTorch syntax**" ], "metadata": { "id": "QY-LvbPlzcwK" } }, { "cell_type": "code", "source": [ "import torch" ], "metadata": { "id": "1dfm4XaizroF" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "source": [ "x1 = torch.Tensor([2.0]).double() ; x1.requires_grad = True\n", "x2 = torch.Tensor([0.0]).double() ; x2.requires_grad = True\n", "w1 = torch.Tensor([-3.0]).double() ; w1.requires_grad = True\n", "w2 = torch.Tensor([1.0]).double() ; w2.requires_grad = True\n", "b = torch.Tensor([6.8813735870195432]).double() ; b.requires_grad = True\n", "\n", "n = x1*w1 + x2*w2 + b\n", "o = torch.tanh(n)\n", "\n", "print(o.data.item())\n", "o.backward()\n", "\n", "print('x2', x2.grad.item())\n", "print('w2', w2.grad.item())\n", "print('x1', x1.grad.item())\n", "print('w1', w1.grad.item())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CoMLfm5lzkEh", "outputId": "bddcf726-6830-4a2e-ea9a-bec32266dc8a" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.7071066904050358\n", "x2 0.5000001283844369\n", "w2 0.0\n", "x1 -1.5000003851533106\n", "w1 1.0000002567688737\n" ] } ] }, { "cell_type": "markdown", "source": [ "-------------" ], "metadata": { "id": "FvpWP3RjzxD1" } }, { "cell_type": "markdown", "source": [ "Some PyTorch syntax review" ], "metadata": { "id": "_WqfN608zx9O" } }, { "cell_type": "code", "source": [ "torch.Tensor([[1,2,3], [4,5,6]])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JWLvfH_Jztpj", "outputId": "c9209290-c43c-4cc8-c3b7-36ba90b6fddc" }, "execution_count": 5, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "tensor([[1., 2., 3.],\n", " [4., 5., 6.]])" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "code", "source": [ "torch.Tensor([[1,2,3], [4,5,6]]).shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "4lG9Zz5fz1zu", "outputId": "14f98a9e-1cd1-4cf8-d429-db387ec3fc5e" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "torch.Size([2, 3])" ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "code", "source": [ "torch.Tensor([2.0]).dtype" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gcaF9qLpz3Ah", "outputId": "633bccd0-c61f-4f17-b81d-3f88e39574a3" }, "execution_count": 7, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "torch.float32" ] }, "metadata": {}, "execution_count": 7 } ] }, { "cell_type": "code", "source": [ "torch.Tensor([2.0]).double().dtype" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JCeS4262z5Ks", "outputId": "f7fcb909-6e7f-46ab-fb4f-ff1cee88a96d" }, "execution_count": 8, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "torch.float64" ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "code", "source": [ "x2.data" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "F8HuYRzXz7H9", "outputId": "284a5801-c7b1-4dbb-e950-545b161e190d" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "tensor([0.], dtype=torch.float64)" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "x2.data.item()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9EVQmyZfz-GQ", "outputId": "31f2d9c7-2fc2-4f2d-8590-c13d399a343c" }, "execution_count": 10, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "markdown", "source": [ "------------" ], "metadata": { "id": "pSGfEq290B4q" } } ] }