{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#
CS568:Deep Learning
Spring 2020
" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": {}, "colab_type": "code", "id": "SWQhFCwgkj9D" }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "TPDA5D6lkXpm" }, "source": [ "The Pytorch tensors can be copied from CPU to GPU. " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": {}, "colab_type": "code", "id": "ZuSSV-0jkenS" }, "outputs": [ { "ename": "AssertionError", "evalue": "Torch not compiled with CUDA enabled", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# copy x to GPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;31m# copy x back to CPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_lazy_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 176\u001b[0m raise RuntimeError(\n\u001b[1;32m 177\u001b[0m \"Cannot re-initialize CUDA in forked subprocess. 
\" + msg)\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0m_cudart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_load_cudart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_check_driver\u001b[0;34m()\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_cuda_isDriverSufficient'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Torch not compiled with CUDA enabled\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_isDriverSufficient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_getDriverVersion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled" ] } ], "source": [ "# set seed for repeatability\n", "torch.manual_seed(22)\n", "# create a pytorch tensor\n", "x = torch.randn(2,2)\n", "# copy x to GPU\n", "y = x.cuda()\n", "# copy x back to CPU\n", "z = y.cpu()\n", "\n", "try:\n", " print(y.numpy())\n", "except RuntimeError as e:\n", " print(\"Error: \", e)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "hgiNBBH8mmwH" }, "source": [ "To perform any operation, both tensors should be on same device.\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": {}, "colab_type": "code", "id": "W0shLbd3lNlw" }, "outputs": [ { "ename": "AssertionError", "evalue": "Torch not compiled with CUDA enabled", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# b tensor on GPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_lazy_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 176\u001b[0m raise RuntimeError(\n\u001b[1;32m 177\u001b[0m \"Cannot re-initialize CUDA in forked subprocess. \" + msg)\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0m_cudart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_load_cudart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_check_driver\u001b[0;34m()\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_cuda_isDriverSufficient'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Torch not compiled with CUDA enabled\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_isDriverSufficient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_getDriverVersion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled" ] } ], "source": [ "# a tensor on CPU\n", "a = torch.rand(2,3)\n", "# b tensor on GPU\n", "b = torch.rand(3,2).cuda() \n", "try:\n", " result = torch.mm(a,b) \n", " print(result)\n", "except TypeError as e:\n", " print(e)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "RUX545NvnAXp" }, "source": [ "Check if GPU is available then move tensors to GPU." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "id": "SJSff6Dzk-0r", "outputId": "4ac95881-9269-4c3c-cdef-6a2833ea1169" }, "outputs": [], "source": [ "a = torch.rand(2,2)\n", "if torch.cuda.is_available():\n", " a = a.cuda()\n", " print(a)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "lFXV6odGoA6M" }, "source": [ "GPU vs CPU " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "id": "cqzgkV36ozp4", "outputId": "f2d29f0f-d8ea-49ef-b33f-fba53108eede" }, "outputs": [], "source": [ "from timeit import timeit \n", "\n", "def matmul():\n", " res = torch.mm(a, b) \n", "\n", "# create two random tensors \n", "a = torch.rand(1000, 1280)\n", "b = torch.rand(1280, 1)\n", "ite = 1000\n", "\n", "# Time CPU takes for matrix multiplication\n", "print('CPU: {} seconds'.format(timeit(matmul, number=ite)))\n", "# Time GPU takes for matrix multiplication\n", "a, b = a.cuda(), b.cuda()\n", "print('GPU: {} seconds'.format(timeit(matmul, number=ite)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "o0wjomIMqupQ" }, "source": [ "Pytorch tensors provide automatic differentiation. If you set requires_grad=True pytorch will compute the gradient of this tensor. 
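{ "cell_type": "markdown", "metadata": {}, "source": [ "A device-agnostic variant of the check above (a sketch): torch.device picks the GPU when one is present and falls back to the CPU otherwise, and .to(device) copies only when the tensor is not already on that device." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# minimal device-agnostic pattern: choose GPU if present, else CPU\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "a = torch.rand(2, 2).to(device)  # no-op when already on `device`\n", "print(a.device)" ] },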
" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 319 }, "colab_type": "code", "id": "fr5OaqhYr1-l", "outputId": "7c1374d7-cd5a-48e3-a910-3daed3747e8b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.int64\n", "y torch.float32\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \n", "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:4: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " after removing the cwd from sys.path.\n" ] } ], "source": [ "# construct a differentiable tensor\n", "x = torch.tensor(torch.arange(1,5), requires_grad=False)\n", "print(\"x\", x.dtype)\n", "y = torch.tensor(torch.arange(1,5).float(), requires_grad=True)\n", "print(\"y\", y.dtype)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KL-ZdHvJuIJ5" }, "source": [ "The tensor y depends on x and x has discrete values. Therefore, we cannot compute gradient of tensor y. " ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": {}, "colab_type": "code", "id": "lUv3cMwrsnGR" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.int64\n", "x tensor([1, 2, 3, 4])\n", "y tensor([ 1, 4, 9, 16])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] }, { "ename": "RuntimeError", "evalue": "element 0 of tensors does not require grad and does not have a grad_fn", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"y\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m# Calculate gradient (dy/dx=2x)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;31m# Print values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 91\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 92\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn" ] } ], "source": [ "x = torch.tensor(torch.arange(1,5), requires_grad=False)\n", "print(\"x\", x.dtype)\n", "# compute square of x\n", "y = x**2\n", "print(\"x\",x)\n", "print(\"y\",y)\n", "# Calculate gradient (dy/dx=2x)\n", "y.sum().backward()\n", "# Print values\n", "print(x.grad)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 139 }, "colab_type": "code", "id": "TUreojDyupNO", "outputId": "a5e87a91-917f-4842-ab45-ac13556073ce" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.float32\n", "x tensor([1., 2., 3., 4.], requires_grad=True)\n", "y tensor([ 1., 4., 9., 16.], grad_fn=)\n", "tensor([2., 4., 6., 8.])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "x = torch.tensor(torch.arange(1,5).float(), requires_grad=True)\n", "print(\"x\", x.dtype)\n", "# compute square of x\n", "y = x**2\n", "print(\"x\",x)\n", "print(\"y\",y)\n", "# Calculate gradient (dy/dx=2x)\n", "y.sum().backward()\n", "# Print 
values\n", "print(x.grad)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "8HqqB9l0vE_i" }, "source": [ "#### Basic building blocks to make a neural network in Pytorch" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "yZ-lRq8OwKlz" }, "source": [ "**Define model step**\n", "Construct a network using torch.nn module" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "nQ2Ltqm3vDg5", "outputId": "595c3c3b-89ad-410d-9b85-96ea482ac248" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Linear(in_features=3, out_features=2, bias=True)\n" ] } ], "source": [ "net = torch.nn.Linear(3,2)\n", "print(net)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "12tazin_wZXL" }, "source": [ "forward() function" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 88 }, "colab_type": "code", "id": "M1YGEcoSwX-c", "outputId": "86ef77d4-d3c5-4a56-caac-61699a6039b9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([ 0.9696, -1.4331], grad_fn=)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "x = torch.tensor(torch.arange(0, 3).float(), requires_grad=True)\n", "y = net.forward(x) # or net(x)\n", "print(y)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nFitLGpvxwsM" }, "source": [ "parameters() function" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "1ShuDsaEv01v", "outputId": "8307d2b7-d65c-451b-d330-08554fa3db29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parameter containing:\n", "tensor([[-0.2149, 0.3190, 0.2837],\n", " [ 0.0011, -0.1010, -0.3801]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0832, -0.5718], requires_grad=True)\n" ] } ], "source": [ "for param in net.parameters():\n", " print(param)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "svXfQAw64GQv" }, "source": [ "Create a model by constructing a class MyNetwork. This class will inherit from the nn.Module class of Pytorch." 
{ "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nFitLGpvxwsM" }, "source": [ "The parameters() function" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "1ShuDsaEv01v", "outputId": "8307d2b7-d65c-451b-d330-08554fa3db29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parameter containing:\n", "tensor([[-0.2149,  0.3190,  0.2837],\n", "        [ 0.0011, -0.1010, -0.3801]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0832, -0.5718], requires_grad=True)\n" ] } ], "source": [ "for param in net.parameters():\n", "    print(param)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "svXfQAw64GQv" }, "source": [ "Create a model by defining a class MyNetwork that inherits from PyTorch's nn.Module class." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "gm-XqAIw2ABP", "outputId": "4647f8fa-8e0e-4035-8b0c-d3948a058f1f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MyNetwork(\n", "  (layer1): Linear(in_features=5, out_features=50, bias=True)\n", "  (layer2): Sigmoid()\n", "  (layer3): Linear(in_features=50, out_features=10, bias=True)\n", ")\n" ] } ], "source": [ "class MyNetwork(torch.nn.Module):\n", "    def __init__(self, input_dim, hidden_dim, output_dim):\n", "        \"\"\"\n", "        In this constructor we instantiate two nn.Linear modules and a\n", "        Sigmoid activation and assign them as member variables.\n", "\n", "        input_dim: input dimension\n", "        hidden_dim: dimension of hidden layer\n", "        output_dim: output dimension\n", "        \"\"\"\n", "        super().__init__()\n", "        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)\n", "        self.layer2 = torch.nn.Sigmoid()\n", "        self.layer3 = torch.nn.Linear(hidden_dim, output_dim)\n", "\n", "    def forward(self, x):\n", "        \"\"\"\n", "        Accept a tensor of input data and return a tensor of output data.\n", "        \"\"\"\n", "        x = self.layer1(x)\n", "        x = self.layer2(x)\n", "        x = self.layer3(x)\n", "        return x\n", "\n", "net = MyNetwork(5, 50, 10)\n", "print(net)" ] },
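{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sanity check of MyNetwork (a sketch): pass a random batch through the network and count its learnable parameters." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# batch of 8 samples with 5 features each -> expect output shape (8, 10)\n", "batch = torch.rand(8, 5)\n", "out = net(batch)\n", "print(out.shape)\n", "# total number of learnable parameters across all layers\n", "print(sum(p.numel() for p in net.parameters()))" ] },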
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "initialize child1 class\n", "initialize parent class\n", "initialize child2 class\n" ] } ], "source": [ "class parent:\n", "    ''' Creates a parent class '''\n", "    def __init__(self):\n", "        print(\"initialize parent class\")\n", "\n", "class child1(parent):\n", "    '''Inherits from parent, but does not run parent's init method '''\n", "    def __init__(self):\n", "        print(\"initialize child1 class\")\n", "\n", "class child2(parent):\n", "    '''Inherits from parent, but DOES run parent's init method'''\n", "    def __init__(self):\n", "        super().__init__()\n", "        print(\"initialize child2 class\")\n", "\n", "c1 = child1()\n", "c2 = child2()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "FmXYtubI2sMg" }, "outputs": [], "source": [ "# the same network as a torch.nn.Sequential; import nn so the\n", "# short names below resolve\n", "from torch import nn\n", "\n", "def neural_net(input_size, hidden_size, output_size):\n", "    return nn.Sequential(nn.Linear(input_size, hidden_size),\n", "                         nn.ReLU(),\n", "                         nn.Linear(hidden_size, output_size))\n", "net = neural_net(5, 50, 10)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JHSlaUs64b1q" }, "source": [ "Now define the loss function and optimizer." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 170 }, "colab_type": "code", "id": "TzpafTNA4djh", "outputId": "5c2e6b9f-dda4-4678-804b-1b6f3ecec12c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor(2.0477, grad_fn=<MseLossBackward>)\n", "param  Parameter containing:\n", "tensor([[ 0.4638,  0.3946,  0.0053],\n", "        [-0.4698,  0.2770, -0.3739]], requires_grad=True)\n", "param  Parameter containing:\n", "tensor([ 0.4330, -0.4905], requires_grad=True)\n", "param gradients tensor([[ 0.5496,  0.1314, -0.2081],\n", "        [-0.9448,  0.7071, -0.7055]])\n", "param gradients tensor([ 0.5799, -0.4996])\n" ] } ], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "optimizer = torch.optim.SGD(net.parameters(), lr=0.01)\n", "\n", "output = net(x)\n", "loss = criterion(output, y)\n", "print(loss)\n", "loss.backward()\n", "\n", "for param in net.parameters():\n", "    print(\"param \", param)\n", "\n", "for param in net.parameters():\n", "    print(\"param gradients\", param.grad)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "hgdbBMWC7aFz" }, "source": [ "PyTorch accumulates gradients: a second backward() call adds to the existing .grad buffers, so they must be reset with zero_grad() between updates." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 136 }, "colab_type": "code", "id": "YMdRiUMZ6wJi", "outputId": "c6dfd096-569a-49d6-a7a4-6df49db256c6" }, "outputs": [], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "\n", "# first backward pass\n", "output = net(x)\n", "loss = criterion(output, y)\n", "print(loss)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)\n", "\n", "# second backward pass WITHOUT zeroing: new gradients are added to the old\n", "output = net(x)\n", "loss = criterion(output, y)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)\n", "\n", "# zero the gradients, then backward again: fresh gradients\n", "net.zero_grad()\n", "output = net(x)\n", "loss = criterion(output, y)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)" ] },
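{ "cell_type": "markdown", "metadata": {}, "source": [ "What optimizer.step() will do below for plain SGD, written out by hand (a sketch; reuses the net and gradients from the previous cell): every parameter moves against its gradient, scaled by the learning rate." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# manual SGD update: theta <- theta - lr * grad\n", "lr = 0.01\n", "with torch.no_grad():  # do not record the update itself in the graph\n", "    for param in net.parameters():\n", "        param -= lr * param.grad" ] },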
"https://localhost:8080/", "height": 272 }, "colab_type": "code", "id": "UO8jM6kB8Ryo", "outputId": "6fa6ff3a-caf7-4553-cc40-75417e72aec5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "parameters gradients tensor([[ 0.3660, 0.0393, -0.3672],\n", " [-0.4298, 0.5568, 0.1277]])\n", "parameters gradients tensor([ 0.6440, -0.7389])\n", "Parameters before gradient descent :\n", "Parameter containing:\n", "tensor([[ 0.3447, -0.1549, -0.1806],\n", " [-0.3115, 0.1928, 0.2706]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0963, -0.5436], requires_grad=True)\n", "Parameters after gradient descent :\n", "Parameter containing:\n", "tensor([[ 0.3410, -0.1553, -0.1769],\n", " [-0.3072, 0.1872, 0.2693]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0899, -0.5362], requires_grad=True)\n" ] } ], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "optimizer = torch.optim.SGD(net.parameters(), lr=0.01)\n", "\n", "net.zero_grad()\n", "output = net(x)\n", "loss = criterion(output,y)\n", "loss.backward()\n", "for param in net.parameters():\n", " print(\"parameters gradients\", param.grad)\n", "\n", "print(\"Parameters before gradient descent :\")\n", "for param in net.parameters():\n", " print(param)\n", "\n", "optimizer.step()\n", "\n", "print(\"Parameters after gradient descent :\")\n", "for param in net.parameters():\n", " print(param)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "colab_type": "code", "id": "8w0Eq1Gr8JHs", "outputId": "91b717b6-5193-410f-e552-a6152a20bf3e" }, "outputs": [], "source": [ "iterations = 10\n", "for i in range(iterations):\n", " optimizer.zero_grad() \n", " output = net(x)\n", " loss = criterion(output,y)\n", " loss.backward()\n", " optimizer.step()\n", " print(loss)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "Pytorch_tutorial_2", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 1 }