{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#
CS568:Deep Learning
Spring 2020
" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": {}, "colab_type": "code", "id": "SWQhFCwgkj9D" }, "outputs": [], "source": [ "import torch" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "TPDA5D6lkXpm" }, "source": [ "The Pytorch tensors can be copied from CPU to GPU. " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": {}, "colab_type": "code", "id": "ZuSSV-0jkenS" }, "outputs": [ { "ename": "AssertionError", "evalue": "Torch not compiled with CUDA enabled", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# copy x to GPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0;31m# copy x back to CPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mz\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcpu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_lazy_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 176\u001b[0m raise RuntimeError(\n\u001b[1;32m 177\u001b[0m \"Cannot re-initialize CUDA in forked subprocess. 
\" + msg)\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0m_cudart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_load_cudart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_check_driver\u001b[0;34m()\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_cuda_isDriverSufficient'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Torch not compiled with CUDA enabled\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_isDriverSufficient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_getDriverVersion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled" ] } ], "source": [ "# set seed for repeatability\n", "torch.manual_seed(22)\n", "# create a pytorch tensor\n", "x = torch.randn(2,2)\n", "# copy x to GPU\n", "y = x.cuda()\n", "# copy x back to CPU\n", "z = y.cpu()\n", "\n", "try:\n", " print(y.numpy())\n", "except RuntimeError as e:\n", " print(\"Error: \", e)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "hgiNBBH8mmwH" }, "source": [ "To perform any operation, both tensors should be on same device.\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": {}, "colab_type": "code", "id": "W0shLbd3lNlw" }, "outputs": [ { "ename": "AssertionError", "evalue": "Torch not compiled with CUDA enabled", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# b tensor on GPU\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m3\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_lazy_init\u001b[0;34m()\u001b[0m\n\u001b[1;32m 176\u001b[0m raise RuntimeError(\n\u001b[1;32m 177\u001b[0m \"Cannot re-initialize CUDA in forked subprocess. \" + msg)\n\u001b[0;32m--> 178\u001b[0;31m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 179\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_init\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0m_cudart\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_load_cudart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py\u001b[0m in \u001b[0;36m_check_driver\u001b[0;34m()\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_driver\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'_cuda_isDriverSufficient'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 92\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAssertionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Torch not compiled with CUDA enabled\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 93\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_isDriverSufficient\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 94\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_C\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_cuda_getDriverVersion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAssertionError\u001b[0m: Torch not compiled with CUDA enabled" ] } ], "source": [ "# a tensor on CPU\n", "a = torch.rand(2,3)\n", "# b tensor on GPU\n", "b = torch.rand(3,2).cuda() \n", "try:\n", " result = torch.mm(a,b) \n", " print(result)\n", "except TypeError as e:\n", " print(e)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "RUX545NvnAXp" }, "source": [ "Check if GPU is available then move tensors to GPU." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "id": "SJSff6Dzk-0r", "outputId": "4ac95881-9269-4c3c-cdef-6a2833ea1169" }, "outputs": [], "source": [ "a = torch.rand(2,2)\n", "if torch.cuda.is_available():\n", " a = a.cuda()\n", " print(a)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "lFXV6odGoA6M" }, "source": [ "GPU vs CPU " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 51 }, "colab_type": "code", "id": "cqzgkV36ozp4", "outputId": "f2d29f0f-d8ea-49ef-b33f-fba53108eede" }, "outputs": [], "source": [ "from timeit import timeit \n", "\n", "def matmul():\n", " res = torch.mm(a, b) \n", "\n", "# create two random tensors \n", "a = torch.rand(1000, 1280)\n", "b = torch.rand(1280, 1)\n", "ite = 1000\n", "\n", "# Time CPU takes for matrix multiplication\n", "print('CPU: {} seconds'.format(timeit(matmul, number=ite)))\n", "# Time GPU takes for matrix multiplication\n", "a, b = a.cuda(), b.cuda()\n", "print('GPU: {} seconds'.format(timeit(matmul, number=ite)))" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "o0wjomIMqupQ" }, "source": [ "Pytorch tensors provide automatic differentiation. If you set requires_grad=True pytorch will compute the gradient of this tensor. 
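{ "cell_type": "markdown", "metadata": {}, "source": [ "A device-agnostic variant of the check above (a sketch): torch.device picks the GPU when one is present and falls back to the CPU otherwise, and .to(device) copies only when the tensor is not already on that device." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# minimal device-agnostic pattern: choose GPU if present, else CPU\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "a = torch.rand(2, 2).to(device)  # no-op when already on `device`\n", "print(a.device)" ] },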
" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 319 }, "colab_type": "code", "id": "fr5OaqhYr1-l", "outputId": "7c1374d7-cd5a-48e3-a910-3daed3747e8b" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.int64\n", "y torch.float32\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \n", "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:4: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " after removing the cwd from sys.path.\n" ] } ], "source": [ "# construct a differentiable tensor\n", "x = torch.tensor(torch.arange(1,5), requires_grad=False)\n", "print(\"x\", x.dtype)\n", "y = torch.tensor(torch.arange(1,5).float(), requires_grad=True)\n", "print(\"y\", y.dtype)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "KL-ZdHvJuIJ5" }, "source": [ "The tensor y depends on x and x has discrete values. Therefore, we cannot compute gradient of tensor y. " ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": {}, "colab_type": "code", "id": "lUv3cMwrsnGR" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.int64\n", "x tensor([1, 2, 3, 4])\n", "y tensor([ 1, 4, 9, 16])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] }, { "ename": "RuntimeError", "evalue": "element 0 of tensors does not require grad and does not have a grad_fn", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"y\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;31m# Calculate gradient (dy/dx=2x)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;31m# Print values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrad\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \"\"\"\n\u001b[0;32m--> 118\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 91\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 92\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 93\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn" ] } ], "source": [ "x = torch.tensor(torch.arange(1,5), requires_grad=False)\n", "print(\"x\", x.dtype)\n", "# compute square of x\n", "y = x**2\n", "print(\"x\",x)\n", "print(\"y\",y)\n", "# Calculate gradient (dy/dx=2x)\n", "y.sum().backward()\n", "# Print values\n", "print(x.grad)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 139 }, "colab_type": "code", "id": "TUreojDyupNO", "outputId": "a5e87a91-917f-4842-ab45-ac13556073ce" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x torch.float32\n", "x tensor([1., 2., 3., 4.], requires_grad=True)\n", "y tensor([ 1., 4., 9., 16.], grad_fn=)\n", "tensor([2., 4., 6., 8.])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "x = torch.tensor(torch.arange(1,5).float(), requires_grad=True)\n", "print(\"x\", x.dtype)\n", "# compute square of x\n", "y = x**2\n", "print(\"x\",x)\n", "print(\"y\",y)\n", "# Calculate gradient (dy/dx=2x)\n", "y.sum().backward()\n", "# Print 
values\n", "print(x.grad)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "8HqqB9l0vE_i" }, "source": [ "#### Basic building blocks to make a neural network in Pytorch" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "yZ-lRq8OwKlz" }, "source": [ "**Define model step**\n", "Construct a network using torch.nn module" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "colab_type": "code", "id": "nQ2Ltqm3vDg5", "outputId": "595c3c3b-89ad-410d-9b85-96ea482ac248" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Linear(in_features=3, out_features=2, bias=True)\n" ] } ], "source": [ "net = torch.nn.Linear(3,2)\n", "print(net)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "12tazin_wZXL" }, "source": [ "forward() function" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 88 }, "colab_type": "code", "id": "M1YGEcoSwX-c", "outputId": "86ef77d4-d3c5-4a56-caac-61699a6039b9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([ 0.9696, -1.4331], grad_fn=)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/arbish/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "x = torch.tensor(torch.arange(0, 3).float(), requires_grad=True)\n", "y = net.forward(x) # or net(x)\n", "print(y)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nFitLGpvxwsM" }, "source": [ "parameters() function" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "1ShuDsaEv01v", "outputId": "8307d2b7-d65c-451b-d330-08554fa3db29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parameter containing:\n", "tensor([[-0.2149, 0.3190, 0.2837],\n", " [ 0.0011, -0.1010, -0.3801]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0832, -0.5718], requires_grad=True)\n" ] } ], "source": [ "for param in net.parameters():\n", " print(param)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "svXfQAw64GQv" }, "source": [ "Create a model by constructing a class MyNetwork. This class will inherit from the nn.Module class of Pytorch." 
{ "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "nFitLGpvxwsM" }, "source": [ "The parameters() function" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "1ShuDsaEv01v", "outputId": "8307d2b7-d65c-451b-d330-08554fa3db29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parameter containing:\n", "tensor([[-0.2149,  0.3190,  0.2837],\n", "        [ 0.0011, -0.1010, -0.3801]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0832, -0.5718], requires_grad=True)\n" ] } ], "source": [ "for param in net.parameters():\n", "    print(param)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "svXfQAw64GQv" }, "source": [ "Create a model by defining a class MyNetwork that inherits from PyTorch's nn.Module class." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 102 }, "colab_type": "code", "id": "gm-XqAIw2ABP", "outputId": "4647f8fa-8e0e-4035-8b0c-d3948a058f1f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MyNetwork(\n", "  (layer1): Linear(in_features=5, out_features=50, bias=True)\n", "  (layer2): Sigmoid()\n", "  (layer3): Linear(in_features=50, out_features=10, bias=True)\n", ")\n" ] } ], "source": [ "class MyNetwork(torch.nn.Module):\n", "    def __init__(self, input_dim, hidden_dim, output_dim):\n", "        \"\"\"\n", "        In this constructor we instantiate two nn.Linear modules and a\n", "        Sigmoid activation and assign them as member variables.\n", "\n", "        input_dim: input dimension\n", "        hidden_dim: dimension of hidden layer\n", "        output_dim: output dimension\n", "        \"\"\"\n", "        super().__init__()\n", "        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)\n", "        self.layer2 = torch.nn.Sigmoid()\n", "        self.layer3 = torch.nn.Linear(hidden_dim, output_dim)\n", "\n", "    def forward(self, x):\n", "        \"\"\"\n", "        Accept a tensor of input data and return a tensor of output data.\n", "        \"\"\"\n", "        x = self.layer1(x)\n", "        x = self.layer2(x)\n", "        x = self.layer3(x)\n", "        return x\n", "\n", "net = MyNetwork(5, 50, 10)\n", "print(net)" ] },
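{ "cell_type": "markdown", "metadata": {}, "source": [ "A quick sanity check of MyNetwork (a sketch): pass a random batch through the network and count its learnable parameters." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# batch of 8 samples with 5 features each -> expect output shape (8, 10)\n", "batch = torch.rand(8, 5)\n", "out = net(batch)\n", "print(out.shape)\n", "# total number of learnable parameters across all layers\n", "print(sum(p.numel() for p in net.parameters()))" ] },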
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "initialize child1 class\n", "initialize parent class\n", "initialize child2 class\n" ] } ], "source": [ "class parent:\n", "    ''' Creates a parent class '''\n", "    def __init__(self):\n", "        print(\"initialize parent class\")\n", "\n", "class child1(parent):\n", "    '''Inherits from parent, but does not run parent's init method '''\n", "    def __init__(self):\n", "        print(\"initialize child1 class\")\n", "\n", "class child2(parent):\n", "    '''Inherits from parent, but DOES run parent's init method'''\n", "    def __init__(self):\n", "        super().__init__()\n", "        print(\"initialize child2 class\")\n", "\n", "c1 = child1()\n", "c2 = child2()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": {}, "colab_type": "code", "id": "FmXYtubI2sMg" }, "outputs": [], "source": [ "# the same network as a torch.nn.Sequential; import nn so the\n", "# short names below resolve\n", "from torch import nn\n", "\n", "def neural_net(input_size, hidden_size, output_size):\n", "    return nn.Sequential(nn.Linear(input_size, hidden_size),\n", "                         nn.ReLU(),\n", "                         nn.Linear(hidden_size, output_size))\n", "net = neural_net(5, 50, 10)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "JHSlaUs64b1q" }, "source": [ "Now define the loss function and optimizer." ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 170 }, "colab_type": "code", "id": "TzpafTNA4djh", "outputId": "5c2e6b9f-dda4-4678-804b-1b6f3ecec12c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor(2.0477, grad_fn=<MseLossBackward>)\n", "param  Parameter containing:\n", "tensor([[ 0.4638,  0.3946,  0.0053],\n", "        [-0.4698,  0.2770, -0.3739]], requires_grad=True)\n", "param  Parameter containing:\n", "tensor([ 0.4330, -0.4905], requires_grad=True)\n", "param gradients tensor([[ 0.5496,  0.1314, -0.2081],\n", "        [-0.9448,  0.7071, -0.7055]])\n", "param gradients tensor([ 0.5799, -0.4996])\n" ] } ], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "optimizer = torch.optim.SGD(net.parameters(), lr=0.01)\n", "\n", "output = net(x)\n", "loss = criterion(output, y)\n", "print(loss)\n", "loss.backward()\n", "\n", "for param in net.parameters():\n", "    print(\"param \", param)\n", "\n", "for param in net.parameters():\n", "    print(\"param gradients\", param.grad)" ] }, { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "hgdbBMWC7aFz" }, "source": [ "PyTorch accumulates gradients: a second backward() call adds to the existing .grad buffers, so they must be reset with zero_grad() between updates." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 136 }, "colab_type": "code", "id": "YMdRiUMZ6wJi", "outputId": "c6dfd096-569a-49d6-a7a4-6df49db256c6" }, "outputs": [], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "\n", "# first backward pass\n", "output = net(x)\n", "loss = criterion(output, y)\n", "print(loss)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)\n", "\n", "# second backward pass WITHOUT zeroing: new gradients are added to the old\n", "output = net(x)\n", "loss = criterion(output, y)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)\n", "\n", "# zero the gradients, then backward again: fresh gradients\n", "net.zero_grad()\n", "output = net(x)\n", "loss = criterion(output, y)\n", "loss.backward()\n", "for param in net.parameters():\n", "    print(\"parameters gradients\", param.grad)" ] },
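{ "cell_type": "markdown", "metadata": {}, "source": [ "What optimizer.step() will do below for plain SGD, written out by hand (a sketch; reuses the net and gradients from the previous cell): every parameter moves against its gradient, scaled by the learning rate." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# manual SGD update: theta <- theta - lr * grad\n", "lr = 0.01\n", "with torch.no_grad():  # do not record the update itself in the graph\n", "    for param in net.parameters():\n", "        param -= lr * param.grad" ] },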
"https://localhost:8080/", "height": 272 }, "colab_type": "code", "id": "UO8jM6kB8Ryo", "outputId": "6fa6ff3a-caf7-4553-cc40-75417e72aec5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "parameters gradients tensor([[ 0.3660, 0.0393, -0.3672],\n", " [-0.4298, 0.5568, 0.1277]])\n", "parameters gradients tensor([ 0.6440, -0.7389])\n", "Parameters before gradient descent :\n", "Parameter containing:\n", "tensor([[ 0.3447, -0.1549, -0.1806],\n", " [-0.3115, 0.1928, 0.2706]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0963, -0.5436], requires_grad=True)\n", "Parameters after gradient descent :\n", "Parameter containing:\n", "tensor([[ 0.3410, -0.1553, -0.1769],\n", " [-0.3072, 0.1872, 0.2693]], requires_grad=True)\n", "Parameter containing:\n", "tensor([ 0.0899, -0.5362], requires_grad=True)\n" ] } ], "source": [ "x = torch.randn(10, 3)\n", "y = torch.randn(10, 2)\n", "\n", "net = torch.nn.Linear(3, 2)\n", "criterion = torch.nn.MSELoss()\n", "optimizer = torch.optim.SGD(net.parameters(), lr=0.01)\n", "\n", "net.zero_grad()\n", "output = net(x)\n", "loss = criterion(output,y)\n", "loss.backward()\n", "for param in net.parameters():\n", " print(\"parameters gradients\", param.grad)\n", "\n", "print(\"Parameters before gradient descent :\")\n", "for param in net.parameters():\n", " print(param)\n", "\n", "optimizer.step()\n", "\n", "print(\"Parameters after gradient descent :\")\n", "for param in net.parameters():\n", " print(param)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "colab_type": "code", "id": "8w0Eq1Gr8JHs", "outputId": "91b717b6-5193-410f-e552-a6152a20bf3e" }, "outputs": [], "source": [ "iterations = 10\n", "for i in range(iterations):\n", " optimizer.zero_grad() \n", " output = net(x)\n", " loss = criterion(output,y)\n", " loss.backward()\n", " optimizer.step()\n", " print(loss)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "Pytorch_tutorial_2", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 1 }