# <center>CS568:Deep Learning</center>  <center>Spring 2020</center> 

In [2]:
import torch

The Pytorch tensors can be copied from CPU to GPU. 

In [3]:
# set seed for repeatability
torch.manual_seed(22)
# create a pytorch tensor
x = torch.randn(2,2)
# copy x to GPU
y = x.cuda()
# copy x back to CPU
z = y.cpu()

try:
    print(y.numpy())
except RuntimeError as e:
    print("Error: ", e)

AssertionError: Torch not compiled with CUDA enabled

To perform any operation, both tensors should be on same device.


In [4]:
# a tensor on CPU
a = torch.rand(2,3)
# b tensor on GPU
b = torch.rand(3,2).cuda() 
try:
    result = torch.mm(a,b)  
    print(result)
except TypeError as e:
    print(e)

AssertionError: Torch not compiled with CUDA enabled

Check if GPU is available then move tensors to GPU.

In [5]:
a = torch.rand(2,2)
if torch.cuda.is_available():
    a = a.cuda()
    print(a)

GPU vs CPU 

In [None]:
from timeit import timeit 

def matmul():
    res = torch.mm(a, b) 

# create two random tensors 
a = torch.rand(1000, 1280)
b = torch.rand(1280, 1)
ite = 1000

# Time CPU takes for matrix multiplication
print('CPU: {} seconds'.format(timeit(matmul, number=ite)))
# Time GPU takes for matrix multiplication
a, b = a.cuda(), b.cuda()
print('GPU: {} seconds'.format(timeit(matmul, number=ite)))

Pytorch tensors provide automatic differentiation. If you set requires_grad=True pytorch will compute the gradient of this tensor. 

In [7]:
# construct a differentiable tensor
x = torch.tensor(torch.arange(1,5), requires_grad=False)
print("x", x.dtype)
y = torch.tensor(torch.arange(1,5).float(), requires_grad=True)
print("y", y.dtype)

x torch.int64
y torch.float32


  
  after removing the cwd from sys.path.


The tensor y depends on x and x has discrete values. Therefore, we cannot compute gradient of tensor y. 

In [8]:
x = torch.tensor(torch.arange(1,5), requires_grad=False)
print("x", x.dtype)
# compute square of x
y = x**2
print("x",x)
print("y",y)
# Calculate gradient (dy/dx=2x)
y.sum().backward()
# Print values
print(x.grad)

x torch.int64
x tensor([1, 2, 3, 4])
y tensor([ 1,  4,  9, 16])


  """Entry point for launching an IPython kernel.


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [9]:
x = torch.tensor(torch.arange(1,5).float(), requires_grad=True)
print("x", x.dtype)
# compute square of x
y = x**2
print("x",x)
print("y",y)
# Calculate gradient (dy/dx=2x)
y.sum().backward()
# Print values
print(x.grad)

x torch.float32
x tensor([1., 2., 3., 4.], requires_grad=True)
y tensor([ 1.,  4.,  9., 16.], grad_fn=<PowBackward0>)
tensor([2., 4., 6., 8.])


  """Entry point for launching an IPython kernel.


#### Basic building blocks to make a neural network in Pytorch

**Define model step**
Construct a network using torch.nn module

In [10]:
net = torch.nn.Linear(3,2)
print(net)

Linear(in_features=3, out_features=2, bias=True)


forward() function

In [11]:
x = torch.tensor(torch.arange(0, 3).float(), requires_grad=True)
y = net.forward(x) # or net(x)
print(y)

tensor([ 0.9696, -1.4331], grad_fn=<AddBackward0>)


  """Entry point for launching an IPython kernel.


parameters() function

In [12]:
for param in net.parameters():
      print(param)

Parameter containing:
tensor([[-0.2149,  0.3190,  0.2837],
        [ 0.0011, -0.1010, -0.3801]], requires_grad=True)
Parameter containing:
tensor([ 0.0832, -0.5718], requires_grad=True)


Create a model by constructing a class MyNetwork. This class will inherit from the nn.Module class of Pytorch.

In [16]:
class MyNetwork(torch.nn.Module):    
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        """
        In this constructor we instantiate two nn.Linear modules and assign them as
        member variables.

        input_dim: input dimension
        hidden_dim: dimension of hidden layer
        output_dim: output dimension
        """
        self.layer1 = torch.nn.Linear(input_dim, hidden_dim)
        self.layer2 = torch.nn.Sigmoid()
        self.layer3 = torch.nn.Linear(hidden_dim , output_dim)
        

    def forward(self, x):
        """
        In this function we accept a Variable of input data and we must 
        return a Variable of output data. 
        """
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

net = MyNetwork(5, 50, 10)
print(net)

MyNetwork(
  (layer1): Linear(in_features=5, out_features=50, bias=True)
  (layer2): Sigmoid()
  (layer3): Linear(in_features=50, out_features=10, bias=True)
)


In [15]:
class parent:
    ''' Creates a parent class '''
    def __init__(self):
        print("initialize parent class")

class child1(parent):
    '''Inherits from parent, but does not run parent's init method '''
    def __init__(self):
        print("initialize child1 class")

class child2(parent):
    '''Inhereits from parent, but DOES run parents's init method'''
    def __init__(self):
        super().__init__()
        print("initialize child2 class")

c1 = child1()
c2 = child2()

initialize child1 class
initialize parent class
initialize child2 class


In [20]:
def neural_net(input_size, hidden_size, output_size):
      return torch.nn.Sequential(nn.Linear(input_size, hidden_size),
                       nn.ReLU(),
                       nn.Linear(hidden_size, output_size))
net = neural_net(5, 50, 10)

NameError: name 'nn' is not defined

Now define loss function 

In [21]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

output = net(x)
loss = criterion(output,y)
print(loss)
loss.backward()

for param in net.parameters():
    print("param ", param)

for param in net.parameters():
    print("param gradients", param.grad)

tensor(2.0477, grad_fn=<MseLossBackward>)
param  Parameter containing:
tensor([[ 0.4638,  0.3946,  0.0053],
        [-0.4698,  0.2770, -0.3739]], requires_grad=True)
param  Parameter containing:
tensor([ 0.4330, -0.4905], requires_grad=True)
param gradients tensor([[ 0.5496,  0.1314, -0.2081],
        [-0.9448,  0.7071, -0.7055]])
param gradients tensor([ 0.5799, -0.4996])


Pyotrch accumulate gradients

In [None]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()

output = net(x)
loss = criterion(output,y)
print(loss)
loss.backward()
for param in net.parameters():
    print("parameters gradients", param.grad)

output = net(x)
loss = criterion(output,y)
loss.backward()
for param in net.parameters():
     print("parameters gradients", param.grad)

net.zero_grad()
output = net(x)
loss = criterion(output,y)
loss.backward()
for param in net.parameters():
     print("parameters gradients", param.grad)

In [22]:
x = torch.randn(10, 3)
y = torch.randn(10, 2)

net = torch.nn.Linear(3, 2)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

net.zero_grad()
output = net(x)
loss = criterion(output,y)
loss.backward()
for param in net.parameters():
     print("parameters gradients", param.grad)

print("Parameters before gradient descent :")
for param in net.parameters():
    print(param)

optimizer.step()

print("Parameters after gradient descent :")
for param in net.parameters():
    print(param)


parameters gradients tensor([[ 0.3660,  0.0393, -0.3672],
        [-0.4298,  0.5568,  0.1277]])
parameters gradients tensor([ 0.6440, -0.7389])
Parameters before gradient descent :
Parameter containing:
tensor([[ 0.3447, -0.1549, -0.1806],
        [-0.3115,  0.1928,  0.2706]], requires_grad=True)
Parameter containing:
tensor([ 0.0963, -0.5436], requires_grad=True)
Parameters after gradient descent :
Parameter containing:
tensor([[ 0.3410, -0.1553, -0.1769],
        [-0.3072,  0.1872,  0.2693]], requires_grad=True)
Parameter containing:
tensor([ 0.0899, -0.5362], requires_grad=True)


In [None]:
iterations = 10
for i in range(iterations):
    optimizer.zero_grad() 
    output = net(x)
    loss = criterion(output,y)
    loss.backward()
    optimizer.step()
    print(loss)