Machine Learning Notes - Pytorch
- Pytorch Introduction ¶
- Import PyTorch ¶
- Pytorch Tensor ¶
- Pytorch Autograd ¶
- Pytorch DataLoader ¶
- Pytorch Transforms ¶
- Pytorch Loss Functions ¶
- Torch Activation Functions ¶
- Torch Optimizer ¶
Pytorch Introduction ¶
PyTorch is an open source machine learning framework. You can find more information about PyTorch by following one of the official tutorials or by reading the documentation.
Import PyTorch ¶
torch.cuda.is_available()
# Import pytorch and check its version
import torch
import numpy as np
print(torch.__version__)
print(f'Is cuda available? {torch.cuda.is_available()}')
#
Pytorch Tensor ¶
Tensor Initialization ¶
torch.tensor(),
torch.from_numpy(),
torch.zeros_like(),
torch.ones_like(),
torch.rand(),
torch.ones(),
torch.zeros(),
torch.eye(),
torch.full(),
# Tensor Initialization
# Directly from data
data = [[1, 2],[3, 4]]
x_data = torch.tensor(data)
print(f"Direct Tensor: \n {x_np} \n")
# From a NumPy array
np_array = np.array(data)
x_np = torch.from_numpy(np_array)
print(f"Numpy Tensor: \n {x_np} \n")
# From another tensor:
x_zeros = torch.zeros_like(x_data)
print(f"Zeros Tensor: \n {x_zeros} \n")
x_ones = torch.ones_like(x_data)
print(f"Ones Tensor: \n {x_ones} \n")
x_rand = torch.rand_like(x_data, dtype=torch.float)
print(f"Random Tensor: \n {x_rand} \n")
shape = (2,3,)
rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)
full_tensor = torch.full(shape, 2)
print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}\n")
print(f"Full twos Tensor: \n {full_tensor}\n")
#
Tensor Attributes ¶
tensor.dim(),
tensor.shape,
tensor.dtype,
tensor.device,
# Tensor Attributes
tensor = torch.rand(3,4)
print(f"Dimension of tensor: {tensor.dim()}\n")
print(f"Shape of tensor: {tensor.shape}\n")
print(f"Datatype of tensor: {tensor.dtype}\n")
print(f"Device tensor is stored on: {tensor.device}\n")
#
Tensor Indexing ¶
tensor[start:stop:step]
# Tensor slicing
x = torch.tensor([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print('Original tensor:')
print(x)
print('shape: ', x.shape)
# Get row 1, and all columns.
print('\nSingle row:')
print(x[1, :])
print('shape: ', x[1, :].shape, x[1].shape)  # x[1] gives the same row
print('\nSingle column:')
print(x[:, 1])
print('shape: ', x[:, 1].shape)
# Get the first two rows and the last three columns
print('\nFirst two rows, last three columns:')
print(x[:2, -3:])
print('shape: ', x[:2, -3:].shape)
# Get every other row, and columns at index 1 and 2
print('\nEvery other row, middle columns:')
print(x[::2, 1:3])
print('shape: ', x[::2, 1:3].shape)
#
More generally, given index arrays idx0 and idx1 with N elements each, a[idx0, idx1] is equivalent to:
torch.tensor([
    a[idx0[0], idx1[0]],
    a[idx0[1], idx1[1]],
    ...,
    a[idx0[N - 1], idx1[N - 1]]
])
(A similar pattern extends to tensors with more than two dimensions.)
# Integer Index
x = torch.tensor([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print('Original tensor:')
print(x)
print('shape: ', x.shape)
idx = torch.tensor([3, 2, 1, 0]) # Index arrays can be int64 torch tensors
print('\nReordered columns:')
print(x[:, idx])
a = torch.tensor([0, 1, 0]) # Index arrays can be int64 torch tensors
b = torch.tensor([3, 2, 1]) # Index arrays can be int64 torch tensors
print('\nReordered rows/columns:')
print(x[a, b])
#
# Boolean indexing
x = torch.tensor([[1,2], [3, 4], [5, 6]])
print('Original tensor:')
print(x)
mask = (x > 3)
print('\nMask tensor:')
print(mask)
# We can use the mask to construct a rank-1 tensor containing the elements of a
# that are selected by the mask
print('\nSelecting elements with the mask:')
print(x[mask])
# We can also use boolean masks to modify tensors; for example this sets all
# elements <= 3 to zero:
x[x <= 3] = 0
print('\nAfter modifying with a mask:')
print(x)
#
Tensor Operations ¶
torch.cat(), torch.sum(), torch.mean(), torch.max(), torch.min(), torch.dot(),
torch.mm(), torch.mv(), torch.addmm(), torch.addmv(), torch.bmm(), torch.baddbmm(),
torch.matmul(), torch.broadcast_tensors()
Tensor Stacks ¶
# Tensor Operations
# Standard numpy-like indexing and slicing
tensor = torch.ones(4, 4)
tensor[:,1] = 0
tensor[2,2] = 2
tensor[3,3] = 3
print(f'{tensor}\n')
# When you use `torch.cat` and specify `dim=d`, the size of dimension `d` increases.
t_h = torch.cat([tensor, tensor], dim=1)
print(f'horizontal cat:\n {t_h}\n')
t_v = torch.cat([tensor, tensor], dim=0)
print(f'vertical cat:\n {t_v}\n')
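# Unlike `torch.cat`, `torch.stack` joins tensors along a NEW dimension
# (added illustration to match the section title "Tensor Stacks")
t_s = torch.stack([tensor, tensor], dim=0)
print(f'stack along new dim 0, shape: {t_s.shape}\n')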
#
Elementwise Operations ¶
# Elementwise operations
x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float32)
y = torch.tensor([[5, 6, 7, 8]], dtype=torch.float32)
# Elementwise sum; all give the same result
print('Elementwise sum:')
print(x + y)
print(torch.add(x, y))
print(x.add(y))
# Elementwise difference
print('\nElementwise difference:')
print(x - y)
print(torch.sub(x, y))
print(x.sub(y))
# Elementwise product
print('\nElementwise product:')
print(x * y)
print(torch.mul(x, y))
print(x.mul(y))
# Elementwise division
print('\nElementwise division')
print(x / y)
print(torch.div(x, y))
print(x.div(y))
# Elementwise power
print('\nElementwise power')
print(x ** y)
print(torch.pow(x, y))
print(x.pow(y))
#
Reduction Operations ¶
Reduction operations reduce the rank of tensors: the dimension over which you perform the reduction will be removed from the shape of the output. If you pass keepdim=True to a reduction operation, the specified dimension will not be removed; the output tensor will instead have a shape of 1 in that dimension.
# Reduction
# When you reduce and specify `dim=d`, dimension `d` is removed (unless keepdim=True).
x = torch.tensor([[1, 2, 3],
[4, 5, 6]], dtype=torch.float32)
print('Original tensor:')
print(x)
print('\nSum over entire tensor:')
print(torch.sum(x))
# Summing over dim=0 (the rows) gives the sum of each column:
print('\nSum of each column:')
print(torch.sum(x, dim=0))
# Summing over dim=1 (the columns) gives the sum of each row:
print('\nSum of each row:')
print(torch.sum(x, dim=1))
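# Passing keepdim=True keeps the reduced dimension with size 1
# (added illustration of the keepdim behaviour described above)
print('\nSum of each row with keepdim=True:')
print(torch.sum(x, dim=1, keepdim=True))
print('shape: ', torch.sum(x, dim=1, keepdim=True).shape)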
#
Matrix Operations ¶
- @ performs matrix multiplication, equivalent to torch.matmul(); related routines include torch.mm(), torch.mv(), and torch.bmm().
- torch.matmul()
  - If both tensors are 1-dimensional, the dot product (scalar) is returned.
  - If both arguments are 2-dimensional, the matrix-matrix product is returned.
  - If the first argument is 2-dimensional and the second argument is 1-dimensional, the matrix-vector product is returned.
  - If the first argument is 1-dimensional and the second argument is 2-dimensional, a 1 is prepended to its dimension for the purpose of the matrix multiply. After the matrix multiply, the prepended dimension is removed.
  - Supports broadcasting.
# Matrix Operations
import torch
# torch.dot
x0 = torch.dot(torch.tensor([2, 3]), torch.tensor([2, 1]))
print(f'\n x0 shape: {x0.shape}')
# torch.mm
x1 = torch.mm(torch.randn(2, 3), torch.randn(3, 3))
print(f'\n x1 shape: {x1.shape}')
# torch.mv
x2 = torch.mv(torch.randn(2, 3), torch.randn(3))
print(f'\n x2 shape: {x2.shape}')
# torch.addmm
M = torch.randn(2, 3)
m1 = torch.randn(2, 3)
m2 = torch.randn(3, 3)
# m1 @ m2 + M
x3 = torch.addmm(M, m1, m2)
print(f'\n x3 shape: {x3.shape}')
# torch.bmm
bm1 = torch.randn(10, 3, 4)
bm2 = torch.randn(10, 4, 5)
x4 = torch.bmm(bm1, bm2)
print(f'\n x4 shape: {x4.shape}')
# torch.matmul()
# vector x vector
print(f'\n vector x vector {torch.matmul(torch.randn(3), torch.randn(3)).size()}')
# matrix x vector
print(f' matrix x vector {torch.matmul(torch.randn(3, 4), torch.randn(4)).size()}')
# batched matrix x broadcasted vector
print(f' batched matrix x broadcasted vector {torch.matmul(torch.randn(10, 3, 4), torch.randn(4)).size()}')
# batched matrix x batched matrix
print(f' batched matrix x batched matrix {torch.matmul(torch.randn(10, 3, 4), torch.randn(10, 4, 5)).size()}')
# batched matrix x broadcasted matrix
print(f' batched matrix x broadcasted matrix {torch.matmul(torch.randn(10, 3, 4), torch.randn(4, 5)).size()}')
#
Tensor Broadcasting ¶
- Each tensor has at least one dimension.
- When iterating over the dimension sizes, starting at the trailing dimension, the dimension sizes must either be equal, one of them is 1, or one of them does not exist.
# Tensor Broadcasting
x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
v = torch.tensor([1, 0, 1])
print('Here is x (before broadcasting):')
print(x)
print('x.shape: ', x.shape)
print('\nHere is v (before broadcasting):')
print(v)
print('v.shape: ', v.shape)
xx, vv = torch.broadcast_tensors(x, v)
print('\nHere is xx (after broadcasting):')
print(xx)
print('xx.shape: ', xx.shape)
print('\nHere is vv (after broadcasting):')
print(vv)
print('vv.shape: ', vv.shape)
#
Tensor Data Type ¶
tensor.to(),
tensor.new_zeros(),
tensor.float(),
tensor.double(),
# Tensor Data Type
# Let torch choose the datatype
x0 = torch.tensor([1, 2]) # List of integers
x1 = torch.tensor([1., 2.]) # List of floats
x2 = torch.tensor([1., 2]) # Mixed list
print('dtype when torch chooses for us:')
print('List of integers:', x0.dtype)
print('List of floats:', x1.dtype)
print('Mixed list:', x2.dtype)
# Force a particular datatype
y0 = torch.tensor([1, 2], dtype=torch.float32) # 32-bit float
y1 = torch.tensor([1, 2], dtype=torch.int32) # 32-bit (signed) integer
y2 = torch.tensor([1, 2], dtype=torch.int64) # 64-bit (signed) integer
print('\ndtype when we force a datatype:')
print('32-bit float: ', y0.dtype)
print('32-bit integer: ', y1.dtype)
print('64-bit integer: ', y2.dtype)
# Other creation ops also take a dtype argument
z0 = torch.ones(1, 2) # Let torch choose for us
z1 = torch.ones(1, 2, dtype=torch.int16) # 16-bit (signed) integer
z2 = torch.ones(1, 2, dtype=torch.uint8) # 8-bit (unsigned) integer
print('\ntorch.ones with different dtypes')
print('default dtype:', z0.dtype)
print('16-bit integer:', z1.dtype)
print('8-bit unsigned integer:', z2.dtype)
x0 = torch.eye(3, dtype=torch.int64)
x1 = x0.float() # Cast to 32-bit float
x2 = x0.double() # Cast to 64-bit float
x3 = x0.to(torch.float32) # Alternate way to cast to 32-bit float
x4 = x0.to(torch.float64) # Alternate way to cast to 64-bit float
print('\nx0:', x0.dtype)
print('x1:', x1.dtype)
print('x2:', x2.dtype)
print('x3:', x3.dtype)
print('x4:', x4.dtype)
x0 = torch.eye(3, dtype=torch.float64) # Shape (3, 3), dtype torch.float64
x1 = torch.zeros_like(x0) # Shape (3, 3), dtype torch.float64
x2 = x0.new_zeros(4, 5) # Shape (4, 5), dtype torch.float64
x3 = torch.ones(6, 7).to(x0) # Shape (6, 7), dtype torch.float64
print('\nx0 shape is %r, dtype is %r' % (x0.shape, x0.dtype))
print('x1 shape is %r, dtype is %r' % (x1.shape, x1.dtype))
print('x2 shape is %r, dtype is %r' % (x2.shape, x2.dtype))
print('x3 shape is %r, dtype is %r' % (x3.shape, x3.dtype))
#
Tensor Reshape ¶
tensor.view(), tensor.reshape(), tensor.transpose(), tensor.permute(), tensor.contiguous()
The view() function takes elements in row-major order, so you cannot transpose matrices with view().
# tensor.view()
x0 = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
print('Original tensor:')
print(x0)
print('shape:', x0.shape)
# Flatten x0 into a rank 1 vector of shape (8,)
# Takes elements in row-major order
x1 = x0.view(-1)
print('\nFlattened tensor:')
print(x1)
print('shape:', x1.shape)
# Rank 2 tensor
x2 = x0.view(-1, 1)
print('\nRank 2 tensor:')
print('shape:', x2.shape)
# View x0 as shape (2, 2, 2)
x3 = x0.view(-1, 2, 2)
print('\nRank 3 tensor:')
print('shape:', x3.shape)
#
# Tensor transpose and permute
# Create a tensor of shape (2, 3, 4)
x0 = torch.tensor([
[[1, 2, 3, 4],
[5, 6, 7, 8],
[9, 10, 11, 12]],
[[13, 14, 15, 16],
[17, 18, 19, 20],
[21, 22, 23, 24]]])
print('Original tensor:')
print(x0)
print('shape:', x0.shape)
# Swap axes 1 and 2; shape is (2, 4, 3)
x1 = x0.transpose(1, 2)
print('\nSwap axes 1 and 2:')
print(x1)
print(x1.shape)
# Permute axes; the argument (1, 2, 0) means:
# - Make the old dimension 1 appear at dimension 0;
# - Make the old dimension 2 appear at dimension 1;
# - Make the old dimension 0 appear at dimension 2
# This results in a tensor of shape (3, 4, 2)
x2 = x0.permute(1, 2, 0)
print('\nPermute axes')
print(x2)
print('shape:', x2.shape)
#
Tensor Test ¶
torch.all(), torch.isclose(), torch.allclose()
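A minimal illustration of these checks (the tensors below are chosen just for demonstration):
# Tensor Test
x = torch.tensor([1.0, 2.0, 3.0])
y = x + 1e-7
print(torch.all(x > 0))        # True if every element satisfies the condition
print(torch.isclose(x, y))     # elementwise closeness within a tolerance
print(torch.allclose(x, y))    # True if all elements are close
#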
Tensor Range ¶
torch.arange(), torch.linspace()
# Tensor Range
# torch.arange(start=0, end, step=1, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False)
print(torch.arange(0, 10, 2))    # tensor([0, 2, 4, 6, 8])
# torch.linspace(start, end, steps, *, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False) → Tensor
print(torch.linspace(0, 1, 5))   # tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])
#
Tensor GPU ¶
# Tensor on GPU
# Construct a tensor on the CPU
x0 = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
print('x0 device:', x0.device)
if torch.cuda.is_available():
    # Move it to the GPU using .to()
    x1 = x0.to('cuda')
    print('x1 device:', x1.device)
    # Move it to the GPU using .cuda()
    x2 = x0.cuda()
    print('x2 device:', x2.device)
    # Move it back to the CPU using .to()
    x3 = x1.to('cpu')
    print('x3 device:', x3.device)
    # Move it back to the CPU using .cpu()
    x4 = x2.cpu()
    print('x4 device:', x4.device)
    # We can construct tensors directly on the GPU as well
    y = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float64, device='cuda')
    print('y device / dtype:', y.device, y.dtype)
    # Calling x.to(y) where y is a tensor will return a copy of x with the same
    # device and dtype as y
    x5 = x0.to(y)
    print('x5 device / dtype:', x5.device, x5.dtype)
else:
    print('Cuda is not available.')
#
Pytorch Autograd ¶
tensor.requires_grad_(), tensor.detach()
Every Tensor has a flag, requires_grad, that allows for fine-grained exclusion of subgraphs from gradient computation and can increase efficiency.
# Pytorch autograd
import torch
x = torch.randn(5, 5) # requires_grad=False by default
y = torch.randn(5, 5) # requires_grad=False by default
z = torch.randn((5, 5), requires_grad=True)
a = x + y
print(f'a requires_grad: {a.requires_grad}')
b = (a + 2*z).sum()
print(f'b requires_grad: {b.requires_grad}')
b.backward()
print(f'z grad: \n {z.grad}')
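# tensor.requires_grad_() flips the flag in place; tensor.detach() returns a
# tensor that shares data but is excluded from gradient tracking
# (added illustration of the two functions listed above)
w = torch.randn(3)
w.requires_grad_()
print(f'w requires_grad: {w.requires_grad}')
w_detached = w.detach()
print(f'w_detached requires_grad: {w_detached.requires_grad}')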
#
Pytorch DataLoader ¶
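A DataLoader wraps a dataset and provides batching, shuffling, and parallel loading. A minimal sketch, assuming a toy TensorDataset of random features and labels (the data here is only a placeholder):
# Pytorch DataLoader (minimal sketch; the random dataset is only a placeholder)
import torch
from torch.utils.data import TensorDataset, DataLoader
features = torch.randn(100, 4)
labels = torch.randint(0, 2, (100,))
dataset = TensorDataset(features, labels)
loader = DataLoader(dataset, batch_size=16, shuffle=True)
for batch_features, batch_labels in loader:
    print(batch_features.shape, batch_labels.shape)
    break
#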
Pytorch Transforms ¶
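Transforms preprocess and augment data before it is fed to a model. A minimal sketch using torchvision.transforms (assuming torchvision and PIL are installed; the random grayscale image is only a placeholder):
# Pytorch Transforms (minimal torchvision sketch; the dummy image is only a placeholder)
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),                       # PIL image -> float tensor in [0, 1]
    transforms.Normalize(mean=[0.5], std=[0.5])  # (x - mean) / std per channel
])
img = Image.fromarray(np.random.randint(0, 256, (64, 64), dtype=np.uint8))
out = transform(img)
print(out.shape, out.dtype)
#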
Pytorch Loss Functions ¶
torch.nn.MSELoss(), torch.nn.CrossEntropyLoss(), torch.nn.MultiLabelMarginLoss()
# torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
import torch
loss = torch.nn.MSELoss()
x = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)
output = loss(x, target)
output.backward()
#
CrossEntropyLoss() ¶
- Input: (N, C), N is batch size, C is number of classes
- Target: (N,), $0 \leq target[i] \leq C-1$
- Output: scalar; if reduction is 'none', then (N,)
# torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
import torch
loss = torch.nn.CrossEntropyLoss(reduction='mean')
x = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(x, target)
output.backward()
# Image pixel-level classification with 5 classes
width = 10
height = 10
loss = torch.nn.CrossEntropyLoss(reduction='mean')
x = torch.randn(3, 5, width, height, requires_grad=True)
target = torch.empty(3, width, height, dtype=torch.long).random_(5)
output = loss(x, target)
output.backward()
#
MultiLabelMarginLoss() ¶
Creates a criterion that optimizes a multi-class multi-classification hinge loss; that is, a sample x can have multiple correct labels.
- Input: (C,) or (N, C), N is batch size, C is number of classes
- Target: (C,) or (N, C), label targets after the first -1 are ignored
- Output: Scalar. If reduction is 'none', then (N,)
# torch.nn.MultiLabelMarginLoss(size_average=None, reduce=None, reduction='mean')
import torch
loss = torch.nn.MultiLabelMarginLoss()
x = torch.FloatTensor([[0.1, 0.2, 0.4, 0.8]])
# Single correct label (label 3); targets after the first -1 are ignored
y = torch.LongTensor([[3, -1, -1, -1]])
output = loss(x, y)
print(f'single class loss: {output.item():.4f}')
expected = torch.tensor([0.25 * ((1-(0.8-0.1)) + (1-(0.8-0.2)) + (1-(0.8-0.4)))])
assert(torch.isclose(output, expected))
# Multi-label hinge loss with correct labels 3 and 0
y = torch.LongTensor([[3, 0, -1, -1]])
output = loss(x, y)
print(f'multi-class loss: {output.item():.4f}')
expected = torch.tensor([0.25 * ((1-(0.1-0.2)) + (1-(0.1-0.4)) + (1-(0.8-0.2)) + (1-(0.8-0.4)))])
assert(torch.isclose(output, expected))
#
Torch Activation Functions ¶
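Activation functions introduce non-linearity between layers. A brief illustration of a few common ones (the input values below are chosen just for demonstration):
# Torch Activation Functions (brief illustration)
import torch
import torch.nn as nn
x = torch.linspace(-3, 3, 7)
print('relu:   ', torch.relu(x))
print('sigmoid:', torch.sigmoid(x))
print('tanh:   ', torch.tanh(x))
print('softmax:', torch.softmax(x, dim=0))
# Module versions can be used inside nn.Sequential
act = nn.ReLU()
print('nn.ReLU:', act(x))
#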
Torch Optimizer ¶
torch.optim.SGD(), torch.optim.RMSprop(), torch.optim.Adam()
torch.optim is a package implementing various optimization algorithms. Most commonly used methods are already supported, and the interface is general enough that more sophisticated ones can also be easily integrated in the future.
To use torch.optim you have to construct an optimizer object that will hold the current state and update the parameters based on the computed gradients.
Note: If you need to move a model to GPU via .cuda(), please do so before constructing optimizers for it. Parameters of a model after .cuda() are different objects from those before the call.
Construct Optimizers ¶
# Construct an optimizer
# (`model`, `var1`, and `var2` are placeholders for an existing nn.Module and tensors to optimize)
import torch
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = torch.optim.Adam([var1, var2], lr=0.0001)
#
Set Parameter Options ¶
Per-parameter options are set by passing an iterable of dicts (parameter groups) instead of a single iterable of parameters. In the example below, model.layer1's parameters use the default learning rate of 0.001 with a momentum of 0.8, while model.layer2's parameters use a learning rate of 0.01 with the default momentum of 0.9.
# Set Parameter Options
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, config={}):
        super(Model, self).__init__()
        num_classes = config.setdefault('num_classes', 10)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7*7*32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = Model()
# layer1 uses the default lr (0.001) with momentum 0.8;
# layer2 uses lr 0.01 with the default momentum (0.9)
optimizer = torch.optim.SGD([
    {'params': model.layer1.parameters(), 'momentum': 0.8},
    {'params': model.layer2.parameters(), 'lr': 0.01}
], lr=0.001, momentum=0.9)
print(optimizer)
for i in range(len(optimizer.param_groups)):
    print(f'group_id: {i}, lr: {optimizer.param_groups[i]["lr"]}, momentum: {optimizer.param_groups[i]["momentum"]}')
#
Model Parameter ¶
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, config={}):
        super(Model, self).__init__()
        num_classes = config.setdefault('num_classes', 10)
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7*7*32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = Model()
print('Named parameters...')
for name, param in model.named_parameters():
    print(name)
    print(param.size(), '\n')
# print('Parameters...')
# for param in model.parameters():
#     print(param.size(), '\n')
#
Taking an Optimization Step ¶
# Taking an Optimization Step
# (`dataset`, `model`, and `loss_fn` are placeholders for your data loader, network, and loss)
for input, target in dataset:
    optimizer.zero_grad()
    output = model(input)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
#