PyTorch with Examples

CS231n Lecture 6: Deep Learning Hardware and Software
Learning PyTorch with Examples - Tutorials

Tensor

Tensor: building and training a network by hand

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-4
for t in range(500):
    # Forward pass: linear, ReLU, linear, then squared-error loss
    h = x.mm(w1)
    h_relu = h.clamp(min=0)
    y_pred = h_relu.mm(w2)
    loss = (y_pred - y).pow(2).sum()

    # Backward pass: gradients derived by hand via the chain rule
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Gradient descent step; no autograd here, so use the manual gradients
    # (w1.grad / w2.grad do not exist in this version)
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

Autograd

Autograd: automatic differentiation

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
# requires_grad=True tells autograd to track operations on these tensors
w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-4
for t in range(500):
    y_pred = x.mm(w1).clamp(min=0).mm(w2)
    loss = (y_pred - y).pow(2).sum()

    # Compute gradients of loss w.r.t. w1 and w2
    loss.backward()

    with torch.no_grad():  # Don't build a computational graph for this part
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        # Gradients accumulate across backward() calls, so zero them each step
        w1.grad.zero_()
        w2.grad.zero_()
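
Autograd can also be extended: the tutorial referenced above covers defining new autograd functions by subclassing torch.autograd.Function with your own forward and backward. A minimal sketch of a hand-written ReLU (the name MyReLU is illustrative):

import torch

class MyReLU(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        # Save the input so backward() can see where x < 0
        ctx.save_for_backward(x)
        return x.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        x, = ctx.saved_tensors
        grad_x = grad_output.clone()
        grad_x[x < 0] = 0  # kill gradients where ReLU was inactive
        return grad_x

# Drop-in replacement for clamp(min=0) in the loop above:
# y_pred = MyReLU.apply(x.mm(w1)).mm(w2)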

torch.nn

nn: using built-in layers and models

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# NN model: a sequence of layers
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out))

learning_rate = 1e-4
for t in range(500):
    y_pred = model(x)
    loss = torch.nn.functional.mse_loss(y_pred, y)

    loss.backward()

    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    model.zero_grad()

torch.optim

optimizer: using a built-in optimizer

import torch

N, D_in, H, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# NN model: a sequence of layers
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out))

learning_rate = 1e-4
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for t in range(500):
    y_pred = model(x)
    loss = torch.nn.functional.mse_loss(y_pred, y)

    loss.backward()

    # The optimizer updates every registered parameter and resets gradients
    optimizer.step()
    optimizer.zero_grad()
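
SGD is just one update rule; any other torch.optim optimizer is a drop-in replacement. A sketch using Adam (the 1e-4 learning rate is carried over from above, not tuned):

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)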

torch.nn.Module

nn.Module: defining a custom model
A PyTorch Module is a neural net layer; Modules can contain weights or other Modules.
Subclassing nn.Module lets you define your own network or layer; within a custom Module, ModuleList can register a variable number of sub-layers; and custom Modules can be composed further with built-in layers via the Sequential container.

import torch

#.....
class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        h_relu = self.linear1(x).clamp(min=0)
        y_pred = self.linear2(h_relu)
        return y_pred
# No need to define backward - autograd will handle it.

model = TwoLayerNet(D_in, H, D_out)

#.....
class ParallelBlock(torch.nn.Module):
    def __init__(self, D_in, D_out):
        super(ParallelBlock, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, D_out)
        self.linear2 = torch.nn.Linear(D_in, D_out)

    def forward(self, x):
        h1 = self.linear1(x)
        h2 = self.linear2(x)
        return (h1 * h2).clamp(min=0)

# Mix custom Modules with the `Sequential` container
model = torch.nn.Sequential(
    ParallelBlock(D_in, H),
    ParallelBlock(H, H),
    torch.nn.Linear(H, D_out))

Three ways of creating a neural network in PyTorch: nn.Module, nn.Sequential, nn.ModuleList.
When should I use nn.ModuleList and when should I use nn.Sequential?
In nn.Sequential, the stored nn.Modules are connected in a cascade, and the container provides its own forward() method.
nn.ModuleList has no forward() method and makes no connection between the nn.Modules it stores.
nn.ModuleList is essentially a Python list with one enhancement: the parameters of the Modules it holds are registered, so the optimizer can discover them (see the sketch below).
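
A minimal sketch of the ModuleList pattern (the class name StackedNet and the layer count are illustrative): because ModuleList provides no forward(), you write the wiring yourself.

import torch

class StackedNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out, num_hidden=3):
        super(StackedNet, self).__init__()
        # ModuleList registers each layer's parameters with the Module,
        # so model.parameters() sees them all
        self.layers = torch.nn.ModuleList(
            [torch.nn.Linear(D_in, H)] +
            [torch.nn.Linear(H, H) for _ in range(num_hidden - 1)])
        self.out = torch.nn.Linear(H, D_out)

    def forward(self, x):
        # No forward() comes with ModuleList: connect the layers yourself
        for layer in self.layers:
            x = layer(x).clamp(min=0)
        return self.out(x)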

DataLoader

DataLoader (torch.utils.data): mini-batching, shuffling, and multi-process data loading (the shuffle and worker options are sketched after the training loop below).

import torch
from torch.utils.data import TensorDataset, DataLoader

#...

loader = DataLoader(TensorDataset(x, y), batch_size=8)

for epoch in range(20):
    for x_batch, y_batch in loader:
        y_pred = model(x_batch)
        loss = torch.nn.functional.mse_loss(y_pred, y_batch)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
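
The loader above uses the defaults; shuffling and multi-process loading are opt-in. A sketch of the relevant keyword arguments (the values shown are illustrative):

# shuffle=True reshuffles the dataset at the start of every epoch;
# num_workers > 0 loads batches in separate worker processes
loader = DataLoader(TensorDataset(x, y), batch_size=8,
                    shuffle=True, num_workers=2)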