《TensorFlow vs PyTorch 7: 创建模型》后,就该训练模型了。
在PyTorch中
- 需要添加用于测量预测值与真实值之间差异的损失函数
- 更新模型参数的优化器
- 定义训练循环和测试循环
完整模型训练代码,如下所示:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import os
# Work around "OMP: Error #15" (duplicate OpenMP runtime) seen on some setups.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" # Solve the OMP: Error #15
# Download FashionMNIST into ./data; ToTensor converts images to float tensors.
train_dataset = datasets.FashionMNIST(root='data',train=True, download=True, transform=ToTensor())
test_dataset = datasets.FashionMNIST(root='data',train=False, download=True, transform=ToTensor())
# Batch both splits; only the training split is shuffled.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64)
# Peek at one batch to sanity-check tensor shapes and a sample label.
train_batch_data, train_batch_labels = next(iter(train_dataloader))
print(f"train_batch_data shape: {train_batch_data.size()}")
print(f"train_batch_labels shape: {train_batch_labels.size()}")
print(train_batch_labels[0])
import torch.nn as nn
import torch.nn.functional as F
class MyCNN(nn.Module):
    """Small CNN mapping 28x28 single-channel images to 10 class logits."""

    def __init__(self):
        super().__init__()
        # Spatial sizes: 28 -conv1(3)-> 26 -pool(2)-> 13 -conv2(3)-> 11
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16 * 11 * 11, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return raw logits; no final activation (CrossEntropyLoss expects logits)."""
        features = self.maxpool(F.relu(self.conv1(x)))
        features = F.relu(self.conv2(features))
        flat = features.flatten(1)  # keep dim 0 (batch); flatten the rest
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
# Select GPU when available, otherwise fall back to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyCNN().to(device) # move the model onto the chosen device
# Hyperparameters for the run.
learning_rate = 1e-3
batch_size = 64
epochs = 5
# Cross-entropy over raw logits; Adam updates all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over `dataloader`, printing progress every 100 steps.

    Args:
        dataloader: yields (X, y) mini-batches.
        model: network being trained (already moved to its target device).
        loss_fn: criterion comparing predictions to integer targets.
        optimizer: optimizer holding `model`'s parameters.
    """
    size = len(dataloader.dataset)
    # Derive the target device from the model itself instead of relying on a
    # module-level `device` global; falls back to CPU for parameterless models.
    device = next(model.parameters(), torch.empty(0)).device
    model.train()  # enable training-mode behavior (dropout, batch-norm updates)
    for steps, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)  # move the batch to the model's device
        pred = model(X)        # forward pass
        loss = loss_fn(pred, y)
        optimizer.zero_grad()  # clear gradients from the previous step
        loss.backward()        # backpropagate
        optimizer.step()       # update parameters
        if steps % 100 == 0:
            # steps * len(X) = samples processed before this batch; exact here
            # because only the final batch of an epoch can be short.
            current = steps * len(X)
            print(f"loss:{loss.item():>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
test_loss, correct = 0.0, 0.0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device) # 将数据载入GPU
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
# Full training schedule: one training pass plus one evaluation per epoch.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")
运行结果如下:
Epoch 5
loss:0.298207 [32000/60000]
loss:0.296418 [38400/60000]
loss:0.390596 [44800/60000]
loss:0.444690 [51200/60000]
loss:0.282875 [57600/60000]
Test Error:
Accuracy: 89.9%, Avg loss: 0.280973
Done!
在TensorFlow中
- 需要添加用于测量预测值与真实值之间差异的损失函数
- 更新模型参数的优化器
- 定义训练循环和测试循环
完整模型训练代码,如下所示:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Dummy NHWC batch used below for a smoke-test forward pass.
inputs = tf.random.normal([64,28,28,1]) #The Conv2D op currently only supports the NHWC tensor format on the CPU
# Load FashionMNIST as raw numpy arrays (uint8 images, integer labels).
(training_data, training_labels) , (test_data, test_labels)= tf.keras.datasets.fashion_mnist.load_data()
# Add a channel axis, scale pixels to [0, 1] floats, keep labels as int64.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(training_data[...,tf.newaxis]/255, tf.float32),
     tf.cast(training_labels,tf.int64)))
# Shuffle (buffer of 1000) and batch the training pipeline.
train_dataset = train_dataset.shuffle(1000).batch(64)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(test_data[...,tf.newaxis]/255, tf.float32),
     tf.cast(test_labels,tf.int64)))
test_dataset = test_dataset.batch(64)  # no shuffling for evaluation
class MyCNN(tf.keras.Model):
    """Small CNN mapping 28x28x1 images to `num_classes` logits."""

    def __init__(self, num_classes=10):
        super().__init__()
        # Spatial sizes: 28 -conv1(3)-> 26 -pool(2)-> 13 -conv2(3)-> 11
        self.conv1 = layers.Conv2D(filters=6, kernel_size=3, activation='relu')
        self.maxpool = layers.MaxPool2D(pool_size=(2,2))
        self.conv2 = layers.Conv2D(filters=16, kernel_size=3, activation='relu')
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(128, activation='relu')
        self.fc2 = layers.Dense(num_classes)

    def call(self, x):
        """Return raw logits; the loss is built with from_logits=True, so no softmax."""
        hidden = self.maxpool(self.conv1(x))
        hidden = self.flatten(self.conv2(hidden))
        hidden = self.fc1(hidden)
        return self.fc2(hidden)
model = MyCNN()
# https://stackoverflow.com/questions/64681232/why-is-it-that-input-shape-does-not-include-the-batch-dimension-when-passed-as
# build() https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#build
# Build with an explicit batched input shape so summary() can report parameters.
model.build(input_shape=[64,28,28,1])
model.summary()
# Smoke-test forward pass on the dummy batch created earlier.
logits = model(inputs)
print(f"logits'shape:{logits.shape}")
# Hyperparameters (mirroring the PyTorch run above).
learning_rate = 1e-3
batch_size = 64
epochs = 5
# SparseCategoricalCrossentropy expects integer labels; from_logits=True because
# the model outputs raw logits (no softmax layer).
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
# Compile the model with the loss, optimizer, and an accuracy metric.
model.compile(optimizer=optimizer, loss=loss_fn, metrics='sparse_categorical_accuracy')
# Fit on the training dataset, validating on the test dataset each epoch.
model.fit(train_dataset, epochs=epochs, validation_data=test_dataset)
运行结果:
logits'shape:(64, 10)
Epoch 1/5
2021-11-27 14:54:20.880112: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
938/938 [==============================] - 4s 3ms/step - loss: 0.4822 - sparse_categorical_accuracy: 0.8286 - val_loss: 0.3760 - val_sparse_categorical_accuracy: 0.8671
Epoch 2/5
938/938 [==============================] - 3s 3ms/step - loss: 0.3298 - sparse_categorical_accuracy: 0.8818 - val_loss: 0.3379 - val_sparse_categorical_accuracy: 0.8804
Epoch 3/5
938/938 [==============================] - 3s 4ms/step - loss: 0.2827 - sparse_categorical_accuracy: 0.8970 - val_loss: 0.3230 - val_sparse_categorical_accuracy: 0.8828
Epoch 4/5
938/938 [==============================] - 3s 3ms/step - loss: 0.2498 - sparse_categorical_accuracy: 0.9086 - val_loss: 0.2869 - val_sparse_categorical_accuracy: 0.8951
Epoch 5/5
938/938 [==============================] - 3s 4ms/step - loss: 0.2243 - sparse_categorical_accuracy: 0.9175 - val_loss: 0.2784 - val_sparse_categorical_accuracy: 0.8977
结论:不管用PyTorch还是TensorFlow,构建模型,训练模型的思路是一致的,差别在于API函数,为了不至于搞混,可以一边编写一边参考各自的API手册。
网友评论