TensorFlow vs PyTorch 8: Training Models

Author: LabVIEW_Python | Published 2021-11-08 19:23

Following "TensorFlow vs PyTorch 7: Creating Models", the next step is to train the model.

PyTorch

  • A loss function that measures the difference between the predictions and the ground-truth labels
  • An optimizer that updates the model parameters
  • A training loop and a test loop
    The complete training code is shown below:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision import transforms 
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt 
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" # work around "OMP: Error #15" (duplicate OpenMP runtime) seen with some conda installs

train_dataset = datasets.FashionMNIST(root='data',train=True, download=True, transform=ToTensor())
test_dataset = datasets.FashionMNIST(root='data',train=False, download=True, transform=ToTensor())

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader  = DataLoader(test_dataset, batch_size=64)

train_batch_data, train_batch_labels = next(iter(train_dataloader))
print(f"train_batch_data shape: {train_batch_data.size()}")
print(f"train_batch_labels shape: {train_batch_labels.size()}")
print(train_batch_labels[0])

import torch.nn as nn
import torch.nn.functional as F
class MyCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1,6,3)     # 28->26
        self.maxpool = nn.MaxPool2d(2,2)  # 26->13
        self.conv2 = nn.Conv2d(6,16,3)    # 13->11
        self.fc1 = nn.Linear(16*11*11,128)
        self.fc2 = nn.Linear(128,10)
    
    def forward(self, x):
        x = self.maxpool(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = torch.flatten(x, 1) # dim 0 is the batch dimension, so flatten everything except the batch dim
        x = F.relu(self.fc1(x))
        x = self.fc2(x)         # no activation on the output; nn.CrossEntropyLoss expects raw logits
        return x 

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = MyCNN().to(device)  # move the model to the GPU if one is available

learning_rate = 1e-3
batch_size = 64
epochs = 5

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for steps, (X, y) in enumerate(dataloader):
        # forward pass
        X, y = X.to(device), y.to(device) # move the batch to the same device as the model
        pred = model(X)
        # compute the loss
        loss = loss_fn(pred, y)
        # backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if steps % 100 == 0:
            loss, current = loss.item(), steps*len(X)
            print(f"loss:{loss:>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0.0, 0.0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device) # move the batch to the same device as the model
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

The output is as follows:

Epoch 5
loss:0.298207 [32000/60000]
loss:0.296418 [38400/60000]
loss:0.390596 [44800/60000]
loss:0.444690 [51200/60000]
loss:0.282875 [57600/60000]
Test Error:
Accuracy: 89.9%, Avg loss: 0.280973
Done!
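
One detail worth noting: the loops above never switch the model between training and evaluation mode. For this particular network it makes no difference, since there are no Dropout or BatchNorm layers, but adding the calls costs nothing and avoids surprises later. A minimal sketch of where they would go, reusing the loops and objects defined above:

for t in range(epochs):
    model.train()   # enable training-mode behaviour (Dropout, BatchNorm statistics, ...)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    model.eval()    # switch to inference-mode behaviour before evaluating
    test_loop(test_dataloader, model, loss_fn)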

TensorFlow

  • A loss function that measures the difference between the predictions and the ground-truth labels
  • An optimizer that updates the model parameters
  • A training loop and a test loop
    The complete training code is shown below:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inputs = tf.random.normal([64,28,28,1]) #The Conv2D op currently only supports the NHWC tensor format on the CPU
(training_data, training_labels) , (test_data, test_labels)= tf.keras.datasets.fashion_mnist.load_data()

train_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(training_data[...,tf.newaxis]/255, tf.float32),
    tf.cast(training_labels,tf.int64)))
train_dataset = train_dataset.shuffle(1000).batch(64)

test_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(test_data[...,tf.newaxis]/255, tf.float32),
    tf.cast(test_labels,tf.int64)))
test_dataset = test_dataset.batch(64)

class MyCNN(tf.keras.Model):
    def __init__(self,num_classes=10):
        super().__init__()
        self.conv1 = layers.Conv2D(filters=6, kernel_size=3, activation='relu')     # 28->26
        self.maxpool = layers.MaxPool2D(pool_size=(2,2))                            # 26->13
        self.conv2 = layers.Conv2D(filters=16, kernel_size=3, activation='relu')    # 13->11
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(128,activation='relu')
        self.fc2 = layers.Dense(num_classes)
    
    def call(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.flatten(x)
        x = self.fc1(x)         
        x = self.fc2(x) # no activation on the output; SparseCategoricalCrossentropy(from_logits=True) expects raw logits
        return x 

model = MyCNN()
# https://stackoverflow.com/questions/64681232/why-is-it-that-input-shape-does-not-include-the-batch-dimension-when-passed-as
# build() https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer#build

model.build(input_shape=[64,28,28,1])
model.summary()
logits = model(inputs)
print(f"logits'shape:{logits.shape}")


learning_rate = 1e-3
batch_size = 64
epochs = 5
# SparseCategoricalCrossentropy expects the labels to be provided as integers
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# compile the model with the loss function, the optimizer, and an accuracy metric
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['sparse_categorical_accuracy'])

# Fit the model on the training dataset, evaluating on the test dataset after each epoch
model.fit(train_dataset, epochs=epochs, validation_data=test_dataset)

The output:

logits'shape:(64, 10)
Epoch 1/5
2021-11-27 14:54:20.880112: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
938/938 [==============================] - 4s 3ms/step - loss: 0.4822 - sparse_categorical_accuracy: 0.8286 - val_loss: 0.3760 - val_sparse_categorical_accuracy: 0.8671
Epoch 2/5
938/938 [==============================] - 3s 3ms/step - loss: 0.3298 - sparse_categorical_accuracy: 0.8818 - val_loss: 0.3379 - val_sparse_categorical_accuracy: 0.8804
Epoch 3/5
938/938 [==============================] - 3s 4ms/step - loss: 0.2827 - sparse_categorical_accuracy: 0.8970 - val_loss: 0.3230 - val_sparse_categorical_accuracy: 0.8828
Epoch 4/5
938/938 [==============================] - 3s 3ms/step - loss: 0.2498 - sparse_categorical_accuracy: 0.9086 - val_loss: 0.2869 - val_sparse_categorical_accuracy: 0.8951
Epoch 5/5
938/938 [==============================] - 3s 4ms/step - loss: 0.2243 - sparse_categorical_accuracy: 0.9175 - val_loss: 0.2784 - val_sparse_categorical_accuracy: 0.8977
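
For a one-to-one comparison with the PyTorch train_loop above, the same training step can also be written by hand in TensorFlow with tf.GradientTape instead of model.fit. A minimal sketch, reusing the model, loss_fn, optimizer, and train_dataset defined above:

for epoch in range(epochs):
    for step, (X, y) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(X, training=True)   # forward pass
            loss = loss_fn(y, logits)          # compute the loss on this batch
        grads = tape.gradient(loss, model.trainable_variables)           # backward pass
        optimizer.apply_gradients(zip(grads, model.trainable_variables)) # update the weights
        if step % 100 == 0:
            print(f"epoch {epoch+1}, step {step}, loss: {float(loss):.4f}")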

Conclusion: whether you use PyTorch or TensorFlow, the overall approach to building and training a model is the same; the differences lie in the API functions. To avoid mixing them up, keep each framework's API reference handy as you write the code.
