美文网首页
TensorFlow vs PyTorch 8: 训练模型

TensorFlow vs PyTorch 8: 训练模型

作者: LabVIEW_Python | 来源:发表于2021-11-08 19:23 被阅读0次

    《TensorFlow vs PyTorch 7: 创建模型》后,就该训练模型了。

    PyTorch

    • 需要添加用于测量预测值与真实值之间差异的损失函数
    • 更新模型参数的优化器
    • 定义训练循环和测试循环
      完整模型训练代码,如下所示:
    import torch
    from torch.utils.data import Dataset, DataLoader
    from torchvision import datasets
    from torchvision import transforms 
    from torchvision.transforms import ToTensor
    import matplotlib.pyplot as plt 
    import os

    # Work around the "OMP: Error #15" duplicate-OpenMP-runtime abort.
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

    # Download FashionMNIST (once) and convert each PIL image to a tensor on load.
    train_dataset = datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor())
    test_dataset = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor())

    # Mini-batches of 64; only the training stream is reshuffled each epoch.
    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=64)

    # Peek at one batch to sanity-check shapes and a sample label.
    train_batch_data, train_batch_labels = next(iter(train_dataloader))
    print(f"train_batch_data shape: {train_batch_data.size()}")
    print(f"train_batch_labels shape: {train_batch_labels.size()}")
    print(train_batch_labels[0])
    
    import torch.nn as nn
    import torch.nn.functional as F
    class MyCNN(nn.Module):
        """CNN for 1x28x28 FashionMNIST images; produces 10 raw class logits."""

        def __init__(self):
            super().__init__()
            # Spatial sizes: 28 -> 26 (conv1) -> 13 (maxpool) -> 11 (conv2)
            self.conv1 = nn.Conv2d(1, 6, 3)
            self.maxpool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(6, 16, 3)
            self.fc1 = nn.Linear(16 * 11 * 11, 128)
            self.fc2 = nn.Linear(128, 10)

        def forward(self, x):
            """Forward pass; the final layer is left un-activated (logits out)."""
            features = F.relu(self.conv2(self.maxpool(F.relu(self.conv1(x)))))
            # dim 0 is the batch axis, so flatten everything from dim 1 onward
            flat = torch.flatten(features, 1)
            return self.fc2(F.relu(self.fc1(flat)))
    
    # Prefer the GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    model = MyCNN().to(device)  # move the model onto the chosen device

    # Hyper-parameters.
    learning_rate = 1e-3
    batch_size = 64
    epochs = 5

    # Cross-entropy over raw logits, optimized with Adam over all parameters.
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    def train_loop(dataloader, model, loss_fn, optimizer, device=None):
        """Run one epoch of training over `dataloader`, logging every 100 steps.

        Args:
            dataloader: yields (X, y) mini-batches.
            model: the network being trained (switched to train mode here).
            loss_fn: criterion comparing predictions against targets.
            optimizer: updates model parameters after each backward pass.
            device: target device string; auto-detected ('cuda'/'cpu') when None,
                which matches the original module-level `device` behavior.
        """
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        model.train()  # enable train-mode behavior (dropout, batch-norm updates)
        size = len(dataloader.dataset)
        for steps, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)  # move the batch to the device
            pred = model(X)                    # forward pass
            loss = loss_fn(pred, y)            # compute loss
            optimizer.zero_grad()              # clear stale gradients
            loss.backward()                    # back-propagate
            optimizer.step()                   # update parameters

            if steps % 100 == 0:
                # Samples seen so far (approximate if the last batch is short).
                current = steps * len(X)
                print(f"loss:{loss.item():>7f} [{current:>5d}/{size:>5d}]")
    
    def test_loop(dataloader, model, loss_fn):
        size = len(dataloader.dataset)
        num_batches = len(dataloader)
        test_loss, correct = 0.0, 0.0
    
        with torch.no_grad():
            for X, y in dataloader:
                X, y = X.to(device), y.to(device) # 将数据载入GPU
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            test_loss /= num_batches
            correct /= size
            print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    
    # Alternate one training pass and one evaluation pass per epoch.
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer)
        test_loop(test_dataloader, model, loss_fn)
    print("Done!")
    

    运行结果如下:

    Epoch 5
    loss:0.298207 [32000/60000]
    loss:0.296418 [38400/60000]
    loss:0.390596 [44800/60000]
    loss:0.444690 [51200/60000]
    loss:0.282875 [57600/60000]
    Test Error:
    Accuracy: 89.9%, Avg loss: 0.280973
    Done!

    TensorFlow

    • 需要添加用于测量预测值与真实值之间差异的损失函数
    • 更新模型参数的优化器
    • 定义训练循环和测试循环
      完整模型训练代码,如下所示:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    
    # Dummy NHWC batch for a smoke-test forward pass
    # (the Conv2D op currently only supports the NHWC tensor format on the CPU).
    inputs = tf.random.normal([64, 28, 28, 1])

    (training_data, training_labels), (test_data, test_labels) = tf.keras.datasets.fashion_mnist.load_data()

    def _to_dataset(images, labels):
        # Scale pixels to [0, 1], add a trailing channel axis, pair with int64 labels.
        scaled = tf.cast(images[..., tf.newaxis] / 255, tf.float32)
        return tf.data.Dataset.from_tensor_slices((scaled, tf.cast(labels, tf.int64)))

    # Only the training stream is shuffled; both are batched to 64.
    train_dataset = _to_dataset(training_data, training_labels).shuffle(1000).batch(64)
    test_dataset = _to_dataset(test_data, test_labels).batch(64)
    
    class MyCNN(tf.keras.Model):
        """Small CNN for 28x28x1 FashionMNIST images; outputs raw class logits."""

        def __init__(self, num_classes=10):
            super().__init__()
            # Spatial sizes: 28 -> 26 (conv1) -> 13 (maxpool) -> 11 (conv2)
            self.conv1 = layers.Conv2D(filters=6, kernel_size=3, activation='relu')
            self.maxpool = layers.MaxPool2D(pool_size=(2, 2))
            self.conv2 = layers.Conv2D(filters=16, kernel_size=3, activation='relu')
            self.flatten = layers.Flatten()
            self.fc1 = layers.Dense(128, activation='relu')
            self.fc2 = layers.Dense(num_classes)

        def call(self, x):
            """Forward pass; no activation on the final layer (logits out)."""
            features = self.flatten(self.conv2(self.maxpool(self.conv1(x))))
            return self.fc2(self.fc1(features))
    
    model = MyCNN()
    # build() takes the full batched input shape so summary() can be printed
    # before any real data has flowed through the subclassed model
    # (see tf.keras.layers.Layer.build).
    model.build(input_shape=[64, 28, 28, 1])
    model.summary()

    # Smoke-test forward pass on the dummy batch.
    logits = model(inputs)
    print(f"logits'shape:{logits.shape}")

    # Hyper-parameters.
    learning_rate = 1e-3
    batch_size = 64
    epochs = 5

    # SparseCategoricalCrossentropy expects integer labels; from_logits=True
    # because the model's final Dense layer applies no softmax.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    # Compile with the loss/optimizer, then fit on the training dataset while
    # validating against the test dataset after every epoch.
    model.compile(optimizer=optimizer, loss=loss_fn, metrics='sparse_categorical_accuracy')
    model.fit(train_dataset, epochs=epochs, validation_data=test_dataset)
    

    运行结果:

    logits'shape:(64, 10)
    Epoch 1/5
    2021-11-27 14:54:20.880112: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
    938/938 [==============================] - 4s 3ms/step - loss: 0.4822 - sparse_categorical_accuracy: 0.8286 - val_loss: 0.3760 - val_sparse_categorical_accuracy: 0.8671
    Epoch 2/5
    938/938 [==============================] - 3s 3ms/step - loss: 0.3298 - sparse_categorical_accuracy: 0.8818 - val_loss: 0.3379 - val_sparse_categorical_accuracy: 0.8804
    Epoch 3/5
    938/938 [==============================] - 3s 4ms/step - loss: 0.2827 - sparse_categorical_accuracy: 0.8970 - val_loss: 0.3230 - val_sparse_categorical_accuracy: 0.8828
    Epoch 4/5
    938/938 [==============================] - 3s 3ms/step - loss: 0.2498 - sparse_categorical_accuracy: 0.9086 - val_loss: 0.2869 - val_sparse_categorical_accuracy: 0.8951
    Epoch 5/5
    938/938 [==============================] - 3s 4ms/step - loss: 0.2243 - sparse_categorical_accuracy: 0.9175 - val_loss: 0.2784 - val_sparse_categorical_accuracy: 0.8977

    结论:不管用PyTorch还是TensorFlow,构建模型,训练模型的思路是一致的,差别在于API函数,为了不至于搞混,可以一边编写一边参考各自的API手册。

    相关文章

      网友评论

          本文标题:TensorFlow vs PyTorch 8: 训练模型

          本文链接:https://www.haomeiwen.com/subject/incpzltx.html