美文网首页
pytorch 交叉熵损失教程(1)-torch.nn.Cros

pytorch 交叉熵损失教程(1)-torch.nn.Cros

作者: 纵春水东流 | 来源:发表于2022-06-15 11:39 被阅读0次

    1、交叉熵损失
    1.1 torch.nn.CrossEntropyLoss
    注意的坑:torch.nn.CrossEntropyLoss的输入是模型的原始输出(logits),不要先经过softmax!该损失内部已经包含了log-softmax(等价于LogSoftmax加NLLLoss),重复做softmax会得到错误的损失值。
    (1)计算公式
    计算N个标签的损失
    \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})} \cdot \mathbb{1}\{y_n \not= \text{ignore\_index}\}
    将N个标签的损失求和或平均(默认是平均)
    \ell(x, y) = \begin{cases} \frac{\sum_{n=1}^N l_n}{\sum_{m=1}^N w_{y_m} \cdot \mathbb{1}\{y_m \not= \text{ignore\_index}\}}, & \text{if reduction} = \text{`mean';}\\ \sum_{n=1}^N l_n, & \text{if reduction} = \text{`sum'.} \end{cases}

    (2)对应pytorch的输入
    1)模型原始预测输出(logits),shape=(n_samples, n_class),dtype=torch.float
    2)真实标签(类别索引),shape=(n_samples,),dtype=torch.long

    (3)例子1

        >>> loss = nn.CrossEntropyLoss()
        >>> input = torch.randn(3, 5, requires_grad=True) #模型原始预测输出,浮点数
        >>> target = torch.empty(3, dtype=torch.long).random_(5)#真实标签,整数
        >>> output = loss(input, target)
        >>> output.backward()
    
    In [30]: input # 3个样本,
    Out[30]: 
    tensor([[-1.3361, -0.1120,  0.9913, -0.1192,  0.3797],
            [ 1.7727, -1.7945,  0.4141,  0.1564,  0.9800],
            [ 0.1120,  0.1938,  0.4124,  1.8057,  1.5692]], requires_grad=True)
    
    In [32]: target #3个样本的标签
    Out[32]: tensor([2, 4, 4])
    

    (4)例子2
    load data

    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    
    # Load the Iris data set and pull out the arrays used below.
    iris = load_iris()
    X = iris['data']
    y = iris['target']
    names = iris['target_names']
    feature_names = iris['feature_names']

    # Split the raw data first so that no test-set statistics leak into the
    # preprocessing step below.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=2)

    # Scale features to zero mean and unit variance, which is important for
    # the convergence of the neural network. The scaler is fitted on the
    # training split only and then reused for every other transform.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    # Kept so that code expecting the full scaled matrix still finds it.
    X_scaled = scaler.transform(X)
    

    build model

    import torch
    import torch.nn as nn
    class Model(nn.Module):
        """Fully connected classifier with two hidden layers; returns raw logits.

        Args:
            input_dim: Number of input features per sample.
            hidden_dim: Width of both hidden layers (default 50).
            n_classes: Number of output scores per sample (default 3).
        """

        def __init__(self, input_dim, hidden_dim=50, n_classes=3):
            super().__init__()
            self.layer1 = nn.Linear(input_dim, hidden_dim)
            self.layer2 = nn.Linear(hidden_dim, hidden_dim)
            self.layer3 = nn.Linear(hidden_dim, n_classes)

        def forward(self, x):
            """Return unnormalised class scores for ``x``.

            ``x`` must have ``input_dim`` as its last dimension.
            """
            # torch.relu is used instead of F.relu so the class only depends on
            # the `torch` / `torch.nn` imports that precede it.
            x = torch.relu(self.layer1(x))
            x = torch.relu(self.layer2(x))
            # No softmax here: nn.CrossEntropyLoss applies log-softmax itself,
            # so the final layer must return unnormalised scores.
            return self.layer3(x)
    
    # Instantiate the network for the training feature count, then the Adam
    # optimiser over its parameters and the cross-entropy criterion.
    model = Model(input_dim=X_train.shape[1])
    optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    

    train model

    from torch.autograd import Variable
    import tqdm
    import numpy as np
    import torch.nn.functional as F
    
    EPOCHS = 100

    # Tensors carry autograd state themselves, so the deprecated Variable
    # wrapper is not needed; only the dtype conversion remains.
    X_train = torch.from_numpy(X_train).float()
    y_train = torch.from_numpy(y_train).long()
    X_test = torch.from_numpy(X_test).float()
    y_test = torch.from_numpy(y_test).long()

    loss_list = np.zeros((EPOCHS,))      # loss on X_train, one entry per epoch
    accuracy_list = np.zeros((EPOCHS,))  # accuracy on X_test, one entry per epoch

    for epoch in tqdm.trange(EPOCHS):
        # Full-batch forward pass on the training set.
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        loss_list[epoch] = loss.item()

        # Clear stale gradients, back-propagate, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate on the held-out split without tracking gradients.
        with torch.no_grad():
            y_pred = model(X_test)
            correct = (torch.argmax(y_pred, dim=1) == y_test).float()
            accuracy_list[epoch] = correct.mean().item()
    

    plot validation accuracy and training loss

     import matplotlib.pyplot as plt
    fig, (ax1, ax2) = plt.subplots(2, figsize=(12, 6), sharex=True)
    
    ax1.plot(accuracy_list)
    ax1.set_ylabel("validation accuracy")
    ax2.plot(loss_list)
    ax2.set_ylabel("validation loss")
    ax2.set_xlabel("epochs");
    plt.show()
    

    plot roc curve

    from sklearn.metrics import roc_curve, auc
    from sklearn.preprocessing import OneHotEncoder
    
    plt.figure(figsize=(10, 10))
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference

    # One-hot encode the true labels; convert the tensor to a NumPy column
    # explicitly rather than relying on scikit-learn's implicit conversion.
    enc = OneHotEncoder()
    Y_onehot = enc.fit_transform(y_test.numpy().reshape(-1, 1)).toarray()

    with torch.no_grad():
        y_pred = model(X_test).numpy()

    # Flattening both arrays pools every (sample, class) pair into a single
    # binary problem, i.e. a micro-averaged ROC curve over the raw scores.
    fpr, tpr, threshold = roc_curve(Y_onehot.ravel(), y_pred.ravel())

    plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc(fpr, tpr)))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend()
    # Without this call the figure is never displayed when run as a script.
    plt.show()
    

    相关文章

      网友评论

          本文标题:pytorch 交叉熵损失教程(1)-torch.nn.Cros

          本文链接:https://www.haomeiwen.com/subject/qlcxvrtx.html