1. Cross-entropy loss
1.1 torch.nn.CrossEntropyLoss
Common pitfall: torch.nn.CrossEntropyLoss expects the model's raw outputs (logits). Do not apply softmax to them first!
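The reason is that CrossEntropyLoss already combines LogSoftmax and NLLLoss internally. A minimal sketch (variable names are my own) confirming this equivalence on random logits:
import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 3)                 # raw model outputs (no softmax!)
target = torch.randint(0, 3, (4,))         # integer class labels

ce = nn.CrossEntropyLoss()(logits, target)
# CrossEntropyLoss = LogSoftmax + NLLLoss, so it expects raw logits
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))             # True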
(1) Formula
For each of the N samples, given logits x and true class index class, the per-sample loss is
    loss(x, class) = -log( exp(x[class]) / Σ_j exp(x[j]) ) = -x[class] + log( Σ_j exp(x[j]) )
The N per-sample losses are then summed or averaged (the default is averaging, i.e. reduction='mean').
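A small sketch (my own toy values) checking the per-sample formula and the reduction behavior against nn.CrossEntropyLoss:
import torch
import torch.nn as nn

x = torch.tensor([[2.0, 0.5, 0.1],
                  [0.3, 1.2, 2.2]])        # logits for N=2 samples
target = torch.tensor([0, 2])              # true class indices

# per-sample loss: -x[class] + log(sum_j exp(x[j]))
manual = -x[torch.arange(2), target] + torch.logsumexp(x, dim=1)
per_sample = nn.CrossEntropyLoss(reduction='none')(x, target)
print(torch.allclose(manual, per_sample))  # True
# the default reduction='mean' averages the per-sample losses
print(torch.allclose(nn.CrossEntropyLoss()(x, target), manual.mean()))  # True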
(2) Corresponding inputs in PyTorch
1) Raw model predictions (logits): shape (n_samples, n_class), dtype torch.float
2) Ground-truth labels: shape (n_samples,), dtype torch.long
(3) Example 1
>>> loss = nn.CrossEntropyLoss()
>>> input = torch.randn(3, 5, requires_grad=True)  # raw model predictions, floats
>>> target = torch.empty(3, dtype=torch.long).random_(5)  # ground-truth labels, integers
>>> output = loss(input, target)
>>> output.backward()
In [30]: input  # 3 samples, 5 classes
Out[30]:
tensor([[-1.3361, -0.1120,  0.9913, -0.1192,  0.3797],
        [ 1.7727, -1.7945,  0.4141,  0.1564,  0.9800],
        [ 0.1120,  0.1938,  0.4124,  1.8057,  1.5692]], requires_grad=True)
In [32]: target  # labels of the 3 samples
Out[32]: tensor([2, 4, 4])
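To see the pitfall concretely, a short sketch (my own example) comparing the loss on raw logits with the loss obtained by mistakenly softmax-ing the inputs first:
import torch
import torch.nn as nn
import torch.nn.functional as F

input = torch.randn(3, 5)
target = torch.randint(0, 5, (3,))
loss = nn.CrossEntropyLoss()

correct_loss = loss(input, target)                       # raw logits: intended usage
double_softmax = loss(F.softmax(input, dim=1), target)   # softmax applied twice in effect
print(correct_loss.item(), double_softmax.item())        # the two values differ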
(4) Example 2
Load data
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
iris = load_iris()
X = iris['data']
y = iris['target']
names = iris['target_names']
feature_names = iris['feature_names']
# Scale data to have mean 0 and variance 1,
# which is important for convergence of the neural network
# (note: fitting the scaler on the full data set leaks test-set statistics;
# fitting on X_train only and transforming X_test is the stricter practice)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Split the data set into training and testing
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=2)
Build model
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 50)
        self.layer3 = nn.Linear(50, 3)

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # x = F.softmax(self.layer3(x), dim=1)  # NOT needed before CrossEntropyLoss
        x = self.layer3(x)  # return raw logits
        return x

model = Model(X_train.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
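Because forward returns raw logits, class probabilities at inference time (if you need them for reporting) come from applying softmax yourself; a minimal sketch, using the first test row as a stand-in input:
with torch.no_grad():
    sample = torch.from_numpy(X_test[:1]).float()  # hypothetical: first test row
    probs = F.softmax(model(sample), dim=1)        # rows sum to 1
    print(probs, probs.argmax(dim=1))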
Train model
import tqdm
import numpy as np

EPOCHS = 100
# Variable is deprecated since PyTorch 0.4; plain tensors carry gradients directly
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).long()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).long()
loss_list = np.zeros((EPOCHS,))
accuracy_list = np.zeros((EPOCHS,))
for epoch in tqdm.trange(EPOCHS):
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    loss_list[epoch] = loss.item()
    # Zero gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Evaluate on the test set each epoch
    with torch.no_grad():
        y_pred = model(X_test)
        correct = (torch.argmax(y_pred, dim=1) == y_test).type(torch.FloatTensor)
        accuracy_list[epoch] = correct.mean().item()
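After the loop finishes, a short sketch (my own addition) for reporting the final test accuracy with the network switched to evaluation mode:
model.eval()  # disables training-specific layers (a no-op here, but a good habit)
with torch.no_grad():
    final_pred = torch.argmax(model(X_test), dim=1)
    final_acc = (final_pred == y_test).float().mean().item()
print("final test accuracy: {:.3f}".format(final_acc))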
Plot validation accuracy and training loss
import matplotlib.pyplot as plt
fig, (ax1, ax2) = plt.subplots(2, figsize=(12, 6), sharex=True)
ax1.plot(accuracy_list)
ax1.set_ylabel("validation accuracy")
ax2.plot(loss_list)
ax2.set_ylabel("training loss")
ax2.set_xlabel("epochs")
plt.show()
Plot ROC curve
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import OneHotEncoder
plt.figure(figsize=(10, 10))
plt.plot([0, 1], [0, 1], 'k--')
# One-hot encode the integer labels (y_test is now a tensor, so convert to numpy first)
enc = OneHotEncoder()
Y_onehot = enc.fit_transform(y_test.numpy()[:, np.newaxis]).toarray()
with torch.no_grad():
    # softmax turns the raw logits into probabilities, which are proper
    # scores for a micro-averaged ROC curve
    y_pred = F.softmax(model(X_test), dim=1).numpy()
fpr, tpr, threshold = roc_curve(Y_onehot.ravel(), y_pred.ravel())
plt.plot(fpr, tpr, label='AUC = {:.3f}'.format(auc(fpr, tpr)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.legend()
plt.show()
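Beyond the micro-average, a per-class breakdown can be informative; a minimal sketch (my own extension) computing one ROC curve per class from the same Y_onehot and y_pred arrays:
# per-class ROC: score each class column as a one-vs-rest binary problem
plt.figure(figsize=(10, 10))
plt.plot([0, 1], [0, 1], 'k--')
for i, name in enumerate(names):
    fpr_i, tpr_i, _ = roc_curve(Y_onehot[:, i], y_pred[:, i])
    plt.plot(fpr_i, tpr_i, label='{} (AUC = {:.3f})'.format(name, auc(fpr_i, tpr_i)))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('Per-class ROC curves')
plt.legend()
plt.show()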