import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms as T
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_ids = [0,1]
torch.backends.cudnn.benchmark = True
# Your custom model
model = MyModel()
# Wrap for parallel execution across GPUs, if needed
model = nn.DataParallel(model, device_ids=device_ids).to(device)
# Loss function, e.g. cross-entropy
criterion = nn.CrossEntropyLoss()
criterion.to(device)
# Optimizer, e.g. SGD
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
# Learning rate schedule
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1)
# Prepare the data
transform = T.Compose([xxxxxx])  # your custom transforms
dataset = xxxxxx  # your dataset
dataloader = torch.utils.data.DataLoader(dataset, batch_size=xxxx, shuffle=True)
# Train for num_epochs epochs
def train(num_epochs):
    num = 20  # print every `num` batches
    for epoch in range(num_epochs):
        torch.cuda.empty_cache()
        print('Epoch: %d' % epoch)
        train_loss = 0
        model.train()
        for batch_idx, (inputs, targets) in enumerate(dataloader):
            # model.zero_grad()
            # optimizer.zero_grad()  # equivalent when optimizer = optim.Optimizer(model.parameters())
            # Zero the gradients
            optimizer.zero_grad()
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)  # call the module directly instead of model.forward()
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            if (batch_idx + 1) % num == 0:
                print(batch_idx + 1, len(dataloader), 'Loss: %.3f' % (train_loss / num))
                train_loss = 0
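The scheduler defined above is never actually stepped inside train(). A minimal driver sketch of how it could be wired up (my own addition, not from the original code; the epoch count is a placeholder, and scheduler.step() could equally be placed at the end of the epoch loop inside train()):

num_epochs = 20  # placeholder value
for epoch in range(num_epochs):
    print('Outer epoch: %d' % epoch)
    train(1)           # one full pass over the dataloader using the loop above
    scheduler.step()   # StepLR multiplies lr by gamma every step_size epochs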
A few things I hadn't used before:
- Setting which GPUs to use from within the code, i.e. the line above (a quick sanity check is sketched after the list):
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
CUDA_VISIBLE_DEVICES=1 Only device 1 will be seen
CUDA_VISIBLE_DEVICES=0,1 Devices 0 and 1 will be visible
CUDA_VISIBLE_DEVICES="0,1" Same as above, quotation marks are optional
CUDA_VISIBLE_DEVICES=0,2,3 Devices 0, 2, 3 will be visible; device 1 is masked
CUDA_VISIBLE_DEVICES="" No GPU will be visible
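A quick sanity check (my own addition, not from the original post): the environment variables must be set before CUDA is initialized, and torch.cuda.device_count() then only reports the visible devices.

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"   # must run before any CUDA call

import torch
print(torch.cuda.is_available())
print(torch.cuda.device_count())  # should print 2 if both GPUs exist and are visible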
Reference: http://www.cnblogs.com/darkknightzh/p/6591923.html