Recently, Baidu AI Studio launched a seven-day crash course on image segmentation. It covers several classic segmentation networks in detail, including FCN8s, U-Net, PSPNet, and the DeepLab series, along with the related PaddlePaddle skills: data processing, model construction, and model training and prediction. This article is an overall review of the week-long course.
Building a dataloader with PaddlePaddle
PaddlePaddle provides a rich set of APIs for loading data before model training. By constructing a dataloader, we generate the training data and labels that serve as model input. As the code below shows, the dataloader class can implement whatever custom data-processing steps you need.
import os
import random
import cv2
import numpy as np

class BasicDataLoader(object):
    def __init__(self,
                 image_folder,
                 image_list_file,
                 transform=Transform(256),
                 shuffle=True):
        self.image_folder = image_folder
        self.image_list_file = image_list_file
        self.transform = transform
        self.shuffle = shuffle
        self.data_list = self.read_list()

    def read_list(self):
        data_list = []
        with open(self.image_list_file) as infile:
            for line in infile:
                image_path = os.path.join(self.image_folder, line.split()[0])
                label_path = os.path.join(self.image_folder, line.split()[1])
                data_list.append((image_path, label_path))
        return data_list

    def preprocess(self, data, label):
        h, w, c = data.shape
        h_gt, w_gt = label.shape
        assert h == h_gt, "Error: image/label height mismatch"
        assert w == w_gt, "Error: image/label width mismatch"
        if self.transform:
            data, label = self.transform(data, label)
        # give the label an explicit channel dimension (H x W x 1)
        label = label[:, :, np.newaxis]
        return data, label

    def __len__(self):
        return len(self.data_list)

    def __call__(self):
        # reshuffle at the start of each pass over the data
        if self.shuffle:
            random.shuffle(self.data_list)
        for image_path, label_path in self.data_list:
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE)
            image, label = self.preprocess(image, label)
            yield image, label
The following code uses paddle to create the dataloader, with a configurable batch_size:
def main():
    batch_size = 5
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # create a BasicDataLoader instance
        basic_dataloader = BasicDataLoader(
            image_folder="./dummy_data",
            image_list_file="./dummy_data/list.txt",
            transform=Transform(256),
            shuffle=True)
        # create a fluid.io.DataLoader instance
        dataloader = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        # feed the sample generator into the fluid dataloader
        dataloader.set_sample_generator(basic_dataloader,
                                        batch_size=batch_size,
                                        places=place)
        num_epoch = 2
        for epoch in range(1, num_epoch + 1):
            print(f'Epoch [{epoch}/{num_epoch}]:')
            for idx, (data, label) in enumerate(dataloader):
                print(f'Iter {idx}, Data shape: {data.shape}, Label shape: {label.shape}')

if __name__ == "__main__":
    main()
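Note that the Transform(256) class passed to BasicDataLoader is not shown in this post. As a reference, here is a minimal sketch of what such a class might look like, assuming it only resizes the image and label to a fixed size (the course's actual Transform may do more):

class Transform(object):
    # hypothetical sketch of the Transform used above: resize both image
    # and label to size x size; the course version may differ
    def __init__(self, size=256):
        self.size = size
    def __call__(self, image, label):
        # cv2.resize expects (width, height); use nearest-neighbor for the
        # label so interpolation does not create invalid class ids
        image = cv2.resize(image, (self.size, self.size))
        label = cv2.resize(label, (self.size, self.size),
                           interpolation=cv2.INTER_NEAREST)
        return image, label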
Data augmentation
Practical image segmentation needs some data-augmentation functionality, i.e. a Transform implementation. The PaddlePaddle-based Transform code is shown below:
import os
import cv2
import numpy as np
import paddle.fluid as fluid

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms
    def __call__(self, image):
        for t in self.transforms:
            image = t(image)
        return image
class Normalize(object):
    def __init__(self, mean_val, std_val, val_scale=1):
        # set val_scale = 1 if mean and std are in range (0, 1)
        # set val_scale to another value if mean and std are in range (0, 255)
        self.mean = np.array(mean_val, dtype=np.float32)
        self.std = np.array(std_val, dtype=np.float32)
        self.val_scale = 1 / 255.0 if val_scale == 1 else 1
    def __call__(self, image, label=None):
        image = image.astype(np.float32)
        image = image * self.val_scale
        image = image - self.mean
        image = image * (1 / self.std)
        return image, label

class ConvertDataType(object):
    def __call__(self, image, label=None):
        if label is not None:
            label = label.astype(np.int64)
        return image.astype(np.float32), label
class Pad(object):
    def __init__(self, size, ignore_label=255, mean_val=0, val_scale=1):
        # set val_scale to 1 if mean_val is in range (0, 1)
        # set val_scale to 255 if mean_val is in range (0, 255)
        factor = 255 if val_scale == 1 else 1
        self.size = size
        self.ignore_label = ignore_label
        self.mean_val = mean_val
        # rescale mean_val from 0-1 to 0-255
        if isinstance(self.mean_val, (tuple, list)):
            self.mean_val = [int(x * factor) for x in self.mean_val]
        else:
            self.mean_val = int(self.mean_val * factor)
    def __call__(self, image, label=None):
        h, w, c = image.shape
        pad_h = max(self.size - h, 0)
        pad_w = max(self.size - w, 0)
        pad_h_half = int(pad_h / 2)
        pad_w_half = int(pad_w / 2)
        if pad_h > 0 or pad_w > 0:
            image = cv2.copyMakeBorder(image,
                                       top=pad_h_half,
                                       left=pad_w_half,
                                       bottom=pad_h - pad_h_half,
                                       right=pad_w - pad_w_half,
                                       borderType=cv2.BORDER_CONSTANT,
                                       value=self.mean_val)
        return image
# CenterCrop: crop the central half of the image, then resize back to the original size
class CenterCrop(object):
    def __call__(self, data):
        h, w, _ = data.shape
        target_h = int(h / 2)
        target_w = int(w / 2)
        start_x = (w - target_w) // 2
        start_y = (h - target_h) // 2
        crop = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        # cv2.resize expects (width, height)
        return cv2.resize(crop, (w, h))
# Resize: resize the image to a fixed h x w
class Resize(object):
    def __init__(self, h, w):
        self.w = w
        self.h = h
    def __call__(self, data):
        return cv2.resize(data, (self.w, self.h))
# RandomFlip: randomly flip the image left-right or up-down
class RandomFlip(object):
    def __init__(self, rate=0.5, h2v=1):
        self.rate = rate
        self.h2v = h2v
    def __call__(self, data):
        if np.random.random() > self.rate:
            if self.h2v == 0:
                # flipCode=1: flip around the vertical axis (left-right)
                data = cv2.flip(data, 1)
            else:
                # flipCode=0: flip around the horizontal axis (up-down)
                data = cv2.flip(data, 0)
        return data
# RandomCrop: crop a random region of `rate` times the original size, then resize back
class RandomCrop(object):
    def __init__(self, rate=0.8):
        self.rate = rate
    def __call__(self, data):
        h, w, _ = data.shape
        target_h = int(h * self.rate)
        target_w = int(w * self.rate)
        # pick a random top-left corner so the crop position varies
        start_x = np.random.randint(0, w - target_w + 1)
        start_y = np.random.randint(0, h - target_h + 1)
        crop = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        return cv2.resize(crop, (w, h))
# Scale: crop a fixed-size window anchored at the bottom-right corner, then resize back
class Scale(object):
    def __init__(self, target_w, target_h):
        self.target_w = target_w
        self.target_h = target_h
    def __call__(self, data):
        h, w, _ = data.shape
        start_x = w - self.target_w
        start_y = h - self.target_h
        crop = data[start_y:start_y + self.target_h, start_x:start_x + self.target_w, :]
        return cv2.resize(crop, (w, h))
# RandomScale: crop a `ratio`-sized region (random or centered), then resize back
class RandomScale(object):
    def __init__(self, ratio=0.8, rand=True):
        self.ratio = ratio
        self.rand = rand
    def __call__(self, data):
        h, w, _ = data.shape
        target_h = int(h * self.ratio)
        target_w = int(w * self.ratio)
        if self.rand:
            # random crop position
            start_x = np.random.randint(0, w - target_w)
            start_y = np.random.randint(0, h - target_h)
        else:
            # center crop
            start_x = (w - target_w) // 2
            start_y = (h - target_h) // 2
        crop = data[start_y:start_y + target_h, start_x:start_x + target_w, :]
        return cv2.resize(crop, (w, h))
With the custom Transforms implemented, composing them with paddle looks like this:
# Transform test
def main():
    image = cv2.imread('./dummy_data/JPEGImages/2008_000064.jpg')
    label = cv2.imread('./dummy_data/GroundTruth_trainval_png/2008_000064.png')
    print(image.shape)
    # chain Resize, RandomScale, RandomFlip, Pad, RandomCrop
    augment = Compose([
        Resize(256, 256),
        RandomScale(),
        RandomFlip(),
        Pad(256),
        RandomCrop()])
    new_image = augment(image)
    cv2.imwrite('aug_image.png', new_image)

if __name__ == "__main__":
    main()
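One caveat: the transforms above each take a single array, so random geometric operations applied to the image are not mirrored onto its label. For segmentation training, image and label must be transformed with identical parameters. A minimal sketch of a paired version (my own illustration, not part of the course code):

import numpy as np
import cv2

class PairedRandomFlip(object):
    # flip image and label together, with one shared random decision
    def __init__(self, rate=0.5):
        self.rate = rate
    def __call__(self, image, label):
        if np.random.random() > self.rate:
            image = cv2.flip(image, 1)  # horizontal flip
            label = cv2.flip(label, 1)  # identical flip keeps pixels aligned
        return image, label

class PairedCompose(object):
    # chain transforms that take and return (image, label) pairs
    def __init__(self, transforms):
        self.transforms = transforms
    def __call__(self, image, label):
        for t in self.transforms:
            image, label = t(image, label)
        return image, label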
Model construction
PaddlePaddle supports building models in dynamic-graph (dygraph) mode: basic operations such as convolution and pooling are defined in the class initializer, and the forward method implements the forward pass. A typical U-Net implementation is shown below:
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable
from paddle.fluid.dygraph import Layer
from paddle.fluid.dygraph import Conv2D
from paddle.fluid.dygraph import BatchNorm
from paddle.fluid.dygraph import Pool2D
from paddle.fluid.dygraph import Conv2DTranspose
class Encoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Encoder, self).__init__()
        # encoder contains:
        # 3x3 conv + bn + relu
        # 3x3 conv + bn + relu
        # 2x2 max pool
        # forward returns features before and after pooling
        self.conv1 = Conv2D(num_channels,
                            num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_filters,
                            num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')
        self.pool = Pool2D(pool_size=2, pool_stride=2, pool_type='max', ceil_mode=True)

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x_pooled = self.pool(x)
        return x, x_pooled
class Decoder(Layer):
    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # decoder contains:
        # 2x2 transpose conv (doubles the feature map size)
        # 3x3 conv + bn + relu
        # 3x3 conv + bn + relu
        self.up = Conv2DTranspose(num_channels=num_channels,
                                  num_filters=num_filters,
                                  filter_size=2,
                                  stride=2)
        self.conv1 = Conv2D(num_channels,
                            num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn1 = BatchNorm(num_filters, act='relu')
        self.conv2 = Conv2D(num_filters,
                            num_filters,
                            filter_size=3,
                            stride=1,
                            padding=1)
        self.bn2 = BatchNorm(num_filters, act='relu')

    def forward(self, inputs_prev, inputs):
        # upsample, pad to match the skip connection, then concat along channels
        x = self.up(inputs)
        h_diff = inputs_prev.shape[2] - x.shape[2]
        w_diff = inputs_prev.shape[3] - x.shape[3]
        x = fluid.layers.pad2d(x, paddings=[h_diff // 2, h_diff - h_diff // 2,
                                            w_diff // 2, w_diff - w_diff // 2])
        x = fluid.layers.concat([inputs_prev, x], axis=1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        return x
class UNet(Layer):
    def __init__(self, num_classes=59):
        super(UNet, self).__init__()
        # encoder: 3->64->128->256->512
        # mid: 512->1024->1024
        # 4 encoders, 4 decoders; mid layers contain two 1x1 conv+bn+relu
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)
        self.mid_conv1 = Conv2D(512, 1024, filter_size=1, padding=0, stride=1)
        self.mid_bn1 = BatchNorm(1024, act='relu')
        self.mid_conv2 = Conv2D(1024, 1024, filter_size=1, stride=1, padding=0)
        self.mid_bn2 = BatchNorm(1024, act='relu')
        # each decoder's input has twice the encoder's channel count,
        # because the skip-connection feature maps are concatenated
        self.up4 = Decoder(1024, 512)
        self.up3 = Decoder(512, 256)
        self.up2 = Decoder(256, 128)
        self.up1 = Decoder(128, 64)
        self.last_conv = Conv2D(num_channels=64, num_filters=num_classes, filter_size=1)

    def forward(self, inputs):
        x1, x = self.down1(inputs)
        x2, x = self.down2(x)
        x3, x = self.down3(x)
        x4, x = self.down4(x)
        # middle layers
        x = self.mid_conv1(x)
        x = self.mid_bn1(x)
        x = self.mid_conv2(x)
        x = self.mid_bn2(x)
        x = self.up4(x4, x)
        x = self.up3(x3, x)
        x = self.up2(x2, x)
        x = self.up1(x1, x)
        x = self.last_conv(x)
        return x
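Before training, it is worth sanity-checking the network by pushing a dummy batch through it in dygraph mode and verifying the output has shape [N, num_classes, H, W]. A small test sketch (not part of the course code):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

def check_unet():
    with fluid.dygraph.guard(fluid.CPUPlace()):
        model = UNet(num_classes=59)
        # dummy NCHW batch; 128x128 keeps the CPU test fast
        x = np.random.rand(1, 3, 128, 128).astype('float32')
        out = model(to_variable(x))
        print(out.shape)  # expect [1, 59, 128, 128]

check_unet()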
Model training
With the dataloader and the model defined, the training code can be written as follows. In this step you can choose the optimization algorithm, the loss function, and basic hyperparameters such as the learning rate and number of epochs.
# `args` comes from argparse; train_dataloader, PSPNet and train_one_epoch
# (the per-epoch training loop) are defined elsewhere in the course code
def train():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: define the training dataloader
        train_reader = train_dataloader(args.batch_size)
        # Step 2: create the model
        if args.net == 'unet':
            model = UNet(num_classes=59)
        if args.net == 'pspnet':
            model = PSPNet(num_classes=59)
        # Step 3: define criterion and optimizer
        criterion = Basic_SegLoss
        opt = AdamOptimizer(learning_rate=args.lr, parameter_list=model.parameters())
        # Step 4: training loop
        for epoch in range(1, args.num_epochs + 1):
            train_loss = train_one_epoch(train_reader,
                                         model,
                                         criterion,
                                         opt,
                                         epoch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")
            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                # save model and optimizer states; the inference code below
                # loads the weights back from 'save_model_state_dict'
                fluid.dygraph.save_dygraph(model.state_dict(), 'save_model_state_dict')
                fluid.dygraph.save_dygraph(opt.state_dict(), 'save_opt_state_dict')
                print(f'----- Epoch {epoch}: saved save_model_state_dict.pdparams')
                print(f'----- Epoch {epoch}: saved save_opt_state_dict.pdopt')
- Loss function definition:
The PaddlePaddle framework provides several loss functions suitable for image segmentation, such as Dice Loss and Jaccard Loss. This model is trained with the softmax cross-entropy loss, shown below:
eps = 1e-8  # avoids division by zero when the mask is empty

def Basic_SegLoss(preds, labels, ignore_index=255):
    n, c, h, w = preds.shape
    # softmax cross entropy over the class dimension (axis=1 for NCHW logits)
    loss = fluid.layers.softmax_with_cross_entropy(logits=preds, label=labels, axis=1)
    # mask out ignored pixels so they do not contribute to the loss
    mask = labels != ignore_index
    mask = fluid.layers.cast(mask, 'float32')
    loss = loss * mask
    avg_loss = fluid.layers.mean(loss) / (fluid.layers.mean(mask) + eps)
    return avg_loss
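For comparison, the Dice Loss mentioned above can be sketched with the same fluid ops. This is only an illustration of the idea (ignore_index handling omitted for brevity), not the course's implementation:

import paddle.fluid as fluid

def dice_loss(preds, labels, eps=1e-8):
    # soft Dice loss sketch
    # preds:  N x C x H x W logits; labels: N x 1 x H x W integer class ids
    n, c, h, w = preds.shape
    probs = fluid.layers.softmax(preds, axis=1)
    # one_hot expects the index in the last dim, so flatten labels to N*H*W x 1
    labels_flat = fluid.layers.reshape(labels, [-1, 1])
    one_hot = fluid.layers.one_hot(labels_flat, depth=c)          # N*H*W x C
    probs_flat = fluid.layers.reshape(
        fluid.layers.transpose(probs, [0, 2, 3, 1]), [-1, c])     # N*H*W x C
    intersection = fluid.layers.reduce_sum(probs_flat * one_hot, dim=0)
    union = fluid.layers.reduce_sum(probs_flat + one_hot, dim=0)
    dice = (2.0 * intersection + eps) / (union + eps)
    return 1.0 - fluid.layers.mean(dice)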
With the steps above, we have covered the essentials of model construction and training. Putting it together, a complete training pipeline basically consists of the parts described in this post: data loading, data augmentation, the model definition, the loss function, and the training loop.
Model prediction
After several rounds of training, PaddlePaddle can save the model's best weights, which can then be loaded to run prediction and segment an image. The code is shown below:
from unet import UNet
import cv2
import os
import numpy as np
import PIL
from PIL import Image
import paddle
import paddle.fluid as fluid
import matplotlib.pyplot as plt
def colorize(gray, palette):
    # gray: numpy array of the label map; palette: 1*3N list of palette values
    color = Image.fromarray(gray.astype(np.uint8)).convert('P')
    color.putpalette(palette)
    return color

def save_blend_image(image_file, pred_file):
    # blend the saved prediction image with the colorized label map
    o_file = "output.png"
    image1 = PIL.Image.open(image_file)
    image1 = image1.convert('RGBA')
    image2 = pred_file.convert('RGBA')  # pred_file is already a PIL image
    image = Image.blend(image1, image2, 0.5)
    o_file = o_file[0:-4] + "_blend.png"
    image.save(o_file)
def inference_resize(image_file, size):
    return cv2.resize(image_file, size)

def inference_sliding(image, window_size):
    # split the image into cover-sized tiles (half the window size)
    cover = window_size // 2
    h, w, _ = image.shape
    segment_slide = []
    for i in range(h // cover - 1):
        for j in range(w // cover - 1):
            tile = image[i * cover:(i + 1) * cover, j * cover:(j + 1) * cover, :]
            segment_slide.append(tile)
    return segment_slide
def save_images(prediction, save, img_file):
    ofile_name = os.path.join(save, os.path.basename(img_file)) + '.png'
    print(ofile_name)
    cv2.imwrite(ofile_name, prediction)
    colors = np.loadtxt("./color_files/pascal_context_colors.txt").astype("uint8")
    color_image = colorize(prediction, colors)
    plt.imshow(color_image)
    plt.show()
    save_blend_image(ofile_name, color_image)
def img_transform(img_path):
    img = cv2.imread(img_path)
    img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_NEAREST)
    # HWC to CHW
    if len(img.shape) == 3:
        img = np.transpose(img, (2, 0, 1))
    # add the batch dimension (NCHW)
    img = np.expand_dims(img, axis=0).astype('float32')
    return img
# this inference code reads one test image and predicts its segmentation
def main():
    # 0. env preparation
    place = paddle.fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # 1. create model
        model = UNet(num_classes=59)
        # 2. load pretrained weights into the model
        state_dict, _ = fluid.dygraph.load_dygraph('./save_model_state_dict')
        model.load_dict(state_dict)
        model.eval()
        # 3. read and preprocess the test image
        img = img_transform('./2008_000021.jpg')
        img = fluid.dygraph.to_variable(img)
        out = model(img)                              # 1 x 59 x 512 x 512 logits
        out = fluid.layers.softmax(out, axis=1)       # class probabilities
        pred = fluid.layers.argmax(out, axis=1)       # 1 x 512 x 512 label map
        pred = fluid.layers.squeeze(pred, axes=[0])   # 512 x 512
        pred = pred.numpy().astype(np.uint8)
        plt.imshow(pred)
        plt.show()
        save_images(pred, './', "out")

if __name__ == "__main__":
    main()
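The inference_sliding helper above only tiles the input image; to actually run sliding-window inference, each tile must be passed through the model and the logits stitched back together. A sketch of how this could look (the window and stride values are my own assumptions, and the call belongs inside a fluid.dygraph.guard block):

def sliding_predict(model, image, num_classes=59, window=256, stride=128):
    # image: H x W x 3 float32 array, H and W assumed >= window
    h, w, _ = image.shape
    logits = np.zeros((num_classes, h, w), dtype='float32')
    counts = np.zeros((h, w), dtype='float32')
    for top in range(0, h - window + 1, stride):
        for left in range(0, w - window + 1, stride):
            crop = image[top:top + window, left:left + window, :]
            x = np.transpose(crop, (2, 0, 1))[np.newaxis, ...]  # 1 x 3 x win x win
            out = model(fluid.dygraph.to_variable(x)).numpy()[0]
            # accumulate logits and visit counts for overlap averaging
            logits[:, top:top + window, left:left + window] += out
            counts[top:top + window, left:left + window] += 1
    # average overlapping predictions, then take the per-pixel argmax
    return np.argmax(logits / np.maximum(counts, 1), axis=0).astype(np.uint8)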
After a short training run, the model's segmentation result is shown in the figure below:
For the complete course and learning materials, visit:
PaddlePaddle image segmentation course (飞桨图像分割专项课程)