美文网首页
机器学习与深度学习在图像处理上的小试牛刀

机器学习与深度学习在图像处理上的小试牛刀

作者: 原上的小木屋 | 来源:发表于2020-06-14 21:50 被阅读0次
  • 我的最终目标是创建一个能够判断图像是否是蝾螈的脸的判别器。因此,我们需要蝾螈的脸部图像和非蝾螈脸部的图像。我们需要编写程序来准备这样的图像。
  • 为此,有必要从单个图像中用矩形框出蝾螈头部(即Ground-truth),如果随机切割的矩形与Ground-truth在一定程度上重合,那么这个矩形框处就是蝾螈的头。
  • 重合程度通过检测评价函数IoU(Intersection over Union)来判断,按下式进行计算:$IoU=\frac{|RoI|}{|R_1|+|R_2|-|RoI|}$,其中
  1. $R_1$为Ground-truth的范围;
  2. $R_2$为随机框出来的矩形的范围;
  3. $RoI$为$R_1$和$R_2$重合的范围。
  • 下面的代码用于计算两个矩形的IoU。矩形表示为[x1, y1, x2, y2],其中x1,y1为矩形左上角的坐标,x2,y2为矩形右下角的坐标。
import numpy as np#导入numpy包
# get IoU overlap ratio获取到重合程度
def iou(a, b):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Both boxes use the ``[x1, y1, x2, y2]`` layout, where ``(x1, y1)`` is the
    top-left corner and ``(x2, y2)`` is the bottom-right corner.

    Args:
        a: First box (any indexable of four numbers).
        b: Second box (same layout as ``a``).

    Returns:
        The overlap area divided by the union area. Boxes that do not
        overlap yield 0.
    """
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    # Corners of the intersection rectangle.
    inter_x1 = np.maximum(a[0], b[0])
    inter_y1 = np.maximum(a[1], b[1])
    inter_x2 = np.minimum(a[2], b[2])
    inter_y2 = np.minimum(a[3], b[3])
    # Clamp the extents at zero: for disjoint boxes the raw differences are
    # negative, and the product of two negative extents would otherwise be
    # counted as a genuine (positive) overlap area.
    inter_w = np.maximum(inter_x2 - inter_x1, 0)
    inter_h = np.maximum(inter_y2 - inter_y1, 0)
    area_inter = inter_w * inter_h
    # Union = sum of both areas minus the part counted twice.
    return area_inter / (area_a + area_b - area_inter)
# Two example boxes in [x1, y1, x2, y2] form.
a = np.array((50, 50, 150, 150), dtype=np.float32)
b = np.array((60, 60, 170, 160), dtype=np.float32)
# Print the IoU of the two example boxes.
print(iou(a, b))

第二步,随机裁切图像作为训练数据

  • 这里,我们从图像中随机裁切出200个60*60的矩形,使其满足以下条件
  1. 使用np.random.seed(0),求出裁剪的矩形的左上角坐标:x1 = np.random.randint(W-60),y1 = np.random.randint(H-60)
  2. 如果和 Ground-truth(gt = np.array((47, 41, 129, 103), dtype=np.float32))的IoU大于等于0.5,那么就打上标注1,否则打上标注0。
  3. 标注1的矩形用红色画出,标注0的矩形用蓝色的线画出,Ground-truth用绿色的线画出
import cv2
import numpy as np#导入opencv库和numpy库
np.random.seed(0)
# get IoU overlap ratio
def iou(a, b):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Both boxes use the ``[x1, y1, x2, y2]`` layout, where ``(x1, y1)`` is the
    top-left corner and ``(x2, y2)`` is the bottom-right corner.

    Args:
        a: First box (any indexable of four numbers).
        b: Second box (same layout as ``a``).

    Returns:
        The overlap area divided by the union area. Boxes that do not
        overlap yield 0.
    """
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    # Corners of the intersection rectangle.
    inter_x1 = np.maximum(a[0], b[0])
    inter_y1 = np.maximum(a[1], b[1])
    inter_x2 = np.minimum(a[2], b[2])
    inter_y2 = np.minimum(a[3], b[3])
    # Clamp the extents at zero: for disjoint boxes the raw differences are
    # negative, and the product of two negative extents would otherwise be
    # counted as a genuine (positive) overlap area.
    inter_w = np.maximum(inter_x2 - inter_x1, 0)
    inter_h = np.maximum(inter_y2 - inter_y1, 0)
    area_inter = inter_w * inter_h
    # Union = sum of both areas minus the part counted twice.
    return area_inter / (area_a + area_b - area_inter)
# crop and create database
def crop_bbox(img, gt, Crop_N=200, L=60, th=0.5):
    """Draw randomly placed square boxes on ``img``, coloured by overlap with ``gt``.

    Args:
        img: Image array whose shape unpacks to (height, width, channels).
            It is drawn on in place.
        gt: Ground-truth box as ``[x1, y1, x2, y2]``.
        Crop_N: Number of random boxes to sample.
        L: Side length of every sampled box, in pixels.
        th: IoU threshold; a box with IoU >= ``th`` is drawn with colour
            (0, 0, 255), any other box with (255, 0, 0).

    Returns:
        The same ``img`` object, with the boxes drawn on it.
    """
    height, width, _ = img.shape
    for _ in range(Crop_N):
        # Sample the top-left corner (x first, then y) so that the whole
        # L x L box stays inside the image.
        left = np.random.randint(width - L)
        top = np.random.randint(height - L)
        right, bottom = left + L, top + L
        box = np.array((left, top, right, bottom))
        # Pick the outline colour from the overlap with the ground truth.
        color = (0, 0, 255) if iou(gt, box) >= th else (255, 0, 0)
        cv2.rectangle(img, (left, top), (right, bottom), color, 1)
    return img
# Load the image that the boxes will be drawn on.
img = cv2.imread("123.jpg")
# cv2.imread signals an unreadable/missing file by returning None.
if img is None:
    raise FileNotFoundError("could not read image: 123.jpg")
# Ground-truth bounding box as [x1, y1, x2, y2].
gt = np.array((47, 41, 129, 103), dtype=np.float32)
# Draw the randomly sampled boxes, coloured by their IoU with gt.
img = crop_bbox(img, gt)
# Draw the ground-truth box in green. gt is float32, but cv2.rectangle
# expects integer pixel coordinates, so convert each corner explicitly.
cv2.rectangle(img, (int(gt[0]), int(gt[1])), (int(gt[2]), int(gt[3])), (0,255,0), 1)
# Save and display the annotated image.
cv2.imwrite("out.jpg", img)
cv2.imshow("result", img)
cv2.waitKey(0)

通过上面这些代码,我们就可以从一张大图中裁切出若干个与指定目标区有一定重合度的图像,可以为我们之后的训练创造更多的数据进行训练

  • 将神经网络作为识别器,这就是现在流行的深度学习

下面的代码是包含输入层、两个中间层(Unit 数均为64)、输出层(Unit 数:1)的网络。这是实现异或逻辑的网络。

import numpy as np
np.random.seed(0)#导入numpy库
# neural network
class NN:
    """Fully connected network: input -> two sigmoid hidden layers -> sigmoid output.

    Trained with plain gradient descent on a squared-error style loss (the
    output delta is ``(out - t) * out * (1 - out)``).
    """

    def __init__(self, ind=2, w=64, w2=64, outd=1, lr=0.1):
        """Initialise all weights and biases from a standard normal distribution.

        Args:
            ind: Number of input features.
            w: Number of units in the first hidden layer.
            w2: Number of units in the second hidden layer.
            outd: Number of output units.
            lr: Learning rate used by ``train``.
        """
        # np.random.normal(loc, scale, size): mean 0, standard deviation 1.
        # First hidden layer weight and bias.
        self.w1 = np.random.normal(0, 1, [ind, w])
        self.b1 = np.random.normal(0, 1, [w])
        # Second hidden layer weight and bias.
        self.w2 = np.random.normal(0, 1, [w, w2])
        self.b2 = np.random.normal(0, 1, [w2])
        # Output layer weight and bias.
        self.wout = np.random.normal(0, 1, [w2, outd])
        self.bout = np.random.normal(0, 1, [outd])
        self.lr = lr

    def forward(self, x):
        """Run a forward pass and cache every layer's activation.

        Args:
            x: Input array whose last dimension is ``ind``.

        Returns:
            The output-layer activation (also stored as ``self.out``).
        """
        # The activations are kept on the instance because train() reads them.
        self.z1 = x
        self.z2 = sigmoid(np.dot(self.z1, self.w1) + self.b1)
        self.z3 = sigmoid(np.dot(self.z2, self.w2) + self.b2)
        self.out = sigmoid(np.dot(self.z3, self.wout) + self.bout)
        return self.out

    def train(self, x, t):
        """Apply one gradient-descent step using the last forward pass.

        ``forward`` must have been called on the batch beforehand: the
        activations cached there (``z1``, ``z2``, ``z3``, ``out``) are what the
        gradients are computed from.

        Args:
            x: Unused; the input cached by the last ``forward`` call is used
                instead. Kept so the call signature stays unchanged.
            t: Target values with the same shape as ``self.out``.
        """
        # Back-propagate through the whole network BEFORE touching any
        # parameter, so every delta is computed with the weights that actually
        # produced the cached activations.
        delta_out = (self.out - t) * self.out * (1 - self.out)
        delta_h2 = np.dot(delta_out, self.wout.T) * self.z3 * (1 - self.z3)
        delta_h1 = np.dot(delta_h2, self.w2.T) * self.z2 * (1 - self.z2)

        # Weight gradient = layer input^T . delta; bias gradient = delta
        # summed over the batch axis.
        self.wout -= self.lr * np.dot(self.z3.T, delta_out)
        self.bout -= self.lr * delta_out.sum(axis=0)
        self.w2 -= self.lr * np.dot(self.z2.T, delta_h2)
        self.b2 -= self.lr * delta_h2.sum(axis=0)
        self.w1 -= self.lr * np.dot(self.z1.T, delta_h1)
        self.b1 -= self.lr * delta_h1.sum(axis=0)
# sigmoid sigmoid函数
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    decay = np.exp(-x)
    return 1. / (1. + decay)
# train训练函数
def train_nn(nn, train_x, train_t, iteration_N=5000):
    """Train ``nn`` on the full data set for ``iteration_N`` iterations.

    Args:
        nn: Object exposing ``forward(x)`` and ``train(x, t)``.
        train_x: Training inputs, passed unchanged to both methods.
        train_t: Training targets, passed unchanged to ``train``.
        iteration_N: Number of forward/update rounds to run.

    Returns:
        The same ``nn`` object after training.
    """
    # Honour the caller's iteration count instead of a fixed 5000.
    for _ in range(iteration_N):
        # Forward pass first, then the parameter update.
        nn.forward(train_x)
        nn.train(train_x, train_t)
    return nn
# test
def test_nn(nn, test_x, test_t):#测试函数
    for j in range(len(test_x)):
        x = train_x[j]
        t = train_t[j]
        print("in:", x, "pred:", nn.forward(x))
# Training inputs: the four XOR input combinations.
train_x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
# Training targets: the XOR of each input pair.
train_t = np.array([[0], [1], [1], [0]], dtype=np.float32)
# Build the network with its default layer sizes.
nn = NN()
# Train on the full data set.
nn = train_nn(nn, train_x, train_t, iteration_N=5000)
# Print the predictions for the training inputs.
test_nn(nn, train_x, train_t)

如上所述,上述代码实现了一个简单的网络:将[0,0]、[1,1]打标为0,将[0,1]、[1,0]打标为1。

使用python创建一个神经网络

class NeuralNetwork:
    """Two-layer network (4 hidden units, 1 output) without biases.

    Relies on ``sigmoid`` and ``sigmoid_derivative`` being available at module
    level. NOTE(review): ``sigmoid_derivative`` is not defined in the visible
    part of this file - confirm it is provided elsewhere.
    """

    def __init__(self, x, y):
        """Store the data and draw uniform random weights in [0, 1).

        Args:
            x: Input samples; ``x.shape[1]`` is the number of features.
            y: Target values; the output buffer takes this shape.
        """
        self.input = x
        n_features = x.shape[1]
        # Draw order matters for seeded runs: hidden weights first.
        self.weights1 = np.random.rand(n_features, 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(y.shape)

    def feedforward(self):
        """Compute the hidden activation and the output for the stored input."""
        hidden_in = np.dot(self.input, self.weights1)
        self.layer1 = sigmoid(hidden_in)
        output_in = np.dot(self.layer1, self.weights2)
        self.output = sigmoid(output_in)

    def backprop(self):
        """Update both weight matrices from the last ``feedforward`` result."""
        # Chain rule, output layer: error term times the sigmoid slope.
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        # Chain rule, hidden layer: push the output delta back through the
        # (not yet updated) output weights.
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)
        # Step each weight matrix along its computed slope.
        self.weights1 += np.dot(self.input.T, hidden_delta)
        self.weights2 += np.dot(self.layer1.T, output_delta)
1111.png
主要分为三个阶段
  1. 信号的前向传播
  2. 反向计算梯度
  3. 更新权值和阈值
import cv2
import numpy as np#导入两个常用的包
np.random.seed(0)
# get HOG获取方向梯度直方图
def HOG(img):
    """Compute a Histogram of Oriented Gradients (HOG) descriptor.

    Pipeline: BGR -> grayscale -> x/y differences -> gradient magnitude and
    angle -> 9 orientation bins over [0, pi) -> magnitude-weighted histogram
    per 8x8 pixel cell -> normalisation of each cell by its 3x3 cell
    neighbourhood.

    Args:
        img: Image array indexed as ``[row, column, channel]`` with the
            channels in B, G, R order.

    Returns:
        float32 array of shape ``(H // 8, W // 8, 9)``; rows/columns that do
        not fill a complete cell are ignored.
    """
    N_BINS = 9

    def to_gray(bgr):
        # Weighted sum of the R, G and B channels (channel 2 is R).
        return 0.2126 * bgr[..., 2] + 0.7152 * bgr[..., 1] + 0.0722 * bgr[..., 0]

    def gradients_xy(gray):
        # Differences between the right/left and lower/upper neighbours.
        H, W = gray.shape
        # Replicate the border so edge pixels also have two neighbours.
        padded = np.pad(gray, (1, 1), 'edge')
        gx = padded[1:H + 1, 2:] - padded[1:H + 1, :W]
        gy = padded[2:, 1:W + 1] - padded[:H, 1:W + 1]
        # Avoid a division by zero when the angle is computed from gy / gx.
        gx[gx == 0] = 1e-6
        return gx, gy

    def magnitude_and_angle(gx, gy):
        magnitude = np.sqrt(gx ** 2 + gy ** 2)
        angle = np.arctan(gy / gx)
        # arctan yields (-pi/2, pi/2); shift negatives by pi to get [0, pi).
        angle[angle < 0] += np.pi
        return magnitude, angle

    def quantize(angle):
        # Map each angle to one of N_BINS equal-width orientation bins.
        # Built-in int: the np.int alias no longer exists in current NumPy.
        bins = np.zeros_like(angle, dtype=int)
        width = np.pi / N_BINS
        for i in range(N_BINS):
            bins[np.where((angle >= width * i) & (angle <= width * (i + 1)))] = i
        return bins

    def cell_histograms(bins, magnitude, N=8):
        # Sum the gradient magnitudes per orientation bin inside each cell.
        H, W = magnitude.shape
        cell_N_H = H // N
        cell_N_W = W // N
        histogram = np.zeros((cell_N_H, cell_N_W, N_BINS), dtype=np.float32)
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                # Cells are N pixels apart; the stride must follow the cell
                # size so that cells tile the image without overlapping.
                rows = slice(y * N, (y + 1) * N)
                cols = slice(x * N, (x + 1) * N)
                histogram[y, x] = np.bincount(
                    bins[rows, cols].ravel(),
                    weights=magnitude[rows, cols].ravel(),
                    minlength=N_BINS,
                )
        return histogram

    def normalize(histogram, C=3, epsilon=1):
        # Divide each cell by the L2 norm of its C x C cell neighbourhood.
        cell_N_H, cell_N_W, _ = histogram.shape
        r = C // 2
        # Write into a separate array so every norm is computed from the raw
        # histogram, not from neighbours that were already normalised.
        normalized = np.empty_like(histogram)
        for y in range(cell_N_H):
            for x in range(cell_N_W):
                block = histogram[max(y - r, 0): min(y + r + 1, cell_N_H),
                                  max(x - r, 0): min(x + r + 1, cell_N_W)]
                normalized[y, x] = histogram[y, x] / np.sqrt(np.sum(block ** 2) + epsilon)
        return normalized

    # 1. BGR -> gray
    gray = to_gray(img)
    # 2. gray -> gradient in x and y
    gx, gy = gradients_xy(gray)
    # 3. gradient magnitude and angle
    magnitude, angle = magnitude_and_angle(gx, gy)
    # 4. angle quantisation
    bins = quantize(angle)
    # 5. per-cell gradient histogram
    histogram = cell_histograms(bins, magnitude)
    # 6. histogram normalisation
    return normalize(histogram)
# get IoU overlap ratio
def iou(a, b):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Both boxes use the ``[x1, y1, x2, y2]`` layout, where ``(x1, y1)`` is the
    top-left corner and ``(x2, y2)`` is the bottom-right corner.

    Args:
        a: First box (any indexable of four numbers).
        b: Second box (same layout as ``a``).

    Returns:
        The overlap area divided by the union area. Boxes that do not
        overlap yield 0.
    """
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    # Corners of the intersection rectangle.
    inter_x1 = np.maximum(a[0], b[0])
    inter_y1 = np.maximum(a[1], b[1])
    inter_x2 = np.minimum(a[2], b[2])
    inter_y2 = np.minimum(a[3], b[3])
    # Clamp the extents at zero: for disjoint boxes the raw differences are
    # negative, and the product of two negative extents would otherwise be
    # counted as a genuine (positive) overlap area.
    inter_w = np.maximum(inter_x2 - inter_x1, 0)
    inter_h = np.maximum(inter_y2 - inter_y1, 0)
    area_inter = inter_w * inter_h
    # Union = sum of both areas minus the part counted twice.
    return area_inter / (area_a + area_b - area_inter)
# resize using bi-linear
def resize(img, h, w):
    """Resize an image to ``h`` x ``w`` using bi-linear interpolation.

    Args:
        img: Source array of shape (height, width, channels).
        h: Target height in pixels.
        w: Target width in pixels.

    Returns:
        Array of shape ``(h, w, channels)``; values above 255 are clipped
        to 255.
    """
    _h, _w, _c = img.shape
    # Scale factors from source to target size.
    ah = 1. * h / _h
    aw = 1. * w / _w
    # Source-space coordinate of every target row and column. Rows and
    # columns are kept as separate 1-D axes so that h and w may differ.
    ys = np.arange(h) / ah
    xs = np.arange(w) / aw
    # Index of the upper/left neighbour, limited so that a lower/right
    # neighbour still exists (and never negative for 1-pixel sources).
    iy0 = np.clip(np.floor(ys).astype(np.int32), 0, max(_h - 2, 0))
    ix0 = np.clip(np.floor(xs).astype(np.int32), 0, max(_w - 2, 0))
    iy1 = np.minimum(iy0 + 1, _h - 1)
    ix1 = np.minimum(ix0 + 1, _w - 1)
    # Interpolation weights, capped at 1 so positions past the last source
    # pixel repeat the edge value instead of extrapolating beyond it.
    dy = np.minimum(ys - iy0, 1.0)[:, None, None]
    dx = np.minimum(xs - ix0, 1.0)[None, :, None]
    # Shape the indices as (h, 1) and (1, w) so that indexing broadcasts to
    # an (h, w, channels) result.
    iy0, iy1 = iy0[:, None], iy1[:, None]
    ix0, ix1 = ix0[None, :], ix1[None, :]
    out = ((1 - dx) * (1 - dy) * img[iy0, ix0]
           + dx * (1 - dy) * img[iy0, ix1]
           + (1 - dx) * dy * img[iy1, ix0]
           + dx * dy * img[iy1, ix1])
    out[out > 255] = 255
    return out
# neural network
class NN:
    """Fully connected network: input -> two sigmoid hidden layers -> sigmoid output.

    Trained with plain gradient descent on a squared-error style loss (the
    output delta is ``(out - t) * out * (1 - out)``).
    """

    def __init__(self, ind=2, w=64, w2=64, outd=1, lr=0.1):
        """Initialise all weights and biases from a standard normal distribution.

        Args:
            ind: Number of input features.
            w: Number of units in the first hidden layer.
            w2: Number of units in the second hidden layer.
            outd: Number of output units.
            lr: Learning rate used by ``train``.
        """
        # First hidden layer weight and bias.
        self.w1 = np.random.normal(0, 1, [ind, w])
        self.b1 = np.random.normal(0, 1, [w])
        # Second hidden layer weight and bias.
        self.w2 = np.random.normal(0, 1, [w, w2])
        self.b2 = np.random.normal(0, 1, [w2])
        # Output layer weight and bias.
        self.wout = np.random.normal(0, 1, [w2, outd])
        self.bout = np.random.normal(0, 1, [outd])
        self.lr = lr

    def forward(self, x):
        """Run a forward pass and cache every layer's activation.

        Args:
            x: Input array whose last dimension is ``ind``.

        Returns:
            The output-layer activation (also stored as ``self.out``).
        """
        # The activations are kept on the instance because train() reads them.
        self.z1 = x
        self.z2 = sigmoid(np.dot(self.z1, self.w1) + self.b1)
        self.z3 = sigmoid(np.dot(self.z2, self.w2) + self.b2)
        self.out = sigmoid(np.dot(self.z3, self.wout) + self.bout)
        return self.out

    def train(self, x, t):
        """Apply one gradient-descent step using the last forward pass.

        ``forward`` must have been called on the batch beforehand: the
        activations cached there (``z1``, ``z2``, ``z3``, ``out``) are what the
        gradients are computed from.

        Args:
            x: Unused; the input cached by the last ``forward`` call is used
                instead. Kept so the call signature stays unchanged.
            t: Target values with the same shape as ``self.out``.
        """
        # Back-propagate through the whole network BEFORE touching any
        # parameter, so every delta is computed with the weights that actually
        # produced the cached activations.
        delta_out = (self.out - t) * self.out * (1 - self.out)
        delta_h2 = np.dot(delta_out, self.wout.T) * self.z3 * (1 - self.z3)
        delta_h1 = np.dot(delta_h2, self.w2.T) * self.z2 * (1 - self.z2)

        # Weight gradient = layer input^T . delta; bias gradient = delta
        # summed over the batch axis.
        self.wout -= self.lr * np.dot(self.z3.T, delta_out)
        self.bout -= self.lr * delta_out.sum(axis=0)
        self.w2 -= self.lr * np.dot(self.z2.T, delta_h2)
        self.b2 -= self.lr * delta_h2.sum(axis=0)
        self.w1 -= self.lr * np.dot(self.z1.T, delta_h1)
        self.b1 -= self.lr * delta_h1.sum(axis=0)
# sigmoid
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), element-wise for arrays."""
    decay = np.exp(-x)
    return 1. / (1. + decay)
# train
def train_nn(nn, train_x, train_t, iteration_N=10000):
    """Train ``nn`` on the full data set for ``iteration_N`` iterations.

    Args:
        nn: Object exposing ``forward(x)`` and ``train(x, t)``.
        train_x: Training inputs, passed unchanged to both methods.
        train_t: Training targets, passed unchanged to ``train``.
        iteration_N: Number of forward/update rounds to run.

    Returns:
        The same ``nn`` object after training.
    """
    for _step in range(iteration_N):
        # Forward pass first, then the parameter update.
        nn.forward(train_x)
        nn.train(train_x, train_t)
    return nn
# test
def test_nn(nn, test_x, test_t, pred_th=0.5):
    accuracy_N = 0.
    # each data
    for data, t in zip(test_x, test_t):
        # get prediction
        prob = nn.forward(data)
        # count accuracy
        pred = 1 if prob >= pred_th else 0
        if t == pred:
            accuracy_N += 1
    # get accuracy
    accuracy = accuracy_N / len(db)
    print("Accuracy >> {} ({} / {})".format(accuracy, accuracy_N, len(db)))
# crop bounding box and make dataset
def make_dataset(img, gt, Crop_N=200, L=60, th=0.5, H_size=32):
    """Build a labelled HOG data set from random square crops of ``img``.

    Each sampled box is labelled 1 when its IoU with ``gt`` is >= ``th`` and 0
    otherwise. The box is also drawn on ``img`` (in place) with colour
    (0, 0, 255) for label 1 and (255, 0, 0) for label 0.

    Args:
        img: Image array whose shape unpacks to (height, width, channels).
        gt: Ground-truth box as ``[x1, y1, x2, y2]``.
        Crop_N: Number of crops to sample.
        L: Side length of each crop, in pixels.
        th: IoU threshold for the positive label.
        H_size: Side length each crop is resized to before HOG extraction.

    Returns:
        Array of shape ``(Crop_N, HOG_feature_N + 1)``: the flattened HOG
        feature of each crop followed by its label in the last column.
    """
    H, W, _ = img.shape
    # HOG yields 9 orientation bins for each 8x8 pixel cell.
    HOG_feature_N = ((H_size // 8) ** 2) * 9
    db = np.zeros([Crop_N, HOG_feature_N + 1])
    # Features are taken from an untouched copy: the rectangles drawn on
    # ``img`` are coloured by label, so cropping from ``img`` itself would
    # paint the label (and earlier boxes) into the training features.
    clean = img.copy()
    for i in range(Crop_N):
        # Sample the top-left corner (x first, then y) so that the whole
        # L x L box stays inside the image.
        x1 = np.random.randint(W - L)
        y1 = np.random.randint(H - L)
        x2 = x1 + L
        y2 = y1 + L
        crop = np.array((x1, y1, x2, y2))
        # Never treat a negative score as better than "no overlap".
        overlap = max(iou(gt, crop), 0.0)
        label = 1 if overlap >= th else 0
        color = (0, 0, 255) if label == 1 else (255, 0, 0)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, 1)
        # Crop, bring to a fixed size, and describe with HOG.
        patch = resize(clean[y1:y2, x1:x2], H_size, H_size)
        db[i, :HOG_feature_N] = HOG(patch).ravel()
        db[i, -1] = label
    return db
# Read the image; cv2.imread signals an unreadable/missing file with None.
img = cv2.imread("123.jpg")
if img is None:
    raise FileNotFoundError("could not read image: 123.jpg")
img = img.astype(np.float32)
# HOG descriptor of the whole image.
histogram = HOG(img)
# Ground-truth bounding box as [x1, y1, x2, y2].
gt = np.array((47, 41, 129, 103), dtype=np.float32)
# Build the database: one row per crop, HOG features plus the label.
db = make_dataset(img, gt)
# The last column is the label, everything before it is the feature vector.
input_dim = db.shape[1] - 1
# Training inputs.
train_x = db[:, :input_dim]
# Training targets as a column vector.
train_t = db[:, -1][..., None]
# Network sized to the feature dimension.
nn = NN(ind=input_dim, lr=0.01)
# Train.
nn = train_nn(nn, train_x, train_t, iteration_N=10000)
# Report the accuracy on the training data.
test_nn(nn, train_x, train_t)

将深度学习应用于物体检测

基本步骤

  1. 从图像左上角开始进行滑动窗口扫描;
  2. 在滑动的过程中,会依次圈出很多矩形区域;
  3. 裁剪出每个矩形区域对应的图像,并对裁剪出的图像提取特征(HOG,SIFT等);
  4. 使用分类器(CNN,SVM等)以确定每个矩形是否包含目标。

相关文章

网友评论

      本文标题:机器学习与深度学习在图像处理上的小试牛刀

      本文链接:https://www.haomeiwen.com/subject/rmewtktx.html