opencv+tensorflow+cnn实现人脸识别

作者: 少寨主的互联网洞察 | 来源:发表于2018-03-13 16:07 被阅读13759次

    训练“她”让“她”认识我
    学习自@little_wang
    原文地址:

    https://www.jianshu.com/p/38ca6daf6b40

    1.获取我的人脸数据:

    注意以下有一个引用库import cv2
    为顺利找到依赖库函数,需要先安装库:

    pip install opencv-python

    • 使用opencv打开摄像头,获取人脸
    • 对图像做一些预处理,如处理成64*64大小的图片
    • 获取期间,做一些明暗处理,以增加图像的噪声干扰,使得训练出来的模型具备一定的泛化能力
    • 共获取200张照片
    #!/usr/bin/python
    #coding=utf-8
    ''' face detect
    https://github.com/seathiefwang/FaceRecognition-tensorflow
    http://tumumu.cn/2017/05/02/deep-learning-face/
    '''
    # pylint: disable=invalid-name
    import os
    import random
    import numpy as np
    import cv2
    
    def createdir(*args):
        ''' create dir'''
        for item in args:
            if not os.path.exists(item):
                os.makedirs(item)
    
    IMGSIZE = 64
    
    
    def getpaddingSize(shape):
        ''' get size to make image to be a square rect '''
        h, w = shape
        longest = max(h, w)
        result = (np.array([longest]*4, int) - np.array([h, h, w, w], int)) // 2
        return result.tolist()
    
    def dealwithimage(img, h=64, w=64):
        ''' dealwithimage '''
        #img = cv2.imread(imgpath)
        top, bottom, left, right = getpaddingSize(img.shape[0:2])
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        img = cv2.resize(img, (h, w))
        return img
    
    def relight(imgsrc, alpha=1, bias=0):
        '''relight'''
        imgsrc = imgsrc.astype(float)
        imgsrc = imgsrc * alpha + bias
        imgsrc[imgsrc < 0] = 0
        imgsrc[imgsrc > 255] = 255
        imgsrc = imgsrc.astype(np.uint8)
        return imgsrc
    
    def getfacefromcamera(outdir):
        createdir(outdir)
        camera = cv2.VideoCapture(0)
        haar = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        n = 1
        while 1:
            if (n <= 200):
                print('It`s processing %s image.' % n)
                # 读帧
                success, img = camera.read()
    
                gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = haar.detectMultiScale(gray_img, 1.3, 5)
                for f_x, f_y, f_w, f_h in faces:
                    face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                    face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                    #could deal with face to train
                    face = relight(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
                    cv2.imwrite(os.path.join(outdir, str(n)+'.jpg'), face)
    
                    cv2.putText(img, 'haha', (f_x, f_y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)  #显示名字
                    img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (255, 0, 0), 2)
                    n+=1
                cv2.imshow('img', img)
                key = cv2.waitKey(30) & 0xff
                if key == 27:
                    break
            else:
                break
        camera.release()
        cv2.destroyAllWindows()
    
    if __name__ == '__main__':
        name = input('please input yourename: ')
        getfacefromcamera(os.path.join('./image/trainfaces', name))
    
    
    

    执行完以后效果是这样的(原谅我作了处理(-——-))


    Inked捕获_LI.jpg
    2.创建CNN网络:
    #!/usr/bin/python
    #coding=utf-8
    ''' face detect convolution'''
    # pylint: disable=invalid-name
    import os
    import logging as log
    import matplotlib.pyplot as plt
    import common
    import numpy as np
    from tensorflow.examples.tutorials.mnist import input_data
    import tensorflow as tf
    import cv2
    
    SIZE = 64
    x_data = tf.placeholder(tf.float32, [None, SIZE, SIZE, 3])
    y_data = tf.placeholder(tf.float32, [None, None])
    
    keep_prob_5 = tf.placeholder(tf.float32)
    keep_prob_75 = tf.placeholder(tf.float32)
    
    def weightVariable(shape):
        ''' build weight variable'''
        init = tf.random_normal(shape, stddev=0.01)
        #init = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(init)
    
    def biasVariable(shape):
        ''' build bias variable'''
        init = tf.random_normal(shape)
        #init = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(init)
    
    def conv2d(x, W):
        ''' conv2d by 1, 1, 1, 1'''
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    
    def maxPool(x):
        ''' max pooling'''
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    
    def dropout(x, keep):
        ''' drop out'''
        return tf.nn.dropout(x, keep)
    
    def cnnLayer(classnum):
        ''' create cnn layer'''
        # 第一层
        W1 = weightVariable([3, 3, 3, 32]) # 卷积核大小(3,3), 输入通道(3), 输出通道(32)
        b1 = biasVariable([32])
        conv1 = tf.nn.relu(conv2d(x_data, W1) + b1)
        pool1 = maxPool(conv1)
        # 减少过拟合,随机让某些权重不更新
        drop1 = dropout(pool1, keep_prob_5) # 32 * 32 * 32 多个输入channel 被filter内积掉了
    
        # 第二层
        W2 = weightVariable([3, 3, 32, 64])
        b2 = biasVariable([64])
        conv2 = tf.nn.relu(conv2d(drop1, W2) + b2)
        pool2 = maxPool(conv2)
        drop2 = dropout(pool2, keep_prob_5) # 64 * 16 * 16
    
        # 第三层
        W3 = weightVariable([3, 3, 64, 64])
        b3 = biasVariable([64])
        conv3 = tf.nn.relu(conv2d(drop2, W3) + b3)
        pool3 = maxPool(conv3)
        drop3 = dropout(pool3, keep_prob_5) # 64 * 8 * 8
    
        # 全连接层
        Wf = weightVariable([8*16*32, 512])
        bf = biasVariable([512])
        drop3_flat = tf.reshape(drop3, [-1, 8*16*32])
        dense = tf.nn.relu(tf.matmul(drop3_flat, Wf) + bf)
        dropf = dropout(dense, keep_prob_75)
    
        # 输出层
        Wout = weightVariable([512, classnum])
        bout = weightVariable([classnum])
        #out = tf.matmul(dropf, Wout) + bout
        out = tf.add(tf.matmul(dropf, Wout), bout)
        return out
    
    def train(train_x, train_y, tfsavepath):
        ''' train'''
        log.debug('train')
        out = cnnLayer(train_y.shape[1])
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_data))
        train_step = tf.train.AdamOptimizer(0.01).minimize(cross_entropy)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y_data, 1)), tf.float32))
    
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            batch_size = 10
            num_batch = len(train_x) // 10
            for n in range(10):
                r = np.random.permutation(len(train_x))
                train_x = train_x[r, :]
                train_y = train_y[r, :]
    
                for i in range(num_batch):
                    batch_x = train_x[i*batch_size : (i+1)*batch_size]
                    batch_y = train_y[i*batch_size : (i+1)*batch_size]
                    _, loss = sess.run([train_step, cross_entropy],\
                                       feed_dict={x_data:batch_x, y_data:batch_y,
                                                  keep_prob_5:0.75, keep_prob_75:0.75})
    
                    print(n*num_batch+i, loss)
    
            # 获取测试数据的准确率
            acc = accuracy.eval({x_data:train_x, y_data:train_y, keep_prob_5:1.0, keep_prob_75:1.0})
            print('after 10 times run: accuracy is ', acc)
            saver.save(sess, tfsavepath)
    
    def validate(test_x, tfsavepath):
        ''' validate '''
        output = cnnLayer(2)
        #predict = tf.equal(tf.argmax(output, 1), tf.argmax(y_data, 1))
        predict = output
    
        saver = tf.train.Saver()
        with tf.Session() as sess:
            #sess.run(tf.global_variables_initializer())
            saver.restore(sess, tfsavepath)
            res = sess.run([predict, tf.argmax(output, 1)],
                           feed_dict={x_data: test_x,
                                      keep_prob_5:1.0, keep_prob_75: 1.0})
            return res
    
    if __name__ == '__main__':
        pass
    
    

    使用tf创建3层cnn,3 * 3的filter,输入为rgb所以:

    • 第一层的channel是3,图像宽高为64,输出32个filter,maxpooling是缩放一倍
    • 第二层的输入为32个channel,宽高是32,输出为64个filter,maxpooling是缩放一倍
    • 第三层的输入为64个channel,宽高是16,输出为64个filter,maxpooling是缩放一倍
      所以最后输入的图像是8 * 8 * 64,卷积层和全连接层都设置了dropout参数

    将输入的8 * 8 * 64的多维度,进行flatten,映射到512个数据上,然后进行softmax,输出到onehot类别上,类别的输入根据采集的人员的个数来确定。

    3.识别人脸分类
    def getfileandlabel(filedir):
        ''' get path and host paire and class index to name'''
        dictdir = dict([[name, os.path.join(filedir, name)] \
                        for name in os.listdir(filedir) if os.path.isdir(os.path.join(filedir, name))])
                        #for (path, dirnames, _) in os.walk(filedir) for dirname in dirnames])
    
        dirnamelist, dirpathlist = dictdir.keys(), dictdir.values()
        indexlist = list(range(len(dirnamelist)))
    
        return list(zip(dirpathlist, onehot(indexlist))), dict(zip(indexlist, dirnamelist))
        
    pathlabelpair, indextoname = getfileandlabel('./image/trainfaces')
    train_x, train_y = readimage(pathlabelpair)
    train_x = train_x.astype(np.float32) / 255.0
    myconv.train(train_x, train_y, savepath)
    
    • 将人脸从子目录内读出来,根据不同的人名,分配不同的onehot值,这里是按照遍历的顺序分配序号,然后训练,完成之后会保存checkpoint
    • 图像识别之前将像素值转换为0到1的范围
    • 需要多次训练的话,把checkpoint下面的上次训练结果删除,代码有个判断,有上一次的训练结果,就不会再训练了
    4.识别图像
    def testfromcamera(chkpoint):
        camera = cv2.VideoCapture(0)
        haar = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        pathlabelpair, indextoname = getfileandlabel('./image/trainfaces')
        output = myconv.cnnLayer(len(pathlabelpair))
        #predict = tf.equal(tf.argmax(output, 1), tf.argmax(y_data, 1))
        predict = output
    
        saver = tf.train.Saver()
        with tf.Session() as sess:
            #sess.run(tf.global_variables_initializer())
            saver.restore(sess, chkpoint)
            
            n = 1
            while 1:
                if (n <= 20000):
                    print('It`s processing %s image.' % n)
                    # 读帧
                    success, img = camera.read()
    
                    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                    faces = haar.detectMultiScale(gray_img, 1.3, 5)
                    for f_x, f_y, f_w, f_h in faces:
                        face = img[f_y:f_y+f_h, f_x:f_x+f_w]
                        face = cv2.resize(face, (IMGSIZE, IMGSIZE))
                        #could deal with face to train
                        test_x = np.array([face])
                        test_x = test_x.astype(np.float32) / 255.0
                        
                        res = sess.run([predict, tf.argmax(output, 1)],\
                                       feed_dict={myconv.x_data: test_x,\
                                       myconv.keep_prob_5:1.0, myconv.keep_prob_75: 1.0})
                        print(res)
    
                        cv2.putText(img, indextoname[res[1][0]], (f_x, f_y - 20), cv2.FONT_HERSHEY_SIMPLEX, 1, 255, 2)  #显示名字
                        img = cv2.rectangle(img, (f_x, f_y), (f_x + f_w, f_y + f_h), (255, 0, 0), 2)
                        n+=1
                    cv2.imshow('img', img)
                    key = cv2.waitKey(30) & 0xff
                    if key == 27:
                        break
                else:
                    break
        camera.release()
        cv2.destroyAllWindows()
    
    • 从训练的结果中恢复训练识别的参数,然后用于新的识别判断
    • 打开摄像头,采集到图片之后,进行人脸检测,检测出来之后,进行人脸识别,根据结果对应到人员名字,显示在图片中人脸的上面
    5.测试效果如下
    测试结果.PNG

    相关文章

      网友评论

        本文标题:opencv+tensorflow+cnn实现人脸识别

        本文链接:https://www.haomeiwen.com/subject/qctzfftx.html