美文网首页
Facenet源码解读之compare.py

Facenet源码解读之compare.py

作者: 弓長知行 | 来源:发表于2019-02-25 14:58 被阅读0次
    """Performs face alignment and calculates L2 distance between the embeddings of images."""
    
    # MIT License
    # 
    # Copyright (c) 2016 David Sandberg
    # 
    # Permission is hereby granted, free of charge, to any person obtaining a copy
    # of this software and associated documentation files (the "Software"), to deal
    # in the Software without restriction, including without limitation the rights
    # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    # copies of the Software, and to permit persons to whom the Software is
    # furnished to do so, subject to the following conditions:
    # 
    # The above copyright notice and this permission notice shall be included in all
    # copies or substantial portions of the Software.
    # 
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    # SOFTWARE.
    
    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    
    from scipy import misc
    import tensorflow as tf
    import numpy as np
    import sys
    import os
    import copy
    import argparse
    import facenet
    import align.detect_face
    
    def main(args):
        """Embed the aligned faces with the FaceNet model and print pairwise L2 distances.

        Args:
            args: Parsed command-line namespace; reads ``image_files``,
                ``image_size``, ``margin``, ``gpu_memory_fraction`` and ``model``.
        """
        images = load_and_align_data(args.image_files, args.image_size, args.margin, args.gpu_memory_fraction)

        with tf.Graph().as_default():
            with tf.Session() as sess:
                # Load the model into the current default graph.
                facenet.load_model(args.model)

                # Look up the input / output tensors by name.
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")  # network input
                embeddings = graph.get_tensor_by_name("embeddings:0")  # network output
                phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

                # Forward pass with the train-phase flag fed as False.
                emb = sess.run(
                    embeddings,
                    feed_dict={images_placeholder: images, phase_train_placeholder: False})

                # Number of images listed in args.image_files at this point.
                nrof_images = len(args.image_files)

                print('Images:')
                for idx, path in enumerate(args.image_files):
                    print('%1d: %s' % (idx, path))
                print('')

                # Distance matrix: one header row of column indices, then one
                # row per image holding its distance to every image.
                print('Distance matrix')
                header_cells = ['    %1d     ' % col for col in range(nrof_images)]
                print('    ' + ''.join(header_cells))
                for row in range(nrof_images):
                    row_cells = []
                    for col in range(nrof_images):
                        # Euclidean (L2) distance between the two embeddings.
                        diff = np.subtract(emb[row, :], emb[col, :])
                        dist = np.sqrt(np.sum(np.square(diff)))
                        row_cells.append('  %1.4f  ' % dist)
                    print(('%1d  ' % row) + ''.join(row_cells))
                
                
    def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
        """Detect, crop, resize and prewhiten one face per input image.

        Every image is run through the MTCNN detector. The first returned
        bounding box is enlarged by ``margin / 2`` pixels on each side (clamped
        to the image borders), cropped, resized to ``image_size`` x
        ``image_size`` and normalised with ``facenet.prewhiten``.

        Args:
            image_paths: List of image file paths. Paths for which no face is
                detected are removed from this list IN PLACE, so the caller's
                list stays aligned with the returned batch.
            image_size: Side length, in pixels, of the square output crops.
            margin: Total number of pixels added to the detected box along each
                axis (half on each side).
            gpu_memory_fraction: Value passed to
                ``tf.GPUOptions(per_process_gpu_memory_fraction=...)`` for the
                MTCNN session.

        Returns:
            The processed face crops stacked with ``np.stack`` along a new
            leading axis, in the order of the paths remaining in
            ``image_paths``.

        Raises:
            ValueError: If no face could be detected in any of the images.
        """
        # The three parameters required by MTCNN.
        minsize = 20  # minimum size of face
        threshold = [0.6, 0.7, 0.7]  # three steps's threshold
        factor = 0.709  # scale factor

        print('Creating networks and loading parameters')
        # Build the MTCNN networks in a dedicated graph and session.
        with tf.Graph().as_default():
            gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_memory_fraction)
            sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
            with sess.as_default():
                pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

        # Iterate over a copy because image_paths is mutated inside the loop.
        tmp_image_paths = copy.copy(image_paths)
        img_list = []
        try:
            for image in tmp_image_paths:
                img = misc.imread(os.path.expanduser(image), mode='RGB')
                img_size = np.asarray(img.shape)[0:2]  # (height, width)
                # Per the original annotation, the detector returns an array of
                # shape (n_boxes, 5): four box coordinates followed by a score.
                bounding_boxes, _ = align.detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                # No face found: drop the path from the caller's list and skip it.
                if len(bounding_boxes) < 1:
                    image_paths.remove(image)
                    print("can't detect face, remove ", image)
                    continue
                # Only the first detected box is used.
                det = np.squeeze(bounding_boxes[0, 0:4])
                # bb = [x1, y1, x2, y2]: the detected box grown outwards by
                # margin/2 on every side and clamped to the image, so the crop
                # keeps some context around the face.
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[0] - margin / 2, 0)  # left edge
                bb[1] = np.maximum(det[1] - margin / 2, 0)  # top edge
                bb[2] = np.minimum(det[2] + margin / 2, img_size[1])  # right edge, at most the width
                bb[3] = np.minimum(det[3] + margin / 2, img_size[0])  # bottom edge, at most the height
                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]  # crop the face region
                # Resize to the network input size (160x160 with the default arguments).
                aligned = misc.imresize(cropped, (image_size, image_size), interp='bilinear')
                prewhitened = facenet.prewhiten(aligned)  # per-image normalisation
                img_list.append(prewhitened)
        finally:
            # The detector networks are not needed after this function, so
            # release the session's resources instead of leaking them.
            sess.close()

        if not img_list:
            # np.stack would raise an opaque ValueError on an empty list; keep
            # the exception type but say what actually went wrong.
            raise ValueError('No face could be detected in any of the input images')
        images = np.stack(img_list)  # stack the individual crops into one batch
        return images
    
    def parse_arguments(argv):
        """Parse the command-line arguments of the comparison script.

        Args:
            argv: List of argument strings, without the program name.

        Returns:
            The ``argparse.Namespace`` with ``model``, ``image_files``,
            ``image_size``, ``margin`` and ``gpu_memory_fraction``.
        """
        cli = argparse.ArgumentParser()

        # Positional arguments: the model location, then one or more images.
        cli.add_argument(
            'model',
            type=str,
            help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file')
        cli.add_argument(
            'image_files',
            type=str,
            nargs='+',
            help='Images to compare')

        # Optional tuning parameters with their defaults.
        cli.add_argument(
            '--image_size',
            type=int,
            default=160,
            help='Image size (height, width) in pixels.')
        cli.add_argument(
            '--margin',
            type=int,
            default=44,
            help='Margin for the crop around the bounding box (height, width) in pixels.')
        cli.add_argument(
            '--gpu_memory_fraction',
            type=float,
            default=1.0,
            help='Upper bound on the amount of GPU memory that will be used by the process.')

        namespace = cli.parse_args(argv)
        return namespace
    
    if __name__ == '__main__':
        # Script entry point: parse everything after the program name, then run.
        cli_args = parse_arguments(sys.argv[1:])
        main(cli_args)
    
    

    欧氏距离公式:
    n维空间中两点 a(x11,x12,…,x1n) 与 b(x21,x22,…,x2n)(即两个n维向量)之间的欧氏距离为:
    $$d(a,b)=\sqrt{\sum_{k=1}^{n}\left(x_{1k}-x_{2k}\right)^{2}}$$
    代码中的 dist 即按此公式计算。


    图片.png

    相关文章

      网友评论

          本文标题:Facenet源码解读之compare.py

          本文链接:https://www.haomeiwen.com/subject/zohgyqtx.html