美文网首页
9.8 利用视觉码本和向量量化创建特征

9.8 利用视觉码本和向量量化创建特征

作者: MaskStar | 来源:发表于2018-11-12 15:22 被阅读0次

    为了创建一个目标识别系统,需要从每张图像中提取特征向量。每张图像需要有一个识别标志,以用于匹配。
    我们用视觉码本的概念来创建图像识别标志。码本实际上就是一个字典,我们用它来为训练数据集中的图像生成描述。具体做法是:用向量量化方法对大量特征点进行聚类并得出中心点,这些中心点将作为视觉码本的元素。

    训练数据集

    示例训练数据集包含3类,每一类包含20幅图像,可以在 http://www.vision.caltech.edu/html-files/archive.html 下载。

    处理加载数据集:

    def load_training_data(input_folder):
        """Collect every ``.jpg`` file below ``input_folder`` with its class label.

        The label of an image is the name of the directory that directly
        contains it (the second-to-last component of its path).

        Args:
            input_folder: root directory that is walked recursively.

        Returns:
            A list of dicts, one per image, with the keys ``'object_class'``
            and ``'image_path'`` (path written with forward slashes).

        Raises:
            IOError: if ``input_folder`` is not an existing directory.
        """
        if not os.path.isdir(input_folder):
            raise IOError("The folder " + input_folder + " doesn't exist")

        records = []
        for current_dir, _subdirs, names in os.walk(input_folder):
            for name in names:
                if not name.endswith('.jpg'):
                    continue
                # Normalise Windows separators so the label can be read off
                # with a plain split on '/'.
                image_path = os.path.join(current_dir, name).replace('\\', '/')
                label = image_path.split('/')[-2]
                records.append({'object_class': label,
                                'image_path': image_path})
        return records
    

    提取图片的特征:

    class FeatureBuilder(object):
        """Extract local image features and build a visual codebook from them.

        Keypoints are located with the Star detector and described with SIFT;
        the descriptors gathered from the training images are then clustered
        to obtain the codebook.
        """

        def extract_features(self, img):
            """Return the SIFT descriptors computed at the Star keypoints of ``img``.

            Args:
                img: image to analyse (as loaded by ``cv2.imread``).

            Returns:
                The descriptor array produced by ``compute_sift_features``.
                NOTE(review): presumably ``None`` when no keypoint is found --
                confirm against the OpenCV version in use.
            """
            keypoints = StarFeatureDetector().detect(img)
            keypoints, feature_vectors = compute_sift_features(img, keypoints)
            return feature_vectors

        def get_codewords(self, input_map, scaling_size, max_samples=12):
            """Cluster the descriptors of at most ``max_samples`` images per class.

            Args:
                input_map: iterable of dicts with the keys ``'object_class'``
                    and ``'image_path'``, e.g.
                    ``{'image_path': 'training_images/airplanes/0001.jpg',
                    'object_class': 'airplanes'}``.
                scaling_size: target size handed to ``resize_image``.
                max_samples: maximum number of images used from each class;
                    further images of that class are skipped.

            Returns:
                The ``(kmeans, centroids)`` pair returned by
                ``BagOfWords().cluster``.
            """
            descriptors_all = []
            # Images already used, tracked per class label.  A single running
            # counter that is only reset once the cap has been reached lets a
            # class with fewer than ``max_samples`` images eat into the quota
            # of the class that follows it.
            used_per_class = {}

            for item in input_map:
                object_class = item['object_class']
                used = used_per_class.get(object_class, 0)
                if used >= max_samples:
                    continue
                used += 1
                used_per_class[object_class] = used
                if used == max_samples:
                    print("Built centroids for", object_class)

                img = cv2.imread(item['image_path'])
                img = resize_image(img, scaling_size)

                feature_vectors = self.extract_features(img)
                if feature_vectors is None:
                    # Nothing to contribute; extending a list with None would
                    # raise a TypeError.
                    continue
                descriptors_all.extend(feature_vectors)

            # Vector-quantize all collected descriptors into the codebook.
            kmeans, centroids = BagOfWords().cluster(descriptors_all)
            return kmeans, centroids
    

    定义一个类来处理词袋模型和向量量化

    class BagOfWords(object):
        """Bag-of-visual-words helper: k-means quantization and histograms."""

        def __init__(self, num_clusters=32):
            """Store the clustering settings.

            Args:
                num_clusters: number of k-means clusters, which is also the
                    length of the histogram built by ``construct_feature``.
            """
            self.num_dims = 128  # stored for callers; not read inside this class
            self.num_clusters = num_clusters
            self.num_retries = 10  # forwarded to KMeans as ``n_init``

        def cluster(self, datapoints):
            """Fit k-means on ``datapoints`` and return the model and its centers.

            Args:
                datapoints: sequence of feature vectors to quantize.

            Returns:
                ``(kmeans, centroids)`` where ``centroids`` is the fitted
                model's ``cluster_centers_``.
            """
            kmeans = KMeans(n_clusters=self.num_clusters,
                            n_init=max(self.num_retries, 1),
                            max_iter=10, tol=1.0)
            kmeans.fit(datapoints)
            return kmeans, kmeans.cluster_centers_

        def normalize(self, input_data):
            """Scale ``input_data`` so its entries sum to 1.

            Data whose sum is not positive is returned unchanged, which avoids
            a division by zero.
            """
            sum_input = np.sum(input_data)
            if sum_input > 0:
                return input_data / sum_input
            return input_data

        def construct_feature(self, img, kmeans, centroids):
            """Build the normalized codeword histogram describing ``img``.

            Args:
                img: image to describe.
                kmeans: fitted model used to assign each descriptor to a
                    codeword via ``kmeans.predict``.
                centroids: kept for interface compatibility; not read here.

            Returns:
                An array of shape ``(1, num_clusters)`` whose entries sum to 1,
                or ``None`` when the image yields no descriptors.  Callers such
                as ``get_feature_map`` already skip ``None`` results.
            """
            keypoints = StarFeatureDetector().detect(img)
            keypoints, feature_vectors = compute_sift_features(img, keypoints)
            if feature_vectors is None or len(feature_vectors) == 0:
                # Without descriptors there is nothing to quantize, and
                # predict() would fail on the empty input.
                return None

            # Assign every descriptor to its nearest codeword.
            labels = kmeans.predict(feature_vectors)

            # Count how often each codeword occurs in the image.
            feature_vector = np.zeros(self.num_clusters)
            for label in labels:
                feature_vector[label] += 1

            feature_vector_img = np.reshape(feature_vector,
                                            (1, feature_vector.shape[0]))
            return self.normalize(feature_vector_img)
    
    

    输入图像提取特征然后映射到某一类

    def compute_sift_features(img, keypoints):
        """Compute SIFT descriptors for ``img`` at the given ``keypoints``.

        Args:
            img: BGR image; it is converted to grayscale before description.
            keypoints: keypoints at which the descriptors are computed.

        Returns:
            The ``(keypoints, descriptors)`` pair produced by the SIFT
            extractor's ``compute`` call.

        Raises:
            TypeError: if ``img`` is ``None``.
        """
        if img is None:
            raise TypeError('Invalid input image')

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        sift = cv2.xfeatures2d.SIFT_create()
        kept_keypoints, sift_descriptors = sift.compute(gray, keypoints)
        return kept_keypoints, sift_descriptors
    

    定义一个函数,为每幅训练图像提取特征向量并生成特征映射

    def get_feature_map(input_map, kmeans, centroids, scaling_size):
        """Build one labelled feature vector per image in ``input_map``.

        Args:
            input_map: iterable of dicts with the keys ``'object_class'`` and
                ``'image_path'``.
            kmeans: clustering model forwarded to
                ``BagOfWords.construct_feature``.
            centroids: codebook centers, forwarded unchanged.
            scaling_size: target size handed to ``resize_image``.

        Returns:
            A list of dicts with the keys ``'object_class'`` and
            ``'feature_vector'``; images whose feature vector is ``None`` are
            left out.
        """
        results = []
        for sample in input_map:
            label = sample['object_class']
            print("Extracting features for", sample['image_path'])

            image = resize_image(cv2.imread(sample['image_path']), scaling_size)
            histogram = BagOfWords().construct_feature(image, kmeans, centroids)

            if histogram is None:
                continue
            results.append({'object_class': label,
                            'feature_vector': histogram})
        return results
    

    resize_image

    def resize_image(input_img, new_size):
        """Resize ``input_img`` so that its shorter side becomes ``new_size``.

        The aspect ratio is kept: both dimensions are multiplied by the same
        factor and truncated to integers.

        Args:
            input_img: image array whose first two ``shape`` entries are
                height and width.
            new_size: length the shorter side is scaled to.

        Returns:
            The result of ``cv2.resize`` for the computed (width, height).
        """
        height, width = input_img.shape[:2]
        shorter_side = width if width < height else height
        scale = new_size / float(shorter_side)

        # cv2.resize takes the target size as (width, height).
        target_size = (int(width * scale), int(height * scale))
        return cv2.resize(input_img, target_size)
    

    Star检测器

    class StarFeatureDetector(object):
        """Thin wrapper around OpenCV's Star keypoint detector."""

        def __init__(self):
            """Create the underlying detector once per instance."""
            star = cv2.xfeatures2d.StarDetector_create()
            self.detector = star

        def detect(self, img):
            """Return whatever the wrapped detector finds in ``img``."""
            found = self.detector.detect(img)
            return found
    

    主文件import

    # -*- coding:utf8 -*-
    import os
    import sys
    import argparse
    # import cPickle as pickle
    import pickle as pickle
    import json
    import cv2
    import numpy as np
    from sklearn.cluster import KMeans
    
    
    • 在pycharm里编辑输入信息 方便调试
    if __name__ == '__main__':
        # Hard-coded settings so the script can be started directly from an
        # IDE without command-line arguments.
        data_folder = 'training_images/'
        scaling_size = 200
        codebook_file = 'codebook/9_8.pkl'
        feature_map_file = 'feature_map/9_8.pkl'

        # Create the output folders before any heavy work: otherwise a missing
        # folder only surfaces when the results are written, after clustering
        # has already finished.
        for output_file in (codebook_file, feature_map_file):
            output_dir = os.path.dirname(output_file)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        training_data = load_training_data(data_folder)

        # Build the visual codebook
        print("====== Building visual codebook ======")
        kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
        if codebook_file:
            with open(codebook_file, 'wb') as f:
                pickle.dump((kmeans, centroids), f)

        # Extract features from input images
        print("\n====== Building the feature map ======")

        feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
        if feature_map_file:
            with open(feature_map_file, 'wb') as f:
                pickle.dump(feature_map, f)
    
    • 命令行方式运行文件
    #  定义命令行输入方式
    def build_arg_parser():
        """Build the command-line parser of the feature-extraction script.

        Options:
            --data-folder       (required) folder with the training images.
            --codebook-file     (required) output file for the codebook.
            --feature-map-file  (required) output file for the feature map.
            --scaling-size      (int, default 200) size used when resizing.

        Returns:
            The configured ``argparse.ArgumentParser``.
        """
        # Adjacent string literals are used instead of backslash continuations
        # inside a literal, which would embed the source indentation in the
        # text.
        parser = argparse.ArgumentParser(
            description='Extract features from a given set of images')

        parser.add_argument("--data-folder", dest="data_folder", required=True,
                            help="Folder containing the training images organized in subfolders")
        parser.add_argument("--codebook-file", dest='codebook_file', required=True,
                            help="Output file where the codebook will be stored")
        parser.add_argument("--feature-map-file", dest='feature_map_file', required=True,
                            help="Output file where the feature map will be stored")
        # resize_image fits the SHORTER side to this size (and may enlarge the
        # image), so the help text says so.
        parser.add_argument("--scaling-size", dest="scaling_size", type=int,
                            default=200,
                            help="Scales the shorter dimension of the image "
                                 "to this size.")

        return parser
    
    
    if __name__ == '__main__':
        args = build_arg_parser().parse_args()
        data_folder = args.data_folder
        scaling_size = args.scaling_size

        # Make sure the output folders exist before the expensive steps run;
        # otherwise a missing folder is only noticed when the results are
        # written, and the computed codebook is lost.
        for output_file in (args.codebook_file, args.feature_map_file):
            output_dir = os.path.dirname(output_file) if output_file else ''
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

        # Load the training data
        training_data = load_training_data(data_folder)

        # Build the visual codebook
        print("====== Building visual codebook ======")

        kmeans, centroids = FeatureBuilder().get_codewords(training_data, scaling_size)
        if args.codebook_file:
            with open(args.codebook_file, 'wb') as f:
                pickle.dump((kmeans, centroids), f)

        # Extract features from input images
        print("\n====== Building the feature map ======")

        feature_map = get_feature_map(training_data, kmeans, centroids, scaling_size)
        if args.feature_map_file:
            with open(args.feature_map_file, 'wb') as f:
                pickle.dump(feature_map, f)
    

    相关文章

      网友评论

          本文标题:9.8 利用视觉码本和向量量化创建特征

          本文链接:https://www.haomeiwen.com/subject/zpuqfqtx.html