美文网首页
划分数据集(python)

划分数据集(python)

作者: huim | 来源:发表于2018-10-28 21:12 被阅读0次

    将特定文件路径下的图片数据划分为训练集、验证集和测试集。

    import glob
    import os.path
    import random
    
    # Path to the input data
    INPUT_DATA = './flower_photos'
    
    # Split the data set according to the given percentages
    def create_image_lists(testing_percentage, validation_percentage,
                           input_data=None):
        """Randomly split the images of every class directory into three sets.

        Each sub-directory found below the data root is treated as one class
        whose label is the lower-cased directory name. Every JPEG file in it is
        assigned independently to the validation, testing or training set by
        drawing a random integer in ``[0, 100)``.

        Args:
            testing_percentage: Share (0-100) of images that should end up in
                the testing set.
            validation_percentage: Share (0-100) of images that should end up
                in the validation set.
            input_data: Root directory to scan. Defaults to the module-level
                ``INPUT_DATA`` when omitted.

        Returns:
            A dict mapping each label to a dict with the keys ``'dir'`` (the
            directory name as found on disk) and ``'training'``, ``'testing'``,
            ``'validation'`` (lists of image base names). Directories that
            contain no matching image are left out; a missing root yields an
            empty dict.
        """
        if input_data is None:
            input_data = INPUT_DATA

        extensions = ('jpeg', 'jpg', 'JPG', 'JPEG')
        result = {}

        # os.walk yields the root directory first; only what follows it are
        # sub-directories, so the first entry is skipped.
        sub_dirs = [entry[0] for entry in os.walk(input_data)]
        for sub_dir in sub_dirs[1:]:
            dir_name = os.path.basename(sub_dir)
            # Escape the directory part so names containing glob
            # metacharacters such as '[' are matched literally.
            pattern_dir = glob.escape(os.path.join(input_data, dir_name))

            # A set prevents duplicates on case-insensitive file systems,
            # where '*.jpg' and '*.JPG' match the very same files.
            matched = set()
            for extension in extensions:
                matched.update(
                    glob.glob(os.path.join(pattern_dir, '*.' + extension)))
            if not matched:
                continue

            training_images = []
            testing_images = []
            validation_images = []
            for file_name in sorted(matched):
                base_name = os.path.basename(file_name)
                # Uniform integer in [0, 100), compared against the
                # cumulative percentage thresholds below.
                chance = random.randrange(100)
                if chance < validation_percentage:
                    validation_images.append(base_name)
                elif chance < (testing_percentage + validation_percentage):
                    testing_images.append(base_name)
                else:
                    training_images.append(base_name)

            result[dir_name.lower()] = {
                'dir': dir_name,
                'training': training_images,
                'testing': testing_images,
                'validation': validation_images,
            }

        return result
    

    获取图片样本的完整路径。

    # Build the full path of one image sample
    def get_image_path(image_lists, image_dir, label_name, index, category):
        """Return the path of the image at ``index`` for a label and category.

        Args:
            image_lists: Dict mapping each label to a dict that holds a
                ``'dir'`` entry and one list of base file names per category.
            image_dir: Root directory that contains the class sub-directories.
            label_name: Key into ``image_lists`` selecting the class.
            index: Integer position of the wanted image; it is wrapped with a
                modulo, so any integer is accepted.
            category: Key selecting which list of the label to read from.

        Returns:
            ``image_dir``, the label's ``'dir'`` entry and the selected base
            name joined with ``os.path.join``.

        Raises:
            KeyError: If ``label_name`` or ``category`` is not present.
            ZeroDivisionError: If the selected category list is empty.
        """
        label_lists = image_lists[label_name]
        category_list = label_lists[category]
        # Wrap the index so callers may pass values beyond the list length.
        base_name = category_list[index % len(category_list)]
        return os.path.join(image_dir, label_lists['dir'], base_name)

    相关文章

      网友评论

          本文标题:划分数据集(python)

          本文链接:https://www.haomeiwen.com/subject/usbmtqtx.html