美文网首页
labelme json2dataset.py 源代码解析

labelme json2dataset.py 源代码解析

作者: 谢小帅 | 来源:发表于2019-02-14 16:25 被阅读8次

    json2dataset.py 的源代码有 2 个特点:

    • label value 是按照 label name 字母顺序排列的
    • label color 通过 labelme.utils colormap 生成

    这样带来的问题是:label value 和 color 是根据每张图片中出现的类别单独生成的,所以不同图片里同一类别对应的 label value 和 color 可能不同。因此要先为每个类别确定固定的 value 和 color。

    1. json2labelpng.py 简易版

    功能:将 labelme 的 json 标注文件转换成上色后的 label png

    import base64
    import json
    import os
    import PIL.Image
    from labelme import utils
    
    import numpy as np
    import csv
    
    # Fixed label -> RGB table loaded from 'sun37.csv'. Each data row is read as
    # (label name, R, G, B); the first row is treated as a header and skipped.
    color_dict = {}
    # newline='' is what the csv module documents for files passed to csv.reader.
    with open('sun37.csv', 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:  # tolerate blank lines instead of raising IndexError
                continue
            color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

    # label name -> label value: the position of the label among the CSV rows
    # (dict insertion order). Used later to replace the values labelme would
    # generate on its own with the values of the larger dataset.
    label_val_dict = {name: value for value, name in enumerate(color_dict)}

    # RGB palette, one row per label, in the same order as the label values.
    # The original author notes the shape as (38, 3) for their sun37.csv.
    color_map = np.array(list(color_dict.values()))
    
    
    def json2labelpng(json_file):
        """Convert one labelme JSON annotation into a palette-coloured label PNG.

        Label values are taken from the module-level ``label_val_dict`` and the
        colours from ``color_map``, so the same class gets the same value and
        colour in every image. The PNG is written next to the JSON file, using
        the same base name with a ``.png`` extension.

        Args:
            json_file: Path to the labelme JSON annotation file.

        Raises:
            KeyError: If a shape uses a label that is not in ``label_val_dict``.
        """
        # Close the JSON file deterministically instead of leaking the handle.
        with open(json_file) as f:
            data = json.load(f)  # json -> dict

        # Prefer the image embedded in the JSON; otherwise read the file that
        # 'imagePath' points to (relative to the JSON) and base64-encode it.
        image_data = data.get('imageData')
        if not image_data:
            image_path = os.path.join(os.path.dirname(json_file), data['imagePath'])
            with open(image_path, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode('utf-8')

        img = utils.img_b64_to_arr(image_data)

        # Use the predefined label_val_dict instead of labelme's per-file numbering.
        label_name_to_value = {
            'background': 0  # 0 is the background
        }
        for shape in data['shapes']:
            label = shape['label']
            if label not in label_val_dict:
                raise KeyError(
                    'label {!r} in {} is not defined in the label table'.format(
                        label, json_file))
            label_name_to_value[label] = label_val_dict[label]

        # lbl holds one label value per pixel; it is not coloured yet.
        lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)

        # Colour with our own colormap and save as a palette ('P' mode) PNG.
        lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')
        lbl_pil.putpalette(color_map.astype(np.uint8).flatten())  # uint8, flattened
        # splitext only swaps the final extension, so a '.json' elsewhere in the
        # path is left alone and a non-'.json' input is never overwritten.
        out_path = os.path.splitext(json_file)[0] + '.png'
        lbl_pil.save(out_path)
    
    
    if __name__ == '__main__':
        # Example run on the author's local annotation file.
        sample_json = 'C:/Users/Shuai/PycharmProjects/Toy/labelme/0.json'
        json2labelpng(json_file=sample_json)
    
    2. json2dataset.py 源代码解析
    import argparse
    import base64
    import json
    import os
    import os.path as osp
    
    import PIL.Image
    import yaml
    
    from labelme.logger import logger
    from labelme import utils
    
    import numpy as np
    import csv
    
    # Fixed label -> RGB table loaded from 'sun37.csv'. Each data row is read as
    # (label name, R, G, B); the first row is treated as a header and skipped.
    color_dict = {}
    # newline='' is what the csv module documents for files passed to csv.reader.
    with open('sun37.csv', 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            if not row:  # tolerate blank lines instead of raising IndexError
                continue
            color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

    # label name -> label value: the position of the label among the CSV rows
    # (dict insertion order). Used later to replace the values labelme would
    # generate on its own with the values of the larger dataset.
    label_val_dict = {name: value for value, name in enumerate(color_dict)}

    # RGB palette, one row per label, in the same order as the label values.
    # The original author notes the shape as (38, 3) for their sun37.csv.
    color_map = np.array(list(color_dict.values()))
    
    
    def main(params):
        """Convert a single labelme JSON file into ``img.png`` and ``label.png``.

        Unlike labelme's stock script, label values come from the module-level
        ``label_val_dict`` and colours from ``color_map``, so they stay the same
        across images.

        Args:
            params: Command-line style argument list: the JSON file path,
                optionally followed by ``-o``/``--out`` and an output directory.

        Raises:
            KeyError: If a shape uses a label that is not in ``label_val_dict``.
        """
        # Adjacent string literals are concatenated verbatim, so each piece
        # needs its own trailing space.
        logger.warning('This script is aimed to demonstrate how to convert the '
                       'JSON file to a single image dataset, and not to handle '
                       'multiple JSON files to generate a real-use dataset.')

        parser = argparse.ArgumentParser()
        parser.add_argument('json_file')
        parser.add_argument('-o', '--out', default=None)
        args = parser.parse_args(params)

        json_file = args.json_file

        if args.out is None:
            # No output path given: use e.g. '0_json' next to '0.json'.
            out_dir = osp.basename(json_file).replace('.', '_')
            out_dir = osp.join(osp.dirname(json_file), out_dir)
        else:
            out_dir = args.out
        # makedirs also creates missing parent directories of a nested --out path.
        os.makedirs(out_dir, exist_ok=True)

        # Close the JSON file deterministically instead of leaking the handle.
        with open(json_file) as f:
            data = json.load(f)  # json -> dict

        # Prefer the image embedded in the JSON; otherwise read the file that
        # 'imagePath' points to (relative to the JSON) and base64-encode it.
        image_data = data.get('imageData')
        if not image_data:
            image_path = os.path.join(os.path.dirname(json_file), data['imagePath'])
            with open(image_path, 'rb') as f:
                image_data = base64.b64encode(f.read()).decode('utf-8')
        img = utils.img_b64_to_arr(image_data)

        # Map label names to their numeric values.

        # How labelme itself builds label_name_to_value (kept for reference)
        # =======================================
        # label_name_to_value = {
        #     '_background_': 0  # 0 is the background
        # }
        # for shape in sorted(data['shapes'], key=lambda x: x['label']):  # sorted by label name
        #     label_name = shape['label']
        #     if label_name in label_name_to_value:
        #         label_value = label_name_to_value[label_name]
        #     else:
        #         label_value = len(label_name_to_value)  # current dict size is the number given to a new label
        #         label_name_to_value[label_name] = label_value
        # lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
        # lbl is an image made of label values (480x640 in the author's example), not coloured yet
        # print(np.unique(lbl))  # [0 1 2 3 4 5 6 7 8]; these values do not match the SUN dataset's
        #                        # label values, so the mapping has to be built from the class names
        # labelme generates the colours with label_colormap here
        # utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
        # =======================================

        # Use the predefined label_val_dict together with our own colormap.
        label_name_to_value = {
            'background': 0  # 0 is the background
        }
        for shape in data['shapes']:
            label = shape['label']
            if label not in label_val_dict:
                raise KeyError(
                    'label {!r} in {} is not defined in the label table'.format(
                        label, json_file))
            label_name_to_value[label] = label_val_dict[label]

        lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)
        # print(np.unique(lbl))  # [ 0  1  2  3  5 21 22 25 29], now consistent with the SUN dataset

        # The values were changed, so the original visualisation no longer works
        # label_names = [None] * (max(label_name_to_value.values()) + 1)  # fixed-length list
        # for name, value in label_name_to_value.items():  # dict
        #     label_names[value] = name
        # lbl_viz = utils.draw_label(lbl, img, label_names)

        # The stock script saves three images: img.png, label.png, label_viz.png;
        # label_viz.png is disabled below.
        PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))

        # Colour with our own colormap and save as a palette ('P' mode) label.png.
        lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')
        lbl_pil.putpalette(color_map.astype(np.uint8).flatten())  # uint8, flattened
        lbl_pil.save(osp.join(out_dir, 'label.png'))

        # label_viz.png
        # PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

        # with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
        #     for lbl_name in label_names:
        #         f.write(lbl_name + '\n')

        # logger.warning('info.yaml is being replaced by label_names.txt')
        # info = dict(label_names=label_names)
        # with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
        #     yaml.safe_dump(info, f, default_flow_style=False)
        #
        # logger.info('Saved to: {}'.format(out_dir))
    
    
    if __name__ == '__main__':
        # Same as the command line: <script> 0.json -o 0_json
        params = ['0.json', '-o', '0_json']
        main(params)
    

    相关文章

      网友评论

          本文标题:labelme json2dataset.py 源代码解析

          本文链接:https://www.haomeiwen.com/subject/mxneeqtx.html