json2dataset.py,源代码2个特点
- label value 按 label name 的字母顺序依次编号(背景固定为 0,其余类别从 1 开始递增)
- label color 通过 labelme.utils colormap 生成
这样带来的问题是:同一个类别在不同的标注图片中对应的 label value 和 color 可能不同(取决于该图片里出现了哪些类别),所以要先为每个类别固定 value 和 color
1. json2labelpng.py 简易版
功能:json 文件转成 上色后的png
import base64
import json
import os
import PIL.Image
from labelme import utils
import numpy as np
import csv
# Class name -> [R, G, B], loaded from 'sun37.csv'.
# Each data row is read as: name, R, G, B. The first row is a header.
color_dict = {}
# newline='' is what the csv module documents for files passed to csv.reader.
with open('sun37.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # skip the header row (no error on an empty file)
    for row in reader:
        if not row:  # tolerate blank lines, e.g. a trailing empty line
            continue
        color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

# Class name -> label value (the row order in the CSV). Used later to replace
# the values labelme would generate with those of the large dataset.
label_val_dict = {name: value for value, name in enumerate(color_dict)}

# RGB colour table, one row per class (the author notes shape 38 x 3 for
# their sun37.csv).
color_map = np.array(list(color_dict.values()))
def json2labelpng(json_file):
    """Convert one labelme JSON annotation into a palette-coloured label PNG.

    Label values are taken from the module-level ``label_val_dict`` and the
    palette from the module-level ``color_map``, so a class gets the same
    value and colour in every image.  The PNG is written next to
    ``json_file`` with the extension swapped for ``.png``.

    Args:
        json_file: Path to the labelme JSON file.

    Raises:
        KeyError: If a shape's label is not present in ``label_val_dict``.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(json_file) as f:
        data = json.load(f)  # json -> dict

    # .get() also covers JSON files that omit the 'imageData' key entirely.
    image_data = data.get('imageData')
    if not image_data:
        # No embedded image: read the file named by 'imagePath', resolved
        # relative to the JSON file, and base64-encode it ourselves.
        image_path = os.path.join(os.path.dirname(json_file),
                                  data['imagePath'])
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')
    img = utils.img_b64_to_arr(image_data)

    # Build the name -> value mapping from the fixed label_val_dict instead of
    # letting the values depend on which labels occur in this image.
    label_name_to_value = {
        'background': 0,  # 0 is the background
    }
    for shape in data['shapes']:
        label = shape['label']
        if label not in label_val_dict:
            raise KeyError(
                'label {!r} in {} is not defined in the colour table'
                .format(label, json_file))
        label_name_to_value[label] = label_val_dict[label]

    # lbl is an array of label values, not yet coloured.
    lbl = utils.shapes_to_label(img.shape, data['shapes'],
                                label_name_to_value)
    # Author's sample: np.unique(lbl) gave [0 1 2 3 5 21 22 25 29], i.e. the
    # values now agree with the SUN dataset.

    # Colour with our own colormap and save as a palette ('P' mode) PNG.
    lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')
    # putpalette wants a flat uint8 sequence: R, G, B, R, G, B, ...
    lbl_pil.putpalette(color_map.astype(np.uint8).flatten())
    # splitext only touches the final extension; str.replace would also
    # rewrite any '.json' occurring elsewhere in the path.
    lbl_pil.save(os.path.splitext(json_file)[0] + '.png')
if __name__ == '__main__':
    # Convert the author's sample annotation when run as a script.
    sample_json = 'C:/Users/Shuai/PycharmProjects/Toy/labelme/0.json'
    json2labelpng(json_file=sample_json)
2. json2dataset.py 源代码解析
import argparse
import base64
import json
import os
import os.path as osp
import PIL.Image
import yaml
from labelme.logger import logger
from labelme import utils
import numpy as np
import csv
# Class name -> [R, G, B], loaded from 'sun37.csv'.
# Each data row is read as: name, R, G, B. The first row is a header.
color_dict = {}
# newline='' is what the csv module documents for files passed to csv.reader.
with open('sun37.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    next(reader, None)  # skip the header row (no error on an empty file)
    for row in reader:
        if not row:  # tolerate blank lines, e.g. a trailing empty line
            continue
        color_dict[row[0]] = [int(row[1]), int(row[2]), int(row[3])]

# Class name -> label value (the row order in the CSV). Used later to replace
# the values labelme would generate with those of the large dataset.
label_val_dict = {name: value for value, name in enumerate(color_dict)}

# RGB colour table, one row per class (the author notes shape 38 x 3 for
# their sun37.csv).
color_map = np.array(list(color_dict.values()))
def main(params):
    """Convert one labelme JSON file into ``img.png`` and ``label.png``.

    Unlike the stock labelme script, label values come from the module-level
    ``label_val_dict`` and colours from the module-level ``color_map``, so a
    class gets the same value and colour in every image.

    Args:
        params: Argument list parsed by argparse: the JSON file path,
            optionally followed by ``-o``/``--out`` and an output directory.
            Without ``-o`` the output directory is created next to the JSON
            file and named after it with ``.`` replaced by ``_``
            (``0.json`` -> ``0_json``).

    Raises:
        KeyError: If a shape's label is not present in ``label_val_dict``.
    """
    # Trailing spaces matter: adjacent string literals are concatenated as-is.
    logger.warning('This script is aimed to demonstrate how to convert the '
                   'JSON file to a single image dataset, and not to handle '
                   'multiple JSON files to generate a real-use dataset.')

    parser = argparse.ArgumentParser()
    parser.add_argument('json_file')
    parser.add_argument('-o', '--out', default=None)
    args = parser.parse_args(params)

    json_file = args.json_file

    if args.out is None:
        # No output path given: use e.g. '0_json' beside the JSON file.
        out_dir = osp.basename(json_file).replace('.', '_')
        out_dir = osp.join(osp.dirname(json_file), out_dir)
    else:
        out_dir = args.out
    # makedirs also creates missing parents and has no exists/mkdir race.
    os.makedirs(out_dir, exist_ok=True)

    # Use a context manager so the file handle is closed deterministically.
    with open(json_file) as f:
        data = json.load(f)  # json -> dict

    # .get() also covers JSON files that omit the 'imageData' key entirely.
    image_data = data.get('imageData')
    if not image_data:
        # No embedded image: read the file named by 'imagePath', resolved
        # relative to the JSON file, and base64-encode it ourselves.
        image_path = os.path.join(os.path.dirname(json_file),
                                  data['imagePath'])
        with open(image_path, 'rb') as f:
            image_data = base64.b64encode(f.read()).decode('utf-8')
    img = utils.img_b64_to_arr(image_data)

    # Map label names to numbers.
    # How labelme itself builds label_name_to_value (kept for reference):
    # =======================================
    # label_name_to_value = {
    #     '_background_': 0  # 0 is the background
    # }
    # # iterate shapes sorted by label name
    # for shape in sorted(data['shapes'], key=lambda x: x['label']):
    #     label_name = shape['label']
    #     if label_name in label_name_to_value:
    #         label_value = label_name_to_value[label_name]
    #     else:
    #         # the current dict length is the number given to a new label
    #         label_value = len(label_name_to_value)
    #         label_name_to_value[label_name] = label_value
    # lbl = utils.shapes_to_label(img.shape, data['shapes'],
    #                             label_name_to_value)
    # lbl is an image made of label values (480x640 in the author's sample),
    # not yet coloured.
    # print(np.unique(lbl))  # [0 1 2 3 4 5 6 7 8]; these values do NOT match
    # the SUN dataset label values, so class names must be used to link them.
    # The stock script colours with label_colormap:
    # utils.lblsave(osp.join(out_dir, 'label.png'), lbl)
    # =======================================

    # Use the fixed label_val_dict instead, and colour with our own cmap.
    label_name_to_value = {
        'background': 0,  # 0 is the background
    }
    for shape in data['shapes']:
        label = shape['label']
        if label not in label_val_dict:
            raise KeyError(
                'label {!r} in {} is not defined in the colour table'
                .format(label, json_file))
        label_name_to_value[label] = label_val_dict[label]
    lbl = utils.shapes_to_label(img.shape, data['shapes'],
                                label_name_to_value)
    # Author's sample: np.unique(lbl) gave [0 1 2 3 5 21 22 25 29], i.e. the
    # values now agree with the SUN dataset.

    # Because the values were changed, the original visualisation no longer
    # works and is disabled:
    # label_names = [None] * (max(label_name_to_value.values()) + 1)
    # for name, value in label_name_to_value.items():
    #     label_names[value] = name
    # lbl_viz = utils.draw_label(lbl, img, label_names)

    # The stock script saves img.png, label.png and label_viz.png; only the
    # first two are written here.
    PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))

    # Colour with our own colormap and save as a palette ('P' mode) PNG.
    lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode='P')
    # putpalette wants a flat uint8 sequence: R, G, B, R, G, B, ...
    lbl_pil.putpalette(color_map.astype(np.uint8).flatten())
    lbl_pil.save(osp.join(out_dir, 'label.png'))

    # Disabled outputs of the stock script (label_viz.png, label_names.txt,
    # info.yaml):
    # PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))
    # with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
    #     for lbl_name in label_names:
    #         f.write(lbl_name + '\n')
    # logger.warning('info.yaml is being replaced by label_names.txt')
    # info = dict(label_names=label_names)
    # with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
    #     yaml.safe_dump(info, f, default_flow_style=False)
    #
    # logger.info('Saved to: {}'.format(out_dir))
if __name__ == '__main__':
    # Arguments are hard-coded rather than read from the command line:
    # the input JSON file, then the output directory.
    main(['0.json', '-o', '0_json'])
网友评论