介绍
Detectron是构建在Caffe2和Python之上的计算机视觉库,集成了多项计算机视觉最新成果,一经发布便广受好评。近期,Facebook AI研究院又开源了Detectron的升级版,也就是接下来我们要介绍的:Detectron2。
Detectron2 是 Facebook AI Research 推出的第二代CV库,它不但进一步集成了最新的目标检测算法,而且是对先前版本 Detectron 的完全重写,号称目标检测三大开源神器之一(Detectron2/mmDetection(MMDetection专栏开篇)/SimpleDet)。它源自最初的maskrcnn-benchmark库。
与 mmdetection 、TensorFlow Object Detection API一样,Detectron2 也是通过配置文件来设置各种参数,从而实现当前最优的目标检测效果。
安装detectron2
系统环境:PyTorch 1.7(下方 wheel 对应 CUDA 10.1),且必须要有 GPU
pip install pyyaml==5.1
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html
数据准备
从智能盘点—钢筋数量AI识别 竞赛 - DataFountain下载训练集和训练标签并重命名为data.zip和label.csv。解压data.zip到data目录
注册钢筋数据集:
import pandas as pd
from sklearn.model_selection import train_test_split
from detectron2.data import DatasetCatalog, MetadataCatalog
import os
import numpy as np
import json
from detectron2.structures.boxes import BoxMode
import itertools
# Load the label table once at module level; get_dicts() reads its
# 'ID' and ' Detection' columns to look up the boxes of each image.
csv = pd.read_csv('label.csv')
def get_dicts(root, files):
    """Build the list of dataset records for the given image files.

    The result is a list in which every element is one record (a dict)
    with the keys ``file_name``, ``height``, ``width`` and ``annotations``.
    Each annotation holds one bounding box read from the module-level
    ``csv`` label table.

    Args:
        root: Directory that contains the image files.
        files: Iterable of image file names. Each name is also compared
            against the ``ID`` column of the label table.

    Returns:
        A list with one record dict per entry of ``files``.

    Raises:
        ValueError: If OpenCV cannot read an image, or a label row does
            not contain four integer coordinates.
    """
    # Imported locally so the function works even when it is called before
    # the module-level ``import cv2`` further down the file has executed.
    import cv2

    dataset_dicts = []
    for img_id in files:
        img_path = os.path.join(root, img_id)

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports failure by returning None rather than
            # raising, which would otherwise surface as an AttributeError.
            raise ValueError("cannot read image: {}".format(img_path))
        height, width = image.shape[:2]

        # All label rows that belong to this image.
        labels = csv[csv['ID'] == img_id]

        objs = []
        # The column is looked up as ' Detection' (leading space included);
        # presumably this mirrors the header of label.csv - keep it as is.
        for box in labels[' Detection']:
            # split() without an argument tolerates repeated whitespace;
            # only the first four values are used as box corners.
            x_min, y_min, x_max, y_max = map(int, box.split()[:4])
            objs.append({
                "bbox": [x_min, y_min, x_max, y_max],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": img_path,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts
# Split the image files into a training and a held-out test subset.
# No test_size/random_state is passed, so scikit-learn's defaults apply and
# the split differs between runs.
dirs = os.listdir('data')
train_files, test_files = train_test_split(dirs)
data_map = {
    'train': train_files,
    'test': test_files,
}
for mode in ['train', 'test']:
    # Bind the current ``mode`` as a default argument: the loader is called
    # lazily, after the loop has finished, and a plain closure would then
    # see only the last value of ``mode``.
    DatasetCatalog.register(mode, lambda mode=mode: get_dicts("data", data_map[mode]))
    # Single category; its index 0 matches the category_id used in get_dicts.
    MetadataCatalog.get(mode).set(thing_classes=["redbar"])
查看样例数据:
from detectron2.utils.visualizer import Visualizer
from google.colab.patches import cv2_imshow
import random
import cv2
# Sanity check: draw the ground-truth boxes of one random training image.
# The images live in the "data" directory (the same root that is used when
# the datasets are registered), not in "img".
dataset_dicts = get_dicts("data", train_files)
for d in random.sample(dataset_dicts, 1):
    img = cv2.imread(d["file_name"])
    # [:, :, ::-1] reverses the channel axis (OpenCV loads images as BGR).
    visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("train"), scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    # Reverse the channels again before handing the image back to OpenCV.
    cv2_imshow(out.get_image()[:, :, ::-1])
创建探测模型进行训练
detectron2使用cfg配置模型,参数命名很简明,这里我们使用ResNet50+FasterRCNN+FPN:
from detectron2.engine import DefaultTrainer
from detectron2 import model_zoo
from detectron2.config import get_cfg
# Model-zoo entry used both for the base config and the initial weights.
config_name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_name))

# Model: initialise from the model-zoo checkpoint and predict a single class
# (the only registered category is "redbar").
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
# Lower than the default of 512 to speed up training.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256

# Data: the dataset names registered in DatasetCatalog.
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("test",)
cfg.DATALOADER.NUM_WORKERS = 2

# Solver: small batch, small learning rate, 4000 iterations in total.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 4000

# Checkpoints and logs are written below cfg.OUTPUT_DIR.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# resume=False: do not continue from a previous run in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()
测试模型
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import ColorMode

# Inference reuses the training cfg (it already contains everything set
# before); only the weights path and the score threshold are overridden.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # custom testing threshold
predictor = DefaultPredictor(cfg)

# Build the records of the held-out files with the same loader and root
# directory that were used for registration.
dataset_dicts = get_dicts("data", test_files)
metadata = MetadataCatalog.get("test")
for d in random.sample(dataset_dicts, 1):
    im = cv2.imread(d["file_name"])
    # Output format: https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    outputs = predictor(im)
    v = Visualizer(
        im[:, :, ::-1],
        metadata=metadata,
        scale=0.5,
        # Removes the colors of unsegmented pixels; this option is only
        # available for segmentation models.
        instance_mode=ColorMode.IMAGE_BW,
    )
    # Move the predictions to the CPU before drawing them.
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])
网友评论