介绍

Detectron 是构建在 Caffe2 和 Python 之上的计算机视觉库，集成了多项计算机视觉最新成果，一经发布便广受好评。近期，Facebook AI 研究院又开源了 Detectron 的升级版，也就是接下来我们要介绍的 Detectron2。

Detectron2 是 Facebook AI Research 推出的第二代 CV 库，它不但进一步集成了最新的目标检测算法，而且是对先前版本 Detectron 的完全重写，号称目标检测三大开源神器之一（Detectron2 / mmDetection（MMDetection专栏开篇）/ SimpleDet）。它源自最初的 maskrcnn-benchmark 库。

与 mmdetection 、TensorFlow Object Detection API一样，Detectron2 也是通过配置文件来设置各种参数，从而实现当前最优的目标检测效果。

安装detectron2

系统环境：PyTorch 1.7，且必须有 GPU（下面安装的 wheel 对应 CUDA 10.1，即 cu101）。

pip install pyyaml==5.1
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html

数据准备

从「智能盘点—钢筋数量AI识别竞赛 - DataFountain」下载训练集和训练标签，并分别重命名为 data.zip 和 label.csv，然后将 data.zip 解压到 data 目录。

注册钢筋数据集:

import pandas as pd
from sklearn.model_selection import train_test_split
from detectron2.data import DatasetCatalog, MetadataCatalog
import os
import numpy as np
import json
from detectron2.structures.boxes import BoxMode
import itertools


# Label table loaded once at module level; get_dicts() filters it per image.
csv = pd.read_csv('label.csv')


def get_dicts(root, files):
    """Build detectron2-style dataset records for the given image files.

    The result is a list in which every element is one record (a dict)
    with the keys ``file_name``, ``height``, ``width`` and ``annotations``.

    Args:
        root: Directory that contains the image files.
        files: Iterable of image file names. Each name is also the value
            matched against the ``ID`` column of the label table.

    Returns:
        A list of record dicts, one per entry in ``files``.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the images.
    """
    # Imported locally so the function does not rely on a file-level
    # ``import cv2`` that appears only later in the file.
    import cv2

    dataset_dicts = []

    for img_id in files:
        img_path = os.path.join(root, img_id)

        # cv2.imread signals failure by returning None instead of raising,
        # so check explicitly to give a readable error.
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"cannot read image: {img_path}")
        height, width = image.shape[:2]

        # All label rows that belong to this image.
        labels = csv[csv['ID'] == img_id]

        objs = []
        # NOTE(review): the column name carries a leading space; presumably
        # this mirrors the CSV header exactly -- confirm against label.csv.
        for box in labels[' Detection']:
            # Whitespace-separated integers; split() with no argument also
            # tolerates repeated spaces and surrounding whitespace.
            coords = [int(value) for value in box.split()]
            objs.append({
                "bbox": [coords[0], coords[1], coords[2], coords[3]],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
                "iscrowd": 0,
            })

        dataset_dicts.append({
            "file_name": img_path,
            "height": height,
            "width": width,
            "annotations": objs,
        })

    return dataset_dicts


# Split the file names found under data/ into a train and a test subset.
dirs = os.listdir('data')
train_files, test_files = train_test_split(dirs)
data_map = {
    'train': train_files,
    'test': test_files,
}

for mode in ['train', 'test']:
    # Bind ``mode`` as a default argument: the lambda is invoked lazily, and
    # without the binding both datasets would resolve to the last loop value.
    DatasetCatalog.register(
        mode, lambda mode=mode: get_dicts("data", data_map[mode])
    )
    # Single object category for this dataset.
    MetadataCatalog.get(mode).set(thing_classes=["redbar"])

查看样例数据:

from detectron2.utils.visualizer import Visualizer
from google.colab.patches import cv2_imshow
import random
import cv2

# Load the training records from the same directory the file names were
# listed from ("data"), so every image path resolves.
dataset_dicts = get_dicts("data", train_files)

# Draw the ground-truth boxes of one randomly chosen training image.
for d in random.sample(dataset_dicts, 1):
    img = cv2.imread(d["file_name"])
    # [:, :, ::-1] reverses the channel axis before the image is handed to
    # the Visualizer, and again before it is displayed.
    visualizer = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("train"), scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2_imshow(out.get_image()[:, :, ::-1])

创建探测模型进行训练

detectron2 使用 cfg 配置模型，参数命名很简明，这里我们使用 ResNet50 + Faster R-CNN + FPN：

from detectron2.engine import DefaultTrainer
from detectron2 import model_zoo
from detectron2.config import get_cfg

# Start from the default config and overlay the model-zoo config file for
# COCO-Detection/faster_rcnn_R_50_FPN_3x.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
# Dataset names as passed to DatasetCatalog.register elsewhere in this file.
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 4000    # train for 4000 iterations; adjust to the size of the dataset
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256   # faster than the default (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class ("redbar"). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)

# The output directory must exist before the trainer is created.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
# NOTE(review): resume=False presumably starts from cfg.MODEL.WEIGHTS rather
# than from a checkpoint in OUTPUT_DIR -- confirm against the detectron2 docs.
trainer.resume_or_load(resume=False)
trainer.train()

测试模型


from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import ColorMode

# Inference should use the config with parameters that are used in training.
# cfg already contains everything set previously; only the weights path and
# the score threshold are changed for inference.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

# Build the test records with the same loader used for registration.
dataset_dicts = get_dicts("data", test_files)
metadata = MetadataCatalog.get("test")

# Run the predictor on one randomly chosen test image and draw the result.
for d in random.sample(dataset_dicts, 1):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(
        im[:, :, ::-1],
        metadata=metadata,
        scale=0.5,
        # Remove the colors of unsegmented pixels. This option is only
        # available for segmentation models.
        instance_mode=ColorMode.IMAGE_BW,
    )
    # Move the predictions to the CPU before drawing them.
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])