美文网首页
kmeans以及kmeans++聚类生成anchors

kmeans以及kmeans++聚类生成anchors

作者: 小白兔555 | 来源:发表于2022-05-17 10:33 被阅读0次

具体参考:https://blog.csdn.net/Arcofcosmos/article/details/120252992(如侵权,速删)

YOLOv5训练自己的数据集,YOLOv5模型中的anchors尺寸适用性已经非常强了,然而依旧可以尝试使用K-means以及K-means++算法找到更适合自己数据集的anchors(也许改进后的AP结果值并没有提高,反而下降不少==)。

1.K-Means算法

kmeans的具体步骤分为以下几步:

step1:随机设定初始聚类中心

step2:将距离某个聚类中心距离近的样本点归类到该聚类中心,将样本全部归类完毕后得到多个簇

step3:计算每个簇的均值作为新的聚类中心

step4:重复第二步和第三步直至聚类中心不再发生变化


#-------------------------------------------------------------------------------------------------#
#   kmeans虽然会对数据集中的框进行聚类,但是很多数据集由于框的大小相近,聚类出来的9个框相差不大,
#   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框,越浅的特征层适合越小的先验框
#   原始网络的先验框已经按大中小比例分配好了,不进行聚类也会有非常好的效果。
#-------------------------------------------------------------------------------------------------#
import glob
import xml.etree.ElementTree as ET

import numpy as np

def cas_iou(box, cluster):
    """Return the IoU of one box against every cluster centre.

    Only widths and heights are compared: the overlap is computed as
    ``min(width) * min(height)``, i.e. as if all rectangles shared a corner.

    Args:
        box: Sequence whose first two items are the box width and height.
        cluster: 2-D array; column 0 holds widths and column 1 holds heights.

    Returns:
        1-D array with one IoU value per row of ``cluster``.
    """
    box_w, box_h = box[0], box[1]
    cluster_w = cluster[:, 0]
    cluster_h = cluster[:, 1]

    # Overlap of two corner-aligned rectangles.
    overlap = np.minimum(cluster_w, box_w) * np.minimum(cluster_h, box_h)
    union = box_w * box_h + cluster_w * cluster_h - overlap

    return overlap / union

def avg_iou(box, cluster):
    """Return the mean, over all boxes, of each box's best IoU with any cluster.

    Args:
        box: 2-D array of boxes; each row is passed to ``cas_iou``.
        cluster: Cluster centres, passed unchanged to ``cas_iou``.

    Returns:
        The average of the per-box maximum IoU values.
    """
    best_per_box = []
    for idx in range(box.shape[0]):
        # Highest IoU this box reaches against any of the cluster centres.
        best_per_box.append(np.max(cas_iou(box[idx], cluster)))
    return np.mean(best_per_box)

def kmeans(box, k, max_iter=1000):
    """Cluster (width, height) pairs into ``k`` anchors using ``1 - IoU`` k-means.

    Centres are initialised from ``k`` distinct random rows of ``box`` and are
    updated with the per-cluster median until the assignment stops changing.

    Fixes compared with the previous version:
      * The "previous assignment" used to start as all zeros, which is itself
        a valid assignment; when the first pass put every box in cluster 0
        (always true for ``k == 1``) the loop stopped before any centre was
        updated. A ``None`` sentinel is used instead.
      * A cluster that receives no boxes keeps its previous centre instead of
        being overwritten with NaN (the median of an empty set).
      * NumPy's global RNG is no longer reseeded here, so a seed set by the
        caller (``np.random.seed(n)``) now gives reproducible anchors.
      * Iterations are capped by ``max_iter`` in case the assignment cycles.

    Args:
        box: Array-like of shape (n, 2) holding box widths and heights.
        k: Number of clusters (anchors) to produce.
        max_iter: Upper bound on the number of assignment/update rounds.

    Returns:
        Float array of shape (k, 2) with the cluster centres. ``box`` itself
        is not modified.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``k`` exceeds
            the number of boxes.
    """
    # Work in float so medians are never truncated when integer boxes are given.
    box = np.asarray(box, dtype=float)
    row = box.shape[0]

    # Fancy indexing returns a copy, so updating centres never touches ``box``.
    cluster = box[np.random.choice(row, k, replace=False)]
    box_area = box[:, 0] * box[:, 1]

    last_clu = None
    for _ in range(max_iter):
        # Pairwise IoU of every box against every centre, shape (row, k).
        # Same formula as the per-box helper, evaluated with broadcasting.
        inter = (np.minimum(box[:, None, 0], cluster[None, :, 0])
                 * np.minimum(box[:, None, 1], cluster[None, :, 1]))
        cluster_area = cluster[:, 0] * cluster[:, 1]
        distance = 1 - inter / (box_area[:, None] + cluster_area[None, :] - inter)

        # Index of the closest centre for each box.
        near = np.argmin(distance, axis=1)

        if last_clu is not None and np.array_equal(last_clu, near):
            break

        # Move each centre to the median of its members.
        for j in range(k):
            members = box[near == j]
            if len(members):
                cluster[j] = np.median(members, axis=0)

        last_clu = near

    return cluster

def load_data(path):
    """Collect normalised box sizes from the VOC-style XML files in ``path``.

    Each ``*.xml`` file directly inside ``path`` is parsed; for every
    ``object`` element the bounding-box width and height are divided by the
    image width and height read from ``size``.

    Fixes compared with the previous version:
      * The glob pattern is ``*.xml`` (was ``*xml``, which also matched any
        file whose name merely ended in the letters "xml").
      * Files are visited in sorted order so the result is deterministic.
      * Files with a missing or non-positive image size are skipped.
      * Boxes with non-positive width or height are skipped; they would
        otherwise produce 0/0 in later IoU computations.
      * An empty result has shape (0, 2) instead of (0,).

    Args:
        path: Directory containing the annotation files.

    Returns:
        Float64 array of shape (n, 2) with ``[width, height]`` per box.
    """
    data = []
    for xml_file in sorted(glob.glob('{}/*.xml'.format(path))):
        tree = ET.parse(xml_file)
        height_text = tree.findtext('./size/height')
        width_text = tree.findtext('./size/width')
        if height_text is None or width_text is None:
            continue
        height = int(float(height_text))
        width = int(float(width_text))
        if height <= 0 or width <= 0:
            continue

        # Every annotated object contributes one normalised (width, height).
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            box_w = xmax - xmin
            box_h = ymax - ymin
            if box_w <= 0 or box_h <= 0:
                continue
            data.append([box_w, box_h])
    return np.array(data, dtype=np.float64).reshape(-1, 2)


def calculate_anchors(dataset_anno_path = r'/home/slave110/code/VOCdevkit/VOC2007/Annotations', anchorsPath = '/home/slave110/code/VOCdevkit/VOC2007/yolo_anchors_kmeans.txt', anchors_num = 9, SIZE = 1000):
    """Cluster the dataset's boxes with k-means and write the anchors to a file.

    The annotations are loaded as normalised (width, height) pairs, clustered
    into ``anchors_num`` centres, sorted by width, scaled by ``SIZE`` and
    written as ``"w,h, w,h, ..."`` with each value truncated to an integer.
    The average best IoU and the scaled anchors are also printed.

    The output file is now written inside a ``with`` block so the handle is
    closed even if formatting fails, and too few boxes is reported with an
    explicit message before clustering starts.

    Args:
        dataset_anno_path: Directory holding the XML annotation files.
        anchorsPath: Destination text file for the anchors.
        anchors_num: Number of anchors to generate.
        SIZE: Factor (input image size) used to scale the normalised anchors.

    Raises:
        ValueError: If fewer than ``anchors_num`` boxes were loaded.
    """
    # Boxes are stored as width/height ratios of their image.
    data = load_data(dataset_anno_path)
    if data.shape[0] < anchors_num:
        raise ValueError(
            'need at least {} boxes to build {} anchors, found {} in {!r}'.format(
                anchors_num, anchors_num, data.shape[0], dataset_anno_path))

    out = kmeans(data, anchors_num)
    # Order anchors by ascending width.
    out = out[np.argsort(out[:, 0])]
    print('acc:{:.2f}%'.format(avg_iou(data, out) * 100))

    anchors = out * SIZE
    print(anchors)

    with open(anchorsPath, 'w') as f:
        f.write(", ".join("%d,%d" % (w, h) for w, h in anchors))


if __name__ == '__main__':
    # Directory with the dataset's XML annotations.
    dataset_anno_path = r'/home/slave110/code/VOCdevkit/VOC2007/Annotations'
    # Text file the generated anchors are saved to.
    anchorsPath = '/home/slave110/code/VOCdevkit/VOC2007/yolo_anchors_kmeans.txt'
    # How many anchors to generate.
    anchors_num = 9
    # Input image size the normalised anchors are scaled to.
    SIZE = 1000
    calculate_anchors(
        dataset_anno_path=dataset_anno_path,
        anchorsPath=anchorsPath,
        anchors_num=anchors_num,
        SIZE=SIZE,
    )

2.K-Means++算法

K-means++算法是在聚类中心上对K-means算法的改进,K-means++算法选取的聚类中心的距离尽可能大,但聚类的过程不变。

step1:首先从所有样本中随机选定第一个聚类中心

step2:记录所有样本到与其最近的聚类中心的距离

step3:所有非聚类中心样本点被选取作为下一个聚类中心的概率与step2中的距离大小成正比,也就是说距离越远的样本点越有可能成为下一个聚类中心

step4:重复step2和step3直至选出多个聚类中心



import glob
import xml.etree.ElementTree as ET

import numpy as np

def cas_iou(box, cluster):
    """Compute the IoU between a single box and each cluster centre.

    Positions are ignored; rectangles are compared by width and height only,
    with the intersection taken as ``min(width) * min(height)``.

    Args:
        box: Sequence whose items 0 and 1 are the box width and height.
        cluster: 2-D array with widths in column 0 and heights in column 1.

    Returns:
        1-D array of IoU values, one per cluster centre.
    """
    inter_w = np.minimum(cluster[:, 0], box[0])
    inter_h = np.minimum(cluster[:, 1], box[1])
    inter_area = inter_w * inter_h

    box_area = box[0] * box[1]
    cluster_area = cluster[:, 0] * cluster[:, 1]
    union_area = box_area + cluster_area - inter_area

    return inter_area / union_area

def avg_iou(box, cluster):
    """Average, across all boxes, the best IoU each box has with any cluster.

    Args:
        box: 2-D array of boxes; rows are handed to ``cas_iou`` one by one.
        cluster: Cluster centres, forwarded unchanged to ``cas_iou``.

    Returns:
        Mean of the per-box maximum IoU.
    """
    num_boxes = box.shape[0]
    best_scores = [
        np.max(cas_iou(box[row_idx], cluster))
        for row_idx in range(num_boxes)
    ]
    return np.mean(best_scores)


def bboxesOverRation(bboxesA, bboxesB):
    """Return the pairwise IoU matrix between two sets of ``[x, y, w, h]`` boxes.

    The first two columns are used as the corner with the smallest
    coordinates; the opposite corner is ``(x + w, y + h)``.

    Changes compared with the previous version:
      * The nested Python loops are replaced by NumPy broadcasting.
      * A pair whose union area is zero yields IoU 0 instead of NaN (0/0).
      * Any array-like input is accepted (lists as well as arrays); a single
        1-D box is treated as one row.

    The function name (including its spelling) is kept for existing callers.

    Args:
        bboxesA: Array-like of shape (M, 4).
        bboxesB: Array-like of shape (N, 4).

    Returns:
        Float array of shape (M, N) where entry ``[i, j]`` is the IoU of
        ``bboxesA[i]`` and ``bboxesB[j]``.
    """
    boxes_a = np.atleast_2d(np.asarray(bboxesA, dtype=float))
    boxes_b = np.atleast_2d(np.asarray(bboxesB, dtype=float))

    areas_a = boxes_a[:, 2] * boxes_a[:, 3]
    areas_b = boxes_b[:, 2] * boxes_b[:, 3]

    # Intersection rectangle for every (A[i], B[j]) pair via broadcasting.
    x1 = np.maximum(boxes_a[:, None, 0], boxes_b[None, :, 0])
    y1 = np.maximum(boxes_a[:, None, 1], boxes_b[None, :, 1])
    x2 = np.minimum(boxes_a[:, None, 0] + boxes_a[:, None, 2],
                    boxes_b[None, :, 0] + boxes_b[None, :, 2])
    y2 = np.minimum(boxes_a[:, None, 1] + boxes_a[:, None, 3],
                    boxes_b[None, :, 1] + boxes_b[None, :, 3])

    # Negative extents mean the boxes do not overlap along that axis.
    intersection = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    union = areas_a[:, None] + areas_b[None, :] - intersection

    # Leave IoU at 0 wherever the union is empty to avoid 0/0.
    iou_ratio = np.zeros_like(intersection)
    np.divide(intersection, union, out=iou_ratio, where=union > 0)
    return iou_ratio


def load_data(path):
    """Collect normalised ``[x, y, w, h]`` boxes from VOC-style XML files.

    Each ``*.xml`` file directly inside ``path`` is parsed; box coordinates
    are divided by the image width and height read from ``size``. ``x`` and
    ``y`` are the box centre, ``w`` and ``h`` its width and height.

    Fixes compared with the previous version:
      * The glob pattern is ``*.xml`` (was ``*xml``, which also matched any
        file whose name merely ended in the letters "xml").
      * Files are visited in sorted order so the result is deterministic.
      * Files with a missing or non-positive image size are skipped.
      * Boxes with non-positive width or height are skipped; they would
        otherwise produce 0/0 in later IoU computations.
      * An empty result has shape (0, 4) instead of (0,).

    Args:
        path: Directory containing the annotation files.

    Returns:
        Float64 array of shape (n, 4) with one ``[x, y, w, h]`` row per box.
    """
    data = []
    for xml_file in sorted(glob.glob('{}/*.xml'.format(path))):
        tree = ET.parse(xml_file)
        height_text = tree.findtext('./size/height')
        width_text = tree.findtext('./size/width')
        if height_text is None or width_text is None:
            continue
        height = int(float(height_text))
        width = int(float(width_text))
        if height <= 0 or width <= 0:
            continue

        # Every annotated object contributes one normalised box.
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            box_w = xmax - xmin
            box_h = ymax - ymin
            if box_w <= 0 or box_h <= 0:
                continue
            # Centre point of the box.
            x = xmin + 0.5 * box_w
            y = ymin + 0.5 * box_h
            data.append([x, y, box_w, box_h])
    return np.array(data, dtype=np.float64).reshape(-1, 4)


def estimateAnchorBoxes(trainingData,numAnchors=9,SIZE = 416):
    """Estimate anchor sizes with k-means++ seeding followed by k-means.

    The x/y columns are zeroed on a private copy so that the IoU between two
    boxes depends only on their width and height. Seeds are drawn with
    probability proportional to each box's ``1 - IoU`` distance to its nearest
    already-chosen centre; the centres are then refined by assigning every
    box to its nearest centre and replacing each centre by its members' mean.

    Fixes compared with the previous version:
      * ``trainingData`` is no longer modified in place.
      * Centres are kept in one (numAnchors, 4) array; previously a centre
        that was never updated stayed a (1, 4) array and broke the final
        width/height slicing.
      * If every box is at distance 0 from the chosen centres, the next seed
        is drawn uniformly instead of failing on NaN probabilities.
      * Too few boxes raises ``ValueError`` rather than relying on ``assert``,
        which is stripped under ``python -O``.
      * Distances are computed with one ``bboxesOverRation`` call per pass
        instead of one call per (box, centre) pair.
      * The returned mean IoU is always measured against the final centres.

    Args:
        trainingData: Array-like of shape (n, 4) with ``[x, y, w, h]`` rows.
        numAnchors: Number of anchors to estimate.
        SIZE: Factor used to scale the normalised anchors.

    Returns:
        Tuple ``(anchorBoxes, meanIoU)``: ``anchorBoxes`` is a
        (numAnchors, 2) array of ``[w, h]`` sorted by width and multiplied by
        ``SIZE``; ``meanIoU`` is the mean IoU of each box with its nearest
        centre.

    Raises:
        ValueError: If there are fewer boxes than ``numAnchors``.
    """
    # Private float copy; zero x/y so only width and height drive the IoU.
    boxes = np.array(trainingData, dtype=float)
    boxes[:, 0:2] = 0
    num_boxes = boxes.shape[0]
    if num_boxes < numAnchors:
        raise ValueError(
            'need at least {} boxes to estimate {} anchors, got {}'.format(
                numAnchors, numAnchors, num_boxes))

    # k-means++ seeding: the first centre is uniform at random.
    first = np.random.choice(num_boxes)
    centroids = boxes[[first]]  # shape (1, 4); fancy indexing copies
    while centroids.shape[0] < numAnchors:
        dist = 1 - bboxesOverRation(boxes, centroids)
        # Clip guards against tiny negative values from float rounding.
        min_dist = np.clip(dist.min(axis=1), 0.0, None)
        total = min_dist.sum()
        # All-zero distances (e.g. identical boxes) -> uniform draw.
        prob = min_dist / total if total > 0 else None
        idx = np.random.choice(num_boxes, p=prob)
        centroids = np.vstack([centroids, boxes[idx]])

    # k-means refinement; same bound as before (at most maxIterTimes + 1 passes).
    maxIterTimes = 100
    for _ in range(maxIterTimes + 1):
        dist = 1 - bboxesOverRation(boxes, centroids)
        belongs_to = dist.argmin(axis=1)

        err = 0.0
        for i in range(numAnchors):
            members = boxes[belongs_to == i]
            # An empty cluster keeps its current centre.
            if len(members):
                new_centre = members.mean(axis=0)
                err += np.sqrt(np.sum(np.power(new_centre - centroids[i], 2)))
                centroids[i] = new_centre
        if err == 0:
            break

    # Measure quality against the final centres.
    final_min_dist = (1 - bboxesOverRation(boxes, centroids)).min(axis=1)
    meanIoU = 1 - np.mean(final_min_dist)

    anchorBoxes = centroids[:, 2:]
    anchorBoxes = anchorBoxes[np.argsort(anchorBoxes[:,0])]
    print('acc:{:.2f}%'.format(avg_iou(boxes[:,2:],anchorBoxes) * 100))
    anchorBoxes = anchorBoxes*SIZE
    return anchorBoxes,meanIoU

def calculate_anchors(dataset_anno_path = r'/home/slave110/code/VOCdevkit/VOC2007/Annotations', anchorsPath = './yolo_anchors.txt', anchors_num = 9, SIZE = 1000):
    """Estimate anchors with k-means++ and write them to a text file.

    The annotations are loaded, clustered by ``estimateAnchorBoxes`` and the
    resulting scaled anchors are printed and written as ``"w,h, w,h, ..."``
    with each value truncated to an integer.

    The output file is now written inside a ``with`` block so the handle is
    closed even if formatting fails.

    Args:
        dataset_anno_path: Directory holding the XML annotation files.
        anchorsPath: Destination text file for the anchors.
        anchors_num: Number of anchors to generate.
        SIZE: Factor (input image size) used to scale the normalised anchors.
    """
    data = load_data(dataset_anno_path)
    anchors, _ = estimateAnchorBoxes(data, numAnchors = anchors_num, SIZE = SIZE)
    print(anchors)

    with open(anchorsPath, 'w') as f:
        f.write(", ".join("%d,%d" % (w, h) for w, h in anchors))


if __name__ == "__main__":
    # Directory with the dataset's XML annotations.
    dataset_anno_path = r'/home/slave110/code/VOCdevkit/VOC2007/Annotations'
    # Text file the generated anchors are saved to.
    anchorsPath = '/home/slave110/code/VOCdevkit/VOC2007/yolo_anchors.txt'
    # How many anchors to generate.
    anchors_num = 9
    # Input image size the normalised anchors are scaled to.
    SIZE = 1000
    calculate_anchors(
        dataset_anno_path=dataset_anno_path,
        anchorsPath=anchorsPath,
        anchors_num=anchors_num,
        SIZE=SIZE,
    )

运行后的结果会保存在txt中。

相关文章

网友评论

      本文标题:kmeans以及kmeans++聚类生成anchors

      本文链接:https://www.haomeiwen.com/subject/lqggurtx.html