摘要:
SLIC:simple linear iterative clustering,简单的线性迭代聚类,它使用k-means聚类来生成超像素,实现起来简单,运行效率还比较高,超像素对边界的保持效果也比较好,具体的与其他方法的对比结果在下方论文中有详细记录。
论文地址:https://infoscience.epfl.ch/record/177415/files/Superpixel_PAMI2011-2.pdf
1.原理
SLIC本质上说只需要一个聚类数k,这个聚类数k也就是超像素的个数,希望把图片分割成多少个超像素就设多少个聚类中心。在初始化时把聚类中心等间隔地放到图像上:设图像像素数为N,则在每个边长为 $S=\sqrt{N/k}$ 的方格内放置一个初始聚类中心。为了避免把噪声像素和边缘当作聚类中心,会把该中心移动到周围3*3邻域内梯度最小的地方。
在用k-means做聚类时,采用的特征是Lab颜色空间的三个值加上坐标x,y,一共5个维度,即 $[l, a, b, x, y]$。另外与常规的k均值不同的地方是,这里计算距离时只考虑聚类中心周围的邻域,因此计算量和一般的k均值聚类相比要小很多。搜索空间对比如下图,
聚类后计算每一类的平均特征,并把聚类中心的特征更新为平均特征。如果迭代超过10次或者前后两次聚类的差小于阈值则结束,否则继续聚类、更新聚类中心……。算法流程如下: 在这里插入图片描述
2. 实现
2.1 初始化聚类中心
原文中按聚类数k来等间隔地初始化聚类中心,假设样本总数为N,聚类数为k,则每隔 $N/k$ 个样本放置一个聚类中心。在图片上等间隔地放置k个初始聚类中心,也就是把图片等分成边长为 $S=\sqrt{N/k}$ 的格子,在格子的固定位置放置初始聚类中心。另外为了避免初始的聚类中心落在物体边缘上,还要对每一个聚类中心都进行一下微调,具体就是计算初始聚类中心点周围邻域的梯度,把中心点移到梯度最小的点上。
这里实现的时候以初始超像素的边长len作为参数会比较直观,可以很简单地控制生成的超像素的大小,k和len的关系是 $len=\sqrt{N/k}$,即 $k=N/len^2$。
注:图片坐标系以左上角为原点,水平向右为x轴正方向,竖直向下为y轴正方向(与opencv一致),在访问图片数据矩阵时,一般用行和列的方式来描述,也就是row对应y,column对应x。
- 等间隔放置聚类中心,
int initilizeCenters(cv::Mat &imageLAB, std::vector<center> ¢ers, int len)
{
if (imageLAB.empty())
{
std::cout << "In itilizeCenters: image is empty!\n";
return -1;
}
uchar *ptr = NULL;
center cent;
int num = 0;
for (int i = 0; i < imageLAB.rows; i += len)
{
cent.y = i + len / 2;
if (cent.y >= imageLAB.rows) continue;
ptr = imageLAB.ptr<uchar>(cent.y);
for (int j = 0; j < imageLAB.cols; j += len)
{
cent.x = j + len / 2;
if ((cent.x >= imageLAB.cols)) continue;
cent.L = *(ptr + cent.x * 3);
cent.A = *(ptr + cent.x * 3 + 1);
cent.B = *(ptr + cent.x * 3 + 2);
cent.label = ++num;
centers.push_back(cent);
}
}
return 0;
}
- 把聚类中心移到周围8邻域里梯度最小的地方,梯度用Sobel计算(不希望聚类中心落在边缘上所以才调整的)
int fituneCenter(cv::Mat &imageLAB, cv::Mat &sobelGradient, std::vector<center> ¢ers)
{
if (sobelGradient.empty()) return -1;
center cent;
double *sobPtr = sobelGradient.ptr<double>(0);
uchar *imgPtr = imageLAB.ptr<uchar>(0);
int w = sobelGradient.cols;
for (int ck = 0; ck < centers.size(); ck++)
{
cent = centers[ck];
if (cent.x - 1 < 0 || cent.x + 1 >= sobelGradient.cols || cent.y - 1 < 0 || cent.y + 1 >= sobelGradient.rows)
{
continue;
}//end if
double minGradient = 9999999;
int tempx = 0, tempy = 0;
for (int m = -1; m < 2; m++)
{
sobPtr = sobelGradient.ptr<double>(cent.y + m);
for (int n = -1; n < 2; n++)
{
double gradient = pow(*(sobPtr + (cent.x + n) * 3), 2)
+ pow(*(sobPtr + (cent.x + n) * 3 + 1), 2)
+ pow(*(sobPtr + (cent.x + n) * 3 + 2), 2);
if (gradient < minGradient)
{
minGradient = gradient;
tempy = m;//row
tempx = n;//column
}//end if
}
}
cent.x += tempx;
cent.y += tempy;
imgPtr = imageLAB.ptr<uchar>(cent.y);
centers[ck].x = cent.x;
centers[ck].y = cent.y;
centers[ck].L = *(imgPtr + cent.x * 3);
centers[ck].A = *(imgPtr + cent.x * 3 + 1);
centers[ck].B = *(imgPtr + cent.x * 3 + 2);
}//end for
return 0;
}
2.2 聚类
对每一个聚类中心center_k,计算它到周围2len*2len区域的所有点的距离,如果新计算的距离比原来的距离更小,则把该点归入center_k这一类。
注意聚类的本质就是“物以类聚”,判断样本点和聚类中心的“近似”程度,可以从两个方面来考察,一种是距离测度:距离越近越相似,另一种是相似性测度,例如角度相似系数,相关系数,指数相似系数等。
距离的计算方式有很多种,比如:
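- 欧氏距离(Euclidean):$d(\mathbf{x},\mathbf{y})=\sqrt{\sum_i (x_i-y_i)^2}$
- 城区距离(Manhattan):$d(\mathbf{x},\mathbf{y})=\sum_i |x_i-y_i|$
- 切比雪夫距离(Chebyshev):$d(\mathbf{x},\mathbf{y})=\max_i |x_i-y_i|$
- 马氏距离(Mahalanobis):$d(\mathbf{x},\mathbf{y})=\sqrt{(\mathbf{x}-\mathbf{y})^{T}\Sigma^{-1}(\mathbf{x}-\mathbf{y})}$,其中 $\Sigma$ 为样本的协方差矩阵。它有一点比较好的性质就是与量纲无关(另外它还对坐标尺度、旋转、平移保持不变,从统计意义上去除了分量间的相关性),在这里分割超像素时,Lab颜色空间的距离往往会比空间距离大很多,用欧氏距离时需要加一个权重参数来调节颜色距离和空间距离的比例。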
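要是以后有时间的话可以考虑一下都试一下这些距离聚类的效果。这里采用的是欧氏距离,而且因为Lab颜色空间和图像xy坐标空间量纲不同,需要调整颜色空间距离和xy坐标距离的权重,论文中用下面的方式来计算距离:
$d_c=\sqrt{(l_j-l_i)^2+(a_j-a_i)^2+(b_j-b_i)^2}$,
$d_s=\sqrt{(x_j-x_i)^2+(y_j-y_i)^2}$,
$D=\sqrt{d_c^2+\left(\dfrac{d_s}{S}\right)^2 m^2}$,
其中 $S$ 为超像素的初始边长,$m$ 为调节空间距离权重的参数。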
但是实际上在做超像素分割时我们更关心超像素的大小,而不是有多少个,虽然尺寸S和聚类数k有明确的对应关系,但是把k当输入参数不如直接用尺寸S来得直接,另外实际用起来有点麻烦,因为单独修改m或者S都会被另外一个参数调制,所以D的计算我改成了下面这样:
$D=\sqrt{d_c^2+m\,d_s^2}$
/////////////////////////////////////////////////////
// Assignment step of SLIC: for every center, visits the pixels in the window
// [cx-len, cx+len) x [cy-len, cy+len) and claims each pixel whose distance to
// this center is smaller than the distance currently stored for it.
//
// The distance is
//   disc^2 = (L-cL)^2 + (A-cA)^2 + (B-cB)^2
//   diss^2 = (x-cx)^2 + (y-cy)^2
//   dis    = sqrt(disc^2 + m * diss^2)
//
//input parameters:
//imageLAB: the source image in Lab color space (8UC3)
//DisMask: 64FC1, same size as imageLAB; holds the shortest distance found so
//         far for every pixel and is updated in place. A pixel is only
//         re-labelled when the new distance is smaller than the stored one,
//         so the caller is expected to pre-fill it with a large value.
//labelMask: 64FC1, same size as imageLAB; receives the label of the winning center
//centers: clustering centers (only read here)
//len: half side length of the search window around each center
//m: weight of the squared spatial distance relative to the squared color distance
//
//output:
//returns 0 on success, -1 on an empty image or mismatching masks
////////////////////////////////////////////////////
int clustering(const cv::Mat &imageLAB, cv::Mat &DisMask, cv::Mat &labelMask,
    std::vector<center> &centers, int len, int m)
{
    if (imageLAB.empty())
    {
        std::cout << "clustering :the input image is empty!\n";
        return -1;
    }
    if (imageLAB.type() != CV_8UC3 ||
        DisMask.rows != imageLAB.rows || DisMask.cols != imageLAB.cols || DisMask.type() != CV_64FC1 ||
        labelMask.rows != imageLAB.rows || labelMask.cols != imageLAB.cols || labelMask.type() != CV_64FC1)
    {
        // The raw row pointers below would read or write out of bounds otherwise.
        std::cout << "clustering :image must be 8UC3 and masks 64FC1 of the same size!\n";
        return -1;
    }

    //note: image coordinates have their origin in the top-left corner, x grows
    //      to the right and y grows downwards (as in OpenCV); in matrix terms
    //      i is the row and j the column, i.e. (i,j) = (y,x)
    const int count = static_cast<int>(centers.size());
    for (int ck = 0; ck < count; ++ck)
    {
        const center &c = centers[ck];

        // Clip the search window to the image once instead of testing every pixel.
        const int rowBegin = (c.y - len < 0) ? 0 : c.y - len;
        const int rowEnd = (c.y + len > imageLAB.rows) ? imageLAB.rows : c.y + len;
        const int colBegin = (c.x - len < 0) ? 0 : c.x - len;
        const int colEnd = (c.x + len > imageLAB.cols) ? imageLAB.cols : c.x + len;

        for (int i = rowBegin; i < rowEnd; i++)
        {
            const uchar *imgPtr = imageLAB.ptr<uchar>(i);
            double *disPtr = DisMask.ptr<double>(i);
            double *labelPtr = labelMask.ptr<double>(i);
            const int dy = i - c.y;
            for (int j = colBegin; j < colEnd; j++)
            {
                const int dL = imgPtr[j * 3] - c.L;
                const int dA = imgPtr[j * 3 + 1] - c.A;
                const int dB = imgPtr[j * 3 + 2] - c.B;
                const int dx = j - c.x;
                // Squared distances are combined directly; taking the square
                // root of each part only to square it again adds nothing.
                const double colorSq = static_cast<double>(dL * dL + dA * dA + dB * dB);
                const double spatialSq = static_cast<double>(dx * dx + dy * dy);
                const double dis = std::sqrt(colorSq + m * spatialSq);
                if (dis < disPtr[j])
                {
                    disPtr[j] = dis;
                    labelPtr[j] = c.label;
                }
            }
        }
    }
    return 0;
}
2.3 更新聚类中心
对每一个聚类中心center_k,把所有属于这一类的点的特征加起来求平均,把这个平均特征赋给center_k。
int updateCenter(cv::Mat &imageLAB, cv::Mat &labelMask, std::vector<center> ¢ers, int len)
{
double *labelPtr = NULL;//labelMask type: 64FC1
const uchar *imgPtr = NULL;//imageLAB type: 8UC3
int cx, cy;
for (int ck = 0; ck < centers.size(); ++ck)
{
double sumx = 0, sumy = 0, sumL = 0, sumA = 0, sumB = 0, sumNum = 0;
cx = centers[ck].x;
cy = centers[ck].y;
for (int i = cy - len; i < cy + len; i++)
{
if (i < 0 | i >= imageLAB.rows) continue;
//pointer point to the ith row
imgPtr = imageLAB.ptr<uchar>(i);
labelPtr = labelMask.ptr<double>(i);
for (int j = cx - len; j < cx + len; j++)
{
if (j < 0 | j >= imageLAB.cols) continue;
if (*(labelPtr + j) == centers[ck].label)
{
sumL += *(imgPtr + j * 3);
sumA += *(imgPtr + j * 3 + 1);
sumB += *(imgPtr + j * 3 + 2);
sumx += j;
sumy += i;
sumNum += 1;
}//end if
}
}
//update center
if (sumNum == 0) sumNum = 0.000000001;
centers[ck].x = sumx / sumNum;
centers[ck].y = sumy / sumNum;
centers[ck].L = sumL / sumNum;
centers[ck].A = sumA / sumNum;
centers[ck].B = sumB / sumNum;
}//end for
return 0;
}
2.4 显示超像素分割结果
方式一:把属于同一类的点的特征都替换成平均特征;
方式二:画出聚类边界;
3. 实测效果
-
左侧为原图,中间为在原图上画出超像素边界效果图,右侧为超像素图像效果
bassball.png
4. 完整源码
//
//created by Mr. Peng. 2021\08\31
//
#include "opencv.hpp"
// One SLIC cluster center: its pixel position, its Lab color and the label
// that is written into the label mask for the pixels assigned to it.
struct center {
    int x;      ///< column index of the center
    int y;      ///< row index of the center
    int L;      ///< first Lab channel value of the center
    int A;      ///< second Lab channel value of the center
    int B;      ///< third Lab channel value of the center
    int label;  ///< cluster id
};
/////////////////////////////////////////////////////
// Assignment step of SLIC: for every center, visits the pixels in the window
// [cx-len, cx+len) x [cy-len, cy+len) and claims each pixel whose distance to
// this center is smaller than the distance currently stored for it.
//
// The distance is
//   disc^2 = (L-cL)^2 + (A-cA)^2 + (B-cB)^2
//   diss^2 = (x-cx)^2 + (y-cy)^2
//   dis    = sqrt(disc^2 + m * diss^2)
//
//input parameters:
//imageLAB: the source image in Lab color space (8UC3)
//DisMask: 64FC1, same size as imageLAB; holds the shortest distance found so
//         far for every pixel and is updated in place. A pixel is only
//         re-labelled when the new distance is smaller than the stored one,
//         so the caller is expected to pre-fill it with a large value.
//labelMask: 64FC1, same size as imageLAB; receives the label of the winning center
//centers: clustering centers (only read here)
//len: half side length of the search window around each center
//m: weight of the squared spatial distance relative to the squared color distance
//
//output:
//returns 0 on success, -1 on an empty image or mismatching masks
////////////////////////////////////////////////////
int clustering(const cv::Mat &imageLAB, cv::Mat &DisMask, cv::Mat &labelMask,
    std::vector<center> &centers, int len, int m)
{
    if (imageLAB.empty())
    {
        std::cout << "clustering :the input image is empty!\n";
        return -1;
    }
    if (imageLAB.type() != CV_8UC3 ||
        DisMask.rows != imageLAB.rows || DisMask.cols != imageLAB.cols || DisMask.type() != CV_64FC1 ||
        labelMask.rows != imageLAB.rows || labelMask.cols != imageLAB.cols || labelMask.type() != CV_64FC1)
    {
        // The raw row pointers below would read or write out of bounds otherwise.
        std::cout << "clustering :image must be 8UC3 and masks 64FC1 of the same size!\n";
        return -1;
    }

    //note: image coordinates have their origin in the top-left corner, x grows
    //      to the right and y grows downwards (as in OpenCV); in matrix terms
    //      i is the row and j the column, i.e. (i,j) = (y,x)
    const int count = static_cast<int>(centers.size());
    for (int ck = 0; ck < count; ++ck)
    {
        const center &c = centers[ck];

        // Clip the search window to the image once instead of testing every pixel.
        const int rowBegin = (c.y - len < 0) ? 0 : c.y - len;
        const int rowEnd = (c.y + len > imageLAB.rows) ? imageLAB.rows : c.y + len;
        const int colBegin = (c.x - len < 0) ? 0 : c.x - len;
        const int colEnd = (c.x + len > imageLAB.cols) ? imageLAB.cols : c.x + len;

        for (int i = rowBegin; i < rowEnd; i++)
        {
            const uchar *imgPtr = imageLAB.ptr<uchar>(i);
            double *disPtr = DisMask.ptr<double>(i);
            double *labelPtr = labelMask.ptr<double>(i);
            const int dy = i - c.y;
            for (int j = colBegin; j < colEnd; j++)
            {
                const int dL = imgPtr[j * 3] - c.L;
                const int dA = imgPtr[j * 3 + 1] - c.A;
                const int dB = imgPtr[j * 3 + 2] - c.B;
                const int dx = j - c.x;
                // Squared distances are combined directly; taking the square
                // root of each part only to square it again adds nothing.
                const double colorSq = static_cast<double>(dL * dL + dA * dA + dB * dB);
                const double spatialSq = static_cast<double>(dx * dx + dy * dy);
                const double dis = std::sqrt(colorSq + m * spatialSq);
                if (dis < disPtr[j])
                {
                    disPtr[j] = dis;
                    labelPtr[j] = c.label;
                }
            }
        }
    }
    return 0;
}
/////////////////////////////////////////////////////
//input parameters:
//imageLAB: the source image in Lab color space
//labelMask: it save every pixel's label
//centers: clustering center
//len: the super pixls will be initialize to len*len
//
//output:
////////////////////////////////////////////////////
int updateCenter(cv::Mat &imageLAB, cv::Mat &labelMask, std::vector<center> ¢ers, int len)
{
double *labelPtr = NULL;//labelMask type: 64FC1
const uchar *imgPtr = NULL;//imageLAB type: 8UC3
int cx, cy;
for (int ck = 0; ck < centers.size(); ++ck)
{
double sumx = 0, sumy = 0, sumL = 0, sumA = 0, sumB = 0, sumNum = 0;
cx = centers[ck].x;
cy = centers[ck].y;
for (int i = cy - len; i < cy + len; i++)
{
if (i < 0 | i >= imageLAB.rows) continue;
//pointer point to the ith row
imgPtr = imageLAB.ptr<uchar>(i);
labelPtr = labelMask.ptr<double>(i);
for (int j = cx - len; j < cx + len; j++)
{
if (j < 0 | j >= imageLAB.cols) continue;
if (*(labelPtr + j) == centers[ck].label)
{
sumL += *(imgPtr + j * 3);
sumA += *(imgPtr + j * 3 + 1);
sumB += *(imgPtr + j * 3 + 2);
sumx += j;
sumy += i;
sumNum += 1;
}//end if
}
}
//update center
if (sumNum == 0) sumNum = 0.000000001;
centers[ck].x = sumx / sumNum;
centers[ck].y = sumy / sumNum;
centers[ck].L = sumL / sumNum;
centers[ck].A = sumA / sumNum;
centers[ck].B = sumB / sumNum;
}//end for
return 0;
}
// Displays the superpixel image: every labelled pixel is replaced by the Lab
// color of its cluster center, then the result is shown in window "showSLIC".
//
// image     : source image in BGR order (8UC3).
// labelMask : every pixel's label, 64FC1, same size as image.
// centers   : clustering centers providing label and mean color.
// len       : kept for interface compatibility; pixels are matched to their
//             center by label, not by a window around the center, because a
//             center that moved in the last update can lie more than len
//             away from some of its own pixels.
//
// Pixels whose label matches no center are left unchanged.
// Returns 0 on success, -1 if the inputs are empty or inconsistent.
int showSLICResult(const cv::Mat &image, cv::Mat &labelMask, std::vector<center> &centers, int len)
{
    (void)len;
    if (image.empty() || image.type() != CV_8UC3 ||
        labelMask.rows != image.rows || labelMask.cols != image.cols ||
        labelMask.type() != CV_64FC1)
    {
        return -1;
    }

    cv::Mat dst;
    cv::cvtColor(image, dst, cv::COLOR_BGR2Lab);

    // Lookup table: label -> index into centers (-1 where no center has that
    // label). Later centers win on duplicate labels.
    const int count = static_cast<int>(centers.size());
    int maxLabel = -1;
    for (int ck = 0; ck < count; ++ck)
    {
        if (centers[ck].label > maxLabel) maxLabel = centers[ck].label;
    }
    std::vector<int> centerOfLabel(maxLabel + 1, -1);
    for (int ck = 0; ck < count; ++ck)
    {
        if (centers[ck].label >= 0) centerOfLabel[centers[ck].label] = ck;
    }

    for (int i = 0; i < dst.rows; i++)
    {
        uchar *imgPtr = dst.ptr<uchar>(i);
        const double *labelPtr = labelMask.ptr<double>(i);
        for (int j = 0; j < dst.cols; j++)
        {
            const double stored = labelPtr[j];
            if (stored < 0 || stored > maxLabel) continue;
            const int ck = centerOfLabel[static_cast<int>(stored)];
            // The exact comparison rejects non-integral mask values.
            if (ck < 0 || stored != centers[ck].label) continue;
            imgPtr[j * 3] = static_cast<uchar>(centers[ck].L);
            imgPtr[j * 3 + 1] = static_cast<uchar>(centers[ck].A);
            imgPtr[j * 3 + 2] = static_cast<uchar>(centers[ck].B);
        }
    }

    cv::cvtColor(dst, dst, cv::COLOR_Lab2BGR);
    cv::namedWindow("showSLIC", 0);
    cv::imshow("showSLIC", dst);
    cv::waitKey(1);
    return 0;
}
// Displays the superpixel boundaries: a pixel is painted black when its label
// differs from the label of its right or lower neighbour, and every center
// position is marked. The result is shown in window "showSLIC2".
//
// image     : source image (8UC3); it is copied, not modified.
// labelMask : every pixel's label, 64FC1, same size as image.
// centers   : clustering centers whose positions are marked.
// len       : unused; kept for interface compatibility.
//
// The last row and last column are not tested because they lack a lower or
// right neighbour.
// Returns 0 on success, -1 if the inputs are empty or inconsistent.
int showSLICResult2(const cv::Mat &image, cv::Mat &labelMask, std::vector<center> &centers, int len)
{
    (void)len;
    if (image.empty() || image.type() != CV_8UC3 ||
        labelMask.rows != image.rows || labelMask.cols != image.cols ||
        labelMask.type() != CV_64FC1)
    {
        return -1;
    }

    cv::Mat dst = image.clone();
    for (int i = 0; i < labelMask.rows - 1; i++)
    {
        const double *labelPtr = labelMask.ptr<double>(i);
        const double *nextRowPtr = labelMask.ptr<double>(i + 1);
        uchar *imgPtr = dst.ptr<uchar>(i);
        for (int j = 0; j < labelMask.cols - 1; j++)
        {
            const bool differsFromRight = labelPtr[j] != labelPtr[j + 1];
            const bool differsFromBelow = labelPtr[j] != nextRowPtr[j];
            if (differsFromRight || differsFromBelow)
            {
                imgPtr[3 * j] = 0;
                imgPtr[3 * j + 1] = 0;
                imgPtr[3 * j + 2] = 0;
            }
        }
    }

    //show center
    const int count = static_cast<int>(centers.size());
    for (int ck = 0; ck < count; ck++)
    {
        const center &c = centers[ck];
        // Skip centers outside the image rather than writing out of bounds.
        if (c.x < 0 || c.x >= dst.cols || c.y < 0 || c.y >= dst.rows) continue;
        uchar *px = dst.ptr<uchar>(c.y) + c.x * 3;
        px[0] = 100;
        px[1] = 100;
        px[2] = 10;  // third channel; the second one used to be written twice
    }

    cv::namedWindow("showSLIC2", 0);
    cv::imshow("showSLIC2", dst);
    cv::waitKey(1);
    return 0;
}
int initilizeCenters(cv::Mat &imageLAB, std::vector<center> ¢ers, int len)
{
if (imageLAB.empty())
{
std::cout << "In itilizeCenters: image is empty!\n";
return -1;
}
uchar *ptr = NULL;
center cent;
int num = 0;
for (int i = 0; i < imageLAB.rows; i += len)
{
cent.y = i + len / 2;
if (cent.y >= imageLAB.rows) continue;
ptr = imageLAB.ptr<uchar>(cent.y);
for (int j = 0; j < imageLAB.cols; j += len)
{
cent.x = j + len / 2;
if ((cent.x >= imageLAB.cols)) continue;
cent.L = *(ptr + cent.x * 3);
cent.A = *(ptr + cent.x * 3 + 1);
cent.B = *(ptr + cent.x * 3 + 2);
cent.label = ++num;
centers.push_back(cent);
}
}
return 0;
}
//if the center locates in the edges, fitune it's location.
int fituneCenter(cv::Mat &imageLAB, cv::Mat &sobelGradient, std::vector<center> ¢ers)
{
if (sobelGradient.empty()) return -1;
center cent;
double *sobPtr = sobelGradient.ptr<double>(0);
uchar *imgPtr = imageLAB.ptr<uchar>(0);
int w = sobelGradient.cols;
for (int ck = 0; ck < centers.size(); ck++)
{
cent = centers[ck];
if (cent.x - 1 < 0 || cent.x + 1 >= sobelGradient.cols || cent.y - 1 < 0 || cent.y + 1 >= sobelGradient.rows)
{
continue;
}//end if
double minGradient = 9999999;
int tempx = 0, tempy = 0;
for (int m = -1; m < 2; m++)
{
sobPtr = sobelGradient.ptr<double>(cent.y + m);
for (int n = -1; n < 2; n++)
{
double gradient = pow(*(sobPtr + (cent.x + n) * 3), 2)
+ pow(*(sobPtr + (cent.x + n) * 3 + 1), 2)
+ pow(*(sobPtr + (cent.x + n) * 3 + 2), 2);
if (gradient < minGradient)
{
minGradient = gradient;
tempy = m;//row
tempx = n;//column
}//end if
}
}
cent.x += tempx;
cent.y += tempy;
imgPtr = imageLAB.ptr<uchar>(cent.y);
centers[ck].x = cent.x;
centers[ck].y = cent.y;
centers[ck].L = *(imgPtr + cent.x * 3);
centers[ck].A = *(imgPtr + cent.x * 3 + 1);
centers[ck].B = *(imgPtr + cent.x * 3 + 2);
}//end for
return 0;
}
/////////////////////////////////////////////////////
//input parameters:
//image: the source image in RGB color space
//resultLabel: it save every pixel's label
//len: the super pixls will be initialize to len*len
//m: a parameter witch adjust the weights of diss
//output:
////////////////////////////////////////////////////
int SLIC(cv::Mat &image, cv::Mat &resultLabel, std::vector<center> ¢ers, int len, int m)
{
if (image.empty())
{
std::cout << "in SLIC the input image is empty!\n";
return -1;
}
int MAXDIS = 999999;
int height, width;
height = image.rows;
width = image.cols;
//convert color
cv::Mat imageLAB;
cv::cvtColor(image, imageLAB, cv::COLOR_BGR2Lab);
//get sobel gradient map
cv::Mat sobelImagex, sobelImagey, sobelGradient;
cv::Sobel(imageLAB, sobelImagex, CV_64F, 0, 1, 3);
cv::Sobel(imageLAB, sobelImagey, CV_64F, 1, 0, 3);
cv::addWeighted(sobelImagex, 0.5, sobelImagey, 0.5, 0, sobelGradient);//sobel output image type is CV_64F
//initiate
//std::vector<center> centers;
//disMask save the distance of the pixels to center;
cv::Mat disMask ;
//labelMask save the label of the pixels
cv::Mat labelMask = cv::Mat::zeros(cv::Size(width, height), CV_64FC1);
//initialize centers, get centers
initilizeCenters(imageLAB, centers, len);
//if the center locates in the edges, fitune it's location
fituneCenter(imageLAB, sobelGradient, centers);
//update cluster 10 times
for (int time = 0; time < 10; time++)
{
//clustering
disMask = cv::Mat(height, width, CV_64FC1, cv::Scalar(MAXDIS));
clustering(imageLAB, disMask, labelMask, centers, len, m);
//update
updateCenter(imageLAB, labelMask, centers, len);
//fituneCenter(imageLAB, sobelGradient, centers);
}
resultLabel = labelMask;
return 0;
}
// Demo driver: loads one image from a fixed path, segments it with SLIC and
// shows the source image, the superpixel image and the boundary image, then
// waits for a key press.
// Returns 0 on success, -1 if the image cannot be read or segmentation fails.
int SLIC_Demo()
{
    std::string imagePath = "K:\\deepImage\\plane.jpg";
    cv::Mat image = cv::imread(imagePath);
    if (image.empty())
    {
        // imread signals failure with an empty matrix; imshow would throw on it.
        std::cout << "SLIC_Demo: cannot read image " << imagePath << "\n";
        return -1;
    }

    cv::Mat labelMask;//save every pixel's label
    std::vector<center> centers;//clustering centers
    int len = 25;//the scale of the superpixel ,len*len
    int m = 10;//weight of the spatial distance relative to the color distance

    if (SLIC(image, labelMask, centers, len, m) != 0)
    {
        return -1;
    }

    cv::namedWindow("image", 1);
    cv::imshow("image", image);
    showSLICResult(image, labelMask, centers, len);
    showSLICResult2(image, labelMask, centers, len);
    cv::waitKey(0);
    return 0;
}
// Program entry point: runs the SLIC demo and reports its outcome as the
// process exit status (0 on success, 1 if the demo returned non-zero).
int main()
{
    return SLIC_Demo() == 0 ? 0 : 1;
}
网友评论