Above All
My big assignment for the machine learning course is image classification. I'm collecting some useful references here so that the logic is clearer when I write up the report later.
The features I mainly plan to use are SIFT and SURF; that said, judging from the dataset, I think colour histograms could work as well.
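As a quick illustration of the colour-histogram option, here is a minimal sketch using the OpenCV C++ API; the image path and the 8×8×8 binning are assumptions of mine, not anything fixed by the assignment:

```cpp
//Minimal sketch: a 3-D BGR colour histogram as a fixed-length feature vector.
//"sample.jpg" and the 8x8x8 binning are illustrative assumptions.
#include <opencv2/opencv.hpp>
#include <iostream>

int main()
{
    cv::Mat img = cv::imread("sample.jpg");
    if (img.empty())
        return -1;

    int histSize[] = {8, 8, 8};                //8 bins per channel -> 512-dimensional feature
    float range[] = {0, 256};
    const float *ranges[] = {range, range, range};
    int channels[] = {0, 1, 2};

    cv::Mat hist;
    cv::calcHist(&img, 1, channels, cv::Mat(), hist, 3, histSize, ranges);
    cv::normalize(hist, hist, 1.0, 0.0, cv::NORM_L1); //make the feature independent of image size

    std::cout << "histogram has " << hist.total() << " bins" << std::endl;
    return 0;
}
```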
1. Quick-and-dirty SIFT feature extraction
Source code: https://github.com/SimGuo/ImageProcessing/blob/master/main.cpp
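I haven't reproduced the linked code here. As a rough idea of what the extraction looks like, here is a minimal sketch with the OpenCV C++ API, assuming a build where SIFT lives in the main module (OpenCV ≥ 4.4); the linked repository may well do things differently:

```cpp
//Minimal sketch of SIFT keypoint and descriptor extraction with OpenCV >= 4.4.
//"sample.jpg" is a placeholder path; the linked repository is not reproduced here.
#include <opencv2/opencv.hpp>
#include <iostream>
#include <vector>

int main()
{
    cv::Mat img = cv::imread("sample.jpg", cv::IMREAD_GRAYSCALE);
    if (img.empty())
        return -1;

    cv::Ptr<cv::SIFT> sift = cv::SIFT::create();
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;                       //one 128-dimensional row per keypoint
    sift->detectAndCompute(img, cv::noArray(), keypoints, descriptors);

    std::cout << keypoints.size() << " keypoints, descriptor matrix "
              << descriptors.rows << " x " << descriptors.cols << std::endl;
    return 0;
}
```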
2. The Bag-of-words approach
Author: Savitch
Source: http://blog.csdn.net/assiduousknight/article/details/16901427
What is BoW
Applying the Bag-of-words model in three steps
Next, using the images above, we show how the Bag-of-words model represents an image as a numerical vector. There are three target classes: faces, bicycles and guitars.
- The first step of the Bag-of-words model is to use the SIFT algorithm to extract visual words from each class of images and to pool all of the visual words together, as shown in the figure below:
- The second step is to build the vocabulary with the K-Means algorithm. K-Means is an indirect clustering method based on a similarity measure between samples: with K as a parameter, it partitions N objects into K clusters so that similarity within a cluster is high and similarity between clusters is low. Based on the distances between the visual-word vectors extracted by SIFT, K-Means merges words with similar meanings into the basic entries of the vocabulary. Assuming we set K to 4, the vocabulary is constructed as shown in the figure below:
- The third step is to represent each image with the words in the vocabulary. With SIFT, many feature points can be extracted from each image, and every feature point can be approximated by a word in the vocabulary. By counting how many times each vocabulary word occurs in an image, the image is represented as a K = 4 dimensional numerical vector; see the figure below. (A compact OpenCV sketch of all three steps follows this list.)
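The three steps map almost one-to-one onto OpenCV's built-in BoW helpers. Below is a compact sketch using cv::BOWKMeansTrainer and cv::BOWImgDescriptorExtractor rather than the hand-rolled C-API code in the rest of this post; it assumes OpenCV ≥ 4.4, the image names are hypothetical, and K = 4 is kept only to mirror the example above:

```cpp
//Sketch of the three BoW steps with cv::BOWKMeansTrainer and cv::BOWImgDescriptorExtractor.
//OpenCV >= 4.4 assumed; the image names and K = 4 are illustrative assumptions only.
#include <opencv2/opencv.hpp>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> trainImgs = {"face1.jpg", "bike1.jpg", "guitar1.jpg"};
    cv::Ptr<cv::SIFT> sift = cv::SIFT::create();

    //Step 1: extract SIFT descriptors (visual words) from every training image.
    cv::BOWKMeansTrainer bowTrainer(4 /* K */);
    for (const std::string &name : trainImgs)
    {
        cv::Mat img = cv::imread(name, cv::IMREAD_GRAYSCALE);
        if (img.empty()) continue;
        std::vector<cv::KeyPoint> kp;
        cv::Mat desc;
        sift->detectAndCompute(img, cv::noArray(), kp, desc);
        if (!desc.empty()) bowTrainer.add(desc);
    }

    //Step 2: K-Means over all pooled descriptors builds the K-entry vocabulary.
    cv::Mat vocabulary = bowTrainer.cluster();

    //Step 3: describe an image as a K-dimensional histogram over the vocabulary.
    cv::BOWImgDescriptorExtractor bowExtractor(
        sift, cv::DescriptorMatcher::create("BruteForce"));
    bowExtractor.setVocabulary(vocabulary);

    cv::Mat query = cv::imread("face2.jpg", cv::IMREAD_GRAYSCALE);
    if (query.empty()) return -1;
    std::vector<cv::KeyPoint> queryKp;
    sift->detect(query, queryKp);
    cv::Mat bowHistogram;                     //1 x K row vector, normalised by OpenCV
    bowExtractor.compute(query, queryKp, bowHistogram);
    return 0;
}
```

In practice K is set much larger; the hand-written code later in this post uses cnClusterNumber = 1500.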
Code (I haven't fully worked through it yet)
- Set up the environment
- Create the C++ class CSIFTDiscriptor
For ease of use, we wrap the SIFT library in a C++ class CSIFTDiscriptor, which computes and returns the set of feature-point vectors of a given image. The class is declared in SIFTDiscriptor.h:
```cpp
#ifndef _SIFT_DISCRIPTOR_H_
#define _SIFT_DISCRIPTOR_H_
#include <string>
#include <highgui.h>
#include <cv.h>
extern "C"
{
#include "../sift/sift.h"
#include "../sift/imgfeatures.h"
#include "../sift/utils.h"
};
class CSIFTDiscriptor
{
public:
    int GetInterestPointNumber()
    {
        return m_nInterestPointNumber;
    }
    struct feature *GetFeatureArray()
    {
        return m_pFeatureArray;
    }
public:
    void SetImgName(const std::string &strImgName)
    {
        m_strInputImgName = strImgName;
    }
    int CalculateSIFT();
public:
    CSIFTDiscriptor(const std::string &strImgName);
    CSIFTDiscriptor()
    {
        m_nInterestPointNumber = 0;
        m_pFeatureArray = NULL;
    }
    ~CSIFTDiscriptor();
private:
    std::string m_strInputImgName;
    int m_nInterestPointNumber;
    feature *m_pFeatureArray;
};
#endif
```
The member functions are implemented in SIFTDiscriptor.cpp. Among them, CalculateSIFT extracts and computes the feature points; its main internal flow is as follows:
- Call the OpenCV function cvLoadImage to load the input image;
- To normalise the size of input images, the second step of CalculateSIFT is to resize the input image by calling cvResize;
- If the input image is a colour image, it must first be converted to grayscale by calling cvCvtColor;
- Call the SIFT library function sift_features to obtain the set of feature-point vectors of the input image and the number of feature points.
#include "SIFTDiscriptor.h"
int CSIFTDiscriptor::CalculateSIFT()
{
IplImage *pInputImg = cvLoadImage(m_strInputImgName.c_str());
if (!pInputImg)
{
return -1;
}
int nImgWidth = 320; //standard image width used for training
double dbScaleFactor = pInputImg->width / (double)nImgWidth; //scale factor
IplImage *pTmpImg = cvCreateImage(cvSize(pInputImg->width / dbScaleFactor, pInputImg->height / dbScaleFactor),
pInputImg->depth, pInputImg->nChannels);
cvResize(pInputImg, pTmpImg); //scale to the standard size
cvReleaseImage(&pInputImg);
if (pTmpImg->nChannels != 1) //not a grayscale image
{
IplImage *pGrayImg = cvCreateImage(cvSize(pTmpImg->width, pTmpImg->height),
pTmpImg->depth, 1);
cvCvtColor(pTmpImg, pGrayImg, CV_BGR2GRAY); //cvLoadImage returns BGR channel order
m_nInterestPointNumber = sift_features(pGrayImg, &m_pFeatureArray);
cvReleaseImage(&pGrayImg);
}
else
{
m_nInterestPointNumber = sift_features(pTmpImg, &m_pFeatureArray);
}
cvReleaseImage(&pTmpImg);
return m_nInterestPointNumber;
}
CSIFTDiscriptor::CSIFTDiscriptor(const std::string &strImgName)
{
m_strInputImgName = strImgName;
m_nInterestPointNumber = 0;
m_pFeatureArray = NULL;
CalculateSIFT();
}
CSIFTDiscriptor::~CSIFTDiscriptor()
{
if (m_pFeatureArray)
{
free(m_pFeatureArray);
}
}
```
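For orientation, here is a minimal usage sketch of the class above; the image path is a placeholder of mine, not from the original post:

```cpp
//Minimal usage sketch of CSIFTDiscriptor; "train/face/001.jpg" is a hypothetical path.
#include <cstdio>
#include "SIFTDiscriptor.h"

int main()
{
    CSIFTDiscriptor iDiscriptor("train/face/001.jpg"); //this constructor runs CalculateSIFT()
    int nPoints = iDiscriptor.GetInterestPointNumber();
    if (nPoints <= 0)
    {
        printf("failed to load the image or no keypoints found\n");
        return -1;
    }
    struct feature *pFeatures = iDiscriptor.GetFeatureArray();
    //each feature carries a FEATURE_MAX_D-dimensional descriptor in descr[]
    printf("extracted %d SIFT keypoints; first descriptor component = %f\n",
           nPoints, pFeatures[0].descr[0]);
    return 0;
}
```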
- Create the C++ class CImgSet to manage the experimental image set
The Bag-of-words model needs to extract visual words from images of several target classes, and the images of different classes are stored in different subfolders. To make this easy to handle, we design a dedicated class CImgSet to manage the image set; it is declared in ImgSet.h:
```cpp
#ifndef _IMG_SET_H_
#define _IMG_SET_H_
#include <vector>
#include <string>
#pragma comment(lib, "shlwapi.lib")
class CImgSet
{
public:
    CImgSet(const std::string &strImgDirName)
        : m_strImgDirName(strImgDirName + "//"), m_nImgNumber(0) {}
    int GetTotalImageNumber()
    {
        return m_nImgNumber;
    }
    std::string GetImgName(int nIndex)
    {
        return m_szImgs.at(nIndex);
    }
    int LoadImgsFromDir()
    {
        return LoadImgsFromDir("");
    }
private:
    int LoadImgsFromDir(const std::string &strDirName);
private:
    typedef std::vector<std::string> IMG_SET;
    IMG_SET m_szImgs;
    int m_nImgNumber;
    const std::string m_strImgDirName;
};
#endif
```
The member functions are implemented in ImgSet.cpp:
```cpp
#include "ImgSet.h"
#include <windows.h>
#include <Shlwapi.h>
/** strSubDirName: subdirectory name */
int CImgSet::LoadImgsFromDir(const std::string &strSubDirName)
{
    WIN32_FIND_DATAA stFD = {0};
    std::string strDirName;
    if ("" == strSubDirName)
    {
        strDirName = m_strImgDirName;
    }
    else
    {
        strDirName = strSubDirName;
    }
    std::string strFindName = strDirName + "//*";
    HANDLE hFile = FindFirstFileA(strFindName.c_str(), &stFD);
    BOOL bExist = FindNextFileA(hFile, &stFD);
    for (; bExist;)
    {
        std::string strTmpName = strDirName + stFD.cFileName;
        if (strDirName + "." == strTmpName || strDirName + ".." == strTmpName)
        {
            bExist = FindNextFileA(hFile, &stFD);
            continue;
        }
        if (PathIsDirectoryA(strTmpName.c_str()))
        {
            strTmpName += "//";
            LoadImgsFromDir(strTmpName);
            bExist = FindNextFileA(hFile, &stFD);
            continue;
        }
        std::string strSubImg = strDirName + stFD.cFileName;
        m_szImgs.push_back(strSubImg);
        bExist = FindNextFileA(hFile, &stFD);
    }
    FindClose(hFile); //release the search handle
    m_nImgNumber = m_szImgs.size();
    return m_nImgNumber;
}
```
LoadImgsFromDir recursively collects the names of all experimental images from the image folder, including its subfolders. Internally it finds the names of all images in a folder by calling the Windows API functions FindFirstFile and FindNextFile in a loop.
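The directory walk above is Windows-specific. For comparison, here is a portable sketch of the same idea with C++17 std::filesystem (my own alternative, not part of the original code):

```cpp
//Portable sketch of the same directory walk using C++17 std::filesystem.
//"train_set" is a hypothetical folder name and is assumed to exist.
#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> LoadImgsFromDir(const std::string &strDirName)
{
    std::vector<std::string> szImgs;
    for (const auto &entry : std::filesystem::recursive_directory_iterator(strDirName))
    {
        if (entry.is_regular_file()) //collect every regular file, including those in subfolders
        {
            szImgs.push_back(entry.path().string());
        }
    }
    return szImgs;
}

int main()
{
    std::vector<std::string> imgs = LoadImgsFromDir("train_set");
    std::cout << imgs.size() << " images found" << std::endl;
    return 0;
}
```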
- Create CHistogram to generate the histogram representation of images
```cpp
//ImgHistogram.h
#ifndef _IMG_HISTOGRAM_H_
#define _IMG_HISTOGRAM_H_
#include <string>
#include "SIFTDiscriptor.h"
#include "ImgSet.h"
const int cnClusterNumber = 1500;
const int ciMax_D = FEATURE_MAX_D;
class CHistogram
{
public:
void SetTrainingImgSetName(const std::string strTrainingImgSet)
{
m_strTrainingImgSetName = strTrainingImgSet;
}
int FormHistogram();
CvMat CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[]);
CvMat *GetObservedData();
CvMat *GetCodebook()
{
return m_pCodebook;
}
void SetCodebook(CvMat *pCodebook)
{
m_pCodebook = pCodebook;
m_bSet = true;
}
public:
CHistogram():m_pszHistogram(0), m_nImgNumber(0), m_pObservedData(0), m_pCodebook(0), m_bSet(false){}
~CHistogram()
{
if (m_pszHistogram)
{
delete []m_pszHistogram;
m_pszHistogram = 0;
}
if (m_pObservedData)
{
cvReleaseMat(&m_pObservedData);
m_pObservedData = 0;
}
if (m_pCodebook && !m_bSet)
{
cvReleaseMat(&m_pCodebook);
m_pCodebook = 0;
}
}
private :
bool m_bSet;
CvMat *m_pCodebook;
CvMat *m_pObservedData;
std::string m_strTrainingImgSetName;
int (*m_pszHistogram)[cnClusterNumber];
int m_nImgNumber;
};
#endif
```

```cpp
#include "ImgHistogram.h"
int CHistogram::FormHistogram()
{
int nRet = 0;
CImgSet iImgSet(m_strTrainingImgSetName);
nRet = iImgSet.LoadImgsFromDir();
const int cnTrainingImgNumber = iImgSet.GetTotalImageNumber();
m_nImgNumber = cnTrainingImgNumber;
CSIFTDiscriptor *pDiscriptor = new CSIFTDiscriptor[cnTrainingImgNumber];
int nIPNumber(0) ;
for (int i = 0; i < cnTrainingImgNumber; ++i) //compute the SIFT descriptors of every training image
{
const std::string strImgName = iImgSet.GetImgName(i);
pDiscriptor[i].SetImgName(strImgName);
pDiscriptor[i].CalculateSIFT();
nIPNumber += pDiscriptor[i].GetInterestPointNumber();
}
//array holding all descriptors, one row per interest point (IP); stored as float so the CV_32FC1 matrix header passed to cvKMeans2 matches the data
float (*pszDiscriptor)[FEATURE_MAX_D] = new float[nIPNumber][FEATURE_MAX_D];
ZeroMemory(pszDiscriptor, sizeof(float) * nIPNumber * FEATURE_MAX_D);
int nIndex = 0;
for (int i = 0; i < cnTrainingImgNumber; ++i) //iterate over all images
{
struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();
int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();
for (int j = 0; j < nFeatureNumber; ++j) //iterate over every interest point (IP) in the image
{
for (int k = 0; k < FEATURE_MAX_D; k++) //copy one IP descriptor
{
pszDiscriptor[nIndex][k] = (float)pFeatureArray[j].descr[k];
}
++nIndex;
}
}
CvMat *pszLabels = cvCreateMat(nIPNumber, 1, CV_32SC1);
//run K-Means on the descriptors of all IPs to find cnClusterNumber cluster centres, stored in pszClusterCenters
if (!m_pCodebook) //build the codebook
{
CvMat szSamples,
*pszClusterCenters = cvCreateMat(cnClusterNumber, FEATURE_MAX_D, CV_32FC1);
cvInitMatHeader(&szSamples, nIPNumber, FEATURE_MAX_D, CV_32FC1, pszDiscriptor);
cvKMeans2(&szSamples, cnClusterNumber, pszLabels,
cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),
1, (CvRNG *)0, 0, pszClusterCenters); //
m_pCodebook = pszClusterCenters;
}
m_pszHistogram = new int[cnTrainingImgNumber][cnClusterNumber]; //histogram representation of every image, one row per image
ZeroMemory(m_pszHistogram, sizeof(int) * cnTrainingImgNumber * cnClusterNumber);
//compute the histogram of every image
nIndex = 0;
for (int i = 0; i < cnTrainingImgNumber; ++i)
{
struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();
int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();
// int nIndex = 0;
for (int j = 0; j < nFeatureNumber; ++j)
{
// CvMat szFeature;
// cvInitMatHeader(&szFeature, 1, FEATURE_MAX_D, CV_32FC1, pszDiscriptor[nIndex++]);
// double dbMinimum = 1.79769e308;
// int nCodebookIndex = 0;
// for (int k = 0; k < m_pCodebook->rows; ++k) //find the closest code and use it in place of the original word
// {
// CvMat szCode = cvMat(1, m_pCodebook->cols, m_pCodebook->type);
// cvGetRow(m_pCodebook, &szCode, k);
// double dbDistance = cvNorm(&szFeature, &szCode, CV_L2);
// if (dbDistance < dbMinimum)
// {
// dbMinimum = dbDistance;
// nCodebookIndex = k;
// }
// }
int nCodebookIndex = pszLabels->data.i[nIndex++]; //codebook index of the j-th IP of the i-th image, taken from the K-Means labels above
++m_pszHistogram[i][nCodebookIndex]; //0 <= nCodebookIndex < cnClusterNumber
}
}
//clean up resources and return
// delete []m_pszHistogram;
// m_pszHistogram = 0;
cvReleaseMat(&pszLabels);
// cvReleaseMat(&pszClusterCenters);
delete []pszDiscriptor;
delete []pDiscriptor;
return nRet;
}
//double descr_dist_sq( struct feature* f1, struct feature* f2 );
CvMat CHistogram::CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[])
{
if ("" == strImgName || !m_pCodebook || !pszImgHistogram)
{
return CvMat();
}
CSIFTDiscriptor iImgDisp;
iImgDisp.SetImgName(strImgName);
iImgDisp.CalculateSIFT();
struct feature *pImgFeature = iImgDisp.GetFeatureArray();
int cnIPNumber = iImgDisp.GetInterestPointNumber();
// int *pszImgHistogram = new int[cnClusterNumber];
// ZeroMemory(pszImgHistogram, sizeof(int)*cnClusterNumber);
for (int i = 0; i < cnIPNumber; ++i)
{
double *pszDistance = new double[cnClusterNumber];
float szDescr[FEATURE_MAX_D]; //descr[] holds doubles, so copy into a float buffer to match the CV_32FC1 codebook
for (int k = 0; k < FEATURE_MAX_D; ++k)
{
szDescr[k] = (float)pImgFeature[i].descr[k];
}
CvMat iIP = cvMat(FEATURE_MAX_D, 1, CV_32FC1, szDescr);
for (int j = 0; j < cnClusterNumber; ++j)
{
CvMat iCode = cvMat(1, FEATURE_MAX_D, CV_32FC1);
cvGetRow(m_pCodebook, &iCode, j);
CvMat *pTmpMat = cvCreateMat(FEATURE_MAX_D, 1, CV_32FC1);
cvTranspose(&iCode, pTmpMat);
double dbDistance = cvNorm(&iIP, pTmpMat); //distance between the i-th IP and the j-th code
pszDistance[j] = dbDistance;
cvReleaseMat(&pTmpMat);
}
double dbMinDistance = pszDistance[0];
int nCodebookIndex = 0; //index of the code in the codebook closest to the i-th IP
for (int j = 1; j < cnClusterNumber; ++j)
{
if (dbMinDistance > pszDistance[j])
{
dbMinDistance = pszDistance[j];
nCodebookIndex = j;
}
}
++pszImgHistogram[nCodebookIndex];
delete []pszDistance;
}
CvMat iImgHistogram = cvMat(cnClusterNumber, 1, CV_32SC1, pszImgHistogram);
return iImgHistogram;
}
CvMat *CHistogram::GetObservedData()
{
CvMat iHistogram;
cvInitMatHeader(&iHistogram, m_nImgNumber, cnClusterNumber, CV_32SC1, m_pszHistogram);
m_pObservedData = cvCreateMat(iHistogram.cols, iHistogram.rows, CV_32SC1); //assign the member so the destructor can release it
cvTranspose(&iHistogram, m_pObservedData);
return m_pObservedData;
}
```
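Putting the pieces together, here is a hedged usage sketch of how these classes could be driven; the folder name and test-image path are placeholders of mine:

```cpp
//Usage sketch of CHistogram: build the codebook and training histograms from a folder,
//then compute the BoW histogram of one test image. "train_set" and "test/img001.jpg"
//are hypothetical paths.
#include <cstdio>
#include "ImgHistogram.h"

int main()
{
    CHistogram iHistogram;
    iHistogram.SetTrainingImgSetName("train_set");
    iHistogram.FormHistogram();                       //SIFT + K-Means; fills the codebook

    CvMat *pObserved = iHistogram.GetObservedData();  //cnClusterNumber x nImages, owned by the class
    printf("observed data: %d x %d\n", pObserved->rows, pObserved->cols);

    int szTestHistogram[cnClusterNumber] = {0};
    CvMat iTestHist = iHistogram.CalculateImgHistogram("test/img001.jpg", szTestHistogram);
    printf("test histogram has %d bins\n", iTestHist.rows);
    //szTestHistogram now holds the visual-word counts of the test image and can be fed
    //to any classifier (nearest neighbour, SVM, ...).
    return 0;
}
```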