美文网首页
图像分类

图像分类

作者: Fruit_初 | 来源:发表于2017-04-15 13:58 被阅读0次

    Above All

    机器学习的大作业是写图像分类。这里我整理一些有用的参考资料,以便后来提交报告的时候逻辑比较清晰。

    主要想用的特征还是SIFT和SURF,当然我觉得数据集给我的感觉是颜色直方图也是可以用的。


    一、简单粗暴的提取SIFT特征

    源码:https://github.com/SimGuo/ImageProcessing/blob/master/main.cpp


    二、Bag-of-words方法

    作者:Savitch
    出处:http://blog.csdn.net/assiduousknight/article/details/16901427
    什么是BOW

    first step then

    Bag-of-words模型应用三步

    接下来,我们通过上述图像展示如何通过Bag-of-words模型,将图像表示成数值向量。现在有三个目标类,分别是人脸、自行车和吉他。

    • Bag-of-words模型的第一步是利用SIFT算法,从每类图像中提取视觉词汇,将所有的视觉词汇集合在一起,如下图所示:

      提取视觉词汇
    • 第二步是利用K-Means算法构造单词表。K-Means算法是一种基于样本间相似性度量的间接聚类方法,此算法以K为参数,把N个对象分为K个簇,以使簇内具有较高的相似度,而簇间相似度较低。SIFT提取的视觉词汇向量之间根据距离的远近,可以利用K-Means算法将词义相近的词汇合并,作为单词表中的基础词汇,假定我们将K设为4,那么单词表的构造过程如下图所示:

    kmeans
    • 第三步是利用单词表中的词汇表示图像。利用SIFT算法,可以从每幅图像中提取很多个特征点,这些特征点都可以用单词表中的单词近似代替,通过统计单词表中每个单词在图像中出现的次数,可以将图像表示成为一个K=4维数值向量。请看下图:
    每张图根据词表转化为一个向量

    代码(还没看懂)

    1. 配置环境
    2. 创建c++类CSIFTDiscriptor
      为了方便使用,我们将SIFT库用C++类CSIFTDiscriptor封装,该类可以计算并获取指定图像的特征点向量集合。类的声明在SIFTDiscriptor.h文件中,内容如下:
      #ifndef _SIFT_DISCRIPTOR_H_  
      #define _SIFT_DISCRIPTOR_H_  
      #include <string>  
      #include <highgui.h>  
      #include <cv.h>  
        
      extern "C"  
      {     
      #include "../sift/sift.h"     
      #include "../sift/imgfeatures.h"      
      #include "../sift/utils.h"    
      };  
        
      /**
       * Thin C++ wrapper around the C SIFT library for a single image.
       *
       * The object remembers an image file name, and CalculateSIFT() fills in the
       * interest-point count and the feature array that the getters expose.
       * The feature array is released in the destructor.
       * NOTE(review): no copy constructor / assignment is declared, so copying an
       * object that already holds features would leave two owners of one array.
       */
      class CSIFTDiscriptor
      {
      public:
          /// Creates an empty descriptor: no image name, no features.
          CSIFTDiscriptor()
              : m_nInterestPointNumber(0),
                m_pFeatureArray(NULL)
          {
          }

          /// Creates a descriptor for the given image (defined out of line).
          CSIFTDiscriptor(const std::string &strImgName);

          ~CSIFTDiscriptor();

          /// Stores the file name that the next CalculateSIFT() call will load.
          void SetImgName(const std::string &strImgName)
          {
              m_strInputImgName = strImgName;
          }

          /// Extracts the features of the current image (defined out of line).
          int CalculateSIFT();

          /// Number of interest points recorded by the last extraction.
          int GetInterestPointNumber()
          {
              return m_nInterestPointNumber;
          }

          /// Feature array recorded by the last extraction; NULL before any extraction.
          struct feature *GetFeatureArray()
          {
              return m_pFeatureArray;
          }

      private:
          std::string m_strInputImgName;      // file name of the image to analyse
          int m_nInterestPointNumber;         // entries in m_pFeatureArray
          struct feature *m_pFeatureArray;    // owned; released in the destructor
      };
      #endif  
      
      成员函数实现在SIFTDiscriptor.cpp文件中,其中,CalculateSIFT函数完成特征点的提取和计算,其主要内部流程如下:
    1. 调用OpenCV函数cvLoadImage加载输入图像;
    2. 为了统一输入图像的尺寸,CalculateSIFT函数的第二步是调整输入图像的尺寸,这通过调用cvResize函数实现;
    3. 如果输入图像是彩色图像,我们需要首先将其转化成灰度图,这通过调用cvCvtColor函数实现;
    4. 调用SIFT库函数sift_features获取输入图像的特征点向量集合和特征点个数。
      #include "SIFTDiscriptor.h"  
      

    int CSIFTDiscriptor::CalculateSIFT()
    {
    IplImage *pInputImg = cvLoadImage(m_strInputImgName.c_str());
    if (!pInputImg)
    {
    return -1;
    }
    int nImgWidth = 320; //训练用标准图像大小
    double dbScaleFactor = pInputImg->width / 300.0; //缩放因子
    IplImage *pTmpImg = cvCreateImage(cvSize(pInputImg->width / dbScaleFactor, pInputImg->height / dbScaleFactor),
    pInputImg->depth, pInputImg->nChannels);
    cvResize(pInputImg, pTmpImg); //缩放
    cvReleaseImage(&pInputImg);

    if (pTmpImg->nChannels != 1)    //非灰度图  
    {  
        IplImage *pGrayImg = cvCreateImage(cvSize(pTmpImg->width, pTmpImg->height),  
            pTmpImg->depth, 1);  
        cvCvtColor(pTmpImg, pGrayImg, CV_RGB2GRAY);  
        m_nInterestPointNumber = sift_features(pGrayImg, &m_pFeatureArray);  
        cvReleaseImage(&pGrayImg);  
    }  
    else  
    {  
        m_nInterestPointNumber = sift_features(pTmpImg, &m_pFeatureArray);  
    }  
    cvReleaseImage(&pTmpImg);  
    return m_nInterestPointNumber;  
    

    }
    /**
     * Creates a descriptor bound to strImgName and immediately runs the feature
     * extraction. The result code of CalculateSIFT() is not inspected here; the
     * getters report whatever the extraction left in the object.
     */
    CSIFTDiscriptor::CSIFTDiscriptor(const std::string &strImgName)
        : m_strInputImgName(strImgName),
          m_nInterestPointNumber(0),
          m_pFeatureArray(NULL)
    {
        CalculateSIFT();
    }
    /**
     * Releases the feature array held by this object, if any.
     */
    CSIFTDiscriptor::~CSIFTDiscriptor()
    {
        // free() is a no-op for a null pointer, so no explicit check is required.
        free(m_pFeatureArray);
    }
    ```

    3. 创建C++类CImgSet,管理实验图像集合
      Bag-of-words模型需要从多个目标类图像中提取视觉词汇,不同目标类的图像存储在不同子文件夹中,为了方便操作,我们设计了一个专门的类CImgSet用来管理图像集合,声明在文件ImgSet.h中:
      #ifndef _IMG_SET_H_  
      #define _IMG_SET_H_  
      #include <vector>  
      #include <string>  
      #pragma comment(lib, "shlwapi.lib")  
      /**
       * Keeps the list of image file names found below one root directory.
       *
       * The root passed to the constructor is stored with "//" appended.
       * LoadImgsFromDir() fills the list; GetTotalImageNumber() and GetImgName()
       * read it back.
       */
      class CImgSet
      {
      public:
          /// @param strImgDirName root directory of the image set (without trailing separator).
          CImgSet(const std::string &strImgDirName)
              : m_nImgNumber(0),                        // listed in declaration order
                m_strImgDirName(strImgDirName + "//")
          {
          }

          /// Number of file names collected by the last LoadImgsFromDir() call.
          int GetTotalImageNumber() const
          {
              return m_nImgNumber;
          }

          /// File name at nIndex; std::vector::at throws std::out_of_range for a bad index.
          std::string GetImgName(int nIndex) const
          {
              return m_szImgs.at(nIndex);
          }

          /// Scans the root directory and returns the number of files found.
          int LoadImgsFromDir()
          {
              // Start from an empty list so a repeated scan does not duplicate entries.
              m_szImgs.clear();
              m_nImgNumber = 0;
              return LoadImgsFromDir("");
          }

      private:
          /// Worker behind LoadImgsFromDir(); defined out of line.
          int LoadImgsFromDir(const std::string &strDirName);

      private:
          typedef std::vector<std::string> IMG_SET;
          IMG_SET m_szImgs;                     // collected file names
          int m_nImgNumber;                     // cached m_szImgs.size()
          const std::string m_strImgDirName;    // root directory plus "//"
      };
      #endif  
      
      //成员函数实现在文件ImgSet.cpp中:  
      #include "ImgSet.h"  
      #include <windows.h>  
      #include <Shlwapi.h>  
      /** 
      strSubDirName:子文件夹名 
      */  
      int CImgSet::LoadImgsFromDir(const std::string &strSubDirName)  
      {  
          WIN32_FIND_DATAA stFD = {0};  
          std::string strDirName;  
          if ("" == strSubDirName)  
          {  
              strDirName = m_strImgDirName;  
          }    
          else  
          {    
              strDirName = strSubDirName;  
          }    
          std::string strFindName = strDirName + "//*";  
          HANDLE hFile = FindFirstFileA(strFindName.c_str(), &stFD);  
          BOOL bExist = FindNextFileA(hFile, &stFD);  
        
          for (;bExist;)  
          {  
              std::string strTmpName = strDirName + stFD.cFileName;  
              if (strDirName + "." == strTmpName || strDirName + ".." == strTmpName)  
              {  
                  bExist = FindNextFileA(hFile, &stFD);  
                  continue;  
              }  
              if (PathIsDirectoryA(strTmpName.c_str()))  
              {  
                  strTmpName += "//";  
                  LoadImgsFromDir(strTmpName);  
                  bExist = FindNextFileA(hFile, &stFD);  
                  continue;  
              }     
              std::string strSubImg = strDirName + stFD.cFileName;  
              m_szImgs.push_back(strSubImg);  
              bExist = FindNextFileA(hFile, &stFD);  
          }  
          m_nImgNumber = m_szImgs.size();  
          return m_nImgNumber;  
      }  
      

    LoadImgsFromDir递归地从图像文件夹中获取所有实验用图像名,包括子文件夹。该函数内部通过循环调用windows API函数FindFirstFile和FindNextFile来找到文件夹中所有图像的名称。

    4. 创建类CHistogram,生成图像的直方图表示
    //ImgHistogram.h  
    #ifndef _IMG_HISTOGRAM_H_  
    #define _IMG_HISTOGRAM_H_    
    #include <string>  
    #include "SIFTDiscriptor.h"  
    #include "ImgSet.h"  
    // Size of the visual-word codebook: passed to cvKMeans2 as the cluster count
    // and used as the number of bins of every image histogram.
    const int cnClusterNumber = 1500;  
    // Alias for FEATURE_MAX_D, the length of one descriptor array.
    // NOTE(review): FEATURE_MAX_D is presumably defined by the SIFT library headers;
    // this alias is not referenced in the code shown here.
    const int ciMax_D = FEATURE_MAX_D;  
    
    /**
     * Builds bag-of-words histograms for a set of training images and for single
     * query images, using a codebook of cnClusterNumber entries.
     *
     * Ownership as implemented by the destructor:
     *  - m_pszHistogram and m_pObservedData are released by this object;
     *  - m_pCodebook is released only when it was not supplied via SetCodebook().
     */
    class CHistogram
    {
    public:
        /// Stores the directory name of the training image set.
        void SetTrainingImgSetName(const std::string &strTrainingImgSet)
        {
            m_strTrainingImgSetName = strTrainingImgSet;
        }

        /// Computes the histograms of all training images (defined out of line).
        int FormHistogram();

        /// Computes the histogram of one image into pszImgHistogram (defined out of line).
        CvMat CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[]);

        /// Returns the training histograms as a matrix (defined out of line).
        CvMat *GetObservedData();

        /// Current codebook, or 0 when none has been created or set.
        CvMat *GetCodebook()
        {
            return m_pCodebook;
        }

        /// Installs an external codebook; the destructor will not release it.
        void SetCodebook(CvMat *pCodebook)
        {
            m_pCodebook = pCodebook;
            m_bSet = true;
        }

    public:
        CHistogram()
            : m_bSet(false),            // listed in declaration order
              m_pCodebook(0),
              m_pObservedData(0),
              m_pszHistogram(0),
              m_nImgNumber(0)
        {
        }

        ~CHistogram()
        {
            if (m_pszHistogram)
            {
                // Allocated with new[] in FormHistogram, so it needs delete[].
                delete [] m_pszHistogram;
                m_pszHistogram = 0;
            }
            if (m_pObservedData)
            {
                cvReleaseMat(&m_pObservedData);
                m_pObservedData = 0;
            }
            if (m_pCodebook && !m_bSet)     // only a codebook created by this object
            {
                cvReleaseMat(&m_pCodebook);
                m_pCodebook = 0;
            }
        }

    private:
        bool m_bSet;                                // true once SetCodebook() was called
        CvMat *m_pCodebook;                         // cluster centres (visual words)
        CvMat *m_pObservedData;                     // matrix handed out by GetObservedData()
        std::string m_strTrainingImgSetName;        // directory of the training images
        int (*m_pszHistogram)[cnClusterNumber];     // one histogram row per training image
        int m_nImgNumber;                           // number of rows in m_pszHistogram
    };
    #endif  
    
    #include "ImgHistogram.h"  
    int CHistogram::FormHistogram()  
    {  
        int nRet = 0;  
        CImgSet iImgSet(m_strTrainingImgSetName);  
        nRet = iImgSet.LoadImgsFromDir();  
          
        const int cnTrainingImgNumber = iImgSet.GetTotalImageNumber();  
        m_nImgNumber = cnTrainingImgNumber;  
        CSIFTDiscriptor *pDiscriptor = new CSIFTDiscriptor[cnTrainingImgNumber];  
        int nIPNumber(0) ;  
        for (int i = 0; i < cnTrainingImgNumber; ++i)  //计算每一幅训练图像的SIFT描述符  
        {  
            const string strImgName = iImgSet.GetImgName(i);  
            pDiscriptor[i].SetImgName(strImgName);  
            pDiscriptor[i].CalculateSIFT();  
            nIPNumber += pDiscriptor[i].GetInterestPointNumber();  
        }  
          
        double (*pszDiscriptor)[FEATURE_MAX_D] = new double[nIPNumber][FEATURE_MAX_D];  //存储所有描述符的数组。每一行代表一个IP的描述符  
        ZeroMemory(pszDiscriptor, sizeof(int) * nIPNumber * FEATURE_MAX_D);  
        int nIndex = 0;  
        for (int i = 0; i < cnTrainingImgNumber; ++i)  //遍历所有图像  
        {  
            struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
            int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
            for (int j = 0; j < nFeatureNumber; ++j)  //遍历一幅图像中所有的IP(Interesting Point兴趣点  
            {  
                for (int k = 0; k < FEATURE_MAX_D; k++)//初始化一个IP描述符  
                {  
                    pszDiscriptor[nIndex][k] = pFeatureArray[j].descr[k];  
                }  
                ++nIndex;  
            }  
        }  
        CvMat *pszLabels = cvCreateMat(nIPNumber, 1, CV_32SC1);  
          
        //对所有IP的描述符,执行KMeans算法,找到cnClusterNumber个聚类中心,存储在pszClusterCenters中  
        if (!m_pCodebook)   //构造码元表  
        {  
            CvMat szSamples,   
                *pszClusterCenters = cvCreateMat(cnClusterNumber, FEATURE_MAX_D, CV_32FC1);  
            cvInitMatHeader(&szSamples, nIPNumber, FEATURE_MAX_D, CV_32FC1, pszDiscriptor);  
            cvKMeans2(&szSamples, cnClusterNumber, pszLabels,   
                cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),  
                1, (CvRNG *)0, 0, pszClusterCenters);  //  
            m_pCodebook = pszClusterCenters;  
        }  
          
        m_pszHistogram = new int[cnTrainingImgNumber][cnClusterNumber];  //存储每幅图像的直方图表示,每一行对应一幅图像  
        ZeroMemory(m_pszHistogram, sizeof(int) * cnTrainingImgNumber * cnClusterNumber);  
          
        //计算每幅图像的直方图  
        nIndex = 0;  
        for (int i = 0; i < cnTrainingImgNumber; ++i)  
        {  
            struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
            int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
            //      int nIndex = 0;  
            for (int j = 0; j < nFeatureNumber; ++j)  
            {  
                //          CvMat szFeature;  
                //          cvInitMatHeader(&szFeature, 1, FEATURE_MAX_D, CV_32FC1, pszDiscriptor[nIndex++]);  
                //          double dbMinimum = 1.79769e308;  
                //          int nCodebookIndex = 0;  
                //          for (int k = 0; k < m_pCodebook->rows; ++k)//找到距离最小的码元,用最小码元代替原//来的词汇  
                //          {  
                //              CvMat szCode = cvMat(1, m_pCodebook->cols, m_pCodebook->type);  
                //              cvGetRow(m_pCodebook, &szCode, k);  
                //              double dbDistance = cvNorm(&szFeature, &szCode, CV_L2);  
                //              if (dbDistance < dbMinimum)  
                //              {  
                //                  dbMinimum = dbDistance;  
                //                  nCodebookIndex = k;  
                //              }  
                //          }  
                int nCodebookIndex = pszLabels->data.i[nIndex++];   //找到第i幅图像中第j个IP在Codebook中的索引值nCodebookIndex  
                ++m_pszHistogram[i][nCodebookIndex];   //0<nCodebookIndex<cnClusterNumber;   
            }  
        }  
          
        //资源清理,函数返回  
        //  delete []m_pszHistogram;  
        //  m_pszHistogram = 0;  
          
        cvReleaseMat(&pszLabels);     
        //  cvReleaseMat(&pszClusterCenters);  
        delete []pszDiscriptor;  
        delete []pDiscriptor;  
          
        return nRet;  
    }  
      
    //double descr_dist_sq( struct feature* f1, struct feature* f2 );  
    /**
     * Adds the bag-of-words histogram of one image to a caller-supplied array.
     *
     * Each SIFT descriptor of the image increments the bin of its nearest codebook
     * entry (L2 distance, lowest index on ties). The array is not cleared here, so
     * the caller decides whether counts accumulate or start from zero.
     *
     * @param strImgName      file name of the image.
     * @param pszImgHistogram array of at least cnClusterNumber ints.
     * @return a cnClusterNumber x 1 CV_32SC1 header over pszImgHistogram (no copy is
     *         made, so it is valid only while the array lives), or an empty CvMat
     *         when the name is empty, no codebook exists or the array is null.
     */
    CvMat CHistogram::CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[])
    {
        if (strImgName.empty() || !m_pCodebook || !pszImgHistogram)
        {
            return CvMat();
        }

        CSIFTDiscriptor iImgDisp;
        iImgDisp.SetImgName(strImgName);
        iImgDisp.CalculateSIFT();
        struct feature *pImgFeature = iImgDisp.GetFeatureArray();
        const int cnIPNumber = iImgDisp.GetInterestPointNumber();

        // Never look past the codebook rows or past the histogram bins.
        const int cnCodeNumber =
            (m_pCodebook->rows < cnClusterNumber) ? m_pCodebook->rows : cnClusterNumber;

        // The descriptor is copied into a float buffer so its element type matches the
        // CV_32FC1 header below whatever type the library declares descr with.
        float szDescriptor[FEATURE_MAX_D];
        for (int i = 0; i < cnIPNumber && cnCodeNumber > 0; ++i)
        {
            for (int k = 0; k < FEATURE_MAX_D; ++k)
            {
                szDescriptor[k] = static_cast<float>(pImgFeature[i].descr[k]);
            }
            CvMat iIP = cvMat(1, FEATURE_MAX_D, CV_32FC1, szDescriptor);

            // Track the running minimum; row-vs-row distance needs no transpose and
            // no per-code allocation.
            int nCodebookIndex = 0;
            double dbMinDistance = 0.0;
            for (int j = 0; j < cnCodeNumber; ++j)
            {
                CvMat iCode;
                cvGetRow(m_pCodebook, &iCode, j);
                const double dbDistance = cvNorm(&iIP, &iCode);
                if (0 == j || dbDistance < dbMinDistance)
                {
                    dbMinDistance = dbDistance;
                    nCodebookIndex = j;
                }
            }
            ++pszImgHistogram[nCodebookIndex];
        }

        return cvMat(cnClusterNumber, 1, CV_32SC1, pszImgHistogram);
    }
    
    
    /**
     * Returns the training histograms as a cnClusterNumber x m_nImgNumber CV_32SC1
     * matrix, i.e. the transpose of m_pszHistogram (one column per image).
     *
     * The matrix is stored in m_pObservedData and stays owned by this object: it is
     * released by the destructor and replaced by the next call, so callers must not
     * release it and must not keep a pointer across calls.
     *
     * @return the matrix, or 0 when no histogram has been computed yet.
     */
    CvMat *CHistogram::GetObservedData()
    {
        if (!m_pszHistogram || m_nImgNumber <= 0)
        {
            return 0;
        }

        // Drop the matrix of a previous call before building the new one.
        if (m_pObservedData)
        {
            cvReleaseMat(&m_pObservedData);
        }

        CvMat iHistogram;
        cvInitMatHeader(&iHistogram, m_nImgNumber, cnClusterNumber, CV_32SC1, m_pszHistogram);
        // Assign to the member (not a shadowing local) so the destructor can release it.
        m_pObservedData = cvCreateMat(iHistogram.cols, iHistogram.rows, CV_32SC1);
        cvTranspose(&iHistogram, m_pObservedData);
        return m_pObservedData;
    }
    

    相关文章

      网友评论

          本文标题:图像分类

          本文链接:https://www.haomeiwen.com/subject/jybnattx.html