美文网首页
K-means 分类算法

K-means 分类算法

作者: district10 | 来源:发表于2015-03-28 16:45 被阅读123次

data

41 286 357 71 298 426 39 241 350 47 283 399 22 201 449 70 239 363 87 290 449 62 209 417 39 265 415 55 229 363 12 294 442 1 296 378 39 270 380 7 228 379 5 277 385 71 215 442 75 243 424 13 280 414 40 244 380 80 234 429 55 268 435 1 275 434 2 296 433 45 204 446 51 226 389 78 279 370 79 217 407 54 273 426 48 247 400 41 252 423 50 276 362 22 283 444 

code

#include<stdio.h>
#include<stdlib.h>
#include<stdarg.h>
#include<math.h>

#define DONE NULL
#define THRESHOLD 0.01

enum {
    N_CLUSTERS = 3,
    MAX_ITERATIONS = 25,
    MAX_DISTANCE = 100,
};

typedef struct _kmeans_t {
    size_t n;
    size_t k;
    double *X;
    double *C;
    size_t *A;
    FILE *ifp;
    FILE *ofp;
} kmeans_t;

/* funcs */
void bye (void);

size_t belongs_to (double *c, size_t nc, double *v);

FILE *getifp (const char *fn);
FILE *getofp (const char *fn);

/* tasts */
void do_tasks (kmeans_t *km, ...);

void km_pipein (kmeans_t *km);
void km_init_clusters (kmeans_t *km);
void km_reassignment (kmeans_t *km);
void km_reclustering (const kmeans_t *km, double *c);
void km_loop (kmeans_t *km);
void km_pipeout (kmeans_t *km);
void km_freeall (kmeans_t *km);


/* main */
int main (void)
{
    atexit (bye); /* we must say goodbye before we exit */

    kmeans_t km = {
        .n = 0,
        .k = 3,
        .X = NULL,
        .A = NULL,
        .ifp = getifp ("data2.txt"),
        .ofp = getofp ("data-out.txt") 
    };

    /* task to do */
    do_tasks (&km,
              km_pipein,
              km_init_clusters,
              km_loop,
              km_pipeout,
              km_freeall,
              DONE
             );

    /* exit */
    exit (EXIT_SUCCESS);
}


/* funcs */
void bye (void)
{
    printf ("bye bye\n");
    getc (stdin);
    printf ("bye~\n");
}

size_t belongs_to (double *c, size_t nc, double *v)
{
    size_t i;
    size_t a; /* anchor */

    for (i = 1, a = 0; i < nc; ++i) {
        if ( fabs (*(c+i)-*v) < fabs (*(c+a)-*v) ) {
           a = i;
        }
    }

    return a;
}


FILE *getifp (const char *fn)
{
    FILE *ifp = fopen (fn, "r");
    if (NULL == ifp) {
        fprintf (stderr, "cannot open %s for reading", fn);
        exit (EXIT_FAILURE);
    }
    return ifp;
}

FILE *getofp (const char *fn)
{
    FILE *ofp = fopen (fn, "w");
    if (NULL == ofp) {
        fprintf (stderr, "cannot open %s for writing\n", fn);
        exit (EXIT_FAILURE);
    }
    return ofp;
}

/* tasts */
void do_tasks (kmeans_t *km, ...)
{
    void (*task)() = NULL;

    va_list tasks;
    va_start (tasks, km);
    while (DONE != (
                    task = va_arg ( tasks, void(*)() ) 
                   )
          ) {
        (*task)(km);
    }
    return;
}

void km_pipein (kmeans_t *km)
{
    double tmp;
    size_t n;
    
    while (fscanf (km->ifp, "%lf", &tmp) == 1) ++(km->n);
    rewind (km->ifp);

    km->X = (double *) malloc (km->n*sizeof(double));
    for (n = 0; n < km->n; ++n) {
        fscanf (km->ifp, "%lf", km->X+n);
    }
    return;
}

void km_init_clusters (kmeans_t *km)
{
    size_t k;

    km->C = (double *)  malloc (km->k*sizeof(double));
    km->A = (size_t *)  malloc (km->n*sizeof(size_t));

    for (k = 0; k < km->k; ++k) {
        *(km->C+k) = *(km->X+k);
    }
    return;
}

void km_reassignment (kmeans_t *km)
{
    size_t n;

    for (n = 0; n < km->n; ++n) {
        *(km->A+n) = belongs_to (km->C, km->k, km->X+n);
    }
}


void km_reclustering (const kmeans_t *km, double *c)
{
    size_t n, k;

    for (k = 0; k < km->k; ++k) {
        *(c+k) = 0.0;
    }

    for (n = 0; n < km->n; ++n) {
        *(c+*(km->A+n)) += *(km->X+n);
    }

    for (k = 0; k < km->k; ++k) {
        *(c+k) /= (double)km->k;
    }

    return;
}

void km_loop (kmeans_t *km)
{
    size_t n;
    size_t k;
    size_t i;

    double diff;

    double *c = (double *) malloc (km->k*sizeof(double));

    km_reassignment (km);

    /* loops */
    for (i = 0; i < MAX_ITERATIONS; ++i) {
        km_reclustering (km, c);
        diff = 0.0;
        for (k = 0; k < km->k; ++k) {
            diff += fabs (*(km->C+k) - *(c+k));
        }

        if (diff < THRESHOLD) {
            break;
        }

        km_reassignment (km);
    } // end for

    free (c);

    return;
}

void km_pipeout (kmeans_t *km)
{
    size_t n;
    size_t k;
    
    fprintf (km->ofp, "--overall:\n");
    for (n = 0; n < km->n; ++n) {
        fprintf (km->ofp, "%10sP[%2d]: %10.4lf, cluster: %4d\n", "",
                 n,
                 *(km->X+n),
                 *(km->A+n)
                );
    }

    for (k = 0; k < km->k; ++k) {
        fprintf (km->ofp, "%10sC[%2d]: center: %10.4lf\n", "",
                 k,
                 *(km->C+k)
                );
    }

    return;
}

void km_freeall (kmeans_t *km)
{
    fclose (km->ifp);
    fclose (km->ofp);

    free (km->X);
    free (km->A);
    free (km->C);

    return;
}

GCC 编译结果

--overall:
          P[ 0]:    41.0000, cluster:    0
          P[ 1]:   286.0000, cluster:    1
          P[ 2]:   357.0000, cluster:    2
          P[ 3]:    71.0000, cluster:    0
          P[ 4]:   298.0000, cluster:    1
          P[ 5]:   426.0000, cluster:    2
          P[ 6]:    39.0000, cluster:    0
          P[ 7]:   241.0000, cluster:    1
          P[ 8]:   350.0000, cluster:    2
          P[ 9]:    47.0000, cluster:    0
          P[10]:   283.0000, cluster:    1
          P[11]:   399.0000, cluster:    2
          P[12]:    22.0000, cluster:    0
          P[13]:   201.0000, cluster:    1
          P[14]:   449.0000, cluster:    2
          P[15]:    70.0000, cluster:    0
          P[16]:   239.0000, cluster:    1
          P[17]:   363.0000, cluster:    2
          P[18]:    87.0000, cluster:    0
          P[19]:   290.0000, cluster:    1
          P[20]:   449.0000, cluster:    2
          P[21]:    62.0000, cluster:    0
          P[22]:   209.0000, cluster:    1
          P[23]:   417.0000, cluster:    2
          P[24]:    39.0000, cluster:    0
          P[25]:   265.0000, cluster:    1
          P[26]:   415.0000, cluster:    2
          P[27]:    55.0000, cluster:    0
          P[28]:   229.0000, cluster:    1
          P[29]:   363.0000, cluster:    2
          P[30]:    12.0000, cluster:    0
          P[31]:   294.0000, cluster:    1
          P[32]:   442.0000, cluster:    2
          P[33]:     1.0000, cluster:    0
          P[34]:   296.0000, cluster:    1
          P[35]:   378.0000, cluster:    2
          P[36]:    39.0000, cluster:    0
          P[37]:   270.0000, cluster:    1
          P[38]:   380.0000, cluster:    2
          P[39]:     7.0000, cluster:    0
          P[40]:   228.0000, cluster:    1
          P[41]:   379.0000, cluster:    2
          P[42]:     5.0000, cluster:    0
          P[43]:   277.0000, cluster:    1
          P[44]:   385.0000, cluster:    2
          P[45]:    71.0000, cluster:    0
          P[46]:   215.0000, cluster:    1
          P[47]:   442.0000, cluster:    2
          P[48]:    75.0000, cluster:    0
          P[49]:   243.0000, cluster:    1
          P[50]:   424.0000, cluster:    2
          P[51]:    13.0000, cluster:    0
          P[52]:   280.0000, cluster:    1
          P[53]:   414.0000, cluster:    2
          P[54]:    40.0000, cluster:    0
          P[55]:   244.0000, cluster:    1
          P[56]:   380.0000, cluster:    2
          P[57]:    80.0000, cluster:    0
          P[58]:   234.0000, cluster:    1
          P[59]:   429.0000, cluster:    2
          P[60]:    55.0000, cluster:    0
          P[61]:   268.0000, cluster:    1
          P[62]:   435.0000, cluster:    2
          P[63]:     1.0000, cluster:    0
          P[64]:   275.0000, cluster:    1
          P[65]:   434.0000, cluster:    2
          P[66]:     2.0000, cluster:    0
          P[67]:   296.0000, cluster:    1
          P[68]:   433.0000, cluster:    2
          P[69]:    45.0000, cluster:    0
          P[70]:   204.0000, cluster:    1
          P[71]:   446.0000, cluster:    2
          P[72]:    51.0000, cluster:    0
          P[73]:   226.0000, cluster:    1
          P[74]:   389.0000, cluster:    2
          P[75]:    78.0000, cluster:    0
          P[76]:   279.0000, cluster:    1
          P[77]:   370.0000, cluster:    2
          P[78]:    79.0000, cluster:    0
          P[79]:   217.0000, cluster:    1
          P[80]:   407.0000, cluster:    2
          P[81]:    54.0000, cluster:    0
          P[82]:   273.0000, cluster:    1
          P[83]:   426.0000, cluster:    2
          P[84]:    48.0000, cluster:    0
          P[85]:   247.0000, cluster:    1
          P[86]:   400.0000, cluster:    2
          P[87]:    41.0000, cluster:    0
          P[88]:   252.0000, cluster:    1
          P[89]:   423.0000, cluster:    2
          P[90]:    50.0000, cluster:    0
          P[91]:   276.0000, cluster:    1
          P[92]:   362.0000, cluster:    2
          P[93]:    22.0000, cluster:    0
          P[94]:   283.0000, cluster:    1
          P[95]:   444.0000, cluster:    2
          C[ 0]: center:    41.0000
          C[ 1]: center:   286.0000
          C[ 2]: center:   357.0000

VS2010 编译

没有结果,已花样作死,死绝。

我不应该乱玩函数指针。。。


log: $1: 考研复试阶段;

相关文章

  • 2018.12.8

    本周总结: 学习情况: 1、学习K-means算法,并通过在Python上运行k-means算法,绘制对应的分类图...

  • 吴恩达机器学习Coursera-week8

    Clustering K-Means算法 从本周开始,我们开始学习无监督学习算法,而clustering分类算法是...

  • Kmeans

    K-means与kNN虽然都是以k打头,但却是两类算法——kNN为监督学习中的分类算法,而k-means则是非监督...

  • 2020-05-14

    svm分类、随机森林、决策树、线性回归、K-means算法、朴素贝叶斯

  • 大数据--聚类算法

    本篇结构 简介 聚类算法的分类 K-Means聚类算法 DBSCAN聚类算法 本篇介绍了聚类算法的种类,重点关注K...

  • Kmeans聚类

    1 聚类与分类的区别2 k-means 聚类基本概念3 算法优缺点4 算法思路5 代码实现 1 聚类与分类的区别 ...

  • k-means算法总结

    目录 一、k-means算法原理 二、k-means算法目标函数是什么 三、总结 一、k-means算法原理 k-...

  • Apriori、ID3、Naive_Bayes等(数据挖掘)

    Apriori算法 线性回归 UCI分类KNN 决策树 Naive_Bayes K-Means图像分割 Aprio...

  • K-means 分类算法

    data code GCC 编译结果 VS2010 编译 没有结果,已花样作死,死绝。 我不应该乱玩函数指针。。。

  • 大数据-算法

    数据分类 KNN 分类算法特征值贝叶斯分类 数据聚类 K-means 是一种在给定分组个数后,能够对数据进行自动归...

网友评论

      本文标题:K-means 分类算法

      本文链接:https://www.haomeiwen.com/subject/tpunxttx.html