美文网首页
Self-made KNN algorithms

Self-made KNN algorithms

作者: 陈有朴 | 来源:发表于2022-09-20 14:38 被阅读0次

    最近生物统计课上有一个作业:要求不调用现成的包,自己实现一个 KNN 算法,并用它对鸢尾花(iris)数据集中的样本进行分类预测。代码如下:

    # Six hand-made observations to classify. The column names match the four
    # numeric columns of the built-in iris data set; rows are named
    # sample1 .. sample6.
    test_samples <- data.frame(
      Sepal.Length = c(6.1, 5.9, 6.7, 5.6, 7.0, 6.5),
      Sepal.Width  = c(2.5, 5.0, 4.0, 3.1, 3.6, 3.2),
      Petal.Length = c(1.7, 2.0, 6.5, 1.5, 6.3, 4.8),
      Petal.Width  = c(0.3, 1.2, 2.2, 0.1, 2.5, 1.5),
      row.names    = paste0("sample", 1:6)
    )
    test_samples
    
    
    # Euclidean distance between two numeric observations.
    #
    # Args:
    #   vector1: numeric vector (or one-row data frame) of feature values.
    #   vector2: numeric vector (or one-row data frame) holding the same
    #            features in the same order.
    # Returns:
    #   A single non-negative number, sqrt(sum((vector1 - vector2)^2)).
    cal.dist <- function(vector1, vector2) {
      # Flatten both inputs so a one-row data frame such as test_samples[i, ]
      # is treated as a plain numeric vector.
      v.diff <- as.numeric(unlist(vector1)) - as.numeric(unlist(vector2))
      # The square root must wrap the sum: sum(sqrt(d^2)) equals sum(abs(d)),
      # which is the Manhattan distance, not the Euclidean one.
      sqrt(sum(v.diff^2))
    }
    
    
    # Classify one observation by majority vote among its k nearest training rows.
    #
    # Args:
    #   train: data frame with numeric feature columns and at least one
    #          non-numeric column; the first non-numeric column is the label.
    #   test:  a single observation (numeric vector or one-row data frame) with
    #          the same features as the numeric columns of `train`.
    #   k:     number of neighbours that vote (default 3). Values larger than
    #          nrow(train) are capped at nrow(train).
    # Returns:
    #   The winning label as a character string. When several labels tie, the
    #   first one in table() order wins.
    k.nearest.neighbors <- function(train, test, k = 3) {
      stopifnot(is.data.frame(train), nrow(train) > 0,
                is.numeric(k), length(k) == 1, !is.na(k), k >= 1)

      is.feature <- vapply(train, is.numeric, logical(1))
      label.idx <- which(!is.feature)
      if (!any(is.feature) || length(label.idx) == 0) {
        stop("`train` needs at least one numeric column and one label column",
             call. = FALSE)
      }
      # drop = FALSE keeps a data frame even when there is a single feature.
      train.numeric <- train[, is.feature, drop = FALSE]
      train.label <- train[[label.idx[1]]]

      # When `test` is a data frame that carries the feature names, line its
      # columns up with `train` by name instead of relying on column position.
      feature.names <- names(train.numeric)
      if (is.data.frame(test) && all(feature.names %in% names(test))) {
        test <- test[, feature.names, drop = FALSE]
      }

      # Distance from `test` to every training row.
      feature.mat <- as.matrix(train.numeric)
      dist.vec <- vapply(seq_len(nrow(feature.mat)),
                         function(i) cal.dist(feature.mat[i, ], test),
                         numeric(1))

      # Vote among the k closest rows; order() keeps ties in original row order.
      k <- as.integer(min(k, length(dist.vec)))
      nearest <- order(dist.vec, decreasing = FALSE)[seq_len(k)]
      votes <- table(train.label[nearest])
      names(votes)[which.max(votes)]
    }
    
    # main: classify every row of test_samples against the iris training set
    # seq_len() yields an empty sequence for zero rows, whereas 1:0 would
    # iterate over c(1, 0).
    for (i in seq_len(nrow(test_samples))) {
      line <- test_samples[i, , drop = FALSE]  # keep the row as a data frame
      print(k.nearest.neighbors(iris, line, k = 3))   # print the output vote results
    }
    

    预测结果如下:

    setosa
    setosa
    virginica
    setosa
    virginica
    versicolor
    

    相关文章

      网友评论

          本文标题:Self-made KNN algorithms

          本文链接:https://www.haomeiwen.com/subject/nhcvortx.html