集成评估分类

作者: Peter_iris | 来源:发表于2022-06-11 21:48 被阅读0次

集成评估分类
分类评估
数据挖掘大纲
分类模型的评估（三）
python week 15
《机器学习及实践——从零开始通往KAGGLE竞赛之路》读书笔记八
Various classifier comparisons o
《绩效管理》--绩效指标与标准
复习 - 模型测试
基于sklearn的集成分类器

单个模型对对象的评估可能存在偏差，因此我们往往综合多个模型的预测结果来得到最终的评估。由多个模型集成而成的模型叫做集成评估器，组成集成评估器的每个模型都叫做基评估器。通常来说，有三类集成算法：袋装法（bagging）、提升法（boosting）和 stacking。

bagging：以随机森林为代表，各基模型相互独立、互不影响；随着决策树数量增加，分类效果通常会提升并逐渐趋于稳定。
boosting：以 AdaBoost 为代表，基模型之间相互影响：先训练一个弱分类器，然后逐步迭代，提升分类器的性能。

R random Forest

library(randomForest)
library(pROC)
library(caret)
library(rpart)

# Read the two saved workspaces from the working directory. The rest of the
# script relies on them providing `CUP2_methyl` and `CUP2_anno`.
load("CUP2_methyl.rda")
load("CUP2_anno.rda")

# Discard the first three columns and transpose the remainder.
# NOTE(review): presumably the first three columns are probe annotation and
# the rest are per-sample values, so each row becomes one sample -- confirm.
CUP2_methyl_1 <- as.data.frame(t(CUP2_methyl[-(1:3)]))

# Build the join key by removing every "methy_" occurrence from the row names.
CUP2_methyl_1[["Sample"]] <- gsub("methy_", "", rownames(CUP2_methyl_1))

# Attach the annotation columns by sample id, then move the id from a column
# to the row names so that only data columns remain.
CUP2_methyl_anno <- merge(CUP2_methyl_1, CUP2_anno, by = "Sample")
rownames(CUP2_methyl_anno) <- CUP2_methyl_anno[["Sample"]]
CUP2_methyl_anno[["Sample"]] <- NULL

# Quick look at columns 6140-6148 of the merged table and at the annotation.
head(CUP2_methyl_anno[6140:6148])
head(CUP2_anno)

# Remove the liver cancer (LIHC) samples. which() keeps only rows where the
# comparison is TRUE, so rows with a missing Group are dropped as well.
CUP2_methyl_anno <- CUP2_methyl_anno[
  which(CUP2_methyl_anno$Group != "LIHC"),
]

# Encode the outcome as a factor with an explicit level order; any label
# outside these four becomes NA.
CUP2_methyl_anno$Group <- factor(
  CUP2_methyl_anno$Group,
  levels = c("CRC", "LC", "Normal", "STAD")
)
dim(CUP2_methyl_anno)

# Split the samples: 3/4 for training, the rest for testing.
# createDataPartition() samples within each level of Group; list = FALSE makes
# it return row indices that can be used directly for subsetting. The seed
# makes the split reproducible.
set.seed(101)
sets <- createDataPartition(CUP2_methyl_anno$Group, p = 0.75, list = FALSE)

# Columns removed from both subsets before modelling, defined once so the
# training and testing sets cannot drift apart.
# NOTE(review): positions 6145:6147 are hard-coded; presumably these are
# annotation columns other than Group -- confirm, and prefer selecting them by
# name if the column layout can change.
drop_cols <- 6145:6147

training <- CUP2_methyl_anno[sets, ]
training <- training[-drop_cols]
dim(training)

testing <- CUP2_methyl_anno[-sets, ]
testing <- testing[-drop_cols]
dim(testing)

# Fit a random forest classifier on the training set and evaluate it on the
# held-out test set.
#
# The response must be written as the bare column name `Group`. With
# `as.factor(training$Group) ~ .` the `.` shorthand expands to every column of
# `training`, including `Group` itself, so the label would be used as a
# predictor (at fit time and again at predict time) and the reported accuracy
# would be meaningless.
stopifnot(is.factor(training$Group))

# NOTE(review): na.pass hands missing values through unchanged; randomForest
# is documented as not accepting NA in predictors, so confirm the data are
# complete or switch to na.roughfix / na.omit.
cup2_train <- randomForest(Group ~ ., data = training,
                           importance = TRUE, na.action = na.pass)
cup2_train
plot(cup2_train, main = "random Forest origin")

# Predicted class labels for the test samples, shown next to the true labels.
cup2_test <- predict(cup2_train, newdata = testing, type = "response")
head(cup2_test)
head(testing$Group)

# Confusion matrix and derived statistics (predicted first, reference second).
cup2.cf <- caret::confusionMatrix(as.factor(cup2_test),
                                  as.factor(testing$Group))
cup2.cf

# Per-class probabilities, one column per level of Group, and the multiclass
# ROC analysis computed from the full probability matrix.
cup2_test2 <- predict(cup2_train, newdata = testing, type = "prob")
cup2_test2
roc.rf <- multiclass.roc(testing$Group, cup2_test2)

# Baseline: a single classification tree fitted on the same training set.
fit1 <- rpart(Group ~ ., data = training)

# Class-membership probabilities for the test samples, one column per class.
pre1 <- predict(fit1, newdata = testing, type = "prob")

# Multiclass ROC analysis from the full probability matrix. Passing only
# pre1[, 1] would score every pairwise comparison with the first class's
# probability alone, including pairs that do not involve that class.
roc <- multiclass.roc(testing$Group, pre1)

# Draw one one-vs-rest ROC curve per class, each scored with that class's own
# probability column. The levels and direction are given explicitly so that a
# higher probability always counts as evidence for the class.
class_names <- colnames(pre1)
curve_cols <- c("red", "blue", "green", "orange")
for (i in seq_along(class_names)) {
  cls <- class_names[i]
  truth <- factor(ifelse(testing$Group == cls, "case", "control"),
                  levels = c("control", "case"))
  cls_roc <- pROC::roc(truth, pre1[, cls],
                       levels = c("control", "case"), direction = "<")
  # Recycle the colours if there are more classes than colours.
  plot(cls_roc, add = i > 1,
       col = curve_cols[(i - 1) %% length(curve_cols) + 1])
}
legend("bottomright", legend = class_names,
       col = curve_cols[(seq_along(class_names) - 1) %% length(curve_cols) + 1],
       lwd = 2)

download.png