示例
# Package setup.
# NOTE: the former `rm(list = ls())` was dropped on purpose: wiping the global
# environment of whoever runs this script is a destructive side effect, and
# nothing below relies on an empty workspace.
#
# requireNamespace() only tests whether a package is installed; unlike
# require() it does not attach it, so checking and loading stay separate steps.
# pheatmap is included because the script calls pheatmap::pheatmap() later.
needed_pkgs <- c("FactoMineR", "factoextra", "pheatmap")
is_installed <- vapply(
  needed_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(needed_pkgs[!is_installed])
}

# library() stops with an error if a package is still unavailable, instead of
# silently returning FALSE the way require() does.
library(FactoMineR)
library(factoextra)
# 选 iris 来做 PCA 分析示例,是因为它的分组之间有差异,有研究意义。
# 去掉最后一列(分组信息)
# Numeric input for the PCA: every iris column except the grouping factor.
# The column is dropped by name rather than by position, so the selection
# does not depend on where `Species` sits in the data frame.
dat <- iris[, setdiff(names(iris), "Species")]

# Group sizes.
table(iris$Species)
##
##     setosa versicolor  virginica
##         50         50         50

# One-column data frame holding the group label of each sample; its row names
# are aligned with those of `dat`.
pdata <- data.frame(Species = iris$Species)
rownames(pdata) <- rownames(dat)
# 看下热图
# Heatmap of the raw data matrix.
pheatmap::pheatmap(mat = dat)

# Fit the PCA; graph = FALSE turns off the plots PCA() would otherwise draw.
pca <- PCA(X = dat, graph = FALSE)

# Per-dimension eigenvalue, percentage of variance and cumulative percentage.
eig.val <- get_eigenvalue(pca)
eig.val
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.91849782       72.9624454                    72.96245
## Dim.2 0.91403047       22.8507618                    95.81321
## Dim.3 0.14675688        3.6689219                    99.48213
## Dim.4 0.02071484        0.5178709                   100.00000
# 图1:碎石图,用于选择要分析的主成分(一般是前两个)
# Scree plot of the PCA eigenvalues, with value labels on the bars and the
# y axis fixed to the 0-100 range.
fviz_eig(
  pca,
  addlabels = TRUE,
  ylim = c(0, 100)
)
# 图2:样本聚类
# Samples (individuals) in PCA space: no point labels, coloured by species,
# with one ellipse per group drawn at the 0.95 level.
fviz_pca_ind(
  pca,
  label = "none",
  habillage = iris$Species,
  addEllipses = TRUE,
  ellipse.level = 0.95,
  palette = "Dark2"
)
# Read more: http://www.sthda.com/english/wiki/ggplot2-colors
# 图3:变量聚类
# Variables in PCA space. The colour of each variable is assigned from its
# contribution value, mapped onto a white -> blue -> red gradient.
fviz_pca_var(
  pca,
  col.var = "contrib",
  gradient.cols = c("white", "blue", "red"),
  ggtheme = theme_minimal()
)
# 图4:结合变量和观测值的双标图(变量太多时不适用)
# Biplot of samples and variables together: only the variables are labelled,
# samples are coloured by species, and group ellipses are drawn at the 0.95
# level.
fviz_pca_biplot(
  pca,
  label = "var",
  habillage = iris$Species,
  addEllipses = TRUE,
  ellipse.level = 0.95,
  ggtheme = theme_minimal()
)
# 图5:看各原始变量对主成分的贡献率
# Contribution of the original variables to the principal components.
# Contribution to PC1 only:
fviz_contrib(pca, axes = 1, choice = "var")
# Contribution to PC2 only:
fviz_contrib(pca, axes = 2, choice = "var")
# Combined contribution to PC1 and PC2:
fviz_contrib(pca, axes = 1:2, choice = "var")
# With many variables, add `top = n` to show only the first n.
网友评论