本文摘自:R 语言主成分分析(PCA)实战教程
方便个人学习和查阅
安装依赖:
# Install the packages used in this tutorial, skipping those that are already
# available so that re-running the script does not reinstall them.
# corrplot is included because it is loaded later for the cos2 and
# contribution matrix plots.
required_pkgs <- c("FactoMineR", "factoextra", "corrplot")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library("FactoMineR")
library("factoextra")
数据准备:
# Load the decathlon2 demo data set from the factoextra package and preview it.
data("decathlon2")
head(decathlon2)
# Variables should be standardized before PCA (mean 0, standard deviation 1).
# FactoMineR standardizes the data automatically before running the PCA.
# Keep rows 1 to 23 and columns 1 to 10 for the analysis, then show the
# first six columns of that subset.
decathlon2.active <- decathlon2[seq_len(23), seq_len(10)]
print(decathlon2.active[, seq_len(6)])
data:image/s3,"s3://crabby-images/48882/48882ea97728a8da8d7a25f039b8c400c2b495a0" alt=""
# Fit the PCA without drawing anything and keep the result for later steps.
res.pca <- PCA(X = decathlon2.active, graph = FALSE)
# Run the PCA again with graphs enabled so that the plots are displayed.
PCA(X = decathlon2.active, graph = TRUE)
data:image/s3,"s3://crabby-images/b29dd/b29ddde93b1cbce1e00cfd24cb97648b32612789" alt=""
一、变量分析
# Extract the variable-level results from the PCA; the coord, cos2 and
# contrib components of this object are inspected in the following steps.
var <- get_pca_var(res.pca)
data:image/s3,"s3://crabby-images/c15ed/c15ed10b3e63c6337450a27cd86f2bfec7ce3405" alt=""
1. 相关圆(correlation circle)作图
# Print the coordinates of the variables.
var$coord
data:image/s3,"s3://crabby-images/02b22/02b226f1e93e4ba03fea059c9e9326eb30a54207" alt=""
# Plot the variables of the PCA, drawing all of them in black.
fviz_pca_var(res.pca, col.var = "black")
data:image/s3,"s3://crabby-images/7a6d1/7a6d1e13d10cc8858bdc4f149d5af3de566148cb" alt=""
2. 代表质量作图
# Print cos2, the quality of representation of the variables.
var$cos2
data:image/s3,"s3://crabby-images/d4f78/d4f7811ca2d40e444b834a496234508b6eb80255" alt=""
corrplot展示各变量对各主成分的代表质量
# Visualize the cos2 matrix with corrplot.
# is.corr = FALSE states that the input is not a correlation matrix.
library(corrplot)
corrplot(corr = var$cos2, is.corr = FALSE)
data:image/s3,"s3://crabby-images/d3f17/d3f17a9414d3a435728931f13be53b55b2bbb1e7" alt=""
各变量对一二主成分的代表质量柱形图(通过值的叠加显示)
# Bar plot of the variables' cos2 over principal components 1 and 2.
fviz_cos2(
  res.pca,
  axes = 1:2,
  choice = "var"
)
data:image/s3,"s3://crabby-images/b998c/b998c7767406928f9ef73697e86c2e3ce138d49c" alt=""
各变量相关图,颜色表示代表质量(cos2)
# Variable plot coloured by cos2 along a three-colour gradient.
fviz_pca_var(
  res.pca,
  col.var = "cos2",
  # repel = TRUE avoids overlapping text labels.
  repel = TRUE,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
data:image/s3,"s3://crabby-images/41c90/41c9050e7524c05f75405633ebfe36d0085ec369" alt=""
3. 变量对主成分的贡献作图
# Print the contributions of the variables to the principal components.
var$contrib
data:image/s3,"s3://crabby-images/64c9e/64c9e17bdffb903d4bd429f36ea27cf4dd1cc4a9" alt=""
corrplot展示每个变量对每个主成分的贡献
# Visualize the contribution matrix with corrplot; the input is not a
# correlation matrix, hence is.corr = FALSE.
library(corrplot)
corrplot(corr = var$contrib, is.corr = FALSE)
data:image/s3,"s3://crabby-images/2613a/2613ae5c2ddece50e98182dd511e1306a90fa186" alt=""
各变量对第一主成分的贡献
# Contributions of the variables to the first principal component (top 10).
fviz_contrib(
  res.pca,
  choice = "var",
  top = 10,
  axes = 1
)
data:image/s3,"s3://crabby-images/e497a/e497a3da21f6f71d1408b434092d02d339ef098a" alt=""
各变量对第二主成分的贡献
# Contributions of the variables to the second principal component (top 10).
fviz_contrib(
  res.pca,
  choice = "var",
  top = 10,
  axes = 2
)
data:image/s3,"s3://crabby-images/5bce4/5bce47a9dd97ec1fd46c56868d745687562bef92" alt=""
各变量对第一二主成分的总贡献
# Total contributions of the variables to principal components 1 and 2
# (top 10).
fviz_contrib(
  res.pca,
  choice = "var",
  top = 10,
  axes = 1:2
)
data:image/s3,"s3://crabby-images/bbb54/bbb543b6884689fde379b5703f434792e1a485c2" alt=""
各变量相关图,颜色展示贡献度
# Variable plot coloured by contribution along a three-colour gradient.
fviz_pca_var(
  res.pca,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib"
)
data:image/s3,"s3://crabby-images/d4103/d4103b550e3f86d4430192841974e0c54773e1e0" alt=""
二、观测值分析
factoextra包中的get_pca_ind()用于提取观测值(个体)的坐标、cos2 和贡献率
# Extract the individual-level results from the PCA and print a summary of
# the returned object.
ind <- get_pca_ind(res.pca)
print(ind)
data:image/s3,"s3://crabby-images/30750/30750eb6cc1e7d20e75cb9b3f10be7148f7f4e86" alt=""
1. 观测值坐标图
# Plot the individuals (observations) of the PCA with the default settings.
fviz_pca_ind(res.pca)
data:image/s3,"s3://crabby-images/8d7ec/8d7ec7079aa2cd3b35a070c2d7df21ce80575d81" alt=""
2. 观测值坐标图,cos2着色
# Print the cos2 values of the individuals.
ind$cos2
data:image/s3,"s3://crabby-images/e92fc/e92fc57ec5c3a7531231f1dc69fe513180c41603" alt=""
# Individuals plot coloured by cos2 along a three-colour gradient.
fviz_pca_ind(
  res.pca,
  col.ind = "cos2",
  # repel = TRUE avoids overlapping text labels.
  repel = TRUE,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
data:image/s3,"s3://crabby-images/20d07/20d07c3f0e8c25a337d9791cab91d299f855de1c" alt=""
3. 观测值坐标图,cos2着色,cos2大小
# Individuals plot where both the colour and the point size follow cos2.
fviz_pca_ind(
  res.pca,
  pointsize = "cos2",
  col.ind = "cos2",
  # repel = TRUE avoids overlapping text labels.
  repel = TRUE,
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
data:image/s3,"s3://crabby-images/0887d/0887d59db2de42e68b9c634821c5a17489f1bdc3" alt=""
4. 观测值柱形图,cos2代表质量
# Bar plot of cos2 (quality of representation) for the individuals.
fviz_cos2(res.pca, choice = "ind")
data:image/s3,"s3://crabby-images/11ea4/11ea4585d6ba45333e40872c3896d6c755caff09" alt=""
5. 观测值柱形图,contrib贡献
# Bar plot of the individuals' contributions to principal components 1 and 2.
fviz_contrib(
  res.pca,
  axes = 1:2,
  choice = "ind"
)
data:image/s3,"s3://crabby-images/c1cfd/c1cfdfc3828978c1682953121ed4a6620018eac2" alt=""
三、自定义观测值作图
1. 数据准备
# Preview the first rows of the iris data set.
head(iris)
data:image/s3,"s3://crabby-images/aea2d/aea2dfc15be80ae684029d2b80b4571815f1e5ef" alt=""
# Fit the PCA on iris without its fifth column (Species, which is used for
# grouping in the plots below) and keep the result; no graph is drawn.
iris.pca <- PCA(X = iris[, -5], graph = FALSE)
# Run the same PCA again with graphs enabled to display the plots.
PCA(X = iris[, -5], graph = TRUE)
data:image/s3,"s3://crabby-images/135e9/135e953aab5b30ce77809c615772369e08a95002" alt=""
2. PCA展示,添加椭圆,自定义颜色
# Individuals plot for the iris PCA, coloured by species with concentration
# ellipses and a custom palette.
fviz_pca_ind(
  iris.pca,
  # Show points only, without text labels (both are shown by default).
  geom.ind = "point",
  # Grouping factor used to colour the individuals.
  col.ind = iris$Species,
  # One colour per group.
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  # Add concentration ellipses.
  addEllipses = TRUE,
  # Last argument carries no trailing comma: a trailing comma would pass an
  # empty argument through `...`, which errors once those forwarded arguments
  # are evaluated.
  legend.title = "Groups"
)
data:image/s3,"s3://crabby-images/a9264/a92642b15670e4890f4eb66880fdb06ae92e5b23" alt=""
3. PCA展示,添加椭圆,分组颜色
# Individuals plot for the iris PCA, grouped by species via habillage.
fviz_pca_ind(
  iris.pca,
  # Colour by groups.
  habillage = iris$Species,
  # Use the "jco" (Journal of Clinical Oncology) palette.
  palette = "jco",
  # Add concentration ellipses.
  addEllipses = TRUE,
  # Hide the labels of the individuals.
  label = "none"
)
data:image/s3,"s3://crabby-images/fdc06/fdc06b9ac1ca6c603e96de54497200dbb85ae376" alt=""
4. PCA展示,添加多边形,分组颜色
# Individuals plot for the iris PCA with convex hulls around each group.
fviz_pca_ind(
  iris.pca,
  geom.ind = "point",
  # Colour by groups.
  col.ind = iris$Species,
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  # Draw convex hull polygons instead of ellipses.
  ellipse.type = "convex",
  addEllipses = TRUE,
  legend.title = "Groups"
)
data:image/s3,"s3://crabby-images/3dbb6/3dbb64202fd4cecc686bc4c393f8d662f752a36d" alt=""
四、观测值和变量的biplot(双标图)
biplot 展示了两方面内容:根据前两个主成分,每个观测的得分;根据前两个主成分,每个变量的载荷。
1. PCA biplot
# Biplot of the decathlon PCA showing individuals and variables together.
fviz_pca_biplot(
  res.pca,
  # Colour of the individuals.
  col.ind = "#696969",
  # Colour of the variables.
  col.var = "#2E9FDF",
  repel = TRUE
)
data:image/s3,"s3://crabby-images/a87b2/a87b2ab02a1e5931d5fd362cd6de0374ae469ccd" alt=""
2. PCA biplot,添加椭圆
# Biplot of the iris PCA with individuals coloured by species and ellipses
# drawn around the groups.
fviz_pca_biplot(
  iris.pca,
  # Colour of the individuals follows the species; "jco" palette.
  col.ind = iris$Species,
  palette = "jco",
  # Add ellipses and label the variables only.
  addEllipses = TRUE,
  label = "var",
  # Colour of the variable arrows.
  col.var = "black",
  legend.title = "Species",
  repel = TRUE
)
data:image/s3,"s3://crabby-images/c2ef9/c2ef9d908f3caef5b5a797e41a3551272702b642" alt=""
3. PCA biplot,添加椭圆,点大小
# Biplot of the iris PCA: individuals are drawn as points filled by species,
# and variables are coloured by a sepal/petal grouping.
fviz_pca_biplot(
  iris.pca,
  # Avoid label overplotting.
  repel = TRUE,
  # Draw individuals as points.
  geom.ind = "point",
  # Point shape.
  pointshape = 21,
  # Point size.
  pointsize = 2.5,
  # Fill individuals by group (species).
  fill.ind = iris$Species,
  col.ind = "black",
  # Colour variables by group.
  col.var = factor(c("sepal", "sepal", "petal", "petal")),
  # Legend titles for the fill and colour scales.
  legend.title = list(fill = "Species", color = "Clusters")
) +
  # Fill colours for the individuals.
  ggpubr::fill_palette("jco") +
  # Colours for the variables.
  ggpubr::color_palette("npg")
data:image/s3,"s3://crabby-images/45126/451264688f9081250015d8d7c40603f6735b5aae" alt=""
网友评论