ggplot2绘制PCA图（1）

作者: Zhigang_Han | 来源:发表于2019-05-28 19:51 被阅读15次

Statquest学习笔记|在R中进行PCA（主成分分析）
04高通量测序-R实现主成分分析
ggplot2绘制PCA图（1）
ggplot2绘制分裂小提琴图
玫瑰图绘制
RNA-seq可视化之差异分析得到的FC值，我们能做什么？
R 数据可视化 —— 聚类热图 pheatmap
主成分分析(PCA)基本原理及分析实例
PCA 画图
ggplot2优雅的绘制高端热图

12个组，每组3个重复，一共36个样品的主成分分析图

1、导入包和数据处理

library(tidyr)
library(dplyr)
library(ggplot2)
data_o <- read.table("./PCA.txt",
                   header = T)
> data_o
               S1           S2           S3           S4           S5           S6           S7
M1   7.395760e+04 7.078706e+04 8.417177e+04  137621.1106  147372.6292  164831.9100 7.309625e+04
M2   3.943851e+03 5.747331e+03 5.227088e+03    6523.3657    6013.1556   12186.0886 5.114227e+03
M3   6.510884e+03 5.606016e+03 6.351800e+03   12000.8553   13808.4395   14835.7556 6.354940e+03
M4   1.384136e+05 1.317561e+05 1.553619e+05  266360.3396  289288.2152  339461.9371 1.301302e+05
M5   3.801214e+03 4.022403e+03 5.158690e+03    6174.7307    6762.0510    8130.7640 4.632161e+03
M6   1.201156e+03 1.123829e+03 1.782138e+03    2042.1169    1610.9445    2399.4726 9.799112e+02
M7   5.773008e+04 3.799364e+04 6.130102e+04   30516.9603   63046.2774   24196.7880 4.144398e+04
#翻转数据
data <- t(data_o)
> data
           M1        M2        M3        M4       M5        M6        M7         M8        M9      M10
S1   73957.60  3943.851  6510.884 138413.57 3801.214 1201.1558  57730.08   416.6729 1316.5815 118533.6
S2   70787.06  5747.331  5606.016 131756.14 4022.403 1123.8288  37993.64  1213.8793 1540.8043 134324.5
S3   84171.77  5227.088  6351.800 155361.89 5158.690 1782.1378  61301.02   534.9507 1244.3340 124778.7
S4  137621.11  6523.366 12000.855 266360.34 6174.731 2042.1169  30516.96  2634.5004 2461.8294 122279.8
S5  147372.63  6013.156 13808.440 289288.22 6762.051 1610.9445  63046.28   401.3044 2603.8165 119305.2
S6  164831.91 12186.089 14835.756 339461.94 8130.764 2399.4726  24196.79   473.9180 3278.3469 100409.4
#读取数据分组情况
group <- read.table("./group.txt",
                    header = T)
> group
   Group samplenames
1     G1          S1
2     G1          S2
3     G1          S3
4     G2          S4
5     G2          S5
6     G2          S6
7     G3          S7
8     G3          S8
9     G3          S9
10    G4         S10
11    G4         S11
12    G4         S12
###上述数据格式只列出一部分

2、主成分分析

#主成分计算
pca_data <- prcomp(data, scale = TRUE)
#查看合适主成分个数
screeplot(pca_data, type = "lines")
summary(pca_data)
#查看行名，确认是否为36个样品的名称
rownames(pca_data$x)
> rownames(pca_data$x)
 [1] "S1"  "S2"  "S3"  "S4"  "S5"  "S6"  "S7"  "S8"  "S9"  "S10" "S11" "S12" "S13" "S14" "S15" "S16" "S17"
[18] "S18" "S19" "S20" "S21" "S22" "S23" "S24" "S25" "S26" "S27" "S28" "S29" "S30" "S31" "S32" "S33" "S34"
[35] "S35" "S36"
#提取PC1的百分比
x_per <- round(summary(pca_data)$importance[2, 1]*100, 1)
#提取PC2的百分比
y_per <- round(summary(pca_data)$importance[2, 2]*100, 1)
#按照样品名称添加组并且合并
df_sample <- data.frame(samplenames=rownames(pca_data$x), pca_data$x) %>%
  left_join(group, by = "samplenames")
#数据尾部添加变量Group，只截取部分数据展示
> df_sample
          PC31         PC32        PC33        PC34         PC35          PC36 Group
1   0.306133837  0.161442452  0.03766617  0.30708158 -0.186722792 -1.422473e-15    G1
2   0.237143749 -0.599197140 -0.52253461 -0.36597316  0.205268460 -1.526557e-15    G1
3  -0.073255941 -0.069312851  0.02810155 -0.15750184  0.194224495 -1.741662e-15    G1
4   0.593003778 -0.003056265 -0.31652774 -0.35467364 -0.153817055 -1.013079e-15    G2
5  -0.368112005  0.085168850  0.47080323  0.50937052  0.114099679 -1.249001e-15    G2
6  -0.160988819  0.140851886 -0.28354428 -0.05074411 -0.068941571 -1.082467e-15    G2
7   0.323091269  0.126995166  0.44969245 -0.65800755 -0.352720233 -6.522560e-16    G3
8  -0.801287671 -0.206218914 -0.16401072  0.19778263  0.516041135 -1.085070e-15    G3
9  -0.082727209  0.303859473 -0.04526318  0.51639150 -0.266017284 -1.762479e-15    G3
10  0.023928552  0.526276376 -0.37558251 -0.52455436 -0.378126147 -7.771561e-16    G4
11  0.324106409 -0.183442282  0.36741010 -0.20372081  0.423367465 -1.332268e-15    G4
12 -0.339002487 -0.271889208  0.36575640  0.66261644  0.001102365 -1.235123e-15    G4

3、ggplot绘图

#设置适合科研的背景色
theme_set(theme_bw())
#绘图
plot_1 <- ggplot(df_sample, aes(x = PC1, y = PC2)) +
  geom_point(aes(color=Group),size=5) +
  xlab(paste("PC1","(", x_per,"%)",sep=" ")) +
  ylab(paste("PC2","(", y_per,"%)",sep=" ")) +
#t图例按照G1，G2，G3...排列
  scale_color_discrete(limits = c(paste("G", seq(1:12),sep = "")))

image.png