1.PCA
- 加载R包
BiocManager::install("PCAtools")
library(PCAtools)
library(tidyverse)
```
#读取数据
```
gene_expression<-read.table(file="rnaseq-apple/gene_exp.txt",header = T)
head(gene_expression)
BLO_S1_LD1 BLO_S1_LD2 BLO_S1_LD3 BLO_S2_LD1 BLO_S2_LD2 BLO_S2_LD3 BLO_S3_LD1 BLO_S3_LD2 BLO_S3_LD3 BLO_S4_LD1 BLO_S4_LD2 BLO_S4_LD3 KID_S1_LD1 KID_S1_LD2 KID_S1_LD3
HF36249 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF06454 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 5.231
HF31517 54.105 54.628 72.646 34.335 66.884 70.510 28.292 22.236 61.755 24.227 22.919 16.880 63.790 67.738 88.882
HF07207 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF16138 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF24764 37.845 31.335 32.200 51.658 33.046 40.531 26.209 36.140 28.605 25.200 35.759 14.814 37.978 44.917 42.374
KID_S2_LD1 KID_S2_LD2 KID_S2_LD3 KID_S3_LD1 KID_S3_LD2 KID_S3_LD3 KID_S4_LD1 KID_S4_LD2 KID_S4_LD3
HF36249 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF06454 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF31517 75.407 55.568 73.206 7.496 29.949 31.430 24.733 11.832 15.213
HF07207 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF16138 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
HF24764 37.583 41.682 23.796 27.777 32.126 35.417 20.903 29.229 19.286
samole_infor<-read.table(file="rnaseq-apple/sample_info.txt",header = T,row.names = 1)
head(samole_infor)
strain stage Antho Cy.3.Gal Fruit_weight
BLO_S1_LD1 BLO S1 3.0 3.0 40
BLO_S1_LD2 BLO S1 3.0 3.0 40
BLO_S1_LD3 BLO S1 3.0 3.0 40
BLO_S2_LD1 BLO S2 12.2 12.2 90
BLO_S2_LD2 BLO S2 12.2 12.2 90
BLO_S2_LD3 BLO S2 12.2 12.2 90
tail(samole_infor)
strain stage Antho Cy.3.Gal Fruit_weight
KID_S3_LD1 KID S3 137.2 115.9 144
KID_S3_LD2 KID S3 137.2 115.9 144
KID_S3_LD3 KID S3 137.2 115.9 144
KID_S4_LD1 KID S4 406.7 346.3 165
KID_S4_LD2 KID S4 406.7 346.3 165
KID_S4_LD3 KID S4 406.7 346.3 165
计算PCA
pac<-pca(gene_expression,metadata=samole_infor)
rotated<-pac$rotated
> head(rotated)
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
BLO_S1_LD1 -49469.14 15057.16 -454.5192 8411.3724 7552.6960 -1220.861 -443.726 -466.1595 310.5622 1217.96952 -1947.7771 824.33618 -531.3273 522.7237 -96.24148
BLO_S1_LD2 -45633.87 15543.93 13245.5139 -10940.2559 -3788.9435 7916.307 -3299.722 -165.4762 1312.4755 -258.09290 105.2893 -99.42380 228.7940 207.9851 446.01438
BLO_S1_LD3 -46717.66 20209.53 8234.3625 3985.0197 11576.3309 1204.673 -1103.596 525.1471 -226.3431 -891.70579 1045.9973 -631.17628 257.8611 -181.2148 -167.83820
BLO_S2_LD1 -13144.99 -30803.63 16205.3486 11338.7275 -5255.6342 2002.183 -2341.521 1178.7140 -1076.0024 -974.26857 -360.8252 -15.80409 -158.5351 -263.9794 -663.84528
BLO_S2_LD2 -12322.07 -14888.44 5275.2716 -818.2086 614.6884 1486.736 3250.971 -677.2021 534.4907 43.48976 1893.9821 1371.67688 -792.4302 450.5443 84.75676
BLO_S2_LD3 -15290.96 -14108.23 -4526.2833 -7139.3094 4992.1368 2273.598 3079.258 494.1480 -496.2651 -1215.31600 -380.2841 1003.34225 -559.9832 -282.6319 -754.21886
PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
BLO_S1_LD1 439.564694 -633.14314 310.2185 676.8497 -150.60109 -319.31067 110.00396 253.8511 6.883389e-11
BLO_S1_LD2 164.993336 -52.55215 345.6621 491.3560 -16.93068 -348.21336 -100.91814 -348.5413 6.883389e-11
BLO_S1_LD3 -619.697283 537.75110 -395.9370 -893.2291 106.45542 151.39062 -27.51704 181.5122 6.883389e-11
BLO_S2_LD1 3.851858 -39.14639 412.5201 -47.6036 37.82040 17.66335 -135.07773 -130.2348 6.883389e-11
BLO_S2_LD2 335.251504 510.19947 -496.5087 660.7700 370.71522 174.25787 629.00796 646.1405 6.883389e-11
BLO_S2_LD3 485.540092 -725.04285 -723.8823 -465.4147 -176.28882 -361.28678 -264.05864 -754.1549 6.883389e-11
ggplot2绘图
整理数据
1.去掉行名
rotated_tidy<-rownames_to_column(rotated,var="sample_name")
head(rotated_tidy)
sample_name PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15
1 BLO_S1_LD1 -49469.14 15057.16 -454.5192 8411.3724 7552.6960 -1220.861 -443.726 -466.1595 310.5622 1217.96952 -1947.7771 824.33618 -531.3273 522.7237 -96.24148
2 BLO_S1_LD2 -45633.87 15543.93 13245.5139 -10940.2559 -3788.9435 7916.307 -3299.722 -165.4762 1312.4755 -258.09290 105.2893 -99.42380 228.7940 207.9851 446.01438
3 BLO_S1_LD3 -46717.66 20209.53 8234.3625 3985.0197 11576.3309 1204.673 -1103.596 525.1471 -226.3431 -891.70579 1045.9973 -631.17628 257.8611 -181.2148 -167.83820
4 BLO_S2_LD1 -13144.99 -30803.63 16205.3486 11338.7275 -5255.6342 2002.183 -2341.521 1178.7140 -1076.0024 -974.26857 -360.8252 -15.80409 -158.5351 -263.9794 -663.84528
5 BLO_S2_LD2 -12322.07 -14888.44 5275.2716 -818.2086 614.6884 1486.736 3250.971 -677.2021 534.4907 43.48976 1893.9821 1371.67688 -792.4302 450.5443 84.75676
6 BLO_S2_LD3 -15290.96 -14108.23 -4526.2833 -7139.3094 4992.1368 2273.598 3079.258 494.1480 -496.2651 -1215.31600 -380.2841 1003.34225 -559.9832 -282.6319 -754.21886
PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
1 439.564694 -633.14314 310.2185 676.8497 -150.60109 -319.31067 110.00396 253.8511 6.883389e-11
2 164.993336 -52.55215 345.6621 491.3560 -16.93068 -348.21336 -100.91814 -348.5413 6.883389e-11
3 -619.697283 537.75110 -395.9370 -893.2291 106.45542 151.39062 -27.51704 181.5122 6.883389e-11
4 3.851858 -39.14639 412.5201 -47.6036 37.82040 17.66335 -135.07773 -130.2348 6.883389e-11
5 335.251504 510.19947 -496.5087 660.7700 370.71522 174.25787 629.00796 646.1405 6.883389e-11
6 485.540092 -725.04285 -723.8823 -465.4147 -176.28882 -361.28678 -264.05864 -754.1549 6.883389e-11
samole_infor_tidy<-rownames_to_column(samole_infor,var="sample_name")
head(samole_infor_tidy)
sample_name strain stage Antho Cy.3.Gal Fruit_weight
1 BLO_S1_LD1 BLO S1 3.0 3.0 40
2 BLO_S1_LD2 BLO S1 3.0 3.0 40
3 BLO_S1_LD3 BLO S1 3.0 3.0 40
4 BLO_S2_LD1 BLO S2 12.2 12.2 90
5 BLO_S2_LD2 BLO S2 12.2 12.2 90
解释度 比如PC1解释度为8.224552e+01,PC2为1.023421e+01等
pac$variance
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13
8.224552e+01 1.023421e+01 3.175249e+00 1.454057e+00 1.141373e+00 8.234302e-01 3.700784e-01 1.207162e-01 9.389095e-02 7.164297e-02 4.812836e-02 4.039241e-02 2.888659e-02
PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22 PC23 PC24
2.386747e-02 2.170546e-02 1.904344e-02 1.556694e-02 1.426426e-02 1.313013e-02 1.249111e-02 1.190933e-02 1.082498e-02 9.622137e-03 2.797783e-28
解释度绘图
screeplot(pac)
image.png
合数据加注释,好进一步利用ggplot画图
pca_results<-left_join(rotated_tidy,samole_infor_tidy,by ="sample_name") #俩名字一样一个即可
自定义形状绘图
ggplot(data = pca_results,aes(x=pca_results$PC1,pca_results$PC2))+
geom_point(size=8,aes(shape=strain,fill=stage))+
scale_shape_manual(values = 21:22)+theme_bw()+
stat_ellipse(aes(color=stage))+ #加圆圈
labs(x="PC1(82% variance explained)", y="PC2(10% variance explained)")
image.png
scale_shape_manual(values = 21:22)#选取点的形状
,具体每个数字对应的点的形状见下图:
2火山图
读取书
de_result <- read_excel("de_result.xlsx",
col_types = c("numeric", "text", "text",
"text", "text", "numeric", "numeric",
"numeric", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "text"))
展示列名,方便后面画图用
colnames(de_result)
[1] "Rank" "GENE_NAME" "EntrezId" "YeastOrtholog" "Description" "log2FoldChange" "lfcSE"
[8] "pvalue" "padj" "ORF19_ID" "A21_region" "ASSEMBLY22_ID" "A22_A_region" "A22_B_ID"
[15] "A22_B_region" "CGDID" "CGD_Primary_ID" "KEGG_ID"
加注释(上下调基因)
res<-select(de_result,Rank,GENE_NAME,log2FoldChange,pvalue) %>% mutate(direction=if_else(pvalue > 0.05| abs(log2FoldChange)<1,"NS", if_else(log2FoldChange>=1,"UP","DOWN")))
选取子集,用于后续加标签的基因
selected_genes<-c('FMP27','ERG251','NOT5','ERG3','MVB12','ERG28','ERG25','ERG27')
reselected<-filter(res,GENE_NAME%in%selected_genes)
绘图
library(ggrepel)
ggplot(data=res,aes(x=log2FoldChange,y=-log10(pvalue)))+
geom_point(size=3,aes(color=direction),show.legend = F)+
geom_point(data = reselected,size=3,shape=21,stroke=3,color=“yellow”)+对加标签的点加用21号圈突出出来,stroke=1是指的黑圈圈的线条粗细
geom_text_repel(data=reselected,aes(label=GENE_NAME))+ #加标签
geom_hline(yintercept = -log10(0.05),linetype="dotdash",color='gray')+
geom_vline(xintercept = c(-1,1),linetype="dotdash",color='gray')+
scale_color_manual(values = c("blue","gray","red"))+
theme_bw()
image.png
网友评论