输入数据

表型组，第二列为相关性及遗传参数估计得到的代表性表型

包含ID、连续值、分组的数据框.png

读取数据与预处理

setwd("\\path\\校正表型PCA")
# Load packages
library(vegan)
library(ggplot2)
library(ggthemes)
# Load data. The phenotype table takes its first column as row names; those
# row names are later copied into an `ID` column and used to merge in `group`.
pheno_df <- read.table("adjust_phenotype.txt", row.names = 1, header = TRUE)
group <- read.table("adjust_group_total_fat.txt", header = TRUE)

# z-score standardize the phenotype columns (centre to 0, scale to unit sd).
# Fix: index the loaded data frame `pheno_df`; the previous code indexed
# `data`, which is the base R function and cannot be subset.
# NOTE(review): only columns 1:4 are used -- confirm this matches the number
# of phenotypes intended for the PCA.
dt <- as.matrix(scale(pheno_df[, 1:4]))
head(dt)

计算相关系数矩阵（数据已标准化，此时相关系数矩阵与协方差矩阵相同）

# Correlation matrix of the standardized phenotype matrix; the surrounding
# parentheses print the result as it is assigned.
(rm1 <- cor(dt))

求解特征值和相应的特征向量

# Eigen decomposition of the correlation matrix.
rs1 <- eigen(rm1)
rs1
# Eigenvalues of the decomposition, i.e. the variance of each principal
# component.
val <- rs1[["values"]]
# Convert the variances to standard deviations.
Standard_deviation <- sqrt(val)
Standard_deviation
# Proportion of variance explained by each component, and its running total.
Proportion_of_Variance <- prop.table(val)
Proportion_of_Variance
Cumulative_Proportion <- cumsum(Proportion_of_Variance)
Cumulative_Proportion

计算主成分得分

# Extract the eigenvectors (also called the loadings matrix).
(U <- as.matrix(rs1$vectors))
# Project the standardized data onto the eigenvectors to get the PC scores.
PC <- dt %*% U
# Name the components from the actual number of columns. A fixed list of 11
# names fails with a dimnames-length error whenever the number of input
# variables is not exactly 11.
colnames(PC) <- paste0("PC", seq_len(ncol(PC)))
head(PC)
# Keep the first two components for plotting.
plot_data <- data.frame(PC)[1:2]
# Copy the row names into an ID column so the group table can be merged on it.
plot_data$ID <- rownames(plot_data)
names(plot_data)[1:2] <- c("PC1", "PC2")

# eig holds the proportion of variance explained by each component
# (each eigenvalue divided by the sum of all eigenvalues).
eig <- Proportion_of_Variance

# Attach the group information to the sample coordinates; all.x = TRUE keeps
# samples that have no matching row in the group table.
plot_data <- merge(plot_data, group, by = "ID", all.x = TRUE)
head(plot_data)

画图

# Figure 1: PC1 vs PC2 scatter with points filled by the discrete `group`
# column, dot-dash reference lines through the origin, and the share of
# variance of each axis in its label.
ggplot(plot_data, aes(PC1, PC2, fill = group)) +
  geom_point(size = 4, shape = 21, colour = "black") +
  geom_hline(yintercept = 0, linetype = "dotdash") +
  geom_vline(xintercept = 0, linetype = "dotdash") +
  # Unnamed colours are assigned in the order of the fill levels.
  # NOTE(review): the H/L/M tags assume the levels sort in that order.
  scale_fill_manual(
    values = c(
      "#c95f55", # H
      "#478bb4", # L
      "#fdebdf"  # M
    )
  ) +
  labs(
    x = paste0("PC 1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)"),
    y = paste0("PC 2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)")
  ) +
  theme_few() +
  theme(
    legend.position = c(0.9, 0.2),
    legend.title = element_blank(),
    legend.background = element_rect(colour = "black")
  )
# ggsave() without `plot =` writes the most recently created plot.
ggsave("adj_group_total_fat.pdf", width = 4, height = 4)

# Figure 2: the same PC1 vs PC2 scatter, with points filled by the continuous
# `The.weight.of.Total.fat` column on a blue-to-red gradient.
ggplot(plot_data, aes(PC1, PC2, fill = The.weight.of.Total.fat)) +
  geom_point(size = 4, shape = 21, colour = "black") +
  geom_hline(yintercept = 0, linetype = "dotdash") +
  geom_vline(xintercept = 0, linetype = "dotdash") +
  scale_fill_gradient(low = "#478bb4", high = "#c95f55") +
  labs(
    x = paste0("PC 1 (", format(100 * eig[1] / sum(eig), digits = 4), "%)"),
    y = paste0("PC 2 (", format(100 * eig[2] / sum(eig), digits = 4), "%)")
  ) +
  theme_few() +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.8, 0.15),
    legend.direction = "horizontal"
  )
# ggsave() without `plot =` writes the most recently created plot.
ggsave("adj_value_total_fat.pdf", width = 4.5, height = 4)