参考文章:
1、GEOquery下载芯片数据及注释文件:https://www.jianshu.com/p/993b4022363f
2、得到的expressionset对象的简单使用:http://www.bio-info-trainee.com/1510.html
3、limma包的使用:https://www.jianshu.com/p/11534a814405
library(GEOquery)
library(stringr)
library(limma)
library(ggplot2)
library(pheatmap)
#array_endo<-getGEO("GSE111974",AnnotGPL = F,getGPL = F)
#save(array_endo,file = "GSE111974_array_endo.rdata")
array_endo<-array_endo[[1]]
array_endo
array_endo_mat<-exprs(array_endo)
#获得注释平台信息
array_endo_GPL<-getGEO("GPL17077",destdir = ".")
array_endo_GPL<-array_endo_GPL@dataTable@table
array_endo_GPL
#获取样本特征信息
array_endo_feature<-pData(array_endo)
array_endo_feature<-array_endo_feature[,c(2,8)]
array_endo_feature<-data.frame(sampleid=array_endo_feature[,1],
group=ifelse(grepl("Control",array_endo_feature$source_name_ch1),"Control","RIF"))
rownames(array_endo_feature)<-array_endo_feature$sampleid
array_endo_feature
#PCA
pca3<-prcomp(t(array_endo_mat))
pca3<-pca3$x
pca3<-as.data.frame(pca3)
ggplot(pca3,aes(x=PC1,y=PC2,color=array_endo_feature$group))+geom_point()
pheatmap(cor(array_endo_mat),annotation_row = array_endo_feature)
#使用limma,分别获得分组矩阵和差异比较矩阵
design_lim2<-model.matrix(~0+array_endo_feature$group)
design_lim2
rownames(design_lim2)<-colnames(array_endo_mat)
colnames(design_lim2)<-unique(array_endo_feature$group)
design_lim2
contrast.matrix_lim2<-makeContrasts(paste0(colnames(design_lim2),collapse = "-"),levels = design_lim2)
fit_lim2<-lmFit(array_endo_mat,design_lim2)
fit2_lim2<-contrasts.fit(fit_lim2,contrast.matrix_lim2)
fit2_lim2<-eBayes(fit2_lim2)
temp2<-topTable(fit2_lim2,coef = 1,number = Inf)
nrDEG2<-na.omit(temp2)
head(nrDEG2)
ggplot(nrDEG2,aes(x=logFC,y=-log10(P.Value)))+geom_point()+
geom_vline(xintercept = c(-1,1))+
geom_hline(yintercept = -log10(0.05))+
xlim(c(-4.5,4.5))+ylim(c(-0.5,16))
#进行基因注释
nrDEG2$gene_symbol<-array_endo_GPL[match(rownames(nrDEG2),array_endo_GPL$ID),7]
nrDEG2$gene_symbol[1:5]
网友评论