文章题目为Epidermal Growth Factor Receptor Activation: An Upstream Signal for Transition of Quiescent Astrocytes into Reactive Astrocytes after Neural Injury,于2016年发表;
文章研究思路如下:
- 视神经受损后,星形胶质细胞发生EGFR的上调和激活;
- EGFR的激活对星形胶质细胞表型的影响;
- EGF激活的星形胶质细胞的MicroArray分析;
- MicroArray中相关marker表达的比较;
- EGFR TKI 抑制视网膜神经节细胞受损;
下载GEO数据
rm(list=ls())
options( 'download.file.method.GEOquery' = 'libcurl' )
library(GEOquery)
f<-'GSE5282.Rdata'
if(!file.exists(f)){
gset<-getGEO('GSE5282',destdir='.',
AnnotGPL=F,
getGPL=F)
save(gset,file=f)
}
load('GSE5282.Rdata')
a<-gset[[1]]
dat <- exprs(a)
pd <- pData(a)
3波差异分析走起
boxplot
-
outline
设置为FALSE,表示离群点不显示; -
notch
展示the median +/- 1.57 x IQR/sqrt of n; -
las
取值为0,1,2,3;0
指平行于轴;1
指水平;2
指垂直于轴;3
指垂直;
image.png
log_dat<- log2(dat+1)
####必经的路-Check data
boxplot(log_dat,outline=FALSE, notch=T,las=2)
group<- gsub('replicate \\d','',pd$title)
group[grep('Control',pd$title)] <- rep('Control',6)
group[12] <- group[11]
library("FactoMineR")
library("factoextra")
df.pca <- PCA(t(log_dat), graph = FALSE)
df.pca
fviz_pca_ind(df.pca,
geom.ind = "point",
col.ind = group_list,
addEllipses = TRUE,
legend.title = "Groups"
)
####差异分析数据准备1
log_dat_4h<- log_dat[,rownames(pd)[grep('4h',pd$title)]]
colnames(log_dat_4h)==rownames(pd)[1:6]
group_list_4h <- rep(c('egf_4h','control'),each=3)
####差异分析数据准备2
log_dat_12h<- log_dat[,rownames(pd)[grep('12h',pd$title)]]
group_list_12h <- rep(c('control','egf_12h'),each=3)
####差异分析数据准备3
group_list <- rep('EGF',12)
group_list[grep('Control',pd$title)] <- rep('Control',6)
######差异分析‘发射‘
suppressMessages(library(limma))
deg<- function(data,group_list,contrasts){
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
rownames(design)=colnames(data)
contrast.matrix<-makeContrasts(contrasts=contrasts,levels = design)
fit <- lmFit(data, design)
fit1 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit1, 0.01)
tT <- topTable(fit2, adjust="fdr", number=nrow(fit2))
tT <- subset(tT, select=c("adj.P.Val","P.Value","t","B","logFC",'AveExpr'))
return(tT)
}
tT_4h<- deg(data = log_dat_4h,group_list = group_list_4h,contrasts = c('egf_4h-control'))
tT_12h<- deg(data = log_dat_12h,group_list = group_list_12h,contrasts = c('egf_12h-control'))
tT_all<- deg(data = log_dat,group_list = group_list,contrasts = c('EGF-Control'))
boxplot
PCA
可视化选择的基因
change<- function(need_DEG,logFC_cutoff){ifelse(need_DEG$adj.P.Val < 0.05 & abs(need_DEG$logFC) > logFC_cutoff,
ifelse(need_DEG$logFC > logFC_cutoff ,'UP','DOWN'),'NOT')}
tT_12h$change<- change(need_DEG = tT_12h,logFC_cutoff = 1)
tT_4h$change<- change(need_DEG = tT_4h,logFC_cutoff = 1)
tT_all$change<- change(need_DEG = tT_all,logFC_cutoff = 1)
all<-unique(c(rownames(tT_12h)[tT_12h$change=='UP'],rownames(tT_4h)[tT_4h$change=='UP'],rownames(tT_all)[tT_all$change=='DOWN']))
library(UpSetR)
tT12h<-as.numeric(all%in%rownames(tT_12h)[tT_12h$change=='UP'])
tT4h<-as.numeric(all%in%rownames(tT_4h)[tT_4h$change=='UP'])
tTcontrol<-as.numeric(all%in%rownames(tT_all)[tT_all$change=='DOWN'])
all_01<-data.frame(all,tT12h,tT4h,tTcontrol)
upset(all_01, nsets = 7, number.angles = 0, point.size = 2, line.size = 1, mainbar.y.label = "Gene Intersections", sets.x.label = "Gene Set", text.scale = c(1.4, 1.3, 1, 1, 1.5, 1))
UpSetR
千呼万唤始出来的热图
choose_ma<- log_dat[all_01$all,]
pd$title<- gsub('replicate \\d','',pd$title)
pd$title[grep('Control',pd$title)] <- rep('Control',6)
colnames(choose_ma)<- pd$title[match(colnames(choose_ma),rownames(pd))]
choose<- c(grep('4h',colnames(choose_ma))[-1],grep('Control',colnames(choose_ma))[c(1,3,5)],grep('12h',colnames(choose_ma))[-3])
choose_ma<- choose_ma[,choose]
library(pheatmap)
choose_ma<- t(scale(t(choose_ma)))
choose_ma[choose_ma>2]=2
choose_ma[choose_ma< -2]=-2
tmp=rep('',nrow(choose_ma))
bk = unique(c(seq(-2,2, length=100)))
pheatmap(choose_ma,labels_row = tmp,breaks=bk,color = colorRampPalette(c("navy", "white", "firebrick3"))(100),cellwidth = 15,cellheight = 0.2,filename = 'test.png')
pheatmap
有几点是做得不好的,聚类的线太粗了,boxplot展示的样本名展示有问题;不过对于重复操作,用函数执行还是满意的;
网友评论