GEO数据库对应的R包是GEOquery
,强大的ArrayExpress数据库也有对应的R包——ArrayExpress。
可以通过其访问ArrayExpress数据库,并构建ExpressonSet、AffyBatch和NChannelSet数据结构。
核心函数:
getAE(accession, type="full")
ae2bioc(rawset)
对于处理过的数据:
cnames <- getcolproc(mexp1422)
mexp1422proc <- procset(mexp1422,cnames[2])
1.数据下载
rm(list = ls())
library(ArrayExpress)
library(affy)
acs="E-MEXP-1422"
mexp1422 <- getAE(acs,type = "full", local =T )
#type = "raw" 下载raw data
#type = "processed" 下载processed data
#type = "full" 两者都下载
# local = T 当前目录下有,不用再次下载
mexp1422raw <- ae2bioc(mexp1422) #从raw data构建ExpressionSet对象
## Reading in : E:/panCancer/27-GIlnc/AF15.CEL
## Reading in : E:/panCancer/27-GIlnc/AF16.CEL
## Reading in : E:/panCancer/27-GIlnc/AF6.CEL
## Reading in : E:/panCancer/27-GIlnc/AF14.CEL
## Reading in : E:/panCancer/27-GIlnc/AF7.CEL
## Reading in : E:/panCancer/27-GIlnc/AF8.CEL
cnames <- getcolproc(mexp1422)
mexp1422proc <- procset(mexp1422,cnames[2]) #从processed data构建ExpressionSet对象
## Reading processed data matrix file, E:/panCancer/27-GIlnc/expression_and_calls.txt.magetab
2.数据预处理
mexp1422norm <- rma(mexp1422raw) #背景校正归一化
## Background correcting
## Normalizing
## Calculating Expression
boxplot(mexp1422norm)
image.png
可以看出校正后效果挺好
mexp1422norm1 <- exprs(mexp1422proc)
boxplot(mexp1422norm1) #可以看出processed data是归一化过的数据
image.png
3.差异分析
pd <- pData(mexp1422raw)
pd <- pd %>%
select("Factor.Value..RNAi.")
colnames(pd) <- "group"
pd$group <- case_when(pd$group=="GFP siRNA"~"ctrl",
pd$group=="PROX1 siRNA #1"~"siRNA1",
pd$group=="PROX1 siRNA #2"~"siRNA2")
group_list <- factor(pd$group,levels = c("ctrl","siRNA1","siRNA2"))
names(group_list) <- rownames(group_list)
design <- model.matrix(~0+group_list)
colnames(design) <- levels(group_list)
library(limma)
cont.matrix <- makeContrasts(si1.normal="siRNA1-ctrl",
si2.normal="siRNA2-ctrl",
si2.si1="siRNA2-siRNA1",
levels = design)
fit <- lmFit(mexp1422norm,design)
fit2 <- contrasts.fit(fit,cont.matrix)
fit2 <- eBayes(fit2)
deg <- topTable(fit2,adjust.method = "BH",sort.by = "F",n=Inf) %>%
rownames_to_column("probe_id")
# 探针注释
library(AnnoProbe)
probe2id <- idmap("GPL570")
volcanoInput <- probe2id %>%
inner_join(deg,by = "probe_id") %>%
select(-probe_id)
4. 火山图
volcanoInput$type <- case_when(volcanoInput$adj.P.Val<0.05&volcanoInput$si2.normal>1 ~ "up",
volcanoInput$adj.P.Val<0.05&volcanoInput$si2.normal< -1 ~ "down",
T~"stable")
ggplot(volcanoInput,aes(x=si2.normal,y=-log10(adj.P.Val),color=type))+
geom_point(alpha=0.4,size=3.5)+
scale_color_manual(values=c("#546de5", "#d2dae2","#ff4757"))+
geom_vline(xintercept=c(-1,1),lty=4,col="black",lwd=0.8) +
geom_hline(yintercept = -log10(0.05),lty=4,col="black",lwd=0.8) +
labs(x="log2(fold change)",
y="-log10 (p-value)")+
theme_bw()+
theme(plot.title = element_text(hjust = 0.5),
legend.position="right",
legend.title = element_blank())
image.png
参考链接:
网友评论