以下内容根据生信技能树Jimmy老师代码按个人习惯修改而成,欢迎讨论共同进步
三种最常用的方法都试一下看看,用function定义函数的写法比较整齐,不然大段大段的心烦
用DEseq2
degdeseq <- function(exprset,pd,group,pvcut){
library(DESeq2)
colData <- data.frame(row.names=rownames(pd), grouplist=pd[,group])
dds <- DESeqDataSetFromMatrix(countData = exprset,colData = colData,design = ~ grouplist)
dds <- dds[rowSums(counts(dds))>100,] ###过滤低counts####
dds <- DESeq(dds)
res <- results(dds, contrast=c('grouplist',"trt","untrt"))
#res <- results(dds, contrast=c('grouplist',"trt","untrt"),pAdjustMethod = 'fdr', alpha = 0.01)
plotMA(res)
resordered <- as.data.frame(res[order(res$padj),])
resordered <- na.omit(resordered)
fdcutoff <- with(resordered,mean(abs(log2FoldChange))+2*sd(abs(log2FoldChange)))
#fdcutoff <- 3 ###或者简单粗暴自己定义cutoff###
print(fdcutoff)
resordered$sig <- ifelse(resordered$padj<pvcut & abs(resordered$log2FoldChange)>fdcutoff,ifelse(resordered$log2FoldChange > 0,'up','down'),'no sig')
return(resordered)
}
resdegseq <- degdeseq(exprset,pd,'group',0.05)
table(resdegseq$sig) ###看一下上下调基因的分布####
EdgeR
degdge <- function(exprset,group_list,pvcut){
library(edgeR)
dge <- DGEList(counts=exprset,group=factor(group_list))
keepe <- rowSums(cpm(dge)>1) >=2 ###按cpm过滤低表达####
dge <- dge[keepe,keep.lib.sizes=F]
dge <- calcNormFactors(dge)
#dge$samples
design <- model.matrix(~0+factor(group_list))
rownames(design)<-colnames(dge)
colnames(design)<-levels(factor(group_list))
dge <- estimateDisp(dge,design,robust = T)
#dge <- estimateGLMCommonDisp(dge,design)
#dge <- estimateGLMTrendedDisp(dge, design)
#dge <- estimateGLMTagwiseDisp(dge, design)
fit <- glmQLFit(dge, design)
cont.matrix=makeContrasts(contrasts=c('trt-untrt'),levels = design)
lrt <- glmQLFTest(fit, contrast=cont.matrix)
nrDEG <- topTags(lrt, n=nrow(exprset))
resdge <- na.omit(as.data.frame(nrDEG))
#fdcutoff <- 3
fdcutoff <- with(resdge,mean(abs(logFC))+2*sd(abs(logFC)))
print(paste0('fdcutoff is ',fdcutoff))
resdge$sig <- ifelse(resdge$PValue<pvcut & abs(resdge$logFC)>fdcutoff,ifelse(resdge$logFC > 0,'up','down'),'no sig')
return(resdge)
}
resdge <- degdge(exprset,group_list,0.05)
table(resdge$sig)
Limma
deglim <- function(exprset,pvcut){
library(limma)
design <- model.matrix(~0+factor(group_list))
colnames(design)=levels(factor(group_list))
rownames(design)=colnames(exprset)
dgelim <- DGEList(counts=exprset,group=factor(group_list))
keepl <- rowSums(cpm(dgelim)>1) >=2 ###按cpm过滤低表达####
dgelim <- dgelim[keepl,keep.lib.sizes=F]
dgelim <- calcNormFactors(dgelim)
#logCPM <- cpm(dgelim, log=TRUE, prior.count=3) ####可以选另一种方法trend#####
#fitc <- lmFit(logCPM, design)
v <- voom(dgelim,design,plot=TRUE, normalize="quantile")
fit <- lmFit(v, design)
cont.matrix=makeContrasts(contrasts=c('trt-untrt'),levels = design)
fit2 <- contrasts.fit(fit,cont.matrix)
fit2 <- eBayes(fit2)
tempOutput <- topTable(fit2, coef='trt-untrt', n=Inf)
deglim <- na.omit(tempOutput)
#fdcutoff <- 3
fdcutoff <- with(deglim,mean(abs(logFC))+2*sd(abs(logFC)))
deglim$sig <- ifelse(deglim$adj.P.Val<pvcut & abs(deglim$logFC)>fdcutoff,ifelse(deglim$logFC > 0,'up','down'),'no sig')
print(paste0('fdcutoff is ',fdcutoff))
return(resdge)
}
reslim <- deglim(exprset,0.05)
table(reslim$sig)
#######check expression ####
cherld <- function(rld){
library(gplots)
rld <- assay(rlogTransformation(dds)) ####in deseq2 count-log2
rld <- log2(cpm(exprset)+1) ####cpm
boxplot(rld)
hist(rld)
sampleDists <- as.matrix(dist(t(rld)))
library(RColorBrewer)
mycols <- brewer.pal(ncol(sampleDists), "Dark2")[1:length(unique(group_list))]
heatmap.2(as.matrix(sampleDists), key=F, trace="none",
col=colorpanel(100, "black", "white"),
ColSideColors=mycols[group_list], RowSideColors=mycols[group_list],
margin=c(10, 10), main="Sample Distance Matrix")
}
韦恩图,看一下三种方法的差距
######gene overlap ######
venn <- function(degf,allv,deseq){
library(VennDiagram)
geneedge <- rownames(degf)[-grep("no sig",degf$sig)]
genelima <- rownames(allv)[-grep("no sig",allv$sig)]
genedeseq <- rownames(deseq)[-grep("no sig",deseq$sig)]
venn.plot <- venn.diagram(x=list(edger=geneedge,limmac=genelima,deseq=genedeseq),col="transparent",filename = '3 triple overlap.png',
fill=c("red","blue","green"),cex = 1.5,
fontfamily = "serif",
fontface = "bold",
cat.default.pos = "text",
cat.col = c("darkred", "darkblue", "darkgreen"),
cat.cex = 1.5,
cat.fontfamily = "serif",
cat.dist = c(0.08, 0.08, 0.08),
cat.pos = 0,
alpha = 0.7,
margin = 0.08)
}
venn(resdegseq,resdge,reslim)
类似效果如下
热图展示
######heatmap######
pt <- function(degsig,exprn,pd){
library(pheatmap)
degsig <- degsig[-grep('no sig',degsig$sig),]
#degsig <- degsig[order(degsig$log2FoldChange,decreasing = T),] ###for deseq2####
g <- c(head(rownames(degsig),20),tail(rownames(degsig),20)) #前20个差异基因###
degmat <- exprn[g,]
#degmat <- log2(degmat+1)
degmat <- t(scale(t(degmat))) ##normalize extreme value##
anc <- matrix(pd$type)
rownames(anc) <- rownames(pd)
anc <- as.data.frame(pd$type,row.names = rownames(pd))
pheatmap(degmat,annotation_col = anc,cluster_cols = F)
}
pt(reslim,mergerem,pd2)
火山图
######volcano########
volcano_print <- function(DEG,fccutoff,pcutoff,plotfccut,plotpcut){
library(dplyr)
library(ggrepel)
degsig <- DEG[-grep('no sig',DEG$sig),]
this_tile <- paste0('Cutoff for log2FoldChange is ',round(fccutoff,3),
'\nThe number of up gene is ',nrow(DEG[DEG$sig =='up',]) ,
'\nThe number of down gene is ',nrow(DEG[DEG$sig =='down',]))
l <- degsig[which(abs(degsig$log2FoldChange)>=plotfccut & (-log10(degsig$padj) > -log10(plotpcut))),] ##filter label###
g <- ggplot(data=degsig, aes(x=log2FoldChange, y=-log10(padj), color=sig)) +
geom_point(aes(color = sig), alpha = 0.6, size = 1) +
theme_set(theme_set(theme_bw(base_size=20)))+
geom_vline(xintercept = c(-fccutoff, fccutoff), color = 'gray', size = 0.25) +
geom_hline(yintercept = -log(pcutoff, 10), color = 'gray', size = 0.25) +
xlab("log2 fold change") + ylab("-log10 p-value") +
ggtitle( this_tile ) + theme(plot.title = element_text(size=15,hjust = 0.5))+
geom_text_repel(data=l,aes(label=rownames(l)),size=2) ###加标签###
print(g)
}
volcano_print(resdegseq,3,0.05,3,0.05)
代码出来是有标签的,图上忘了加
网友评论