rt.png
logFoldChange=1
adjustP=0.05
library(limma)
setwd("C:\\Users\\lexb4\\Desktop\\geoBatch\\07.diff")
rt=read.table("lncRNA.txt",sep="\t",header=T,check.names=F)
#重复基因取均值,去重复
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
rt=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
rt=avereps(rt)
rt=rt[rowMeans(rt)>0,]
#数据集矫正,标准化
rt=normalizeBetweenArrays(as.matrix(rt))
boxplot(rt)
#rt=log2(rt+1)
#差异分析
modType=c(rep("con",182),rep("treat",32))
design <- model.matrix(~0+factor(modType))
colnames(design) <- c("con","treat")
exprSet=rt #exprSet 为表达矩阵
rownames(design)=colnames(exprSet) #更改分组信息design行名为分组名,也就是样本名字
#构造了design矩阵,得出每一个样本属于哪一个组,属于为1,不属于为0
fit <- lmFit(rt,design)
#自定义比较元素
contrast.matrix<-makeContrasts(treat-con,levels=design)
#自定义函数
deg = function(exprSet,design,contrast.matrix){#定义一个叫deg的函数
##step1#logFC后是前的多少倍,进行差异化比较
fit <- lmFit(exprSet,design)
##step2
fit2 <- contrasts.fit(fit, contrast.matrix)
##这一步很重要,大家可以自行看看效果
fit2 <- eBayes(fit2) ## default no trend !!!
##eBayes() with trend=TRUE
##step3
tempOutput = topTable(fit2, coef=1, n=Inf)
nrDEG = na.omit(tempOutput)
#write.csv(nrDEG2,"limma_notrend.results.csv",quote = F)
head(nrDEG)
return(nrDEG)
}
deg = deg(exprSet,design,contrast.matrix)
head(deg)#查看前几行文件
save(deg,file = 'deg.Rdata')#存储文件
write.table(deg,file="limmaTab.xls",sep="\t",quote=F)
#write table
diffSig <- deg[with(deg, (abs(logFC)>logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffSig,file="diff.xls",sep="\t",quote=F,row.names=T)
diffUp <- deg[with(deg, (logFC>logFoldChange & adj.P.Val < adjustP )), ]
write.table(diffUp,file="up.xls",sep="\t",quote=F,row.names=T)
diffDown <- deg[with(deg, (logFC<(-logFoldChange) & adj.P.Val < adjustP )), ]
write.table(diffDown,file="down.xls",sep="\t",quote=F,row.names=T)
#write expression level of diff gene
hmExp=rt[as.vector(diffSig[,1]),]
diffExp=rbind(id=colnames(hmExp),hmExp)
write.table(diffExp,file="diffExp.txt",sep="\t",quote=F,col.names=F)
#差异热图
if(T){
# 每次都要检测数据,rt为表达数据,取过log的
rt[1:4,1:4]
#分组
group_list<-c(rep("noTcells",182),rep("Tcells",32))
#查看分组
table(group_list)
x=deg$logFC #deg取logFC这列并将其重新赋值给x
names(x)=rownames(deg) #deg取probe_id这列,并将其作为名字给x
cg=c(names(head(sort(x),100)),#对x进行从小到大排列,取前100及后100,并取其对应的探针名,作为向量赋值给cg
names(tail(sort(x),100)))
library(pheatmap)
pheatmap(rt[cg,],show_colnames =F,show_rownames = F) #对dat按照cg取行,所得到的矩阵来画热图
n=t(scale(t(rt[cg,])))#通过“scale”对log-ratio数值进行归一化,现在的dat是行名为探针,列名为样本名,由于scale这个函数应用在不同组数据间存在差异时,需要行名为样本,因此需要用t(dat[cg,])来转换,最后再转换回来
n[n>2]=2
n[n< -2]= -2
n[1:4,1:4]
pheatmap(n,show_colnames =F,show_rownames = F)
ac=data.frame(g=group_list)
rownames(ac)=colnames(n) #将ac的行名也就分组信息(是‘no TNBC’还是‘TNBC’)给到n的列名,即热图中位于上方的分组信息
pheatmap(n,show_colnames =F,
show_rownames = F,
cluster_cols = F,
annotation_col=ac,filename = 'heatmap_top.pdf') #列名注释信息为ac即分组信息
}
#deg.csv是所有基因的差异表达情况
write.csv(deg,file = 'deg.csv')
dev.new()
#火山图
if(T){
nrDEG=deg
head(nrDEG)
attach(nrDEG)
plot(logFC,-log10(P.Value))
library(ggpubr)
df=nrDEG
df$v= -log10(P.Value) #df新增加一列'v',值为-log10(P.Value)
ggscatter(df, x = "logFC", y = "v",size=0.5)
df$g=ifelse(df$P.Value>0.05,'stable
', #if 判断:如果这一基因的P.Value>0.05,则为stable基因
ifelse( df$logFC >1,'up', #接上句else 否则:接下来开始判断那些P.Value<0.01的基因,再if 判断:如果logFC >1,则为up(上调)基因
ifelse( df$logFC < -1,'down','stable') )#接上句else 否则:接下来开始判断那些logFC <1 的基因,再if 判断:如果logFC <1,则为down(下调)基因,否则为stable基因
)
table(df$g)
df$name=rownames(df)
head(df)
ggscatter(df, x = "logFC", y = "v",size=0.5,color = 'g')
ggscatter(df, x = "logFC", y = "v", color = "g",size = 0.5,
label = "name", repel = T,
#label.select = rownames(df)[df$g != 'stable'] ,
#label.select = c('TTC9', 'AQP3', 'CXCL11','PTGS2'), #挑选一些基因在图中显示出来
palette = c("#00AFBB", "#E7B800", "#FC4E07") )
ggsave('volcano.png')
ggscatter(df, x = "AveExpr", y = "logFC",size = 0.2)
df$p_c = ifelse(df$P.Value<0.001,'p<0.001',
ifelse(df$P.Value<0.01,'0.001<p<0.01','p>0.01'))
table(df$p_c )
ggscatter(df,x = "AveExpr", y = "logFC", color = "p_c",size=0.2,
palette = c("green", "red", "black") )
ggsave('MA.pdf')
}
网友评论