美文网首页
表达谱数据为FPKM时下游分析

表达谱数据为FPKM时下游分析

作者: 找兔子的小萝卜 | 来源:发表于2021-11-22 16:51 被阅读0次
#数据下载
rm(list = ls())
options(stringsAsFactors = F)
library(GEOquery)

a=read.table('GSE149508_All_samples_processed_data_FPKM.txt.gz',sep='\t',quote = "",fill = T,
             comment.char = "!",header = T) # 提取表达矩阵
exp=a[,c(2,4:9)]
exp <- exp[!duplicated(exp$GeneName),]
row.names(exp)=exp[,1]
exp=exp[,-1]
exp[1:4,1:4]
#将FPKM转换为TPM
expMatrix <- exp
fpkmToTpm <- function(fpkm)
{
  exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
}
tpms <- apply(expMatrix,2,fpkmToTpm)
tpms[1:3,]
colSums(tpms)
##group
group_list=c(rep('Treat',3),rep('Normal',3))
## 强制限定顺序
group_list <- factor(group_list,levels = c("Normal","Treat"),ordered = F)
#表达矩阵数据校正
exprSet <- tpms
boxplot(exprSet,outline=FALSE, notch=T,col=group_list, las=2)
library(limma) 
exprSet=normalizeBetweenArrays(exprSet)
boxplot(exprSet,outline=FALSE, notch=T,col=group_list, las=2)
#判断数据是否需要转换,如果值很大就要进行log
exprSet <- log2(exprSet+1)
#差异分析:
dat <- exprSet
design=model.matrix(~factor( group_list ))
fit=lmFit(dat,design)
fit=eBayes(fit)
options(digits = 4)
topTable(fit,coef=2,adjust='BH')
bp=function(g){
  library(ggpubr)
  df=data.frame(gene=g,stage=group_list)
  p <- ggboxplot(df, x = "stage", y = "gene",
                 color = "stage", palette = "jco",
                 add = "jitter")
  #  Add p-value
  p + stat_compare_means()
}
deg=topTable(fit,coef=2,adjust='BH',number = Inf)
head(deg) 
save(exp,deg,file = 'deg.Rdata')


#(3)vision
#差异基因热图----
#为deg数据框添加几列
#热图
x=deg$logFC 
deg$gene=row.names(deg)
names(x)=deg$gene
cg=c(names(head(sort(x),100)),names(tail(sort(x),100)))
n=exp[cg,]
dim(n)
#作热图
library(pheatmap)
annotation_col=data.frame(group=group_list)
rownames(annotation_col)=colnames(n) 
library(ggplotify)
heatmap_plot <- as.ggplot(pheatmap(n,show_colnames =F,
                                   show_rownames = F,
                                   scale = "row",
                                   #cluster_cols = F, 
                                   annotation_col=annotation_col)) 
heatmap_plot

#3.加change列,标记上下调基因
library(dplyr)
logFC_t=1
P.Value_t = 0.05
k1 = (deg$P.Value < P.Value_t)&(deg$logFC < -logFC_t)
k2 = (deg$P.Value < P.Value_t)&(deg$logFC > logFC_t)
change = ifelse(k1,"down",ifelse(k2,"up","stable"))
deg <- mutate(deg,change)



#1.火山图----

library(ggplot2)
dat  = deg

p <- ggplot(data = dat, 
            aes(x = logFC, 
                y = -log10(P.Value))) +
  geom_point(alpha=0.4, size=3.5, 
             aes(color=change)) +
  ylab("-log10(Pvalue)")+
  scale_color_manual(values=c("blue", "grey","red"))+
  geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",lwd=0.8) +
  geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) +
  theme_bw()

write.table(exp,file='exp.csv',sep = ",")
write.table(deg,file='deg.csv',sep = ",")

相关文章

网友评论

      本文标题:表达谱数据为FPKM时下游分析

      本文链接:https://www.haomeiwen.com/subject/jijbtrtx.html