---------------------------------加载R包-------------------------
library(DESeq2)
library(pheatmap)
library(corrplot)
library(EnhancedVolcano)
library(ggplot2)
library(ggsci)
library(Vennerable)
library(rio)
library(matrixStats)
library(BiocGenerics)
library(biomaRt)
library(limma)
library(dplyr)
library(tidyverse)
library(tibble)
----------------------count ------------------------------------------------
count <- read.table('C:/High-throughput sequencing/LH0-4-12h/6sample_count.txt',header = T,sep = '\t')
count <- count[,-c(2:5)]
meta <- count[,1:2]
rownames(count) <- count$Geneid
count <- count[,-c(1:2)]
colnames(count) <- c("LH0h_1","LH0h_2","LH4h_1","LH4h_2","LH12h_1","LH12h_2")
---------------------gene information---------------------------------
gene.info <- read.table('mouse.ref102.gene.type.gtf',header = F,sep = '\t')
gene.info<-gene.info[,c(5:7)]
colnames(gene.info) <- c('geneid','symbol','biotype')
colnames(meta) <- c('geneid','length')
meta <- meta %>% left_join(y = gene.info,by = 'geneid')
-------------------------get TPM--------------------------------
kb <- meta$Length / 1000
fpkm <- count / kb
tpm <- t(t(fpkm)/colSums(fpkm) * 1000000)
tpm <- round(tpm,2) %>% as.data.frame()
----------------clean data-----------------------------
pick.genes <- unique(c(rownames(tpm[matrixStats::rowMedians(as.matrix(tpm[,1:6]), cols = 1:2) > 0.5, ]),
rownames(tpm[matrixStats::rowMedians(as.matrix(tpm[,1:6]), cols = 3:4) > 0.5, ]),
rownames(tpm[matrixStats::rowMedians(as.matrix(tpm[,1:6]), cols = 5:6) > 0.5, ])
))
count.clean <- count[pick.genes,]
fpkm.clean <- fpkm[pick.genes,]
tpm.clean <- tpm[pick.genes,]
count.clean <- rownames_to_column(count.clean,var = "row_names")
colnames(count.clean) <- c("geneid","LH0h_1","LH0h_2","LH4h_1","LH4h_2","LH12h_1","LH12h_2")
fpkm.clean <- rownames_to_column(fpkm.clean,var = "row_names")
colnames(fpkm.clean) <- c("geneid","LH0h_1","LH0h_2","LH4h_1","LH4h_2","LH12h_1","LH12h_2")
tpm.clean <- rownames_to_column(tpm.clean,var = "row_names")
colnames(tpm.clean) <- c("geneid","LH0h_1","LH0h_2","LH4h_1","LH4h_2","LH12h_1","LH12h_2")
count.clean <- count.clean %>% left_join(y = meta,by = 'geneid')
fpkm.clean <- fpkm.clean %>% left_join(y = meta,by = 'geneid')
tpm.clean <- tpm.clean %>% left_join(y = meta,by = 'geneid')
count.clean.pcg <- count.clean[count.clean$biotype == "protein_coding",]
fpkm.clean.pcg <- fpkm.clean[fpkm.clean$biotype == "protein_coding",]
tpm.clean.pcg <- tpm.clean[tpm.clean$biotype == "protein_coding",]
有重复基因,根据表达量删除低的
which(count.clean.pcg$symbol=="St6galnac2")
count.clean.pcg <- count.clean.pcg[-c(2719,6414,10370,14222),]
fpkm.clean.pcg <- fpkm.clean.pcg[-c(2719,6414,10370,14222),]
tpm.clean.pcg <-tpm.clean.pcg[-c(2719,6414,10370,14222),]
rownames(count.clean.pcg) <- count.clean.pcg$symbol
rownames(fpkm.clean.pcg) <- fpkm.clean.pcg$symbol
rownames(tpm.clean.pcg) <- tpm.clean.pcg$symbol
count.clean.pcg <- count.clean.pcg[,2:7]
fpkm.clean.pcg <- fpkm.clean.pcg[,2:7]
tpm.clean.pcg <- tpm.clean.pcg[,2:7]
write.csv(count.clean.pcg,'count_clean_pcg.csv')
write.csv(fpkm.clean.pcg,'fpkm_clean_pcg.csv')
write.csv(tpm.clean.pcg,'tpm_clean_pcg.csv')
差异分析
------------------------- DEG analysis-----------------------
padj_cutoff <- 0.05
group_list = c(rep('h0',2),rep("h4",2))
colData = data.frame(row.names = colnames(count.clean.pcg[,1:4]),group_list = group_list)
dds <- DESeqDataSetFromMatrix(countData = count.clean.pcg[,1:4],colData = colData,design = ~group_list)
rld <- rlog(dds, blind=TRUE)
dds <- DESeq(dds)
plotDispEsts(dds)
RES <- results(dds, contrast = c('group_list','h4','h0'),name = 'h4_vs_h0',alpha = 0.05) 前处后对
res_tbl <- RES %>%data.frame() %>%rownames_to_column(var="geneid") %>%as_tibble() %>%arrange(padj)
res_tbl<-res_tbl %>% left_join(y = gene.info,by = 'geneid')
write.csv(res_tbl,"h4_vs_h0_DEseq2_res.csv")
---------------log2FC pvaule------
sig_res <- dplyr::filter(res_tbl, padj < padj_cutoff) %>%
dplyr::arrange(padj)
write.csv(sig_res,"h4_vs_h0_sig_res0.05.csv")
pvalue<-sig_res$pvalue<0.05
up<-sig_res$log2FoldChange>2
down<-sig_res$log2FoldChange<(-2)
sig_res$change<-ifelse(pvalue&up,"up",ifelse(pvalue&down,"down","none"))
write.csv(sig_res,"h4_vs_h0_DEG_p0.05_FC2.csv")
功能分析
-----------------------enrichment-method2: clusterprofiler---------------------------
library(clusterProfiler)
library(org.Mm.eg.db)
ms.up <- factor(na.omit(sig_res[sig_res$change == 'up',]$geneid))
ms.up <- bitr(ms.up,fromType = 'SYMBOL',toType = 'ENTREZID',OrgDb = org.Mm.eg.db)
ms.down <- factor(na.omit(sig_res[sig_res$change == 'up',]$geneid ))
ms.down <- bitr(ms.down,fromType = 'SYMBOL',toType = 'ENTREZID',OrgDb = org.Mm.eg.db)
bg.genes <- bitr(sig_res$geneid ,fromType = 'SYMBOL',toType = 'ENTREZID',OrgDb = org.Mm.eg.db )
ms.up.go = enrichGO( gene = as.character(ms.up$ENTREZID),
universe = as.character(bg.genes$ENTREZID),
OrgDb = org.Mm.eg.db,
ont = 'BP',
pAdjustMethod = 'BH',
pvalueCutoff = 0.05,
qvalueCutoff = 0.1,
readable = TRUE)
dotplot(ms.up.go,showCategory = 20)
up.go_result_BP <- as.data.frame(ms.up.go)
write.table(up.go_result_BP, "up.go_result_BP.xls", quote = F, sep = "\t", col.names = T, row.names = F)
网友评论