美文网首页
计算ChIP-seq的CPM值

计算ChIP-seq的CPM值

作者: pudding815 | 来源:发表于2022-11-06 21:58 被阅读0次

    首先,准备callpeak后的narrowpeak文件,和bam文件

    利用bedtools multicov先计算每一个peak的count数。得到

    然后R开始

    # load packages ------

    library(ChIPseeker)

    library(org.Mm.eg.db)

    library(gggenes)

    library(ggplot2)

    library(GenomeInfoDb)

    library(GenomicFeatures)

    library(diffloop)

    library(GenomicRanges)

    library(rio)

    library(patchwork)

    library(tidyverse)

    library(venn)

    library(ggsci)

    library(scales)

    library(edgeR)

    library(stringi)

    cols.use <- pal_d3("category10")(10)

    show_col(cols.use)

    options(scipen = 20)

    options(ChIPseeker.ignore_1st_exon = T)

    options(ChIPseeker.ignore_1st_intron = T)

    options(ChIPseeker.ignore_downstream = T)

    options(ChIPseeker.ignore_promoter_subcategory = F)

    # prepera annotation files-------

    mus102.anno <- makeTxDbFromGFF("C:/High-throughput sequencing/ensmble基因组/Mus_musculus.GRCm38.102.chr.gff3.gz",organism='Mus musculus')

    seqlevels(mus102.anno)

    promoter_ens <- getPromoters(TxDb=mus102.anno, upstream=3000, downstream=3000)

    # load data-------

    ## chip reads counts

    h3k4_0h_count  <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K4me3_hCG_0h.peaks.reads.xls',header = F,sep = '\t')

    h3k4_4h_count  <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov//H3K4me3_hCG_4h.peaks.reads.xls',header = F,sep = '\t')

    h3k27_0h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K27ac_hCG_0h.peaks.reads.xls',header = F,sep = '\t')

    h3k27_4h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K27ac_hCG_4h.peaks.reads.xls',header = F,sep = '\t')

    colnames(h3k4_0h_count) <- c('chr','start', 'end', 'peak_id', '-log10p', 'counts')

    colnames(h3k4_4h_count) <- colnames(h3k4_0h_count)

    colnames(h3k27_0h_count) <- colnames(h3k4_0h_count)

    colnames(h3k27_4h_count) <- colnames(h3k4_0h_count)

    ## chip peaks

    h3k4_0h_peaks  <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K4ME3/H3K4me3_hCG_0h_peaks.narrowPeak')

    h3k4_4h_peaks  <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K4ME3/H3K4me3_hCG_4h_peaks.narrowPeak')

    h3k27_0h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K27ac/H3K27ac_hCG_0h_peaks.narrowPeak')

    h3k27_4h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K27ac/H3K27ac_hCG_4h_peaks.narrowPeak')

    # peaks anno-------

    # k4

    k4_peaks <- list(h3k4_0h_peaks = h3k4_0h_peaks, h3k4_4h_peaks = h3k4_4h_peaks)

    k4_peaks <- lapply(k4_peaks, rmchr)

    seqlevels(k4_peaks[[1]])

    k4_peakAnnoList <- lapply(k4_peaks, annotatePeak, TxDb=mus102.anno,tssRegion=c(-3000, 3000), verbose=FALSE,addFlankGeneInfo=TRUE, flankDistance=5000,annoDb="org.Mm.eg.db")

    k4.anno.0h <- data.frame(k4_peakAnnoList$h3k4_0h_peaks)

    k4.anno.4h <- data.frame(k4_peakAnnoList$h3k4_4h_peaks)

    # k27

    k27_peaks <- list(h3k27_0h_peaks = h3k27_0h_peaks, h3k27_4h_peaks = h3k27_4h_peaks)

    k27_peaks <- lapply(k27_peaks, rmchr)

    seqlevels(k27_peaks[[2]])

    k27_peakAnnoList <- lapply(k27_peaks, annotatePeak, TxDb=mus102.anno,overlap='all',annoDb="org.Mm.eg.db")

    k27.anno.0h <- data.frame(k27_peakAnnoList$h3k27_0h_peaks)

    k27.anno.4h <- data.frame(k27_peakAnnoList$h3k27_4h_peaks)

    # get cpm for chip------

    h3k4_0h_count$cpm <- cpm(h3k4_0h_count$counts)

    h3k4_4h_count$cpm <- cpm(h3k4_4h_count$counts)

    h3k27_0h_count$cpm <- cpm(h3k27_0h_count$counts)

    h3k27_4h_count$cpm <- cpm(h3k27_4h_count$counts)

    # combine cpm with peaks's gene

    colnames(k4.anno.0h)

    h3k4_0h <- h3k4_0h_count %>% left_join( y = k4.anno.0h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))

    h3k4_4h <- h3k4_4h_count %>% left_join( y = k4.anno.4h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))

    h3k4_0h_gene.unique <- h3k4_0h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))

    h3k4_4h_gene.unique <- h3k4_4h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))

    h3k4.gene.cpm <- list(h3k4_0h_gene.unique = h3k4_0h_gene.unique, h3k4_4h_gene.unique = h3k4_4h_gene.unique)

    export(h3k4.gene.cpm, file = 'h3k4.gene.cpm.corrected.xlsx')

    h3k27_0h <- h3k27_0h_count %>% left_join( y = k27.anno.0h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))

    h3k27_4h <- h3k27_4h_count %>% left_join( y = k27.anno.4h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))

    h3k27_0h_gene.unique <- h3k27_0h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))

    h3k27_4h_gene.unique <- h3k27_4h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))

    h3k27.gene.cpm <- list(h3k27_0h_gene.unique = h3k27_0h_gene.unique, h3k27_4h_gene.unique = h3k27_4h_gene.unique)

    export(h3k27.gene.cpm, file = 'h3k27.gene.cpm_2.xlsx')

    相关文章

      网友评论

          本文标题:计算ChIP-seq的CPM值

          本文链接:https://www.haomeiwen.com/subject/xnuitdtx.html