美文网首页ChIP-seq
2020-04-09重新学习Y叔的ChIPseeker系列

2020-04-09重新学习Y叔的ChIPseeker系列

作者: 程凉皮儿 | 来源:发表于2020-04-09 20:16 被阅读0次

    参考学习资料

    1.加载需要使用的R包

    rm(list = ls())
    library(ChIPseeker)
    library(ggplot2)
    library(dplyr)
    library(Vennerable)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    

    2.获取示例数据

    2.1.covplot,peakHeatmap,plotAvgProf2,upsetplot & vennpie

    参数annoDb = "org.Hs.eg.db"使结果更有可读性。

    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    files <- getSampleFiles()
    peak <- readPeakFile(files[[4]])
    promoter <- getPromoters(TxDb=txdb, 
                             upstream=3000, downstream=3000)
    tagMatrix <- getTagMatrix(peak, 
                              windows=promoter)
    tagHeatmap(tagMatrix, xlim=c(-3000, 3000), 
               color="red")
    
    image.png
    # peakHeatmap(files, weightCol="V5", TxDb=txdb, 
    #             upstream=3000, downstream=3000, 
    #             color=rainbow(length(files)))
    covplot(files[[4]])
    
    image.png
    head(files)
    #> $ARmo_0M
    #> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174480_ARmo_0M_peaks.bed.gz"
    #> 
    #> $ARmo_1nM
    #> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174481_ARmo_1nM_peaks.bed.gz"
    #> 
    #> $ARmo_100nM
    #> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174482_ARmo_100nM_peaks.bed.gz"
    #> 
    #> $CBX6_BF
    #> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295076_CBX6_BF_ChipSeq_mergedReps_peaks.bed.gz"
    #> 
    #> $CBX7_BF
    #> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295077_CBX7_BF_ChipSeq_mergedReps_peaks.bed.gz"
    peak=GenomicRanges::GRangesList(CBX6=readPeakFile(files[[4]]),CBX7=readPeakFile(files[[5]]))
    covplot(peak, weightCol="V5") + facet_grid(chr ~ .id)
    #> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    image.png
    covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4e7, 5e7)) + facet_grid(chr ~ .id)
    #> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
    
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    #> Warning in bind_rows_(x, .id): binding character and factor vector,
    #> coercing into character vector
    
    image.png
    
    plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
                xlab="Genomic Region (5'->3')", 
                ylab = "Read Count Frequency")
    
    image.png
    plotAvgProf2(files[[4]], TxDb=txdb, 
                 upstream=3000, downstream=3000,
                 xlab="Genomic Region (5'->3')", 
                 ylab = "Read Count Frequency")
    #> >> preparing promoter regions...  2020-04-09 20:05:39 
    #> >> preparing tag matrix...        2020-04-09 20:05:39 
    #> >> plotting figure...             2020-04-09 20:05:48
    
    image.png
    plotAvgProf(tagMatrix, xlim=c(-3000, 3000), 
                conf = 0.95, resample = 1000)
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:05:56
    
    image.png
    tagMatrixList <- lapply(files, getTagMatrix, 
                            windows=promoter)
    plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
    
    image.png
    plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), 
                conf=0.95,resample=500, facet="row")
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:49 
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:51 
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:54 
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:59 
    #> >> Running bootstrapping for tag matrix...        2020-04-09 20:07:07
    
    image.png
    peakAnno <- annotatePeak(files[[4]], 
                             tssRegion=c(-3000, 3000),
                             TxDb=txdb, annoDb="org.Hs.eg.db")
    #> >> loading peak file...               2020-04-09 20:07:10 
    #> >> preparing features information...      2020-04-09 20:07:10 
    #> >> identifying nearest features...        2020-04-09 20:07:10 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:11 
    #> >> assigning genomic annotation...        2020-04-09 20:07:11 
    #> >> adding gene annotation...          2020-04-09 20:07:32 
    #> >> assigning chromosome lengths           2020-04-09 20:07:32 
    #> >> done...                    2020-04-09 20:07:32
    plotAnnoPie(peakAnno)
    
    image.png
    plotAnnoBar(peakAnno)
    
    image.png
    vennpie(peakAnno)
    upsetplot(peakAnno)
    upsetplot(peakAnno, vennpie=TRUE)
    
    image.png image.png
    plotDistToTSS(peakAnno,
                  title="Distribution of transcription factor-binding loci\nrelative to TSS")
    
    image.png
    peakAnnoList <- lapply(files, annotatePeak, 
                           TxDb=txdb,tssRegion=c(-3000, 3000))
    #> >> loading peak file...               2020-04-09 20:07:35 
    #> >> preparing features information...      2020-04-09 20:07:35 
    #> >> identifying nearest features...        2020-04-09 20:07:35 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:35 
    #> >> assigning genomic annotation...        2020-04-09 20:07:35 
    #> >> assigning chromosome lengths           2020-04-09 20:07:38 
    #> >> done...                    2020-04-09 20:07:38 
    #> >> loading peak file...               2020-04-09 20:07:38 
    #> >> preparing features information...      2020-04-09 20:07:38 
    #> >> identifying nearest features...        2020-04-09 20:07:38 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:38 
    #> >> assigning genomic annotation...        2020-04-09 20:07:38 
    #> >> assigning chromosome lengths           2020-04-09 20:07:40 
    #> >> done...                    2020-04-09 20:07:40 
    #> >> loading peak file...               2020-04-09 20:07:40 
    #> >> preparing features information...      2020-04-09 20:07:40 
    #> >> identifying nearest features...        2020-04-09 20:07:40 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:41 
    #> >> assigning genomic annotation...        2020-04-09 20:07:41 
    #> >> assigning chromosome lengths           2020-04-09 20:07:43 
    #> >> done...                    2020-04-09 20:07:43 
    #> >> loading peak file...               2020-04-09 20:07:43 
    #> >> preparing features information...      2020-04-09 20:07:43 
    #> >> identifying nearest features...        2020-04-09 20:07:43 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:43 
    #> >> assigning genomic annotation...        2020-04-09 20:07:43 
    #> >> assigning chromosome lengths           2020-04-09 20:07:47 
    #> >> done...                    2020-04-09 20:07:47 
    #> >> loading peak file...               2020-04-09 20:07:47 
    #> >> preparing features information...      2020-04-09 20:07:48 
    #> >> identifying nearest features...        2020-04-09 20:07:48 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:50 
    #> >> assigning genomic annotation...        2020-04-09 20:07:50 
    #> >> assigning chromosome lengths           2020-04-09 20:07:52 
    #> >> done...                    2020-04-09 20:07:52
    plotAnnoBar(peakAnnoList)
    
    image.png
    plotDistToTSS(peakAnnoList)              
    
    image.png
    genes <- lapply(peakAnnoList, function(i) 
      as.data.frame(i)$geneId)
    
    vennplot(genes[2:4], by='Vennerable')
    
    image.png
    
    f = getSampleFiles()[[4]]
    #x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb)
    #x
    x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb,annoDb = "org.Hs.eg.db",
                     addFlankGeneInfo=TRUE, flankDistance=5000)
    #> >> loading peak file...               2020-04-09 20:07:55 
    #> >> preparing features information...      2020-04-09 20:07:55 
    #> >> identifying nearest features...        2020-04-09 20:07:55 
    #> >> calculating distance from peak to TSS...   2020-04-09 20:07:55 
    #> >> assigning genomic annotation...        2020-04-09 20:07:55 
    #> >> adding gene annotation...          2020-04-09 20:07:57 
    #> >> adding flank feature information from peaks...     2020-04-09 20:07:57 
    #> >> assigning chromosome lengths           2020-04-09 20:07:58 
    #> >> done...                    2020-04-09 20:07:58
    as.GRanges(x) %>% head(3)
    #> GRanges object with 3 ranges and 17 metadata columns:
    #>       seqnames          ranges strand |          V4        V5
    #>          <Rle>       <IRanges>  <Rle> |    <factor> <numeric>
    #>   [1]     chr1   815093-817883      * | MACS_peak_1    295.76
    #>   [2]     chr1 1243288-1244338      * | MACS_peak_2     63.19
    #>   [3]     chr1 2979977-2981228      * | MACS_peak_3    100.16
    #>              annotation   geneChr geneStart   geneEnd geneLength
    #>             <character> <integer> <integer> <integer>  <integer>
    #>   [1] Distal Intergenic         1    803451    812182       8732
    #>   [2]          Promoter         1   1243994   1247057       3064
    #>   [3]          Promoter         1   2976181   2980350       4170
    #>       geneStrand      geneId transcriptId distanceToTSS         ENSEMBL
    #>        <integer> <character>  <character>     <numeric>     <character>
    #>   [1]          2      284593   uc001abt.4         -2911 ENSG00000230368
    #>   [2]          1      126789   uc001aed.3             0 ENSG00000169972
    #>   [3]          2      440556   uc001aka.3             0 ENSG00000177133
    #>            SYMBOL                                    GENENAME
    #>       <character>                                 <character>
    #>   [1]      FAM41C family with sequence similarity 41 member C
    #>   [2]       PUSL1               pseudouridine synthase like 1
    #>   [3]   PRDM16-DT                 PRDM16 divergent transcript
    #>                                                                                                                                                                flank_txIds
    #>                                                                                                                                                                <character>
    #>   [1]                                                                                                                                                           uc001abt.4
    #>   [2] uc001aea.2;uc001aeb.2;uc001aec.1;uc001aed.3;uc010nyi.2;uc009vjx.3;uc009vjy.1;uc001aee.2;uc001aef.2;uc001aeg.2;uc001aeh.2;uc001aei.2;uc001aek.2;uc009vjz.2;uc010nyj.2
    #>   [3]                                                                                                    uc001aka.3;uc010nzg.1;uc001akc.3;uc001ake.3;uc001akf.3;uc009vlh.3
    #>                                                                                         flank_geneIds
    #>                                                                                           <character>
    #>   [1]                                                                                          284593
    #>   [2] 116983;116983;116983;126789;126789;126789;54973;54973;54973;54973;54973;54973;54973;54973;54973
    #>   [3]                                                           440556;440556;63976;63976;63976;63976
    #>                                                            flank_gene_distances
    #>                                                                     <character>
    #>   [1]                                                                     -2911
    #>   [2] -1979;-19;0;0;0;-128;8492;15729;15729;15729;15729;15729;15729;15729;15729
    #>   [3]                                               0;0;-4514;-4514;-4514;-4514
    #>   -------
    #>   seqinfo: 24 sequences from hg19 genome
    

    2.2 组学间基因富集自定义分析

    compareCluster:可对分簇后基因做富集分析可视化

    library(ChIPseeker)
    library(clusterProfiler)
    #> Warning: package 'clusterProfiler' was built under R version 3.6.2
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    
    bedfile=getSampleFiles()
    seq=lapply(bedfile, readPeakFile)
    genes=lapply(seq, function(i) 
      seq2gene(i, c(-1000, 3000), 3000, TxDb=txdb))
    
    cc = compareCluster(geneClusters = genes, 
                        fun="enrichKEGG", organism="hsa")
    
    dotplot(cc, showCategory=10)
    
    image.png

    后面可以自行生成bed文件生成Grange对象,或geneClusters进行富集分析。

    相关文章

      网友评论

        本文标题:2020-04-09重新学习Y叔的ChIPseeker系列

        本文链接:https://www.haomeiwen.com/subject/pxehmhtx.html