参考学习资料
- CS0: ChIPseq从入门到放弃
- CS1: ChIPseq简介
- CS2: BED文件
- CS3: peak注释
- CS4:关于ChIPseq注释的几个问题
- CS5: 吃着火锅,唱着歌,还把分析给做了
- CS6: ChIPseeker的可视化方法(中秋节的视觉饕餮)
- CS7:Genomic coordination的富集性分析(1)
- CS8:Genomic coordination的富集性分析(2)
- CS9: GEO数据挖掘
- CS10: 八卦终结版
1.加载需要使用的R包
rm(list = ls())
library(ChIPseeker)
library(ggplot2)
library(dplyr)
library(Vennerable)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
2.获取示例数据
2.1.covplot,peakHeatmap,plotAvgProf2,upsetplot & vennpie
参数annoDb = "org.Hs.eg.db"
使结果更有可读性。
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
files <- getSampleFiles()
peak <- readPeakFile(files[[4]])
promoter <- getPromoters(TxDb=txdb,
upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(peak,
windows=promoter)
tagHeatmap(tagMatrix, xlim=c(-3000, 3000),
color="red")
image.png
# peakHeatmap(files, weightCol="V5", TxDb=txdb,
# upstream=3000, downstream=3000,
# color=rainbow(length(files)))
covplot(files[[4]])
image.png
head(files)
#> $ARmo_0M
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174480_ARmo_0M_peaks.bed.gz"
#>
#> $ARmo_1nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174481_ARmo_1nM_peaks.bed.gz"
#>
#> $ARmo_100nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174482_ARmo_100nM_peaks.bed.gz"
#>
#> $CBX6_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295076_CBX6_BF_ChipSeq_mergedReps_peaks.bed.gz"
#>
#> $CBX7_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295077_CBX7_BF_ChipSeq_mergedReps_peaks.bed.gz"
peak=GenomicRanges::GRangesList(CBX6=readPeakFile(files[[4]]),CBX7=readPeakFile(files[[5]]))
covplot(peak, weightCol="V5") + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
image.png
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4e7, 5e7)) + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
image.png
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
xlab="Genomic Region (5'->3')",
ylab = "Read Count Frequency")
image.png
plotAvgProf2(files[[4]], TxDb=txdb,
upstream=3000, downstream=3000,
xlab="Genomic Region (5'->3')",
ylab = "Read Count Frequency")
#> >> preparing promoter regions... 2020-04-09 20:05:39
#> >> preparing tag matrix... 2020-04-09 20:05:39
#> >> plotting figure... 2020-04-09 20:05:48
image.png
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
conf = 0.95, resample = 1000)
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:05:56
image.png
tagMatrixList <- lapply(files, getTagMatrix,
windows=promoter)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
image.png
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000),
conf=0.95,resample=500, facet="row")
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:49
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:51
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:54
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:59
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:07:07
image.png
peakAnno <- annotatePeak(files[[4]],
tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Hs.eg.db")
#> >> loading peak file... 2020-04-09 20:07:10
#> >> preparing features information... 2020-04-09 20:07:10
#> >> identifying nearest features... 2020-04-09 20:07:10
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:11
#> >> assigning genomic annotation... 2020-04-09 20:07:11
#> >> adding gene annotation... 2020-04-09 20:07:32
#> >> assigning chromosome lengths 2020-04-09 20:07:32
#> >> done... 2020-04-09 20:07:32
plotAnnoPie(peakAnno)
image.png
plotAnnoBar(peakAnno)
image.png
vennpie(peakAnno)
upsetplot(peakAnno)
upsetplot(peakAnno, vennpie=TRUE)
image.png
image.png
plotDistToTSS(peakAnno,
title="Distribution of transcription factor-binding loci\nrelative to TSS")
image.png
peakAnnoList <- lapply(files, annotatePeak,
TxDb=txdb,tssRegion=c(-3000, 3000))
#> >> loading peak file... 2020-04-09 20:07:35
#> >> preparing features information... 2020-04-09 20:07:35
#> >> identifying nearest features... 2020-04-09 20:07:35
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:35
#> >> assigning genomic annotation... 2020-04-09 20:07:35
#> >> assigning chromosome lengths 2020-04-09 20:07:38
#> >> done... 2020-04-09 20:07:38
#> >> loading peak file... 2020-04-09 20:07:38
#> >> preparing features information... 2020-04-09 20:07:38
#> >> identifying nearest features... 2020-04-09 20:07:38
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:38
#> >> assigning genomic annotation... 2020-04-09 20:07:38
#> >> assigning chromosome lengths 2020-04-09 20:07:40
#> >> done... 2020-04-09 20:07:40
#> >> loading peak file... 2020-04-09 20:07:40
#> >> preparing features information... 2020-04-09 20:07:40
#> >> identifying nearest features... 2020-04-09 20:07:40
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:41
#> >> assigning genomic annotation... 2020-04-09 20:07:41
#> >> assigning chromosome lengths 2020-04-09 20:07:43
#> >> done... 2020-04-09 20:07:43
#> >> loading peak file... 2020-04-09 20:07:43
#> >> preparing features information... 2020-04-09 20:07:43
#> >> identifying nearest features... 2020-04-09 20:07:43
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:43
#> >> assigning genomic annotation... 2020-04-09 20:07:43
#> >> assigning chromosome lengths 2020-04-09 20:07:47
#> >> done... 2020-04-09 20:07:47
#> >> loading peak file... 2020-04-09 20:07:47
#> >> preparing features information... 2020-04-09 20:07:48
#> >> identifying nearest features... 2020-04-09 20:07:48
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:50
#> >> assigning genomic annotation... 2020-04-09 20:07:50
#> >> assigning chromosome lengths 2020-04-09 20:07:52
#> >> done... 2020-04-09 20:07:52
plotAnnoBar(peakAnnoList)
image.png
plotDistToTSS(peakAnnoList)
image.png
genes <- lapply(peakAnnoList, function(i)
as.data.frame(i)$geneId)
vennplot(genes[2:4], by='Vennerable')
image.png
f = getSampleFiles()[[4]]
#x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb)
#x
x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb,annoDb = "org.Hs.eg.db",
addFlankGeneInfo=TRUE, flankDistance=5000)
#> >> loading peak file... 2020-04-09 20:07:55
#> >> preparing features information... 2020-04-09 20:07:55
#> >> identifying nearest features... 2020-04-09 20:07:55
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:55
#> >> assigning genomic annotation... 2020-04-09 20:07:55
#> >> adding gene annotation... 2020-04-09 20:07:57
#> >> adding flank feature information from peaks... 2020-04-09 20:07:57
#> >> assigning chromosome lengths 2020-04-09 20:07:58
#> >> done... 2020-04-09 20:07:58
as.GRanges(x) %>% head(3)
#> GRanges object with 3 ranges and 17 metadata columns:
#> seqnames ranges strand | V4 V5
#> <Rle> <IRanges> <Rle> | <factor> <numeric>
#> [1] chr1 815093-817883 * | MACS_peak_1 295.76
#> [2] chr1 1243288-1244338 * | MACS_peak_2 63.19
#> [3] chr1 2979977-2981228 * | MACS_peak_3 100.16
#> annotation geneChr geneStart geneEnd geneLength
#> <character> <integer> <integer> <integer> <integer>
#> [1] Distal Intergenic 1 803451 812182 8732
#> [2] Promoter 1 1243994 1247057 3064
#> [3] Promoter 1 2976181 2980350 4170
#> geneStrand geneId transcriptId distanceToTSS ENSEMBL
#> <integer> <character> <character> <numeric> <character>
#> [1] 2 284593 uc001abt.4 -2911 ENSG00000230368
#> [2] 1 126789 uc001aed.3 0 ENSG00000169972
#> [3] 2 440556 uc001aka.3 0 ENSG00000177133
#> SYMBOL GENENAME
#> <character> <character>
#> [1] FAM41C family with sequence similarity 41 member C
#> [2] PUSL1 pseudouridine synthase like 1
#> [3] PRDM16-DT PRDM16 divergent transcript
#> flank_txIds
#> <character>
#> [1] uc001abt.4
#> [2] uc001aea.2;uc001aeb.2;uc001aec.1;uc001aed.3;uc010nyi.2;uc009vjx.3;uc009vjy.1;uc001aee.2;uc001aef.2;uc001aeg.2;uc001aeh.2;uc001aei.2;uc001aek.2;uc009vjz.2;uc010nyj.2
#> [3] uc001aka.3;uc010nzg.1;uc001akc.3;uc001ake.3;uc001akf.3;uc009vlh.3
#> flank_geneIds
#> <character>
#> [1] 284593
#> [2] 116983;116983;116983;126789;126789;126789;54973;54973;54973;54973;54973;54973;54973;54973;54973
#> [3] 440556;440556;63976;63976;63976;63976
#> flank_gene_distances
#> <character>
#> [1] -2911
#> [2] -1979;-19;0;0;0;-128;8492;15729;15729;15729;15729;15729;15729;15729;15729
#> [3] 0;0;-4514;-4514;-4514;-4514
#> -------
#> seqinfo: 24 sequences from hg19 genome
2.2 组学间基因富集自定义分析
compareCluster:可对分簇后基因做富集分析可视化
library(ChIPseeker)
library(clusterProfiler)
#> Warning: package 'clusterProfiler' was built under R version 3.6.2
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
bedfile=getSampleFiles()
seq=lapply(bedfile, readPeakFile)
genes=lapply(seq, function(i)
seq2gene(i, c(-1000, 3000), 3000, TxDb=txdb))
cc = compareCluster(geneClusters = genes,
fun="enrichKEGG", organism="hsa")
dotplot(cc, showCategory=10)
image.png
后面可以自行生成bed文件生成Grange对象,或geneClusters进行富集分析。
网友评论