这里是佳奥!初步注释了peaks,并在组件进行了比较,下一步便是用.bw文件计算相关性了。
1 ChIP-Seq分析结果:peaks相关性绘图
第五步,ChIP-Seq-correlation。
ChIP-Seq和RNA-Seq的.bw文件都可以放在IGV进行可视化比较,但是要说明ChIP-Seq数据的相关性就需要另外的处理,一般是deeptools。
因为ChIP-Seq数据无法通过基因为单位进行定量。
这个程序是以KB为长度进行定量。
multiBigwigSummary bins -b *WT*bw -o wt_results.npz -p 8
##绘制热图
plotCorrelation -in results.npz \
--corMethod spearman -- skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix SpearmanCorr_readCounts.tab
结果图,不是很好看。
QQ截图20220824155531.png
我们把结果.tab文件导入R进行绘图。
rm(list = ls())
options(stringsAsFactors = F)
a=read.table('SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
library(stringr)
ac=data.frame(group=str_split(rownames(a),'_',simplify = T)[,1])
rownames(ac)=colnames(a)
M=a
pheatmap::pheatmap(M,
annotation_col = ac)
a=read.table('merge_SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
QQ截图20220824155915.png
QQ截图20220824155926.png
QQ截图20220824161141.png
2 ChIPQC包的使用
第六步, ChIPQC。
library(ChIPQC)
samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
"example_QCexperiment.csv"))
samples
##自带的example没有附带数据,所以运行不了
exampleExp = ChIPQC(samples,annotaiton="hg19")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
##表格需要以下内容(.bam文件)
# SampleID: 样本ID
# Tissue, Factor, Condition: 不同的实验设计对照信息
# 三列信息必须包含在sampleSheet里,如果没有某一列的信息设为NA。
# Replicate : 重复样本的编号
# bamReads : 实验组BAM 文件的路径(data/bams)
# ControlID : 对照组样本ID
# bamControl :对照组样本的bam文件路径
# Peaks :样本peaks文件的路径
# PeakCaller :peak类型的字符串,可以是raw,bed,narrow,macs等。
(bams=list.files(path = '~/fly/CHIP-SEQ/align/',
pattern = '*.raw.bam$', full.names = T))
bams=bams[grepl('WT',bams)]
peaks=list.files(path = '~/fly/CHIP-SEQ/peaks-single/',pattern = '*_raw_peaks.narrowPeak', full.names = T)
peaks=peaks[grepl('WT',peaks)]
library(stringr)
SampleID=sub('.raw.bam','',basename(bams))
Replicate=str_split(basename(bams),'_',simplify = T)[,3]
Factor=str_split(basename(bams),'_',simplify = T)[,1]
samples=data.frame(SampleID=SampleID,
Tissue='WT',
Factor=Factor,
Replicate=1,
bamReads=bams,
Peaks=peaks)
exampleExp = ChIPQC(samples,
chromosomes='2L',##只看了一条染色体
annotaiton="dm6")
QCmetrics(exampleExp) #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)
运行成功会出现一个报表。
QQ截图20220824201538.png QQ截图20220824201554.png
第七步,peaks-anno-dm6,自己走的流程,ChIP-Seq的peaks进行注释。
##anno-for-each-bed
bedPeaksFile = 'Ez_SppsKO_1_raw_peaks.narrowPeak';
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm6.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)##这里使用的参考基因组与文章中不一样,dm6
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')##过滤要看的染色体
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))##这一步很关键,TxDb的染色体带chr开头,而数据原本不带chr开头
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno)
plotAnnoBar(peakAnno)
QQ截图20220824201033.png
QQ截图20220824201041.png
##anno-for-all-peaks
require(ChIPseeker)
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)
anno_bed <- function(bedPeaksFile){
peak <- readPeakFile( bedPeaksFile )
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Dm.eg.db")
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
return(peakAnno_df)
}
f=list.files(path = './',pattern = 'WT',full.names = T)
> f
[1] "./Pc_WT_peaks.narrowPeak" "./Ph_WT_peaks.narrowPeak"
[3] "./Pho_WT_peaks.narrowPeak" "./Psc_WT_peaks.narrowPeak"
[5] "./Spps_WT_peaks.narrowPeak"
tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
#table(x$annotation)
num1=length(grep('Promoter',x$annotation))
num2=length(grep("5' UTR",x$annotation))
num3=length(grep('Exon',x$annotation))
num4=length(grep('Intron',x$annotation))
num5=length(grep("3' UTR",x$annotation))
num6=length(grep('Intergenic',x$annotation))
return(c(num1,num2,num3,num4,num5,num6 ))
}))
colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){
(strsplit(basename(x),'\\.')[[1]][1])##这里的顺序很重要,先对x路径名取basename,再strsplit
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
fill = "dis", color = "dis", palette = "jco" )
QQ截图20220824203918.png
主要思路就是注释。
下一篇我们绘制韦恩图。
我们下一篇再见!
网友评论