1 ChIP-Seq分析结果：peaks相关性绘图

第五步，ChIP-Seq-correlation。

ChIP-Seq和RNA-Seq的.bw文件都可以放在IGV进行可视化比较，但是要说明ChIP-Seq数据的相关性就需要另外的处理，一般是deeptools。

因为ChIP-Seq数据无法通过基因为单位进行定量。

这个程序是以KB为长度进行定量。

multiBigwigSummary  bins -b  *WT*bw  -o wt_results.npz -p 8

##绘制热图
plotCorrelation -in results.npz \
--corMethod spearman -- skipZeros \
--plotTitle "Spearman Correlation of Read Counts" \
--whatToPlot heatmap --colorMap RdYlBu --plotNumbers \
--plotFileFormat pdf \
-o heatmap_SpearmanCorr_readCounts.pdf \
--outFileCorMatrix SpearmanCorr_readCounts.tab

结果图，不是很好看。

QQ截图20220824155531.png

我们把结果.tab文件导入R进行绘图。

rm(list = ls())
options(stringsAsFactors = F)
a=read.table('SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)
library(stringr)
ac=data.frame(group=str_split(rownames(a),'_',simplify = T)[,1])
rownames(ac)=colnames(a)
M=a
pheatmap::pheatmap(M,
                   annotation_col = ac) 
 

a=read.table('merge_SpearmanCorr_readCounts.tab')
pheatmap::pheatmap(a)

QQ截图20220824155915.png

QQ截图20220824155926.png

QQ截图20220824161141.png

2 ChIPQC包的使用

第六步， ChIPQC。

library(ChIPQC)
samples = read.csv(file.path(system.file("extdata", package="ChIPQC"),
                             "example_QCexperiment.csv"))
samples

##自带的example没有附带数据，所以运行不了
exampleExp = ChIPQC(samples,annotaiton="hg19")
QCmetrics(exampleExp)  #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)

##表格需要以下内容（.bam文件）

# SampleID: 样本ID
# Tissue, Factor, Condition: 不同的实验设计对照信息
# 三列信息必须包含在sampleSheet里，如果没有某一列的信息设为NA。
# Replicate : 重复样本的编号
# bamReads : 实验组BAM 文件的路径（data/bams）
# ControlID : 对照组样本ID
# bamControl :对照组样本的bam文件路径
# Peaks :样本peaks文件的路径
# PeakCaller ：peak类型的字符串，可以是raw,bed,narrow,macs等。

(bams=list.files(path = '~/fly/CHIP-SEQ/align/',
                 pattern = '*.raw.bam$', full.names = T))
bams=bams[grepl('WT',bams)]
peaks=list.files(path = '~/fly/CHIP-SEQ/peaks-single/',pattern = '*_raw_peaks.narrowPeak', full.names = T)
peaks=peaks[grepl('WT',peaks)]


library(stringr)
SampleID=sub('.raw.bam','',basename(bams))
Replicate=str_split(basename(bams),'_',simplify = T)[,3]
Factor=str_split(basename(bams),'_',simplify = T)[,1]


samples=data.frame(SampleID=SampleID,
                   Tissue='WT', 
                   Factor=Factor, 
                   Replicate=1,            
                   bamReads=bams,                           
                   Peaks=peaks) 

exampleExp = ChIPQC(samples,
                    chromosomes='2L',##只看了一条染色体
                    annotaiton="dm6")
QCmetrics(exampleExp)  #shows a summary of the main QC metrics
ChIPQCreport(exampleExp)

运行成功会出现一个报表。

QQ截图20220824201538.png

QQ截图20220824201554.png
第七步，peaks-anno-dm6，自己走的流程，ChIP-Seq的peaks进行注释。

##anno-for-each-bed

bedPeaksFile        = 'Ez_SppsKO_1_raw_peaks.narrowPeak'; 
bedPeaksFile
## loading packages
require(ChIPseeker)
# BiocManager::install("TxDb.Dmelanogaster.UCSC.dm6.ensGene")
# BiocManager::install("org.Dm.eg.db")
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)##这里使用的参考基因组与文章中不一样，dm6
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler) 
peak <- readPeakFile( bedPeaksFile ) 
seqlevels(peak)
keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')##过滤要看的染色体
seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))##这一步很关键，TxDb的染色体带chr开头，而数据原本不带chr开头
seqlevels(peak)
cat(paste0('there are ',length(peak),' peaks for this data' ))
peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000), 
                         TxDb=txdb, annoDb="org.Dm.eg.db") 
peakAnno_df <- as.data.frame(peakAnno)
sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
print(sampleName)
plotAnnoPie(peakAnno) 
plotAnnoBar(peakAnno)

QQ截图20220824201033.png

QQ截图20220824201041.png

##anno-for-all-peaks

require(ChIPseeker) 
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
require(clusterProfiler)  
anno_bed <- function(bedPeaksFile){
  peak <- readPeakFile( bedPeaksFile )  
  seqlevels(peak)
  keepChr= seqlevels(peak) %in% c('2L','2R','3L','3R','4','X','Y','M')
  seqlevels(peak, pruning.mode="coarse") <- seqlevels(peak)[keepChr]
  seqlevels(peak, pruning.mode="coarse") <- paste0('chr',seqlevels(peak))
  seqlevels(peak)
  cat(paste0('there are ',length(peak),' peaks for this data' ))
  peakAnno <- annotatePeak(peak, tssRegion=c(-3000, 3000), 
                           TxDb=txdb, annoDb="org.Dm.eg.db") 
  peakAnno_df <- as.data.frame(peakAnno)
  sampleName=basename(strsplit(bedPeaksFile,'\\.')[[1]][1])
  return(peakAnno_df)
}

f=list.files(path = './',pattern = 'WT',full.names = T)

> f
[1] "./Pc_WT_peaks.narrowPeak"   "./Ph_WT_peaks.narrowPeak"  
[3] "./Pho_WT_peaks.narrowPeak"  "./Psc_WT_peaks.narrowPeak" 
[5] "./Spps_WT_peaks.narrowPeak"

tmp = lapply(f, anno_bed)
head(tmp[[1]])
df=do.call(rbind,lapply(tmp, function(x){
  #table(x$annotation)
  num1=length(grep('Promoter',x$annotation))
  num2=length(grep("5' UTR",x$annotation))
  num3=length(grep('Exon',x$annotation))
  num4=length(grep('Intron',x$annotation))
  num5=length(grep("3' UTR",x$annotation))
  num6=length(grep('Intergenic',x$annotation))
  return(c(num1,num2,num3,num4,num5,num6 ))
}))

colnames(df)=c('Promoter',"5' UTR",'Exon','Intron',"3' UTR",'Intergenic')
rownames(df)=unlist(lapply(f, function(x){ 
  (strsplit(basename(x),'\\.')[[1]][1])##这里的顺序很重要，先对x路径名取basename，再strsplit
}))
library(reshape2)
df2=melt(apply(df,1,function(x) x/sum(x)))
colnames(df2)=c('dis','sample','fraction')
library(ggpubr)
ggbarplot(df2, "sample", "fraction",
          fill = "dis", color = "dis", palette = "jco"  )