100天生信-Day4
最近在做类ChIP-seq,看了一下使用ChIPseeker注释peaks的方法,保存一下代码:
install.packages('R.utils')
BiocManager::install("GenomicFeatures")
BiocManager::install("ChIPseeker")
library(R.utils)
library(GenomicFeatures)
library(ChIPseeker)
library(ggplot2)
##制作TxDb注释文件
# 下载gff3
download.file('ftp://ftp.ensemblgenomes.org/pub/plants/release-51/gff3/triticum_aestivum/Triticum_aestivum.IWGSC.51.gff3.gz',destfile = 'Ta.gff3.gz')
# 解压
R.utils::gunzip('Ta.gff3.gz')
# 制作
wheat2 <- makeTxDbFromGFF("Ta.gff3")
## 读入BED文件
setwd('path')
Ta_peaks_file <- readPeakFile('Ta_peaks.bed')
## 可视化peaks
# 染色体上分布
covplot(Ta_peaks_file)
# TSS上下游折线图
plotAvgProf2(Ta_peaks_file, TxDb=wheat2,
upstream=3000, downstream=3000,
xlab="Genomic Region (5'->3')",
ylab = "Read Count Frequency",
conf = 0.95, resample = 1000)
# TSS上下游热图
peakHeatmap(Ta_peaks_file, TxDb=wheat2,
upstream=3000, downstream=3000,
color=rainbow(length(Ta_peaks_file)))
## 注释peaks
peakAnnoList <- annotatePeak(Ta_peaks_file, tssRegion=c(-2500,2500), TxDb=wheat2, flankDistance=5000)
# 转换为dataframe方便查看
peakAnnoList_frame <- as.data.frame(peakAnnoList)
# 保存dataframe
write.csv(peakAnnoList_frame, file = 'Ta_peakAnnoList.csv')
## 可视化注释情况
# 条形图
plotDistToTSS(peakAnnoList)
# 饼图
plotAnnoPie(peakAnnoList)
参考教程:https://www.jieandze1314.com/post/cnposts/190/
https://www.jianshu.com/p/a7b6ce208f98
网友评论