1. 组蛋白分析流程ChIP-seq
mkdir 1.rawdata 2.cleandata 3.index 4.mapping 5.macs2_q0.05
workdir="/home/ypjia/6.data6/Bna_PaperMP2021_from-glli/1.ChIP-Seq_Out"
###### rawdata Q20_Q30_CG
cd ${workdir}/1.rawdata
for id in `cat ../Sample`;do
~/biosoft/reads_Q20_Q30_CG/fastq_stat ${id}_1.fastq.gz | awk '{print \$1\"\t\"\$2\"\t\"\$3\"\t\"\$4\"\t\"\$5}' > Out_${id}_1;
~/biosoft/reads_Q20_Q30_CG/fastq_stat ${id}_2.fastq.gz | awk '{print \$1\"\t\"\$2\"\t\"\$3\"\t\"\$4\"\t\"\$5}' > Out_${id}_2;
done
for id in `cat ../Sample`;do
cat Out_${id}_1 Out_${id}_2 |awk -F'\t' '{ for (i=1; i<=NF; i++) sum[i]+=$i } END { for (i=1; i<=NF; i++) print sum[i] }'|tr '\n' '\t'|awk '{print $0}' > Result_${id};
done
for id in `cat ../Sample`;do
cat Result_${id} >> reads_Q20_Q30_CG
rm Out_${id}_1 Out_${id}_2 Result_${id}
done
###### cleandata
cd ${workdir}/2.cleandata
for id in `cat ../Sample`; do
/home/ypjia/biosoft/fastp/fastp -i ${workdir}/1.rawdata/${id}_1.fastq.gz -o ./${id}_1.fq.gz -I ${workdir}/1.rawdata/${id}_2.fastq.gz -O ./${id}_2.fq.gz
done
###### Bowtie2 index
cd ${workdir}/3.index
#bowtie2-build -f /home/ypjia/5.data5/0.Public/Genome_ref/Bna_ZS11_v0/zs11.genome.fa --threads 10 ZS11_bowtie2_index
for id in `ls /home/ypjia/5.data5/0.Public/Genome_ref/Bna_ZS11_v0/bowtie2_index`;do ln -s ${id};done
###### mapping
cd ${workdir}/4.mapping
for id in `cat ../Sample`; do
module load Bowtie2/2.4.2
module load SAMtools/1.9
bowtie2 -q -p 10 --very-sensitive -X 1000 --fr -x ${workdir}/3.index/ZS11_bowtie2_index -1 ${workdir}/2.cleandata/${id}_1.fq.gz -2 ${workdir}/2.cleandata/${id}_2.fq.gz -S ${id}.sam 2> ${id}.bowtie2.log
samtools view -@ 10 -b -S -h -F 12 ${id}.sam > ${id}.bam
samtools sort -o ${id}.sorted.bam -@ 10 ${id}.bam
samtools index -b -@ 10 ${id}.sorted.bam
rm ${id}.sam ${id}.bam
java -jar /home/ypjia/biosoft/picard.jar MarkDuplicates REMOVE_DUPLICATES=true I=${id}.sorted.bam O=${id}.sorted.rmdup.bam M=${id}.sorted.rmdup.log
samtools index -b -@ 10 ${id}.sorted.rmdup.bam
done
## Summary of mapping
for id in `cat ../Sample`;do cat ${id}/${id}.bowtie2.log |tr ' ' '\t'|awk '{for(i=1; i<=NF; i++) { if ($i != "") { print $i; break; } } }'|sed -n -e '4p;5p;8p;13p;14p;1p'|tr '\n' '\t'|awk '{print $2*2+$3*2+$4*2+$5+$6"\t"$1*2}';done
for id in `cat ../Sample`;do sed -n 15p ${id}/${id}.bowtie2.log |tr ' ' '\t'|cut -f 1;done
for id in `cat ../Sample`;do sed -n 8p ${id}/${id}.sorted.rmdup.log |cut -f 7|awk '{print $0*2}';done
###### MACS2 call peak
cd ${workdir}/5.macs2_q0.0
for id in `cat ../Sample |grep -E 2063A`; do
macs2 callpeak -t ../4.mapping/${id}.sorted.rmdup.bam -c ../4.mapping/Control_leaf_2063A.sorted.rmdup.bam -q 0.05 -f BAM -g 1.01e9 -n ${id} --outdir ./ 2>> ./${id}.macs2.log; done
for id in `cat ../Sample |grep -E B409`; do
macs2 callpeak -t ../4.mapping/${id}.sorted.rmdup.bam -c ../4.mapping/Control_leaf_B409.sorted.rmdup.bam -q 0.05 -f BAM -g 1.01e9 -n ${id} --outdir ./ 2>> ./${id}.macs2.log; done
for id in `cat ../Sample`;do cat ${id}_peaks.narrowPeak |wc -l ;done
for id in `cat ../Sample`;do cat ${id}_peaks.narrowPeak |awk '{print $3-$2+1}'|awk '{sum+=$1};END{print sum}';done
## Summary of call peaks
for id in `cat ../Sample`;do cat ${id}_peaks.narrowPeak |wc -l ;done
for id in `cat ../Sample`;do cat ${id}_peaks.narrowPeak |awk '{print $3-$2+1}'|awk '{sum+=$1};END{print sum}';done
###### Other Analysis PCA
cd ${workdir}/4.mapping
source activate /home/ypjia/anaconda3/envs/deeptools
multiBamSummary bins --bamfiles flowerbud_H3K27ac_2063A_rep1.sorted.rmdup.bam flowerbud_H3K27ac_2063A_rep2.sorted.rmdup.bam flowerbud_H3K27ac_B409_rep1.sorted.rmdup.bam flowerbud_H3K27ac_B409_rep2.sorted.rmdup.bam flowerbud_H3K27me3_2063A_rep1.sorted.rmdup.bam flowerbud_H3K27me3_2063A_rep2.sorted.rmdup.bam flowerbud_H3K27me3_B409_rep1.sorted.rmdup.bam flowerbud_H3K27me3_B409_rep2.sorted.rmdup.bam flowerbud_H3K4me1_2063A_rep1.sorted.rmdup.bam flowerbud_H3K4me1_2063A_rep2.sorted.rmdup.bam flowerbud_H3K4me1_B409_rep1.sorted.rmdup.bam flowerbud_H3K4me1_B409_rep2.sorted.rmdup.bam flowerbud_H3K4me3_2063A_rep1.sorted.rmdup.bam flowerbud_H3K4me3_2063A_rep2.sorted.rmdup.bam flowerbud_H3K4me3_B409_rep1.sorted.rmdup.bam flowerbud_H3K4me3_B409_rep2.sorted.rmdup.bam flowerbud_H3K9me2_2063A_rep1.sorted.rmdup.bam flowerbud_H3K9me2_2063A_rep2.sorted.rmdup.bam flowerbud_H3K9me2_B409_rep1.sorted.rmdup.bam flowerbud_H3K9me2_B409_rep2.sorted.rmdup.bam flowerbud_RNAPII_2063A_rep1.sorted.rmdup.bam flowerbud_RNAPII_2063A_rep2.sorted.rmdup.bam flowerbud_RNAPII_B409_rep1.sorted.rmdup.bam flowerbud_RNAPII_B409_rep2.sorted.rmdup.bam leaf_H3K27ac_2063A_rep1.sorted.rmdup.bam leaf_H3K27ac_2063A_rep2.sorted.rmdup.bam leaf_H3K27ac_B409_rep1.sorted.rmdup.bam leaf_H3K27ac_B409_rep2.sorted.rmdup.bam leaf_H3K27me3_2063A_rep1.sorted.rmdup.bam leaf_H3K27me3_2063A_rep2.sorted.rmdup.bam leaf_H3K27me3_B409_rep1.sorted.rmdup.bam leaf_H3K27me3_B409_rep2.sorted.rmdup.bam leaf_H3K4me1_2063A_rep1.sorted.rmdup.bam leaf_H3K4me1_2063A_rep2.sorted.rmdup.bam leaf_H3K4me1_B409_rep1.sorted.rmdup.bam leaf_H3K4me1_B409_rep2.sorted.rmdup.bam leaf_H3K4me3_2063A_rep1.sorted.rmdup.bam leaf_H3K4me3_2063A_rep2.sorted.rmdup.bam leaf_H3K4me3_B409_rep1.sorted.rmdup.bam leaf_H3K4me3_B409_rep2.sorted.rmdup.bam leaf_H3K9me2_2063A_rep1.sorted.rmdup.bam leaf_H3K9me2_2063A_rep2.sorted.rmdup.bam leaf_H3K9me2_B409_rep1.sorted.rmdup.bam leaf_H3K9me2_B409_rep2.sorted.rmdup.bam leaf_RNAPII_2063A_rep1.sorted.rmdup.bam leaf_RNAPII_2063A_rep2.sorted.rmdup.bam leaf_RNAPII_B409_rep1.sorted.rmdup.bam leaf_RNAPII_B409_rep2.sorted.rmdup.bam root_H3K27ac_2063A_rep1.sorted.rmdup.bam root_H3K27ac_2063A_rep2.sorted.rmdup.bam root_H3K27ac_B409_rep1.sorted.rmdup.bam root_H3K27ac_B409_rep2.sorted.rmdup.bam root_H3K27me3_2063A_rep1.sorted.rmdup.bam root_H3K27me3_2063A_rep2.sorted.rmdup.bam root_H3K27me3_B409_rep1.sorted.rmdup.bam root_H3K27me3_B409_rep2.sorted.rmdup.bam root_H3K4me1_2063A_rep1.sorted.rmdup.bam root_H3K4me1_2063A_rep2.sorted.rmdup.bam root_H3K4me1_B409_rep1.sorted.rmdup.bam root_H3K4me1_B409_rep2.sorted.rmdup.bam root_H3K4me3_2063A_rep1.sorted.rmdup.bam root_H3K4me3_2063A_rep2.sorted.rmdup.bam root_H3K4me3_B409_rep1.sorted.rmdup.bam root_H3K4me3_B409_rep2.sorted.rmdup.bam root_H3K9me2_2063A_rep1.sorted.rmdup.bam root_H3K9me2_2063A_rep2.sorted.rmdup.bam root_H3K9me2_B409_rep1.sorted.rmdup.bam root_H3K9me2_B409_rep2.sorted.rmdup.bam root_RNAPII_2063A_rep1.sorted.rmdup.bam root_RNAPII_2063A_rep2.sorted.rmdup.bam root_RNAPII_B409_rep1.sorted.rmdup.bam root_RNAPII_B409_rep2.sorted.rmdup.bam silique_H3K27ac_2063A_rep1.sorted.rmdup.bam silique_H3K27ac_2063A_rep2.sorted.rmdup.bam silique_H3K27ac_B409_rep1.sorted.rmdup.bam silique_H3K27ac_B409_rep2.sorted.rmdup.bam silique_H3K27me3_2063A_rep1.sorted.rmdup.bam silique_H3K27me3_2063A_rep2.sorted.rmdup.bam silique_H3K27me3_B409_rep1.sorted.rmdup.bam silique_H3K27me3_B409_rep2.sorted.rmdup.bam silique_H3K4me1_2063A_rep1.sorted.rmdup.bam silique_H3K4me1_2063A_rep2.sorted.rmdup.bam silique_H3K4me1_B409_rep1.sorted.rmdup.bam silique_H3K4me1_B409_rep2.sorted.rmdup.bam silique_H3K4me3_2063A_rep1.sorted.rmdup.bam silique_H3K4me3_2063A_rep2.sorted.rmdup.bam silique_H3K4me3_B409_rep1.sorted.rmdup.bam silique_H3K4me3_B409_rep2.sorted.rmdup.bam silique_H3K9me2_2063A_rep1.sorted.rmdup.bam silique_H3K9me2_2063A_rep2.sorted.rmdup.bam silique_H3K9me2_B409_rep1.sorted.rmdup.bam silique_H3K9me2_B409_rep2.sorted.rmdup.bam silique_RNAPII_2063A_rep1.sorted.rmdup.bam silique_RNAPII_2063A_rep2.sorted.rmdup.bam silique_RNAPII_B409_rep1.sorted.rmdup.bam silique_RNAPII_B409_rep2.sorted.rmdup.bam --binSize 10000 --numberOfProcessors 10 --outRawCounts sample_10K.txt -o sample_10K.npz;
plotCorrelation -in sample_10K.npz --corMethod spearman --skipZeros --plotTitle "Sperman Correlation of Read Counts" --whatToPlot heatmap --colorMap RdYlBu --plotNumbers -o heatmap_SpearmanCorr.pdf --outFileCorMatrix SpearmanCorr_readCounts.tab
###### Other Analysis Phantompeakqualtools
cd ${workdir}/4.mapping
source activate /home/ypjia/anaconda3/envs/Phantompeakqualtools
for id in `cat ../Sample`; do
/home/ypjia/biosoft/phantompeakqualtools/run_spp.R -s=-100:5:500 -c=${id}.sorted.rmdup.bam -p=30 -savp -out=${id}.sorted.rmdup.bam.qual > ${id}.sorted.rmdup.bam.Rout;done
2. 甲基化流程WGBS-seq
mkdir 1.rawdata 2.cleandata 3.index 4.mapping 5.macs2_q0.05
workdir="/home/ypjia/6.data6/Bna_PaperMP2021_from-glli/1.WGBS-Seq_Out"
###### rawdata Q20_Q30_CG
cd ${workdir}/1.rawdata
for id in `cat ../Sample`;do
~/biosoft/reads_Q20_Q30_CG/fastq_stat ${id}_1.fastq.gz | awk '{print \$1\"\t\"\$2\"\t\"\$3\"\t\"\$4\"\t\"\$5}' > Out_${id}_1;
~/biosoft/reads_Q20_Q30_CG/fastq_stat ${id}_2.fastq.gz | awk '{print \$1\"\t\"\$2\"\t\"\$3\"\t\"\$4\"\t\"\$5}' > Out_${id}_2;
done
for id in `cat ../Sample`;do
cat Out_${id}_1 Out_${id}_2 |awk -F'\t' '{ for (i=1; i<=NF; i++) sum[i]+=$i } END { for (i=1; i<=NF; i++) print sum[i] }'|tr '\n' '\t'|awk '{print $0}' > Result_${id};
done
for id in `cat ../Sample`;do
cat Result_${id} >> reads_Q20_Q30_CG
rm Out_${id}_1 Out_${id}_2 Result_${id}
done
###### cleandata
cd ${workdir}/2.cleandata
for id in `cat ../Sample`; do
/home/ypjia/biosoft/fastp/fastp -i ${workdir}/1.rawdata/${id}_1.fastq.gz -o ./${id}_1.fq.gz -I ${workdir}/1.rawdata/${id}_2.fastq.gz -O ./${id}_2.fq.gz
done
###### zs11 index
cd ${workdir}/3.zs11_index
ln -s /home/ypjia/5.data5/0.Public/Genome_ref/Bna_ZS11_v0/zs11.genome.fa
ln -s /home/ypjia/5.data5/0.Public/Genome_ref/Bna_ZS11_v0/zs11.genome.fa.fai
/home/ypjia/biosoft/bismark_v0.22.1/bismark_genome_preparation --bowtie2 --verbose /home/ypjia/6.data6/Bna_PaperMP2021_from-glli/1.WGBS-Seq_Out/3.zs11_index
###### zs11 bismark
cd ${workdir}/4.zs11_bismark
for id in `cat ../Sample`;do
module load Bowtie2/2.4.2
module load Bismark/0.23.1
module load SAMtools/1.9
bismark --genome ${workdir}/3.zs11_index -1 ${workdir}/2.cleandata/${id}_1.fq.gz -2 ${workdir}/2.cleandata/${id}_2.fq.gz -p 20 -o ./ 2> ${id}_bowtie2.log;
deduplicate_bismark --bam -p ./${id}_1_bismark_bt2_pe.bam --samtools_path /home/software/bio/samtools-1.9/samtools;
bismark_methylation_extractor --multicore 20 --gzip --bedGraph --buffer_size 20G --CX --cytosine_report --genome_folder ${workdir}/3.zs11_index/ ./${id}_1_bismark_bt2_pe.deduplicated.bam 2>${id}_extracor.log;
done
###### lambda index
cd ${workdir}/5.lambda_index
/home/ypjia/biosoft/bismark_v0.22.1/bismark_genome_preparation --bowtie2 --verbose /home/ypjia/6.data6/Bna_PaperMP2021_from-glli/1.WGBS-Seq_Out/5.lambda_index
###### lambda bismark
cd ${workdir}/6.lambda_bismark
for id in `cat ../Sample`;do
module load Bowtie2/2.4.2
module load Bismark/0.23.1
module load SAMtools/1.9
bismark --genome ${workdir}/5.lambda_index -1 ${workdir}/2.cleandata/${id}_1.fq.gz -2 ${workdir}/2.cleandata/${id}_2.fq.gz -p 20 -o ./ 2> ${id}_bowtie2.log;
done
3. Hi-C流程 Juicer
mkdir fastq references
ln -s /home/ypjia/biosoft/juicer/CPU scripts
######
cd fastq
mv ${id}.1.fq.gz ${id}_R1.fastq.gz
mv ${id}.2.fq.gz ${id}_R2.fastq.gz
cd references
module load BWA/0.7.15
bwa index ./zs11.genome.fa # 参考基因组索引文件
python2.7 generate_site_positions.py DpnII ZS11 zs11.genome.fa # 酶切位点文件,我这里用DpnII
awk 'BEGIN{OFS="\t"}{print $1, $NF}' ZS11_DpnII.txt | head -n 19 > ./ZS11.sizes #染色质信息,也可以从.fai得到
###### run juicer
module load BWA/0.7.15
module load SAMtools/1.9
./scripts/juicer.sh -d /home/ypjia/4.data4/04.Others/yzq_HiC_NY7 -s DpnII -p ./references/ZS11.sizes -y ./references/ZS11_DpnII.txt -z ./references/zs11.genome.fa -D /home/ypjia/4.data4/04.Others/yzq_HiC_NY7
###### .hic后续处理略
# Hi-C其他工具:Juicer、HiC-Pro、HiCexplorer、HiCUP
# 存储格式.hic .h5 .cool等
# 可视化工具:HiCPlotter、pyGenomeTracks
# TAD鉴定 tadtools TopDom
# 3D模建工具:MOGEN
# RI、ICF、压缩指数等的计算
# 本人上述工具都使用过,此处不详细介绍,后续更多分析是将.hic的矩阵提取出来进行其他分析,比如A/B compartment划分
网友评论