建立人基因组windows,计算GC含量
#!/bin/bash
mkdir bed
(
#子shell
cd bed
genome="hg38"
# fetch chromosome size
if [ ! -f ${genome}.chrom.sizes ]; then
fetchChromSizes $genome > ${genome}.chrom.sizes
fi
binsize=200000
stepsize=40000
fasta="/home/reference/Hsapiens/UCSC/hg38/Sequence/WholeGenomeFasta/hg38.fa"
# make windows
bedtools makewindows -g ${genome}.chrom.sizes -w $binsize -s $stepsize >${genome}_binsize_${binsize}.bed
# calculate GC content
bedtools nuc -fi $fasta -bed ${genome}_binsize_${binsize}.bed | cut -f 1-3,5 > ${genome}_binsize_${binsize}_gc.bed
)
质控
DIR=/home/maosong/replication/paper/rawdata
sp="_"
suffix=".fastq.gz"
ls $DIR/*$suffix | while read line; do echo ${line%_*}; done | uniq >sample.txt
if [ ! -d trim ]; then
mkdir trim
fi
for sample in $(cat sample.txt); do
trim_galore --paired ${sample}${sp}1${suffix} ${sample}${sp}2${suffix} --gzip -j 8 -o trim
done
比对
if [ ! -d bam ]; then
mkdir bam
fi
REF=/home/reference/Hsapiens/UCSC/hg38/Sequence/BwaIndex/hg38
for line in $(cat sample.txt); do
sample=$(basename $line)
bwa mem -t 16 $REF trim/${sample}_1_val_1.fq.gz trim/${sample}_2_val_2.fq.gz | samtools sort -@ 16 -o bam/${sample}.bam -
done
过滤掉比对不上的reads
for line in $(cat sample.txt); do
sample=$(basename $line)
samtools view -b -h -F 4 bam/${sample}.bam >bam/${sample}.mapped.bam
done
合并bam, 可选操作
(
cd bam
samtools merge G1.mapped.bam SRR6491823.mapped.bam SRR6491824.mapped.bam
samtools merge MidS.mapped.bam SRR6491825.mapped.bam SRR6491826.mapped.bam
)
for sample in G1 MidS; do
bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}.mapped.bam > depth/${sample}.depth.txt
done
计算每个bin的counts数
for line in $(cat sample.txt); do
sample=$(basename $line)
bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}_mapped.bam > depth/${sample}.depth.txt
done
网友评论