#!/bin/sh
#PBS -N DL_FQ
#PBS -l nodes=1:ppn=16
#PBS -o /home/omics2017/hx_du/test1.log
#PBS -e /home/omics2017/hx_du/test1.err
#PBS -q batch
#PBS -l walltime=120:00:00
wkdir=/bios-store2/hx_du/fq_td/HBB300K_200501/200501/PE
src=/home/omics2017/hx_du/program
ref=/bios-store2/hx_du/NIPT/reference
bedfile=/bios-store2/hx_du/HBB300K_200327/test20200331/all_target_segments_covered_by_probes_HBB_v1_hg19_191108105530.bed
cd ${wkdir}
samples=$(for i in $(ls *_R1*.fastq.gz);do echo "${i%%_*}";done)
for ID in $samples
do
echo $ID
if [ -f ${ID}*R2*.fastq.gz ];then
echo ${ID} "R2 exist"
java -jar $src/Trimmomatic-0.38/trimmomatic-0.38.jar PE -threads 16 $wkdir/${ID}*R1*.fastq.gz $wkdir/${ID}*R2*.fastq.gz ${ID}_1_paired.fq.gz ${ID}_1_unpaired.fq.gz ${ID}_2_paired.fq.gz ${ID}_2_unpaired.fq.gz LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
# PE ===========================
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_1_paired.fq.gz ${ID}_2_paired.fq.gz > ${ID}_pe.sam
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_1_unpaired.fq.gz > ${ID}_s1.sam
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}_2_unpaired.fq.gz > ${ID}_s2.sam
picard MergeSamFiles \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}_pe.sam \
INPUT=${ID}_s1.sam \
INPUT=${ID}_s2.sam \
OUTPUT=${ID}.bam
rm ${ID}*.sam
# #===========================
else
echo ${ID} "R2 noexist"
java -jar $src/Trimmomatic-0.38/trimmomatic-0.38.jar SE -threads 16 $wkdir/${ID}*R1*.fastq.gz ${ID}.trimmed.fq.gz LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
$src/bwa-0.7.17/bwa mem -t 16 -M $ref/genome.fa ${ID}.trimmed.fq.gz > ${ID}_s1.sam
picard MergeSamFiles \
INPUT=${ID}_s1.sam \
OUTPUT=${ID}.bam
rm ${ID}*.sam
fi
picard AddOrReplaceReadGroups \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.bam \
OUTPUT=${ID}.add.bam \
RGID=group1 RGLB=lib1 RGPL=illumina RGPU=unit1 RGSM=${ID}
picard SortSam \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.add.bam \
OUTPUT=${ID}.add.sort.bam \
SORT_ORDER=coordinate
picard MarkDuplicates \
INPUT=${ID}.add.sort.bam \
OUTPUT=${ID}.add.dedup.bam \
METRICS_FILE=metrics.txt \
REMOVE_DUPLICATES=true
picard BuildBamIndex \
TMP_DIR=${wkdir}/tmp \
INPUT=${ID}.add.dedup.bam
# # Not essential ===========================
gatk BaseRecalibrator \
-R $ref/genome.fa \
-I ${ID}.add.dedup.bam \
--known-sites $ref/hg19/dbsnp_138.hg19.vcf \
--known-sites $ref/hg19/1000G_phase1.snps.high_confidence.hg19.sites.vcf \
-O ${ID}.recal.grp \
-L $bedfile
gatk ApplyBQSR \
-R $ref/genome.fa \
-I ${ID}.add.dedup.bam \
-bqsr ${ID}.recal.grp \
-O ${ID}.recal.bam \
-L $bedfile
# # ===========================
# # Singel Sample ===========================
gatk HaplotypeCaller \
-R $ref/genome.fa \
-I ${ID}.recal.bam \
-O ${ID}.raw.g.vcf \
-ERC GVCF \
-L $bedfile
# # =========================================
# # Multiple Sample ===========================
gatk GenotypeGVCFs \
-R $ref/genome.fa \
-V ${ID}.raw.g.vcf \
--include-non-variant-sites true \
-O ${ID}_genotype.vcf \
-L $bedfile
gatk SelectVariants \
-R $ref/genome.fa \
-V ${ID}_genotype.vcf \
--select-type-to-include SNP \
--select-type-to-include NO_VARIATION \
-O ${ID}_raw_snps.vcf \
-L $bedfile
#===========================
gatk VariantFiltration \
-V ${ID}_raw_snps.vcf \
--filter-expression "QD < 2.0 || FS > 200.0 || SOR > 10.0 || MQRankSum < -12.5 || ReadPosRankSum < -8.0 || DP < 10" \
--filter-name "Filter" \
-O ${ID}.filter.vcf
cat ${ID}.filter.vcf | grep -v '\./\.' > ${ID}.filter.gt.vcf
done
网友评论