美文网首页RNASeq 数据分析
GATK's Pipeline for calling

GATK's Pipeline for calling

作者: 晨语凡心 | 来源:发表于2018-02-01 16:37 被阅读58次

    #!/usr/bin/sh

    genome=/home/chenfan/mnt/sdc/RNAseq/hs37d5.fasta

    refgene=/home/chenfan/mnt/sdc/RNAseq/hg19_Refgene.gtf

    picard=/home/chenfan/mnt/sdc/RNAseq/picard.jar

    gatk=/home/chenfan/mnt/sdc/RNAseq/GenomeAnalysisTK.jar

    knownindel=/home/chenfan/VQSR/data/Mills_and_1000G_gold_standard.indels.b37.vcf

    knownsnp=/home/chenfan/VQSR/data/1000G_phase1.snps.high_confidence.b37.vcf

    result_path=/home/chenfan/mnt/sdc/RNAseq/result

    #------------------------------STAR 2-pass alignment steps-----------------------------------------

    echo "------------------------------STAR 2-pass alignment steps-----------------------------------------"

    in_dir=${result_path}/1_bamtofastq_output

    out_dir=${result_path}/2_star_output

    cd ${out_dir}/1index

    STAR --runMode genomeGenerate --genomeDir ${out_dir}/1index --genomeFastaFiles $genome --sjdbGTFfile $refgene --runThreadN 30

    cd ${out_dir}/1pass

    STAR --genomeDir ${out_dir}/1index --readFilesIn ${in_dir}/1.fastq  ${in_dir}/2.fastq --runThreadN 30

    cd ${out_dir}/2index

    STAR --runMode genomeGenerate --genomeDir ${out_dir}/2index --genomeFastaFiles $genome --sjdbFileChrStartEnd ${out_dir}/1pass/SJ.out.tab --sjdbOverhang 154 --runThreadN 30

    cd ${out_dir}/2pass

    STAR --genomeDir ${out_dir}/2index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq  --runThreadN 30

    #------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------

    echo "------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/3_picard_output

    cd ${out_dir}

    java -jar $picard AddOrReplaceReadGroups I=${in_dir}/2pass/Aligned.out.sam O=${out_dir}/sort_addRG_align.bam SO=coordinate RGID=hap1 RGLB=rnaseq RGPL=illumina RGPU=runname RGSM=C20

    java -jar $picard MarkDuplicates I=${out_dir}/sort_addRG_align.bam O=${out_dir}/markdup_sort_addRG_align.bam  CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=${out_dir}/output.metrics

    #-------------------------------SplitNCigarReads---------------------------------------

    echo "------------------------------SplitNCigarReads-----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/4_splitncigarReads_output

    cd ${out_dir}

    java -jar $gatk -T SplitNCigarReads -R $genome -I ${in_dir}/markdup_sort_addRG_align.bam -o ${out_dir}/split_markdup_sort_addRG_align.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS

    #-------------------------------Indel Realignment ---------------------------------------

    echo "------------------------------Indel Realignment -----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/5_indelrealign_output

    cd ${out_dir}

    java -jar $gatk \

    -T RealignerTargetCreator \

    -nt 32 \

    -R $genome \

    -I ${in_dir}/split_markdup_sort_addRG_align.bam \

    -o ${out_dir}/realign_interval.list \

    -known $knownindel

    java -jar $gatk \

    -T IndelRealigner \

    -R $genome \

    -I ${in_dir}/split_markdup_sort_addRG_align.bam \

    -known $knownindel \

    -o ${out_dir}/realign_split_markdup_sort_addRG_align.bam \

    -targetIntervals ${out_dir}/realign_interval.list

    #-------------------------------Base Recalibration---------------------------------------

    echo "------------------------------Base Recalibration-----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/6_bqsr_output

    cd ${out_dir}

    java -jar $gatk \

    -T BaseRecalibrator \

    -nct 32 \

    -R $genome \

    -I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

    -knownSites $knownsnp \

    -knownSites $knownindel \

    -o ${out_dir}/recal_data.table

    java -jar $gatk \

    -T PrintReads \

    -nct 32 \

    -R $genome \

    -I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

    -BQSR ${out_dir}/recal_data.table \

    -o ${out_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam

    #-------------------------------Variant calling---------------------------------------

    echo "------------------------------Variant calling-----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/7_callvariants_output

    cd ${out_dir}

    java -jar $gatk \

    -T HaplotypeCaller \

    -nct 32 \

    -R $genome \

    -ploidy 1 \

    -I ${in_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam \

    -dontUseSoftClippedBases \

    -stand_call_conf 20.0 \

    -o ${out_dir}/Hap1_rnaseq.vcf

    #-------------------------------Variant filtering---------------------------------------

    echo "------------------------------Variant filtering-----------------------------------------"

    in_dir=${out_dir}

    out_dir=${result_path}/8_filtervariants_output

    cd ${out_dir}

    java -jar $gatk \

    -T VariantFiltration \

    -R $genome \

    -V ${in_dir}/Hap1_rnaseq.vcf \

    -window 35 \

    -cluster 3 \

    --filterName FS -filter "FS>30.0" \

    --filterName QD -filter "QD<2.0" \

    -o ${out_dir}/filtered_Hap1_rnaseq.vcf

    java -jar $gatk \

    -T SelectVariants \

    -ef \

    -R $genome \

    -V ${out_dir}/filtered_Hap1_rnaseq.vcf \

    -o ${out_dir}/ef_Hap1_C20_rnaseq.vcf

    相关文章

      网友评论

        本文标题:GATK's Pipeline for calling

        本文链接:https://www.haomeiwen.com/subject/dmndzxtx.html