#!/usr/bin/sh
genome=/home/chenfan/mnt/sdc/RNAseq/hs37d5.fasta
refgene=/home/chenfan/mnt/sdc/RNAseq/hg19_Refgene.gtf
picard=/home/chenfan/mnt/sdc/RNAseq/picard.jar
gatk=/home/chenfan/mnt/sdc/RNAseq/GenomeAnalysisTK.jar
knownindel=/home/chenfan/VQSR/data/Mills_and_1000G_gold_standard.indels.b37.vcf
knownsnp=/home/chenfan/VQSR/data/1000G_phase1.snps.high_confidence.b37.vcf
result_path=/home/chenfan/mnt/sdc/RNAseq/result
#------------------------------STAR 2-pass alignment steps-----------------------------------------
echo "------------------------------STAR 2-pass alignment steps-----------------------------------------"
in_dir=${result_path}/1_bamtofastq_output
out_dir=${result_path}/2_star_output
cd ${out_dir}/1index
STAR --runMode genomeGenerate --genomeDir ${out_dir}/1index --genomeFastaFiles $genome --sjdbGTFfile $refgene --runThreadN 30
cd ${out_dir}/1pass
STAR --genomeDir ${out_dir}/1index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq --runThreadN 30
cd ${out_dir}/2index
STAR --runMode genomeGenerate --genomeDir ${out_dir}/2index --genomeFastaFiles $genome --sjdbFileChrStartEnd ${out_dir}/1pass/SJ.out.tab --sjdbOverhang 154 --runThreadN 30
cd ${out_dir}/2pass
STAR --genomeDir ${out_dir}/2index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq --runThreadN 30
#------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------
echo "------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/3_picard_output
cd ${out_dir}
java -jar $picard AddOrReplaceReadGroups I=${in_dir}/2pass/Aligned.out.sam O=${out_dir}/sort_addRG_align.bam SO=coordinate RGID=hap1 RGLB=rnaseq RGPL=illumina RGPU=runname RGSM=C20
java -jar $picard MarkDuplicates I=${out_dir}/sort_addRG_align.bam O=${out_dir}/markdup_sort_addRG_align.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=${out_dir}/output.metrics
#-------------------------------SplitNCigarReads---------------------------------------
echo "------------------------------SplitNCigarReads-----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/4_splitncigarReads_output
cd ${out_dir}
java -jar $gatk -T SplitNCigarReads -R $genome -I ${in_dir}/markdup_sort_addRG_align.bam -o ${out_dir}/split_markdup_sort_addRG_align.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS
#-------------------------------Indel Realignment ---------------------------------------
echo "------------------------------Indel Realignment -----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/5_indelrealign_output
cd ${out_dir}
java -jar $gatk \
-T RealignerTargetCreator \
-nt 32 \
-R $genome \
-I ${in_dir}/split_markdup_sort_addRG_align.bam \
-o ${out_dir}/realign_interval.list \
-known $knownindel
java -jar $gatk \
-T IndelRealigner \
-R $genome \
-I ${in_dir}/split_markdup_sort_addRG_align.bam \
-known $knownindel \
-o ${out_dir}/realign_split_markdup_sort_addRG_align.bam \
-targetIntervals ${out_dir}/realign_interval.list
#-------------------------------Base Recalibration---------------------------------------
echo "------------------------------Base Recalibration-----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/6_bqsr_output
cd ${out_dir}
java -jar $gatk \
-T BaseRecalibrator \
-nct 32 \
-R $genome \
-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \
-knownSites $knownsnp \
-knownSites $knownindel \
-o ${out_dir}/recal_data.table
java -jar $gatk \
-T PrintReads \
-nct 32 \
-R $genome \
-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \
-BQSR ${out_dir}/recal_data.table \
-o ${out_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam
#-------------------------------Variant calling---------------------------------------
echo "------------------------------Variant calling-----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/7_callvariants_output
cd ${out_dir}
java -jar $gatk \
-T HaplotypeCaller \
-nct 32 \
-R $genome \
-ploidy 1 \
-I ${in_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam \
-dontUseSoftClippedBases \
-stand_call_conf 20.0 \
-o ${out_dir}/Hap1_rnaseq.vcf
#-------------------------------Variant filtering---------------------------------------
echo "------------------------------Variant filtering-----------------------------------------"
in_dir=${out_dir}
out_dir=${result_path}/8_filtervariants_output
cd ${out_dir}
java -jar $gatk \
-T VariantFiltration \
-R $genome \
-V ${in_dir}/Hap1_rnaseq.vcf \
-window 35 \
-cluster 3 \
--filterName FS -filter "FS>30.0" \
--filterName QD -filter "QD<2.0" \
-o ${out_dir}/filtered_Hap1_rnaseq.vcf
java -jar $gatk \
-T SelectVariants \
-ef \
-R $genome \
-V ${out_dir}/filtered_Hap1_rnaseq.vcf \
-o ${out_dir}/ef_Hap1_C20_rnaseq.vcf
网友评论