美文网首页
11.BWA重新跑-qualimap-GATK

11.BWA重新跑-qualimap-GATK

作者: jiarf | 来源:发表于2022-07-18 22:39 被阅读0次

    重新跑的脚本

    ## bwa.sh
    cd /data1/jiarongf/learning/cancer-WES/3.clean
    cat ../data/case |while read id
    do
        fq1=${id}_1_val_1.fq.gz
        fq2=${id}_2_val_2.fq.gz
        echo $fq1
        echo $fq2
        #bwa mem -M -t 11 -R "@RG    ID:$id  SM:$id  LB:WXS  PL:Illumina" /nas01/Genome/bwa_Homo_sapiens.GRCh38_index/Homo_sapiens_index $fq1 $fq2 \
        #bwa mem -M -t 11 -R "@RG\tID:$id\tSM:$id\tLB:WXS\tPL:Illumina" /nas01/Genome/bwa_Homo_sapiens.GRCh38_index/Homo_sapiens_index $fq1 $fq2 \
        # | samtools sort -@ 11 -m 1G --reference /nas01/Genome/fasta_gtf/Homo_sapiens.GRCh38.dna.toplevel.fa -o ../4.align/${id}.bam -
        bwa mem -M -t 11 -R "@RG\tID:$id\tSM:$id\tLB:WXS\tPL:Illumina" /data1/jiarongf/learning/cancer-WES/bwa/Homo_sapiens_assembly38 $fq1 $fq2 \
         | samtools sort -@ 11 -m 1G --reference /data1/jiarongf/learning/cancer-WES/bwa/Homo_sapiens_assembly38.fa -o ../4.bwaout/${id}.bam -
    done
    
    #bwa mem -M -t 11 -R "@RG\tID:$id\tSM:$id\tLB:WXS\tPL:Illumina" /nas01/Genome/bwa_Homo_sapiens.GRCh38_index/Homo_sapiens_index case5_germline_WES_1_val_1.fq.gz case5_germline_WES_2_val_2.fq.gz > case5_germline_WES.sam
    #samtools sort -@ 11 -m 1G --reference /nas01/Genome/fasta_gtf/Homo_sapiens.GRCh38.dna.toplevel.fa -o ../4.align/case5_germline_WES.bam case5_germline_WES.sam
    

    跑完之后的就有chr了

    case5_biorep_C_WES.4916 147     chr1    451229  0       75M     =       451196 -108     GGTCCAGGCAACAGCCAGAAATGAAAGGCACATTCTTGGGCTCATAATGGTCAGATAGTGGAGGGGCTTACATAG     ??=@>@BDDA@DCECDBEA@?CE@@DCDD@D?BACACBCDD@D@@?A@BCAD>D@?CACBD@BBAAB@?>?=:??     NM:i:0  MD:Z:75 MC:Z:76M        AS:i:75 XS:i:75 RG:Z:case5_biorep_C_WES
    

    qualimap

    #cd /data1/jiarongf/learning/cancer-WES/4.align
    #ls /data1/jiarongf/learning/cancer-WES/4.align/*bam | while read id
    cd /data1/jiarongf/learning/cancer-WES/4.bwaout
    ls /data1/jiarongf/learning/cancer-WES/4.bwaout/*bam | while read id
    do
        file=$(basename ${id} .bam)
        #qualimap bamqc --java-mem-size=40G -gff /data1/jiarongf/learning/cancer-WES/4.align/hg38.exon2.bed -nr 100000 -nw 500 -nt 16 -bam $id -outdir /data1/jiarongf/learning/cancer-WES/4.align/qualimap/${file}
        qualimap bamqc --java-mem-size=40G -gff /data1/jiarongf/learning/cancer-WES/4.align/hg38.exon.bed -nr 100000 -nw 500 -nt 16 -bam $id -outdir /data1/jiarongf/learning/cancer-WES/4.bwaout/qualimap/${file}
    done
    
    
    #cat CCDS.current.txt |grep  "Public" |  perl -alne '{/\[(.*?)\]/;next unless $1;$gene=$F[2];$exons=$1;$exons=~s/\s//g;$exons=~s/-/\t/g;print "$F[0]\t$_\t$gene" foreach split/,/,$exons;}' | sort -u | bedtools sort -i |awk '{print $0"\t0\t+"}'  > hg38.exon2.bed
    #cat CCDS.current.txt |grep  "Public" |  perl -alne '{/\[(.*?)\]/;next unless $1;$gene=$F[2];$exons=$1;$exons=~s/\s//g;$exons=~s/-/\t/g;print "$F[0]\t$_\t$gene" foreach split/,/,$exons;}' | sort -u | bedtools sort -i |awk '{print "chr"$0"\t0\t+"}'  > hg38.exon.bed
    
    #samtools view test_marked.bam | awk '$3="chr"$3' | awk -F ' ' '$7=($7=="=" || $7=="*"?$7:sprintf("chr%s",$7))' | tr ' ' '\t' | samtools view -b > output.bam
    # [E::sam_parse1] missing SAM header
    # [W::sam_read1] Parse error at line 199
    # [main_samview] truncated file.
    
    #samtools view -H test_marked.bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' | samtools reheader - test_marked.bam > output.bam
    #[E::cram_get_ref] Failed to populate reference for id 0
    #samtools view -T test_marked.bam | sed -e 's/SN:\([0-9XY]\)/SN:chr\1/' -e 's/SN:MT/SN:chrM/' | samtools reheader - test_marked.bam > test.CHR.bam
    #Segmentation fault (core dumped)
    
    #samtools view -h test_marked.bam | sed -e '/^@SQ/s/SN\:/SN\:chr/' -e '/^[^@]/s/\t/\tchr/2'|awk -F ' ' '$7=($7=="=" || $7=="*"?$7:sprintf("chr%s",$7))' |tr " " "\t" | samtools view -h -b -@ 10 -S - > test.chr.bam
    #成功!
    
    (wes) 09:11:09 jiarongf@172.16.10.223:/data1/jiarongf/learning/cancer-WES/run
    $
    bash qualimap.sh > ../log/qualimap.sh2.log 2>&1
    
    qualimap.sh2.log

    log的一部分截图如上面,跑成功了


    跑了一整天,从早上九点,到晚上十一点

    GATK

    1.1 markduplicate
    GATK=/data1/jiarongf/learning/cancer-WES/run/gatk-4.2.6.1/gatk
    cd /data1/jiarongf/learning/cancer-WES/4.bwaout
    #cd /data1/jiarongf/learning/cancer-WES/4.align
    cat ../data/case  | while read id
    do
        BAM=./${id}.bam
    
        if [ ! -f ${id}_marked.bam ]
        then
        echo "start MarkDuplicates for ${id}" `date`
        $GATK --java-options "-Xmx20G -Djava.io.tmpdir=./" MarkDuplicates \
        -I ${BAM} \
        -O ${id}_marked.bam \
        -M ${id}.metrics \
        1>../log/${id}_log2.mark 2>&1 
        echo "end MarkDuplicates for ${id}" `date`
        samtools index -@ 16 -m 4G -b ${id}_marked.bam
        fi
    done
    
    # gatk --java-options "-Xmx20G -Djava.io.tmpdir=./" MarkDuplicates \
        # -I case6_techrep_2_WES.bam \
        # -O case6_techrep_2_WES_marked.bam \
        # -M case6_techrep_2_WES.metrics \
        # 1>../log/case6_techrep_2_WES_log.mark 2>&1 
    
    (wes) 08:56:21 jiarongf@172.16.10.223:/data1/jiarongf/learning/cancer-WES/run
    $
    bash markDuplicates.sh > ../log/markDuplicates2.sh.log 2>&1
    (wes) 21:11:13 jiarongf@172.16.10.223:/data1/jiarongf/learning/cancer-WES/run
    $
    cat ../log/markDuplicates2.sh.log | tail
    start MarkDuplicates for case6_techrep_2_WES Tue May 10 19:17:49 CST 2022
    end MarkDuplicates for case6_techrep_2_WES Tue May 10 19:42:06 CST 2022
    start MarkDuplicates for case1_techrep_2_WES Tue May 10 19:42:35 CST 2022
    end MarkDuplicates for case1_techrep_2_WES Tue May 10 20:08:32 CST 2022
    start MarkDuplicates for case2_techrep_2_WES Tue May 10 20:09:05 CST 2022
    end MarkDuplicates for case2_techrep_2_WES Tue May 10 20:31:39 CST 2022
    start MarkDuplicates for case5_techrep_2_WES Tue May 10 20:32:05 CST 2022
    end MarkDuplicates for case5_techrep_2_WES Tue May 10 20:58:15 CST 2022
    start MarkDuplicates for case5_biorep_C_WES Tue May 10 20:59:28 CST 2022
    end MarkDuplicates for case5_biorep_C_WES Tue May 10 21:10:59 CST 2022
    

    跑完了

    1.2 bqsr
    GATK=/data1/jiarongf/learning/cancer-WES/run/gatk-4.2.6.1/gatk
    snp=/data1/jiarongf/learning/cancer-WES/run/gatk_resource/dbsnp_146.hg38.vcf.gz
    indel=/data1/jiarongf/learning/cancer-WES/run/gatk_resource/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz
    ref=/data1/jiarongf/learning/cancer-WES/run/gatk_resource/Homo_sapiens_assembly38.fasta
    cd /data1/jiarongf/learning/cancer-WES/4.bwaout
    #cd /data1/jiarongf/learning/cancer-WES/5.GATK
    cat ../data/case  | while read id
    #cat ../data/small.case.txt | while read id
    do
        if [ ! -f ${id}_bqsr.bam ]
        then
        echo "start BQSR for ${id}" `date`
        $GATK --java-options "-Xmx20G -Djava.io.tmpdir=./tmp/"  BaseRecalibrator \
        -R $ref  \
        -I ${id}_marked.bam  \
        --known-sites $snp \
        --known-sites $indel \
        -O ${id}_recal.table \
        1>../log/${id}_log2.recal 2>&1 
    
        $GATK --java-options "-Xmx20G -Djava.io.tmpdir=./tmp"  ApplyBQSR \
        -R $ref  \
        -I ${id}_marked.bam  \
        -bqsr ${id}_recal.table \
        -O ${id}_bqsr.bam \
        1>../log/${id}_log2.ApplyBQSR  2>&1 
    
        echo "end BQSR for ${id}" `date`
        fi
    done
    
    # gatk --java-options "-Xmx20G -Djava.io.tmpdir=./"  BaseRecalibrator \
        # -R /data1/jiarongf/learning/cancer-WES/run/gatk_resource/Homo_sapiens_assembly38.fasta  \
        # -I case1_biorep_A_techrep_WES_marked.chr.sed.bam  \
        # --known-sites /data1/jiarongf/learning/cancer-WES/run/gatk_resource/dbsnp_146.hg38.vcf.gz \
        # --known-sites /data1/jiarongf/learning/cancer-WES/run/gatk_resource/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz \
        # -O case1_biorep_A_techrep_WES_recal.table \
    
    (wes) 14:31:27 jiarongf@172.16.10.223:/data1/jiarongf/learning/cancer-WES/run
    $
    bash bqsr.sh > ../log/bqsr.sh2.log 2>&1
    

    相关文章

      网友评论

          本文标题:11.BWA重新跑-qualimap-GATK

          本文链接:https://www.haomeiwen.com/subject/nozcurtx.html