美文网首页
全基因组CNV分析

全基因组CNV分析

作者: bred | 来源:发表于2022-05-06 14:52 被阅读0次

    建立人基因组windows,计算GC含量

    #!/bin/bash
    mkdir bed
    (
    #子shell
    cd bed
    genome="hg38"
    # fetch chromosome size
    if [ ! -f ${genome}.chrom.sizes ]; then
        fetchChromSizes $genome > ${genome}.chrom.sizes
    fi
    binsize=200000
    stepsize=40000
    fasta="/home/reference/Hsapiens/UCSC/hg38/Sequence/WholeGenomeFasta/hg38.fa"
    # make windows
    bedtools makewindows -g ${genome}.chrom.sizes -w $binsize -s $stepsize >${genome}_binsize_${binsize}.bed
    # calculate GC content
    bedtools nuc -fi $fasta -bed ${genome}_binsize_${binsize}.bed | cut -f 1-3,5 > ${genome}_binsize_${binsize}_gc.bed
    )
    

    质控

    DIR=/home/maosong/replication/paper/rawdata
    sp="_"
    suffix=".fastq.gz"
    ls $DIR/*$suffix | while read line; do echo ${line%_*}; done | uniq >sample.txt
    
    if [ ! -d trim ]; then
        mkdir trim
    fi
    
    for sample in $(cat sample.txt); do
        trim_galore --paired ${sample}${sp}1${suffix} ${sample}${sp}2${suffix} --gzip -j 8 -o trim
    done
    

    比对

    if [ ! -d bam ]; then
        mkdir bam
    fi
    REF=/home/reference/Hsapiens/UCSC/hg38/Sequence/BwaIndex/hg38
    for line in $(cat sample.txt); do
        sample=$(basename $line)
        bwa mem -t 16 $REF trim/${sample}_1_val_1.fq.gz trim/${sample}_2_val_2.fq.gz | samtools sort -@ 16 -o bam/${sample}.bam -
    done
    

    过滤掉比对不上的reads

    for line in $(cat sample.txt); do
        sample=$(basename $line)
        samtools view -b -h -F 4 bam/${sample}.bam >bam/${sample}.mapped.bam
    done
    

    合并bam, 可选操作

    (
    cd bam
    samtools merge G1.mapped.bam SRR6491823.mapped.bam SRR6491824.mapped.bam
    samtools merge MidS.mapped.bam SRR6491825.mapped.bam SRR6491826.mapped.bam
    )
    for sample in G1 MidS; do
        bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}.mapped.bam > depth/${sample}.depth.txt
    done
    

    计算每个bin的counts数

    for line in $(cat sample.txt); do
        sample=$(basename $line)
        bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}_mapped.bam > depth/${sample}.depth.txt
    done
    

    相关文章

      网友评论

          本文标题:全基因组CNV分析

          本文链接:https://www.haomeiwen.com/subject/vfbsyrtx.html