Nanopore QC

作者: 落寞的橙子 | 来源:发表于2020-05-24 05:12 被阅读0次

    nanopore QC有好几种方法
    简单来说需要base call 后的sequencing_summary.txt文件
    pycoQC看起来比较酷炫
    1、pycoQC

    #!/bin/bash
    #SBATCH --time=1:00:00
    #SBATCH --cpus-per-task=2
    #SBATCH --mem=2g
    dir=/data/Nanopore
    log_dir=${dir}/log/pycoQC
    job_dir=${dir}/job/pycoQC
    
    mkdir -p ${log_dir}
    mkdir -p ${job_dir}
    
    thread=6
    cd /data/Nanopore/Fast5
    
    for i in $(ls -d $pwd *);do
    job_file="${job_dir}/${i}.job"
    
        echo "#!/bin/bash
    #SBATCH --job-name=${i}.pycoQC.job
    #SBATCH --output=$log_dir/${i}.pycoQC.out
    #SBATCH --time=5:00:00
    #SBATCH --cpus-per-task=${thread}
    #SBATCH --mem=10g
    source biosoft/conda/etc/profile.d/conda.sh
    conda activate pycoQC
    out_dir=/data/Nanopore/QC/pycoQC/${i}
    mkdir -p \${out_dir}
    pycoQC -f /data/Nanopore/Fastq/sequencing_summary/${i}.sequencing_summary.txt -o \${out_dir}${i}.pycoqc.html --min_pass_qual 7 
    " > $job_file
    sbatch $job_file
    done
    

    2、NanoR
    分两步走,第一步分析每个样本,第二个比较两组样本
    先写好Rscript再调用
    一、NanoR.R 代码如下

    #!/usr/bin/env Rscript
    suppressMessages(library(NanoR))
    suppressMessages(library(argparse))
    parser <- ArgumentParser(description="")
    parser$add_argument('-i', '--input', metavar="*", help="input dir name prefix")
    args <- parser$parse_args()
    prefix <- args$input
    sequencing_run<-paste0("/scratch/jiangc4/Nanopore/Fastq/",as.character(prefix))
    summary<-file.path(sequencing_run,"sequencing_summary.txt")
    fastq<-file.path(sequencing_run, "fastq_pass")
    out_dir<-paste0("/scratch/jiangc4/Nanopore/QC/NanoR/",as.character(prefix))
    dir.create(out_dir,recursive = T)
    output<-file.path(out_dir)
    Glist<-NanoPrepareG(DataSummary=summary,DataFastq=fastq)
    Gtable<-NanoTableG(NanoGList=Glist, DataOut=output, GCC=FALSE) # switch GCC to TRUE to enable calculation of GC content from passed FASTQ.
    NanoStatsG(NanoGList=Glist, NanoGTable=Gtable, DataOut=output, KeepGGObj = FALSE) #to store also table behind ggplot2-plots, switch KeepGGObj to TRUE
    

    调用代码

    #!/bin/bash
    #SBATCH --time=1:00:00
    #SBATCH --cpus-per-task=2
    #SBATCH --mem=2g
    nanoR_dir=/Scripts/Nanopore/QC/NanoR.R
    dir=/data_dir/Nanopore
    log_dir=${dir}/log/NanoR
    job_dir=${dir}/job/NanoR
    
    mkdir -p ${log_dir}
    mkdir -p ${job_dir}
    
    thread=2
    cd /data_dir/Nanopore/Fast5
    
    for i in $(ls -d $pwd *);do
    job_file="${job_dir}/${i}.job"
    
        echo "#!/bin/bash
    #SBATCH --job-name=${i}.NanoR.job
    #SBATCH --output=$log_dir/${i}.NanoR.out
    #SBATCH --time=5:00:00
    #SBATCH --cpus-per-task=${thread}
    #SBATCH --mem=10g
    ml R/3.5
    Rscript ${nanoR_dir} -i ${i}
    " > $job_file
    sbatch $job_file
    done
    

    二、两组比较
    写好比较的代码NanoR_Comparison.R

    #!/usr/bin/env Rscript
    suppressMessages(library(NanoR))
    inputs<-c("/data_dir/Control1/DataForComparison",
              "/data_dir/Control2/DataForComparison",
              "/data_dir/Control3/DataForComparison",
              "/data_dir/Treatment5/DataForComparison",
              "/data_dir/Treatment6/DataForComparison",
              "/data_dir/Treatment7/DataForComparison")
    labels<-c("Control1","Control2","Control3","Treatment5","Treatment6","Treatment7")
    output<-"/data_dir/comparison_output"
    dir.create(output,recursive = T)
    NanoCompare(DataIn=inputs,DataOut=output,Labels=labels)
    
    #!/bin/bash
    #SBATCH --time=2:00:00
    #SBATCH --cpus-per-task=2
    #SBATCH --mem=10g
    ml R/3.5
    Rscript ./NanoR_Comparison.R
    

    相关文章

      网友评论

        本文标题:Nanopore QC

        本文链接:https://www.haomeiwen.com/subject/rrtdahtx.html