nanopore QC有好几种方法
简单来说需要base call 后的sequencing_summary.txt文件
pycoQC看起来比较酷炫
1、pycoQC
#!/bin/bash
#SBATCH --time=1:00:00
#SBATCH --cpus-per-task=2
#SBATCH --mem=2g
dir=/data/Nanopore
log_dir=${dir}/log/pycoQC
job_dir=${dir}/job/pycoQC
mkdir -p ${log_dir}
mkdir -p ${job_dir}
thread=6
cd /data/Nanopore/Fast5
for i in $(ls -d $pwd *);do
job_file="${job_dir}/${i}.job"
echo "#!/bin/bash
#SBATCH --job-name=${i}.pycoQC.job
#SBATCH --output=$log_dir/${i}.pycoQC.out
#SBATCH --time=5:00:00
#SBATCH --cpus-per-task=${thread}
#SBATCH --mem=10g
source biosoft/conda/etc/profile.d/conda.sh
conda activate pycoQC
out_dir=/data/Nanopore/QC/pycoQC/${i}
mkdir -p \${out_dir}
pycoQC -f /data/Nanopore/Fastq/sequencing_summary/${i}.sequencing_summary.txt -o \${out_dir}${i}.pycoqc.html --min_pass_qual 7
" > $job_file
sbatch $job_file
done
2、NanoR
分两步走,第一步分析每个样本,第二个比较两组样本
先写好Rscript再调用
一、NanoR.R 代码如下
#!/usr/bin/env Rscript
suppressMessages(library(NanoR))
suppressMessages(library(argparse))
parser <- ArgumentParser(description="")
parser$add_argument('-i', '--input', metavar="*", help="input dir name prefix")
args <- parser$parse_args()
prefix <- args$input
sequencing_run<-paste0("/scratch/jiangc4/Nanopore/Fastq/",as.character(prefix))
summary<-file.path(sequencing_run,"sequencing_summary.txt")
fastq<-file.path(sequencing_run, "fastq_pass")
out_dir<-paste0("/scratch/jiangc4/Nanopore/QC/NanoR/",as.character(prefix))
dir.create(out_dir,recursive = T)
output<-file.path(out_dir)
Glist<-NanoPrepareG(DataSummary=summary,DataFastq=fastq)
Gtable<-NanoTableG(NanoGList=Glist, DataOut=output, GCC=FALSE) # switch GCC to TRUE to enable calculation of GC content from passed FASTQ.
NanoStatsG(NanoGList=Glist, NanoGTable=Gtable, DataOut=output, KeepGGObj = FALSE) #to store also table behind ggplot2-plots, switch KeepGGObj to TRUE
调用代码
#!/bin/bash
#SBATCH --time=1:00:00
#SBATCH --cpus-per-task=2
#SBATCH --mem=2g
nanoR_dir=/Scripts/Nanopore/QC/NanoR.R
dir=/data_dir/Nanopore
log_dir=${dir}/log/NanoR
job_dir=${dir}/job/NanoR
mkdir -p ${log_dir}
mkdir -p ${job_dir}
thread=2
cd /data_dir/Nanopore/Fast5
for i in $(ls -d $pwd *);do
job_file="${job_dir}/${i}.job"
echo "#!/bin/bash
#SBATCH --job-name=${i}.NanoR.job
#SBATCH --output=$log_dir/${i}.NanoR.out
#SBATCH --time=5:00:00
#SBATCH --cpus-per-task=${thread}
#SBATCH --mem=10g
ml R/3.5
Rscript ${nanoR_dir} -i ${i}
" > $job_file
sbatch $job_file
done
二、两组比较
写好比较的代码NanoR_Comparison.R
#!/usr/bin/env Rscript
suppressMessages(library(NanoR))
inputs<-c("/data_dir/Control1/DataForComparison",
"/data_dir/Control2/DataForComparison",
"/data_dir/Control3/DataForComparison",
"/data_dir/Treatment5/DataForComparison",
"/data_dir/Treatment6/DataForComparison",
"/data_dir/Treatment7/DataForComparison")
labels<-c("Control1","Control2","Control3","Treatment5","Treatment6","Treatment7")
output<-"/data_dir/comparison_output"
dir.create(output,recursive = T)
NanoCompare(DataIn=inputs,DataOut=output,Labels=labels)
#!/bin/bash
#SBATCH --time=2:00:00
#SBATCH --cpus-per-task=2
#SBATCH --mem=10g
ml R/3.5
Rscript ./NanoR_Comparison.R
网友评论