为什么我要干这样的傻事,为了让以后自己不干这样的傻事,记录一下这无语的代码,我也是醉了。再也不想用perl了,我要学习基于python的snakemake和nextflow语言组织我的流程。更多知识分享请到 https://zouhua.top/。
输入文件
find /data/user/zouhua/pipeline/Amplicon/RawData/ -name "*gz" | grep 'R1' | sort | perl -e 'print"sampleid\tforward-absolute-filepath\treverse-absolute-filepath\n"; while(<>){chomp; $name=(split("\/", $_))[-1]; $name1=$name; $name2=$_; $name1=~s/_R1.fq.gz//g; $name2=~s/R1/R2/; print "$name1\t$_\t$name2\n";}' > pe-33-manifest-trimmed.tsv
perl脚本
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;
use Cwd 'abs_path';
my ($file_fq, $file_meta, $primer_f, $primer_r, $threads, $mode, $classifier, $depth, $column, $out, $help, $version);
GetOptions(
"f1|file_fq:s" => \$file_fq,
"f2|file_meta:s" => \$file_meta,
"pf|primer_f:s" => \$primer_f,
"pr|primer_r:s" => \$primer_r,
"t|threads:s" => \$threads,
"m|mode:s" => \$mode,
"c|classifier:s" => \$classifier,
"d|depth:s" => \$depth,
"co|column:s" => \$column,
"o|out:s" => \$out,
"h|help:s" => \$help,
"v|version" => \$version
);
&usage if(!defined $out);
my $cwd = abs_path;
# output
my $dir = "$cwd/result/";
system "mkdir -p $dir" unless(-d $dir);
# script
my $dir_script = "$cwd/result/script/";
system "mkdir -p $dir_script" unless(-d $dir_script);
########## output #########################################
# bash script per step
# combine all steps in one script
my $qza_s = join("", $dir_script, "Run.s1.import.sh");
my $primer_s = join("", $dir_script, "Run.s2.primer.sh");
my $ASV_s = join("", $dir_script, "Run.s3.ASV.sh");
my $filter_s = join("", $dir_script, "Run.s4.filter.sh");
my $taxo_s = join("", $dir_script, "Run.s5.taxo.sh");
my $diversity_s = join("", $dir_script, "Run.s6.diversity.sh");
my $ANCOM_s = join("", $dir_script, "Run.s7.ANCOM.sh");
my $LEfse_s = join("", $dir_script, "Run.s8.LEfse.sh");
my $picrust_s = join("", $dir_script, "Run.s9.picrust.sh");
# directory
my $qza_d = join("", $dir, "01.qza");
system "mkdir -p $qza_d" unless(-d $qza_d);
my $primer_d = join("", $dir, "02.remove_primer");
system "mkdir -p $primer_d" unless(-d $primer_d);
my $ASV_d = join("", $dir, "03.ASV");
system "mkdir -p $ASV_d" unless(-d $ASV_d);
my $filter_d = join("", $dir, "04.filter");
system "mkdir -p $filter_d" unless(-d $filter_d);
my $taxo_d = join("", $dir, "05.taxonomy");
system "mkdir -p $taxo_d" unless(-d $taxo_d);
my $diversity_d = join("", $dir, "06.diversity");
system "mkdir -p $diversity_d" unless(-d $diversity_d);
my $ANCOM_d = join("", $dir, "07.ANCOM");
system "mkdir -p $ANCOM_d" unless(-d $ANCOM_d);
my $LEfse_d = join("", $dir, "08.LEfse");
system "mkdir -p $LEfse_d" unless(-d $LEfse_d);
my $picrust_d = join("", $dir, "09.Picrust2");
system "mkdir -p $picrust_d" unless(-d $picrust_d);
###################################
# get qiime env name
my $qiime = `conda env list | grep "qiime.*" | cut -d " " -f 1`;
my $picrust = `conda env list | grep "picrust.*" | cut -d " " -f 1`;
###################################
# step1: import fastq into qza
open(OT1, "> $qza_s") or die "can't open $qza_s \n";
my $qza_out = join("/", $qza_d, "paired-end-demux.qza");
my $qzv_out = join("/", $qza_d, "paired-end-demux-summary.qzv");
my $qza_shell = "qiime tools import --type \'SampleData[PairedEndSequencesWithQuality]\' --input-path $file_fq --output-path $qza_out --input-format PairedEndFastqManifestPhred33V2";
my $qzv_shell = "qiime demux summarize --i-data $qza_out --o-visualization $qzv_out";
print OT1 "source activate $qiime\n";
print OT1 "$qza_shell\n$qzv_shell\n";
print OT1 "echo \"step1: successfully convert fastq into qza\\n\" \n";
close(OT1);
###################################
# step2: remove adapter
# step1: import fastq into qza
open(OT2, "> $primer_s") or die "can't open $primer_s \n";
my $primer_zout = join("/", $primer_d, "paired-end-demux-trim.qza");
my $primer_vout = join("/", $primer_d, "paired-end-demux-trim-summary.qzv");
my $primer_zshell = "qiime cutadapt trim-paired --i-demultiplexed-sequences $qza_out --p-front-f $primer_f --p-front-r $primer_r --p-cores $threads --o-trimmed-sequences $primer_zout";
my $primer_vshell = "qiime demux summarize --i-data $primer_zout --o-visualization $primer_vout";
print OT2 "source activate $qiime\n";
print OT2 "$primer_zshell\n$primer_vshell\n";
print OT2 "echo \"step2: successfully remove primer sequence\\n\" \n";
close(OT2);
###################################
# step3: get ASVs (dada2 deblur)
$mode ||= "dada2";
open(OT3, "> $ASV_s") or die "can't open $ASV_s \n";
print OT3 "source activate $qiime\n";
my $ASV_table = join("/", $ASV_d, "table.qza");
my $ASV_vtable = join("/", $ASV_d, "table.qzv");
my $ASV_seq = join("/", $ASV_d, "rep-seqs.qza");
my $ASV_stats = join("/", $ASV_d, "stats.qza");
if($mode eq "dada2"){
my $ASV_zshell = "qiime dada2 denoise-paired --i-demultiplexed-seqs $primer_zout --p-trunc-len-f 0 --p-trunc-len-r 0 --p-n-threads $threads --o-table $ASV_table --o-representative-sequences $ASV_seq --o-denoising-stats $ASV_stats";
my $ASV_vshell = " qiime feature-table summarize --i-table $ASV_table --o-visualization $ASV_vtable --m-sample-metadata-file $file_meta";
print OT3 "$ASV_zshell\n$ASV_vshell\n";
}elsif($mode eq "deblur"){
my $ASV_join = join("/", $ASV_d, "paired-end-demux-trim-join.qza");
my $ASV_vjoin = join("/", $ASV_d, "paired-end-demux-trim-join.qzv");
my $ASV_filter = join("/", $ASV_d, "paired-end-demux-trim-join-filtered.qza");
my $ASV_stats = join("/", $ASV_d, "paired-end-demux-trim-join-filter-stats.qza");
my $ASV_merge_shell = "qiime vsearch join-pairs --i-demultiplexed-seqs $primer_zout --o-joined-sequences $ASV_join";
my $ASV_sum_shell = "qiime demux summarize --i-data $ASV_join --o-visualization $ASV_vjoin";
my $ASV_quality_shell = "qiime quality-filter q-score --i-demux $ASV_join --o-filtered-sequences $ASV_filter --o-filter-stats $ASV_stats";
my $ASV_denoise_shell = "qiime deblur denoise-16S --i-demultiplexed-seqs $ASV_filter --p-trim-length 300 --o-representative-sequences $ASV_seq --o-table $ASV_table --p-sample-stats --o-stats $ASV_stats";
my $ASV_vshell = " qiime feature-table summarize --i-table $ASV_table --o-visualization $ASV_vtable --m-sample-metadata-file $file_meta";
print OT3 "source activate $qiime\n";
print OT3 "$ASV_merge_shell\n$ASV_sum_shell\n$ASV_quality_shell\n$ASV_denoise_shell\n$ASV_vshell\n";
}
print OT3 "echo \"step3: successfully get ASV table in $mode \\n\" \n";
close(OT3);
###################################
# step4: Filter the unsuitable ASV
# 1. remove low occurrence ASVs
# 2. remove contamination and mitochondria, chloroplast sequence
# 3. drop the low depth samples
open(OT4, "> $filter_s") or die "can't open $filter_s \n";
my $filter_low = join("/", $filter_d, "table_filter_low_freq.qza");
my $taxo = join("/", $taxo_d, "taxonomy.qza");
my $filter_contam = join("/", $filter_d, "table_filter_low_freq_contam.qza");
my $filter_sample = join("/", $filter_d, "final_table.qza");
my $filter_req = join("/", $filter_d, "final_rep_seqs.qza");
my $filter_tax_shell = "qiime feature-classifier classify-sklearn --i-classifier $classifier --i-reads $ASV_seq --o-classification $taxo --p-n-jobs $threads";
my $filter_low_shell = "qiime feature-table filter-features --i-table $ASV_table --p-min-frequency 10 --p-min-samples 1 --o-filtered-table $filter_low";
my $filter_contam_shell = "qiime taxa filter-table --i-table $filter_low --i-taxonomy $taxo --p-exclude mitochondria,chloroplast --o-filtered-table $filter_contam";
my $filter_sample_shell = "qiime feature-table filter-samples --i-table $filter_contam --p-min-frequency 4000 --o-filtered-table $filter_sample";
my $filter_final_shell = "qiime feature-table filter-seqs --i-data $ASV_seq --i-table $filter_sample --o-filtered-data $filter_req";
print OT4 "source activate $qiime\n";
print OT4 "$filter_tax_shell\n$filter_low_shell\n$filter_contam_shell\n$filter_sample_shell\n$filter_final_shell\n";
print OT4 "echo \"step4: successfully Filter the unsuitable ASV \\n\" \n";
close(OT4);
##################################
# step5: Taxonomic annotation
open(OT5, "> $taxo_s") or die "can't open $taxo_s\n";
my $taxo_final = join("/", $taxo_d, "taxonomy-final.qza");
my $taxo_barplot = join("/", $taxo_d, "taxonomy-final-barplots.qzv");
my $taxo_tax_shell = "qiime feature-classifier classify-sklearn --i-classifier $classifier --i-reads $filter_req --o-classification $taxo_final --p-n-jobs $threads";
my $taxo_bar_shell = "qiime taxa barplot --i-table $filter_sample --i-taxonomy $taxo_final --m-metadata-file $file_meta --o-visualization $taxo_barplot";
print OT5 "source activate $qiime\n";
print OT5 "$taxo_tax_shell\n$taxo_bar_shell\n";
print OT5 "echo \"step5: successfully Taxonomic annotation \\n\" \n";
close(OT5);
##################################
# step6: Constructing phylogenetic tree
open(OT6, "> $diversity_s") or die "can't open $diversity_s\n";
my $div_align = join("/", $diversity_d, "final_rep_seqs_aligned.qza");
my $div_mask = join("/", $diversity_d, "final_rep_seqs_masked.qza");
my $div_urtree = join("/", $diversity_d, "unrooted-tree.qza");
my $div_rtree = join("/", $diversity_d, "rooted-tree.qza");
my $div_phy_shell = "qiime phylogeny align-to-tree-mafft-fasttree --i-sequences $filter_req --o-alignment $div_align --o-masked-alignment $div_mask --p-n-threads $threads --o-tree $div_urtree --o-rooted-tree $div_rtree";
print OT6 "source activate $qiime\n";
print OT6 "$div_phy_shell\n";
##################################
# step7: rarefication curve and diversity analysis
my $rare_qzv_name = join("-", "depth", $depth, "alpha-rarefaction.qzv");
my $rare_metrics = join("-", "depth", $depth, "core-metrics");
my $div_rare = join("/", $diversity_d, $rare_qzv_name);
my $div_metrics = join("/", $diversity_d, $rare_metrics);
my $div_rare_shell = "qiime diversity alpha-rarefaction --i-table $filter_sample --i-phylogeny $div_rtree --p-max-depth $depth --m-metadata-file $file_meta --o-visualization $div_rare";
my $div_metric_shell = "qiime diversity core-metrics-phylogenetic --i-phylogeny $div_rtree --i-table $filter_sample --p-sampling-depth $depth --m-metadata-file $file_meta --output-dir $div_metrics";
print OT6 "$div_rare_shell\n$div_metric_shell\n";
print OT6 "echo \"step6: successfully Constructe phylogenetic tree \\n\" \n";
close(OT6);
#################################
# step8: Analysis of composition of microbiomes
# all diversity index and distance
# add pseudocount for log transform
open(OT7, "> $ANCOM_s") or die "can't open $ANCOM_s\n";
my $ancom_add = join("/", $ANCOM_d, "final_table_pseudocount.qza");
my $ancom_out = join("/", $ANCOM_d, "ancom_output");
my $ancom_add_shell = "qiime composition add-pseudocount --i-table $filter_sample --p-pseudocount 1 --o-composition-table $ancom_add";
my $ancom_out_shell = "qiime composition ancom --i-table $ancom_add --m-metadata-file $file_meta --m-metadata-column $column --output-dir $ancom_out";
print OT7 "source activate $qiime\n";
print OT7 "$ancom_add_shell\n$ancom_out_shell\n";
print OT7 "echo \"step7: successfully Constructe phylogenetic tree \\n\" \n";
close(OT7);
#################################
# step9: LDA Effect Size
open(OT8, "> $LEfse_s") or die "can't open $LEfse_s\n";
my $LEfse_collapse = join("/", $LEfse_d, "collapse.table.qza");
my $LEfse_freq = join("/", $LEfse_d, "collapse.frequency.table.qza");
my $LEfse_tsv = join("/", $LEfse_d, "collapse.frequency.table.tsv");
my $LEfse_collapse_shell = "qiime taxa collapse --i-table $filter_sample --o-collapsed-table $LEfse_collapse --p-level 6 --i-taxonomy $taxo_final";
my $LEfse_fre_shell = "qiime feature-table relative-frequency --i-table $LEfse_collapse --o-relative-frequency-table $LEfse_freq --output-dir $LEfse_d/output";
my $LEfse_export_shell = "qiime tools export --input-path $LEfse_freq --output-path $LEfse_d/output";
my $LEfse_tsv_shell = "biom convert -i $LEfse_d/output/feature-table.biom -o $LEfse_tsv --header-key \"taxonomy\" --to-tsv";
print OT8 "source activate $qiime\n";
print OT8 "$LEfse_collapse_shell\n$LEfse_fre_shell\n$LEfse_export_shell\n$LEfse_tsv_shell\n";
print OT8 "echo \"step8: successfully LDA Effect Size \\n\" \n";
close(OT8);
#################################
# step10: Functional prediction: picrust2
open(OT9, "> $picrust_s") or die "can't open $picrust_s\n";
my $picrust_out = join("/", $picrust_d, "picrust2_out_pipeline");
print OT9 "source activate $qiime\n";
my $picrust_out_shell = "qiime tools export --input-path $filter_req --output-path $picrust_d";
print OT9 "$picrust_out_shell\n";
print OT9 "source deactivate\n";
print OT9 "source activate $picrust\n";
my $picrust_final_shell = "picrust2_pipeline.py -s $picrust_d/dna-sequences.fasta -i $LEfse_d/output/feature-table.biom -o $picrust_out -p 30";
print OT9 "$picrust_final_shell\n";
print OT9 "echo \"step9: successfully Functional prediction \\n\" \n";
close(OT9);
###################################################
# combine all scripts
open(OT, "> $out") or die "can't open $out\n";
print OT "sh $qza_s\nsh $primer_s\nsh $ASV_s\nsh $filter_s\nsh $taxo_s\n";
print OT "sh $diversity_s\nsh $ANCOM_s\nsh $LEfse_s\nsh $picrust_s\n";
close(OT);
sub usage{
print <<USAGE;
usage:
perl $0 -f1 <fastq file> -f2 <sample group> -pf <forward primer> -pr <reverse primer> -t <threads> -m <dada2 or deblur> -c <classifier> -d <depth> -co <column> -o <out>
options:
-f1|fastq file :[essential].
-f2|sample group :[essential].
-pf|forward :[essential].
-pr|reverse :[essential].
-t|threads :[essential].
-m|denoise :[essential] dada2 or deblur.
-c|classifier :[essential].
-d|depth :[essential].
-co|column :[essential].
-o|out :[essential].
USAGE
exit;
};
sub version {
print <<VERSION;
version: v1.0
update: 20201228 - 20201228
author: zouhua1\@outlook.com
VERSION
};
运行
perl qiime2_v0.pl -f1 pe-33-manifest-trimmed.tsv -f2 sample-metadata.tsv -pf CCTAYGGGRBGCASCAG -pr GGACTACNNGGGTATCTAAT -t 20 -c silva-138-99-nb-classifier.qza -d 60000 -co Group -o Run.all.sh
最终目录结构
Amplicon_analysis
├── RawData-> /data/user/zouhua/rawdata/
├── pe-33-manifest-trimmed.tsv
├── qiime2_v0.pl
├── result
│ ├── 01.qza
│ ├── 02.remove_primer
│ ├── 03.ASV
│ ├── 04.filter
│ ├── 05.taxonomy
│ ├── 06.diversity
│ ├── 07.ANCOM
│ ├── 08.LEfse
│ ├── 09.Picrust2
│ └── script
│ ├── Run.s1.import.sh
│ ├── Run.s2.primer.sh
│ ├── Run.s3.ASV.sh
│ ├── Run.s4.filter.sh
│ ├── Run.s5.taxo.sh
│ ├── Run.s6.diversity.sh
│ ├── Run.s7.ANCOM.sh
│ ├── Run.s8.LEfse.sh
│ └── Run.s9.picrust.sh
├── Run.all.sh
├── sample-metadata.tsv
├── silva-138-99-nb-classifier.qza -> /data/share/database/qiime2_db/silva/silva-138-99-nb-classifier.qza
└── work.sh
引用
参考文章如引起任何侵权问题,可以与我联系,谢谢。
网友评论