美文网首页
扩增子分析:基于qiime2平台处理raw data的简单perl脚本

扩增子分析:基于qiime2平台处理raw data的简单perl脚本

作者: 生信学习者2 | 来源:发表于2020-12-29 10:00 被阅读0次

    为什么我要干这样的傻事,为了让以后自己不干这样的傻事,记录一下这无语的代码,我也是醉了。再也不想用perl了,我要学习基于python的snakemake和nextflow语言组织我的流程。更多知识分享请到 https://zouhua.top/

    输入文件

    # Build the qiime2 PairedEndFastqManifestPhred33V2 manifest TSV: list all
    # R1 *.gz files, derive the sample id by stripping "_R1.fq.gz" from the
    # file name, and derive the R2 path by substituting R1 -> R2 in the path.
    find /data/user/zouhua/pipeline/Amplicon/RawData/ -name "*gz" | grep 'R1' | sort | perl -e 'print"sampleid\tforward-absolute-filepath\treverse-absolute-filepath\n"; while(<>){chomp; $name=(split("\/", $_))[-1]; $name1=$name; $name2=$_; $name1=~s/_R1.fq.gz//g; $name2=~s/R1/R2/; print "$name1\t$_\t$name2\n";}' > pe-33-manifest-trimmed.tsv
    

    perl脚本

    #!/usr/bin/perl 
    # Generate per-step qiime2/picrust2 bash scripts for a 16S amplicon
    # pipeline (import, primer removal, denoising, filtering, taxonomy,
    # diversity, ANCOM, LEfSe, picrust2) plus one driver script ($out).
    use warnings;
    use strict;
    use Getopt::Long;
    use Cwd 'abs_path';
    
    
    # Command-line options; see usage() for the meaning of each flag.
    my ($file_fq, $file_meta, $primer_f, $primer_r, $threads, $mode, $classifier, $depth, $column, $out, $help, $version);
    GetOptions(
        "f1|file_fq:s"    =>  \$file_fq,
        "f2|file_meta:s"  =>  \$file_meta,
        "pf|primer_f:s"   =>  \$primer_f,
        "pr|primer_r:s"   =>  \$primer_r,
        "t|threads:s"     =>  \$threads, 
        "m|mode:s"        =>  \$mode,
        "c|classifier:s"  =>  \$classifier, 
        "d|depth:s"       =>  \$depth,
        "co|column:s"     =>  \$column,    
        "o|out:s"   =>  \$out,
        "h|help"    =>  \$help,      # boolean flag (was ":s", which could swallow the next argument)
        "v|version" =>  \$version    # boolean flag
    );
    # -h and -v were parsed but never acted on before; honour them here.
    if (defined $version) { version(); exit; }
    usage() if defined $help;
    usage() if !defined $out;    # -o is mandatory; usage() exits the program
    my $cwd = abs_path;
    
    # output
    # All pipeline results are written beneath ./result/ of the current
    # working directory.
    my $dir = "$cwd/result/"; 
    system "mkdir -p $dir" unless(-d $dir);
    
    # script
    # Generated per-step bash scripts are collected under result/script/.
    my $dir_script = "$cwd/result/script/"; 
    system "mkdir -p $dir_script" unless(-d $dir_script);
    
    ########## output #########################################
    # bash script per step
    # combine all steps in one script
    my $qza_s        = join("", $dir_script, "Run.s1.import.sh");
    my $primer_s     = join("", $dir_script, "Run.s2.primer.sh");
    my $ASV_s        = join("", $dir_script, "Run.s3.ASV.sh");
    my $filter_s     = join("", $dir_script, "Run.s4.filter.sh");
    my $taxo_s       = join("", $dir_script, "Run.s5.taxo.sh");
    my $diversity_s  = join("", $dir_script, "Run.s6.diversity.sh");
    my $ANCOM_s      = join("", $dir_script, "Run.s7.ANCOM.sh");
    my $LEfse_s      = join("", $dir_script, "Run.s8.LEfse.sh");
    my $picrust_s    = join("", $dir_script, "Run.s9.picrust.sh");
    
    # directory
    # One numbered result directory per pipeline step, created up front.
    my $qza_d         = join("", $dir, "01.qza");
    system "mkdir -p $qza_d" unless(-d $qza_d);
    my $primer_d      = join("", $dir, "02.remove_primer");
    system "mkdir -p $primer_d" unless(-d $primer_d);
    my $ASV_d         = join("", $dir, "03.ASV");
    system "mkdir -p $ASV_d" unless(-d $ASV_d);
    my $filter_d      = join("", $dir, "04.filter");
    system "mkdir -p $filter_d" unless(-d $filter_d);
    my $taxo_d        = join("", $dir, "05.taxonomy");
    system "mkdir -p $taxo_d" unless(-d $taxo_d);
    my $diversity_d   = join("", $dir, "06.diversity");
    system "mkdir -p $diversity_d" unless(-d $diversity_d);
    my $ANCOM_d       = join("", $dir, "07.ANCOM");
    system "mkdir -p $ANCOM_d" unless(-d $ANCOM_d);
    my $LEfse_d       = join("", $dir, "08.LEfse");
    system "mkdir -p $LEfse_d" unless(-d $LEfse_d);
    my $picrust_d     = join("", $dir, "09.Picrust2");
    system "mkdir -p $picrust_d" unless(-d $picrust_d);
    
    ###################################
    # get qiime env name
    # Detect the conda environment names for qiime2 and picrust2 from
    # `conda env list`.  Keep only the first matching line and drop the
    # trailing newline: the raw backtick output ends in "\n" (and grep may
    # match several environments), which would otherwise be interpolated
    # into the middle of every generated "source activate" line.
    my $qiime = (split /\n/, `conda env list | grep "qiime.*" | cut -d " " -f 1`)[0] // "";
    my $picrust = (split /\n/, `conda env list | grep "picrust.*" | cut -d " " -f 1`)[0] // "";
    
    ###################################
    # step1: import fastq into qza
    # Writes $qza_s: imports the paired-end manifest ($file_fq) into a qiime2
    # artifact and produces a demux summary visualization.
    open(my $ot1, '>', $qza_s) or die "can't open $qza_s: $!\n";
    my $qza_out = join("/", $qza_d, "paired-end-demux.qza");
    my $qzv_out = join("/", $qza_d, "paired-end-demux-summary.qzv");
    
    my $qza_shell = "qiime tools import --type \'SampleData[PairedEndSequencesWithQuality]\' --input-path $file_fq --output-path $qza_out --input-format PairedEndFastqManifestPhred33V2";
    my $qzv_shell = "qiime demux summarize --i-data $qza_out --o-visualization $qzv_out";
    print {$ot1} "source activate $qiime\n";
    print {$ot1} "$qza_shell\n$qzv_shell\n";
    print {$ot1} "echo \"step1: successfully convert fastq into qza\\n\" \n";
    # Checked close: buffered write errors only surface here.
    close($ot1) or die "can't close $qza_s: $!\n";
    
    
    ###################################
    # step2: remove adapter
    # Writes $primer_s: cutadapt trims the amplification primers from both
    # reads, then a demux summary is produced for the trimmed data.
    open(OT2, "> $primer_s") or die "can't open $primer_s \n";
    my $primer_zout = join("/", $primer_d, "paired-end-demux-trim.qza");
    my $primer_vout = join("/", $primer_d, "paired-end-demux-trim-summary.qzv");
    
    my $primer_zshell = "qiime cutadapt trim-paired --i-demultiplexed-sequences $qza_out --p-front-f $primer_f --p-front-r $primer_r --p-cores $threads --o-trimmed-sequences $primer_zout";
    my $primer_vshell = "qiime demux summarize --i-data $primer_zout --o-visualization $primer_vout";
    print OT2 "source activate $qiime\n";
    print OT2 "$primer_zshell\n$primer_vshell\n";
    print OT2 "echo \"step2: successfully remove primer sequence\\n\" \n";
    close(OT2);
    
    
    ###################################
    # step3: get ASVs (dada2 deblur)
    # Writes $ASV_s: denoise the primer-trimmed reads into an ASV feature
    # table using either dada2 (default) or deblur, then summarize the table.
    $mode ||= "dada2";
    open(OT3, "> $ASV_s") or die "can't open $ASV_s \n";
    print OT3 "source activate $qiime\n";
    my $ASV_table = join("/", $ASV_d, "table.qza");
    my $ASV_vtable = join("/", $ASV_d, "table.qzv");
    my $ASV_seq = join("/", $ASV_d, "rep-seqs.qza");
    my $ASV_stats = join("/", $ASV_d, "stats.qza");
    if($mode eq "dada2"){
        my $ASV_zshell = "qiime dada2 denoise-paired --i-demultiplexed-seqs $primer_zout --p-trunc-len-f 0 --p-trunc-len-r 0 --p-n-threads $threads --o-table $ASV_table --o-representative-sequences $ASV_seq --o-denoising-stats $ASV_stats";
        my $ASV_vshell = " qiime feature-table summarize --i-table $ASV_table --o-visualization $ASV_vtable --m-sample-metadata-file $file_meta";
        print OT3 "$ASV_zshell\n$ASV_vshell\n";
    }elsif($mode eq "deblur"){
        # deblur requires joined, quality-filtered reads before denoising.
        # NOTE(review): this inner "my $ASV_stats" shadows the outer one, and
        # both the quality-filter and the deblur commands below write to the
        # same stats path, so the deblur stats overwrite the filter stats --
        # confirm whether two distinct file names were intended.
        my $ASV_join = join("/", $ASV_d, "paired-end-demux-trim-join.qza");
        my $ASV_vjoin = join("/", $ASV_d, "paired-end-demux-trim-join.qzv");
        my $ASV_filter = join("/", $ASV_d, "paired-end-demux-trim-join-filtered.qza");
        my $ASV_stats = join("/", $ASV_d, "paired-end-demux-trim-join-filter-stats.qza");    
    
        my $ASV_merge_shell = "qiime vsearch join-pairs --i-demultiplexed-seqs $primer_zout --o-joined-sequences $ASV_join";
        my $ASV_sum_shell = "qiime demux summarize --i-data $ASV_join --o-visualization $ASV_vjoin";
        my $ASV_quality_shell = "qiime quality-filter q-score --i-demux $ASV_join --o-filtered-sequences $ASV_filter --o-filter-stats $ASV_stats";
        my $ASV_denoise_shell = "qiime deblur denoise-16S --i-demultiplexed-seqs $ASV_filter --p-trim-length 300 --o-representative-sequences $ASV_seq --o-table $ASV_table --p-sample-stats --o-stats $ASV_stats";
        my $ASV_vshell = " qiime feature-table summarize --i-table $ASV_table --o-visualization $ASV_vtable --m-sample-metadata-file $file_meta";
        # NOTE(review): "source activate $qiime" was already printed above, so
        # the deblur script contains that line twice (harmless but redundant).
        print OT3 "source activate $qiime\n";
        print OT3 "$ASV_merge_shell\n$ASV_sum_shell\n$ASV_quality_shell\n$ASV_denoise_shell\n$ASV_vshell\n";
        
    }
    print OT3 "echo \"step3: successfully get ASV table in $mode \\n\" \n";
    close(OT3);
    
    
    ###################################
    # step4: Filter the unsuitable ASV
    # 1. remove low occurrence ASVs
    # 2. remove contamination and mitochondria, chloroplast sequence
    # 3. drop the low depth samples
    # Writes $filter_s.  Thresholds used below: keep ASVs with total
    # frequency >= 10 present in >= 1 sample; drop samples with < 4000 reads.
    open(OT4, "> $filter_s") or die "can't open $filter_s \n";
    my $filter_low = join("/", $filter_d, "table_filter_low_freq.qza");
    my $taxo = join("/", $taxo_d, "taxonomy.qza");
    my $filter_contam = join("/", $filter_d, "table_filter_low_freq_contam.qza");
    my $filter_sample = join("/", $filter_d, "final_table.qza");
    my $filter_req = join("/", $filter_d, "final_rep_seqs.qza");
    
    # Classification runs first so mitochondria/chloroplast can be excluded
    # by taxonomy; the final command subsets the rep-seqs to the kept table.
    my $filter_tax_shell = "qiime feature-classifier classify-sklearn --i-classifier $classifier --i-reads $ASV_seq --o-classification $taxo --p-n-jobs $threads";
    my $filter_low_shell = "qiime feature-table filter-features --i-table $ASV_table --p-min-frequency 10 --p-min-samples 1 --o-filtered-table $filter_low";
    my $filter_contam_shell = "qiime taxa filter-table --i-table $filter_low --i-taxonomy $taxo --p-exclude mitochondria,chloroplast --o-filtered-table  $filter_contam";
    my $filter_sample_shell = "qiime feature-table filter-samples --i-table  $filter_contam --p-min-frequency 4000 --o-filtered-table  $filter_sample";
    my $filter_final_shell = "qiime feature-table filter-seqs --i-data $ASV_seq --i-table $filter_sample --o-filtered-data $filter_req";
    
    print OT4 "source activate $qiime\n";
    print OT4 "$filter_tax_shell\n$filter_low_shell\n$filter_contam_shell\n$filter_sample_shell\n$filter_final_shell\n";
    print OT4 "echo \"step4: successfully Filter the unsuitable ASV \\n\" \n";
    close(OT4);
    
    
    ##################################
    # step5: Taxonomic annotation
    # Writes $taxo_s: re-classify the *filtered* representative sequences and
    # render interactive taxonomy barplots against the sample metadata.
    open(OT5, "> $taxo_s") or die "can't open $taxo_s\n";
    my $taxo_final = join("/", $taxo_d, "taxonomy-final.qza");
    my $taxo_barplot = join("/", $taxo_d, "taxonomy-final-barplots.qzv");
    
    my $taxo_tax_shell = "qiime feature-classifier classify-sklearn --i-classifier $classifier  --i-reads $filter_req --o-classification $taxo_final --p-n-jobs $threads";
    my $taxo_bar_shell = "qiime taxa barplot --i-table $filter_sample --i-taxonomy $taxo_final --m-metadata-file $file_meta --o-visualization $taxo_barplot";
    
    print OT5 "source activate $qiime\n";
    print OT5 "$taxo_tax_shell\n$taxo_bar_shell\n";
    print OT5 "echo \"step5: successfully Taxonomic annotation \\n\" \n";
    close(OT5);
    
    
    ##################################
    # step6: Constructing phylogenetic tree
    # Writes $diversity_s, which holds BOTH step6 (tree) and step7
    # (rarefaction/diversity) commands; OT6 stays open across both sections.
    open(OT6, "> $diversity_s") or die "can't open $diversity_s\n";
    my $div_align = join("/", $diversity_d, "final_rep_seqs_aligned.qza");
    my $div_mask = join("/", $diversity_d, "final_rep_seqs_masked.qza");
    my $div_urtree = join("/", $diversity_d, "unrooted-tree.qza");
    my $div_rtree = join("/", $diversity_d, "rooted-tree.qza");
    
    my $div_phy_shell = "qiime phylogeny align-to-tree-mafft-fasttree --i-sequences $filter_req --o-alignment $div_align --o-masked-alignment $div_mask --p-n-threads $threads --o-tree $div_urtree --o-rooted-tree $div_rtree";
    
    print OT6 "source activate $qiime\n";
    print OT6 "$div_phy_shell\n";
    
    ##################################
    # step7: rarefication curve and diversity analysis
    # Output names embed the sampling depth, e.g. "depth-60000-core-metrics".
    my $rare_qzv_name = join("-", "depth", $depth, "alpha-rarefaction.qzv");
    my $rare_metrics = join("-", "depth", $depth, "core-metrics");  
    my $div_rare = join("/", $diversity_d, $rare_qzv_name);
    my $div_metrics = join("/", $diversity_d, $rare_metrics);
    
    my $div_rare_shell = "qiime diversity alpha-rarefaction --i-table $filter_sample --i-phylogeny $div_rtree --p-max-depth $depth --m-metadata-file $file_meta --o-visualization $div_rare";
    my $div_metric_shell = "qiime diversity core-metrics-phylogenetic --i-phylogeny $div_rtree --i-table $filter_sample --p-sampling-depth $depth --m-metadata-file $file_meta --output-dir $div_metrics";
    
    print OT6 "$div_rare_shell\n$div_metric_shell\n";
    # NOTE(review): the echo below ("Constructe phylogenetic tree") has a typo
    # and only mentions step6, though this script also runs the diversity
    # analysis; the string is runtime output of the generated script.
    print OT6 "echo \"step6: successfully Constructe phylogenetic tree \\n\" \n";
    close(OT6);
    
    
    #################################
    # step8: Analysis of composition of microbiomes
    # all diversity index and distance 
    # add pseudocount for log transform
    # Writes $ANCOM_s: ANCOM needs strictly positive counts, so a pseudocount
    # of 1 is added before testing differential abundance on column $column.
    open(my $ot7, '>', $ANCOM_s) or die "can't open $ANCOM_s: $!\n";
    my $ancom_add = join("/", $ANCOM_d, "final_table_pseudocount.qza");
    my $ancom_out = join("/", $ANCOM_d, "ancom_output");
    
    my $ancom_add_shell = "qiime composition add-pseudocount --i-table $filter_sample --p-pseudocount 1 --o-composition-table $ancom_add";
    my $ancom_out_shell = "qiime composition ancom --i-table $ancom_add --m-metadata-file $file_meta --m-metadata-column $column --output-dir $ancom_out";
    
    print {$ot7} "source activate $qiime\n";
    print {$ot7} "$ancom_add_shell\n$ancom_out_shell\n";
    # Fixed: the success message used to say "Constructe phylogenetic tree"
    # (copy-pasted from step6); it now reports the ANCOM step.
    print {$ot7} "echo \"step7: successfully run ANCOM composition analysis \\n\" \n";
    close($ot7) or die "can't close $ANCOM_s: $!\n";
    
    
    #################################
    # step9: LDA Effect Size
    # Writes $LEfse_s: collapse the table to genus (level 6), convert to
    # relative frequencies, export the biom, and convert it to TSV for LEfSe.
    open(OT8, "> $LEfse_s") or die "can't open $LEfse_s\n";
    my $LEfse_collapse = join("/", $LEfse_d, "collapse.table.qza");
    my $LEfse_freq = join("/", $LEfse_d, "collapse.frequency.table.qza");
    my $LEfse_tsv = join("/", $LEfse_d, "collapse.frequency.table.tsv");
    
    my $LEfse_collapse_shell = "qiime taxa collapse --i-table $filter_sample --o-collapsed-table $LEfse_collapse --p-level 6 --i-taxonomy $taxo_final";
    # NOTE(review): the command below passes both --o-relative-frequency-table
    # and --output-dir; confirm qiime2 accepts that combination.
    my $LEfse_fre_shell = "qiime feature-table relative-frequency --i-table $LEfse_collapse --o-relative-frequency-table $LEfse_freq --output-dir $LEfse_d/output";
    my $LEfse_export_shell = "qiime tools export --input-path $LEfse_freq --output-path  $LEfse_d/output";
    my $LEfse_tsv_shell = "biom convert -i $LEfse_d/output/feature-table.biom -o $LEfse_tsv --header-key \"taxonomy\" --to-tsv";
    
    print OT8 "source activate $qiime\n";
    print OT8 "$LEfse_collapse_shell\n$LEfse_fre_shell\n$LEfse_export_shell\n$LEfse_tsv_shell\n";
    print OT8 "echo \"step8: successfully LDA Effect Size \\n\" \n";
    close(OT8);
    
    
    #################################
    # step10: Functional prediction: picrust2
    # Writes $picrust_s: export the filtered rep-seqs from qiime2, switch
    # conda environments, and run the picrust2 pipeline on them together with
    # the biom table exported by the LEfSe step.
    open(my $ot9, '>', $picrust_s) or die "can't open $picrust_s: $!\n";
    my $picrust_out = join("/", $picrust_d, "picrust2_out_pipeline");
    
    print {$ot9} "source activate $qiime\n";
    my $picrust_out_shell = "qiime tools export --input-path $filter_req --output-path $picrust_d"; 
    print {$ot9} "$picrust_out_shell\n";
    print {$ot9} "source deactivate\n";
    print {$ot9} "source activate $picrust\n";
    # Fixed: the worker count was hard-coded to "-p 30"; it now honours the
    # -t/--threads option like every other step.
    my $picrust_final_shell = "picrust2_pipeline.py -s $picrust_d/dna-sequences.fasta -i $LEfse_d/output/feature-table.biom -o $picrust_out -p $threads";
    print {$ot9} "$picrust_final_shell\n";
    print {$ot9} "echo \"step9: successfully Functional prediction \\n\" \n";
    close($ot9) or die "can't close $picrust_s: $!\n";
    
    ###################################################
    # combine all scripts
    # Write the driver script ($out) that runs every per-step script in order.
    open(my $ot_all, '>', $out) or die "can't open $out: $!\n";
    print {$ot_all} "sh $qza_s\nsh $primer_s\nsh $ASV_s\nsh $filter_s\nsh $taxo_s\n";
    print {$ot_all} "sh $diversity_s\nsh $ANCOM_s\nsh $LEfse_s\nsh $picrust_s\n";
    close($ot_all) or die "can't close $out: $!\n";
    
    # Print the command-line usage message and exit the program.
    # Note: rewritten with explicit prints because the original heredoc
    # terminator (USAGE) was indented and therefore never terminated the
    # heredoc as the source stood.
    sub usage{
        print
            "usage:\n",
            "    perl $0 -f1 <fastq file> -f2 <sample group> -pf <forward primer> -pr <reverse primer> -t <threads> -m <dada2 or deblur> -c <classifier> -d <depth> -co <column> -o <out> \n",
            "options:\n",
            "    -f1|fastq file       :[essential]. \n",
            "    -f2|sample group     :[essential].\n",
            "    -pf|forward          :[essential].\n",
            "    -pr|reverse          :[essential].\n",
            "    -t|threads           :[essential].\n",
            "    -m|denoise           :[essential] dada2 or deblur. \n",
            "    -c|classifier        :[essential].\n",
            "    -d|depth             :[essential].\n",
            "    -co|column           :[essential].  \n",
            "    -o|out               :[essential].\n";
        exit;
    };
    
    # Print version and author information (does not exit; the caller decides).
    # Note: rewritten with explicit prints because the original heredoc
    # terminator (VERSION) was indented and therefore never terminated the
    # heredoc as the source stood.
    sub version {
        print
            "    version:    v1.0\n",
            "    update:     20201228 - 20201228\n",
            "    author:     zouhua1\@outlook.com\n";
    };
    

    运行

    perl qiime2_v0.pl -f1 pe-33-manifest-trimmed.tsv -f2 sample-metadata.tsv -pf CCTAYGGGRBGCASCAG -pr GGACTACNNGGGTATCTAAT -t 20 -c silva-138-99-nb-classifier.qza -d 60000 -co Group -o Run.all.sh
    

    最终目录结构

    Amplicon_analysis
    ├── RawData-> /data/user/zouhua/rawdata/
    ├── pe-33-manifest-trimmed.tsv
    ├── qiime2_v0.pl
    ├── result
    │   ├── 01.qza
    │   ├── 02.remove_primer
    │   ├── 03.ASV
    │   ├── 04.filter
    │   ├── 05.taxonomy
    │   ├── 06.diversity
    │   ├── 07.ANCOM
    │   ├── 08.LEfse
    │   ├── 09.Picrust2
    │   └── script
    │       ├── Run.s1.import.sh
    │       ├── Run.s2.primer.sh
    │       ├── Run.s3.ASV.sh
    │       ├── Run.s4.filter.sh
    │       ├── Run.s5.taxo.sh
    │       ├── Run.s6.diversity.sh
    │       ├── Run.s7.ANCOM.sh
    │       ├── Run.s8.LEfse.sh
    │       └── Run.s9.picrust.sh
    ├── Run.all.sh
    ├── sample-metadata.tsv
    ├── silva-138-99-nb-classifier.qza -> /data/share/database/qiime2_db/silva/silva-138-99-nb-classifier.qza
    └── work.sh
    

    引用

    参考文章如引起任何侵权问题,可以与我联系,谢谢。

    相关文章

      网友评论

          本文标题:扩增子分析:基于qiime2平台处理raw data的简单per

          本文链接:https://www.haomeiwen.com/subject/bucfoktx.html