WDL-第5学

作者: MR来了 | 来源:发表于2021-03-31 16:15 被阅读0次

    问题:用bam2fastx将bam文件转成fastq(或fasta)格式。从这个例子中学到的知识:

    • 学会写parameter_meta,用来介绍输入和输出,它还可以用于生成帮助文档
    • 同一个workflow根据不同的输入调用不同的task

    解决办法:

    来源于bioWDL的一个例子

    • bam2fastx.wdl
      • 先显示task
    version 1.0
    # Convert one or more BAM files into a single gzipped FASTA file by
    # running the `bam2fasta` command-line tool.
    #
    # Inputs:
    #   bam              - input BAM file(s); at least one is required.
    #   bamIndex         - index file(s) for the BAMs; linked next to the BAM links.
    #   outputPrefix     - output directory path + file prefix; ".fasta.gz" is appended.
    #   compressionLevel - forwarded to `bam2fasta -c`.
    #   splitByBarcode   - when true, adds `--split-barcodes`.
    #   preCommand       - shell snippet executed right before bam2fasta
    #                      (e.g. environment activation); empty by default.
    #   seqIdPrefix      - optional; when set, adds `--seqid-prefix <value>`.
    #   timeMinutes      - forwarded as the `time_minutes` runtime attribute.
    # Output:
    #   fastaFile        - outputPrefix + ".fasta.gz".
    task Bam2Fasta {
        input {
            # At least one file is required (non-empty arrays).
            Array[File]+ bam
            Array[File]+ bamIndex
            String outputPrefix
            Int compressionLevel = 1
            Boolean splitByBarcode = false
            String preCommand = ''

            String? seqIdPrefix
            Int timeMinutes = 15
        }
        command <<<
            set -e
            # Resolve the output location to an absolute path BEFORE changing
            # directory. Otherwise a relative outputPrefix would be interpreted
            # relative to the new working directory and would no longer match
            # the path used in the output section.
            outputDir="$(dirname "~{outputPrefix}")"
            mkdir -p "$outputDir"
            outputDir="$(cd "$outputDir" && pwd)"
            outputPath="$outputDir/$(basename "~{outputPrefix}")"

            # Hard-link the BAM files into the output directory (done before
            # the cd so relative input paths still resolve) and collect the
            # link names to pass to bam2fasta.
            bamFiles=""
            for bamFile in ~{sep=" " bam}
            do
                ln "$bamFile" "$outputDir/"
                bamFiles=$bamFiles" $(basename "$bamFile")"
            done

            # Link the index files next to the BAM links.
            for index in ~{sep=" " bamIndex}
            do
                ln "$index" "$outputDir/"
            done

            # Run from the output directory, where the links live.
            cd "$outputDir"
            ~{preCommand}
            bam2fasta \
            --output "$outputPath" \
            -c ~{compressionLevel} \
            ~{true="--split-barcodes" false="" splitByBarcode} \
            ~{"--seqid-prefix " + seqIdPrefix} \
            $bamFiles
        >>>
        output {
            File fastaFile = outputPrefix + ".fasta.gz"
        }
        runtime {
            # Previously timeMinutes was declared but never used.
            # NOTE(review): whether this attribute is honoured depends on the
            # backend configuration - confirm for your Cromwell setup.
            time_minutes: timeMinutes
        }
    }
    # Convert one or more BAM files into a single gzipped FASTQ file by
    # running the `bam2fastq` command-line tool.
    #
    # Inputs:
    #   bam              - input BAM file(s); at least one is required.
    #   bamIndex         - index file(s) for the BAMs; linked next to the BAM links.
    #   outputPrefix     - output directory path + file prefix; ".fastq.gz" is appended.
    #   compressionLevel - forwarded to `bam2fastq -c`.
    #   splitByBarcode   - when true, adds `--split-barcodes`.
    #   preCommand       - shell snippet executed right before bam2fastq
    #                      (e.g. environment activation); empty by default.
    #   seqIdPrefix      - optional; when set, adds `--seqid-prefix <value>`.
    #   timeMinutes      - forwarded as the `time_minutes` runtime attribute
    #                      (new, defaulted input; mirrors Bam2Fasta).
    # Output:
    #   fastqFile        - outputPrefix + ".fastq.gz".
    task Bam2Fastq {
        input {
            # At least one file is required (non-empty arrays).
            Array[File]+ bam
            Array[File]+ bamIndex
            String outputPrefix
            Int compressionLevel = 1
            Boolean splitByBarcode = false
            String preCommand = ''
            String? seqIdPrefix
            Int timeMinutes = 15
        }
        command <<<
            set -e
            # Resolve the output location to an absolute path BEFORE changing
            # directory. Otherwise a relative outputPrefix would be interpreted
            # relative to the new working directory and would no longer match
            # the path used in the output section.
            outputDir="$(dirname "~{outputPrefix}")"
            mkdir -p "$outputDir"
            outputDir="$(cd "$outputDir" && pwd)"
            outputPath="$outputDir/$(basename "~{outputPrefix}")"

            # Hard-link the BAM files into the output directory (done before
            # the cd so relative input paths still resolve) and collect the
            # link names to pass to bam2fastq.
            bamFiles=""
            for bamFile in ~{sep=" " bam}
            do
                ln "$bamFile" "$outputDir/"
                bamFiles=$bamFiles" $(basename "$bamFile")"
            done

            # Link the index files next to the BAM links.
            for index in ~{sep=" " bamIndex}
            do
                ln "$index" "$outputDir/"
            done

            # Run from the output directory, where the links live.
            cd "$outputDir"
            ~{preCommand}
            bam2fastq \
            --output "$outputPath" \
            -c ~{compressionLevel} \
            ~{true="--split-barcodes" false="" splitByBarcode} \
            ~{"--seqid-prefix " + seqIdPrefix} \
            $bamFiles
        >>>
        output {
            File fastqFile = outputPrefix + ".fastq.gz"
        }
        runtime {
            # NOTE(review): whether this attribute is honoured depends on the
            # backend configuration - confirm for your Cromwell setup.
            time_minutes: timeMinutes
        }
    }
    
    • 再显示workflow
    # Convert BAM file(s) to FASTQ and/or FASTA, depending on two switches.
    #
    # bam2fastq_exec and bam2fasta_exec independently decide whether the
    # Bam2Fastq and Bam2Fasta tasks are called; all other inputs are passed
    # through unchanged to whichever task runs.
    #
    # No workflow-level output section is declared. The task outputs are
    # Bam2Fastq.fastqFile and Bam2Fasta.fastaFile; the output of a call that
    # was skipped is undefined (null).
    workflow wf_bam2fastx {
        input {
            Boolean bam2fastq_exec = true
            Boolean bam2fasta_exec = false
            Array[File]+ bam
            Array[File]+ bamIndex
            String outputPrefix
            Int compressionLevel = 1
            Boolean splitByBarcode = false
            String preCommand = ''
            String? seqIdPrefix
        }
        # FASTQ conversion, only when requested.
        if (bam2fastq_exec) {
            call Bam2Fastq {
                input:
                    bam = bam,
                    bamIndex = bamIndex,
                    outputPrefix = outputPrefix,
                    compressionLevel = compressionLevel,
                    splitByBarcode = splitByBarcode,
                    seqIdPrefix = seqIdPrefix,
                    preCommand = preCommand
            }
        }
        # FASTA conversion, only when requested.
        if (bam2fasta_exec) {
            call Bam2Fasta {
                input:
                    bam = bam,
                    bamIndex = bamIndex,
                    outputPrefix = outputPrefix,
                    compressionLevel = compressionLevel,
                    splitByBarcode = splitByBarcode,
                    seqIdPrefix = seqIdPrefix,
                    preCommand = preCommand
            }
        }
        parameter_meta {
            # inputs
            # Every key below is a declared workflow input. The former
            # `fastaFile` key was dropped because it names a task output,
            # not a workflow input or output.
            bam2fastq_exec: {description: "Run the Bam2Fastq task (BAM to fastq.gz).", category: "common"}
            bam2fasta_exec: {description: "Run the Bam2Fasta task (BAM to fasta.gz).", category: "common"}
            bam: {description: "The input pacbio bam file(s).", category: "required"}
            bamIndex: {description: "The .pbi index for the input file(s).", category: "required"}
            outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
            compressionLevel: {description: "Gzip compression level [1-9].", category: "advanced"}
            splitByBarcode: {description: "Split output into multiple fastq/fasta files, by barcode pairs.", category: "advanced"}
            preCommand: {description: "Shell command(s) run before the converter, e.g. to set up the environment.", category: "advanced"}
            seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"}
        }
    }
    
    • 输入json
    {
      "wf_bam2fastx.preCommand": "unset PYTHONPATH && export PATH=\"/your_path/bin:$PATH\" && source activate /your_path/envs/ccs_env",
      "wf_bam2fastx.compressionLevel": 1,
      "wf_bam2fastx.bam": ["/your_path/PM.1029.ccs.bam", "/your_path/PM.0033.ccs.bam"],
      "wf_bam2fastx.splitByBarcode": false,
      "wf_bam2fastx.outputPrefix": "/your_path/bam2fastx_out/CCS",
      "wf_bam2fastx.bamIndex": ["/your_path/PM.1029.ccs.bam.pbi", "/your_path/PM.0033.ccs.bam.pbi"],
      "wf_bam2fastx.bam2fastq_exec": true,
      "wf_bam2fastx.bam2fasta_exec": false
    }
    
    • 最后执行及输出
    java -jar cromwell-57.jar run bam2fastx.wdl --inputs bam2fastx.wdl.json
    {
      "wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz",
      "wf_bam2fastx.Bam2Fasta.fastaFile": null
    }
    [2021-03-31 16:03:40,49] [info] WorkflowManagerActor WorkflowActor-6f9f888d-fefa-4c20-8199-820645949ec0 is in a terminal state: WorkflowSucceededState
    [2021-03-31 16:03:45,36] [info] SingleWorkflowRunnerActor workflow finished with status 'Succeeded'.
    {
      "outputs": {
        "wf_bam2fastx.Bam2Fasta.fastaFile": null,
        "wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz"
      },
      "id": "6f9f888d-fefa-4c20-8199-820645949ec0"
    }
    
    

    相关文章

      网友评论

        本文标题:WDL-第5学

        本文链接:https://www.haomeiwen.com/subject/iajohltx.html