问题:用 bam2fastx 将 bam 文件转成 fastq 格式。从中学到的知识:
- 学会写 parameter_meta 来描述输入和输出,它可以用来帮助生成帮助文档
- 同一个workflow根据不同的输入调用不同的task
解决办法:
来源于bioWDL的一个例子
- bam2fastx.wdl
- 先显示
task
- 先显示
version 1.0
# Convert one or more PacBio BAM files into a single gzipped FASTA file by
# invoking the `bam2fasta` command-line tool.
task Bam2Fasta {
    input {
        # Non-empty arrays: at least one BAM and one index file are required.
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''
        String? seqIdPrefix
        # Kept for interface compatibility; nothing in this task reads it.
        Int timeMinutes = 15
    }

    command <<<
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"

        # Hard-link every BAM and every index into the task's own working
        # directory so that they sit next to each other. The script
        # deliberately does NOT cd into the output directory:
        #   * a relative outputPrefix would otherwise be resolved twice
        #     (once by cd, once by --output);
        #   * linking into a shared output directory makes `ln` fail with
        #     "File exists" on reruns or when the sibling task uses the same
        #     prefix.
        bamFiles=""
        for bamFile in ~{sep=" " bam}
        do
            ln "$bamFile" .
            bamFiles=$bamFiles" $(basename "$bamFile")"
        done

        for index in ~{sep=" " bamIndex}
        do
            ln "$index" .
        done

        # Optional user-supplied setup (empty by default).
        ~{preCommand}

        bam2fasta \
            --output ~{outputPrefix} \
            -c ~{compressionLevel} \
            ~{true="--split-barcodes" false="" splitByBarcode} \
            ~{"--seqid-prefix " + seqIdPrefix} \
            $bamFiles
    >>>

    output {
        # NOTE(review): with splitByBarcode = true the tool presumably writes
        # per-barcode files instead of this single file - confirm before
        # enabling that option.
        File fastaFile = outputPrefix + ".fasta.gz"
    }

    parameter_meta {
        # inputs
        bam: {description: "The input pacbio bam file(s).", category: "required"}
        bamIndex: {description: "The .pbi index for the input file(s).", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        compressionLevel: {description: "Gzip compression level [1-9].", category: "advanced"}
        splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"}
        preCommand: {description: "Shell command(s) run right before bam2fasta, e.g. to set up the environment.", category: "advanced"}
        seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"}
        timeMinutes: {description: "Declared but currently not used by this task.", category: "advanced"}

        # outputs
        fastaFile: {description: "The fasta output file."}
    }
}
# Convert one or more PacBio BAM files into a single gzipped FASTQ file by
# invoking the `bam2fastq` command-line tool.
task Bam2Fastq {
    input {
        # Non-empty arrays: at least one BAM and one index file are required.
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''
        String? seqIdPrefix
    }

    command <<<
        set -e
        mkdir -p "$(dirname ~{outputPrefix})"

        # Hard-link every BAM and every index into the task's own working
        # directory so that they sit next to each other. The script
        # deliberately does NOT cd into the output directory:
        #   * a relative outputPrefix would otherwise be resolved twice
        #     (once by cd, once by --output);
        #   * linking into a shared output directory makes `ln` fail with
        #     "File exists" on reruns or when the sibling task uses the same
        #     prefix.
        bamFiles=""
        for bamFile in ~{sep=" " bam}
        do
            ln "$bamFile" .
            bamFiles=$bamFiles" $(basename "$bamFile")"
        done

        for index in ~{sep=" " bamIndex}
        do
            ln "$index" .
        done

        # Optional user-supplied setup (empty by default).
        ~{preCommand}

        bam2fastq \
            --output ~{outputPrefix} \
            -c ~{compressionLevel} \
            ~{true="--split-barcodes" false="" splitByBarcode} \
            ~{"--seqid-prefix " + seqIdPrefix} \
            $bamFiles
    >>>

    output {
        # NOTE(review): with splitByBarcode = true the tool presumably writes
        # per-barcode files instead of this single file - confirm before
        # enabling that option.
        File fastqFile = outputPrefix + ".fastq.gz"
    }

    parameter_meta {
        # inputs
        bam: {description: "The input pacbio bam file(s).", category: "required"}
        bamIndex: {description: "The .pbi index for the input file(s).", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        compressionLevel: {description: "Gzip compression level [1-9].", category: "advanced"}
        splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"}
        preCommand: {description: "Shell command(s) run right before bam2fastq, e.g. to set up the environment.", category: "advanced"}
        seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"}

        # outputs
        fastqFile: {description: "The fastq output file."}
    }
}
- 再显示
workflow
# Convert PacBio BAM files to FASTQ and/or FASTA. Each conversion is an
# optional call that is switched on by its own Boolean input, so a single
# workflow can run either task, both, or neither.
workflow wf_bam2fastx {
    input {
        Boolean bam2fastq_exec = true
        Boolean bam2fasta_exec = false
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''
        String? seqIdPrefix
    }

    # BAM -> fastq.gz, only when requested.
    if (bam2fastq_exec) {
        call Bam2Fastq {
            input:
                bam = bam,
                bamIndex = bamIndex,
                outputPrefix = outputPrefix,
                compressionLevel = compressionLevel,
                splitByBarcode = splitByBarcode,
                seqIdPrefix = seqIdPrefix,
                preCommand = preCommand
        }
    }

    # BAM -> fasta.gz, only when requested.
    if (bam2fasta_exec) {
        call Bam2Fasta {
            input:
                bam = bam,
                bamIndex = bamIndex,
                outputPrefix = outputPrefix,
                compressionLevel = compressionLevel,
                splitByBarcode = splitByBarcode,
                seqIdPrefix = seqIdPrefix,
                preCommand = preCommand
        }
    }

    # No output section is declared on purpose: the call outputs are reported
    # as wf_bam2fastx.Bam2Fastq.fastqFile and wf_bam2fastx.Bam2Fasta.fastaFile,
    # and the output of a call that was skipped is null.

    parameter_meta {
        # inputs
        bam2fastq_exec: {description: "Run the Bam2Fastq task (bam to fastq.gz).", category: "common"}
        bam2fasta_exec: {description: "Run the Bam2Fasta task (bam to fasta.gz).", category: "common"}
        bam: {description: "The input pacbio bam file(s).", category: "required"}
        bamIndex: {description: "The .pbi index for the input file(s).", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        compressionLevel: {description: "Gzip compression level [1-9].", category: "advanced"}
        splitByBarcode: {description: "Split output into multiple files, by barcode pairs.", category: "advanced"}
        preCommand: {description: "Shell command(s) run inside each task right before the converter, e.g. to set up the environment.", category: "advanced"}
        seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"}
    }
}
- 输入
json
{
"wf_bam2fastx.preCommand": "unset PYTHONPATH && export PATH=\"/your_path/bin:$PATH\" && source activate /your_path/envs/ccs_env",
"wf_bam2fastx.compressionLevel": 1,
"wf_bam2fastx.bam": ["/your_path/PM.1029.ccs.bam", "/your_path/PM.0033.ccs.bam"],
"wf_bam2fastx.splitByBarcode": false,
"wf_bam2fastx.outputPrefix": "/your_path/bam2fastx_out/CCS",
"wf_bam2fastx.bamIndex": ["/your_path/PM.1029.ccs.bam.pbi", "/your_path/PM.0033.ccs.bam.pbi"],
"wf_bam2fastx.bam2fastq_exec": true,
"wf_bam2fastx.bam2fasta_exec": false
}
- 最后执行及输出
java -jar cromwell-57.jar run bam2fastx.wdl --inputs bam2fastx.wdl.json
{
"wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz",
"wf_bam2fastx.Bam2Fasta.fastaFile": null
}
[2021-03-31 16:03:40,49] [info] WorkflowManagerActor WorkflowActor-6f9f888d-fefa-4c20-8199-820645949ec0 is in a terminal state: WorkflowSucceededState
[2021-03-31 16:03:45,36] [info] SingleWorkflowRunnerActor workflow finished with status 'Succeeded'.
{
"outputs": {
"wf_bam2fastx.Bam2Fasta.fastaFile": null,
"wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz"
},
"id": "6f9f888d-fefa-4c20-8199-820645949ec0"
}
网友评论