借鉴实例 1
# 定义命令行解析函数, 返回值为对象
#!/usr/bin/env python3
import argparse
def parse_arguments() -> argparse.Namespace:
    """Build the seqrepo command-line parser and parse the process arguments.

    A top-level parser carries the global options; every subcommand gets its
    own sub-parser and registers its handler through ``set_defaults(func=...)``,
    so the handler for the chosen subcommand is available as ``opts.func``.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    top_p = argparse.ArgumentParser(
        description=__doc__.split("\n\n")[0],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        epilog="seqrepo " + __version__ + ". See https://github.com/biocommons/biocommons.seqrepo for more information")
    top_p.add_argument("--dry-run", "-n", default=False, action="store_true")
    top_p.add_argument("--remote-host", default="dl.biocommons.org", help="rsync server host")
    top_p.add_argument(
        "--root-directory", "-r", default=SEQREPO_ROOT_DIR, help="seqrepo root directory (SEQREPO_ROOT_DIR)")
    top_p.add_argument("--rsync-exe", default="/usr/bin/rsync", help="path to rsync executable")
    top_p.add_argument("--verbose", "-v", action="count", default=0, help="be verbose; multiple accepted")
    top_p.add_argument("--version", action="version", version=__version__)

    # dest and required bits are to work around a bug in the Python 3 version of argparse
    # when no subcommands are provided
    # https://stackoverflow.com/questions/22990977/why-does-this-argparse-code-behave-differently-between-python-2-and-3
    # http://bugs.python.org/issue9253#msg186387
    subparsers = top_p.add_subparsers(title="subcommands", dest="_subcommands")
    subparsers.required = True

    # add-assembly-names
    ap = subparsers.add_parser(
        "add-assembly-names", help="add assembly aliases (from bioutils.assemblies) to existing sequences")
    ap.set_defaults(func=add_assembly_names)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW,
        help="instance name; must be writeable (i.e., not a snapshot)")
    ap.add_argument(
        "--partial-load", "-p", default=False, action="store_true",
        help="assign assembly aliases even if some sequences are missing")
    ap.add_argument(
        "--reload-all", "-r", default=False, action="store_true", help="reload all assemblies, not just missing ones")

    # export
    ap = subparsers.add_parser("export", help="export sequences")
    ap.set_defaults(func=export)
    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name")

    # fetch-load
    ap = subparsers.add_parser("fetch-load", help="fetch remote sequences by accession and load them (low-throughput!)")
    ap.set_defaults(func=fetch_load)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW,
        help="instance name; must be writeable (i.e., not a snapshot)")
    ap.add_argument(
        "accessions",
        nargs="+",
        help="accessions (NCBI or Ensembl)", )
    ap.add_argument(
        "--namespace",
        "-n",
        required=True,
        help="namespace name (e.g., NCBI, Ensembl, LRG)", )

    # init
    ap = subparsers.add_parser("init", help="initialize seqrepo directory")
    ap.set_defaults(func=init)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW,
        help="instance name; must be writeable (i.e., not a snapshot)")

    # list-local-instances
    ap = subparsers.add_parser("list-local-instances", help="list local seqrepo instances")
    ap.set_defaults(func=list_local_instances)

    # list-remote-instances
    ap = subparsers.add_parser("list-remote-instances", help="list remote seqrepo instances")
    ap.set_defaults(func=list_remote_instances)

    # load
    ap = subparsers.add_parser("load", help="load a single fasta file")
    ap.set_defaults(func=load)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW,
        help="instance name; must be writeable (i.e., not a snapshot)")
    ap.add_argument(
        "fasta_files",
        nargs="+",
        help="fasta files to load (compressed okay)", )
    ap.add_argument(
        "--namespace",
        "-n",
        required=True,
        help="namespace name (e.g., NCBI, Ensembl, LRG)", )

    # pull
    ap = subparsers.add_parser("pull", help="pull incremental update from seqrepo mirror")
    ap.set_defaults(func=pull)
    ap.add_argument("--instance-name", "-i", default=None, help="instance name")
    ap.add_argument(
        "--update-latest", "-l", default=False, action="store_true",
        help="set latest symlink to point to this instance")

    # show-status
    ap = subparsers.add_parser("show-status", help="show seqrepo status")
    ap.set_defaults(func=show_status)
    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name")

    # snapshot
    ap = subparsers.add_parser("snapshot", help="create a new read-only seqrepo snapshot")
    ap.set_defaults(func=snapshot)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")
    # Default destination is today's UTC date. "%Y-%m-%d" is spelled out
    # because the "%F" shorthand is a C-library extension that is not
    # available on every platform; an aware "now" replaces the deprecated
    # utcnow() and yields the same date.
    ap.add_argument("--destination-name", "-d",
                    default=datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d"),
                    help="destination directory name (must not already exist)")

    # start-shell
    ap = subparsers.add_parser("start-shell", help="start interactive shell with initialized seqrepo")
    ap.set_defaults(func=start_shell)
    ap.add_argument("--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RO, help="instance name")

    # upgrade
    ap = subparsers.add_parser("upgrade", help="upgrade seqrepo database and directory")
    ap.set_defaults(func=upgrade)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")

    # update digests
    ap = subparsers.add_parser("update-digests", help="update computed digests in place")
    ap.set_defaults(func=update_digests)
    ap.add_argument(
        "--instance-name", "-i", default=DEFAULT_INSTANCE_NAME_RW, help="instance name; must be writeable")

    # update latest (symlink)
    ap = subparsers.add_parser("update-latest", help="create symlink `latest` to newest seqrepo instance")
    ap.set_defaults(func=update_latest)

    opts = top_p.parse_args()
    return opts
效果
$seqrepo -h
usage: seqrepo [-h] [--dry-run] [--remote-host REMOTE_HOST]
[--root-directory ROOT_DIRECTORY] [--rsync-exe RSYNC_EXE]
[--verbose] [--version]
{add-assembly-names,export,fetch-load,init,list-local-instances,list-remote-instances,load,pull,show-status,snapshot,start-shell,upgrade,update-digests,update-latest}
...
command line interface to a local SeqRepo repository
optional arguments:
-h, --help show this help message and exit
--dry-run, -n
--remote-host REMOTE_HOST
rsync server host (default: dl.biocommons.org)
--root-directory ROOT_DIRECTORY, -r ROOT_DIRECTORY
seqrepo root directory (SEQREPO_ROOT_DIR) (default:
/usr/local/share/seqrepo)
--rsync-exe RSYNC_EXE
path to rsync executable (default: /usr/bin/rsync)
--verbose, -v be verbose; multiple accepted (default: 0)
--version show program's version number and exit
subcommands:
{add-assembly-names,export,fetch-load,init,list-local-instances,list-remote-instances,load,pull,show-status,snapshot,start-shell,upgrade,update-digests,update-latest}
add-assembly-names add assembly aliases (from bioutils.assemblies) to
existing sequences
export export sequences
fetch-load fetch remote sequences by accession and load them
(low-throughput!)
init initialize seqrepo directory
list-local-instances
list local seqrepo instances
list-remote-instances
list remote seqrepo instances
load load a single fasta file
pull pull incremental update from seqrepo mirror
show-status show seqrepo status
snapshot create a new read-only seqrepo snapshot
start-shell start interactive shell with initialized seqrepo
upgrade upgrade seqrepo database and directory
update-digests update computed digests in place
update-latest create symlink `latest` to newest seqrepo instance
seqrepo 0.5.2. See https://github.com/biocommons/biocommons.seqrepo for more
information
使用方法拆解
实例 2
def _parse_args():
    """
    Parse the arguments provided by the user.

    Before parsing, ``sys.argv`` is rewritten in place: a value that starts
    with a dash and directly follows the Augustus option gets a leading space
    so that argparse reads it as the option's value and not as another option.

    :return: a dictionary having a key for each argument
    :rtype: dict
    """
    # small hack to get sub-parameters with dash and pass it to Augustus
    # - startswith() replaces the old ``arg[0] == '--'`` test, which could
    #   never be true, and no longer raises IndexError on an empty argument.
    # - the scan starts at 1 so that sys.argv[0] is never compared against
    #   the *last* argument through a negative index.
    # - the full option name is recognised as well, since that is the
    #   spelling actually declared below.
    augustus_flags = ('-a', '--augustus', '--augustus_parameters')
    for i in range(1, len(sys.argv)):
        if sys.argv[i].startswith('-') and sys.argv[i - 1] in augustus_flags:
            sys.argv[i] = ' ' + sys.argv[i]

    # add_help=False: -h/--help is declared by hand at the end so that it is
    # listed inside the same 'optional arguments' group as everything else.
    parser = argparse.ArgumentParser(
        description='Welcome to BUSCO %s: the Benchmarking Universal Single-Copy Ortholog assessment tool.\n'
                    'For more detailed usage information, please review the README file provided with '
                    'this distribution and the BUSCO user guide.' % BuscoConfig.VERSION,
        usage='python BUSCO.py -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]',
        formatter_class=RawTextHelpFormatter, add_help=False)

    optional = parser.add_argument_group('optional arguments')

    optional.add_argument(
        '-i', '--in', dest='in', required=False, metavar='FASTA FILE', help='Input sequence file in FASTA format. '
        'Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set.')

    optional.add_argument(
        '-c', '--cpu', dest='cpu', required=False, metavar='N', help='Specify the number (N=integer) '
                                                                     'of threads/cores to use.')
    optional.add_argument(
        '-o', '--out', dest='out', required=False, metavar='OUTPUT',
        help='Give your analysis run a recognisable short name. '
             'Output folders and files will be labelled with this name. WARNING: do not provide a path')

    optional.add_argument(
        '-e', '--evalue', dest='evalue', required=False, metavar='N', type=float,
        help='E-value cutoff for BLAST searches. '
             'Allowed formats, 0.001 or 1e-03 (Default: %.0e)' % BuscoConfig.DEFAULT_ARGS_VALUES['evalue'])

    optional.add_argument(
        '-m', '--mode', dest='mode', required=False, metavar='MODE',
        help='Specify which BUSCO analysis mode to run.\n'
             'There are three valid modes:\n- geno or genome, for genome assemblies (DNA)\n- tran or '
             'transcriptome, '
             'for transcriptome assemblies (DNA)\n- prot or proteins, for annotated gene sets (protein)')
    optional.add_argument(
        '-l', '--lineage_path', dest='lineage_path', required=False, metavar='LINEAGE',
        help='Specify location of the BUSCO lineage data to be used.\n'
             'Visit http://busco.ezlab.org for available lineages.')

    optional.add_argument(
        '-f', '--force', action='store_true', required=False, dest='force',
        help='Force rewriting of existing files. '
             'Must be used when output files with the provided name already exist.')

    optional.add_argument(
        '-r', '--restart', action='store_true', required=False, dest='restart',
        help='Restart an uncompleted run. Not available for the protein mode')

    optional.add_argument(
        '-sp', '--species', required=False, dest='species', metavar='SPECIES',
        help='Name of existing Augustus species gene finding parameters. '
             'See Augustus documentation for available options.')

    optional.add_argument('--augustus_parameters', required=False, dest='augustus_parameters',
                          help='Additional parameters for the fine-tuning of Augustus run. '
                               'For the species, do not use this option.\n'
                               'Use single quotes as follow: \'--param1=1 --param2=2\', '
                               'see Augustus documentation for available options.')

    optional.add_argument(
        '-t', '--tmp_path', metavar='PATH', required=False, dest='tmp_path',
        help='Where to store temporary files (Default: %s)' % BuscoConfig.DEFAULT_ARGS_VALUES['tmp_path'])

    optional.add_argument(
        '--limit', dest='limit', metavar='REGION_LIMIT', required=False,
        type=int, help='How many candidate regions (contig or transcript) to consider per BUSCO (default: %s)'
                       % str(BuscoConfig.DEFAULT_ARGS_VALUES['limit']))

    optional.add_argument(
        '--long', action='store_true', required=False, dest='long',
        help='Optimization mode Augustus '
             'self-training (Default: Off) adds considerably to the run time, '
             'but can improve results for some non-model organisms')

    optional.add_argument(
        '-q', '--quiet', dest='quiet', required=False, help='Disable the info logs, displays only errors',
        action="store_true")

    optional.add_argument(
        '-z', '--tarzip', dest='tarzip', required=False, help='Tarzip the output folders likely to '
                                                              'contain thousands of files',
        action="store_true")

    optional.add_argument(
        '--blast_single_core', dest='blast_single_core', required=False,
        help='Force tblastn to run on a single core and ignore the --cpu argument for this step only. '
             'Useful if inconsistencies when using multiple threads are noticed',
        action="store_true")

    optional.add_argument('-v', '--version', action='version', help="Show this version and exit",
                          version='BUSCO %s' % BuscoConfig.VERSION)

    optional.add_argument('-h', '--help', action="help", help="Show this help message and exit")

    # vars() turns the Namespace into a plain dict keyed by each option's dest.
    return vars(parser.parse_args())
效果
$run_BUSCO.py -h
usage: python BUSCO.py -i [SEQUENCE_FILE] -l [LINEAGE] -o [OUTPUT_NAME] -m [MODE] [OTHER OPTIONS]
Welcome to BUSCO 3.0.2: the Benchmarking Universal Single-Copy Ortholog assessment tool.
For more detailed usage information, please review the README file provided with this distribution and the BUSCO user guide.
optional arguments:
-i FASTA FILE, --in FASTA FILE
Input sequence file in FASTA format. Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set.
-c N, --cpu N Specify the number (N=integer) of threads/cores to use.
-o OUTPUT, --out OUTPUT
Give your analysis run a recognisable short name. Output folders and files will be labelled with this name. WARNING: do not provide a path
-e N, --evalue N E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03 (Default: 1e-03)
-m MODE, --mode MODE Specify which BUSCO analysis mode to run.
There are three valid modes:
- geno or genome, for genome assemblies (DNA)
- tran or transcriptome, for transcriptome assemblies (DNA)
- prot or proteins, for annotated gene sets (protein)
-l LINEAGE, --lineage_path LINEAGE
Specify location of the BUSCO lineage data to be used.
Visit http://busco.ezlab.org for available lineages.
-f, --force Force rewriting of existing files. Must be used when output files with the provided name already exist.
-r, --restart Restart an uncompleted run. Not available for the protein mode
-sp SPECIES, --species SPECIES
Name of existing Augustus species gene finding parameters. See Augustus documentation for available options.
--augustus_parameters AUGUSTUS_PARAMETERS
Additional parameters for the fine-tuning of Augustus run. For the species, do not use this option.
Use single quotes as follow: '--param1=1 --param2=2', see Augustus documentation for available options.
-t PATH, --tmp_path PATH
Where to store temporary files (Default: ./tmp/)
--limit REGION_LIMIT How many candidate regions (contig or transcript) to consider per BUSCO (default: 3)
--long Optimization mode Augustus self-training (Default: Off) adds considerably to the run time, but can improve results for some non-model organisms
-q, --quiet Disable the info logs, displays only errors
-z, --tarzip Tarzip the output folders likely to contain thousands of files
--blast_single_core Force tblastn to run on a single core and ignore the --cpu argument for this step only. Useful if inconsistencies when using multiple threads are noticed
-v, --version Show this version and exit
-h, --help Show this help message and exit
拆解
argparse 官方文档
https://docs.python.org/zh-cn/3/library/argparse.html#argumentparser-objects
网友评论