#!/usr/bin/env python
# coding=utf-8
import argparse
def get_parser():
    '''
    Build the command-line parser for the genome binning script.

    Options:
        -i / --input    path of the reference genome in FASTA format (str)
        -b / --binsize  length of each bin (int)

    Both options are declared with ``nargs=1``, so argparse stores each
    parsed value as a one-element list (e.g. ``args.binsize == [1000]``);
    callers have to take element ``[0]``.  Neither option is marked as
    required, so an omitted option is left as ``None``.

    Returns:
        The configured ``argparse.ArgumentParser`` instance.
    '''
    parser = argparse.ArgumentParser(
        description='Scattering genome sequence to bins with assigned length')
    parser.add_argument(
        '-i', '--input', type=str, nargs=1,
        help='input the reference genome file with fasta format')
    parser.add_argument(
        '-b', '--binsize', type=int, nargs=1,
        help='bin size')
    return parser
def binGenome(fasta, binsize):
    '''
    Cut every record of a FASTA file into consecutive bins and print them.

    Each record is split head to tail into pieces of ``binsize`` characters;
    the last piece of a record is shorter when the record length is not a
    multiple of ``binsize``.  Every piece is written to standard output as a
    two-line FASTA entry::

        >{name}_{start}-{end}
        {sequence[start:end]}

    where ``start`` is 0-based and ``end`` is exclusive (Python slice
    coordinates).

    Args:
        fasta: path of the FASTA file to read.
        binsize: bin length, a positive integer.

    Raises:
        ValueError: if ``binsize`` is smaller than 1, or if sequence data
            appears before the first ``>`` header line.
    '''
    # range() silently yields nothing for a negative step and fails with an
    # unrelated-looking message for 0, so reject bad sizes up front.
    if binsize < 1:
        raise ValueError(
            "binsize must be a positive integer, got %r" % (binsize,))

    # Record name -> list of sequence fragments.  Fragments are joined once
    # per record later instead of growing a string line by line.
    records = {}
    fragments = None
    with open(fasta) as fa:
        for line in fa:
            if line.startswith(">"):
                ac = line.strip().split(">")[1]
                # A repeated name replaces the earlier record of that name.
                fragments = records[ac] = []
                continue
            piece = line.strip()
            if not piece:
                # Blank lines carry no sequence data.
                continue
            if fragments is None:
                raise ValueError(
                    "sequence data found before the first '>' header in %s"
                    % (fasta,))
            fragments.append(piece)

    for ac, parts in records.items():
        seq = "".join(parts)
        seqlen = len(seq)
        for start in range(0, seqlen, binsize):
            # Clamp the final bin to the end of the sequence.
            end = min(start + binsize, seqlen)
            print(">%s_%d-%d" % (ac, start, end))
            print(seq[start:end])
# Reader comments (web-page residue; not part of the script)