这是我刚开始学习 Python 和生物信息学时写的一个练手脚本，写完之后成就感满满；结果不久就发现了一个处理序列的好工具 seqtk，才觉得这个脚本其实没有什么实际用处。
import random
# Default locations and sample size used when the file is run as a script.
INPUT_FASTQ = r"/its1/PROJECT/test/C1.fq"
OUTPUT_FASTQ = r"/its1/PROJECT/test/100000.fq"
SAMPLE_SIZE = 100000

# The script treats every consecutive group of four lines as one record.
LINES_PER_RECORD = 4


def subsample_fastq(input_path, output_path, sample_size):
    """Write a random subset of four-line records from one file to another.

    The whole input is read into memory, ``sample_size`` distinct records
    are drawn without replacement, and each chosen record is written with
    surrounding whitespace stripped from every line and a single ``"\\n"``
    appended. Records are written in the order they were drawn, not in
    their original file order.

    Args:
        input_path: Path of the file to sample from. Lines are grouped
            four at a time; trailing lines that do not fill a complete
            group are ignored.
        output_path: Path of the file to create or overwrite.
        sample_size: Number of records to draw.

    Returns:
        The number of records written (equal to ``sample_size``).

    Raises:
        ValueError: If ``sample_size`` exceeds the number of complete
            records in the input, or is negative.
    """
    with open(input_path, "r") as input_file:
        lines = input_file.readlines()

    total_records = len(lines) // LINES_PER_RECORD
    if sample_size > total_records:
        raise ValueError(
            "cannot sample {} records: {} contains only {}".format(
                sample_size, input_path, total_records
            )
        )

    # Sampling zero-based record indices selects the same records, in the
    # same order, as sampling 1..N and subtracting one afterwards.
    chosen_records = random.sample(range(total_records), sample_size)

    # The output is opened only after sampling has succeeded, so a failure
    # above never truncates an existing file; the context manager ensures
    # the data is flushed and the handle closed.
    with open(output_path, "w") as output_file:
        for record_index in chosen_records:
            start = record_index * LINES_PER_RECORD
            output_file.writelines(
                line.strip() + "\n"
                for line in lines[start:start + LINES_PER_RECORD]
            )
    return sample_size


def main():
    """Run the subsampling with the script's default paths and size."""
    subsample_fastq(INPUT_FASTQ, OUTPUT_FASTQ, SAMPLE_SIZE)


if __name__ == "__main__":
    main()
网友评论