# Compute the pan genes in the mutants
import sys
from itertools import combinations
def read_sample_names(path):
    """Return the unique sample names listed in *path*, in first-seen order.

    The sample name is the first tab-separated column of each line. Lines
    that are empty after stripping whitespace are skipped, because an empty
    name would later be used as a file path and fail to open.
    """
    names = []
    seen = set()  # O(1) duplicate check; ``names`` keeps the file order
    with open(path, 'r') as handle:
        for line in handle:
            name = line.strip().split('\t')[0]
            if name and name not in seen:
                seen.add(name)
                names.append(name)
    return names


def read_gene_set(path):
    """Return the set of gene names found in the sample file *path*.

    The gene name is the first tab-separated column of each line; blank
    lines are ignored so they are not counted as a gene named ``''``.
    """
    genes = set()
    with open(path, 'r') as handle:
        for line in handle:
            gene = line.strip().split('\t')[0]
            if gene:
                genes.add(gene)
    return genes


def write_pan_gene_counts(sample_names, out_handle):
    """Write one line per non-empty combination of *sample_names*.

    Each sample name is also the path of that sample's gene file. For every
    combination (all of size 1, then all of size 2, ... up to all samples)
    a line ``name1+name2+...<TAB>count`` is written to *out_handle*, where
    ``count`` is the number of distinct genes in the union of those samples.
    """
    # Parse every sample file exactly once; the original re-read each file
    # for every combination it took part in. All gene sets stay in memory.
    gene_sets = {name: read_gene_set(name) for name in sample_names}
    for size in range(1, len(sample_names) + 1):
        # Combinations are streamed rather than stored in one big list.
        for combo in combinations(sample_names, size):
            pan_genes = set().union(*(gene_sets[name] for name in combo))
            out_handle.write('+'.join(combo) + '\t' + str(len(pan_genes)) + '\n')


def main(argv):
    """Run the script: ``argv[1]`` is the sample-list file, ``argv[2]`` the output file."""
    if len(argv) < 3:
        sys.exit('usage: ' + argv[0] + ' <sample_list_file> <output_file>')
    sample_names = read_sample_names(argv[1])
    # ``with`` guarantees the output is flushed and closed even on error.
    with open(argv[2], 'w') as out:
        write_pan_gene_counts(sample_names, out)


if __name__ == '__main__':
    main(sys.argv)
# (stray web-page text, not part of the script: "User comments")