import os.path
# Directory tree that is walked for input tables, and the (flat) directory that
# receives one output file per input file, under the same file name.
sig_dir = "D://project//gff//out//"
w_dir = "D://project//gff//cgc_table5//"

# Largest gap, counted in input lines, allowed between two consecutive
# signature genes of the same cluster ("yuzhi" is presumably pinyin for
# "threshold").
yuzhi = 4

# A cluster is only written out when every one of these signature kinds
# occurs in it at least once.
_REQUIRED_KINDS = frozenset(["CAZyme", "TC-DB", "TF"])


def _emit_cluster(out, lines, members, label):
    """Write all input lines spanned by one cluster, then a "+++" terminator.

    members -- entries ``[line_index, kind, gap_to_prev, gap_to_next]`` in
               file order; the span runs from the first to the last member.
    label   -- text such as ``<contig>-CGC<n>`` inserted before each line.

    Every output line is: four space-terminated columns, the label, a space,
    and the unchanged input line.  The four columns are the member entry for
    signature lines and four ``null`` fields for the lines in between.
    """
    cursor = 0
    for index in range(members[0][0], members[-1][0] + 1):
        raw = lines[index]
        fields = raw.split()
        if not fields:
            # Blank line inside the span: nothing to annotate.
            continue
        if fields[-1] == "null":
            prefix = "null " * 4
        else:
            # Assumes the last column is non-"null" exactly on the lines that
            # were recorded as members -- TODO confirm against the input
            # format; a mismatch would misalign ``cursor``.
            prefix = "".join(str(value) + " " for value in members[cursor])
            cursor += 1
        out.write(prefix + label + " " + raw)
        if not raw.endswith("\n"):
            # Last line of a file without trailing newline: keep "+++" on
            # its own line.
            out.write("\n")
    out.write("+++\n")


def _flush_contig(out, lines, entries, contig, threshold):
    """Split one contig's signature genes into clusters and write the
    qualifying ones.

    A cluster ends after any member whose gap to the next signature gene
    exceeds ``threshold``.  The member in front of the gap belongs to the
    cluster it closes, so its kind counts toward ``_REQUIRED_KINDS`` too.
    Clusters are numbered from 1 within each contig, counting only clusters
    that are actually written.

    Returns the number of clusters written.
    """
    written = 0
    members = []
    kinds = set()
    for entry in entries:
        members.append(entry)
        kinds.add(entry[1])
        if entry[3] > threshold:
            if _REQUIRED_KINDS <= kinds:
                written += 1
                _emit_cluster(out, lines, members,
                              contig + "-CGC" + str(written))
            # Start afresh after every gap, whether or not the run qualified.
            members = []
            kinds = set()
    if members and _REQUIRED_KINDS <= kinds:
        written += 1
        _emit_cluster(out, lines, members, contig + "-CGC" + str(written))
    return written


def write_clusters(lines, out, threshold):
    """Scan the lines of one input table and write its clusters to ``out``.

    lines     -- list of raw input lines (as returned by ``readlines()``).
                 Each non-blank line is split on single spaces: column 0 is
                 the contig / sequence id and column 6 is either ``null`` or
                 ``<kind>|...`` (only the part before ``|`` is used).
    out       -- writable text file object.
    threshold -- largest allowed line-index gap between neighbouring
                 signature genes of one cluster.

    Gaps are differences of line indices, so blank lines count toward them.
    Returns the number of clusters written.  An empty ``lines`` list writes
    nothing and returns 0.  A non-blank line with fewer than seven
    space-separated columns raises ``IndexError``.
    """
    total = 0
    contig = None
    entries = []
    for index, raw in enumerate(lines):
        if not raw.strip():
            continue
        fields = raw.split(" ")
        if contig is not None and fields[0] != contig:
            # A new contig starts: finish the previous one first.
            total += _flush_contig(out, lines, entries, contig, threshold)
            entries = []
        contig = fields[0]
        if fields[6] != "null":
            kind = fields[6].split("|")[0]
            gap = 0
            if entries:
                gap = index - entries[-1][0]
                # The previous signature gene now knows its forward gap.
                entries[-1][3] = gap
            entries.append([index, kind, gap, 0])
    if entries:
        total += _flush_contig(out, lines, entries, contig, threshold)
    return total


def process_directory(src_dir, dst_dir, threshold):
    """Run ``write_clusters`` on every file below ``src_dir``.

    The output for each input file is written to ``dst_dir`` under the same
    file name; files with equal names in different sub-directories therefore
    overwrite each other.  Returns the total number of clusters written.
    """
    total = 0
    for parent, _dirnames, filenames in os.walk(src_dir):
        for filename in filenames:
            with open(os.path.join(parent, filename)) as src:
                lines = src.readlines()
            with open(os.path.join(dst_dir, filename), "w") as out:
                total += write_clusters(lines, out, threshold)
    return total


# Script entry: process the configured directories and report the total.
cgc_number = process_directory(sig_dir, w_dir, yuzhi)
print(cgc_number)
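# "网友评论" ("Reader comments") -- appears to be page text captured along with the script; not part of the program.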