美文网首页
cgc_create

cgc_create

作者: keaidelele | 来源:发表于2017-08-07 21:19 被阅读4次
    import os.path

    # Directory tree that is scanned for input tables, and the directory that
    # receives one output file per input file (same file name).
    sig_dir = "D://project//gff//out//"
    w_dir = "D://project//gff//cgc_table5//"

    # Largest line-index gap allowed between two neighbouring signature lines
    # of the same cluster ("yuzhi" is presumably pinyin for "threshold").
    yuzhi = 4

    # A cluster is only written when every one of these signature kinds occurs
    # in it at least once.
    _REQUIRED_SIGS = ("CAZyme", "TC-DB", "TF")


    def _split_clusters(entries, max_gap):
        """Cut the signature entries of one sequence into clusters.

        Each entry is ``[line_index, signature, dis_prev, dis_next]``.  An entry
        whose ``dis_next`` exceeds *max_gap* is the last member of its cluster.
        The final entry always has ``dis_next == 0`` and therefore ends up in
        the trailing cluster.
        """
        clusters = []
        current = []
        for entry in entries:
            current.append(entry)
            if entry[3] > max_gap:
                clusters.append(current)
                current = []
        if current:
            clusters.append(current)
        return clusters


    def _is_cgc(cluster):
        """Return True when the cluster holds every required signature kind.

        All members are inspected, including the one that closes the cluster,
        and nothing is carried over from previously rejected clusters.
        """
        present = set(entry[1] for entry in cluster)
        return all(kind in present for kind in _REQUIRED_SIGS)


    def _write_cluster(out, lines, cluster, label):
        """Write every input line spanned by *cluster*, then a ``+++`` line.

        Lines that are signature entries are prefixed with their four entry
        fields; every other line in the span is prefixed with four ``null``
        columns.  The prefix is chosen by line index so the annotation always
        sits on the line it was computed for.  Blank lines are skipped, just as
        they are while parsing.
        """
        by_index = dict((entry[0], entry) for entry in cluster)
        for idx in range(cluster[0][0], cluster[-1][0] + 1):
            line = lines[idx]
            if line == '\n':
                continue
            entry = by_index.get(idx)
            if entry is None:
                out.write(("null" + " ") * 4)
            else:
                out.write("".join(str(value) + "   " for value in entry))
            if not line.endswith("\n"):
                # Last line of a file without a trailing newline: keep the
                # "+++" separator on its own line.
                line = line + "\n"
            out.write(label + " " + line)
        out.write("+++\n")


    def _flush_sequence(out, lines, entries, refseq, max_gap):
        """Write the qualifying clusters of one sequence; return their count.

        Clusters are numbered from 1 within the sequence and labelled
        ``<refseq>-CGC<n>``.
        """
        written = 0
        for cluster in _split_clusters(entries, max_gap):
            if _is_cgc(cluster):
                written += 1
                _write_cluster(out, lines, cluster,
                               refseq + "-" + "CGC" + str(written))
        return written


    def _process_file(in_path, out_path, max_gap):
        """Scan one input table and write its clusters to *out_path*.

        Input lines are split on single spaces: field 0 is the sequence id and
        field 6 is either ``null`` or a signature whose part before the first
        ``|`` names its kind.  Lines with fewer than seven fields raise
        ``IndexError``.  Returns the number of clusters written.  The output
        file is created even when no cluster qualifies.
        """
        with open(in_path) as src:
            lines = src.readlines()

        total = 0
        entries = []    # signature entries of the sequence being collected
        refseq = None   # sequence id of the most recent signature line
        with open(out_path, "w") as out:
            for i, raw in enumerate(lines):
                if raw == '\n':
                    continue
                fields = raw.split(" ")
                if refseq != fields[0]:
                    # A different sequence id: emit what was collected so far.
                    total += _flush_sequence(out, lines, entries, refseq, max_gap)
                    entries = []
                if fields[6] != "null":
                    sig = fields[6].split('|')[0]
                    if entries:
                        dis_prev = i - entries[-1][0]
                        # The previous entry now knows the gap to its successor.
                        entries[-1][3] = dis_prev
                    else:
                        dis_prev = 0
                    entries.append([i, sig, dis_prev, 0])
                    refseq = fields[0]
            # Entries of the last sequence in the file.
            total += _flush_sequence(out, lines, entries, refseq, max_gap)
        return total


    # Process every file under sig_dir and report how many clusters were written.
    cgc_number = 0
    for parent, dirnames, filenames in os.walk(sig_dir):
        for filename in filenames:
            cgc_number += _process_file(os.path.join(parent, filename),
                                        w_dir + filename, yuzhi)
    print(cgc_number)
    

    相关文章

      网友评论

          本文标题:cgc_create

          本文链接:https://www.haomeiwen.com/subject/ilfzlxtx.html