python30

作者: rong酱 | 来源:发表于2022-04-28 07:53 被阅读0次
    # -*- coding: utf-8 -*-
    
    import os
    import sys
    
    # Annotate positions with the genes whose interval contains them.
    #
    # Usage: <script> POSITIONS_TSV OUTPUT_TSV
    #   POSITIONS_TSV  tab-separated; column 1 is used as the chromosome ID,
    #                  column 2 as the position, and any remaining columns
    #                  are copied through to the output unchanged.
    #   OUTPUT_TSV     one line per (position, gene) hit: chromosome,
    #                  position, the copied columns, then the gene name.
    # Gene intervals are read from 'pos_genev1.txt' in the working directory;
    # columns 1-4 are used as chromosome, start, end and gene name.
    # NOTE(review): the companion script further down this file writes
    # 'pos_gene.txt'; presumably that file is de-duplicated/renamed to
    # 'pos_genev1.txt' before this step -- confirm.
    in1 = sys.argv[1]
    ou1 = sys.argv[2]

    # chromosome -> {position (as text) -> extra columns}.
    # The two fields are kept as separate keys instead of being glued together
    # with "_" and split again later: chromosome/scaffold IDs may themselves
    # contain "_", which made the split return the wrong chromosome/position.
    # A repeated (chromosome, position) pair keeps the columns of its last
    # occurrence, as before.
    insertdic = {}
    with open(in1, 'r') as i:
        for lin in i:
            linc = lin.strip().split("\t")
            if linc == ['']:
                continue  # skip blank lines instead of failing on linc[1]
            chrID = linc[0]
            posID = linc[1]
            insertdic.setdefault(chrID, {})[posID] = linc[2:]

    # Both handles are managed by 'with' so the output is flushed and closed
    # even if a malformed line raises part-way through.
    with open(ou1, 'w') as outc, open('pos_genev1.txt', 'r') as r:
        for rin in r:
            rinc = rin.strip().split("\t")
            if rinc == ['']:
                continue  # skip blank lines
            rincID = rinc[0]
            rincS = rinc[1]
            rincE = rinc[2]
            # Only positions on this gene's chromosome can match, so look them
            # up directly rather than scanning every stored position.
            positions = insertdic.get(rincID)
            if not positions:
                continue
            start = int(rincS)
            end = int(rincE)
            for poskey, extra in positions.items():
                # Interval is inclusive at both ends.
                if start <= int(poskey) <= end:
                    # Trace of each hit on stdout, same format as before.
                    print(str([rincID, poskey]))
                    print(str(rinc))
                    genename = rinc[3]
                    outc.write("\t".join([rincID, poskey] + extra + [genename]) + "\n")
    
    #os.system('cat pos_genev1_name.txt | sort -u >%s'%(ou1))
    #os.system('rm pos_genev1_name.txt')
    
    # -*- coding: utf-8 -*-
    
    import os
    import sys
    import re
    
    # Extract gene intervals from a tab-separated annotation file.
    #
    # Usage: <script> ANNOTATION
    # For every non-comment line whose 3rd column is "gene", each
    # ';'-separated entry of the last column that contains "gene_name"
    # produces one line in 'pos_gene.txt':
    #   chromosome (col 1) <TAB> start (col 4) <TAB> end (col 5) <TAB> gene name
    in1 = sys.argv[1]

    # Compiled once, outside the loops. Captures everything between the first
    # and the last double quote of an attribute entry.
    str_pat = re.compile(r'"(.*)"')

    # The output is opened first (as before, so it is created/truncated even
    # if the input cannot be opened) and is now closed reliably by 'with'.
    # The input is streamed line by line instead of being loaded whole.
    with open("pos_gene.txt", 'w') as ouc, open(in1, 'r') as i:
        for lin in i:
            if lin.startswith("#"):
                continue  # header / comment line
            linc = lin.strip().split("\t")
            if linc == ['']:
                continue  # skip blank lines instead of failing on linc[3]
            if linc[2] != "gene":
                continue
            chrID = linc[0]
            chrs = int(linc[3])  # start point
            chre = int(linc[4])  # end point
            for annoil in linc[-1].strip().split(";"):
                if "gene_name" in annoil:
                    # Raises IndexError if the entry has no quoted value.
                    gene_name = str_pat.findall(annoil)[0]
                    ouc.write("\t".join([chrID, str(chrs), str(chre), gene_name]) + "\n")
    

    相关文章

      网友评论

          本文标题:python30

          本文链接:https://www.haomeiwen.com/subject/nislyrtx.html