# -*- coding: utf-8 -*-
import os
import sys
def load_positions(lines):
    """Parse query lines into a mapping of genomic positions.

    Each non-blank line is tab-separated: chromosome, position, then any
    number of extra columns that are carried through to the output as-is.

    Args:
        lines: Iterable of text lines (for example an open file).

    Returns:
        dict mapping ``(chromosome, position)`` -- both kept as the original
        strings -- to the list of remaining columns.  A later line with the
        same chromosome and position replaces the earlier entry.

    Raises:
        IndexError: If a non-blank line has fewer than two columns.
    """
    positions = {}
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = stripped.split("\t")
        # A tuple key keeps chromosome and position separate, so contig names
        # that contain "_" (chr1_random, NW_0123, ...) cannot be mis-split.
        positions[(fields[0], fields[1])] = fields[2:]
    return positions


def iter_gene_hits(positions, region_lines):
    """Yield every (position, region) pair where the position lies in the region.

    Region lines are tab-separated: chromosome, start, end, gene name.  Both
    interval bounds are inclusive.  Hits are produced in region order and,
    within one region, in the iteration order of ``positions``.

    Args:
        positions: Mapping as returned by ``load_positions``.
        region_lines: Iterable of region text lines.

    Yields:
        Tuples ``(chromosome, position, extra_columns, region_fields)`` where
        ``region_fields`` is the full list of columns of the matching region.

    Raises:
        ValueError: If a coordinate that has to be compared is not an integer.
    """
    # Group the queries by chromosome once so each region is only compared
    # with positions on its own chromosome.
    by_chrom = {}
    for (chrom, pos), extras in positions.items():
        by_chrom.setdefault(chrom, []).append((pos, extras))

    for line in region_lines:
        stripped = line.strip()
        if not stripped:
            continue
        region = stripped.split("\t")
        candidates = by_chrom.get(region[0])
        if not candidates:
            continue
        # Coordinates are converted only when a comparison is really needed,
        # so header-like lines on unrelated chromosomes do not abort the run.
        start, end = int(region[1]), int(region[2])
        for pos, extras in candidates:
            if start <= int(pos) <= end:
                yield region[0], pos, extras, region


def annotate_positions(query_path, output_path, regions_path="pos_genev1.txt"):
    """Write one output row per query position that falls inside a gene region.

    Each output row is: chromosome, position, the extra query columns, and the
    gene name (4th column of the region), all tab-separated.  The same row can
    appear more than once if the region file lists an interval repeatedly;
    post-process with ``sort -u`` if unique rows are required.

    Args:
        query_path: Tab-separated file of positions (see ``load_positions``).
        output_path: File to create or overwrite with the annotated rows.
        regions_path: Tab-separated file of gene regions.

    Returns:
        The number of rows written.
    """
    with open(query_path, "r") as query:
        positions = load_positions(query)

    written = 0
    # Context managers guarantee both files are closed even if a malformed
    # line raises half-way through.
    with open(output_path, "w") as out, open(regions_path, "r") as regions:
        for chrom, pos, extras, region in iter_gene_hits(positions, regions):
            # Per-hit trace on stdout: the matched position, then the region.
            print([chrom, pos])
            print(region)
            out.write("\t".join([chrom, pos] + extras + [region[3]]) + "\n")
            written += 1
    return written


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: %s <positions.tsv> <output.tsv>" % sys.argv[0])
    annotate_positions(sys.argv[1], sys.argv[2])
# -*- coding: utf-8 -*-
import os
import sys
import re
# Matches the attribute whose key is exactly ``gene_name`` and captures its
# quoted value.  Anchoring on start-of-string or ";" keeps look-alike keys
# such as ``ref_gene_name`` from being picked up.  Compiled once, not per line.
_GENE_NAME_RE = re.compile(r'(?:^|;)\s*gene_name\s+"([^"]*)"')


def parse_gene_record(line):
    """Extract the gene interval described by one annotation line.

    The line is expected to be tab-separated with the feature type in column
    3, start in column 4, end in column 5 and a ``key "value";`` attribute
    list in the last column (the layout looks like GTF -- confirm against the
    actual input files).

    Args:
        line: One raw text line of the annotation file.

    Returns:
        ``(chromosome, start, end, gene_name)`` with integer coordinates, or
        ``None`` when the line is blank, starts with ``#``, is not a ``gene``
        feature, or carries no quoted ``gene_name`` attribute.

    Raises:
        IndexError: If a data line has too few columns.
        ValueError: If the coordinates of a gene record are not integers.
    """
    if line.startswith("#"):
        return None
    stripped = line.strip()
    if not stripped:
        return None
    fields = stripped.split("\t")
    if fields[2] != "gene":
        return None
    # Search the whole attribute column instead of splitting on ";" so a
    # quoted value that itself contains ";" cannot break the lookup.
    match = _GENE_NAME_RE.search(fields[-1])
    if match is None:
        return None
    return fields[0], int(fields[3]), int(fields[4]), match.group(1)


def extract_gene_regions(gtf_path, output_path="pos_gene.txt"):
    """Write ``chromosome, start, end, gene_name`` for every named gene record.

    The annotation is streamed line by line rather than loaded into memory,
    and both files are closed even if a malformed line raises.

    Args:
        gtf_path: Annotation file to read (see ``parse_gene_record``).
        output_path: Tab-separated file to create or overwrite.

    Returns:
        The number of rows written.
    """
    written = 0
    with open(output_path, "w") as out, open(gtf_path, "r") as annotation:
        for line in annotation:
            record = parse_gene_record(line)
            if record is None:
                continue
            out.write("%s\t%d\t%d\t%s\n" % record)
            written += 1
    return written


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: %s <annotation.gtf>" % sys.argv[0])
    extract_gene_regions(sys.argv[1])
# User comments (footer text left over from the web page; not part of the script)