# -*- coding: utf-8 -*-
import os
import re
import shlex
import subprocess
import sys
# Help text printed when too few command-line arguments are supplied.
USAGE = (
    "Usage: python rename.py [rename_txt] [rename_data_dir] [rename_log] [orig_data*] \n"
    " rename_txt -- txt file that save company's name and customer's name which divided by tab \n"
    " rename_data_dir -- where you put renamed data \n"
    " rename_log -- file to save the `cat data cmd` \n"
    " orig_data -- you can insert more than one directory \n"
    " e.g. \n python rename.py orig_data/rename.txt raw_data rename.log orig_data"
)


def _warn(message):
    """Report a non-fatal problem on stderr so stdout stays clean."""
    sys.stderr.write("rename.py: warning: " + message + "\n")


def _read_rename_table(path):
    """Return a dict mapping old sample name -> new sample name.

    Each useful line of *path* holds two whitespace-separated columns:
    the old name first, the new name second.  Blank lines are ignored and
    lines with fewer than two columns are reported and skipped instead of
    raising IndexError.  When an old name occurs twice, the last line wins.
    """
    rename_dic = {}
    with open(path, "r") as handle:
        for line_no, line in enumerate(handle, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                _warn("{}:{}: expected two columns, got {!r}".format(
                    path, line_no, line.rstrip("\n")))
                continue
            rename_dic[fields[0]] = fields[1]
    return rename_dic


def _find_gz_files(data_dirs):
    """Recursively collect the paths of all ``*.gz`` files under *data_dirs*."""
    found = []
    for data_dir in data_dirs:
        for top, _dirs, names in os.walk(data_dir):
            # Require a real ".gz" suffix: a bare "gz" substring test would
            # also pick up side files such as "x.fq.gz.md5".
            found.extend(
                os.path.join(top, name) for name in names if name.endswith(".gz")
            )
    return found


def _match_samples(sample_names, gz_files):
    """Map every sample name to the files whose base name contains it.

    The name is matched literally (not as a regular expression), so names
    containing ".", "+" and similar characters behave as expected.
    NOTE(review): this is still a substring test, so sample "S1" also
    matches "S10_R1.fq.gz"; keep the names in the rename table unambiguous.
    """
    return {
        sample: [path for path in gz_files if sample in os.path.basename(path)]
        for sample in sample_names
    }


def _split_read_pairs(files):
    """Split *files* into sorted (R1, R2) lists based on the file name only.

    Only the base name is inspected so that a directory such as "PR1234/"
    or "R2_run/" cannot misclassify every file below it.
    """
    r1_files = []
    r2_files = []
    for path in files:
        name = os.path.basename(path)
        if "R1" in name or name.endswith("1.fq.gz"):
            r1_files.append(path)
        if "R2" in name or name.endswith("2.fq.gz"):
            r2_files.append(path)
    return sorted(r1_files), sorted(r2_files)


def _concat(files, out_path, log):
    """Concatenate *files* into *out_path* with ``cat`` and log the command.

    The equivalent, shell-quoted command line is written to *log*.  Nothing
    is executed when *files* is empty, because ``cat`` without operands
    would block reading standard input; a comment line is logged instead.

    Returns True on success or skip, False when ``cat`` exits non-zero.
    """
    if not files:
        log.write("# no input files matched for {}; skipped\n".format(out_path))
        _warn("no input files matched for {}; skipped".format(out_path))
        return True
    command = "cat {} > {}".format(
        " ".join(shlex.quote(path) for path in files), shlex.quote(out_path)
    )
    log.write(command + "\n")
    # Run without a shell so unusual characters in paths cannot be
    # interpreted as shell syntax.
    with open(out_path, "wb") as out:
        status = subprocess.call(["cat"] + list(files), stdout=out)
    if status != 0:
        _warn("cat exited with status {} while writing {}".format(status, out_path))
        return False
    return True


def main(argv=None):
    """Merge per-sample ``*.gz`` read files under their new sample names.

    argv -- argument list without the program name (defaults to
            ``sys.argv[1:]``): rename_txt, rename_data_dir, rename_log,
            followed by zero or more directories to search.

    For every old name in the rename table, the matching R1 and R2 files
    are concatenated into ``<rename_data_dir>/<new name>.R1.fq.gz`` and
    ``<rename_data_dir>/<new name>.R2.fq.gz``; the commands are recorded in
    rename_log.

    Returns the process exit status: 0 on success, 1 on a usage error or
    when any concatenation failed.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 3:
        print(USAGE)
        return 1
    rename_txt, rename_dir, rename_log = args[:3]
    data_dirs = args[3:]

    rename_dic = _read_rename_table(rename_txt)
    sample_dic = _match_samples(rename_dic, _find_gz_files(data_dirs))

    # The output directory may not exist yet; every redirect would fail.
    os.makedirs(rename_dir, exist_ok=True)

    all_ok = True
    with open(rename_log, "w") as log:
        for old_name, files in sample_dic.items():
            new_name = rename_dic[old_name]
            r1_files, r2_files = _split_read_pairs(files)
            for read, members in (("R1", r1_files), ("R2", r2_files)):
                out_path = os.path.join(
                    rename_dir, "{}.{}.fq.gz".format(new_name, read)
                )
                if not _concat(members, out_path, log):
                    all_ok = False
    return 0 if all_ok else 1


if __name__ == "__main__":
    sys.exit(main())
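# NOTE: the source page ended here with a "Reader comments" (网友评论) heading; it is not part of the script.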