# -*- coding: utf-8 -*-
#!/usr/bin/env python
import os
import sys
import argparse
def _read_table(path):
    """Yield the tab-separated fields of every non-blank line in *path*.

    Each line is stripped of surrounding whitespace before it is split, and
    lines that are empty after stripping are skipped so that a stray blank
    line does not surface later as an IndexError.
    """
    with open(path, "r") as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if line:
                yield line.split("\t")


def _load_gene_names(path):
    """Return a dict mapping column 1 (gene ID) to column 2 (gene name).

    When an ID occurs more than once the last occurrence wins.
    """
    return {fields[0]: fields[1] for fields in _read_table(path)}


def _load_diff_ids(path):
    """Return the set of gene IDs found in the first column of *path*.

    A set is used because membership is tested once per input row.
    """
    return {fields[0] for fields in _read_table(path)}


def _regulation_label(gene_id, log2fc, diff_ids):
    """Return the regulation label for one row, or None to skip the row.

    Genes outside *diff_ids* are labelled "*" without parsing *log2fc*, so
    non-numeric values such as "NA" are tolerated for them.  Genes inside
    *diff_ids* are labelled "Up" or "Down" by the sign of *log2fc*; a value
    that is neither positive nor negative (0 or NaN) yields None.

    NOTE(review): the original script lost its indentation; this assumes the
    final ``else`` belonged to the ``if geneid in difflist`` test -- confirm.
    """
    if gene_id not in diff_ids:
        return "*"
    change = float(log2fc)
    if change > 0:
        return "Up"
    if change < 0:
        return "Down"
    return None


def main(argv=None):
    """Annotate a tab-separated expression table with names and regulation.

    Reads the gene ID -> gene name mapping, the list of differential gene
    IDs and the input table, then writes one output row per input row with
    the gene name inserted and an Up/Down/* label appended.

    Args:
        argv: Command-line arguments without the program name; ``None``
            makes argparse read ``sys.argv``.

    Raises:
        KeyError: if a gene ID in the input table is missing from the
            name mapping.
        IndexError: if a row has fewer columns than the script reads.
        ValueError: if log2FoldChange of a differential gene is not numeric.
    """
    parser = argparse.ArgumentParser(description="trim data , change geneid to genename")
    parser.add_argument('--infile', help=" inputfile ", required=True)
    parser.add_argument('--outfile', help="output file", required=True)
    parser.add_argument('--difffile', help="diff gene up down file", required=True)
    # Previously hard-coded; the default keeps existing invocations working.
    parser.add_argument('--namefile', default='genename',
                        help="geneid<TAB>genename mapping file (default: genename)")
    args = parser.parse_args(argv)

    id_to_name = _load_gene_names(args.namefile)
    diff_ids = _load_diff_ids(args.difffile)

    rows = _read_table(args.infile)
    header = next(rows, None)
    if header is None:
        sys.exit("input file '%s' is empty" % args.infile)

    # The context manager guarantees the output is flushed and closed even
    # when a malformed row raises part-way through.
    with open(args.outfile, "w") as sink:
        sink.write("\t".join([
            header[0], "genename", header[2], header[3],
            "log2FoldChange", header[5], "padj", "Up/Down-Regulation",
        ]) + "\n")
        for fields in rows:
            gene_id = fields[0]
            gene_name = id_to_name[gene_id]
            control_value = fields[2]
            treat_value = fields[3]
            log2fc = fields[4]
            p_value = fields[5]
            padj = fields[6]
            label = _regulation_label(gene_id, log2fc, diff_ids)
            if label is None:
                continue
            sink.write("\t".join([
                gene_id, gene_name, control_value, treat_value,
                log2fc, p_value, padj, label,
            ]) + "\n")


if __name__ == "__main__":
    main()
# Reader comments