1、输入文件:
磷酸化上游激酶预测结果.xlsx
![](https://img.haomeiwen.com/i22798912/71a0cd65a8db66b2.png)
- 需求:第1、2、6列不变,将含有 `;` 的列拆分为多行,各列中以 `;` 分隔的元素按位置一一对应;并将 TYPEA 列中类型为 complex 的蛋白去除。
2、输出文件:
relation.txt 关系表
target source relation
EIF4EBP1_S65 MTOR down-regulates
ABCF1_S109 CSNK2A1 unknown
UHRF1_S95 CSNK1D up-regulates
AKT1 AKT1 interaction
AKT1 GSK3B interaction
attribution.txt 属性表
name type style
EIF4EBP1_S65 IDB_res up
PDHA1_S293 IDB_res down
ABCF1_S109 IDB_res down
MAPK14 IDA None
MAPK14 IDA None
3、脚本
import os
import pandas as pd
import numpy as np
from itertools import chain
def split_multivalue_rows(subfile):
    """Expand ';'-separated kinase annotations into one row per kinase.

    ``subfile`` must contain the columns ``ENTITYB_res``, ``type``,
    ``ENTITYA``, ``TYPEA`` and ``EFFECT``.  The last three hold
    ';'-separated lists whose items correspond position by position.

    Rows missing any of the three list columns are dropped (there is no
    kinase annotation to expand).  Entries whose ``TYPEA`` is ``"complex"``
    are removed, the trailing " activity" is stripped from ``EFFECT`` and
    duplicate rows are discarded.

    Raises:
        ValueError: if a row has a different number of ';'-separated items
            in ``TYPEA`` or ``EFFECT`` than in ``ENTITYA``.
    """
    multi_cols = ["ENTITYA", "TYPEA", "EFFECT"]
    # len() on a missing value would raise TypeError, so drop such rows first.
    subfile = subfile.dropna(subset=multi_cols)

    parts = {col: [str(value).split(";") for value in subfile[col]]
             for col in multi_cols}
    # Explicit int dtype keeps np.repeat working when no rows are left.
    counts = {col: np.array([len(items) for items in parts[col]], dtype=int)
              for col in multi_cols}

    lens = counts["ENTITYA"]
    for col in ("TYPEA", "EFFECT"):
        bad = counts[col] != lens
        if bad.any():
            # Without this check the columns could silently misalign.
            raise ValueError(
                "Column {!r} has a different number of ';'-separated items "
                "than 'ENTITYA' in rows {}".format(
                    col, subfile.index[bad].tolist()))

    res = pd.DataFrame({
        "ENTITYB_res": np.repeat(subfile["ENTITYB_res"].to_numpy(), lens),
        "type": np.repeat(subfile["type"].to_numpy(), lens),
        "ENTITYA": list(chain.from_iterable(parts["ENTITYA"])),
        "TYPEA": list(chain.from_iterable(parts["TYPEA"])),
        "EFFECT": list(chain.from_iterable(parts["EFFECT"])),
    })
    res = res[res["TYPEA"] != "complex"].reset_index(drop=True)
    # "up-regulates activity" -> "up-regulates"
    res["EFFECT"] = [effect.replace(" activity", "") for effect in res["EFFECT"]]
    return res.drop_duplicates().reset_index(drop=True)


def build_relation(res, ppi):
    """Return the relation table (columns: target, source, relation).

    Kinase -> site edges come from ``res`` (``ENTITYB_res``, ``ENTITYA``,
    ``EFFECT``); protein-protein edges come from the ``V1``/``V2`` columns
    of ``ppi`` and are labelled "interaction".  Duplicates are removed.
    """
    relation_id = pd.DataFrame({
        "target": res["ENTITYB_res"],
        "source": res["ENTITYA"],
        "relation": res["EFFECT"],
    })
    relation_ppi = pd.DataFrame({
        "target": ppi["V1"],
        "source": ppi["V2"],
        "relation": "interaction",
    })
    merged = pd.concat([relation_id, relation_ppi])
    return merged.drop_duplicates().reset_index(drop=True)


def build_attribution(res):
    """Return the node attribute table (columns: name, type, style).

    Phosphosites (``ENTITYB_res``) get type "IDB_res" and take their style
    from the ``type`` column of ``res``; kinases (``ENTITYA``) get type
    "IDA" and the literal style "None".  Duplicates are removed.
    """
    attr_idb = pd.DataFrame({
        "name": res["ENTITYB_res"],
        "type": "IDB_res",
        "style": res["type"],
    })
    attr_ida = pd.DataFrame({
        "name": res["ENTITYA"],
        "type": "IDA",
        "style": "None",
    })
    merged = pd.concat([attr_idb, attr_ida])
    return merged.drop_duplicates().reset_index(drop=True)


def process_comparison(comp_dir, name):
    """Build relation.txt and attribution.txt inside one comparison folder.

    ``name`` is the folder name of the form ``<A>_VS_<B>``; the fold-change
    column of the input workbook is expected to be called ``<A>/<B>``.
    Reads ``磷酸化上游激酶预测结果.xlsx`` and ``diff_relation.txt`` from
    ``comp_dir`` and writes both output tables (tab-separated) into it.
    """
    groups = name.split("_VS_")
    fc = groups[0] + "/" + groups[1]

    table = pd.read_excel(os.path.join(comp_dir, "磷酸化上游激酶预测结果.xlsx"))
    # Fold change above 1.2 is "up", below 0.83 is "down"; others stay empty.
    table.loc[(table[fc] > 1.2), "type"] = "up"
    table.loc[(table[fc] < 0.83), "type"] = "down"
    table["ENTITYB_res"] = (
        table["ENTITYB"] + "_" + table["Modifications in Master Proteins"]
    ).str.strip()

    colname = ["ENTITYB_res", "type", "ENTITYA", "TYPEA", "EFFECT"]
    res = split_multivalue_rows(table.loc[:, colname])

    # make relation.txt
    ppi = pd.read_csv(os.path.join(comp_dir, "diff_relation.txt"), sep="\t")
    relation = build_relation(res, ppi)
    relation.to_csv(os.path.join(comp_dir, "relation.txt"), sep="\t", index=False)

    # make attribution.txt
    attribution = build_attribution(res)
    attribution.to_csv(
        os.path.join(comp_dir, "attribution.txt"), sep="\t", index=False)


path = os.getcwd()
for entry in os.listdir(path):
    comp_dir = os.path.join(path, entry)
    # Only "<A>_VS_<B>" directories are comparisons.  Explicit paths are used
    # instead of os.chdir so a failure never leaves the process in a
    # subdirectory.
    if "_VS_" in entry and os.path.isdir(comp_dir):
        process_comparison(comp_dir, entry)
网友评论