美文网首页
利用python提取kegg的xml文件信息

利用python提取kegg的xml文件信息

作者: 快乐的夏天_eaf9 | 来源:发表于2019-01-19 18:25 被阅读0次

    利用 Python 标准库的 xml.etree.ElementTree 模块,对 KEGG 的 XML(KGML)文件进行信息提取,并通过 openpyxl 将结果写入 Excel 文件。根据这些数据,可以实现代谢路径的数据整合、重构或解析。

    # -*- coding: utf-8 -*-
    """
    Created on Fri Jan 18 09:05:39 2019
    
    @author: Menglei-Xia
    @email:mlxia@tust.edu.cn
    @Please Feel free to contact
    """
    
    import xml.etree.ElementTree as ET
    import os
    import openpyxl
    
    from tkinter import Tk
    import tkinter.filedialog as tf
    
    # Ask the user for the KGML (KEGG XML) file to process. The Tk root window
    # is hidden so that only the file dialog is shown, and destroyed afterwards
    # so no stray window object outlives the dialog.
    _dialog_root = Tk()
    _dialog_root.withdraw()
    b = tf.askopenfilename()
    _dialog_root.destroy()

    # askopenfilename() returns an empty value when the dialog is cancelled;
    # stop with a clear message instead of failing inside the XML parser.
    if not b:
        raise SystemExit('No input file selected.')

    # Parse the selected file; `tree` is iterated by the sections below and
    # `root` is its top-level element.
    tree = ET.ElementTree(file=b)
    root = tree.getroot()

    # Workbook that collects the extracted tables, one worksheet per table.
    wb = openpyxl.Workbook()
    
    
    def _write_entry_sheet(sheet, entries, entry_type, header_labels):
        """Fill *sheet* with the <graphics> attributes of matching entries.

        One row is written per <entry> element whose ``type`` attribute equals
        *entry_type* and that has a <graphics> child. Every attribute value is
        placed in the column whose header names that attribute, so the header
        row always matches the data regardless of the order in which the
        attributes appear in the file.

        Args:
            sheet: openpyxl worksheet to write into.
            entries: iterable of <entry> elements.
            entry_type: value of the entry ``type`` attribute to select.
            header_labels: header texts for the leading columns, in column
                order. Each label, lower-cased, is the <graphics> attribute
                stored in that column. Attributes not listed here get extra
                columns appended on the right, labelled with the attribute name.
        """
        labels = list(header_labels)
        column_of = {
            label.lower(): col for col, label in enumerate(labels, start=1)
        }
        row = 1  # row 1 is reserved for the header
        for entry in entries:
            if entry.get('type') != entry_type:
                continue
            graphics = entry.find('graphics')
            if graphics is None:
                # An entry without a <graphics> child has nothing to report.
                continue
            row += 1
            for key, value in graphics.attrib.items():
                if key not in column_of:
                    # Unexpected attribute: keep the data by giving it its own
                    # column rather than dropping or misplacing it.
                    labels.append(key)
                    column_of[key] = len(labels)
                sheet.cell(row=row, column=column_of[key]).value = value
        # The header is written last because extra columns may have been added.
        for col, label in enumerate(labels, start=1):
            sheet.cell(row=1, column=col).value = label


    # Compound entries.
    sheet1 = wb.create_sheet('化合物信息')
    _write_entry_sheet(
        sheet1,
        tree.iter(tag='entry'),
        'compound',
        ['Name', 'x', 'height', 'y', 'width', 'fgcolor', 'type', 'bgcolor'],
    )

    # Gene entries.
    sheet2 = wb.create_sheet('gene')
    _write_entry_sheet(
        sheet2,
        tree.iter(tag='entry'),
        'gene',
        ['fgcolor', 'name', 'type', 'bgcolor', 'coords'],
    )

    # Ortholog entries.
    sheet3 = wb.create_sheet('ortholog')
    _write_entry_sheet(
        sheet3,
        tree.iter(tag='entry'),
        'ortholog',
        ['fgcolor', 'name', 'type', 'bgcolor', 'coords'],
    )
    
    
    # Reaction table: one row per (substrate, product) pair of each <reaction>.
    sheet4 = wb.create_sheet('reaction')

    # Header row; these column numbers define the layout used in the loop below.
    sheet4.cell(row=1, column=1).value = 'Source_id'
    sheet4.cell(row=1, column=2).value = 'Source_name'
    sheet4.cell(row=1, column=3).value = 'Target_id'
    sheet4.cell(row=1, column=4).value = 'Target_name'
    sheet4.cell(row=1, column=5).value = 'type'

    row_xia = 1  # last row written so far (row 1 is the header)
    for elem in tree.iter(tag='reaction'):
        # Select children by tag rather than by position: a reaction may list
        # several substrates and products, so the second child is not
        # necessarily a product. A reaction lacking either kind of child
        # produces no rows.
        substrates = elem.findall('substrate')
        products = elem.findall('product')
        for source in substrates:
            for target in products:
                row_xia += 1
                sheet4.cell(row=row_xia, column=1).value = source.get('id')
                sheet4.cell(row=row_xia, column=2).value = source.get('name')
                sheet4.cell(row=row_xia, column=3).value = target.get('id')
                sheet4.cell(row=row_xia, column=4).value = target.get('name')
                sheet4.cell(row=row_xia, column=5).value = elem.get('type')

    # The workbook is written under a fixed name, relative to the current
    # working directory.
    wb.save('data3.xlsx')
    

    在这里没有对ElementTree的用法进行系统的解析。具体用法可以参考以下博文:
    Reference
    【1】深入解读Python解析XML的几种方式 及 ElementTree模块的使用

    相关文章

      网友评论

          本文标题:利用python提取kegg的xml文件信息

          本文链接:https://www.haomeiwen.com/subject/tzpzdqtx.html