美文网首页
python 解析及生成xml

python 解析及生成xml

作者: leon_tly | 来源:发表于2022-12-06 11:13 被阅读0次

    xml --> dict

    from lxml import etree
    def parse_xml_to_dict(xml):
        """
        Recursively convert an XML element into a nested dictionary.

        Modelled on TensorFlow's recursive_parse_xml_to_dict.

        Args:
            xml: element of an XML tree, e.g. obtained by parsing XML file
                contents using lxml.etree.

        Returns:
            A single-key dict ``{xml.tag: value}``. For an element without
            children ``value`` is the element's text; otherwise it is a dict
            keyed by child tag. Every ``object`` child is collected into a
            list; for any other tag that repeats, the last occurrence wins.
        """
        if not len(xml):  # leaf element: nothing left to recurse into
            return {xml.tag: xml.text}

        parsed = {}
        for node in xml:
            # The recursive call returns {node.tag: value}; unwrap the value.
            value = parse_xml_to_dict(node)[node.tag]
            if node.tag == 'object':
                # There may be several objects, so they are kept in a list.
                parsed.setdefault(node.tag, []).append(value)
            else:
                parsed[node.tag] = value
        return {xml.tag: parsed}
    

    dict --> voc xml dict

    def generate_xml_dict(image_name, im_dict, folder, path, width, height):
        """
        Build a VOC-style annotation dictionary for one image.

        Args:
            image_name: value stored under ``filename``.
            im_dict: iterable of mappings, each providing ``label``, ``x``,
                ``y``, ``w`` and ``h`` for one box.
            folder: value stored under ``folder``.
            path: value stored under ``path``.
            width: image width, stored as a string under ``size``.
            height: image height, stored as a string under ``size``.

        Returns:
            ``{"annotation": {...}}`` whose ``object`` entry is a list with one
            dict per item of ``im_dict``. Box corners are stored as strings:
            ``xmin``/``ymin`` are ``x``/``y`` and ``xmax``/``ymax`` are
            ``x + w``/``y + h``. ``depth`` is always ``"3"``.
        """
        annotation = {
            "folder": folder,
            "filename": image_name,
            "path": path,
            "source": {"database": "Unknown"},
            "size": {"width": str(width), "height": str(height), "depth": "3"},
            "segmented": "0",
            "object": [],
        }
        objects = annotation["object"]
        for box in im_dict:
            # Convert the (x, y, w, h) box into corner coordinates.
            objects.append({
                "name": box["label"],
                "pose": "Unspecified",
                "truncated": "0",
                "difficult": "0",
                "bndbox": {
                    "xmin": str(box["x"]),
                    "ymin": str(box["y"]),
                    "xmax": str(box["x"] + box["w"]),
                    "ymax": str(box["y"] + box["h"]),
                },
            })
        return {"annotation": annotation}
    

    dict --> xml

    
    import dict2xml
    data = {"age": 20}
    # Serialize the dictionary to an XML string.
    xml_str_data = dict2xml.dict2xml(data)
    # Write with an explicit UTF-8 encoding: without it open() falls back to
    # the platform's locale encoding, so non-ASCII content would produce
    # different bytes on different machines.
    with open("test.xml", "w", encoding="utf-8") as f:
        f.write(xml_str_data)
    

    使用例子

    # 解析xml文件打印
    import os
    from lxml import etree
    
    def parse_xml_to_dict(xml):
        """
        Recursively convert an XML element into a nested dictionary.

        Args:
            xml: element of a parsed XML tree.

        Returns:
            A single-key dict ``{xml.tag: value}``. For an element without
            children ``value`` is the element's text; otherwise it is a dict
            keyed by child tag. Every ``object`` child is collected into a
            list; for any other tag that repeats, the last occurrence wins.
        """
        if not len(xml):  # leaf element: return its text directly
            return {xml.tag: xml.text}

        parsed = {}
        for node in xml:
            # The recursive call returns {node.tag: value}; unwrap the value.
            value = parse_xml_to_dict(node)[node.tag]
            if node.tag == 'object':
                # There may be several objects, so they are kept in a list.
                parsed.setdefault(node.tag, []).append(value)
            else:
                parsed[node.tag] = value
        return {xml.tag: parsed}
    
    xml_path = "C:\\Users\\Administrator\\Desktop\\xml"

    # Parse every entry of the directory and print the resulting dictionary.
    for xml_name in os.listdir(xml_path):
        xml_file = os.path.join(xml_path, xml_name)
        # Read raw bytes rather than text: lxml rejects a str that carries an
        # XML encoding declaration, and decoding with the locale's default
        # encoding can fail or garble non-ASCII content. Given bytes, the
        # parser honours the encoding declared in the document itself.
        with open(xml_file, "rb") as fid:
            xml_bytes = fid.read()
        xml = etree.fromstring(xml_bytes)
        info = parse_xml_to_dict(xml)
        print(info)
    
    

    对于xml中存在中文的应对方法

    # Decode the file explicitly as GB18030; errors='ignore' silently drops
    # any bytes that cannot be decoded instead of raising.
    with open(xml_path, "r", encoding='gb18030', errors='ignore') as xml_file:
        xml_str = xml_file.read()
    # Only the decoded text is needed from here on, so parse after the file
    # has been closed.
    xml = etree.fromstring(xml_str)
    info = parse_xml_to_dict(xml)
    

    相关文章

      网友评论

          本文标题:python 解析及生成xml

          本文链接:https://www.haomeiwen.com/subject/rfmwfdtx.html