美文网首页Python
Python应用基础-根据指定文件生成XML

Python应用基础-根据指定文件生成XML

作者: Surpassme | 来源:发表于2020-06-28 20:20 被阅读0次

        因项目需要,需根据指定格式的文件生成XML标注文件,以便使用LabelImg打开并进行编辑和查看。原始文件中的各个字段默认使用逗号进行分隔,如下所示:


    image.png
    • 第1个值:由原始图片切出的小图文件名,名称中带有AIpng_x,其中x代表它是原始图片的第几个切图文件
    • 第2~5个值:分别对应于ymin, xmin, ymax, xmax
    • 第6个值:代表对应的标签标注

        在生成XML文件时,需要对其进行汇总,即将属于同一个原始文件的切图小文件的标注汇总到一起,其实现代码如下所示:

    import os
    from Logger import MyLogger
    from xml.dom.minidom import Document,parse
    from collections import defaultdict
    import re
    
    class OpeateXML:
        """Generate LabelImg-style XML annotation files from a comma-separated location file.

        Every non-blank line of the source file is treated as one record. A record
        is only written to XML when it has exactly six comma-separated fields, in
        the order ``cropName,ymin,xmin,ymax,xmax,label``. Crop names contain a
        ``.AIpng_<n>`` suffix; records whose names differ only in ``<n>`` are
        grouped under one key and end up in the same XML file.
        """

        # Per-crop suffix such as ".AIpng_12". Replacing it with ".AIpng" yields the
        # key shared by all crops cut from the same original image.
        _CROP_SUFFIX = re.compile(r"\.AIpng_\d+", re.IGNORECASE)
        # Number of comma-separated fields in a record that can be written to XML.
        _FIELD_COUNT = 6

        def __init__(self, srcPath: str, targetPath: str, srcFileName: str):
            """Remember where the location file lives and where XML files are written.

            srcPath: directory containing the location file.
            targetPath: directory the XML files are read from and written to.
            srcFileName: file name of the location file inside ``srcPath``.
            """
            self._srcPath = srcPath
            self._targetPath = targetPath
            self._srcFileName = srcFileName

        def readSrcFileName(self, fileEncoding="utf8") -> defaultdict:
            """Read the location file and group its records by original image.

            fileEncoding: encoding used to decode the file; undecodable bytes are
                ignored.

            Returns a ``defaultdict(list)`` mapping the grouping key (first field
            with the ``.AIpng_<n>`` suffix collapsed to ``.AIpng``) to the stripped
            record lines, in file order. Blank lines are skipped. An empty mapping
            is returned when the file is missing or cannot be read; both cases are
            logged.
            """
            data = defaultdict(list)
            srcFileFullPath = os.path.join(self._srcPath, self._srcFileName)
            if not os.path.exists(srcFileFullPath):
                MyLogger().error(f"OperateXML:source file not found:\n{srcFileFullPath}")
                return data
            try:
                with open(srcFileFullPath, mode="r", encoding=fileEncoding, errors="ignore") as fr:
                    for line in fr:
                        record = line.strip()
                        if not record:
                            # A blank line would otherwise create a bogus "" key.
                            continue
                        cropName = record.split(",")[0]
                        data[self._CROP_SUFFIX.sub(".AIpng", cropName)].append(record)
            except Exception as ex:
                MyLogger().error(f"OperateXML:read file error:\n{ex}")
                # Do not hand back a partially read mapping.
                return defaultdict(list)
            return data

        def getCreateXMLData(self, srcData: dict, mnlData: list) -> dict:
            """Mark the records of manually confirmed crops.

            srcData: grouped records as returned by ``readSrcFileName``.
            mnlData: names of the manually confirmed crops.

            A record is considered confirmed when any entry of ``mnlData`` is a
            substring of the record's first field. In a confirmed record every
            occurrence of ``"auto"`` is replaced with ``"mnl"``.

            ``srcData`` is updated in place and returned. On error the problem is
            logged and an empty dict is returned.
            """
            try:
                for records in srcData.values():
                    for index, record in enumerate(records):
                        cropName = record.strip().split(",")[0]
                        if any(item in cropName for item in mnlData):
                            records[index] = record.replace("auto", "mnl")
            except Exception as ex:
                MyLogger().error(f"OperateXML: get data from location and mnldata interaction error\n{ex}")
                return {}
            return srcData

        def operateXML(self, data: defaultdict) -> None:
            """Write one XML file per key of ``data``.

            When the XML file for a key already exists in the target directory the
            new records are merged into it; otherwise a new file is created.
            """
            for k, records in data.items():
                xmlFileFullPath = os.path.join(self._targetPath, os.path.splitext(k)[0] + ".xml")
                if os.path.exists(xmlFileFullPath):
                    self.appendExistXML(data={k: records}, xmlFileFullPath=xmlFileFullPath)
                else:
                    self.createNewXML({k: records})

        def appendExistXML(self, data: defaultdict, xmlFileFullPath: str, fileEncoding="utf8"):
            """Merge ``data`` with the objects already stored in an existing XML file.

            data: mapping of grouping key to record lines.
            xmlFileFullPath: path of the existing XML file to read.
            fileEncoding: encoding forwarded to ``createNewXML`` for the rewrite.

            The objects found in the file are turned back into records and placed
            under the key taken from the file's ``<filename>`` element, ahead of
            the new records for that key. Records of that key describing the same
            box and label are written only once, keeping the first occurrence.

            ``data`` itself is not modified. The existing file is never deleted
            up front: it is only replaced when ``createNewXML`` writes the merged
            result. If the file cannot be parsed the error is logged and nothing
            is written.
            """
            try:
                rootNode = parse(xmlFileFullPath).documentElement
                key = self._firstText(rootNode, "filename")
                existing = []
                for obj in rootNode.getElementsByTagName("object"):
                    name = self._firstText(obj, "name")
                    # Build the record inside the loop so an object without a
                    # bounding box contributes nothing instead of reusing the
                    # coordinates of a previous object.
                    for bndbox in obj.getElementsByTagName("bndbox"):
                        xmin = self._firstText(bndbox, "xmin")
                        ymin = self._firstText(bndbox, "ymin")
                        xmax = self._firstText(bndbox, "xmax")
                        ymax = self._firstText(bndbox, "ymax")
                        existing.append(f"existData,{ymin},{xmin},{ymax},{xmax},{name}")
                # Work on copies so the caller's lists are left untouched.
                merged = {k: list(v) for k, v in data.items()}
                merged[key] = self._dedupeRecords(existing + merged.get(key, []))
            except Exception as ex:
                MyLogger().error(f"OperateXML:append content to {xmlFileFullPath} error\n{ex}")
                return
            self.createNewXML(data=merged, fileEncoding=fileEncoding)

        def createNewXML(self, data: dict, fileEncoding="utf8") -> None:
            """Write one XML annotation file per entry of ``data``.

            data: mapping of image file name to its record lines.
            fileEncoding: encoding of the written file; it is also passed to
                ``writexml`` for the XML declaration.

            The file is named after the key with its extension replaced by
            ``.xml`` and is written into the target directory, overwriting any
            existing file. Records that do not have exactly six fields, or whose
            label does not contain ``"/"``, are left out. A failure while writing
            one entry is logged and does not stop the remaining entries.

            NOTE(review): confirm that downstream XML parsers accept the default
            spelling "utf8" in the declaration when the content is not ASCII.
            """
            if not data:
                return
            for k, v in data.items():
                try:
                    doc = self._buildDocument(k, v)
                    xmlName = os.path.splitext(k)[0] + ".xml"
                    targetPath = os.path.join(self._targetPath, xmlName)
                    # The document is fully built before the file is opened, so
                    # a malformed record cannot leave a truncated file behind.
                    with open(targetPath, mode="w", encoding=fileEncoding, errors="ignore") as fw:
                        doc.writexml(fw, indent="\t", newl="\n", addindent="\t", encoding=fileEncoding)
                except Exception as ex:
                    MyLogger().error(f"OperateXML:Save xml error\n{ex}")

        def _buildDocument(self, fileName, records):
            """Return the ``annotation`` document for one image and its records."""
            doc = Document()
            rootNode = doc.createElement("annotation")
            doc.appendChild(rootNode)

            self._appendTextElement(doc, rootNode, "folder", self._targetPath)
            self._appendTextElement(doc, rootNode, "filename", fileName)
            self._appendTextElement(doc, rootNode, "path", os.path.join(self._targetPath, fileName))

            for record in records:
                fields = record.strip().split(",")
                if len(fields) != self._FIELD_COUNT:
                    continue
                _, ymin, xmin, ymax, xmax, labelName = fields
                if "/" not in labelName:
                    continue

                objectNode = doc.createElement("object")
                rootNode.appendChild(objectNode)
                self._appendTextElement(doc, objectNode, "name", labelName)

                bndbox = doc.createElement("bndbox")
                objectNode.appendChild(bndbox)
                for tag, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
                    self._appendTextElement(doc, bndbox, tag, value)
            return doc

        @staticmethod
        def _appendTextElement(doc, parent, tag, text):
            """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
            element = doc.createElement(tag)
            element.appendChild(doc.createTextNode(text))
            parent.appendChild(element)
            return element

        @staticmethod
        def _firstText(parent, tag):
            """Return the stripped text of the first ``<tag>`` element below ``parent``.

            Stripping removes the whitespace that pretty-printed XML may place
            around the text. Raises ``IndexError`` when the element is missing or
            has no child node.
            """
            return parent.getElementsByTagName(tag)[0].childNodes[0].data.strip()

        @classmethod
        def _dedupeRecords(cls, records):
            """Drop records repeating an earlier box and label, keeping the order.

            The first field (the crop name or the ``existData`` marker) is ignored
            when comparing six-field records, because it is not written to the
            XML. Records with another field count are compared as whole strings.
            """
            seen = set()
            unique = []
            for record in records:
                fields = record.strip().split(",")
                if len(fields) == cls._FIELD_COUNT:
                    identity = tuple(field.strip() for field in fields[1:])
                else:
                    identity = (record,)
                if identity in seen:
                    continue
                seen.add(identity)
                unique.append(record)
            return unique
    
    if __name__ == '__main__':
        # Demo run: read the location file, mark the manually confirmed crops
        # and write (or update) the XML files in the output directory.
        sourceDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs"
        outputDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs\in_number"
        locationFile = "locations.txt"

        # Crops that are passed as the manually confirmed data.
        confirmedCrops = [
            'slide1_cell420_image0_met.AIpng_36.jpg',
            'slide1_cell420_image0_met.AIpng_33.jpg',
            'slide1_cell420_image0_met.AIpng_10.jpg',
            'slide1_cell420_image0_met.AIpng_30.jpg',
        ]

        builder = OpeateXML(sourceDir, outputDir, locationFile)
        groupedRecords = builder.readSrcFileName()
        markedRecords = builder.getCreateXMLData(groupedRecords, confirmedCrops)
        builder.operateXML(markedRecords)
    

        最终生成的XML效果如下所示:

    image.png

        在LabelImg中的效果如下所示:


    image.png

    相关文章

      网友评论

        本文标题:Python应用基础-根据指定文件生成XML

        本文链接:https://www.haomeiwen.com/subject/adqgfktx.html