美文网首页
将有父子级的不定长字典转换成execl

将有父子级的不定长字典转换成execl

作者: Time一柒 | 来源:发表于2021-08-30 14:54 被阅读0次

    将有父子级的dict转换成有父子级的excel

    (当前处理方式只适用于层数较少时,层数多了就会很麻烦,且原始数据中还有部分需要与字典值进行对应,所以代码并不纯粹,后续有时间再进行精简)

    • 我这边的数据字典结构数据多少不定,层级多少不定,不过最多只有三层
    • 先将所有的父子级别的字典按照|分隔符号生成新的字典
    {
      父级|子级|孙级别:具体值
    }
    
    • 将上一步骤得到的新的字典通过pandas生成一个DataFrame对象
    • 将空值赋值其他默认值
    • split_r = pf.to_dict(orient='split')将DataFrame转回一个新的字典,得到三块列名,行数,数据(二维数组)
    • 再进行循环将列名拆分开分成三级,然后生成新的DataFrame对象
    • 将DataFrame转换成excel

    之所以会分那么多步骤,是因为字典数据不定长,通过pandas的DataFrame对象,能帮助将不定长数据也按照对应填充

    import pandas as pd
    import numpy as np
    def result_data_processing(result_data, result_dict):
        """
        将数据多层数据处理成键值对应
        :param result_data: 原始数据
        :param result_dict: 原始数据字典
        :return: 处理好的数据
        """
        # 生产结果字典列表
        excel_dict_list = []
        # excel名称列表
        listing_list = []
        # 遍历第一层患者基本信息
        for patient_key, patient_value in result_data.items():
            excel_dict = {}
            excel_dict["患者ID"] = patient_key
            # 遍历第二层crf文档
            for crf_key_id, crf_value in patient_value.items():
                # 有部分数据主表中没有,但是子表中有
                if crf_key_id not in result_dict.keys():
                    result_dict[crf_key_id] = crf_key_id
                crf_key = result_dict[crf_key_id]
                # 粒度是页面,会有下一层
                if type(crf_value) == dict:
                    # 遍历第三层 页面
                    for page_key, page_value in crf_value.items():
                        # 如果粒度是截面
                        if type(page_value) == dict:
                            # 遍历第四层 截面
                            for section_key, section_value in page_value.items():
                                excel_dict[crf_key + "|" + page_key + "|" + section_key] = section_value
                        else:
                            excel_dict[crf_key + "|" + page_key] = page_value
                else:
                    excel_dict[crf_key] = crf_value
            excel_dict_list.append(excel_dict)
            # 生成名称顺序列表
            for excel_dict_key in excel_dict.keys():
                if excel_dict_key not in listing_list:
                    listing_list.append(excel_dict_key)
    
        return [excel_dict_list, listing_list]
    
    # 先将所有的父子级别的字典按照|分隔符号生成新的字典
    excel_dict_list = result_data_processing(yuan, {"1": "1111"})
    
    
    # 将字典列表转换为DataFrame
    pf = pd.DataFrame(list(excel_dict_list[0]))
    # 指定列名顺序
    pf = pf[excel_dict_list[1]]
    # 将DataFrame中空的替换成固定值
    pf.fillna('0.0%',inplace=True)
    # DataFrame按照要求转dict
    split_r = pf.to_dict(orient='split')
    # 拿到所有字典的key-也是DataFrame的列名
    columns_name = split_r["columns"]
    # 拿到所有数据
    all_data = split_r["data"]
    
    # 创建新的结果字典
    columns_one=[]
    columns_two=[]
    columns_three=[]
    for columns in columns_name:
        if columns == "患者ID":
            columns_one.append(columns)
            columns_two.append(columns)
            columns_three.append(columns)
        else:
            columns_tree = columns.split("|")
            if len(columns_tree) == 1:
                columns_one.append(columns_tree[0])
            elif len(columns_tree) == 2:
                columns_one.append(columns_tree[0])
                columns_two.append(columns_tree[1])
            elif len(columns_tree) == 3:
                columns_one.append(columns_tree[0])
                columns_two.append(columns_tree[1])
                columns_three.append(columns_tree[2])
    # 将二维数组转换称numpy的二维数组
    numpy_data = np.array(all_data)
    if columns_three == ["患者ID"] and columns_two != ["患者ID"]:
        pf = pd.DataFrame(numpy_data, index=split_r["index"],
                          columns=[columns_one, columns_two])
    elif columns_two == ["患者ID"]:
        pf = pd.DataFrame(numpy_data, index=split_r["index"],
                          columns=[columns_one])
    else:
        pf = pd.DataFrame(numpy_data, index=split_r["index"],
                          columns=[columns_one, columns_two, columns_three])
    print(pf)
    
    file_path = pd.ExcelWriter('test2.xlsx')
    # 替换空单元格
    pf.fillna('0.0%', inplace=True)
    # 输出
    pf.to_excel(file_path, encoding='utf-8', index=True)
    # 保存表格
    file_path.save()
    

    我这边的数据结构

    {
        "26404112": {
            "e216b428-e123-492a-af79-c9ffe72271a9": {
                "产检情况": {
                    "孕产妇基本情况": "85.71%",
                    "孕周": "100.0%",
                    "不适主诉": "100.0%",
                    "妊娠合并症": "0.0%"
                },
                "实验室检查": {
                    "血脂": "100.0%",
                    "24小时尿蛋白定量": "100.0%",
                    "尿常规": "94.12%",
                    "肾功能": "85.71%",
                    "血糖及微量元素": "80.0%",
                    "凝血功能": "100.0%",
                    "肝功能": "100.0%",
                    "电解质": "87.5%",
                    "甲状腺功能": "85.71%",
                    "血常规": "100.0%"
                },
                "影像学检查": {
                    "产科B超检查": "75.0%",
                    "心电图检查": "100.0%"
                },
                "患者信息": {
                    "不良嗜好": "50.0%",
                    "基础信息": "100.0%",
                    "家族史": "0.0%",
                    "月经婚育史": "92.5%",
                    "既往史": "93.55%"
                }
            },
            "8e078749-f263-43be-a371-d8ccf4656ac2": {
                "产检和实验室检查": {
                    "产科检查": "53.33%",
                    "孕产妇基本情况": "0.0%",
                    "尿常规": "87.5%",
                    "妊娠合并症": "0.0%",
                    "孕周": "100.0%",
                    "不适主诉": "100.0%"
                }
            }
        },
        "111111": {
            "e216b428-e123-492a-af79-c9ffe72271a9": {
                "产检情况": {
                    "孕产妇基本情况": "85.71%",
                    "孕周": "100.0%",
                    "不适主诉": "100.0%",
                    "妊娠合并症": "0.0%"
                },
                "患者信息": {
                    "不良嗜好": "50.0%",
                    "基础信息": "100.0%",
                    "家族史": "0.0%",
                    "月经婚育史": "92.5%",
                    "既往史": "93.55%"
                }
            },
            "e1190c2d-0032-477c-b051-6ffdffb208b6": {
                "第一页": {
                    "疑似子痫前期:满足>1项检查发现": "100.0%",
                    "排除:明显的子痫前期、确诊的HELLP综合症、90天内服用过药物": "100.0%",
                    "疑似子痫前期:首次出现尿蛋白": "100.0%",
                    "疑似子痫前期:满足>1项症状": "100.0%",
                    "患者信息": "57.14%",
                    "排除:患者登记之后进一步排除部分病例": "100.0%",
                    "疑似子痫前期:血压首次升高": "85.19%",
                    "疑似子痫前期:血压进一步升高": "63.64%"
                }
            }
            "201444cc-3788-43ef-bcd4-3fcdb3555e6b": {
                "辅助检查": {
                    "肝功能": "100.0%",
                    "血常规": "100.0%",
                    "肾功能": "85.71%",
                    "电解质": "0.0%",
                    "凝血功能": "92.31%",
                    "心电图检查": "50.0%",
                    "血脂": "87.5%",
                    "尿常规": "85.71%"
                },
                "产检信息": {
                    "妊娠合并症": "0.0%",
                    "孕周": "0.0%",
                    "产科检查": "87.5%",
                    "不适主诉": "100.0%",
                    "孕产妇基本情况": "0.0%"
                }
            }
            "8e078749-f263-43be-a371-d8ccf4656ac2": {
                "产检和实验室检查": {
                    "产科检查": "53.33%",
                    "孕产妇基本情况": "0.0%",
                    "尿常规": "87.5%",
                    "妊娠合并症": "0.0%",
                    "孕周": "100.0%",
                    "不适主诉": "100.0%"
                }
            }
        }
    }
    

    相关文章

      网友评论

          本文标题:将有父子级的不定长字典转换成execl

          本文链接:https://www.haomeiwen.com/subject/gcqoiltx.html