美文网首页
python+requests+pandas实现数据对比,输出e

python+requests+pandas实现数据对比,输出excel

作者: 雷阳洪 | 来源:发表于2022-03-08 17:49 被阅读0次

    教程介绍:

    该教程的校验思路:遍历新闻数据(news_id、标题、正文),用标题和正文调用接口获取企业数据(实际结果);
    再根据news_id从csv中筛选出符合条件的企业数据(预期结果);两者排序后比较,输出不相等的数据(excel输出,日志输出)

    注意! 公司项目不同,仅供参考,重在思路理解

    执行脚本 test_match_company.py

    # -*- coding: utf-8 -*-
    
    import os, pytest, json
    import pandas as pd
    from jsonpath import jsonpath
    import requests
    from util.data.xlwt_tool import write_excel
    from util.report.logger_tool import Logger
    
    
    class TestMatchCompany():
        """Compare the match_company API output with a CSV baseline.

        For every news row the title and content are posted to the
        match_company endpoint (actual result) and the baseline CSV is
        filtered by the same news id (expected result). Mismatches are
        logged and collected into an Excel report; the test itself does not
        assert, it only reports.
        """
        real_path = os.path.split(os.path.realpath(__file__))[0]
        news_path = real_path + os.sep + "../data/news_content.txt"
        # Loaded once when the class is defined; rows are consumed
        # positionally as news_id, title, content.
        news = pd.read_csv(news_path, sep='^')
        news1 = news.values.tolist()
        # Seconds to wait for the algorithm service. Without a timeout
        # requests blocks indefinitely if the service stops answering.
        request_timeout = 60

        @classmethod
        def setup_class(cls):
            """Build the endpoint URL, load the baseline CSV and set up logging."""
            algo_name = "match_company"
            # host = "52.82.43.34"
            host = "192.168.250.111"
            port = "28364"
            # match_company_url
            cls.url = "http://" + host + ':' + str(port) + '/algorithm/process/' + algo_name
            real_path = os.path.split(os.path.realpath(__file__))[0]
            labels_path = real_path + os.sep + "../data/news_company_label.csv"
            cls.labels_expected = pd.read_csv(labels_path, sep=';')
            # cls.baseline_cols = ['chinesename','emotionindicator','stockcode']
            cls.baseline_cols = ['stockcode', 'companyid']
            cls.logging = Logger('../logs/{}.log'.format(algo_name), level='error')
            # cls.logging = Logger('../logs/{}.log'.format(algo_name), level='info')

        def _fetch_actual(self, title, content):
            """Post one news item to the API and return sorted [code, comcode] pairs."""
            # extr_method defaults to 0 on the service; it must be sent as 1
            # here and there is no plan to change that default.
            parameter = {'title': title, 'content': content, 'extr_method': 1,
                         'use_skip': 0}
            payload = {'parameter': json.dumps(parameter, ensure_ascii=False)}
            response = requests.post(self.url, data=payload,
                                     timeout=self.request_timeout).json()
            matches = jsonpath(response, "$..com")
            # jsonpath() returns False (not an empty list) when nothing
            # matches, and the matched value itself may be null; both cases
            # mean "no companies" rather than an error.
            companies = (matches[0] or []) if matches else []
            actual = []
            for company in companies:
                code = jsonpath(company, "$..code")
                comcode = jsonpath(company, "$..comcode")
                actual.append([code[0], comcode[0]])
            actual.sort()
            return actual

        def _load_expected(self, news_id):
            """Return the sorted baseline rows (baseline_cols) for one news id."""
            expected = self.labels_expected
            baseline_result = expected[expected['newsid'] == news_id][self.baseline_cols]
            rows = baseline_result.values.tolist()
            for row in rows:
                # The baseline uses 'csf' in the first column where the
                # comparison expects an empty string.
                if row[0] == 'csf':
                    row[0] = ''
            rows.sort()
            return rows

        def test_company_labels(self):
            """Compare API and baseline for every news row and write the report."""
            excel_data_list = []
            for news_row in self.news1:
                news_id = news_row[0]
                title = news_row[1]
                content = news_row[2]

                shijijieguo = self._fetch_actual(title, content)
                data_expect = self._load_expected(news_id)

                if not data_expect and not shijijieguo:
                    # Nothing on either side: nothing to compare.
                    self.logging.debug(
                        "接口和csv文件都没有查到企业新闻数据,不做对比 news_id:{} 预期结果为空:{} 接口实际结果为空:{}".format(news_id, data_expect, shijijieguo))
                    continue

                if data_expect == shijijieguo:
                    self.logging.info("对比一致 news_id:{} 预期结果:{} 实际结果:{}".format(news_id, data_expect, shijijieguo))
                    continue

                # Every remaining case is a mismatch and goes into the report;
                # only the log wording depends on which side is empty.
                excel_data_list.append([str(news_id), str(data_expect), str(shijijieguo)])
                if not data_expect:
                    self.logging.error("对比不一致 news_id:{} 预期结果为空:{} 接口实际结果不为空:{}".format(news_id, data_expect, shijijieguo))
                elif not shijijieguo:
                    self.logging.error("对比不一致 news_id:{} 预期结果不为空:{} 接口实际结果为空:{}".format(news_id, data_expect, shijijieguo))
                else:
                    self.logging.error("对比不一致 news_id:{} 预期结果:{} 实际结果:{}".format(news_id, data_expect, shijijieguo))

            # Write the collected mismatches to an Excel report; the file name
            # carries the number of mismatching news items.
            write_excel("../logs/match_company_error_{}.xls".format(len(excel_data_list)), ['news_id', '预期结果', '实际结果'], excel_data_list)
    

    结果输出工具 xlwt_tool.py(即上文 from util.data.xlwt_tool import write_excel 引用的模块)

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Author : Leiyh
    # @File : xlwt_tool.py
    import xlwt
    
    
    def write_excel(file_name, data_title, data_list, encoding='utf-8'):
        """Write a header row plus data rows to an .xls workbook.

        Every cell is written as ``str(value)``. Only the first
        ``len(data_title)`` items of each row are written.

        :param file_name: path of the workbook to save
        :param data_title: titles for the first row of the sheet
        :param data_list: two-dimensional list of row data
        :param encoding: encoding passed to ``xlwt.Workbook``
        :return: None
        """
        # Note the capital W in Workbook.
        book = xlwt.Workbook(encoding=encoding)
        sheet = book.add_sheet('sheet1', cell_overwrite_ok=True)

        # Header goes into row 0.
        for col, heading in enumerate(data_title):
            sheet.write(0, col, str(heading))

        # Data rows start at row 1, directly below the header.
        column_count = len(data_title)
        for row_no, record in enumerate(data_list, start=1):
            for col in range(column_count):
                sheet.write(row_no, col, str(record[col]))

        # Save the workbook to the requested path.
        book.save(file_name)
        print('创建execel完成!')
    
    if __name__ == '__main__':
        # Manual smoke run: write a single-row workbook next to the script.
        demo_titles = ['news_id', '预期结果', '实际结果']
        demo_rows = [
            ['35942860', "['', 'ICN5025197980', 1]", "['', 'ICN5025197980', 1]"],
        ]
        write_excel("match_company.xls", demo_titles, demo_rows)
    
    
    

    日志输出工具 logger_tool.py

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Author : Leiyh
    # @File : logger_tool.py
    import logging
    
    
    class Logger(object):
        """Attach a file handler to the root logger and expose level helpers.

        The root logger is configured (not a named one), so records emitted
        through the plain ``logging`` module also reach the file.
        """
        # Accepted values for the ``level`` argument.
        level_relations = {
            'notset': logging.NOTSET,
            'debug': logging.DEBUG,
            'info': logging.INFO,
            'warning': logging.WARNING,
            'error': logging.ERROR,
            'crit': logging.CRITICAL,
            'critical': logging.CRITICAL,
        }

        def __init__(self, filename, filemode='w', level='notset',
                     format='%(asctime)s - %(levelname)s: %(message)s'):
            """Configure the root logger to write to ``filename``.

            :param filename: path of the log file; its directory is created
                when missing
            :param filemode: mode used to open the log file
            :param level: one of the keys of ``level_relations``
            :param format: ``logging.Formatter`` format string
            :raises ValueError: if ``level`` is not a known level name
            """
            # Local import: this module only imports ``logging`` at file level.
            import os

            # Validate first so a bad level leaves no handler or file behind.
            # Previously an unknown name reached setLevel(None) and failed
            # with an unrelated-looking TypeError.
            if level not in self.level_relations:
                raise ValueError(
                    "unknown log level {!r}; expected one of {}".format(
                        level, sorted(self.level_relations)))

            log_path = os.path.abspath(filename)
            os.makedirs(os.path.dirname(log_path), exist_ok=True)

            self.logger = logging.getLogger()
            self.logger.setLevel(self.level_relations[level])

            # The root logger is process-wide: creating Logger twice for the
            # same file used to add a second handler and duplicate every
            # record. The first handler for a given file wins.
            already_attached = any(
                isinstance(handler, logging.FileHandler)
                and handler.baseFilename == log_path
                for handler in self.logger.handlers
            )
            if not already_attached:
                # Explicit UTF-8 so non-ASCII messages do not depend on the
                # platform's default encoding.
                filehandle = logging.FileHandler(log_path, filemode, encoding='utf-8')
                filehandle.setFormatter(logging.Formatter(format))
                self.logger.addHandler(filehandle)

        def debug(self, msg):
            """Log ``msg`` at DEBUG level."""
            self.logger.debug(msg)

        def info(self, msg):
            """Log ``msg`` at INFO level."""
            self.logger.info(msg)

        def warning(self, msg):
            """Log ``msg`` at WARNING level."""
            self.logger.warning(msg)

        def error(self, msg):
            """Log ``msg`` at ERROR level."""
            self.logger.error(msg)

        def critical(self, msg):
            """Log ``msg`` at CRITICAL level."""
            self.logger.critical(msg)
    
    
    if __name__ == '__main__':
        # Manual check: with level 'info' the debug line below is filtered
        # out, the other three end up in all.log.
        demo_log = Logger('all.log', level='info')
        demo_log.info("Start print log")
        demo_log.debug("Do something")
        demo_log.warning("Something maybe fail")
        demo_log.error("error print log")
    

    相关文章

      网友评论

          本文标题:python+requests+pandas实现数据对比,输出e

          本文链接:https://www.haomeiwen.com/subject/lwwarrtx.html