美文网首页
python | Auto updated the Excel

python | Auto updated the Excel

作者: __Fergus__ | 来源:发表于2021-02-12 19:57 被阅读0次

    背景描述

    每周四从数据库中查询数据,转换为固定的格式,写入excel邮件发送。

    技术栈小结

    os

    import os
    os.listdir(path)  # 当前路径下文件列表
    os.getcwd()  # 返回当前工作目录
    os.chdir(path)  # 改变当前工作目录
    os.mkdir(path)  # 创建文件夹
    
    os.path.expanduser('~')  # 获取“本地用户”路径
    os.path.join(dir, fil)  # 连接
    os.path.exists(path)  # 检测是否存在
    os.path.isdir(path)  # 文件路径是否存在
    os.path.isfile(path)  # 判断路径是否为文件
    os.path.rename(oldName, newName)  # 修改文件名
    os.path.split(path)  # 拆分为文件夹路径 & 文件
    

    time

    import time
    now = lambda : time.perf_counter()
    time.sleep(1)  # 暂停1s
    
     time.time()  # 返回时间戳,1970年以后
    Out[18]: 1612772248.3974457
    
    time.localtime(time.time())
    Out[19]: time.struct_time(tm_year=2021, tm_mon=2, tm_mday=8, tm_hour=16, tm_min=17, tm_sec=36, tm_wday=0, tm_yday=39, tm_isdst=0)
    
    time.asctime(time.localtime(time.time()))
    Out[21]: 'Mon Feb  8 16:18:14 2021'
    
    

    datetime

    from datetime import datetime, timedelta
    dat = lambda n: datetime.today() - timedelta(n)
    
    dat(1)
    Out[29]: datetime.datetime(2021, 2, 7, 16, 23, 44, 185446)
    
    dat(1).year, dat(1).month, dat(1).day
    Out[35]: (2021, 2, 7)
    
    datetime.strftime(dat(1), '%Y%m%d')
    Out[31]: '20210207'
    
    d = '20210201'
    datetime.strptime(d,'%Y%m%d')
    Out[33]: datetime.datetime(2021, 2, 1, 0, 0)
    
    # 季度
    'Q' + str((dat(1).month-1)//3+1)
    Out[38]: 'Q1'
    

    xlwings

    import xlwings as xw
    wb = xw.Book()
    xw.apps.keys()  ## PID
    sht = wb.sheets['Sheet1'] # wb.sheets[0]
    sht.range('A1').value = 'Foo'  # sht['A1'].value
    rng = sht['A1'].current_region  # 返回当前表格区域绝对路径
    cell = rng.last_cell  # 返回区域最后一个单元格
    cell.row, cell.column  # 返回R,C  -- python中从0开始
    cell.clear_contents()  # 清空文本
    cell.clear()  # 全部清空
    # 写入时默认为一行,若写入一列
    sht['A1'].options(transpose=True).value = [1, 2]
    
    # ndim
    cell.options(ndim=1).value
    Out[56]: [0.0]
    
    cell.value
    Out[57]: 0.0
    
    rng.value  # sht['A1'].expand().value
    Out[64]: [0.0, 0.0]
    
    rng.options(ndim=2).value
    Out[65]: [[0.0], [0.0]]
    
    # 与pandas,numpy配合
    sht['A1'].options(pd.DataFrame, expand='table').value
    sht['A1'].options(np.array, expand='table').value
    sht['A1:B7'].option(pd.Series).value
    

    pandas

    import pandas as pd
    df = pd.DataFrame()
    
    # 透析
    pd.pivot_table(df, values=['金额'], index=['日期'], columns=['类别'], aggfunc=['sum', 'mean'])
    

    sqlalchemy & configparser

    # path 配置文件路径
    from configparser import ConfigParser
    conf = ConfigParser()
    conf.read(path)
    conf.items(section)  # 返回数组
    conf.get(section, info)  # 查询section下的info
    
    # 数据库SQL Server
    # pip install pymssql
    from sqlalchemy import create_engine
    ss = 'mssql+pymssql://%s:%s@%s:%s/%s'
    engine = create_engine(ss % (ip, port, acc, pw, db))
    
    

    email

    # email构造邮件正文,smtplib发送邮件
    import smtplib
    from email.header import Header
    from email.mime.text import MIMEText
    from email.mime.application import MEMIApplication
    from email.mime.multipart import MIMEMultipart
    from email.utils import parseaddr, formataddr
    
    msg = MIMEMultipart()  # 邮件
    msg.attach(MEMEText(message, 'plain', 'utf-8'))  # 加入正文
    # 附件
    with open(path) as f:
        x = MIMEApplication(f.read())
        x.add_header('Content-Disposition', 'attachment', filename=filname)  # 加上头信息
        msg.attach(x)
    # 发送
    with smtplib.SMTP(server, 25) as smtp:  # SMTP默认协议端口25
        smtp.ehlo()
        smtp.starttls()  # 加密建立安全连接
        smtl.ehlo()
        smtp.set_debuglevel(1)  # 1,打印出与SMTP服务器交互的所有信息
        smtp.login(ac, pw)
        try:
            smtp.sendmail(From, To, msg.as_string())  # To为list
        except:
            raise
        
    

    代码

    # _*_ coding:utf-8 _*_
    '''
    
    Created on 2021/2/4
    Author: Fergus
    Note:
    1.用于获取、整理百度所需现金,并于每周四发送邮件
    2.查询 - 转换 - 打开(找到文件 & 修改日期) & 写入 & 保存 - 邮件发送
    
    '''
    
    import os
    import time
    import pandas as pd
    import xlwings as xw
    from datetime import datetime, timedelta
    
    now = lambda : time.perf_counter()
    
    def loginInfo(section):
        # 从配置文件获取相关信息
        from configparser import ConfigParser
        CONF = os.path.join(os.path.expanduser('~'), r'Chinasearch\c.s.conf')
        conf = ConfigParser()
        if os.path.exists(CONF):
            conf.read(CONF)
            return tuple(map(lambda x: x[1], conf.items(section)))
        else:
            raise OSError('配置文件不存在')
            
    def connectDB():
        # 连接数据库
        from sqlalchemy import create_engine
        ss = 'mssql+pymssql://%s:%s@%s:%s/%s'
        try:
            engine = create_engine(ss % loginInfo('SQL Server'))
        except Exception:
            print('数据库连接失败。')
            raise
        else:
            return engine
            
    def sqlP4P(key):
        # sql: 查询p4p
        sql = '''
            SELECT c.日期, 广告主, 信誉成长值, '{}', sum(c.sum_)
             FROM basicInfo b
              RIGHT JOIN (
            -- 子查询:现金
            SELECT 日期, 用户名, sum(金额) sum_
             FROM 现金
             WHERE 类别 in ({})
              AND 日期 BETWEEN '{}' AND '{}'
             GROUP BY 日期, 用户名 ) c
               ON b.用户名 = c.用户名
             GROUP BY c.日期, 广告主, 信誉成长值
             ORDER BY c.日期
        '''
        if key == '总现金':
            return sql.format('总点击(P4P)', "'搜索点击', '新产品', '自主投放'"
                            , datSt, datEnd)
        elif key == '原生':
            return sql.format('信息流现金', "'自主投放'", datSt, datEnd)
    
    def sqlKA(key):
        # sql: 查询ka
        return ''' SELECT 日期, 广告主, 二级行业, 产品线, 金额
            FROM ka_basicInfo
            WHERE 日期 BETWEEN '{}' AND '{}'
            ORDER BY 日期
            '''.format(datSt, datEnd)
    
    def inputDat():
        # 输入超、止日期
        while 1:
            i = input('输入起,止日期,用英文逗号隔开,如20210101,20210102\n')
            if i.count(',') == 1:
                s1, s2 = map(lambda x: x.strip(), i.split(','))
                try:
                    datetime.strptime(s1, '%Y%m%d')
                    datetime.strptime(s2, '%Y%m%d')
                except:
                    continue
                else:
                    return s1, s2
            else:
                print('输入错误')
                continue
    
    def searchData(func, key=None):
        # 访问数据库,返回查询结果
        with connectDB().begin() as conn:
            sql = func(key)
            return list(map(lambda x: list(x), conn.execute(sql).fetchall()))
    
    def getY():
        # 年:21
        return datetime.strptime(datEnd, '%Y%m%d').strftime('%y')
    
    def getQ():
        # 季:Q1
        return 'Q' + str((datetime.strptime(datEnd, '%Y%m%d').month - 1) // 3 + 1)
    
    def getDat(n=0):
        # 日期:01.01
        return (datetime.strptime(datEnd, '%Y%m%d') - timedelta(n)).strftime('%m.%d')
    
    def getFil(path):
        # 返回Excel的绝对路径
        fil = os.path.join(path
            , getY() + '年' + getQ() + ' iDATA现金消费(' + getDat() + ').xlsx')
        for n in range(30):
            fil1 = os.path.join(path
                    , getY() + '年' + getQ() + ' iDATA现金消费(' + getDat(n) + ').xlsx')
            if os.path.isfile(fil1):
                try:
                    os.rename(fil1, fil)
                except PermissionError as e:
                    print('文件已打开,无法操作,请先关闭:\n\n%s' % e)
                    raise
                else:
                    print('Return:', fil)
                    return fil
    
    def clear(sht):
        # 清空excel内容
        cell = sht['A1'].current_region.last_cell
        sht[1:cell.row, :cell.column].clear_contents()
    
    def writeP4P(wb):
        # 向Excel写入P4P结果
        sht = wb.sheets['P4P原始数据']
        clear(sht)
        # 写入
        sht['A2'].value = searchData(sqlP4P, '总现金')
        cell = sht['A1'].current_region.last_cell
        sht['A' + str(cell.row + 1)].value = searchData(sqlP4P, '原生')
    
    def writeKA(wb):
        # 向Excel写入KA结果
        sht = wb.sheets['KA原始数据']
        clear(sht)
        sht['A2'].value = searchData(sqlKA)
    
    def _sumP4P(sht, df):
        # 求和
        sht['C1'].value = df.loc[df['产品线'] == '总点击(P4P)', '消费现金'].sum()
        sht['B1'].value = df.loc[df['产品线'] == '信息流现金', '消费现金'].sum()
    
    def _transfer(wb, sht_name):
        # 将数据转换为df
        sht = wb.sheets[sht_name]
        df = sht['A1'].options(pd.DataFrame, expand='table').value
        df.reset_index(inplace=True)
        df['日期'] = df['日期'].map(lambda x: str(int(x)))
        return df
    
    def pivotP4P(wb):
        # 透析
        df = _transfer(wb, 'P4P原始数据')
        sht = wb.sheets['P4P现金']
        clear(sht)
        sht['A1'].value = pd.pivot_table(df, index=['日期'], columns=['产品线']
                              , values=['消费现金'], aggfunc=sum)
        # sum
        _sumP4P(sht, df)
    
    def _sumKA(sht, df):
        # 求和
        cell = sht['A1'].current_region.last_cell
        header = sht[1,1:cell.column].value
        #
        for n, h in enumerate(header):
            sht[0, header.index(h)+1].value = df.loc[df['catogary'] == h
                , '消费现金'].sum()
        
    def pivotKA(wb):
        # 透析
        df = _transfer(wb, 'KA原始数据')
        df['catogary'] = '-'
        df.loc[df['产品线'] == '品牌序章', 'catogary'] = '展示类产品现金消费'
        df.loc[df['产品线'] == '原生CPC', 'catogary'] = '原生CPC现金消费'
        df.loc[(df['产品线'] != '品牌序章') & 
               (df['产品线'] != '原生CPC')
               , 'catogary'] = 'KA总现金消费\n(不含展示类)'
        sht = wb.sheets['KA现金消费']
        clear(sht)
        # 原生cpc
        if '原生CPC现金消费' not in df.columns:
            df = pd.pivot_table(df, index='日期', columns=['catogary']
                , values=['消费现金'], aggfunc=sum, fill_value=0)
            df.columns = df.columns.get_level_values(1)
            df['原生CPC现金消费'] = 0
            sht['A2'].value = df
            # sum
            sht['B1'].value = df.sum().values
        else:
            sht['A1'].value = pd.pivot_table(df, index='日期', columns=['catogary']
                , values=['消费现金'], aggfunc=sum, fill_value=0)
            # sum
            _sumKA(sht, df)
        
    def writeExcel():
        path = getFil(PATH)
        wb = xw.Book(path)
        writeP4P(wb)
        writeKA(wb)
        pivotP4P(wb)
        pivotKA(wb)
        wb.save()
        wb.close()
    
    def sendEmail(message):
        # 发送邮件
        import smtplib
        from email.header import Header
        from email.mime.text import MIMEText
        from email.mime.multipart import MIMEMultipart
        from email.mime.application import MIMEApplication
        from email.utils import parseaddr, formataddr
        
        def _format_addr(s):
            name, addr = parseaddr(s)
            return formataddr((Header(name, 'utf-8').encode(), addr))
        
        msg = MIMEMultipart()
        msg['From'] = _format_addr('Fergus<%s>' % loginInfo('mail_baidu')[2])
        msg['To'] = loginInfo('Q_toBaidu')[0]
        msg.attach(MIMEText(message, 'plain', 'utf-8'))
        path = getFil(PATH)
        with open(path, 'rb') as f:
            x = MIMEApplication(f.read())
            x.add_header('Content-Disposition', 'attachment'
                , filename=os.path.split(path)[-1])
            msg['Subject'] = Header(os.path.split(path)[-1].replace('.xlsx','')
                , 'utf-8').encode()
            msg.attach(x)
        with smtplib.SMTP(loginInfo('mail_baidu')[1], 25) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.ehlo()
            smtp.set_debuglevel(1)
            smtp.login(loginInfo('mail_baidu')[2], loginInfo('mail_baidu')[3])
            try:
                smtp.sendmail(loginInfo('mail_baidu')[2]
                    , loginInfo('Q_toBaidu')[0].split(',')
                    , msg.as_string())
            except:
                raise
            
        
    if __name__ == '__main__':
    
        st = now()
        PATH = r'H:\sz_数据\Download'
        if not os.path.isdir(path):
            PATH = os.path.join(os.path.expanduser('~'), r'Downloads')
        datSt, datEnd = inputDat()
        writeExcel()
        sendEmail(
            '''Dear all,\n        季度现金消费见附件,请查收。
            \n如有任何疑问,可随时和我联系。\nFergus''')
        time.sleep(30)
        print('All Runtime: %.2fMin' % ((now() - st)/60))
    
    
    

    相关文章

      网友评论

          本文标题:python | Auto updated the Excel

          本文链接:https://www.haomeiwen.com/subject/yfwqxltx.html