背景描述
每周四从数据库中查询数据,转换为固定的格式,写入excel邮件发送。
技术栈小结
os
import os
os.listdir(path) # 当前路径下文件列表
os.getcwd() # 返回当前工作目录
os.chdir(path) # 改变当前工作目录
os.mkdir(path) # 创建文件夹
os.path.expanduser('~') # 获取“本地用户”路径
os.path.join(dir, fil) # 连接
os.path.exists(path) # 检测是否存在
os.path.isdir(path) # 文件路径是否存在
os.path.isfile(path) # 判断路径是否为文件
os.path.rename(oldName, newName) # 修改文件名
os.path.split(path) # 拆分为文件夹路径 & 文件
time
import time
now = lambda : time.perf_counter()
time.sleep(1) # 暂停1s
time.time() # 返回时间戳,1970年以后
Out[18]: 1612772248.3974457
time.localtime(time.time())
Out[19]: time.struct_time(tm_year=2021, tm_mon=2, tm_mday=8, tm_hour=16, tm_min=17, tm_sec=36, tm_wday=0, tm_yday=39, tm_isdst=0)
time.asctime(time.localtime(time.time()))
Out[21]: 'Mon Feb 8 16:18:14 2021'
datetime
from datetime import datetime, timedelta
dat = lambda n: datetime.today() - timedelta(n)
dat(1)
Out[29]: datetime.datetime(2021, 2, 7, 16, 23, 44, 185446)
dat(1).year, dat(1).month, dat(1).day
Out[35]: (2021, 2, 7)
datetime.strftime(dat(1), '%Y%m%d')
Out[31]: '20210207'
d = '20210201'
datetime.strptime(d,'%Y%m%d')
Out[33]: datetime.datetime(2021, 2, 1, 0, 0)
# 季度
'Q' + str((dat(1).month-1)//3+1)
Out[38]: 'Q1'
xlwings
import xlwings as xw
wb = xw.Book()
xw.apps.keys() ## PID
sht = wb.sheets['Sheet1'] # wb.sheets[0]
sht.range('A1').value = 'Foo' # sht['A1'].value
rng = sht['A1'].current_region # 返回当前表格区域绝对路径
cell = rng.last_cell # 返回区域最后一个单元格
cell.row, cell.column # 返回R,C -- python中从0开始
cell.clear_contents() # 清空文本
cell.clear() # 全部清空
# 写入时默认为一行,若写入一列
sht['A1'].options(transpose=True).value = [1, 2]
# ndim
cell.options(ndim=1).value
Out[56]: [0.0]
cell.value
Out[57]: 0.0
rng.value # sht['A1'].expand().value
Out[64]: [0.0, 0.0]
rng.options(ndim=2).value
Out[65]: [[0.0], [0.0]]
# 与pandas,numpy配合
sht['A1'].options(pd.DataFrame, expand='table').value
sht['A1'].options(np.array, expand='table').value
sht['A1:B7'].option(pd.Series).value
pandas
import pandas as pd
df = pd.DataFrame()
# 透析
pd.pivot_table(df, values=['金额'], index=['日期'], columns=['类别'], aggfunc=['sum', 'mean'])
sqlalchemy & configparser
# path 配置文件路径
from configparser import ConfigParser
conf = ConfigParser()
conf.read(path)
conf.items(section) # 返回数组
conf.get(section, info) # 查询section下的info
# 数据库SQL Server
# pip install pymssql
from sqlalchemy import create_engine
ss = 'mssql+pymssql://%s:%s@%s:%s/%s'
engine = create_engine(ss % (ip, port, acc, pw, db))
# email构造邮件正文,smtplib发送邮件
import smtplib
from email.header import Header
from email.mime.text import MIMEText
from email.mime.application import MEMIApplication
from email.mime.multipart import MIMEMultipart
from email.utils import parseaddr, formataddr
msg = MIMEMultipart() # 邮件
msg.attach(MEMEText(message, 'plain', 'utf-8')) # 加入正文
# 附件
with open(path) as f:
x = MIMEApplication(f.read())
x.add_header('Content-Disposition', 'attachment', filename=filname) # 加上头信息
msg.attach(x)
# 发送
with smtplib.SMTP(server, 25) as smtp: # SMTP默认协议端口25
smtp.ehlo()
smtp.starttls() # 加密建立安全连接
smtl.ehlo()
smtp.set_debuglevel(1) # 1,打印出与SMTP服务器交互的所有信息
smtp.login(ac, pw)
try:
smtp.sendmail(From, To, msg.as_string()) # To为list
except:
raise
代码
# _*_ coding:utf-8 _*_
'''
Created on 2021/2/4
Author: Fergus
Note:
1.用于获取、整理百度所需现金,并于每周四发送邮件
2.查询 - 转换 - 打开(找到文件 & 修改日期) & 写入 & 保存 - 邮件发送
'''
import os
import time
import pandas as pd
import xlwings as xw
from datetime import datetime, timedelta
now = lambda : time.perf_counter()
def loginInfo(section):
# 从配置文件获取相关信息
from configparser import ConfigParser
CONF = os.path.join(os.path.expanduser('~'), r'Chinasearch\c.s.conf')
conf = ConfigParser()
if os.path.exists(CONF):
conf.read(CONF)
return tuple(map(lambda x: x[1], conf.items(section)))
else:
raise OSError('配置文件不存在')
def connectDB():
# 连接数据库
from sqlalchemy import create_engine
ss = 'mssql+pymssql://%s:%s@%s:%s/%s'
try:
engine = create_engine(ss % loginInfo('SQL Server'))
except Exception:
print('数据库连接失败。')
raise
else:
return engine
def sqlP4P(key):
# sql: 查询p4p
sql = '''
SELECT c.日期, 广告主, 信誉成长值, '{}', sum(c.sum_)
FROM basicInfo b
RIGHT JOIN (
-- 子查询:现金
SELECT 日期, 用户名, sum(金额) sum_
FROM 现金
WHERE 类别 in ({})
AND 日期 BETWEEN '{}' AND '{}'
GROUP BY 日期, 用户名 ) c
ON b.用户名 = c.用户名
GROUP BY c.日期, 广告主, 信誉成长值
ORDER BY c.日期
'''
if key == '总现金':
return sql.format('总点击(P4P)', "'搜索点击', '新产品', '自主投放'"
, datSt, datEnd)
elif key == '原生':
return sql.format('信息流现金', "'自主投放'", datSt, datEnd)
def sqlKA(key):
# sql: 查询ka
return ''' SELECT 日期, 广告主, 二级行业, 产品线, 金额
FROM ka_basicInfo
WHERE 日期 BETWEEN '{}' AND '{}'
ORDER BY 日期
'''.format(datSt, datEnd)
def inputDat():
# 输入超、止日期
while 1:
i = input('输入起,止日期,用英文逗号隔开,如20210101,20210102\n')
if i.count(',') == 1:
s1, s2 = map(lambda x: x.strip(), i.split(','))
try:
datetime.strptime(s1, '%Y%m%d')
datetime.strptime(s2, '%Y%m%d')
except:
continue
else:
return s1, s2
else:
print('输入错误')
continue
def searchData(func, key=None):
# 访问数据库,返回查询结果
with connectDB().begin() as conn:
sql = func(key)
return list(map(lambda x: list(x), conn.execute(sql).fetchall()))
def getY():
# 年:21
return datetime.strptime(datEnd, '%Y%m%d').strftime('%y')
def getQ():
# 季:Q1
return 'Q' + str((datetime.strptime(datEnd, '%Y%m%d').month - 1) // 3 + 1)
def getDat(n=0):
# 日期:01.01
return (datetime.strptime(datEnd, '%Y%m%d') - timedelta(n)).strftime('%m.%d')
def getFil(path):
# 返回Excel的绝对路径
fil = os.path.join(path
, getY() + '年' + getQ() + ' iDATA现金消费(' + getDat() + ').xlsx')
for n in range(30):
fil1 = os.path.join(path
, getY() + '年' + getQ() + ' iDATA现金消费(' + getDat(n) + ').xlsx')
if os.path.isfile(fil1):
try:
os.rename(fil1, fil)
except PermissionError as e:
print('文件已打开,无法操作,请先关闭:\n\n%s' % e)
raise
else:
print('Return:', fil)
return fil
def clear(sht):
# 清空excel内容
cell = sht['A1'].current_region.last_cell
sht[1:cell.row, :cell.column].clear_contents()
def writeP4P(wb):
# 向Excel写入P4P结果
sht = wb.sheets['P4P原始数据']
clear(sht)
# 写入
sht['A2'].value = searchData(sqlP4P, '总现金')
cell = sht['A1'].current_region.last_cell
sht['A' + str(cell.row + 1)].value = searchData(sqlP4P, '原生')
def writeKA(wb):
# 向Excel写入KA结果
sht = wb.sheets['KA原始数据']
clear(sht)
sht['A2'].value = searchData(sqlKA)
def _sumP4P(sht, df):
# 求和
sht['C1'].value = df.loc[df['产品线'] == '总点击(P4P)', '消费现金'].sum()
sht['B1'].value = df.loc[df['产品线'] == '信息流现金', '消费现金'].sum()
def _transfer(wb, sht_name):
# 将数据转换为df
sht = wb.sheets[sht_name]
df = sht['A1'].options(pd.DataFrame, expand='table').value
df.reset_index(inplace=True)
df['日期'] = df['日期'].map(lambda x: str(int(x)))
return df
def pivotP4P(wb):
# 透析
df = _transfer(wb, 'P4P原始数据')
sht = wb.sheets['P4P现金']
clear(sht)
sht['A1'].value = pd.pivot_table(df, index=['日期'], columns=['产品线']
, values=['消费现金'], aggfunc=sum)
# sum
_sumP4P(sht, df)
def _sumKA(sht, df):
# 求和
cell = sht['A1'].current_region.last_cell
header = sht[1,1:cell.column].value
#
for n, h in enumerate(header):
sht[0, header.index(h)+1].value = df.loc[df['catogary'] == h
, '消费现金'].sum()
def pivotKA(wb):
# 透析
df = _transfer(wb, 'KA原始数据')
df['catogary'] = '-'
df.loc[df['产品线'] == '品牌序章', 'catogary'] = '展示类产品现金消费'
df.loc[df['产品线'] == '原生CPC', 'catogary'] = '原生CPC现金消费'
df.loc[(df['产品线'] != '品牌序章') &
(df['产品线'] != '原生CPC')
, 'catogary'] = 'KA总现金消费\n(不含展示类)'
sht = wb.sheets['KA现金消费']
clear(sht)
# 原生cpc
if '原生CPC现金消费' not in df.columns:
df = pd.pivot_table(df, index='日期', columns=['catogary']
, values=['消费现金'], aggfunc=sum, fill_value=0)
df.columns = df.columns.get_level_values(1)
df['原生CPC现金消费'] = 0
sht['A2'].value = df
# sum
sht['B1'].value = df.sum().values
else:
sht['A1'].value = pd.pivot_table(df, index='日期', columns=['catogary']
, values=['消费现金'], aggfunc=sum, fill_value=0)
# sum
_sumKA(sht, df)
def writeExcel():
path = getFil(PATH)
wb = xw.Book(path)
writeP4P(wb)
writeKA(wb)
pivotP4P(wb)
pivotKA(wb)
wb.save()
wb.close()
def sendEmail(message):
# 发送邮件
import smtplib
from email.header import Header
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
from email.utils import parseaddr, formataddr
def _format_addr(s):
name, addr = parseaddr(s)
return formataddr((Header(name, 'utf-8').encode(), addr))
msg = MIMEMultipart()
msg['From'] = _format_addr('Fergus<%s>' % loginInfo('mail_baidu')[2])
msg['To'] = loginInfo('Q_toBaidu')[0]
msg.attach(MIMEText(message, 'plain', 'utf-8'))
path = getFil(PATH)
with open(path, 'rb') as f:
x = MIMEApplication(f.read())
x.add_header('Content-Disposition', 'attachment'
, filename=os.path.split(path)[-1])
msg['Subject'] = Header(os.path.split(path)[-1].replace('.xlsx','')
, 'utf-8').encode()
msg.attach(x)
with smtplib.SMTP(loginInfo('mail_baidu')[1], 25) as smtp:
smtp.ehlo()
smtp.starttls()
smtp.ehlo()
smtp.set_debuglevel(1)
smtp.login(loginInfo('mail_baidu')[2], loginInfo('mail_baidu')[3])
try:
smtp.sendmail(loginInfo('mail_baidu')[2]
, loginInfo('Q_toBaidu')[0].split(',')
, msg.as_string())
except:
raise
if __name__ == '__main__':
st = now()
PATH = r'H:\sz_数据\Download'
if not os.path.isdir(path):
PATH = os.path.join(os.path.expanduser('~'), r'Downloads')
datSt, datEnd = inputDat()
writeExcel()
sendEmail(
'''Dear all,\n 季度现金消费见附件,请查收。
\n如有任何疑问,可随时和我联系。\nFergus''')
time.sleep(30)
print('All Runtime: %.2fMin' % ((now() - st)/60))
网友评论