General
#获取昨天日期 20200922
from datetime import date, timedelta
dt = (date.today() + timedelta(days=-1)).strftime("%Y%m%d")
# 获取运行时间
import time
print("reading data... " + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
源表解析
# coding=utf-8
import re
import os
# 获取src.txt的sql源表写入new.txt, 输出new中不在old.txt的表, 同时输出old但不在new的表
with open('d:/desktop/test/src.txt', 'r', encoding='utf-8') as fr1:
str = fr1.read()
str = re.sub(r'(--).*', '', str) # 清空注释
regexp = r'((?i)(from)|(join))\s+(([a-zA-Z0-9\_]+)\.\s*([a-zA-Z0-9\_]+))'
result = re.findall(regexp, str)
table_set = set()
for m in result:
table_set.add(m[3].lower())
constr = '\n'
str1 = constr.join(table_set) # 用换行符拼接得到的源表集合
# 将源表写到new文件
with open('d:/desktop/test/new.txt', 'w+', encoding='utf-8') as fw1:
fw1.write(str1)
# 判断new SQL是否有新增源表 & 判断new sql是不是有删除源表
with open('d:/desktop/test/old.txt', 'r', encoding='utf-8') as fr2:
str_old = fr2.read()
# 判断是否新增
print('# new not in old: ')
for t in table_set:
if t not in str_old:
print(t)
# 判断是否删除
str_old = re.sub(r'(--).*', '', str_old)
regexp1 = r'((?i)(from)|(join))\s+(([a-zA-Z0-9\_]+)\.\s*([a-zA-Z0-9\_]+))'
result1 = re.findall(regexp1, str_old)
table_set1 = set()
for n in result1:
table_set1.add(n[3].lower()) # 获得old源表
with open('d:/desktop/test/new.txt', 'r', encoding='utf-8') as fr3:
str2 = fr3.read()
print('\n# old not in new: ')
for t in table_set1:
if t not in str2:
print(t)
print('bingo')
findall 返回列表 https://blog.csdn.net/Cashey1991/article/details/8875213
- 不带括号: 列表元素为字符串,字符串为匹配整个表达式(非整个表达式)
- 一个括号: 列表元素为字符串,字符串为匹配括号表达式(非整个表达式)
- 多个括号: 列表元素为tuple, tuple元素个数与括号数对应(包括匹配结果顺序)
文件读写
- 官方文档
- w+: 读写,先清文件内容,文件不存在会创建
- a+: 追加读写,文件不存在会创建
- r+: 读写,文件不存在报错
- a: 追加写,文件不存在会创建
with open('d:/desktop/test/result.txt', 'r', encoding='utf-8') as fr: # errors='ignore'
for line in fr.readlines(): # 一次性读取, 按行存放到list
print(line.strip()) # line会带\n
str = fr.read() # 一次性读取, 不能用于循环读取
网友评论