load函数就是从日志中提取合格的数据的生成器函数。
它可以作为dispatcher函数的数据源。
原来写的handler函数处理一个字典的'datetime'字段,不能处理日志抽取函数extract返回的字典,提供一个新的函数!
代码跑起来
# !/usr/bin/env python
# encoding:utf-8
'''
@auther:administrator
'''
import random # 产生随机数;
import re, datetime, time, threading
from queue import Queue
# Sample access-log line; used for the module-level smoke match below.
line = '''183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"'''
# Earlier positional-group version of the pattern, kept for reference:
# pattern = r'''([\d.]{7,15}) - - \[([/\w +:]+)\] "(\w+) (\S+) ([\w/\d.]+)" (\d+) (\d+) .+ "(.+)"'''
# Raw string: the pattern contains \d, \[ and \S, which are invalid escape
# sequences in a normal string literal.
# NOTE: the group name 'userggent' is misspelled, but it is the key that ends
# up in every parsed record, so it is left unchanged here.
pattern = r'(?P<remote>[\d.]{7,15}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<method>[^" ]+) (?P<url>[^" ]+) (?P<protocol>[^" ]+)" (?P<status>\d+) (?P<size>\d+) \S+ "(?P<userggent>[^"]*)"'
regex = re.compile(pattern)
matcher = regex.match(line)
# Per-field converters applied to the captured strings; fields without an
# entry stay as strings.
ops = {
    'datetime': lambda dstr: datetime.datetime.strptime(dstr, '%d/%b/%Y:%H:%M:%S %z'),
    'status': int,
    'size': int,
}
# Extraction
def extract(line: str):
    """Parse one log line into a dict of named fields.

    The line is matched against the module-level ``regex``. Each captured
    string is passed through its converter in ``ops`` when one is registered
    for that field name; other fields are kept as captured.

    Returns the field dict, or None when the line does not match.
    """
    found = regex.match(line)
    if not found:
        return None
    record = {}
    for name, text in found.groupdict().items():
        convert = ops.get(name)
        record[name] = text if convert is None else convert(text)
    return record
# print(extract(line))
# Data source
def loadfile(filename: str, encoding='utf-8'):
    """Yield one parsed record per matching line of a log file.

    Args:
        filename: path of the log file to read.
        encoding: text encoding used to open the file.

    Yields:
        The dict returned by ``extract`` for every line it can parse.
        Lines that cannot be parsed are reported on stdout and skipped.
    """
    with open(filename, encoding=encoding) as f:
        for line in f:
            fields = extract(line)
            if isinstance(fields, dict):
                yield fields
            else:
                # TODO: either drop unparseable lines silently or send them to a log.
                # Report the offending line itself; `fields` carries no
                # information on this branch.
                print("No match.{}".format(line.rstrip('\n')))
from pathlib import Path
# File and directory handling
def load(*paths, encoding='utf-8', ext='*.log', r=False):
    """Yield parsed log records from any mix of files and directories.

    Args:
        *paths: file or directory paths. Paths that are neither are ignored.
        encoding: text encoding forwarded to ``loadfile``.
        ext: one glob pattern, or an iterable of patterns, used to select
            files inside directories.
        r: when true, directories are searched recursively.

    Yields:
        The records produced by ``loadfile`` for every selected file.
    """
    # Normalise once so a single pattern and a list of patterns share a path.
    patterns = [ext] if isinstance(ext, str) else list(ext)
    for p in paths:
        path = Path(p)
        if path.is_dir():
            for e in patterns:
                logs = path.rglob(e) if r else path.glob(e)
                for log in logs:
                    # A directory may itself match the pattern; only open files.
                    if log.is_file():
                        yield from loadfile(str(log.absolute()), encoding=encoding)
        elif path.is_file():
            # Use `path` here: `log` only exists inside the directory branch.
            yield from loadfile(str(path.absolute()), encoding=encoding)
# Demo: print every record parsed from test.log.
records = load('test.log')
for x in records:
    print(x)
# Generator
# Simulated data source
def source(seconds=1):
    """Endlessly yield simulated samples, pausing between them.

    Each sample is a dict with a timezone-aware 'datetime' (UTC+8, the
    current time) and a random integer 'value' between 1 and 100 inclusive.
    After each sample is consumed the generator sleeps ``seconds`` seconds
    before producing the next one.
    """
    tz = datetime.timezone(datetime.timedelta(hours=8))
    while True:
        now = datetime.datetime.now(tz)
        number = random.randint(1, 100)
        yield {'datetime': now, 'value': number}
        time.sleep(seconds)


s = source()
# collecting data
# Analysis / handler functions
# Average
def avg_handler(iterable):
    """Return the arithmetic mean of the 'value' field of the given items.

    Args:
        iterable: any iterable (list, generator, ...) of mappings that each
            have a numeric 'value' key.

    Raises:
        ZeroDivisionError: if ``iterable`` is empty.
        KeyError: if an item has no 'value' key.
    """
    # Materialise first so one-shot iterators work; they have no len().
    values = [item['value'] for item in iterable]
    return sum(values) / len(values)
# Demo: take a few samples from the simulated source and print their average.
# `items` has to be built here; nothing else in the file defines it.
items = [next(s) for _ in range(3)]
ret = avg_handler(items)
print('{:.2f}'.format(ret))
# Window function
def window(q: Queue, handler, width: int, interval: int):
    """Consume records from a queue and run ``handler`` over a sliding window.

    Args:
        q: queue of records. Each record is a mapping with a timezone-aware
            'datetime' entry. A ``None`` item marks end of stream and makes
            this function return.
        handler: callable receiving the list of buffered records. Its result
            is printed with two decimals, so it must return a number.
        width: window width in seconds.
        interval: minimum number of seconds of record time between two
            handler runs.

    After each handler run, only records newer than
    ``current - (width - interval)`` seconds are kept for the next window.
    """
    buf = []
    # Start far in the past so the first record triggers a handler run. The
    # value is timezone-aware so it can be subtracted from aware datetimes.
    epoch = datetime.datetime.strptime('19700101 00:00:01 +0800', '%Y%m%d %H:%M:%S %z')
    start = epoch
    current = epoch
    delta = datetime.timedelta(seconds=width - interval)
    while True:
        data = q.get()  # blocks until an item is available
        if data is None:
            # End-of-stream marker: stop so the consumer thread can exit.
            break
        if data:
            buf.append(data)
            current = data['datetime']
        print(current, start)
        if (current - start).total_seconds() > interval:
            print('~~~~~~~~~~~~~')
            ret = handler(buf)
            print('{:.2f}'.format(ret))
            print(threading.current_thread())
            start = current
            # Drop records that fall outside the overlap with the next window.
            buf = [x for x in buf if x['datetime'] > current - delta]
# Dispatcher: feeds the data source to every registered consumer
def dispatcher(src):
    """Build a fan-out dispatcher over ``src``.

    Args:
        src: iterable of records to distribute.

    Returns:
        A ``(reg, run)`` pair. ``reg(handler, width, interval)`` registers a
        consumer: a thread running ``window`` on its own queue. ``run()``
        starts all registered threads and copies every record from ``src``
        to every queue.
    """
    handlers = []  # one thread per registration, each wrapping a handler
    queues = []

    def reg(handler, width, interval):
        """Register a handler with its window width and interval."""
        q = Queue()
        t = threading.Thread(target=window, args=(q, handler, width, interval))
        queues.append(q)
        handlers.append(t)

    def run():
        """Start the consumer threads and pump ``src`` into their queues."""
        for t in handlers:
            t.start()
        # A for loop ends cleanly when a finite source is exhausted, instead
        # of letting StopIteration escape from next().
        for data in src:
            for q in queues:
                q.put(data)
        # Mark end of stream on every queue so consumers can tell the source
        # is exhausted.
        for q in queues:
            q.put(None)

    return reg, run
if __name__ == '__main__':
    src = load('test.log')

    def size_avg_handler(records):
        """Return the average 'size' of the log records in one window."""
        # Log records carry 'size' but no 'value' key, so avg_handler cannot
        # be used on them.
        sizes = [record['size'] for record in records]
        return sum(sizes) / len(sizes)

    # NOTE: the simulated source `s` is infinite; looping over it here would
    # never reach the dispatcher, so that debug loop is not run.
    reg, run = dispatcher(src)
    reg(size_avg_handler, 10, 5)
    print(threading.current_thread())
    run()
#----------------------------------------------------
C:\Users\Administrator\PycharmProjects\learn\venv\Scripts\python.exe C:/Users/Administrator/PycharmProjects/learn/ttt3.py
test.log
123.125.71.36 - - [06/Apr/2017:18:09:25 +0800] "GET / HTTP/1.1" 200 8642 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
112.64.118.97 - - [06/Apr/2017:19:13:59 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Dalvik/2.1.0 (Linux; U; Android 5.1.1; SM-G9250 Build/LMY47X)"
119.123.183.219 - - [06/Apr/2017:20:59:39 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
140.205.201.44 - - [07/Apr/2017:08:11:06 +0800] "GET / HTTP/1.1" 200 8642 "http://job.magedu.com/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;Alibaba.Security.Heimdall.950384.p)"
183.69.210.164 - - [07/Apr/2017:09:32:39 +0800] "GET /member/ HTTP/1.1" 302 31 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /index.php?m=ajax&c=RedLoginHead HTTP/1.1" 200 294 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] "GET /app/include/authcode.inc.php HTTP/1.1" 200 384 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:54 +0800] "GET /js/layer/skin/layer.css HTTP/1.1" 200 1601 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
183.69.210.164 - - [07/Apr/2017:09:32:55 +0800] "GET /js/layer/skin/default/xubox_ico0.png HTTP/1.1" 200 32954 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
网友评论