美文网首页Python全栈工程师
26.3-代码合并和分析

26.3-代码合并和分析

作者: BeautifulSoulpy | 来源:发表于2019-10-24 19:15 被阅读0次

    load函数就是从日志中提取合格的数据的生成器函数。

    它可以作为dispatcher函数的数据源。

    原来写的handler函数(avg_handler)处理的是字典的'value'字段,而日志抽取函数extract返回的字典中没有这个字段,所以不能直接处理,需要提供一个新的处理函数!

    代码跑起来

    # !/usr/bin/env python
    # encoding:utf-8
    '''
    @author:administrator
    
    '''
    
    import random  # 产生随机数;
    import re, datetime, time, threading
    from queue import Queue
    
    
    # Sample access-log line; it is matched once below as an import-time smoke test of the pattern.
    line = '''183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"'''

    # Earlier positional-group version of the pattern, kept for reference:
    # pattern = '''([\d.]{7,15}) - - \[([/\w +:]+)\] "(\w+) (\S+) ([\w/\d.]+)" (\d+) (\d+) .+ "(.+)"'''
    # Raw string so that \d, \[ and \S reach the regex engine unchanged instead of
    # being treated as (invalid) string escape sequences.
    # NOTE(review): the group name 'userggent' looks like a typo for 'useragent'; it is
    # left as-is because it becomes a key of every dict built from this pattern.
    pattern = r'(?P<remote>[\d.]{7,15}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<method>[^" ]+) (?P<url>[^" ]+) (?P<protocol>[^" ]+)" (?P<status>\d+) (?P<size>\d+) \S+ "(?P<userggent>[^"]*)"'

    regex = re.compile(pattern)
    matcher = regex.match(line)

    # Per-field converters applied to the captured strings; fields without an
    # entry here are left as plain strings by extract().
    ops = {
        'datetime': lambda dstr: datetime.datetime.strptime(dstr, '%d/%b/%Y:%H:%M:%S %z'),
        'status': int, 'size': int
    }
    
    # 提取
    def extract(line: str):
        """Parse one log line into a dict of named fields.

        The line is matched against the module-level ``regex``. Each captured
        group is passed through the converter registered for its name in
        ``ops``; groups without a converter keep their raw string value.

        Returns the field dict, or None when the line does not match.
        """
        found = regex.match(line)
        if not found:
            return None

        record = {}
        for name, raw in found.groupdict().items():
            if name in ops:
                record[name] = ops[name](raw)
            else:
                record[name] = raw
        return record
    # print(extract(line))
    
    # 数据源
    def loadfile(filename: str,encoding='utf-8'):
        """Yield the parsed record of every line in ``filename`` that matches the log pattern.

        :param filename: path of the log file to read.
        :param encoding: text encoding used to open the file.
        :return: generator of the dicts returned by ``extract``.

        Lines that ``extract`` cannot parse are skipped after being reported on stdout.
        """
        with open(filename, encoding=encoding) as f:
            for line in f:
                fields = extract(line)
                if isinstance(fields, dict):
                    yield fields
                else:
                    # TODO: on parse failure either drop the line silently or send it to a logger.
                    # Report the offending line itself; ``fields`` is always None here,
                    # so formatting it told the reader nothing.
                    print("No match.{!r}".format(line))
    
    from pathlib import Path
    
    # 文件目录处理;
    def load(*paths,encoding='utf-8',ext='*.log',r=False):
        """Yield parsed log records from any mix of files and directories.

        :param paths: file or directory paths; paths that are neither are ignored.
        :param encoding: text encoding forwarded to ``loadfile``.
        :param ext: glob pattern, or an iterable of glob patterns, selecting the
            log files inside a directory.
        :param r: when true, directories are searched recursively (``rglob``).
        :return: generator of the dicts yielded by ``loadfile``.
        """
        # Normalise once, without rebinding the parameter in the middle of the loop.
        patterns = [ext] if isinstance(ext, str) else list(ext)

        for p in paths:
            path = Path(p)
            if path.is_dir():
                for e in patterns:
                    logs = path.rglob(e) if r else path.glob(e)
                    for log in logs:
                        # A glob pattern can also match sub-directories; only files can be opened.
                        if log.is_file():
                            yield from loadfile(str(log.absolute()), encoding=encoding)
            elif path.is_file():
                # Delegate with ``yield from`` and use ``path``: a bare loadfile() call only
                # creates a generator and discards it, and ``log`` is not bound in this branch.
                yield from loadfile(str(path.absolute()), encoding=encoding)
    
    # Demo: parse 'test.log' (path relative to the working directory) and print each record.
    # Runs at import time; load() yields nothing if the path is neither a file nor a directory.
    for x in load('test.log'):
        print(x)
    
    # 生成器;
    
    # 模拟用的数据源
    def source(seconds=1):
        """Endless simulated data source.

        Each item is a dict holding the current time in UTC+8 under 'datetime'
        and a random integer from 1 to 100 under 'value'. After an item has been
        consumed the generator sleeps ``seconds`` before producing the next one.
        """
        utc_plus_8 = datetime.timezone(datetime.timedelta(hours=8))
        while True:
            stamp = datetime.datetime.now(utc_plus_8)
            reading = random.randint(1, 100)
            yield {'datetime': stamp, 'value': reading}
            time.sleep(seconds)
    
    s = source()
    # Collect a small sample batch from the simulated source so that the
    # module-level avg_handler(items) check further down has data to work on.
    # source() sleeps between items, so this takes about two seconds.
    items = [next(s) for _ in range(3)]
    
    # 分析函数、处理函数;
    # 平均数
    def avg_handler(iterable):
        """Return the arithmetic mean of the 'value' field of the given items.

        :param iterable: any iterable (list, generator, ...) of mappings that
            each have a numeric 'value' key.
        :return: the mean as a float, or 0.0 when the iterable is empty.
        :raises KeyError: if an item has no 'value' key.
        """
        # Materialise first: this supports one-shot iterators and lets the empty
        # case be detected instead of dividing by zero.
        values = [item['value'] for item in iterable]
        if not values:
            return 0.0
        return sum(values) / len(values)
    
    # Smoke test: average the sample batch `items` and print it with two decimals.
    # NOTE(review): `items` must already be bound at module level when this runs — confirm it is defined above.
    ret = avg_handler(items)
    print('{:.2f}'.format(ret))
    
    
    
    # 窗口函数;
    def window(q: Queue, handler, width: int, interval: int):
        """Consume items from ``q`` forever and run ``handler`` over a sliding time window.

        :param q: queue of dicts carrying a 'datetime' key; falsy items are not buffered.
        :param handler: called with the buffered items; its result is printed with '{:.2f}'.
        :param width: window width in seconds.
        :param interval: seconds that must elapse, measured by item timestamps,
            between two handler calls.

        This function never returns; it blocks on ``q.get()`` between items.
        """
        # Both clocks start far in the past so that the first real item triggers a handler call.
        epoch = datetime.datetime.strptime('19700101 00:00:01 +0800', '%Y%m%d %H:%M:%S %z')
        start = epoch
        current = epoch
        # Span of already-processed data carried over into the next window (the overlap).
        keep_span = datetime.timedelta(seconds=width - interval)
        buf = []

        while True:
            data = q.get()  # blocks until an item is available
            if data:
                buf.append(data)
                current = data['datetime']
            print(current, start)

            elapsed = (current - start).total_seconds()
            if elapsed <= interval:
                continue

            print('~~~~~~~~~~~~~')
            ret = handler(buf)
            print('{:.2f}'.format(ret))
            print(threading.current_thread())
            start = current

            # Drop items that have slid out of the overlap kept for the next window.
            cutoff = current - keep_span
            buf = [entry for entry in buf if entry['datetime'] > cutoff]
    
    # 分发器,数据的调入;
    def dispatcher(src):
        """Build a fan-out dispatcher that feeds every item of ``src`` to each registered handler.

        :param src: any iterable of data items (a generator, a list, ...).
        :return: a ``(reg, run)`` pair. ``reg(handler, width, interval)`` registers
            a handler; ``run()`` starts the workers and distributes the data.
        """
        handlers = []  # worker threads, one per registered handler
        queues = []    # queues[i] feeds the thread in handlers[i]

        def reg(handler, width, interval):
            """Register ``handler`` with its own queue and a (not yet started) window thread."""
            q = Queue()
            t = threading.Thread(target=window, args=(q, handler, width, interval))

            queues.append(q)
            handlers.append(t)

        def run():
            """Start every worker thread, then push each item of ``src`` to every queue.

            Returns once ``src`` is exhausted. The workers run ``window``, which
            loops forever, so they remain blocked on their queues afterwards.
            """
            for t in handlers:
                t.start()

            # Iterate instead of calling next() inside ``while True``: a finite source
            # then ends the loop cleanly rather than escaping as StopIteration, and
            # plain iterables such as lists are accepted as well.
            for data in src:
                for q in queues:
                    q.put(data)

        return reg, run
    
    # Script entry point. The guard must compare against exactly '__main__';
    # with surrounding spaces the condition is never true and nothing below runs.
    if __name__ == '__main__':
        src = load('test.log')
        # The simulated source `s` is infinite, so it is deliberately not drained
        # here: looping over it would never reach the dispatcher.
        reg, run = dispatcher(src)

        # avg_handler averages item['value'], a key that parsed log records do not
        # have. Adapt each record by exposing its integer 'size' field under that
        # key, so each window reports the average response size.
        reg(lambda records: avg_handler([{'value': rec['size']} for rec in records]), 10, 5)
        print(threading.current_thread())
        run()
    #----------------------------------------------------
    C:\Users\Administrator\PycharmProjects\learn\venv\Scripts\python.exe C:/Users/Administrator/PycharmProjects/learn/ttt3.py
    test.log
    123.125.71.36 - - [06/Apr/2017:18:09:25 +0800] "GET / HTTP/1.1" 200 8642 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
    
    112.64.118.97 - - [06/Apr/2017:19:13:59 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Dalvik/2.1.0 (Linux; U; Android 5.1.1; SM-G9250 Build/LMY47X)"
    
    119.123.183.219 - - [06/Apr/2017:20:59:39 +0800] "GET /favicon.ico HTTP/1.1" 200 4101 "-" "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    140.205.201.44 - - [07/Apr/2017:08:11:06 +0800] "GET / HTTP/1.1" 200 8642 "http://job.magedu.com/" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;Alibaba.Security.Heimdall.950384.p)"
    
    183.69.210.164 - - [07/Apr/2017:09:32:39 +0800] "GET /member/ HTTP/1.1" 302 31 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:40 +0800] "GET /index.php?m=login HTTP/1.1" 200 3661 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/css.css HTTP/1.1" 200 8803 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /app/template/default//style/login.css HTTP/1.1" 200 3080 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:46 +0800] "GET /index.php?m=ajax&c=RedLoginHead HTTP/1.1" 200 294 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] "GET /app/include/authcode.inc.php HTTP/1.1" 200 384 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:54 +0800] "GET /js/layer/skin/layer.css HTTP/1.1" 200 1601 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    
    183.69.210.164 - - [07/Apr/2017:09:32:55 +0800] "GET /js/layer/skin/default/xubox_ico0.png HTTP/1.1" 200 32954 "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"
    

    相关文章

      网友评论

        本文标题:26.3-代码合并和分析

        本文链接:https://www.haomeiwen.com/subject/jphlvctx.html