美文网首页
python脚本对nginx日志做分析统计,发邮件

python脚本对nginx日志做分析统计,发邮件

作者: 肖金光xjg | 来源:发表于2018-10-09 14:35 被阅读17次
# -*- coding: utf-8 -*-
__author__ = 'xiaojg'

# log format
'''
'$host $remote_addr - $remote_user [$time_local] "$request"'
 '$status $upstream_status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'$upstream_addr $request_time $upstream_response_time';
'''
import sys,os
reload(sys)
from email.MIMEText import MIMEText
import smtplib
import argparse

import re,time

sys.setdefaultencoding('utf-8')

def send_mail(to_list,sub,content):
    """Send an HTML e-mail through SMTP over SSL and report whether it worked.

    Args:
        to_list: list of recipient addresses.
        sub: subject line of the message.
        content: message body; sent as ``text/html`` with UTF-8 charset.

    Returns:
        True when connect, login and send all completed, False when any of
        them raised (the error text is printed).
    """
    mail_host="smtp.exmail.qq.com"  # SMTP server used for authentication (qq.com as the example)
    mail_user="xxx@xxx.com"     # login user name
    mail_pass="xxx"         # login password
    mail_postfix="qq.com"       # mailbox domain, appended only when mail_user has no "@"

    # mail_user may already be a full address; appending "@postfix" to it
    # would produce an invalid "user@domain@postfix" sender.
    if "@" in mail_user:
        sender = mail_user
    else:
        sender = mail_user + "@" + mail_postfix
    me = mail_user + "<" + sender + ">"

    msg = MIMEText(content,_subtype='html', _charset='utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    msg['To'] = ",".join(to_list)
    try:
        s = smtplib.SMTP_SSL(mail_host, port=465)
        try:
            s.login(mail_user, mail_pass)
            s.sendmail(sender, to_list, msg.as_string())
        finally:
            # Release the connection even when login or send fails.
            s.close()
        return True
    except Exception as e:
        print(str(e))
        return False

# Regex fragments, one per captured field of an access-log line.  Each
# fragment starts with ``?P<name>``; read_log_file drops it into a "(%s)"
# slot of its format string, which turns it into a named capture group.
# The numeric suffix of each name is the 1-based position of the fragment
# in that format tuple.
# NOTE: [\D]* in HOST1 never matches a digit.
HOST1 = r"?P<host>[\D]*"
ADDR2 = r"?P<addr>[\d.]*"
date3 = r"?P<date>\d+"
month4 = r"?P<month>\w+"
year5 = r"?P<year>\d+"
log_time6 = r"?P<time>\S+"
method7 = r"?P<method>\S+"
# Only request paths ending in ".do" are captured.
request8 = r"?P<request>\S+\.do"
#args9 = r"?P<args>\?.*"
HTTPVER9 = r"?P<httpver>\S+"
STATUS10 = r"?P<status>\d+"
bodyBytesSent11 = r"?P<bodyBytesSent>\d+"
HttpUserAgent12 = r"?P<HttpUserAgent>.*"
UpstreamAddr13 = r"?P<UpstreamAddr>\S+"
RequestTime14 = r"?P<RequestTime>\S+"
UpstreamResponseTime15 = r"?P<UpstreamResponseTime>\S+"


# /opt/open.facebac.com_nginx.log

# Log list: read_log_file appends the re.findall result of every line here
# (an empty list for lines that do not match).
Log_all_Line = []
# Per-IP counters: key is the IP, value is the number of matching lines.
IP_DIST = {}
# Per-API counters: key is the request URI, value is the number of matching lines.
API_DIST = {}
# Per-API timings: key is the request URI, value is the list of request times.
API_status = {}

#with open('open.log','r') as logfile:
def read_log_file(logfilename):
    '''Parse an access log and accumulate the per-IP / per-API statistics.

    Every line of ``logfilename`` is matched against the log pattern built
    from the module-level regex fragments.  The ``findall`` result of each
    line (empty for non-matching lines) is appended to ``Log_all_Line``.
    For matching lines the client address (field 1) is counted in
    ``IP_DIST``, the request path (field 7) is counted in ``API_DIST`` and
    the request time (field 13) is appended to ``API_status[path]``.

    Lines whose request time is not a number are left out of the three
    statistics dictionaries instead of aborting the whole run.

    Args:
        logfilename: path of the log file to read.
    '''
    # Compiled once: the pattern is the same for every line.
    p = re.compile(r"(%s)\ (%s)\ -\ -\ \[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ \"(%s)\ (%s).*\ (%s)\"(%s)\ \d+\ (%s)\ \"\S+\"\ \"(%s)\"\ \"\S+\"\ (%s)\ (%s)\ (%s)" %
                   ( HOST1,ADDR2,date3,month4,year5,log_time6,method7,request8,HTTPVER9,STATUS10,bodyBytesSent11,HttpUserAgent12,UpstreamAddr13,RequestTime14,UpstreamResponseTime15, ), re.VERBOSE)
    with open(logfilename,'r') as logfile:
        for line in logfile:
            m = p.findall(line)
            Log_all_Line.append(m)
            if not m:
                continue

            fields = m[0]
            try:
                RequestTime = float(fields[13])
            except ValueError:
                # The fragment accepts any non-space token, so the captured
                # text is not guaranteed to be numeric.
                continue
            IP = fields[1]
            API = fields[7]

            IP_DIST[IP] = IP_DIST.get(IP, 0) + 1
            API_DIST[API] = API_DIST.get(API, 0) + 1
            API_status.setdefault(API, []).append(RequestTime)

# Return the entries ordered by count, largest first: [(IP, num), (IP, num)]
def ipstat(ipdist):
    """Return the (key, count) pairs of ``ipdist`` sorted by count, descending."""
    ranked = list(ipdist.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

# Return the entries ordered by count, largest first: [(API, num), (API, num)]
def apinumstat(apidist):
    """Return the (uri, count) pairs of ``apidist`` sorted by count, descending."""
    def hit_count(entry):
        return entry[1]

    return sorted(apidist.items(), key=hit_count, reverse=True)

# Compute the arithmetic mean
def averagenum(num):
    """Return the mean of ``num`` formatted with three decimals.

    Args:
        num: sequence of numbers.

    Returns:
        The mean as a string such as ``"1.500"``; ``"0.000"`` when ``num``
        is empty.
    """
    if not num:
        # Nothing to average; avoid dividing by zero.
        return "%.3f" % 0.0
    # Convert before dividing so integer input is not truncated on Python 2.
    return "%.3f" % (float(sum(num)) / len(num))

# Compute the median
def mediannum(num):
    """Return the median of ``num`` without modifying it.

    Args:
        num: sequence of numbers.

    Returns:
        The middle element for an odd number of values, the mean of the two
        middle elements (as a float) for an even number, and ``0.0`` when
        ``num`` is empty.
    """
    ordered = sorted(num)
    count = len(ordered)
    if count == 0:
        return 0.0
    mid = count // 2
    if count % 2 == 1:
        return ordered[mid]
    # Divide by 2.0 so two integer neighbours are not truncated on Python 2.
    return (ordered[mid - 1] + ordered[mid]) / 2.0

# Number of entries above 1s
def contrast1(num):
    """Count the elements of ``num`` whose float value is greater than 1.0."""
    return sum(1 for value in num if float(value) > 1.0)

# Return {API: [average, median, count above 1s]}
def api_time_stat(apidist):
    """Summarise the timing list of every API.

    ``apidist`` maps an API to its list of request times.  The result maps
    the same keys to ``[averagenum(times), mediannum(times), contrast1(times)]``.
    """
    summary = {}
    for api, timings in apidist.items():
        summary[api] = [averagenum(timings), mediannum(timings), contrast1(timings)]
    return summary

def _html_escape(value):
    """Return ``str(value)`` with ``&``, ``<``, ``>`` and ``"`` as HTML entities."""
    text = str(value)
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;")):
        text = text.replace(raw, entity)
    return text


def _api_row(api, hits, stats):
    """Build one table row: API, hit count, then the three timing figures in ``stats``."""
    cells = (api, hits, stats[0], stats[1], stats[2])
    return "<tr>%s</tr>" % "".join("<td>%s</td>" % _html_escape(cell) for cell in cells)


def domain():
    """Turn the accumulated statistics into HTML table rows.

    Reads the module-level ``IP_DIST``, ``API_DIST`` and ``API_status``.

    Returns:
        A tuple ``(iplist, apilist, apilist2)`` of row strings:
        ``iplist`` holds the 10 most frequent IPs, ``apilist`` the 10 most
        requested APIs with their timing figures, and ``apilist2`` one row
        for every API.
    """
    ipsort = ipstat(IP_DIST)
    apisort = apinumstat(API_DIST)
    apitimestat = api_time_stat(API_status)
    n = 10

    # Slicing keeps the two top-N lists independent of each other; a shared
    # counter would carry over from the IP loop and shorten the API list.
    # Values come from the log, i.e. from client requests, so they are
    # escaped before being placed into the HTML.
    iplist = ["<tr><td>%s</td><td>%s</td></tr>" % (_html_escape(ip), _html_escape(hits))
              for ip, hits in ipsort[:n]]
    apilist = [_api_row(api, hits, apitimestat[api]) for api, hits in apisort[:n]]
    apilist2 = [_api_row(api, API_DIST[api], stats) for api, stats in apitimestat.items()]
    return (iplist,apilist,apilist2)

def mail_msg(iplist,apilist,apilist2):
    """Assemble the lines of the HTML report.

    Args:
        iplist: table rows for the top-IP table.
        apilist: table rows for the top-API table.
        apilist2: table rows for the all-API table.

    Returns:
        A list of HTML fragments, in document order; the caller joins them.
    """
    # Header rows are closed with </tr>, and </body> is closed before </html>.
    api_header = '<tr><th>API</th><th>访问次数</th><th>平均值</th><th>中位数</th><th>大于1s数量</th></tr>'

    alllist = [
        '<html>',
        '<body>',
        '<h3>此统计编写: 作者jin</h3><br>',
        '<a>-------------------------------------------------</a><br>',
        "<h3>IP统计前N</h3>",
        '<table border="1">',
        '<tr><th>IP</th><th>访问次数</th></tr>',
    ]
    alllist.extend(iplist)
    alllist.append('</table>')

    alllist.append("<h3>API访问前N统计</h3>")
    alllist.append('<table border="1">')
    alllist.append(api_header)
    alllist.extend(apilist)
    alllist.append('</table>')

    alllist.append("<h3>所有API统计</h3>")
    alllist.append('<table border="1">')
    alllist.append(api_header)
    alllist.extend(apilist2)
    alllist.append('</table>')

    alllist.append('</body>')
    alllist.append('</html>')
    return alllist


if __name__ == '__main__':
    # Usage: script -f <log file>.  Parses the log, prints the HTML report
    # and mails it to the recipients below.
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--file', type=str,help='log file name', required=True)
    options = parser.parse_args()

    logname = options.file
    read_log_file(logname)
    iplist,apilist,apilist2 = domain()
    msglist = mail_msg(iplist,apilist,apilist2)
    MSG = '\n'.join(msglist)

    user_to = ['收件人@xxx',] # recipient list
    SUB = "nginx日志分析数据" + os.path.basename(logname) + time.strftime('%Y%m%d')
    print(MSG)
    # send_mail reports failure through its return value; surface it in the
    # exit status so a scheduler can notice an undelivered report.
    if not send_mail(user_to,SUB,MSG):
        sys.exit(1)

相关文章

  • python脚本对nginx日志做分析统计,发邮件

  • 心得-2017/5/9

    一、关于nginx日志统计分析的心得总结 第一步:对nginx日志进行垃圾过滤 一般共性需要考虑过滤的内容包括: ...

  • NGINX日志分析

    Nginx日志分析 日志记录: 1.统计日志中访问最多的10个IP(shell中管道越少运行越快) 2.统计日志中...

  • Oracle logminer

    归档日志量统计 日志生成量最大的对象 (按小时统计) 使用logminer分析归档日志 1.使用脚本创建相关的包 ...

  • 19.实战 - 日志分析

    参考 利用Shell进行Web日志分析 nginx或者apache日志统计前十访问的URI Linux比赛 - 实...

  • zabbix4.0通过python脚本发邮件告警

    python脚本为敏捷开发脚本,在zabbix监控也起到重要作用,以下是使用python脚本发邮件告警的配置方法。...

  • nginx日志切割

    需求nginx的日志文件路径每天0点对nginx 的access与error日志进行切割以前一天的日期为命名 脚本...

  • 了解如何配置Nginx日志!

    了解如何配置Nginx日志! 前言 Nginx日志对于统计、系统服务排错很有用。 Nginx日志主要分为两种:ac...

  • Nginx的日志分割及pv统计展示

    运维统计分析,通过分割nginx日志(按小时/天/星期/月),pv统计访问量以便于分析热门操作场景。具体实施按以下...

  • Nginx监控请求statuscode纳入PMS

    check_statuscode 功能介绍 本脚本通过ELK查询nginx日志统计过去一分钟所有的请求,及http...

网友评论

      本文标题:python脚本对nginx日志做分析统计,发邮件

      本文链接:https://www.haomeiwen.com/subject/gpgeaftx.html