# -*- coding: utf-8 -*-
__author__ = 'xiaojg'
# log format
'''
'$host $remote_addr - $remote_user [$time_local] "$request"'
'$status $upstream_status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'$upstream_addr $request_time $upstream_response_time';
'''
import sys,os
reload(sys)
from email.MIMEText import MIMEText
import smtplib
import argparse
import re,time
sys.setdefaultencoding('utf-8')
def send_mail(to_list, sub, content):
    """Send an HTML e-mail through an SMTP-over-SSL server.

    Args:
        to_list: list of recipient addresses.
        sub: subject line.
        content: HTML body, sent with the UTF-8 charset.

    Returns:
        True when the message was accepted by the server, False when
        connecting, logging in or sending raised (the error is printed).
    """
    mail_host = "smtp.exmail.qq.com"  # SMTP server used for authentication
    mail_user = "xxx@xxx.com"  # login user name
    mail_pass = "xxx"  # login password
    mail_postfix = "qq.com"  # mailbox domain, used when mail_user has no domain

    # mail_user may already be a full address; appending "@<postfix>" to it
    # would produce an invalid "user@domain@postfix" sender.
    if "@" in mail_user:
        display_name = mail_user.split("@")[0]
        address = mail_user
    else:
        display_name = mail_user
        address = mail_user + "@" + mail_postfix
    me = display_name + "<" + address + ">"

    msg = MIMEText(content, _subtype='html', _charset='utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    msg['To'] = ",".join(to_list)

    s = None
    try:
        s = smtplib.SMTP_SSL(mail_host, port=465)
        s.login(mail_user, mail_pass)
        s.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Callers rely on the boolean result, so report the error and
        # signal failure instead of propagating.
        print(str(e))
        return False
    finally:
        # Release the connection on the failure path as well.
        if s is not None:
            s.close()
# Named-group bodies for the fields of one access-log line.  read_log_file
# wraps each of them in parentheses and joins them, in this order, into a
# single regular expression; the numeric suffix is the group's position in
# that expression.
# NOTE(review): [\D]* cannot match a host name that contains a digit -
# confirm that such hosts never appear in the analysed logs.
HOST1 = r"?P<host>[\D]*"
ADDR2 = r"?P<addr>[\d.]*"
# Day / month / year / time-of-day parts of the bracketed timestamp.
date3 = r"?P<date>\d+"
month4 = r"?P<month>\w+"
year5 = r"?P<year>\d+"
log_time6 = r"?P<time>\S+"
method7 = r"?P<method>\S+"
# Only request paths ending in ".do" are matched.
request8 = r"?P<request>\S+\.do"
#args9 = r"?P<args>\?.*"
HTTPVER9 = r"?P<httpver>\S+"
STATUS10 = r"?P<status>\d+"
bodyBytesSent11 = r"?P<bodyBytesSent>\d+"
HttpUserAgent12 = r"?P<HttpUserAgent>.*"
UpstreamAddr13 = r"?P<UpstreamAddr>\S+"
RequestTime14 = r"?P<RequestTime>\S+"
UpstreamResponseTime15 = r"?P<UpstreamResponseTime>\S+"
# /opt/open.facebac.com_nginx.log  (presumably a sample input path - confirm)
# One entry per line read: the findall() result for that line (an empty
# list when the line did not match the pattern).
Log_all_Line = []
# Client address -> number of matched lines from that address.
IP_DIST = {}
# Request URI -> number of matched lines for that URI.
API_DIST = {}
# Request URI -> list of request times (floats) seen for that URI.
API_status = {}
#with open('open.log','r') as logfile:
def read_log_file(logfilename):
    """Parse an access log and accumulate per-IP and per-API statistics.

    Every line's findall() result is appended to ``Log_all_Line`` (an empty
    list for lines that do not match).  For each matching line the client
    address is counted in ``IP_DIST``, the request URI is counted in
    ``API_DIST`` and the request time is appended to ``API_status[uri]``.

    Args:
        logfilename: path of the log file to read.

    Raises:
        IOError/OSError: the file cannot be opened.
        ValueError: the request-time field of a matching line is not a
            valid float.
    """
    # The pattern does not depend on the line, so build it once per call
    # instead of once per line.  Spaces are escaped because of re.VERBOSE.
    pattern = re.compile(
        r"(%s)\ (%s)\ -\ -\ "
        r"\[(%s)/(%s)/(%s)\:(%s)\ [\S]+\]\ "
        r"\"(%s)\ (%s).*\ (%s)\""
        r"(%s)\ \d+\ (%s)\ "
        r"\"\S+\"\ \"(%s)\"\ \"\S+\"\ "
        r"(%s)\ (%s)\ (%s)" % (
            HOST1, ADDR2, date3, month4, year5, log_time6, method7,
            request8, HTTPVER9, STATUS10, bodyBytesSent11, HttpUserAgent12,
            UpstreamAddr13, RequestTime14, UpstreamResponseTime15,
        ),
        re.VERBOSE,
    )
    with open(logfilename, 'r') as logfile:
        for line in logfile:
            matches = pattern.findall(line)
            Log_all_Line.append(matches)
            if not matches:
                continue
            fields = matches[0]
            ip = fields[1]  # ADDR2 group
            api = fields[7]  # request8 group
            request_time = float(fields[13])  # RequestTime14 group
            IP_DIST[ip] = IP_DIST.get(ip, 0) + 1
            API_DIST[api] = API_DIST.get(api, 0) + 1
            API_status.setdefault(api, []).append(request_time)
# Return the (IP, count) pairs ordered by count, largest first.
def ipstat(ipdist):
    """Rank the items of ``ipdist`` by their value in descending order."""
    ranked = list(ipdist.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
# Return the (API, count) pairs ordered by count, largest first.
def apinumstat(apidist):
    """Rank the items of ``apidist`` by their value in descending order."""
    pairs = list(apidist.items())
    pairs.sort(key=lambda entry: entry[1], reverse=True)
    return pairs
# Arithmetic mean.
def averagenum(num):
    """Return the arithmetic mean of ``num`` as a string with three decimals.

    The sum is converted to float before dividing so that integer inputs
    are not truncated by integer division.

    Raises:
        ZeroDivisionError: ``num`` is empty.
    """
    return "%.3f" % (float(sum(num)) / len(num))
# Median.
def mediannum(num):
    """Return the median of ``num`` without modifying it.

    For an odd number of values the middle element is returned as is; for
    an even number the two middle elements are averaged with float
    division, so integer inputs are not truncated.

    Raises:
        IndexError: ``num`` is empty.
    """
    ordered = sorted(num)
    count = len(ordered)
    mid = count // 2
    if count % 2 == 1:
        return ordered[mid]
    return (ordered[mid - 1] + ordered[mid]) / 2.0
# Number of values greater than 1 s.
def contrast1(num):
    """Count the entries of ``num`` whose float value exceeds 1.0."""
    return sum(1 for value in num if float(value) > 1.0)
# Per-API timing summary: {API: [mean, median, count above 1 s]}.
def api_time_stat(apidist):
    """Summarise the request times recorded for every API.

    ``apidist`` maps an API to its list of request times.  The result maps
    the same keys to ``[averagenum(times), mediannum(times),
    contrast1(times)]``.
    """
    summary = {}
    for uri, timings in apidist.items():
        mean = averagenum(timings)
        median = mediannum(timings)
        slow = contrast1(timings)
        summary[uri] = [mean, median, slow]
    return summary
def _html_escape(text):
    """Escape the characters that are special in HTML text."""
    return (str(text).replace("&", "&amp;").replace("<", "&lt;")
            .replace(">", "&gt;").replace('"', "&quot;"))


def domain(top_n=10):
    """Build the HTML table rows of the report from the collected statistics.

    Args:
        top_n: how many entries the two "top" tables contain (default 10).

    Returns:
        A tuple ``(iplist, apilist, apilist2)`` of lists of ``<tr>`` strings:
        the ``top_n`` busiest client addresses, the ``top_n`` most requested
        APIs with their timing summary, and the timing summary of every API.
    """
    ipsort = ipstat(IP_DIST)
    apisort = apinumstat(API_DIST)
    apitimestat = api_time_stat(API_status)

    api_row = "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>"

    # Slicing keeps the two top-N lists independent; a counter shared by
    # both loops would carry over when there are top_n or fewer addresses
    # and cut the API list short.
    iplist = [
        "<tr><td>%s</td><td>%s</td></tr>" % (_html_escape(ip), count)
        for ip, count in ipsort[:top_n]
    ]

    apilist = []
    for uri, count in apisort[:top_n]:
        mean, median, slow = apitimestat[uri]
        # URIs come from the log, i.e. from clients, so escape them.
        apilist.append(api_row % (_html_escape(uri), count, mean, median, slow))

    apilist2 = []
    for uri in apitimestat:
        mean, median, slow = apitimestat[uri]
        apilist2.append(
            api_row % (_html_escape(uri), API_DIST[uri], mean, median, slow))

    return (iplist, apilist, apilist2)
def mail_msg(iplist, apilist, apilist2):
    """Assemble the lines of the HTML report.

    Args:
        iplist: table rows for the top client addresses.
        apilist: table rows for the most requested APIs.
        apilist2: table rows for all APIs.

    Returns:
        A list of HTML fragments, one per line, forming a complete document:
        three headed tables inside ``<html><body>...</body></html>``.
    """
    api_header = ('<tr><th>API</th><th>访问次数</th><th>平均值</th>'
                  '<th>中位数</th><th>大于1s数量</th></tr>')

    alllist = [
        '<html>',
        '<body>',
        '<h3>此统计编写: 作者jin</h3><br>',
        '<a>-------------------------------------------------</a><br>',
    ]

    alllist.append("<h3>IP统计前N</h3>")
    alllist.append('<table border="1">')
    alllist.append('<tr><th>IP</th><th>访问次数</th></tr>')
    alllist.extend(iplist)
    alllist.append('</table>')

    alllist.append("<h3>API访问前N统计</h3>")
    alllist.append('<table border="1">')
    alllist.append(api_header)
    alllist.extend(apilist)
    alllist.append('</table>')

    alllist.append("<h3>所有API统计</h3>")
    alllist.append('<table border="1">')
    alllist.append(api_header)
    alllist.extend(apilist2)
    alllist.append('</table>')

    # Close the elements in the reverse of the order they were opened.
    alllist.append('</body>')
    alllist.append('</html>')
    return alllist
if __name__ == '__main__':
    # Command line: -f/--file <access log to analyse> (mandatory).
    cli = argparse.ArgumentParser()
    cli.add_argument('-f', '--file', type=str, help='log file name', required=True)
    log_path = cli.parse_args().file

    # Collect the statistics, then render them as one HTML document.
    read_log_file(log_path)
    ip_rows, top_api_rows, all_api_rows = domain()
    report = '\n'.join(mail_msg(ip_rows, top_api_rows, all_api_rows))

    recipients = ['收件人@xxx']  # recipient list
    subject = "nginx日志分析数据" + os.path.basename(log_path) + time.strftime('%Y%m%d')

    # Echo the report to stdout and mail it.
    print(report)
    send_mail(recipients, subject, report)
# 网友评论  (web-page residue, "reader comments"; not part of the script)