有一个日志文件,其中的记录按时间是乱序的(并非时间小的在前、时间大的在后)。需要针对这样的文件,按每分钟统计并输出日志条数、带宽、流量、重复ip,以及每分钟各访问文件类型所占的流量。日志中的时间格式如下:
[19/Aug/2019:11:50:00 +0800]
下面计算8月19号11点50分到12点整之间,每一分钟的日志条数、带宽、流量、重复ip,以及每分钟各访问文件类型所占的流量,代码如下:
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import time
def _parse_log_line(line, time_format):
    """Extract ``(epoch_seconds, file_type, byte_count)`` from one log line.

    The line is split on ``'|'``: field 0 holds the timestamp, field 7 the
    requested URL and field 14 the byte count.

    Raises:
        IndexError: the line has fewer than 15 ``'|'``-separated fields.
        ValueError: the timestamp or the byte count cannot be parsed.
    """
    fields = line.split('|')
    timestamp = time.mktime(time.strptime(fields[0].strip(), time_format))
    # Drop the query string, then keep whatever follows the last dot.
    file_type = fields[7].split('?')[0].split('.')[-1].strip()
    byte_count = int(fields[14])
    return timestamp, file_type, byte_count


def ananly_log(log_path='./dilian.log',
               start='2019-08-19 11:50:00',
               end='2019-08-19 12:00:00',
               time_interval=60,
               line_time_format='%Y/%m/%d %H:%M:%S'):
    """Print per-interval traffic statistics for an unordered access log.

    The window ``[start, end)`` is cut into buckets of ``time_interval``
    seconds.  Every log line whose timestamp falls inside the window is
    added to its bucket regardless of where it sits in the file, so the
    log does not need to be sorted.  For each bucket one summary line is
    printed (traffic in KB, bandwidth in Kbps, number of log entries),
    followed by one line per file type with that type's traffic in KB.

    Args:
        log_path: path of the ``'|'``-delimited log file.
        start: window start, formatted ``YYYY-MM-DD HH:MM:SS`` (local time).
        end: window end (exclusive), same format as ``start``.
        time_interval: bucket width in seconds; must be positive.
        line_time_format: ``strptime`` format of field 0 of each log line.

    Raises:
        ValueError: ``time_interval`` is not positive, or ``start``/``end``
            do not match the expected format.
        OSError: the log file cannot be opened.

    Lines outside the window, blank lines and lines that cannot be parsed
    are skipped instead of aborting the whole run.
    """
    if time_interval <= 0:
        raise ValueError('time_interval must be a positive number of seconds')

    boundary_format = '%Y-%m-%d %H:%M:%S'
    start_timestamp = time.mktime(time.strptime(start, boundary_format))
    end_timestamp = time.mktime(time.strptime(end, boundary_format))
    bucket_count = max(0, int((end_timestamp - start_timestamp) // time_interval))

    bytes_per_bucket = [0] * bucket_count
    entries_per_bucket = [0] * bucket_count
    type_bytes_per_bucket = [{} for _ in range(bucket_count)]

    # Read-only access is enough; 'r+' would fail on a write-protected log.
    with open(log_path, 'r', encoding='utf-8', errors='ignore') as log_file:
        for line in log_file:
            try:
                timestamp, file_type, byte_count = _parse_log_line(
                    line, line_time_format)
            except (IndexError, ValueError):
                continue  # blank or malformed line

            # Floor division keeps timestamps just before `start` out of
            # bucket 0 (truncating toward zero would let them in).
            bucket = int((timestamp - start_timestamp) // time_interval)
            if not 0 <= bucket < bucket_count:
                continue  # outside the requested window

            bytes_per_bucket[bucket] += byte_count
            entries_per_bucket[bucket] += 1
            per_type = type_bytes_per_bucket[bucket]
            per_type[file_type] = per_type.get(file_type, 0) + byte_count

    # NOTE(review): the original collected field 14 (the byte-count column)
    # as the "ip" and printed only how many values it had collected, i.e. the
    # number of log entries.  That output is kept; reporting repeated IPs
    # needs the real IP column index, which is not known from this file.
    stamp_format = "%Y-%m-%d_%H:%M:%S"
    for index in range(bucket_count):
        window_start = start_timestamp + index * time_interval
        window_end = window_start + time_interval
        total_bytes = bytes_per_bucket[index]
        summary = '{0}至{1}的流量:{2}KB\t带宽:{3}Kbps'.format(
            time.strftime(stamp_format, time.localtime(window_start)),
            time.strftime(stamp_format, time.localtime(window_end)),
            round(total_bytes / 1000, 3),
            round(total_bytes * 8 / time_interval / 1000, 3),
        )
        print(summary, entries_per_bucket[index])
        for file_type, type_bytes in type_bytes_per_bucket[index].items():
            print('文件类型:{0}\t流量:{1}KB'.format(
                file_type, round(type_bytes / 1000, 2)))


# The script's entry point refers to this function as `analy_log`; keep both
# spellings resolvable.
analy_log = ananly_log
if __name__ == '__main__':
    # Run the analysis with its built-in defaults when executed as a script.
    ananly_log()
网友评论