先上正文:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
class CountPatt(object):
    """Count how often a regular-expression pattern matches in a text file.

    Only the first match on each line is counted, because ``search`` is used
    rather than ``findall``. Counts accumulate in ``self.result`` across
    successive ``count_patt`` calls on the same instance.
    """

    def __init__(self, patt):
        """Compile *patt* and start with an empty tally.

        :param patt: regular-expression source string.
        """
        self.patt = re.compile(patt)
        # Maps matched text -> number of lines whose first match was that text.
        self.result = {}

    def count_patt(self, fname):
        """Tally the first pattern match of every line in *fname*.

        :param fname: path of the file to scan.
        :returns: ``self.result`` (the same dict object, updated in place).
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    # dict.get(key, 0) yields 0 for a key not seen yet, so
                    # the first occurrence is stored as 1.
                    self.result[key] = self.result.get(key, 0) + 1
        return self.result

    def sort(self):
        """Return the tallies as ``(match, count)`` pairs, highest count first.

        ``self.result`` itself is left unchanged. Pairs with equal counts keep
        the order in which the dict yields them, because ``sorted`` is stable
        even with ``reverse=True``.

        :returns: a new list of ``(match, count)`` tuples; empty when nothing
            has been counted yet.
        """
        return sorted(self.result.items(),
                      key=lambda pair: pair[1],
                      reverse=True)
if __name__ == "__main__":
    # Demo run: tally client IP addresses found in an httpd access log.
    httpd_log = '/tmp/access.log'
    ip_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
    # Alternative pattern for tallying browsers; not used by the run below.
    browser_pattern = r'Chrome|Safari|Firefox'
    counter = CountPatt(ip_pattern)
    # A parenthesised single-argument print is valid on Python 2 and Python 3.
    print(counter.count_patt(httpd_log))
    print(counter.sort())
这里涉及到一点细节:dict.get(key[, default]) 语法。当 key 不存在时,get 会返回后面给定的默认值 default(不指定时返回 None):
[root@esearch-prod-component-010177210122 /home/liujiangbo] 03:41:26 0
# python
Python 2.6.6 (r266:84292, Aug 18 2016, 15:13:37)
[GCC 4.4.7 20120313 (Red Hat 4.4.7-17)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> result = {}
>>> result[111]=result.get(111,0)+1
>>> print(result)
{111: 1}
Python 字典(Dictionary)的 items() 方法在 Python 2 中以列表形式返回可遍历的 (键, 值) 元组;在 Python 3 中返回的是视图对象,需要列表时可用 list() 转换。语法:dict.items()
>>> print(result.items())
[(111, 1)]
>>> result = {}
这样可能看得不够明显,看下面的例子:
#!/usr/bin/python
# coding=utf-8
# Named site_urls rather than dict so the builtin dict type is not shadowed.
site_urls = {'Google': 'www.google.com', 'Runoob': 'www.runoob.com', 'taobao': 'www.taobao.com'}
# list(...) keeps the printed form a plain list on Python 3, where items()
# returns a view object instead of a list.
print("字典值 : %s" % list(site_urls.items()))
# Walk the (key, value) pairs of the dictionary.
for name, url in site_urls.items():
    # %-formatting prints "key value" identically on Python 2 and Python 3.
    print("%s %s" % (name, url))
执行结果:
字典值 : [('Google', 'www.google.com'), ('taobao', 'www.taobao.com'), ('Runoob', 'www.runoob.com')]
Google www.google.com
taobao www.taobao.com
Runoob www.runoob.com
网友评论