原程序:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
def read_config(path):
    """Parse the JSON file at ``path`` and return the decoded object.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    with open(path, 'r') as config_file:
        parsed = json.load(config_file)
    return parsed
def printData(aggr):
    """Print every entry of ``aggr`` as ``key : value``, smallest value first.

    Args:
        aggr: Mapping from a group label to its count.
    """
    pairs = list(aggr.items())
    # list.sort is stable, so entries with equal counts keep the mapping's
    # own iteration order.
    pairs.sort(key=lambda pair: pair[1])
    for label, count in pairs:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s %s" % (label, ":", count))
def calculateAge(hukou):
    """Tally people by the year embedded in their ID card and print the result.

    The grouping key is the four characters at index 6 through 9 of each
    person's ``idCard`` string (presumably the birth year of an 18-digit
    resident ID number -- confirm against the data source).

    Args:
        hukou: Parsed JSON document; ``hukou["rows"]`` is iterated and every
            row must provide an ``"idCard"`` string.
    """
    aggr = {}
    for person in hukou["rows"]:
        year_str = person["idCard"][6:10]
        aggr[year_str] = aggr.get(year_str, 0) + 1
    # printData prints the table itself and returns None; wrapping the call in
    # another print would emit a stray "None" line after the table.
    printData(aggr)
def calculateCompany(hukou):
    """Count how many rows share each ``unit`` value and print the tally.

    Args:
        hukou: Parsed JSON document; ``hukou["rows"]`` is iterated and every
            row must provide a ``"unit"`` entry.
    """
    tally = {}
    for row in hukou["rows"]:
        employer = row["unit"]
        if employer not in tally:
            tally[employer] = 0
        tally[employer] += 1
    printData(tally)
def calculateLevel(hukou):
    """Count rows per education level and print the tally.

    Each row's ``s3`` value is compared against the codes 15, 26 and 37, which
    map to the labels 本科 (bachelor), 硕士 (master) and 博士 (doctorate).
    Any other value is counted under 其他 (other). All four labels are always
    printed, even when their count is zero.

    Args:
        hukou: Parsed JSON document; ``hukou["rows"]`` is iterated and every
            row must provide an ``"s3"`` entry.
    """
    # Checked in this order with ==, exactly like an if/elif chain.
    known_levels = ((15, "本科"), (26, "硕士"), (37, "博士"))
    aggr = {"博士": 0, "硕士": 0, "本科": 0, "其他": 0}
    for person in hukou["rows"]:
        code = person["s3"]
        label = "其他"
        for level_code, level_name in known_levels:
            if code == level_code:
                label = level_name
                break
        aggr[label] += 1
    printData(aggr)
def calculateScore(hukou):
    """Count how many rows share each integer score and print the tally.

    Args:
        hukou: Parsed JSON document; ``hukou["rows"]`` is iterated and every
            row must provide a ``"score"`` entry that ``int()`` accepts.
    """
    counts = {}
    for entry in hukou["rows"]:
        points = int(entry["score"])
        try:
            counts[points] += 1
        except KeyError:
            # First time this score is seen.
            counts[points] = 1
    printData(counts)
if __name__ == '__main__':
    hukou = read_config('./beijing.json')
    # One pre-formatted argument yields the same line under the Python 2
    # print statement and the Python 3 print function.
    print(u"%s %s" % (u"总人数: ", hukou["currentPageSize"]))
    # Exactly one report is enabled at a time; uncomment the one you need.
    # calculateAge(hukou)
    calculateCompany(hukou)
    # calculateLevel(hukou)
    # calculateScore(hukou)
里面重复的分组统计代码看起来实在不舒服, 忍不住要改一下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections import Counter
import json
def read_config(path):
    """Load the JSON document stored at ``path``.

    The file is decoded as UTF-8 explicitly. Without an ``encoding`` argument
    ``open`` falls back to the locale's preferred encoding, which is not UTF-8
    on every platform and would corrupt or reject the non-ASCII text.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.
    """
    with open(path, 'r', encoding='utf-8') as load_f:
        return json.load(load_f)
def print_data(aggr):
    """Print every entry of ``aggr`` as ``key : value``, smallest value first.

    Args:
        aggr: Mapping from a group label to its count.
    """
    pairs = list(aggr.items())
    # list.sort is stable, so entries with equal counts keep the mapping's
    # own iteration order.
    pairs.sort(key=lambda pair: pair[1])
    for label, count in pairs:
        print(label, ':', count)
def iter_age(hukou):
    """Yield the year portion of every person's ID card number.

    The value produced for each row is characters 6 through 9 of its
    ``idCard`` string (presumably the birth year of an 18-digit resident ID
    number -- confirm against the data source).

    Args:
        hukou: Parsed JSON document with a ``'rows'`` sequence of dicts.

    Returns:
        A generator of four-character strings, one per row.
    """
    rows = hukou['rows']
    year_span = slice(6, 10)
    return (row['idCard'][year_span] for row in rows)
def iter_company(hukou):
    """Yield the ``unit`` value of every row.

    Args:
        hukou: Parsed JSON document with a ``'rows'`` sequence of dicts.

    Returns:
        A generator producing one ``unit`` value per row, in row order.
    """
    rows = hukou['rows']
    return (row['unit'] for row in rows)
def iter_level(hukou):
    """Yield an education-level label for every row.

    Each row's ``s3`` value is compared against the codes 15, 26 and 37, which
    map to 本科 (bachelor), 硕士 (master) and 博士 (doctorate). Any other
    value yields 其他 (other).

    Args:
        hukou: Parsed JSON document with a ``'rows'`` sequence of dicts.

    Yields:
        One label string per row, in row order.
    """
    # Checked in this order with ==, exactly like an if/elif chain.
    level_names = ((15, '本科'), (26, '硕士'), (37, '博士'))
    for row in hukou['rows']:
        code = row['s3']
        yield next(
            (name for known, name in level_names if code == known),
            '其他',
        )
def iter_score(hukou):
    """Yield every row's ``score`` converted with ``int()``.

    Args:
        hukou: Parsed JSON document with a ``'rows'`` sequence of dicts.

    Returns:
        A generator of integers, one per row, in row order.
    """
    rows = hukou['rows']
    raw_scores = (row['score'] for row in rows)
    return (int(raw) for raw in raw_scores)
def stat(hukou, iter_func):
    """Count how often each value produced by ``iter_func(hukou)`` occurs.

    Args:
        hukou: Parsed JSON document handed unchanged to ``iter_func``.
        iter_func: Callable that takes ``hukou`` and returns an iterable of
            hashable grouping keys.

    Returns:
        A ``Counter`` mapping each key to its number of occurrences.
    """
    counts = Counter()
    counts.update(iter_func(hukou))
    return counts
if __name__ == '__main__':
    hukou = read_config('./beijing.json')
    print('总人数: ', hukou['currentPageSize'])
    # Exactly one report is enabled at a time; uncomment the one you need.
    age_counts = stat(hukou, iter_age)
    print_data(age_counts)
    #print_data(stat(hukou, iter_company))
    #print_data(stat(hukou, iter_level))
    #print_data(stat(hukou, iter_score))
改动有:
- 修改 Shebang, 使其适应更多的 POSIX 系统. 不同的环境, python 解释器的路径是不一样的, 默认是 python2 还是 python3 也不一定, 新的 Shebang 指定了 python 的版本.
- 从 python2 修改为 python3. python3 默认使用 unicode 内码, 中文前面不需要使用 `u` 前缀了.
- 抽象出提取统计信息的 `iter_xxx` 函数, 复用统计程序 `stat`.
- 使用 python3 标准库 `collections` 中的 `Counter` 类完成计数, 避免手工的循环.
- 使用 snake_case 风格的命名规范代替 camelCase.
网友评论