美文网首页Python
利用Ipython进行计数和画图

利用Ipython进行计数和画图

作者: 王诗翔 | 来源:发表于2017-01-12 20:27 被阅读140次
    # -*- coding: utf-8 -*-
    import json
    path = '/home/wsx/文档/pydata-book-master/ch02/usagov_bitly_data2012-03-16-1331923249.txt'
    # Each line of the file is parsed as one JSON document. The context manager
    # closes the file handle as soon as parsing is done instead of leaking it.
    with open(path) as data_file:
        records = [json.loads(line) for line in data_file]

    # Collect the 'tz' value of every record that actually has a 'tz' key.
    time_zones = [rec['tz'] for rec in records if 'tz' in rec]
    #计数
    #1
    def get_counts(sequence):
        """Return a plain dict mapping each item of *sequence* to its count.

        Keys appear in the order in which the items are first seen.
        """
        tally = {}
        for item in sequence:
            # .get supplies 0 for an item that has not been seen yet.
            tally[item] = tally.get(item, 0) + 1
        return tally
    
    
    #2
    from collections import defaultdict
    
    def get_counts2(sequence):
        """Return a defaultdict(int) mapping each item of *sequence* to its count.

        Because the result is a defaultdict, looking up an item that was never
        seen yields 0 (and inserts that key).
        """
        tally = defaultdict(int)  # missing keys start at 0
        for item in sequence:
            tally[item] = tally[item] + 1
        return tally
     
        
    # Tally how often each time zone occurs, using the get_counts helper.
    counts = get_counts(time_zones)
    
    #得到前10位的时区及其计数值
    def top_counts(count_dict, n=10):
        """Return the *n* largest entries of *count_dict* as (count, key) pairs.

        The pairs are sorted in ascending order, so the most frequent key is
        last; ties on the count are ordered by key. If the dict has fewer than
        *n* entries, all of them are returned.

        A non-positive *n* returns an empty list. Without this guard, the slice
        ``[-0:]`` would return every pair rather than none.
        """
        if n <= 0:
            return []
        value_key_pairs = sorted((count, tz) for tz, count in count_dict.items())
        return value_key_pairs[-n:]
    
        
    #可以在Python标准库中找到collections.Counter类,它能使这个任务变得更简单
    from collections import Counter
    # Counter builds the per-time-zone tally in a single call.
    counts = Counter(time_zones)
    # Bare expression: shows the ten most common time zones when run interactively.
    counts.most_common(10)
    
    #用pandas对时区进行计数
    from pandas import DataFrame, Series
    import pandas as pd; import numpy as np
    # Build a DataFrame from the list of parsed records.
    frame = DataFrame(records)
    # Bare expression: displays the frame when run interactively (e.g. in IPython).
    frame
    # Count how many times each value of the 'tz' column occurs.
    tz_counts = frame['tz'].value_counts()
    # Bare expression: displays the first ten entries of the counts.
    tz_counts[:10]
    
    
    #然后,我们想利用绘图库(matplotlib)为这段数据生成一张图片。为此,我们先给记录中未知
    #或者缺失的时区填上一个替代值。fillna函数可以替换缺失值,而未知值可以通过布尔型数组
    #索引加以替换:
    # Missing time zones get an explicit 'Missing' label.
    clean_tz = frame['tz'].fillna('Missing')
    # Empty-string time zones are relabelled through a boolean mask.
    # The label was misspelled 'Unkonwn', which showed up in the plotted output.
    clean_tz[clean_tz == ''] = 'Unknown'
    tz_counts = clean_tz.value_counts()
    # Bare expression: displays the first ten entries when run interactively.
    tz_counts[:10]
    # Horizontal bar chart of the first ten entries.
    tz_counts[:10].plot(kind='barh', rot=0)
    

    相关文章

      网友评论

        本文标题:利用Ipython进行计数和画图

        本文链接:https://www.haomeiwen.com/subject/scrjbttx.html