美文网首页
Python实战计划学习第三周

Python实战计划学习第三周

作者: 飞飞幻想 | 来源:发表于2016-06-02 21:25 被阅读0次

    绘制各类目对比柱状图

    import pymongo
    import charts
    # Open a client against the MongoDB server on localhost:27017 and bind the
    # 'log_y' collection of the 'local' database; the code below reads from it.
    client = pymongo.MongoClient('localhost',27017)
    local = client['local']
    _58_infos = local['log_y']
    
    
    def data_gen(type):
        """Yield one chart series per distinct ``cates[2]`` value.

        Every document returned by ``_58_infos.find()`` is tallied by the
        element at index 2 of its ``cates`` field. Each yielded dict carries
        that value as ``name``, a one-element ``data`` list holding the tally,
        and the ``type`` argument unchanged.
        """
        counts = {}
        for doc in _58_infos.find():
            key = doc['cates'][2]
            counts[key] = counts.get(key, 0) + 1
        for name, total in counts.items():
            yield {'name': name, 'data': [total], 'type': type}
    # Materialise the generator so charts.plot receives a plain list of series.
    series = list(data_gen('column'))
    charts.plot(
        series,
        show='inline',
        options={'title': {'text': '发帖量'}},
    )
    
    屏幕快照 2016-06-02 下午9.09.28.png
    • 学习了yield语法,确实很好用

    绘制发帖量折线图

    import pymongo
    import charts
    from datetime import timedelta, date
    import time
    # Bind the 'log_y' collection of the 'local' database on localhost:27017.
    client = pymongo.MongoClient('localhost',27017)
    local = client['local']
    _58_infos = local['log_y']
    
    # Sanity check: print the element at index 2 of 'cates' for ten documents.
    for i in _58_infos.find().limit(10):
        print(i['cates'][2])
    
    def gen_days(date1, date2):
        """Yield each day from ``date1`` through ``date2`` as 'YYYY.MM.DD'.

        Both arguments are strings in ``%Y.%m.%d`` format and both bounds are
        inclusive. Nothing is yielded when ``date1`` is later than ``date2``.
        """
        fmt = '%Y.%m.%d'
        # struct_time starts with (year, month, day), which is what date() takes.
        current = date(*time.strptime(date1, fmt)[:3])
        last = date(*time.strptime(date2, fmt)[:3])
        step = timedelta(days=1)
        while current <= last:
            yield current.strftime(fmt)
            current = current + step
    
    def data_gen(date1, date2, cates, type='line'):
        """Yield one chart series of per-day post counts for each category.

        Args:
            date1: First day of the range as a 'YYYY.MM.DD' string.
            date2: Last day of the range (inclusive), same format.
            cates: Category names to count. A document is counted under the
                element at index 2 of its ``cates`` field.
            type: Chart type copied into every yielded series.

        Yields:
            ``{'name': category, 'data': counts, 'type': type}`` where
            ``counts`` holds one integer per day produced by ``gen_days``.
            An empty day range yields series with empty ``data`` and does not
            query the collection.
        """
        days = list(gen_days(date1, date2))
        # Map each day to its position once instead of calling days.index()
        # for every matched document.
        day_index = {day: pos for pos, day in enumerate(days)}
        stats = {cate: [0] * len(days) for cate in cates}
        if days:
            query = {
                'pub_date': {'$gte': days[0], '$lte': days[-1]},
                'cates': {'$in': cates},
            }
            for info in _58_infos.find(query):
                doc_cates = info['cates']
                # '$in' matches a requested name at ANY position of the array,
                # so the element at index 2 may be missing or may be a category
                # that was not requested; such documents are skipped rather
                # than raising IndexError/KeyError.
                counts = stats.get(doc_cates[2]) if len(doc_cates) > 2 else None
                # The range filter compares strings, so a value inside the
                # range that is not one of the generated days is skipped too.
                pos = day_index.get(info['pub_date'])
                if counts is None or pos is None:
                    continue
                counts[pos] += 1
        for name, counts in stats.items():
            yield {
                'name': name,
                'data': counts,
                'type': type,
            }
    # Quick look at the generated series for a single name before plotting.
    print(list(data_gen('2015.12.24', '2016.01.05', ['朝阳'])))

    # Chart configuration; the x axis lists the same days the series cover.
    options = {
        'chart': {'zoomType': 'xy'},
        'title': {'text': '发帖量统计'},
        'subtitle': {'text': '可视化统计图表'},
        'xAxis': {'categories': list(gen_days('2015.12.24', '2016.01.05'))},
        'yAxis': {'title': {'text': '数量'}},
    }
    series = list(data_gen(
        '2015.12.24',
        '2016.01.05',
        ['北京二手家电', '北京二手台式机/配件', '北京二手笔记本'],
    ))
    charts.plot(series, show='inline', options=options)
    
    屏幕快照 2016-06-02 下午9.16.00.png
    • 学习了datetime库的一些知识。
    • 使用pymongo.find方法过滤自己需要的数据。

    绘制热销商品的分布饼图

    import pymongo
    import charts
    
    # Bind the 'log_y' collection of the 'local' database on localhost:27017.
    client = pymongo.MongoClient('localhost', 27017)
    local = client['local']
    _58_infos = local['log_y']
    
    
    def data_gen_1(date, time):
        """Yield ``[key, count]`` pairs for up to 100 groups, largest first.

        Documents whose ``pub_date`` equals ``date`` and whose ``time`` equals
        ``time`` are grouped by ``{'$slice': ['$area', 1]}``; ``key`` is the
        first element of that slice and ``count`` the number of documents in
        the group.
        """
        match_stage = {'$match': {'$and': [{'pub_date': date}, {'time': time}]}}
        group_stage = {
            '$group': {
                '_id': {'$slice': ['$area', 1]},
                'counts': {'$sum': 1},
            },
        }
        pipeline = [
            match_stage,
            group_stage,
            {'$sort': {'counts': -1}},
            {'$limit': 100},
        ]
        for row in _58_infos.aggregate(pipeline):
            yield [row['_id'][0], row['counts']]
    def data_gen_2(date, time):
        """Yield ``[key, total]`` pairs for up to 100 groups, largest first.

        Documents whose ``pub_date`` equals ``date`` and whose ``time`` equals
        ``time`` are grouped by ``{'$slice': ['$area', 1]}``; ``key`` is the
        first element of that slice and ``total`` the sum of the group's
        ``price`` values.
        """
        match_stage = {'$match': {'$and': [{'pub_date': date}, {'time': time}]}}
        group_stage = {
            '$group': {
                '_id': {'$slice': ['$area', 1]},
                'counts': {'$sum': '$price'},
            },
        }
        pipeline = [
            match_stage,
            group_stage,
            {'$sort': {'counts': -1}},
            {'$limit': 100},
        ]
        for row in _58_infos.aggregate(pipeline):
            yield [row['_id'][0], row['counts']]
    
    # Pie chart configuration.
    options = {
        'chart': {'zoomType': 'xy'},
        'title': {'text': '发帖量统计'},
        'subtitle': {'text': '2016.01.10二手物品在随后7天内,交易时长为1天的类目分布占比'},
    }
    # A single pie series built from the per-group counts of data_gen_1.
    series = [{
        'type': 'pie',
        'name': '交易数',
        'data': list(data_gen_1('2016.01.10', 1)),
    }]
    print(series)
    charts.plot(series, options=options, show='inline')
    
    
    屏幕快照 2016-06-02 下午9.18.57.png 屏幕快照 2016-06-02 下午9.22.12.png
    • 学习mongo的高级特性pipeline方式处理数据,加快数据处理速度

    分析二手商品行情

    import pymongo
    import charts
    
    # Bind the 'log_y' collection of the 'local' database on localhost:27017.
    client = pymongo.MongoClient('localhost', 27017)
    local = client['local']
    _58_infos = local['log_y']
    
    
    def data_gen(area):
        """Yield the three largest ``cates`` groups among posts in ``area``.

        Documents whose ``area`` field matches ``area`` are grouped by
        ``{'$slice': ['$cates', 2, 1]}`` (one element starting at index 2),
        sorted by descending document count and limited to three groups.

        Yields:
            ``{'name': key, 'data': count}`` where ``key`` is the first
            element of the group's ``_id`` slice.
        """
        pipeline = [
            {'$match': {'area': {'$in': [area]}}},
            {'$group': {'_id': {'$slice': ['$cates', 2, 1]}, 'counts': {'$sum': 1}}},
            {'$sort': {'counts': -1}},
            {'$limit': 3},
        ]
        for row in _58_infos.aggregate(pipeline):
            yield {
                'name': row['_id'][0],
                'data': row['counts'],
            }
    
    
    def area_gen():
        """Yield ``{'name': key, 'data': count}`` per group, largest first.

        All documents are grouped by ``{'$slice': ['$area', 1]}``; ``key`` is
        the first element of that slice and ``count`` the group's size.
        """
        group_stage = {
            '$group': {
                '_id': {'$slice': ['$area', 1]},
                'counts': {'$sum': 1},
            },
        }
        sort_stage = {'$sort': {'counts': -1}}
        for row in _58_infos.aggregate([group_stage, sort_stage]):
            yield {'name': row['_id'][0], 'data': row['counts']}
    
    area = '朝阳'
    # Run the aggregation once; names and counts are both read from `data`.
    data = list(data_gen(area))
    series = [{
        'name': '发帖量',
        'data': [entry['data'] for entry in data],
        'type': 'column',
    }]
    options = {
        'chart': {'type': 'column'},
        'title': {'text': '{}发帖最多的Top3类目'.format(area)},
        'subtitle': {'text': '可视化统计图表'},
        'xAxis': {'categories': [entry['name'] for entry in data]},
        'yAxis': {'title': {'text': '发帖量'}},
    }
    print('{} -> {}'.format(series, options))
    charts.plot(series, show='inline', options=options)
    
    屏幕快照 2016-06-02 下午9.23.50.png
    import pymongo
    import charts
    
    # Bind the 'log_y' collection of the 'walden' database on localhost:27017.
    client = pymongo.MongoClient('localhost', 27017)
    walden = client['walden']
    _ganji_infos = walden['log_y']

    # Rewrite 'price' as an int, using -1 when the stored value is falsy.
    # NOTE(review): limit(10) means only ten documents are rewritten - confirm
    # whether the whole collection was meant to be converted.
    for i in _ganji_infos.find().limit(10):
        i['price'] = int(i['price']) if i['price'] else -1
        _ganji_infos.update_one(
            {'_id': i['_id']},
            {'$set': {'price': i['price']}},
        )

    # Print ten documents to inspect the result.
    for i in _ganji_infos.find().limit(10):
        print('{} {} {}'.format(i['cate'][3], i['newer'], i['price']))
    
    def data_gen(cate):
        """Yield the average price per ``newer`` value for posts in ``cate``.

        Documents whose ``cate`` field matches ``cate`` and whose ``price`` is
        greater than 0 are grouped by ``newer``; groups come back sorted by
        descending average price. Every aggregation row is printed before the
        ``{'name': newer, 'data': average}`` dict is yielded.
        """
        match_stage = {
            '$match': {
                '$and': [
                    {'cate': {'$in': [cate]}},
                    {'price': {'$gt': 0}},
                ],
            },
        }
        group_stage = {'$group': {'_id': '$newer', 'counts': {'$avg': '$price'}}}
        sort_stage = {'$sort': {'counts': -1}}
        for row in _ganji_infos.aggregate([match_stage, group_stage, sort_stage]):
            print(row)
            yield {'name': row['_id'], 'data': row['counts']}
    
    
    def cate_gen():
        """Yield the average price per ``cate`` group, highest average first.

        Documents with ``price`` greater than 0 are grouped by
        ``{'$slice': ['$cate', 2, 1]}`` (one element starting at index 2).
        Each yielded dict has the first element of that slice as ``name`` and
        the group's average price as ``data``.
        """
        match_stage = {'$match': {'price': {'$gt': 0}}}
        group_stage = {
            '$group': {
                '_id': {'$slice': ['$cate', 2, 1]},
                'counts': {'$avg': '$price'},
            },
        }
        sort_stage = {'$sort': {'counts': -1}}
        for row in _ganji_infos.aggregate([match_stage, group_stage, sort_stage]):
            yield {'name': row['_id'][0], 'data': row['counts']}
    
    
    # Show the per-category averages before drilling into one category.
    print(list(cate_gen()))

    cate = '北京二手家具'
    # Run the aggregation once; names and averages are both read from `data`.
    data = list(data_gen(cate))
    print(data)
    series = [{
        'name': '平均价格',
        'data': [entry['data'] for entry in data],
    }]
    options = {
        'chart': {'zoomType': 'xy'},
        'title': {'text': '{}成色对应的平均价格'.format(cate)},
        'subtitle': {'text': '可视化统计图表'},
        'xAxis': {'categories': [entry['name'] for entry in data]},
        'yAxis': {'title': {'text': '平均价格'}},
    }
    print('{} -> {}'.format(series, options))

    charts.plot(series, show='inline', options=options)
    
    
    屏幕快照 2016-06-02 下午9.24.38.png
    • 主要用来调试pipeline的结构,一定要小心处理

    相关文章

      网友评论

          本文标题:Python实战计划学习第三周

          本文链接:https://www.haomeiwen.com/subject/neukdttx.html