任务
- 各城区发帖量Top3大类目和发帖量
- 各大类目中，成色对应的平均价格情况
程序1
import pymongo
import charts
from datetime import timedelta,date
# Connect to the MongoDB server on localhost:27017 and select the
# 'sample' collection of the 'test' database.
client = pymongo.MongoClient('localhost', 27017)
test = client['test']
item_info = test['sample']
# In[62]:
# Preview the 'pub_date' field of the first 100 documents ('_id' excluded).
for i in item_info.find({}, {'_id': 0, 'pub_date': 1}).limit(100):
    print(i)
# In[63]:
#for i in item_info.find():
# frags = i['pub_date'].split('-')
# if len(frags)==1:
# date = frags[0]
# else:
# date = '{}.{}.{}'.format(frags[0],frags[1],frags[2])
# item_info.update_one({'_id':i['_id']},{'$set':{'pub_date':date}})
# In[64]:
def get_all_dates(date1, date2):
    """Yield every calendar day from date1 through date2, inclusive.

    Args:
        date1: First day, as a 'YYYY.MM.DD' string (zero padding optional).
        date2: Last day, in the same format.

    Yields:
        Each day as a zero-padded 'YYYY.MM.DD' string, in ascending order.
        Nothing is yielded when date1 is later than date2.

    Raises:
        ValueError: If either argument is not three dot-separated integers
            forming a valid calendar date.
    """
    def _parse(text):
        # Split once instead of once per component.
        year, month, day = (int(part) for part in text.split('.'))
        return date(year, month, day)

    the_date = _parse(date1)
    end_date = _parse(date2)
    one_day = timedelta(days=1)
    while the_date <= end_date:
        yield the_date.strftime('%Y.%m.%d')
        the_date += one_day
# In[65]:
#for i in get_all_dates('2015.11.12','2016.01.15'):
# print(i)
# In[66]:
def get_data_within(date1, data2, areas):
    """Yield one line-chart series of daily post counts per area.

    Args:
        date1: First day of the range, as a 'YYYY.MM.DD' string.
        data2: Last day of the range (inclusive), in the same format.
        areas: Iterable of area names matched against the 'area' field.

    Yields:
        A dict with keys 'name' (the area), 'data' (one count per day of the
        range, in date order) and 'type' (always 'line').
    """
    for area in areas:
        # The loop variable is `day`, not `date`, so it does not shadow the
        # imported datetime.date class.
        # count_documents lets the server count the matches instead of
        # fetching every document just to take len() of the result.
        area_day_posts = [
            item_info.count_documents({'pub_date': day, 'area': area})
            for day in get_all_dates(date1, data2)
        ]
        yield {
            'name': area,
            'data': area_day_posts,
            'type': 'line',
        }
# In[67]:
# Date range and areas shared by the printout and the chart below.
START_DATE, END_DATE = '2015.11.12', '2016.01.15'
AREAS = ['朝阳', '海淀', '西城']

# Build the series once and reuse it for both printing and plotting, so the
# per-day queries are not issued twice.
series = list(get_data_within(START_DATE, END_DATE, AREAS))
for area_series in series:
    print(area_series)
# In[69]:
# Chart options: x axis lists every day in the range, y axis is the count.
options = {
    'chart': {'zoomType': 'xy'},
    'title': {'text': '发帖量统计'},
    'subtitle': {'text': '发帖量统计'},
    'xAxis': {'categories': list(get_all_dates(START_DATE, END_DATE))},
    'yAxis': {'title': {'text': '数量'}},
}
# In[70]:
charts.plot(series, show='inline', options=options)
程序1运行结果
1.jpg
程序2
import pymongo
import charts
from datetime import timedelta,date
# In[317]:
# Connect to the MongoDB server on localhost:27017 and select the
# 'sample' collection of the 'test' database.
client = pymongo.MongoClient('localhost', 27017)
test = client['test']
item_info = test['sample']
# In[318]:
# Preview the 'look', 'price' and 'cates' fields of the first 100 documents.
for i in item_info.find({}, {'_id': 0, 'look': 1, 'price': 1, 'cates': 1}).limit(100):
    print(i)
# In[319]:
# Condition ('look') labels that are averaged, in the order the averages are
# reported. Records with any other label (including '-') are ignored.
_LOOK_LABELS = ('全新', '99成新', '95成新', '9成新', '8成新', '7成新及以下')


def _parse_price(raw):
    """Return the integer price in `raw`, or None when it is '面议'."""
    if raw == '面议':
        return None
    # Accepts '100', '100元' and '100 元'; int() tolerates leftover whitespace.
    return int(raw.rstrip().rstrip('元'))


def get_look_price_data(the_cates, records=None):
    """Yield one line-chart series of the average price per condition label.

    Args:
        the_cates: Category name compared with the third entry of each
            record's 'cates' list.
        records: Optional iterable of dicts with 'cates', 'price' and 'look'
            keys. Defaults to every document in the `item_info` collection.

    Yields:
        A single dict with keys 'name' (the_cates), 'data' (six averages in
        the order of _LOOK_LABELS) and 'type' (always 'line'). A label with
        no priced record gets None instead of raising ZeroDivisionError.
        NOTE(review): None is presumably serialised as a null (gap) point by
        the charting library; confirm.

    Raises:
        ValueError: If a matching record's price is neither '面议' nor an
            integer with an optional trailing '元'.
    """
    if records is None:
        records = item_info.find({}, {'_id': 0, 'look': 1, 'price': 1, 'cates': 1})

    prices_by_look = {label: [] for label in _LOOK_LABELS}
    for record in records:
        if record['cates'][2] != the_cates:
            continue
        price = _parse_price(record['price'])
        if price is None:
            # Negotiable price: nothing to average.
            continue
        bucket = prices_by_look.get(record['look'])
        if bucket is not None:
            bucket.append(price)

    the_price_data = [
        sum(prices_by_look[label]) / len(prices_by_look[label])
        if prices_by_look[label] else None
        for label in _LOOK_LABELS
    ]
    print(the_price_data)
    yield {
        'name': the_cates,
        'data': the_price_data,
        'type': 'line',
    }
# In[320]:取二手家电类目的成色价格关系
#get_look_price_data('北京二手家电')
# In[321]:
# Drain the generator into a list so it can be both printed and plotted.
series = list(get_look_price_data('北京二手台式机/配件'))
print(series)
# In[322]:
# Chart options: one x-axis category per condition label, average price on y.
options = dict(
    chart={'zoomType': 'xy'},
    title={'text': '成色与价格的关系'},
    subtitle={'text': '统计'},
    xAxis={'categories': ['全新', '99成新', '95成新', '9成新', '8成新', '7成新及以下']},
    yAxis={'title': {'text': '平均价格'}},
)
charts.plot(series, show='inline', options=options)
程序2运行结果
2.jpg
网友评论