美文网首页
MongoDB第一段代码,xiaozhu短租数据

MongoDB第一段代码,xiaozhu短租数据

作者: RhettButler | 来源:发表于2016-06-18 10:40 被阅读0次

    import time

    import pymongo
    import requests
    from bs4 import BeautifulSoup

    # Open the MongoDB server at localhost:27017 and pick the collection that
    # receives one document per scraped listing.
    client = pymongo.MongoClient('localhost', 27017)
    duanzu = client['duanzu']
    sheet_lines = duanzu['sheet_lines']

    # Search-result pages to crawl: range(1, 3) yields page numbers 1 and 2,
    # each substituted into the "p{}" slot of the query URL.
    url = [
        'http://bj.xiaozhu.com/search-duanzufang-p{}-0/?startDate=2016-06-19&endDate=2016-06-19'.format(i)
        for i in range(1, 3)
    ]

    # NOTE(review): nothing in the visible script reads this list; it is kept
    # so that the set of module-level names stays unchanged.
    lianjie1 = []

    def sexss(valuse):
        """Translate badge elements into gender labels.

        Args:
            valuse: iterable of objects exposing ``.get('class')`` that returns
                a sequence of class names (or ``None`` when absent). The
                visible caller passes the result of a BeautifulSoup ``select``.

        Returns:
            A list with one label per input element, in input order:
            '女' when the first class is ``member_ico1``, '男' when it is
            ``member_ico``, and '性别未知' for anything else, including an
            element that has no class at all.
        """
        labels = {'member_ico1': '女', 'member_ico': '男'}
        job3 = []
        for element in valuse:
            classes = element.get('class')
            # A missing or empty class attribute used to raise on ``[0]``;
            # treat it the same as an unrecognised class.
            first_class = classes[0] if classes else None
            job3.append(labels.get(first_class, '性别未知'))
        return job3

    def lian(url1):
        """Scrape every listing linked from one search-result page.

        Downloads ``url1``, collects the elements styled ``cursor:pointer``,
        and follows each element's ``detailurl`` attribute. For every detail
        page the title, address, price, main image, host avatar, host gender
        and host name are extracted; each resulting record is inserted into
        the module-level ``sheet_lines`` collection and printed.

        Args:
            url1: URL of a search-result page.

        Returns:
            None. The results are the inserted documents and printed lines.

        Raises:
            ValueError: if a price's text is not a plain integer (from ``int``).
            Errors raised by ``requests`` or ``pymongo`` are not caught here.
        """
        wb_data = requests.get(url1)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        lianjie = soup.find_all(style='cursor:pointer')

        for link in lianjie:
            detail_url = link.get('detailurl')
            if not detail_url:
                # Without a detail URL there is nothing to fetch; passing
                # None to requests.get would raise.
                continue

            # Pause one second before each detail request.
            time.sleep(1)
            detail_page = requests.get(detail_url)
            detail_soup = BeautifulSoup(detail_page.text, 'lxml')

            titles = detail_soup.select(' h4 > em')
            addresss = detail_soup.select('p > span.pr5')
            prices = detail_soup.select('div.day_l > span')
            images = detail_soup.find_all(id='curBigImage')
            imagespeople = detail_soup.select('div.member_pic > a > img')
            sexs = detail_soup.select('div.member_pic > div')
            name_oweners = detail_soup.select('div.w_240 > h6 > a')

            # One label per badge element, so zipping over the labels yields
            # the same number of records as zipping over the elements did.
            job4 = sexss(sexs)

            fields = zip(titles, addresss, prices, images, imagespeople,
                         job4, name_oweners)
            for title, address, price, image, imagepeople, sex, name_owener in fields:
                data = {
                    'title': title.get_text(),
                    'address': address.get_text(),
                    'price': int(price.get_text()),
                    'image': image.get('src'),
                    'imagepeople': imagepeople.get('src'),
                    # Store this record's own label, not the whole label list.
                    'sex': sex,
                    'name_owener': name_owener.get_text(),
                }
                sheet_lines.insert_one(data)
                print(data['title'], data['address'], str(data['price']) + '¥',
                      data['image'], data['imagepeople'], data['sex'],
                      data['name_owener'])

    # Crawl each search-result page; lian() stores and prints the listings
    # itself and returns nothing, so its result is not kept.
    for page_url in url:
        lian(page_url)

    # Print every stored listing whose price is at least 500.
    for item in sheet_lines.find({'price': {'$gte': 500}}):
        print(item)

    相关文章

      网友评论

          本文标题:MongoDB第一段代码,xiaozhu短租数据

          本文链接:https://www.haomeiwen.com/subject/hpovdttx.html