美文网首页
week2_1_homework-due

week2_1_homework-due

作者: jefflin910 | 来源:发表于2016-05-06 23:12 被阅读0次

    ## Grab listing info from foshan.xiaozhu.com

    ```

    import requests
    import pymongo

    from bs4 import BeautifulSoup

    # MongoDB setup: local server, database 'xiaozhu_fs',
    # collection 'sheet_daily_rent' stores one document per listing.
    client = pymongo.MongoClient('localhost', 27017)
    xiaozhu_fs = client['xiaozhu_fs']
    sheet_daily_rent = xiaozhu_fs['sheet_daily_rent']

    # Single-listing probe kept from development (not executed):
    # url = 'http://foshan.xiaozhu.com/fangzi/2302538827.html'
    # wb_data = requests.get(url)
    # soup = BeautifulSoup(wb_data.text,'lxml')

    def client_get_info(info):
        """Persist one listing record.

        Args:
            info: dict describing a single listing (see get_page_info).
        """
        sheet_daily_rent.insert_one(info)

    def get_page_links(page_nums=3):
        """Collect listing-detail URLs from the search result pages.

        Args:
            page_nums: upper bound (exclusive) for the page counter.
                NOTE(review): range(1, page_nums) means the default of 3
                fetches pages 1 and 2 only — confirm this is intended.

        Returns:
            list of href strings, one per listing link found.
        """
        page_links = []
        for each_num in range(1, page_nums):
            full_url = 'http://foshan.xiaozhu.com/search-duanzufang-p{}-0/'.format(str(each_num))
            wb_data = requests.get(full_url)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            for link in soup.select('#page_list > ul > li > a'):
                page_links.append(link.get('href'))
        return page_links

    def print_gender(class_name):
        """Map the host avatar's CSS class name to a gender string.

        Args:
            class_name: first CSS class of the avatar <span>.

        Returns:
            'male' for 'member_boy_ico', 'female' for 'member_girl_ico',
            otherwise None.
        """
        if class_name == 'member_boy_ico':
            return 'male'
        elif class_name == 'member_girl_ico':
            return 'female'
        return None

    def get_page_info(page_nums=3):
        """Scrape each listing page and build a record per listing.

        Args:
            page_nums: forwarded to get_page_links (see its note on range).
        """
        urls = get_page_links(page_nums)
        for url in urls:
            wb_data = requests.get(url)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            # The first CSS class on the host avatar span encodes gender.
            gender = soup.select('#floatRightBox > div > div > h6 > span')[0].get('class')[0]
            data = {
                'title': soup.title.text,
                'address': soup.select('.pr5')[0].text,
                'daily-price': soup.select('.day_l')[0].text,
                'landlord_name': soup.select('a.lorder_name')[0].text,
                'gender': print_gender(gender),
                'landlord_info': list(soup.select('p.col_green')[0].stripped_strings),
            }
            # Uncomment to persist each record to MongoDB:
            # client_get_info(data)

    # Uncomment to run the scrape:
    # get_page_info()

    # Dump everything currently stored in the collection.
    for item in sheet_daily_rent.find():
        print(item)

    ```

    相关文章

      网友评论

          本文标题:week2_1_homework-due

          本文链接:https://www.haomeiwen.com/subject/okbjrttx.html