2-2

作者: ooocoo | 来源:发表于2016-08-09 23:01 被阅读0次
    def get_pages_within(pages):
        """Scrape listing pages 1..``pages`` and store each row via ``shoujihao``.

        For every page number the function downloads
        ``http://bj.58.com/shoujihao/pn<N>/``, selects the ``strong.number``,
        ``b.price`` and ``a.t`` elements, pairs them up positionally and passes
        one ``{'title', 'price', 'link'}`` dict per row to
        ``shoujihao.insert_one``. ``'Done'`` is printed after each page.

        ``shoujihao`` is defined outside this block -- presumably a MongoDB
        collection; confirm against the surrounding module.

        Args:
            pages: Number of the last page to fetch (inclusive). Values below 1
                result in no requests being made.

        Raises:
            requests.exceptions.RequestException: If a page cannot be fetched,
                including when the server does not answer within the timeout.
        """
        for page_num in range(1, pages + 1):
            url = 'http://bj.58.com/shoujihao/pn{}/'.format(page_num)
            # Without a timeout a stalled connection would block the whole
            # scrape forever; bound the wait so the failure surfaces instead.
            wb_data = requests.get(url, timeout=10)
            soup = BeautifulSoup(wb_data.text, 'lxml')
            numbers = soup.select('strong.number')
            prices = soup.select('b.price')
            links = soup.select('a.t')

            # zip() stops at the shortest list, so rows are only stored when
            # all three pieces were found at the same position.
            for number, price, link in zip(numbers, prices, links):
                data = {
                    'title': number.get_text(),
                    'price': price.get_text(),
                    'link': link.get('href'),
                }
                shoujihao.insert_one(data)
            print('Done')
    

    相关文章

      网友评论

          本文标题:2-2

          本文链接:https://www.haomeiwen.com/subject/neidsttx.html