我的代码
def get_pages_within(pages, timeout=10):
    """Scrape phone-number listings from the first ``pages`` result pages.

    For every page number from 1 to ``pages`` (inclusive) the listing page is
    fetched and parsed; each listing whose link is not filtered out is passed
    to ``shoujihao.insert_one`` and then printed.

    Args:
        pages: Number of result pages to fetch, starting at page 1. Values
            below 1 fetch nothing.
        timeout: Seconds to wait for each HTTP response. Without a timeout a
            stalled server would block the whole run indefinitely.

    Returns:
        None. Records are written through ``shoujihao`` and echoed to stdout.
    """
    url_template = 'http://bj.58.com/shoujihao/pn{}/'

    for page_num in range(1, pages + 1):
        wb_data = requests.get(url_template.format(page_num), timeout=timeout)
        soup = BeautifulSoup(wb_data.text, 'lxml')

        numbers = soup.select('strong.number')
        prices = soup.select('b.price')
        links = soup.select('a.t')

        # NOTE(review): zip pairs the three selections by position and stops
        # at the shortest one; this presumes every listing on the page has
        # all three elements -- confirm against the page markup.
        for number, price, link in zip(numbers, prices, links):
            href = link.get('href')

            # An anchor without an href yields None; skip it rather than
            # failing on the substring checks below.
            if href is None:
                continue

            # Links containing either marker are excluded from the results.
            if 'zhineng' in href or 'short' in href:
                continue

            data = {
                'title': number.get_text(),
                'price': price.get_text(),
                'link': href.split('?')[0],  # keep only the part before '?'
            }
            shoujihao.insert_one(data)
            # Printed after the insert, preserving the original statement order.
            print(data)

        # NOTE(review): the original source was collapsed onto one line, so
        # the nesting level of this print is ambiguous; once per page is
        # assumed here -- confirm.
        print('Done')
网友评论