Summary: Xiaozhu (xiaozhu.com) keeps adjusting its site, which makes it hard for us to crawl; I tested this several times.
Please make sure you have all the dependencies and the environment set up before running.
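If you want to double-check the environment first, here is a small self-test; it assumes requests, beautifulsoup4, lxml and pymongo were installed with pip and that MongoDB is running locally on the default port 27017 (the 2-second server-selection timeout is only an illustrative value):

# Environment self-test (assumes: pip install requests beautifulsoup4 lxml pymongo,
# plus a local MongoDB server listening on the default port 27017).
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

print('requests', requests.__version__)            # requests is importable
BeautifulSoup('<html></html>', 'lxml')             # the lxml parser is available
client = MongoClient('localhost', 27017, serverSelectionTimeoutMS=2000)
print('MongoDB', client.server_info()['version'])  # raises if the server is unreachable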
Successful run (screenshot): smallpigintomongo.PNG
Code:
#!C:\Python35\python.exe
# coding=utf-8
import requests
from bs4 import BeautifulSoup
from pymongo import MongoClient

# Connect to the local MongoDB server and use the 'xiaozhu' database,
# storing listings in the 'fangzi' collection.
host = 'localhost'
port = 27017
client = MongoClient(host, port)
db = client['xiaozhu']
fangzi = db['fangzi']

def insert_fangzi_info(url):
    # Fetch one search-result page and parse it with the lxml parser.
    web_data = requests.get(url)
    soup = BeautifulSoup(web_data.text, 'lxml')
    # Selectors below verified working on 2016-12-11: listing titles and prices.
    titles = soup.select('#page_list > ul > li > div.result_btm_con > div > a > span')
    prices = soup.select('span.result_price > i')
    print(titles)
    print(prices)
    for title, price in zip(titles, prices):
        info = {
            # e.g. <span class="result_title hiddenTxt">三站奔中关,有转角阳台带独卫衣帽间的套间</span>
            'title': title.get_text(),
            'price': int(price.get_text())
        }
        print(info)
        fangzi.insert_one(info)
    print("inserted into the fangzi collection")

def find_fangzi():
    # Print every stored listing priced at 405 or more.
    for info in fangzi.find():
        if info['price'] >= 405:
            print(info)
    print("finish!")

# Crawl the first four search-result pages, then query the stored results.
urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(number) for number in range(1, 5)]
for one_url in urls:
    insert_fangzi_info(one_url)
find_fangzi()

'''
# Single-page test:
url22 = 'http://bj.xiaozhu.com/search-duanzufang-p8-0/'
insert_fangzi_info(url22)
find_fangzi()
'''
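Since the summary notes that Xiaozhu keeps changing and is getting hard to crawl, a common mitigation is to send a browser-like User-Agent header and pause between page requests. This is only a sketch of that idea, not part of the original script; the User-Agent string, the 10-second timeout and the 2-second pause are assumed values you can tune, and fetch_page is a hypothetical helper meant as a drop-in replacement for the plain requests.get(url) call inside insert_fangzi_info:

import time
import requests

# Assumed browser-like header so the request does not look like a bare script.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

def fetch_page(url):
    # Hypothetical replacement for requests.get(url) in insert_fangzi_info.
    web_data = requests.get(url, headers=HEADERS, timeout=10)
    time.sleep(2)  # assumed delay between pages to avoid hammering the site
    return web_data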