筛选结果
![输出结果](`GK$TO1TNSIR@0OR输出结果.png)
代码
from bs4 import BeautifulSoup
import requests
import pymongo
# Client for the MongoDB server at localhost, port 27017.
client = pymongo.MongoClient('localhost',27017)
# The 'fangjia' database and, inside it, the 'sheet_line' collection that
# the rest of this script inserts into and reads from.
fangjia = client['fangjia']
sheet_line = fangjia['sheet_line']
def gender_get(classname):
    """Map an element's CSS class value to ``'boy'`` or ``'girl'``.

    Args:
        classname: The ``class`` attribute of the gender icon element, either
            as a list of class tokens or as a whitespace-separated string.
            ``None`` and any other type are treated as "no classes".

    Returns:
        ``'boy'`` when ``member_boy_ico`` is one of the classes, otherwise
        ``'girl'``.
    """
    if isinstance(classname, str):
        classes = classname.split()
    elif isinstance(classname, (list, tuple, set, frozenset)):
        classes = classname
    else:
        classes = ()
    # Membership rather than exact list equality, so an element carrying
    # additional classes is still recognised.
    return 'boy' if 'member_boy_ico' in classes else 'girl'
def wb_analyse(url):
    """Scrape one listing detail page and store its title and rent.

    Downloads ``url``, parses the HTML with BeautifulSoup (lxml parser) and
    inserts one ``{'title': str, 'rent': int}`` document into ``sheet_line``
    for every listing matched on the page.

    Args:
        url: Address of the listing detail page to fetch.

    Raises:
        ValueError: If the rent text cannot be converted by ``int()``.
        Exceptions raised by ``requests.get`` or ``insert_one`` propagate.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('div.pho_info > h4 > em')
    rents = soup.select('div.day_l > span')
    # The selections below are not stored, but they still take part in the
    # zip(): zip() stops at the shortest sequence, so a listing is only
    # saved when every one of these elements is present on the page.
    addresses = soup.select('div.pho_info > p > span.pr5')
    imgs = soup.select('#curBigImage')
    ownerimgs = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    ownnames = soup.select('div.js_box.clearfix > div.w_240 > h6 > a')
    genders = soup.select('div.js_box.clearfix > div.w_240 > h6 > span')

    for title, _address, rent, _img, _ownerimg, _ownname, _gender in zip(
            titles, addresses, rents, imgs, ownerimgs, ownnames, genders):
        data = {
            'title': title.get_text(),
            # Stored as an int so rents can be compared numerically.
            'rent': int(rent.get_text()),
        }
        sheet_line.insert_one(data)
def url_get(wbpage):
    """Scrape every listing linked from one search-result page.

    Downloads ``wbpage``, collects the anchors under ``#page_list`` and
    passes each anchor's ``href`` to ``wb_analyse``.

    Args:
        wbpage: Address of the search-result page to fetch.
    """
    wbdata = requests.get(wbpage)
    soup = BeautifulSoup(wbdata.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        urlwb = link.get('href')
        if not urlwb:
            # An anchor without an href would make requests.get() fail on
            # a None/empty URL; there is nothing to scrape for it.
            continue
        wb_analyse(urlwb)
# Search-result pages to crawl: range(1, 3) yields page numbers 1 and 2.
urls = ["http://bj.xiaozhu.com/search-duanzufang-p{}-0/".format(number) for number in range(1,3)]
for single_url in urls:
    url_get(single_url)

# Print every stored listing whose rent is at least 500. The filter runs in
# MongoDB, so documents with a missing or non-numeric 'rent' are skipped
# instead of raising KeyError/TypeError in the Python comparison.
for item in sheet_line.find({'rent': {'$gte': 500}}):
    print(item)
总结
- 刚开始筛选时,print 的结果都是空的;后来发现之前的代码把 rent 存成了字符串,需要先用 int() 转换为整数才能比较大小。
网友评论