结果
Result.png
我的代码
from bs4 import BeautifulSoup
import requests
import time
import pymongo
__author__ = 'CP6'

# Connect to the MongoDB server on localhost:27017 and pick the database and
# collection that the scraped listings are written to.
client = pymongo.MongoClient('localhost', 27017)
xiaozhuDB = client['xiaozhuDB']
xiaozhuTb = xiaozhuDB['xiaozhuTb']

# Search-result pages 1 through 3 of the listing site.
urls = [
    'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(page)
    for page in range(1, 4)
]
def somMethod():
    """Scrape every page in ``urls`` and store its listings in ``xiaozhuTb``.

    For each URL the page is downloaded and parsed, the title, price and link
    elements are selected, and one document with the keys ``title``,
    ``price`` and ``link`` is inserted per listing.

    Raises:
        requests.exceptions.RequestException: on a connection error or when
            the server does not answer within the timeout.
        ValueError: if a price element's text is not an integer.
    """
    for single_url in urls:
        # Without a timeout a stalled connection would block the run forever.
        wb_data = requests.get(single_url, timeout=10)
        # Pause between requests so the site is not hit in a tight loop.
        time.sleep(2)
        if not wb_data.ok:
            # A 4xx/5xx response is not a listing page; report it and move on
            # to the next URL instead of parsing the error body.
            print('skip {}: HTTP {}'.format(single_url, wb_data.status_code))
            continue

        soup = BeautifulSoup(wb_data.text, 'lxml')
        titles = soup.select('span.result_title')
        prices = soup.select('span.result_price > i')
        links = soup.select('#page_list > ul > li > a')

        # The three selections are paired positionally; zip stops at the
        # shortest list.
        for title, price, link in zip(titles, prices, links):
            data = {
                'title': title.get_text(),
                'price': int(price.get_text()),
                'link': link.get('href'),
            }
            xiaozhuTb.insert_one(data)
def getResult():
    """Print every document in ``xiaozhuTb`` whose price is greater than 500."""
    price_filter = {'price': {'$gt': 500}}
    matches = xiaozhuTb.find(price_filter)
    for listing in matches:
        print(listing)
# Script entry point: scrape and store the listings first, then print the
# stored ones that match the price query.
if __name__ == "__main__":
    somMethod()
    getResult()
总结
-
1. 数据库查询方法
# $lt / $lte / $gt / $gte / $ne 分别等价于 less than(小于)/ less than or equal(小于等于)/ greater than(大于)/ greater than or equal(大于等于)/ not equal(不等于)
-
2. 打开文件,读取内容
# Raw string so every backslash in the Windows path is taken literally; the
# value is the same as before but no longer relies on unrecognised escapes.
path = r'C:\Users\Queen\Desktop\MuggleCode\Mycode\week2\2-1\2-1code_video\walden.txt'
with open(path, 'r') as f:
    lines = f.readlines()  # read every line of the file into a list

# Print one record per line: its position, its text and its word count.
for index, line in enumerate(lines):
    data = {
        'index': index,
        'line': line,
        'words': len(line.split())
    }
    print(data)
-
3. enumerate 函数: 用于遍历序列中的元素以及它们的下标
# enumerate yields (index, element) pairs; print() is called as a function
# so the example runs on Python 3 like the rest of the code.
for i, j in enumerate(('a', 'b', 'c')):
    print(i, j)
0 a
1 b
2 c
-
4. 数据库操作步骤
# Open a connection
client = pymongo.MongoClient('localhost', 27017)
# Select (create on first use) the database
xiaozhuDB = client['xiaozhuDB']
# Select (create on first use) the table, called a collection in MongoDB
xiaozhuTb = xiaozhuDB['xiaozhuTb']
# Insert a document
xiaozhuTb.insert_one(data)
# Query documents
for item in xiaozhuTb.find({'price': {'$gt': 500}}):
    print(item)
-
5. Bs4 prettify
print(soup.b.prettify())  # print the HTML in standard (pretty-printed) format
网友评论