Python 3 version
import requests
import time
import csv
from bs4 import BeautifulSoup
def get_url():
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    n = 0
    pages = [0, 60, 120, 240, 300]  # values for the 's' offset parameter in the listing URL
    with open(r'C:\Users\MY\Desktop\键盘数据.csv', 'w', newline='', encoding='utf-8') as file_1:
        fieldnames = ['商品名', '价格', '店铺', '月销量']  # product name / price / shop / monthly sales
        writer = csv.writer(file_1)
        writer.writerow(fieldnames)  # header row
        for page in pages:
            html = 'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.leSoie&cat=50024406&s=' + str(page) + '&q=%BC%FC%C5%CC&sort=d&style=g&industryCatId=50024406&type=pc#J_Filter'
            r = requests.get(html)
            soup = BeautifulSoup(r.text, 'lxml')
            shop_names = soup.select('a.productShop-name')    # shop name
            shop_prices = soup.select('p.productPrice em')    # product price
            # product title: every <a> with target="_blank" inside div.productTitle
            shop_titles = soup.select('div.productTitle a[target=_blank]')
            shop_status = soup.select('p.productStatus')      # monthly sales
            for title, price, name, status in zip(shop_titles, shop_prices, shop_names, shop_status):
                time.sleep(1)
                # Alternative kept from the original (a dict for use with csv.DictWriter):
                # data = {'商品名': title.get('title'), '价格': price.get_text(),
                #         '店铺': name.get_text().strip(), '月销量': status.get_text().strip()}
                data = [title.get('title'), price.get_text(), name.get_text().strip(), status.get_text().strip()]
                print(data)
                n += 1
                print('Writing record %d' % n)
                # writer.writeheader()  # only needed when using csv.DictWriter
                writer.writerow(data)  # write one row per product
                print('Record %d written' % n)
    print('Done writing')  # the with block closes the file, so an explicit file_1.close() is not needed
    end = time.perf_counter()
    print("This scrape took: %f s" % (end - start))

get_url()
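The dict that is commented out inside the loop, together with the commented-out writer.writeheader() call, suggests a csv.DictWriter variant was also considered. Below is a minimal sketch of that approach, under a few assumptions: the page structure and CSS selectors are unchanged, the scrape_page helper and the HEADERS dict with a browser-like User-Agent are illustrative additions (Tmall may not serve the real listing to the default requests client), and the output columns are the same four fields.

# Minimal csv.DictWriter sketch (same selectors and columns as above; scrape_page and HEADERS are illustrative).
import csv
import time

import requests
from bs4 import BeautifulSoup

FIELDNAMES = ['商品名', '价格', '店铺', '月销量']
HEADERS = {'User-Agent': 'Mozilla/5.0'}  # assumption: a browser-like UA is more likely to get the real listing page

def scrape_page(offset):
    url = ('https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.leSoie'
           '&cat=50024406&s=' + str(offset) +
           '&q=%BC%FC%C5%CC&sort=d&style=g&industryCatId=50024406&type=pc#J_Filter')
    soup = BeautifulSoup(requests.get(url, headers=HEADERS).text, 'lxml')
    rows = zip(soup.select('div.productTitle a[target=_blank]'),
               soup.select('p.productPrice em'),
               soup.select('a.productShop-name'),
               soup.select('p.productStatus'))
    for title, price, name, status in rows:
        yield {'商品名': title.get('title'), '价格': price.get_text(),
               '店铺': name.get_text().strip(), '月销量': status.get_text().strip()}

with open(r'C:\Users\MY\Desktop\键盘数据.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
    writer.writeheader()  # write the header row once
    for offset in [0, 60, 120, 240, 300]:
        writer.writerows(scrape_page(offset))
        time.sleep(1)  # pause between pages instead of between items

The main difference is that writeheader() writes the column names once, and each product is written as a dict keyed by column name, so the column order no longer depends on the order in which fields are extracted.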