美文网首页
天猫键盘一日游-销量抓取

天猫键盘一日游-销量抓取

作者: 工厂里低层小白 | 来源:发表于2017-06-18 21:04 被阅读0次

Python 3 版本

import requests
import re
import json
import time
import csv
from bs4 import BeautifulSoup

def get_url(pages=None, output_path=r'C:\Users\MY\Desktop\键盘数据.csv'):
    """Scrape Tmall keyboard search results and save them to a CSV file.

    For every result offset in ``pages`` the Tmall search page is fetched
    and parsed, and one CSV row (product title, price, shop name, monthly
    sales text) is written per listing found on that page.

    Args:
        pages: Iterable of result offsets substituted into the ``s=`` query
            parameter. Defaults to 0, 60, ..., 300. The original hard-coded
            list omitted 180, which skipped the fourth result page.
        output_path: Destination CSV path; the file is overwritten.

    Returns:
        The number of data rows written (the header row is not counted).
    """
    if pages is None:
        pages = range(0, 301, 60)

    fieldnames = ['商品名', '价格', '店铺', '月销量']
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed wall-clock time.
    start = time.perf_counter()
    n = 0

    with open(output_path, 'w', newline='', encoding='utf-8') as file_1:
        writer = csv.writer(file_1)
        writer.writerow(fieldnames)

        for index, page in enumerate(pages):
            if index:
                # Pause between HTTP requests (not between already-parsed
                # rows) so the delay actually throttles traffic to the site.
                time.sleep(1)

            url = (
                'https://list.tmall.com/search_product.htm?spm=a220m.1000858.0.0.leSoie&cat=50024406&s='
                + str(page)
                + '&q=%BC%FC%C5%CC&sort=d&style=g&industryCatId=50024406&type=pc#J_Filter'
            )
            # A timeout keeps one stalled connection from hanging the run.
            r = requests.get(url, timeout=10)
            soup = BeautifulSoup(r.text, 'lxml')

            shop_names = soup.select('a.productShop-name')  # shop name
            shop_prices = soup.select('p.productPrice em')  # product price
            # Product title: anchors with target="_blank" inside the title div.
            shop_titles = soup.select('div.productTitle a[target=_blank]')
            shop_status = soup.select('p.productStatus')  # monthly sales

            # zip() stops at the shortest list, so a listing that is missing
            # any one of the four fields truncates the rows for this page.
            for title, price, name, status in zip(
                    shop_titles, shop_prices, shop_names, shop_status):
                data = [
                    title.get("title"),
                    price.get_text(),
                    name.get_text().strip(),
                    status.get_text().strip(),
                ]
                print(data)
                n += 1
                print('正在写入第%d条数据' % n)
                writer.writerow(data)
                print('第%d条数据已写入' % n)

    # The with-block has already closed the file at this point.
    print('关闭写入')
    end = time.perf_counter()
    print("本次抓取耗时: %f s" % (end - start))
    return n


if __name__ == "__main__":
    get_url()

相关文章

网友评论

      本文标题:天猫键盘一日游-销量抓取

      本文链接:https://www.haomeiwen.com/subject/bndwqxtx.html