利用 Ajax 接口动态爬取淘宝商品信息
按爬虫开发的典型流程依次实现:制定爬取规则 ---> 数据清洗 ---> 数据存储
实现代码:
import requests
import json
import time
import csv
class Tao_Bao(object):
    """Scrape Taobao product listings via the site's JSONP search API
    and write the results to one CSV file per search keyword.

    NOTE(review): the original code kept the collected titles in a
    module-level ``global`` list declared in the class body; that hack is
    replaced here by a normal instance attribute.
    """

    def __init__(self):
        # Titles of every item written so far, across all keywords;
        # used only for the final count printed by get_response().
        self.auctions_distinct = []

    def get_response(self):
        """Fetch search-result pages for each hard-coded keyword and save them.

        For every keyword a fresh ``<keyword>.csv`` file is created with a
        header row, then up to 6 result pages are requested from the Ajax
        endpoint and appended via :meth:`get_auctions_info`.  Performs live
        network I/O and writes files in the working directory.
        """
        for keyword in ['四件套', '手机壳']:
            # One CSV file per keyword, (re)created with just the header row.
            file_name = keyword + '.csv'
            # utf-8-sig so the Chinese text opens correctly in Excel on
            # Windows; newline='' is required by the csv module.
            with open(file_name, 'w', newline='', encoding='utf-8-sig') as csvfile:
                writer = csv.writer(csvfile)
                # 写入表头信息
                writer.writerow(['标题 ', '价格', '销量', '店铺', '区域'])
            # Number of pages to fetch; adjust as needed.
            for page in range(6):
                url = ('https://s.taobao.com/api?callback=jsonp665'
                       '&ajax=true&m=customized&q=%s&s=%s' % (keyword, page))
                r = requests.get(url)
                # Response is JSONP: ``jsonp665({...})``.  Strip only the
                # outermost wrapper so parentheses inside the JSON payload
                # do not truncate it.
                payload = r.text.split('(', 1)[1].rsplit(')', 1)[0]
                response_dict = json.loads(payload)
                auctions = response_dict['API.CustomizedApi']['itemlist']['auctions']
                self.get_auctions_info(auctions, file_name)
                # Throttle requests to avoid anti-scraping countermeasures.
                time.sleep(4)
        print('获取数据数量为:' + str(len(self.auctions_distinct)))

    def get_auctions_info(self, response_auctions_info, file_name):
        """Append one page of auction records to *file_name* as CSV rows.

        :param response_auctions_info: list of item dicts from the API; each
            must carry the keys ``raw_title``, ``view_price``, ``view_sales``,
            ``nick`` and ``item_loc``.
        :param file_name: path of the CSV file the rows are appended to.
        """
        with open(file_name, 'a', newline='', encoding='utf-8-sig') as csvfile:
            writer = csv.writer(csvfile)
            for item in response_auctions_info:
                # 写入数据
                writer.writerow([item['raw_title'], item['view_price'],
                                 item['view_sales'], item['nick'],
                                 item['item_loc']])
                self.auctions_distinct.append(str(item['raw_title']))
if __name__ == '__main__':
    # Build the scraper and run the full fetch-and-save pipeline.
    scraper = Tao_Bao()
    scraper.get_response()
实现效果:
成功爬取淘宝商品信息并写入对应的 CSV 文件。
网友评论