import requests
import os,sys
import time
import re
def get_html(http_session, url, timeout=30):
    """Fetch *url* through the given requests session and return the page text.

    Args:
        http_session: a requests.Session (carries the login cookies).
        url: absolute URL to fetch.
        timeout: seconds before the request is aborted. New, defaulted
            parameter — existing callers are unaffected; it prevents a
            dead connection from hanging the whole crawl.

    Returns:
        The response body decoded as text.
    """
    r = http_session.get(url, timeout=timeout)
    # The server may not declare a charset; trust the detected one so that
    # non-ASCII content (e.g. Chinese) decodes correctly.
    r.encoding = r.apparent_encoding
    return r.text
def get_imgurl(html):
    """Extract all ``"pic_url"`` values from *html* and return them as
    full ``http:`` URLs (the page embeds protocol-relative ``//...`` URLs).

    Each resulting URL is also printed, preserving the script's progress
    output.
    """
    img_urls = ['http:' + fragment
                for fragment in re.findall('"pic_url":"(.*?)"', html, re.S)]
    for full_url in img_urls:
        print(full_url)
    return img_urls
def write_tofile(img_urls, keyword, page):
    """Download each image URL into the ``<keyword>/`` directory.

    Files are named ``<page>_<index>.jpg`` so images from different result
    pages never clobber each other. Assumes the ``<keyword>`` directory
    already exists (created by the caller).

    Args:
        img_urls: iterable of absolute image URLs.
        keyword: directory name (the search keyword).
        page: page index used in the file-name prefix.
    """
    for idx, img_url in enumerate(img_urls):  # 'idx' avoids shadowing builtin id()
        # Timeout so one stalled download cannot hang the whole crawl.
        pic = requests.get(img_url, timeout=30)
        # os.path.join instead of manual '/' concatenation for portability.
        path = os.path.join(keyword, '{}_{}.jpg'.format(page, idx))
        with open(path, 'wb') as f:
            f.write(pic.content)
def main(http_session, url, keyword, page):
    """Crawl one search-result page: fetch it, extract the image URLs,
    and save each image under the keyword's directory."""
    page_html = get_html(http_session, url)
    write_tofile(get_imgurl(page_html), keyword, page)
if __name__ == '__main__':
    ############################### cookie preparation ###################################
    # Paste the raw Cookie header value here (find it via browser F12 -> Network).
    ori_cookie = "..."
    cookie = {}
    for c in ori_cookie.split(";"):
        # Skip fragments without '=' (e.g. the "..." placeholder or empty
        # pieces) instead of crashing with IndexError on new_c[1].
        if '=' not in c:
            continue
        key, value = c.split('=', 1)
        # Strip the leading space left by the "; " separator so cookie
        # names match what the server sent.
        cookie[key.strip()] = value
    http_session = requests.Session()  # Session() is the canonical name; session() is an alias
    requests.utils.add_dict_to_cookiejar(http_session.cookies, cookie)
    ############################### crawl by keyword #####################################
    keyword = "电脑"   # search keyword whose result images are downloaded
    Page = 5           # number of result pages to crawl
    if not os.path.exists(keyword):
        os.mkdir(keyword)
    for p in range(Page):
        # Taobao paginates with s = page_index * 44 (44 items per page).
        url = 'http://s.taobao.com/search?q=' + keyword + '&s=' + str(p * 44)
        print(url)
        main(http_session, url, keyword, p)
        time.sleep(10)  # throttle between pages to avoid being rate-limited
    ######################################################################################
# 网友评论 (reader comments — stray text from the source article; kept as a comment so the file parses)