```
import json
import time
from urllib.parse import urljoin

import OpenSSL
import requests
from lxml import etree
# Two-phase scraper:
#   1. Walk the Amazon search-result pages and collect the product links.
#   2. Visit every collected link and print its product title. When a page has
#      no product title, save the first centred <img> found on it under
#      ./img1/ (presumably the robot-check/captcha image -- TODO confirm).
#
# NOTE(review): the original paste had lost all indentation; the nesting below
# is reconstructed from the statement order and should be confirmed.

s = 'https://www.amazon.com'  # site root, used to resolve relative hrefs
i = 1  # next search-result page to request
a_list = []  # queue of product-page URLs gathered in phase 1
cont = 120  # number used in saved image file names; incremented per title-less page
# Alternative target (Best Sellers listing), kept for reference:
# url = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_nav_mas_1_mas'
# url1 = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=1'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
# NOTE: defined but not passed to any request in this script.
proxies = {
    'http': '114.99.7.122:8752',
}

# Phase 1: collect product links from listing pages 1..50.
# Any exception ends the paging early; links gathered so far are still
# processed in phase 2.
try:
    while i <= 50:
        # doc = requests.get("https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=%d"%i,headers=headers)
        doc = requests.get("https://www.amazon.com/s/ref=sr_pg_2?fst=as%3Aoff&rh=n%3A10158976011%2Cn%3A1055398&page={0}&bbn=10158976011&ie=UTF8&qid=1533365790".format(i), headers=headers)
        print(doc)
        doc.encoding = 'utf-8'
        res = etree.HTML(doc.text)
        # a_resoult = res.xpath("//ol/li/span[@class='a-list-item']/div[contains(@class,'a-section')]/span/a/@href")
        a_resoult = res.xpath("//div[@id='mainResults' or @id='centerMinus' or @id='btfResults']//ul//li[contains(@id,'result')]/div[@class='s-item-container']/div[contains(@class,'a-spacing-mini')]/div[contains(@class,'sx-line-clamp-4')]/a/@href")
        i += 1
        for href in a_resoult:
            print(href)
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the site root, so requests.get() below never
            # receives a schemeless URL.
            a_list.append(urljoin(s, href))
except Exception as err:
    print(err)

# Phase 2: visit each collected product page.
# The queue is drained from the front rather than calling a_list.remove()
# inside `for ... in a_list`: removing during iteration shifts the list under
# the iterator and silently skips every other URL.
while a_list:
    url = a_list.pop(0)
    # (a `cont <= 100` cap was disabled here in the original)
    # NOTE(review): sent without `headers`, unlike the listing requests --
    # confirm whether that is intentional.
    doc = requests.get(url)
    # doc.encoding = 'utf-8'
    resoult1 = doc.text
    res1 = etree.HTML(resoult1)
    print(resoult1)
    # a_resoult2 = res1.xpath("//span[@id='actualPriceValue']/strong[@class='priceLarge']/text()")
    a_resoult2 = res1.xpath("//div[@id='title_feature_div']/div[@id='titleSection']/h1[@id='title']/span[@id='productTitle']/text()")
    if a_resoult2:
        print(a_resoult2)
        continue

    # No product title on this page: record the URL and try to save the
    # first centred image, numbered with the incremented counter.
    cont += 1
    print(url)
    res3 = res1.xpath("//div[contains(@class,'a-text-center')]/img/@src")
    if res3:
        print(res3[0])
        response = requests.get(res3[0])
        with open('./img1/{0}.jpg'.format(cont), 'wb') as f:
            f.write(response.content)
```
网友评论