美文网首页
获取亚马逊列表页商品链接及进入详情页获取名字(为了测试出机器人页)

获取亚马逊列表页商品链接及进入详情页获取名字(为了测试出机器人页)

作者: 戒灵 | 来源:发表于2018-09-05 17:36 被阅读0次

    ```

    import json
    import os
    import time
    from urllib.parse import urljoin

    import OpenSSL
    import requests
    from lxml import etree

    # --- Crawl configuration and shared state -------------------------------

    # Root URL of the Amazon site.
    s = "https://www.amazon.com"

    # Listing-page counter; the crawl starts at page 1.
    i = 1

    # Product links gathered from the listing pages.
    a_list = list()

    # Running number used to name saved image files; it is incremented
    # before each use, so the first file written is numbered 121.
    cont = 120

    # Alternative listing URLs (Best Sellers category), kept for reference:
    # url = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_nav_mas_1_mas'
    # url1 = 'https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=1'

    # Browser-like User-Agent sent with the listing-page requests.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; WOW64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/67.0.3396.99 Safari/537.36"
        ),
    }

    # Proxy mapping for plain-HTTP traffic.
    # NOTE(review): no request in this script passes `proxies=`, and every
    # URL fetched is https, so this mapping currently has no effect.
    proxies = {
        "http": "114.99.7.122:8752",
    }

    # Phase 1: walk the search-result listing pages and collect product links.
    #
    # Starting from the module-level page counter `i`, fetch listing pages up
    # to page 50, extract every product href matched by the XPath below and
    # append it to `a_list`. Any exception is printed and ends the crawl
    # early (best effort); links gathered so far are kept for the next phase.
    try:
        while i <= 50:
            # Alternative source (Best Sellers listing), kept for reference:
            # doc = requests.get("https://www.amazon.com/Best-Sellers-Appstore-Android-Customization/zgbs/mobile-apps/9408481011/ref=zg_bs_pg_2?_encoding=UTF8&pg=%d"%i,headers=headers)
            doc = requests.get(
                "https://www.amazon.com/s/ref=sr_pg_2?fst=as%3Aoff&rh=n%3A10158976011%2Cn%3A1055398&page={0}&bbn=10158976011&ie=UTF8&qid=1533365790".format(i),
                headers=headers,
                # Without a timeout a stalled connection would block forever.
                timeout=30,
            )
            print(doc)
            doc.encoding = 'utf-8'
            listing_tree = etree.HTML(doc.text)

            # Alternative XPath matching the Best Sellers page layout:
            # a_resoult = res.xpath("//ol/li/span[@class='a-list-item']/div[contains(@class,'a-section')]/span/a/@href")
            hrefs = listing_tree.xpath("//div[@id='mainResults' or @id='centerMinus' or @id='btfResults']//ul//li[contains(@id,'result')]/div[@class='s-item-container']/div[contains(@class,'a-spacing-mini')]/div[contains(@class,'sx-line-clamp-4')]/a/@href")
            i += 1

            for href in hrefs:
                # Resolve relative hrefs against the site root so the detail
                # phase always receives a fetchable absolute URL; absolute
                # hrefs pass through urljoin unchanged.
                link = urljoin(s, href)
                print(link)
                a_list.append(link)
    except Exception as e:
        # Top-level best-effort boundary: report the failure and move on.
        print(e)

    # Phase 2: visit every collected product link and print its title.
    #
    # `a_list` is drained as a queue, so every link is visited exactly once
    # and the list is empty afterwards. (Removing items from the list while
    # iterating over it with `for` would skip every other link.)
    #
    # When a page has no product title, `cont` is incremented, the URL is
    # printed and, if the page contains an image inside an `a-text-center`
    # div, that image is saved as ./img1/<cont>.jpg.
    # NOTE(review): that image is presumably the robot-check (captcha)
    # picture the article title refers to - confirm.
    # NOTE(review): unlike the listing requests, no User-Agent header is sent
    # here; this looks deliberate (to provoke the robot page) - confirm
    # before changing it.
    while a_list:
        url = a_list.pop(0)
        try:
            doc = requests.get(url, timeout=30)
        except requests.RequestException as err:
            # One unreachable page must not abort the remaining links.
            print(err)
            continue

        resoult1 = doc.text
        res1 = etree.HTML(resoult1)
        print(resoult1)

        # Alternative XPath (product price), kept for reference:
        # a_resoult2 = res1.xpath("//span[@id='actualPriceValue']/strong[@class='priceLarge']/text()")
        a_resoult2 = res1.xpath("//div[@id='title_feature_div']/div[@id='titleSection']/h1[@id='title']/span[@id='productTitle']/text()")
        if a_resoult2:
            print(a_resoult2)
            continue

        # No title found on this page.
        cont += 1
        print(url)

        # Reuse the tree parsed above instead of parsing the same HTML again.
        res3 = res1.xpath("//div[contains(@class,'a-text-center')]/img/@src")
        if not res3:
            continue

        print(res3[0])
        try:
            response = requests.get(res3[0], timeout=30)
        except requests.RequestException as err:
            print(err)
            continue

        # Make sure the output directory exists before writing into it.
        os.makedirs('./img1', exist_ok=True)
        with open('./img1/{0}.jpg'.format(cont), 'wb') as f:
            f.write(response.content)

    ```

    相关文章

      网友评论

          本文标题:获取亚马逊列表页商品链接及进入详情页获取名字(为了测试出机器人页)

          本文链接:https://www.haomeiwen.com/subject/zlxzwftx.html