Amazon

作者: 乐小Pi孩_VoV | 来源:发表于2017-07-04 22:18 被阅读14次
    import requests
    from bs4 import BeautifulSoup
    
    # Amazon search-results page for the keyword "mouse".
    url = 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=mouse'

    # Bound the wait, and stop on an HTTP error status instead of parsing an
    # error page as if it were a result list.
    wb_data = requests.get(url, timeout=10)
    wb_data.raise_for_status()

    soup = BeautifulSoup(wb_data.text, 'lxml')

    # Every list item of the result list that matches the result-item classes.
    ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')

    # Only the third matched item is inspected; an IndexError here means the
    # page yielded fewer than three matches.
    item = ex[2]

    # Selector copied from the browser, kept for reference:
    #result_0 > div > div > div > div.a-fixed-left-grid-col.a-col-right > div.a-row.a-spacing-small > div:nth-child(1) > a
    # The same anchor carries both the title and the product link.
    title_anchor = item.select('div.a-row.a-spacing-none > a')[0]
    title = title_anchor['title']
    price = item.select('span.a-color-base.sx-zero-spacing')[0]['aria-label']
    review = item.select('div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text()
    # The second 'a-icon-alt' span is read; its text up to the first space is kept.
    star = item.select('span.a-icon-alt')[1].get_text().split(' ')[0]
    imageUrl = item.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['src']
    link = title_anchor['href']

    print(title, price, link, star, review, imageUrl)
    
    # coding:utf-8
    import os
    from bs4 import BeautifulSoup
    import requests
    
    # Parse a locally saved copy of the search-results page. The context
    # manager closes the file handle as soon as its bytes have been read.
    with open(r'F:\mouse.htm', 'rb') as data:
        data_ = data.read()
    soup = BeautifulSoup(data_, 'lxml')

    # Every list item of the result list that matches the result-item classes.
    ex = soup.select('ul#s-results-list-atf > li.s-result-item.celwidget')
    
    
    def getInfo(P):
        """Collect the listing fields of one search-result element.

        Each field is looked up with its own CSS selector. A field whose
        node or attribute is absent is reported as the string 'None', so a
        partially populated result never aborts the scrape. When an image
        URL is found it is handed to ``down`` for download; if the download
        fails the image URL is reported as 'None' as well.

        Args:
            P: Object exposing ``select(css)`` that returns a list of
                elements supporting ``[attr]`` lookup and ``get_text()``
                (the caller passes elements selected from a BeautifulSoup
                document).

        Returns:
            dict with the keys Title, Price, Review, Star, ImageUrl and
            Link. The ImageUrl value is also printed.
        """
        def _field(getter):
            # select(...)[n] raises IndexError when too few nodes match and
            # element[attr] raises KeyError when the attribute is absent.
            try:
                return getter()
            except (IndexError, KeyError):
                return 'None'

        title = _field(
            lambda: P.select('h2.a-size-medium.s-inline.s-access-title.a-text-normal')[0]['data-attribute'])
        price = _field(
            lambda: P.select('span.a-color-base.sx-zero-spacing')[0]['aria-label'])
        review = _field(
            lambda: P.select('div.a-row.a-spacing-mini > a.a-size-small.a-link-normal.a-text-normal')[0].get_text())
        # The second 'a-icon-alt' span is read; its text up to the first space is kept.
        star = _field(
            lambda: P.select('span.a-icon-alt')[1].get_text().split(' ')[0])
        # Keep only the URL of the first srcset candidate.
        imageUrl = _field(
            lambda: P.select('div.a-row > div > a.a-link-normal.a-text-normal > img')[0]['srcset']
            .split(',')[0].split('1x')[0].split(' ')[0])
        link = _field(
            lambda: P.select('div.a-row.a-spacing-none > a')[0]['href'])

        if imageUrl != 'None':
            try:
                down(imageUrl)
            except (IndexError, OSError):
                # Best effort: a URL that cannot be downloaded is reported as
                # missing. OSError also covers the exceptions raised by
                # requests, which derive from it.
                imageUrl = 'None'

        data___ = dict(Title=title, Price=price, Review=review, Star=star, ImageUrl=imageUrl, Link=link)
        print(data___['ImageUrl'])
        return data___
    
    
    def down(url):
        """Download an Amazon product image into the ``./img`` directory.

        The file name is the part of *url* that follows the Amazon image
        prefix, cut at its first dot, with ``.jpg`` appended. The source URL
        and the target path are printed once the file has been written.

        Args:
            url: Image URL containing the prefix
                ``https://images-na.ssl-images-amazon.com/images/I/``.

        Raises:
            IndexError: *url* does not contain the expected prefix.
            requests.RequestException: The request failed, timed out or
                returned an HTTP error status.
            OSError: The target file could not be written.
        """
        # Derive the name first so an unexpected URL fails before any
        # network traffic is generated.
        Img = url.split('https://images-na.ssl-images-amazon.com/images/I/')[1].split('.')[0]
        target = './img/{}.jpg'.format(Img)

        # Bound the wait and refuse to save an HTTP error body as an image.
        r = requests.get(url, timeout=10)
        r.raise_for_status()

        # The output directory may not exist on a first run.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, 'wb') as fs:
            fs.write(r.content)
        print('%s => %s' % (url, target))
    
    
    # Run the field extraction on every selected search result, in page order.
    for result_item in ex:
        getInfo(result_item)
    
    

    相关文章

      网友评论

          本文标题:Amazon

          本文链接:https://www.haomeiwen.com/subject/hwxfhxtx.html