美文网首页
selenium+xpath 爬取京东商品信息

selenium+xpath 爬取京东商品信息

作者: 把握_cc79 | 来源:发表于2018-07-16 16:47 被阅读0次

    这是一个没有翻页处理的爬取示例,可以参照隔壁淘宝信息抓取的做法稍作修改,实现翻页处理。

    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.keys import Keys
    from lxml import etree
    
    
    def search(max_retries=3):
        """Search JD for '鞋子', print the first result page, return the page count.

        Loads the JD home page, types the keyword into the search box and
        submits it with Enter, waits for the pagination widget, then calls
        ``get_products()`` for the result page that is currently shown.

        Relies on the module-level ``browser`` (WebDriver) and ``wait``
        (WebDriverWait) objects.

        Args:
            max_retries: How many times the whole flow is restarted after a
                ``TimeoutException`` before giving up.

        Returns:
            The text of the "total pages" element in the bottom pager.

        Raises:
            TimeoutException: If the flow still times out after
                ``max_retries`` restarts.
        """
        attempts = 0
        while True:
            try:
                browser.get('https://www.jd.com/')
                search_box = wait.until(
                    EC.presence_of_element_located((By.XPATH, '//*[@id="key"]'))
                )
                search_box.clear()
                # Pressing Enter submits the form, so no button click is needed.
                search_box.send_keys('鞋子', Keys.ENTER)
                total = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/em[1]/b')
                    )
                )
                get_products()
                return total.text
            except TimeoutException:
                # Bounded retry: unbounded recursion here would end in a
                # RecursionError if the page never becomes available.
                attempts += 1
                if attempts > max_retries:
                    raise
    
    def get_products():
        """Parse the currently loaded result page and print one dict per product.

        Waits for the pager container to be present, parses
        ``browser.page_source`` with lxml, and extracts image URLs, comment
        text and shop names with three separate XPath queries. Relies on the
        module-level ``browser`` and ``wait`` objects.

        Returns:
            A list of dicts with the keys ``'images'``, ``'comment'`` and
            ``'name'``; empty when nothing matched.
        """
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@class="page clearfix"]')))
        tree = etree.HTML(browser.page_source)

        images = tree.xpath('//*[@id="J_goodsList"]/ul/li/div/div[1]/a/img/@src')
        comments = tree.xpath('//*[@class="p-commit"]//a/text()')
        names = tree.xpath('//*[@class="curr-shop"]/@title')

        # The three queries are independent, so values are paired by position
        # and zip() stops at the shortest list.
        # NOTE(review): if some items lack one of the nodes the lists can drift
        # out of alignment — confirm against the live page markup.
        products = [
            {'images': image, 'comment': comment, 'name': name}
            for image, comment, name in zip(images, comments, names)
        ]
        for product in products:
            print(product)
        return products
    
    if __name__ == '__main__':
        # `browser` and `wait` are module-level on purpose: search() and
        # get_products() read them as globals.
        browser = webdriver.Chrome()
        wait = WebDriverWait(browser, 10)  # explicit-wait timeout of 10 seconds
        try:
            search()
        finally:
            # Always shut the browser/driver down, even if the scrape fails.
            browser.quit()
    

    相关文章

      网友评论

          本文标题:selenium+xpath 爬取京东商品信息

          本文链接:https://www.haomeiwen.com/subject/kambpftx.html