美文网首页IT@程序员猿媛
Python + selenium 京东网

Python + selenium 京东网

作者: ylz小叶 | 来源:发表于2019-04-07 20:48 被阅读1次

    import time

    from lxml import etree

    from selenium import webdriver

    from selenium.webdriver.support import expected_conditions as EC

    from selenium.webdriver.common.by import By

    from selenium.webdriver.support.ui import WebDriverWait

    import re

    import csv

    class JD_spider(object):
        """Selenium-driven crawler for JD.com search results.

        The crawler opens the JD home page, submits a search, walks through
        the result pages with the "next page" button, and stores one record
        per product (shop, title, price, comment count) in ``self.goods``.
        After every parsed page the accumulated records are written to
        ``csv_path``.
        """

        # Location of the chromedriver binary handed to webdriver.Chrome.
        driver_path = r"D:\yeliangzhen\Chrome\chromedriver.exe"

        # CSV file (relative to the working directory) that receives the results.
        csv_path = 'jingdong.csv'

        # CSV column order; these are also the keys of every product record.
        csv_headers = ["店铺", "名称", "价格", "评论"]

        # Matches the "next page" link by class token, so it is found both when
        # its class is exactly "pn-next" and when it is "pn-next disabled" (the
        # state checked for on the last page). An exact @class='pn-next' test
        # would never see the disabled button and the wait would time out.
        next_btn_xpath = (
            "//div[@class='p-wrap']"
            "//a[contains(concat(' ', normalize-space(@class), ' '), ' pn-next ')]"
        )

        def __init__(self):
            """Start a Chrome session and prepare an empty result list."""
            # NOTE(review): the `executable_path` keyword is kept as in the
            # original call; confirm it is still accepted by the installed
            # Selenium version.
            self.driver = webdriver.Chrome(executable_path=JD_spider.driver_path)

            # Product records collected over all pages visited so far.
            self.goods = []

            # Crawling starts from this URL.
            self.page_url = 'https://www.jd.com/'

        def run(self, keyword='iphone'):
            """Search JD for *keyword* and crawl every result page.

            Args:
                keyword: Text typed into the search box. Defaults to
                    ``'iphone'``, the value that used to be hard-coded.

            Raises:
                Whatever Selenium raises on navigation, lookup, or wait
                timeout; the browser is closed before the error propagates.
            """
            try:
                self.driver.get(self.page_url)

                search_box = self.driver.find_element(By.ID, "key")
                search_box.send_keys(keyword)
                self.driver.find_element(By.CLASS_NAME, "button").click()

                while True:
                    # Wait for the pager first and only then read the page
                    # source, so the parsed HTML is the page that was waited on.
                    next_btn = WebDriverWait(driver=self.driver, timeout=20).until(
                        EC.presence_of_element_located((By.XPATH, self.next_btn_xpath))
                    )
                    self.get_page(self.driver.page_source)

                    css_classes = (next_btn.get_attribute("class") or "").split()
                    if "disabled" in css_classes:
                        # Last page reached: the next button is disabled.
                        break

                    next_btn.click()
                    # Pause before the next round so the following page has
                    # time to start loading.
                    time.sleep(2)
            finally:
                # Always release the browser and chromedriver processes, even
                # when the crawl aborts with an exception.
                self.driver.quit()

        def get_page(self, source):
            """Parse one result page and append its products to ``self.goods``.

            Products without a title element are skipped; a missing shop link
            is recorded as an empty string. When at least one product was
            parsed, all records collected so far are written to ``csv_path``.
            Parse and file errors are printed rather than raised, so one bad
            page does not stop the crawl.

            Args:
                source: HTML text of the result page.
            """
            if not source:
                return

            try:
                html = etree.HTML(source)
            except (ValueError, etree.LxmlError) as e:
                print(e)
                return
            if html is None:
                return

            items = html.xpath(
                "//ul[@class='gl-warp clearfix']//li[@class='gl-item']/div[@class='gl-i-wrap']"
            )

            parsed_any = False
            for item in items:
                title_nodes = item.xpath(".//div[@class='p-name p-name-type-2']//a/em")
                if not title_nodes:
                    # Without a title the entry is not a usable product record;
                    # skip it instead of abandoning the rest of the page.
                    continue
                title = title_nodes[0].xpath("string(.)").strip()

                shop_texts = item.xpath(".//div[@class='p-shop']//span/a/text()")
                shop = shop_texts[0] if shop_texts else ""

                price = "".join(item.xpath(".//div[@class='p-price']//text()")).strip()
                commit = "".join(
                    item.xpath(".//div[@class='p-commit']//strong//text()")
                ).strip()

                good = {"店铺": shop, "名称": title, "价格": price, "评论": commit}
                self.goods.append(good)
                parsed_any = True

                print(good)
                print("--" * 100)

            if not parsed_any:
                return

            # Rewrite the file once per page (not once per product) with all
            # records collected so far, so partial results survive a later
            # failure.
            try:
                with open(self.csv_path, 'w', encoding="utf-8", newline='') as f:
                    writer = csv.DictWriter(f, self.csv_headers)
                    writer.writeheader()
                    writer.writerows(self.goods)
            except OSError as e:
                print(e)

    if __name__ == '__main__':
        # Script entry point: build the crawler, then start the crawl.
        spider = JD_spider()
        spider.run()

    相关文章

      网友评论

        本文标题:Python + selenium 京东网

        本文链接:https://www.haomeiwen.com/subject/qudeiqtx.html