美文网首页
6、Selenium框架 -- 网页信息定位Demo

6、Selenium框架 -- 网页信息定位Demo

作者: 波罗的海de夏天 | 来源:发表于2020-04-25 09:18 被阅读0次

    Demo脚本:

    # -*- coding:utf-8 -*-
    from selenium import webdriver
    # ActionChains: queues low-level interactions (e.g. mouse hover) and performs them in order
    from selenium.webdriver import ActionChains
    import json
    import time
    import os
    
    # info location
    def to_goods_page(driver, web_url):
        """Navigate from ``web_url`` to a product's spec table and save its contents.

        Steps, in order:

        1. Open ``web_url`` and hover the "电脑" link.
        2. Click the "笔记本" link and switch to the window it is expected to open.
        3. Click the brand, price and sort filters, then the first product in the
           list, and switch to the window that product is expected to open.
        4. Scroll down, click the second tab of the detail area, parse every
           ``Ptable-item`` element with ``get_info_element_dict`` and pass the
           resulting list to ``save_goods_info``.

        Elements are located with ``find_element(by, value)`` /
        ``find_elements(by, value)`` because the ``find_element_by_*`` shortcuts
        were removed in Selenium 4.3.  The strategy is given as its plain string
        value ('link text', 'xpath', 'class name' -- the values of ``By.LINK_TEXT``,
        ``By.XPATH`` and ``By.CLASS_NAME``), which works on Selenium 3 and 4 alike
        and needs no additional import.

        The fixed ``time.sleep`` pauses are kept as they were; they give the page
        time to react after each interaction.

        Args:
            driver: A started Selenium WebDriver.
            web_url: URL of the page the walk starts from.

        Returns:
            None.  The parsed data is written out by ``save_goods_info``.
        """
        driver.get(web_url)
        time.sleep(1)

        # Hover (not click) the "电脑" entry so that its sub-menu becomes reachable.
        computer_element = driver.find_element('link text', '电脑')
        time.sleep(1)
        ActionChains(driver).move_to_element(computer_element).perform()
        time.sleep(2)

        # Click "笔记本" in the sub-menu.
        driver.find_element('link text', "笔记本").click()
        time.sleep(1)

        # The driver is still focused on the start window; move to the new one.
        index_handle = driver.current_window_handle
        _switch_to_new_window(driver, (index_handle,))
        time.sleep(2)

        # The XPaths below are tied to the page layout the author scraped; the
        # labels are the ones given in the original notes.
        # Brand filter: "thinkpad".
        driver.find_element('xpath', '//*[@id="brand-11518"]/a/img').click()
        time.sleep(1)
        # Price filter: "7000以上".
        driver.find_element('xpath', '//*[@id="J_selectorPrice"]/div/div[2]/div/ul/li[7]/a').click()
        time.sleep(1)
        # Sort order: "评论数".
        driver.find_element('xpath', '//*[@id="J_filter"]/div[1]/div[1]/a[3]').click()
        time.sleep(1)
        # Open the first product in the result list.
        driver.find_element('xpath', '//*[@id="plist"]/ul/li[1]/div/div[1]/a/img').click()
        time.sleep(1)

        # Move to the product window: the one that is neither the start window
        # nor the list window the driver is currently focused on.
        notebook_handle = driver.current_window_handle
        _switch_to_new_window(driver, (index_handle, notebook_handle))
        time.sleep(1)

        # Scroll to a vertical offset of 1000 px.
        driver.execute_script('window.scrollTo(0, 1000)')
        time.sleep(1)

        # Select the spec tab ("规格与包装" per the original notes -- TODO confirm).
        driver.find_element('xpath', '//*[@id="detail"]/div[1]/ul/li[2]').click()
        time.sleep(1)

        # Each Ptable-item element becomes one dict in the result list.
        info_elements = driver.find_elements('class name', 'Ptable-item')
        result_list = [
            get_info_element_dict(info_element) for info_element in info_elements
        ]
        time.sleep(1)

        save_goods_info(result_list)


    def _switch_to_new_window(driver, known_handles):
        """Switch ``driver`` to each window whose handle is not in ``known_handles``.

        With exactly one unknown window the driver ends up focused on it.  With
        several it ends on the last one listed by ``driver.window_handles``; with
        none the focus is left unchanged.
        """
        for handle in driver.window_handles:
            if handle not in known_handles:
                driver.switch_to.window(handle)
    
    
    # 解析商品信息,封装成dict数据
    def get_info_element_dict(info_element):
        """Convert one spec-table section into ``{section title: {key: value}}``.

        The section title is the text of the element's ``<h3>``.  Keys are the
        texts of its ``<dt>`` elements and values are the texts of its ``<dd>``
        elements, paired by position.

        Elements are located with ``find_element(by, value)`` /
        ``find_elements(by, value)`` because the ``find_element_by_*`` shortcuts
        were removed in Selenium 4.3.  The strategy is given as its plain string
        value ('tag name', 'xpath' -- the values of ``By.TAG_NAME`` and
        ``By.XPATH``), which works on Selenium 3 and 4 alike.

        Args:
            info_element: An element exposing ``find_element`` / ``find_elements``
                whose results carry a ``text`` attribute (a Selenium WebElement).

        Returns:
            A dict with a single entry: the section title mapped to a dict of
            key text -> value text.  If the numbers of keys and values differ,
            the unpaired trailing entries are dropped instead of raising
            ``IndexError``.
        """
        # Column 1: the section title.
        section_title = info_element.find_element('tag name', 'h3').text
        # Column 2: the attribute names.
        key_elements = info_element.find_elements('tag name', 'dt')
        # Column 3: the attribute values.  <dd> elements whose class contains
        # "Ptable-tips" are skipped -- presumably help-tip cells without a
        # matching <dt>, which would shift the pairing.
        value_elements = info_element.find_elements(
            'xpath', 'dl//dd[not(contains(@class, "Ptable-tips"))]')

        key_and_value_dict = {
            key_element.text: value_element.text
            for key_element, value_element in zip(key_elements, value_elements)
        }
        return {section_title: key_and_value_dict}
    
    
    # 信息保存到文件
    def save_goods_info(result_list, file_path=None):
        """Write ``result_list`` to a UTF-8 JSON file.

        Args:
            result_list: JSON-serialisable data to store.
            file_path: Destination file.  When omitted, the path is built from
                the module-level ``goods_file_path`` (directory) and
                ``goods_file_name`` (file name), which the ``__main__`` block
                assigns before the scrape starts.

        Raises:
            NameError: If ``file_path`` is omitted and those module-level names
                have not been assigned (e.g. the module was imported rather
                than run as a script).
            OSError: If the file cannot be opened for writing.
        """
        if file_path is None:
            # os.path.join gives the same result whether or not the directory
            # string ends with a separator.
            file_path = os.path.join(goods_file_path, goods_file_name)
        with open(file_path, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the file
            # instead of writing \uXXXX escapes.
            json.dump(result_list, f, ensure_ascii=False)
    
    
    if __name__ == '__main__':
        # Output directory: <current working directory>/goods_info/.
        # The trailing '' makes os.path.join end the path with a separator, so
        # the value stays usable with plain string concatenation.
        project_path = os.path.abspath(os.path.curdir)
        goods_file_path = os.path.join(project_path, 'goods_info', '')
        print('----- goods file path:', goods_file_path)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(goods_file_path, exist_ok=True)
        # Name of the output file inside goods_file_path.
        goods_file_name = 'computer-2.infos'

        # Start Chrome through the local chromedriver binary.
        driver = webdriver.Chrome('/Users/****/Desktop/work_note/ai_git/lesson_selenium/chromedriver')
        try:
            driver.maximize_window()
            # Start page of the scrape.
            web_url = 'https://www.jd.com/'
            to_goods_page(driver, web_url)

            # Pause before closing the browser (presumably so the final page
            # can be looked at).
            time.sleep(5)
        finally:
            # Always shut the browser down, even when the scrape raised.
            driver.quit()
    




    相关文章

      网友评论

          本文标题:6、Selenium框架 -- 网页信息定位Demo

          本文链接:https://www.haomeiwen.com/subject/warhwhtx.html