美文网首页
去哪儿旅行网CSS字体偏移

去哪儿旅行网CSS字体偏移

作者: Lonelyroots | 来源:发表于2022-05-07 23:23 被阅读0次

有任何爬虫方面的问题都可以私信我，有空的话很乐意为你效劳！
该文仅供学习参考!!!

# -*- coding: utf-8 -*-
# @Time     : 2022/5/7 20:59
# @Author   : Lonelyroots
# @Email    : 1731498306@qq.com
# @File     : 去哪儿旅行网2022年5月16日机票数据爬取.py
# @Software : PyCharm

from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re

class CssOffset:
    """Scrape flight rows from a Qunar one-way search page with Selenium.

    The displayed ticket price is split across several ``<b>`` elements
    inside ``em.rel``: the first one holds placeholder digits (one ``<i>``
    each) and the following ones carry replacement digits plus an inline
    ``style``.  ``replace_price`` merges the two into the final price.
    """

    # Search page opened when no explicit URL is passed to the constructor.
    DEFAULT_URL = 'https://flight.qunar.com/site/oneway_list.htm?searchDepartureAirport=%E4%B8%8A%E6%B5%B7&searchArrivalAirport=%E5%8C%97%E4%BA%AC&searchDepartureTime=2022-05-16&searchArrivalTime=2022-05-10&nextNDays=0&startSearch=true&fromCode=SHA&toCode=BJS&lowestPrice=null'

    def __init__(self, url=None, timeout=10):
        """Start a Chrome driver and prepare the explicit wait helper.

        Args:
            url: Search page to open; falls back to ``DEFAULT_URL``.
            timeout: Seconds the explicit waits block before raising.
        """
        option = webdriver.ChromeOptions()
        # Drop the "enable-automation" switch and the AutomationControlled
        # blink feature so the browser looks less like a driven session.
        option.add_experimental_option('excludeSwitches', ['enable-automation'])
        option.add_argument('--disable-blink-features=AutomationControlled')
        self.driver = webdriver.Chrome(options=option)
        self.url = url or self.DEFAULT_URL
        self.wait = WebDriverWait(self.driver, timeout)

    def close(self):
        """Quit the browser session started by ``__init__``."""
        self.driver.quit()

    def get_flight_data(self):
        """Open the search page and return a generator of parsed flight rows.

        The returned generator reads from live page elements, so it must be
        consumed before the driver is closed.
        """
        script = "Object.defineProperty(navigator, 'webdriver', {get: () =>false,});"
        self.driver.get(self.url)
        # Executed after navigation, so it only affects checks the page makes
        # from this point on.
        self.driver.execute_script(script)
        self.wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.btn-box>div.btn'))
        ).click()
        divs = self.wait.until(
            EC.presence_of_all_elements_located((By.XPATH, '//div[@class="mb-10"]/div/div'))
        )
        return self.parse_line_data(divs)

    def parse_line_data(self, divs):
        """Yield ``[flight_name, 'begin-end', true_price]`` for each row element.

        Args:
            divs: Iterable of row WebElements located by ``get_flight_data``.
        """
        # The find_element_by_* helpers were removed in Selenium 4.3; the
        # By-based locator form is the supported spelling.
        for div in divs:
            # Text of the "air" cell of the row.
            flight_name = div.find_element(By.XPATH, './/div[@class="air"]').text
            # Departure time.
            begin_time = div.find_element(By.XPATH, './/div[@class="sep-lf"]/h2').text
            # Arrival time.
            end_time = div.find_element(By.XPATH, './/div[@class="sep-rt"]/h2').text
            # Placeholder digits of the price.
            digit_nodes = div.find_elements(By.XPATH, './/em[@class="rel"]/b/i')
            price = [node.text for node in digit_nodes]
            # Every <b> after the first one is a replacement digit.
            cover_nodes = div.find_elements(By.XPATH, './/em[@class="rel"]/b')
            to_cover_prices_and_styles = [
                (node.text, node.get_attribute('style')) for node in cover_nodes[1:]
            ]
            true_price = self.replace_price(price, to_cover_prices_and_styles)
            yield [flight_name, f'{begin_time}-{end_time}', true_price]

    @staticmethod
    def replace_price(price, to_cover_prices_and_styles):
        """Restore the real price from placeholder digits and their overrides.

        NOTE(review): the original body was elided in the published source.
        This reconstruction assumes each override's inline style carries a
        negative ``left: -Npx`` offset and that ``N / width`` counts digit
        cells from the right-hand end of the placeholder digits - confirm
        against the live page markup.

        Args:
            price: Placeholder digit strings, left to right.
            to_cover_prices_and_styles: ``(digit_text, style)`` pairs.

        Returns:
            The corrected digits joined into one string.
        """
        digits = list(price)
        for text, style in to_cover_prices_and_styles:
            style = style or ''
            left = re.search(r'(?<![\w-])left\s*:\s*(-?\d+(?:\.\d+)?)px', style)
            if left is None:
                # No offset to interpret; leave the placeholders untouched.
                continue
            width = re.search(r'(?<![\w-])width\s*:\s*(\d+(?:\.\d+)?)px', style)
            cell = float(width.group(1)) if width else 0.0
            if cell <= 0:
                # Assumed default cell width when the style omits one.
                cell = 16.0
            index = round(float(left.group(1)) / cell)
            # Only negative offsets that land on an existing digit apply.
            if -len(digits) <= index < 0:
                digits[index] = text
        return ''.join(digits)

if __name__ == '__main__':
    # Script entry point: scrape the configured search page and print the rows.
    css_offset = CssOffset()
    try:
        # Materialise the rows while the browser is still open; the generator
        # reads text from live page elements.
        data = list(css_offset.get_flight_data())
    finally:
        # Always release the Chrome session, even when scraping fails.
        css_offset.driver.quit()
    print(data)

需要完整代码可以私信我哦

相关文章

网友评论

      本文标题:去哪儿旅行网CSS字体偏移

      本文链接:https://www.haomeiwen.com/subject/rvsfurtx.html