有什么爬虫的问题都可以私信我哦，如果我有空的话，很乐意为你效劳！
该文仅供学习参考!!!
# -*- coding: utf-8 -*-
# @Time : 2022/5/7 20:59
# @Author : Lonelyroots
# @Email : 1731498306@qq.com
# @File : 去哪儿旅行网2022年5月16日机票数据爬取.py
# @Software : PyCharm
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import re
class CssOffset:
def __init__(self):
    """Start a Chrome session and store the search URL and a shared wait.

    Chrome is launched with the ``enable-automation`` switch excluded and
    the ``AutomationControlled`` Blink feature disabled (presumably so the
    browser looks less like an automated one). Sets ``self.driver``,
    ``self.url`` and ``self.wait`` (a ``WebDriverWait`` with a 10 second
    timeout bound to the driver).
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option(
        'excludeSwitches',
        ['enable-automation'],
    )
    chrome_options.add_argument(
        '--disable-blink-features=AutomationControlled'
    )

    browser = webdriver.Chrome(options=chrome_options)
    self.driver = browser
    # One-way search results page; all query parameters are fixed in the URL.
    self.url = 'https://flight.qunar.com/site/oneway_list.htm?searchDepartureAirport=%E4%B8%8A%E6%B5%B7&searchArrivalAirport=%E5%8C%97%E4%BA%AC&searchDepartureTime=2022-05-16&searchArrivalTime=2022-05-10&nextNDays=0&startSearch=true&fromCode=SHA&toCode=BJS&lowestPrice=null'
    self.wait = WebDriverWait(browser, 10)
def get_flight_data(self):
    """Load the search page and hand the flight rows to the parser.

    Opens ``self.url``, runs a script that redefines
    ``navigator.webdriver`` to report ``false``, clicks the
    ``div.btn-box>div.btn`` element once it is clickable, then waits for
    the result row elements to be present.

    Returns:
        The result of ``self.parse_line_data`` for the located elements.
    """
    self.driver.get(self.url)
    hide_webdriver_js = "Object.defineProperty(navigator, 'webdriver', {get: () =>false,});"
    self.driver.execute_script(hide_webdriver_js)

    # NOTE(review): presumably this button dismisses a pop-up that covers
    # the results -- confirm against the live page.
    button_locator = (By.CSS_SELECTOR, 'div.btn-box>div.btn')
    button = self.wait.until(EC.element_to_be_clickable(button_locator))
    button.click()

    rows_locator = (By.XPATH, '//div[@class="mb-10"]/div/div')
    flight_rows = self.wait.until(
        EC.presence_of_all_elements_located(rows_locator)
    )
    return self.parse_line_data(flight_rows)
def parse_line_data(self, divs):
    """Yield one parsed record per flight row element.

    Uses ``find_element(By.XPATH, ...)`` / ``find_elements(By.XPATH, ...)``
    because the ``find_element_by_xpath`` helpers were removed from
    Selenium in 4.3.

    Args:
        divs: Iterable of Selenium elements, one per flight row.

    Yields:
        ``[flight_name, '<begin_time>-<end_time>', true_price]`` where
        ``true_price`` is whatever ``self.replace_price`` returns for the
        row.
    """
    for div in divs:
        # Text of the row's "air" div (the original comment calls it the
        # airport; the variable name says flight name).
        flight_name = div.find_element(By.XPATH, './/div[@class="air"]').text
        # Departure time.
        begin_time = div.find_element(By.XPATH, './/div[@class="sep-lf"]/h2').text
        # Arrival time.
        end_time = div.find_element(By.XPATH, './/div[@class="sep-rt"]/h2').text

        # Placeholder digits of the displayed price.
        placeholders = div.find_elements(By.XPATH, './/em[@class="rel"]/b/i')
        price = [placeholder.text for placeholder in placeholders]

        # Digits that are drawn over the placeholders, with their inline
        # style. The first <b> is skipped; presumably it is the one that
        # holds the <i> placeholders collected above.
        to_cover_prices = div.find_elements(By.XPATH, './/em[@class="rel"]/b')
        to_cover_prices_and_styles = [
            (to_cover_price.text, to_cover_price.get_attribute('style'))
            for to_cover_price in to_cover_prices[1:]
        ]

        true_price = self.replace_price(price, to_cover_prices_and_styles)
        yield [flight_name, f'{begin_time}-{end_time}', true_price]
@staticmethod
def replace_price(price, to_cover_prices_and_styles):
"""Restore the real price."""
# NOTE(review): the body is elided in this source -- the `......` below is a
# placeholder left by the author, not valid Python. Presumably the real
# implementation overlays each (text, style) pair from
# `to_cover_prices_and_styles` onto the placeholder digits in `price`
# according to the CSS offset in the style -- confirm against the full code.
......
if __name__ == '__main__':
    css_offset = CssOffset()
    try:
        # get_flight_data() returns a generator that reads from the live
        # page, so it must be fully consumed before the browser is closed.
        data = list(css_offset.get_flight_data())
    finally:
        # Always end the Chrome session, even if scraping fails; otherwise
        # the browser and driver processes are left running.
        css_offset.driver.quit()
    print(data)
需要完整代码可以私信我哦
网友评论