美文网首页
12306爬取

12306爬取

作者: 徒手說梦话 | 来源:发表于2019-02-17 13:59 被阅读0次
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
class Spider():
    """Drive a Firefox session through the 12306 login, query and order pages.

    The flow is: ask the user for the trip details on the console
    (``wait_input``), wait for a successful login (``login``), then query
    trains and place an order for the first wanted train that still has
    tickets (``choose_message``). ``run`` chains the three steps.
    """

    # Path of the geckodriver binary handed to webdriver.Firefox.
    driver_path = r'D:\python\geckodriver-v0.23.0-win64\geckodriver.exe'
    # Upper bound, in seconds, used by every explicit WebDriverWait below.
    wait_timeout = 1000

    def __init__(self):
        """Start Firefox and remember the URLs the flow navigates between."""
        # NOTE(review): ``executable_path`` is the Selenium 3 calling style;
        # newer Selenium releases expect a Service object - confirm against
        # the installed version before upgrading.
        self.driver = webdriver.Firefox(executable_path=self.driver_path)
        self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        self.my_url = 'https://kyfw.12306.cn/otn/view/index.html'
        self.schedule_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
        self.name_url = 'https://kyfw.12306.cn/otn/confirmPassenger/initDc'

    @staticmethod
    def _split_csv(text):
        """Split a comma-separated answer into stripped, non-empty items.

        Both the ASCII comma and the full-width "," are accepted as
        separators.
        """
        parts = text.replace(",", ",").split(",")
        return [part.strip() for part in parts if part.strip()]

    @staticmethod
    def _has_ticket(text):
        """Return True when a seat cell reads "有" or is a plain number."""
        text = text.strip()
        return text == "有" or text.isdigit()

    def _wait_for(self, condition):
        """Block until *condition* holds and return whatever it yields."""
        return WebDriverWait(self.driver, self.wait_timeout).until(condition)

    def login(self):
        """Open the login page, wait for the account page, go to the query page.

        The method does not fill in any credentials; it only waits until the
        browser URL equals ``my_url`` (presumably after the user logs in by
        hand in the opened window).
        """
        self.driver.get(self.login_url)
        self._wait_for(EC.url_to_be(self.my_url))
        print('登录成功')
        time.sleep(3)
        self.driver.get(self.schedule_url)

    def wait_input(self):
        """Read the trip details from the console and store them on ``self``.

        Sets ``from_address``, ``destination``, ``star_time`` (strings) and
        ``name`` / ``trains_numbers`` (lists of strings).
        """
        self.from_address = input("出发地:")
        self.destination = input("目的地:")
        self.star_time = input("出发时间:")
        self.name = self._split_csv(input("乘客名字(多个乘客就逗号分开):"))
        # e.g. ["D3754", "D8482"]
        self.trains_numbers = self._split_csv(input("车次(多趟就用,隔开)"))

    def choose_message(self):
        """Query trains and order the first wanted train that has tickets.

        The search form is not filled in by this code: it waits until the
        departure, destination and date inputs contain the values given to
        ``wait_input``, then clicks the query button and scans the result
        rows. Returns ``None`` whether or not an order was placed.
        """
        # The ``*_value`` condition inspects the <input> element's value
        # attribute; the plain text variant would look at the element's text,
        # which an <input> does not have.
        self._wait_for(EC.text_to_be_present_in_element_value(
            (By.ID, 'fromStationText'), self.from_address))
        self._wait_for(EC.text_to_be_present_in_element_value(
            (By.ID, 'toStationText'), self.destination))
        # Departure date, expected as yyyy-mm-dd.
        self._wait_for(EC.text_to_be_present_in_element_value(
            (By.ID, 'train_date'), self.star_time))

        # Click the query button once it is usable.
        self._wait_for(EC.element_to_be_clickable((By.ID, "query_ticket")))
        self.driver.find_element(By.ID, 'query_ticket').click()

        # Wait for the result table to show up.
        self._wait_for(EC.presence_of_all_elements_located(
            (By.XPATH, '//tbody[@id="queryLeftTable"]')))

        # Rows without a style attribute are the ones carrying train data.
        rows = self.driver.find_elements(
            By.XPATH, './/tbody[@id="queryLeftTable"]/tr[not(@style)]')

        for row in rows:
            self.train_name = row.find_element(By.CLASS_NAME, "number").text
            if self.train_name not in self.trains_numbers:
                continue
            seats = row.find_element(By.XPATH, ".//td[4]").text
            if not self._has_ticket(seats):
                continue
            row.find_element(By.CLASS_NAME, "btn72").click()
            self._book_passengers()
            # The browser has left the result page, so the remaining rows are
            # no longer valid elements; stop after the first order.
            return

    def _book_passengers(self):
        """Tick the requested passengers on the order page and submit once."""
        self._wait_for(EC.url_to_be(self.name_url))
        time.sleep(2)

        self._wait_for(EC.presence_of_all_elements_located(
            (By.XPATH, '//ul[@id="normal_passenger_id"]')))
        entries = self.driver.find_elements(
            By.XPATH, '//*[@id="normal_passenger_id"]/li')
        for entry in entries:
            # Read the text from the element; text() cannot be put in the
            # XPath passed to find_element.
            passenger = entry.find_element(By.XPATH, './label').text
            print(passenger)
            if passenger in self.name:
                entry.find_element(By.XPATH, './input[@typeflag="1"]').click()

        # Submit only after every passenger row has been examined.
        self.driver.find_element(By.ID, 'submitOrder_id').click()

        # Confirm the order once the confirmation button becomes clickable.
        confirm = self._wait_for(
            EC.element_to_be_clickable((By.ID, 'qr_submit_id')))
        confirm.click()
        print("完成")

    def run(self):
        """Run the whole flow: console input, login, then query and order."""
        self.wait_input()
        self.login()
        self.choose_message()

def main():
    """Build a Spider and run its complete booking flow."""
    Spider().run()

# Start the booking flow only when this file is executed as a script.
if __name__ == "__main__":
    main()
``

相关文章

网友评论

      本文标题:12306爬取

      本文链接:https://www.haomeiwen.com/subject/xasgeqtx.html