美文网首页
12306爬取

12306爬取

作者: 徒手說梦话 | 来源:发表于2019-02-17 13:59 被阅读0次
    from selenium import webdriver
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.by import By
    import time
    class Spider():
        """Semi-automatic 12306 ticket booking driven through Selenium/Firefox.

        Flow (see ``run``): read the trip details from stdin, open the login
        page and wait until the browser reaches the account home page, then on
        the ticket-search page wait until the form fields contain the values
        read from stdin, query, and reserve the first requested train that has
        tickets for the requested passengers.
        """

        # Path to the geckodriver binary handed to webdriver.Firefox.
        driver_path = r'D:\python\geckodriver-v0.23.0-win64\geckodriver.exe'
        # Timeout, in seconds, for the explicit waits. Several waits depend on
        # the browser reaching a given page or form state, hence the large value.
        wait_timeout = 1000

        def __init__(self):
            """Start Firefox and record the 12306 URLs used by the flow."""
            self.driver = webdriver.Firefox(executable_path=self.driver_path)
            self.login_url = 'https://kyfw.12306.cn/otn/resources/login.html'
            self.my_url = 'https://kyfw.12306.cn/otn/view/index.html'
            self.schedule_url = 'https://kyfw.12306.cn/otn/leftTicket/init?linktypeid=dc'
            self.name_url = 'https://kyfw.12306.cn/otn/confirmPassenger/initDc'

        @staticmethod
        def _split_list(text):
            """Split a comma-separated answer into a list of stripped items.

            Both the ASCII comma and the full-width comma are accepted, because
            the train-number prompt itself shows a full-width comma. Empty items
            are dropped, so an empty answer yields an empty list.
            """
            parts = text.replace(",", ",").split(",")
            return [part.strip() for part in parts if part.strip()]

        @staticmethod
        def _has_ticket(ticket):
            """Return True when a seat cell reads "有" or a number."""
            ticket = ticket.strip()
            return ticket == "有" or ticket.isdigit()

        def login(self):
            """Open the login page, wait for the account page, go to search.

            Blocks until the browser URL equals ``my_url``, then loads the
            ticket-search page.
            """
            self.driver.get(self.login_url)
            WebDriverWait(self.driver, self.wait_timeout).until(
                EC.url_to_be(self.my_url)
            )
            print('登录成功')
            time.sleep(3)
            self.driver.get(self.schedule_url)

        def wait_input(self):
            """Read the trip details from stdin and store them on the instance.

            Sets ``from_address``, ``destination``, ``star_time`` (strings) and
            ``name``, ``trains_numbers`` (lists of strings).
            """
            # Surrounding whitespace is stripped: the waits in choose_message
            # look for these exact substrings in the form fields.
            self.from_address = input("出发地:").strip()
            self.destination = input("目的地:").strip()
            self.star_time = input("出发时间:").strip()
            self.name = self._split_list(input("乘客名字(多个乘客就逗号分开):"))
            self.trains_numbers = self._split_list(
                input("车次(多趟就用,隔开)")
            )  # e.g. ["D3754", "D8482"]

        def choose_message(self):
            """Query trains and book the first requested one that has tickets.

            Waits until the search form's fields contain the values read by
            ``wait_input``, clicks the query button, then scans the result rows.

            Returns:
                True if an order was submitted and confirmed, False if no
                requested train with tickets was found in the result rows.
            """
            wait = WebDriverWait(self.driver, self.wait_timeout)

            # The *_element_value condition checks the element's value attribute
            # (needed for <input>); the variant without "value" checks the text
            # content instead, which an <input> does not have.
            wait.until(EC.text_to_be_present_in_element_value(
                (By.ID, 'fromStationText'), self.from_address))
            wait.until(EC.text_to_be_present_in_element_value(
                (By.ID, 'toStationText'), self.destination))
            # Departure date, yyyy-mm-dd.
            wait.until(EC.text_to_be_present_in_element_value(
                (By.ID, 'train_date'), self.star_time))

            # element_to_be_clickable returns the element once it is clickable.
            wait.until(EC.element_to_be_clickable((By.ID, "query_ticket"))).click()

            # Wait for the result table after the click.
            # NOTE(review): this waits for the <tbody> itself; if it exists
            # before the query, the rows may not be loaded yet - confirm.
            wait.until(EC.presence_of_all_elements_located(
                (By.XPATH, '//tbody[@id="queryLeftTable"]')))

            # Rows without a style attribute are the ones holding train info.
            rows = self.driver.find_elements(
                By.XPATH, './/tbody[@id="queryLeftTable"]/tr[not(@style)]')

            for row in rows:
                self.train_name = row.find_element(By.CLASS_NAME, "number").text
                if self.train_name not in self.trains_numbers:
                    continue
                ticket = row.find_element(By.XPATH, ".//td[4]").text
                if not self._has_ticket(ticket):
                    continue
                row.find_element(By.CLASS_NAME, "btn72").click()
                self._submit_order(wait)
                # The browser has left the result page, so the remaining row
                # elements are no longer usable; stop after the first booking.
                return True
            return False

        def _submit_order(self, wait):
            """Tick the requested passengers, then submit and confirm once."""
            wait.until(EC.url_to_be(self.name_url))
            time.sleep(2)

            wait.until(EC.presence_of_all_elements_located(
                (By.XPATH, '//ul[@id="normal_passenger_id"]')))
            passengers = self.driver.find_elements(
                By.XPATH, '//*[@id="normal_passenger_id"]/li')
            for item in passengers:
                # text must be read from the element; it cannot go in the XPath.
                passenger = item.find_element(By.XPATH, './label').text
                print(passenger)
                if passenger in self.name:
                    item.find_element(By.XPATH, './input[@typeflag="1"]').click()

            # Submit only after every passenger has been processed.
            self.driver.find_element(By.ID, 'submitOrder_id').click()

            # Explicit wait for the confirmation button, instead of changing the
            # driver-wide implicit wait.
            wait.until(EC.element_to_be_clickable((By.ID, 'qr_submit_id'))).click()
            print("完成")

        def run(self):
            """Run the whole flow: read input, log in, then book."""
            self.wait_input()
            self.login()
            self.choose_message()
    
    def main():
        """Create a Spider and run its complete booking flow."""
        Spider().run()
    
    # Start the booking flow only when this file is executed as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
        main()
    ``

    相关文章

      网友评论

          本文标题:12306爬取

          本文链接:https://www.haomeiwen.com/subject/xasgeqtx.html