美文网首页
爬取课程试题

爬取课程试题

作者: Noza_ea8f | 来源:发表于2021-05-18 21:57 被阅读0次
    import sys

    import pandas as pd
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    from lib import user_info
    
    
    def wait_key():
        """Block until the operator confirms on the console.

        Prompts repeatedly on standard input. A reply of exactly one space
        returns to the caller; a reply of ``q`` terminates the process with
        exit status 0. Any other reply is echoed back and the prompt is
        shown again.
        """
        prompt = '核对无误请输入“空格”:'
        reply = input(prompt)
        while reply != ' ':
            if reply == 'q':
                sys.exit(0)
            # Echo the unrecognised reply so the operator can see what was read.
            print('你输入的是 ' + reply + '!')
            reply = input(prompt)
    
    
    # Entry URL; the login fields are looked up on the page it loads.
    base_url = "http://thome.ouchn.cn/"

    # Launch the browser. Element lookups wait up to 10 seconds before failing.
    driver = webdriver.Chrome()
    driver.implicitly_wait(10)

    # Clear all cookies, open the page, and print the cookies it set.
    driver.delete_all_cookies()
    driver.get(base_url)
    print(driver.get_cookies())

    # Enter the user name and password.
    # find_element(by, value) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    driver.find_element(By.ID, 'username').send_keys(user_info.username)
    driver.find_element(By.ID, 'password').send_keys(user_info.password)

    # Log in.
    driver.find_element(
        By.CSS_SELECTOR,
        'body > div > div > div > form > div > div > div:nth-child(4) > button',
    ).click()

    # Click the "enter" link in the course table.
    driver.find_element(
        By.CSS_SELECTOR,
        '.teachtable > table:nth-child(1) > tbody:nth-child(1) > tr:nth-child(4) > td:nth-child(5) > a:nth-child(1)',
    ).click()
    
    
    def crawl_data():
        """Scrape question and answer text from the last browser window.

        Switches the module-level ``driver`` to the last window handle, then
        collects the text of every ``div`` with class ``qtext`` followed by
        the text of every ``div`` with class ``rightanswer``.

        Returns:
            pandas.DataFrame: one column named ``试题`` holding the question
            texts first and the answer texts after them.
        """
        # The last handle is taken to be the most recently opened window.
        driver.switch_to.window(driver.window_handles[-1])

        # find_elements(by, value) works on Selenium 3 and 4; the
        # find_elements_by_* helpers were removed in Selenium 4.3.
        questions = driver.find_elements(By.XPATH, '//div[@class="qtext"]')
        texts = [element.text for element in questions]

        # Answers go into the same column after the questions, so the two
        # counts do not have to match for the DataFrame to be built.
        answers = driver.find_elements(By.XPATH, '//div[@class="rightanswer"]')
        texts.extend(element.text for element in answers)

        return pd.DataFrame({'试题': texts})
    
    
    # Wait for the operator to pick the task by hand in the browser
    # (formative assessment task 2), then scrape the page that was opened.
    wait_key()
    df = crawl_data()

    # ExcelWriter.save() was removed in pandas 2.0; the context manager
    # saves and closes the workbook on exit on both old and new versions.
    with pd.ExcelWriter('xx.xlsx') as writer:
        df.to_excel(writer, index=False)
    

    相关文章

      网友评论

          本文标题:爬取课程试题

          本文链接:https://www.haomeiwen.com/subject/lvayjltx.html