美文网首页
Scrapy爬取课程试题及答案

Scrapy爬取课程试题及答案

作者: Noza_ea8f | 来源:发表于2021-05-21 21:38 被阅读0次
    import scrapy
    
    
    class ThomeSpider(scrapy.Spider):
        """Scrape quiz questions and correct answers from reviewed quiz attempts.

        The spider requests a quiz report overview page with login cookies,
        follows every "review attempt" link found on it, and appends the
        question texts to ``exams.txt`` and the correct-answer texts to
        ``answers.txt`` (both relative to the working directory).
        """

        name = 'thome'
        allowed_domains = ['ouchn.cn']
        # Formative assessment task 1
        # start_urls = ['http://hebei.ouchn.cn/mod/quiz/report.php?id=49058&mode=overview']
        # Formative assessment task 2
        # start_urls = ['http://hebei.ouchn.cn/mod/quiz/report.php?id=49059&mode=overview']
        # Formative assessment task 3
        start_urls = ['http://hebei.ouchn.cn/mod/quiz/report.php?id=49060&mode=overview']

        # Not referenced by any method of this class; kept so external users
        # of the attribute keep working.
        ls = []

        # Section headings that can match the question XPath; they are not
        # questions and are skipped when writing the output.
        _SECTION_HEADINGS = ('一、单选题', '二、多选题', '三、判断题')

        def start_requests(self):
            """Yield the initial request for the report page with login cookies.

            The page requires an authenticated session, so the cookies copied
            from a logged-in browser are attached to the first request.
            """
            # NOTE(review): a session cookie and username are hard-coded here.
            # They expire and should not live in source control; consider
            # loading them from settings or the environment.
            cookies = 'CheckCode=bAg0XLruL2w=; MoodleSession=7hmruvl32410n80b4n4ntfte9f; username=qhdzhengwei; UserName=qhdzhengwei'
            # Scrapy expects a dict. Split on the FIRST '=' only: values such
            # as 'bAg0XLruL2w=' contain '=' themselves and must stay intact.
            cookies = {
                key: value
                for key, _, value in (pair.partition('=') for pair in cookies.split('; '))
            }
            yield scrapy.Request(
                self.start_urls[0],
                callback=self.parse,
                cookies=cookies
            )

        def parse(self, response):
            """Yield one request per "review attempt" link on the overview page."""
            urls = response.xpath('//tbody//td/a[@title="回顾试答"]/@href').getall()
            for url in urls:
                # urljoin leaves absolute links untouched and resolves relative
                # ones, which scrapy.Request would otherwise reject.
                yield scrapy.Request(url=response.urljoin(url), callback=self.get_data)

        def get_data(self, response):
            """Extract questions and answers from one attempt page and save them.

            Every question paragraph (except section headings) is printed and
            appended to ``exams.txt``; every correct-answer block is printed
            and appended to ``answers.txt``. Nothing is returned.
            """
            # Collect the question texts, one line per question paragraph.
            questions = []
            for paragraph in response.xpath('//div[@class="qtext"]/p'):
                # A question may be split over several text nodes; merge them.
                text = ''.join(paragraph.xpath('.//text()').extract())
                if text in self._SECTION_HEADINGS:
                    continue
                print(text)
                questions.append(text)
            self._append_lines('exams.txt', questions)

            # Collect the correct-answer texts.
            answers = []
            for block in response.xpath('//div[@class="rightanswer"]'):
                text = ''.join(block.xpath('.//text()').extract())
                print(text)
                answers.append(text)
            self._append_lines('answers.txt', answers)

        @staticmethod
        def _append_lines(path, lines):
            """Append each entry of *lines* to *path* as one UTF-8 encoded line.

            The file is opened once per call instead of once per line, and is
            not touched at all when *lines* is empty.
            """
            if not lines:
                return
            # Explicit UTF-8 so Chinese text is written the same way on every
            # platform instead of depending on the locale's default codec.
            with open(path, 'a', encoding='utf-8') as f:
                f.writelines(line + '\n' for line in lines)
    
    

    相关文章

      网友评论

          本文标题:Scrapy爬取课程试题及答案

          本文链接:https://www.haomeiwen.com/subject/fxtijltx.html