美文网首页工作生活
scrapy downloading images

scrapy downloading images

作者: 狼无雨雪 | 来源:发表于2019-07-05 12:57 被阅读0次
    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import scrapy
    # import codecs
    import os
    from bingproxy import BingProxy
    
    class ImagesSpider(scrapy.Spider):
        """Download every URL listed in a text file and save each body as a ``.png`` file.

        The spider reads ``url_file`` (one URL per line, blank lines ignored),
        issues a GET request for each URL and writes the raw response body to
        ``<dir_path>/<last URL segment>.png``. No content-type check is done:
        whatever bytes come back are written with a ``.png`` suffix.
        """

        name = "images"
        # Directory that receives the downloaded files. It is created as a side
        # effect of defining the class (i.e. at import time), as in the original.
        dir_path = "huaban_bingproxy_big_images"
        # Text file holding one URL per line; read lazily by start_requests().
        url_file = "processing_threading_huaban_big_images_all_urls_part3.txt"
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
        # Left empty on purpose: requests are generated by start_requests().
        start_urls = []
        # Not used by the active request path. An earlier variant of this spider
        # built the request URL with self.bingProxy.get_proxy_url(url); the
        # attribute is kept so that code relying on it keeps working.
        bingProxy = BingProxy()

        def start_requests(self):
            """Yield one ``scrapy.Request`` per non-blank line of ``url_file``.

            Each line is stripped of surrounding whitespace before use. The file
            is opened relative to the current working directory.
            """
            with open(self.url_file) as url_list:
                for line in url_list:
                    url = line.strip()
                    # str.strip() never returns None, so a truthiness test is
                    # enough to skip blank lines.
                    if url:
                        yield scrapy.Request(url=url, callback=self.parse, method="GET")

        def parse(self, response):
            """Write ``response.body`` to ``<dir_path>/<last URL segment>.png``.

            The file name is derived from ``response.url``. A trailing ``/`` is
            dropped first so that such URLs do not all collapse onto the same
            ``.png`` file name. Any query string is kept as part of the name.
            """
            file_name = response.url.rstrip("/").split("/")[-1] + ".png"
            # Use the class attribute rather than repeating the directory literal,
            # so the directory created above and the one written to always agree.
            path = os.path.join(self.dir_path, file_name)
            with open(path, "wb") as f:
                f.write(response.body)
    
    
    
    
    
    

    相关文章

      网友评论

        本文标题:scrapy downloading images

        本文链接:https://www.haomeiwen.com/subject/zatdhctx.html