美文网首页
爬取不同分辨率下的不同地图图片数据

爬取不同分辨率下的不同地图图片数据

作者: 左手一份执 | 来源:发表于2017-07-14 13:04 被阅读0次

    爬取不同分辨率下的不同地图图片数据

    from math import *
    import urllib
    import urllib2
    import requests
    import os
    # Module-level list kept for backward compatibility. create_url() builds
    # and returns its own list instead of appending here, so calling it more
    # than once cannot accumulate entries from earlier calls.
    url_list = []


    def create_url(first, second):
        """Build the relative tile paths ``first/y/z.png`` for a square grid.

        For ``create_url(8, 8)`` this yields ``8/0/0.png``, ``8/0/1.png`` ...
        ``8/0/255.png`` ... ``8/255/255.png``.

        Args:
            first: Value used verbatim as the leading path component.
            second: Exponent of the grid size; ``y`` and ``z`` each run over
                ``range(2 ** second)``.

        Returns:
            A new list of relative paths, ordered by ``y`` and then ``z``.
            Every path is also printed as it is generated.
        """
        side = int(2 ** second)
        paths = []
        for y in range(side):
            for z in range(side):
                path = '%s/%s/%s.png' % (first, y, z)
                paths.append(path)
                # Single-argument parenthesised form prints the same text on
                # Python 2 and Python 3.
                print(path)
        return paths
    
    def create_dirs(url_list, base_filepath):
        """Create the directory that each relative tile path will be saved in.

        For a path ``a/b/c.png`` the directory ``base_filepath + 'a/b/'`` is
        created (including missing parents). Each newly created directory is
        printed. Directories that already exist are left untouched.

        Args:
            url_list: Iterable of relative paths of the form ``a/b/c.png``.
            base_filepath: Prefix prepended to every directory; it is
                concatenated as-is, so it should end with a path separator.

        Raises:
            OSError: If a directory cannot be created for a reason other than
                it already existing.
        """
        handled = set()
        for x in url_list:
            parts = x.split('/')
            file_path = base_filepath + str(parts[0]) + '/' + str(parts[1]) + '/'
            # Many tiles share one directory; only look at each directory once.
            if file_path in handled:
                continue
            handled.add(file_path)
            if os.path.exists(file_path):
                continue
            print(file_path)
            try:
                os.makedirs(file_path)
            except OSError:
                # Something else may have created the directory between the
                # existence check and makedirs; only that case is ignored.
                if not os.path.isdir(file_path):
                    raise
    
    
    base_url = 'http://a.tile.openstreetmap.org/'


    def download_png(url_list, filepath):
        """Download every tile in ``url_list`` and store it below ``filepath``.

        Each relative path ``x`` is fetched from ``base_url + x`` and written
        to ``filepath + x``. The destination directories are not created
        here; they must already exist.

        Args:
            url_list: Iterable of relative tile paths such as ``8/0/0.png``.
            filepath: Output prefix; it is concatenated as-is, so it should
                end with a path separator.
        """
        for x in url_list:
            # Honour the caller-supplied output prefix instead of a fixed path.
            urllib.urlretrieve(base_url + x, filename=filepath + x)
    # urlretrieve writes into directories that must already exist, so the
    # directory tree is created before any download starts. create_dirs skips
    # directories that are already present, so re-running is harmless.
    output_root = 'd:/test/'
    url_list = create_url(8, 8)
    create_dirs(url_list, output_root)
    download_png(url_list, output_root)
    

    用法

    • 先修改文件路径,分辨率等参数
    • 首次运行时先注释掉download_png函数调用,改为调用create_dirs函数创建目录;目录创建完成后,再取消download_png调用的注释,开始下载图片

    采用多进程爬取,并处理网络带来的IOError

    from math import *
    import urllib
    import urllib2
    import requests
    import os
    from exceptions import IOError
    import logging
    import logging
    
    # Send WARNING and above to myapp.log; filemode 'w' truncates the file
    # each time the configuration is applied.
    logging.basicConfig(
        filename='myapp.log',
        filemode='w',
        level=logging.WARNING,
        datefmt='%a, %d %b %Y %H:%M:%S',
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    )

    # Tile server prefix, local output prefix and the shared path list.
    base_url = 'http://a.tile.openstreetmap.org/'
    filepath = 'd:/test/'
    url_list = []
    
    def create_url(start, rate):
        """Build the relative tile paths ``rate/y/z.png`` from ``y = start`` on.

        ``y`` runs over ``range(start, 2 ** rate)`` and ``z`` over
        ``range(2 ** rate)``. Every generated path is also logged at WARNING
        level.

        Args:
            start: First ``y`` value to generate; values at or beyond
                ``2 ** rate`` give an empty result.
            rate: Leading path component and exponent of the grid size.

        Returns:
            A new list of relative paths, ordered by ``y`` and then ``z``.
            The list is built locally, so repeated calls do not accumulate
            entries from earlier calls.
        """
        side = int(2 ** rate)
        paths = []
        for y in range(start, side):
            for z in range(side):
                path = '%s/%s/%s.png' % (rate, y, z)
                paths.append(path)
                logging.warning(path)
        return paths
    
    def create_dirs(url_list, base_filepath):
        """Create the directory that each relative tile path will be saved in.

        For a path ``a/b/c.png`` the directory ``base_filepath + 'a/b/'`` is
        created (including missing parents) and logged at WARNING level.
        Directories that already exist are left untouched.

        Args:
            url_list: Iterable of relative paths of the form ``a/b/c.png``.
            base_filepath: Prefix prepended to every directory; it is
                concatenated as-is, so it should end with a path separator.

        Raises:
            OSError: If a directory cannot be created for a reason other than
                it already existing.
        """
        handled = set()
        for x in url_list:
            parts = x.split('/')
            file_path = base_filepath + str(parts[0]) + '/' + str(parts[1]) + '/'
            # Many tiles share one directory; only look at each directory once.
            if file_path in handled:
                continue
            handled.add(file_path)
            if os.path.exists(file_path):
                continue
            logging.warning(file_path)
            try:
                os.makedirs(file_path)
            except OSError:
                # Several worker processes may create the same directory at
                # the same time; losing that race is not an error.
                if not os.path.isdir(file_path):
                    raise
    
    
    def download_png(url_list, filepath, retries=1, retry_delay=180):
        """Download every tile in ``url_list``, retrying after an IOError.

        Each relative path ``x`` is fetched from ``base_url + x`` and written
        to ``filepath + x``. The URL is printed and logged at WARNING level
        before the first attempt. With the default arguments the behaviour
        matches the previous fixed policy: one retry after 180 seconds.

        Args:
            url_list: Iterable of relative tile paths such as ``10/630/0.png``.
            filepath: Output prefix; it is concatenated as-is, so it should
                end with a path separator.
            retries: How many additional attempts to make after a failed
                download before giving up.
            retry_delay: Seconds to sleep before each retry.

        Raises:
            IOError: If a download still fails after the last retry.
        """
        for x in url_list:
            url = base_url + x
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print(url)
            logging.warning(url)
            failures = 0
            while True:
                try:
                    urllib.urlretrieve(url, filename=filepath + x)
                    break
                except IOError as serr:
                    logging.error(serr)
                    if failures >= retries:
                        # Out of retries: let the caller see the failure.
                        raise
                    failures += 1
                    time.sleep(retry_delay)
    
    import multiprocessing
    import time
    
    def worker(start, rate, stop=None):
        """Generate, prepare directories for, and download one slice of tiles.

        Calls ``create_url(start, rate)``, optionally trims the result, then
        runs ``create_dirs`` and ``download_png`` with the module-level
        ``filepath``.

        Args:
            start: First value of the second path component to process.
            rate: Passed through to ``create_url``.
            stop: Optional exclusive upper bound on the second path component.
                ``None`` keeps everything ``create_url`` returns, which is the
                behaviour of the original two-argument workers.
        """
        tiles = create_url(start, rate)
        if stop is not None:
            # Paths look like 'rate/y/z.png'; keep only y < stop so that
            # neighbouring workers do not fetch the same tiles.
            tiles = [t for t in tiles if int(t.split('/')[1]) < stop]
        create_dirs(tiles, filepath)
        download_png(tiles, filepath)


    # The numbered workers all had identical bodies (worker_5 was even defined
    # twice); the names are kept as aliases so existing references still work.
    worker_1 = worker_2 = worker_3 = worker_4 = worker_5 = worker


    if __name__ == "__main__":
        # (start, rate, stop) per process. The slices are disjoint and together
        # cover the same range the original start values did (630 up to the
        # end of the grid), so no tile is downloaded by more than one process.
        slices = [
            (630, 10, 700),
            (700, 10, 800),
            (800, 10, 900),
            (900, 10, 1000),
            (1000, 10, None),
        ]
        processes = [
            multiprocessing.Process(target=worker, args=bounds)
            for bounds in slices
        ]
        for proc in processes:
            proc.start()
        # Wait for every download process to finish before the script exits.
        for proc in processes:
            proc.join()
    

    相关文章

      网友评论

          本文标题:爬取不同分辨率下的不同地图图片数据

          本文链接:https://www.haomeiwen.com/subject/lcwnhxtx.html