remove bad images

作者: 狼无雨雪 | 来源:发表于2019-07-04 18:58 被阅读0次

    第一步:先找出不能 resize 的图片(无法读取、维度不是三维、像素过大或不是 RGB 的图片)

    #encoding:utf-8 
    
    ## Find images that cannot be read or are not usable as RGB input
    import os
    import re
    from skimage import io,transform
    from PIL import Image
    # from PIL import ImageFile
    # ImageFile.LOAD_TRUNCATED_IMAGES = True
    # Show Pillow's pixel limit, then disable it by setting it to None and show
    # the new value. Pillow uses MAX_IMAGE_PIXELS as the threshold for its
    # decompression-bomb warning; None turns that check off.
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("1 Warning:")
    print(Image.MAX_IMAGE_PIXELS)

    print("2 No warning:")
    Image.MAX_IMAGE_PIXELS = None
    print(Image.MAX_IMAGE_PIXELS)
    
    # Directory scanned for unusable images, and the directory meant for the
    # resized output (created if missing; this step does not write to it).
    input_path = '/home/t-huch/cycleGAN/pytorch-CycleGAN-and-pix2pix/datasets/cubism/trainB'
    output_path = '/home/t-huch/cycleGAN/pytorch-CycleGAN-and-pix2pix/datasets/cubism/resize_trainB/'

    # Upper bound applied to height * width * channels of a decoded image.
    # NOTE(review): the value equals 2 * Pillow's default MAX_IMAGE_PIXELS
    # (89478485), which Pillow applies to width * height only -- confirm that
    # counting the channel axis here is intended.
    max_array_elements = 178956970

    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # File names collected per rejection reason.
    remove_list = []          # too many elements, or more than 3 channels
    two_dimension = []        # decoded to a 2-D array (no channel axis)
    not_three_dimension = []  # decoded to neither 2 nor 3 dimensions
    truncated_image = []      # io.imread raised while decoding

    file_list = os.listdir(input_path)
    for index, file_name in enumerate(file_list):
        file_path = os.path.join(input_path, file_name)
        print("is reading: %d %s" % (index, file_path))

        # Test the file name's suffix rather than searching the whole path
        # for a pattern, so only real ".jpg" files are examined.
        if not file_name.endswith(".jpg"):
            continue

        try:
            image = io.imread(file_path)
        except Exception as error:
            # Any decoding failure marks the file as unreadable. Report the
            # error itself: no image (and therefore no shape) exists here.
            truncated_image.append(file_name)
            print("image is truncated %s" % file_name)
            print("image is truncated %s" % error)
            continue

        shape = image.shape
        if len(shape) == 2:
            print("two dimension %s" % file_name)
            print("two dimension %s" % (shape,))
            two_dimension.append(file_name)
        elif len(shape) != 3:
            print("not three dimension %s" % file_name)
            print("not three dimension %s" % (shape,))
            not_three_dimension.append(file_name)
        elif shape[0] * shape[1] * shape[2] >= max_array_elements or shape[2] > 3:
            print("images over pixels or not RGB %s" % file_name)
            print("images over pixels or not RGB %s" % (shape,))
            remove_list.append(file_name)

    print("truncated_image %s" % truncated_image)
    print("two_dimension %s" % two_dimension)
    print("not_three_dimension %s" % not_three_dimension)
    print("remove_list %s" % remove_list)
    

    第二步:删除上一步找出的图片(not_three_dimension、remove_list 和 truncated_image 三个列表中的文件)

    # Delete every file recorded in not_three_dimension, remove_list and
    # truncated_image, in that order. Files in two_dimension are not deleted.
    # os.remove is used instead of a shell "rm" string so that file names
    # containing spaces or shell metacharacters are handled correctly.
    for remove_file_name in not_three_dimension + remove_list + truncated_image:
        remove_file_path = os.path.join(input_path, remove_file_name)
        try:
            os.remove(remove_file_path)
        except OSError as error:
            # Keep going when a file is already gone or cannot be removed.
            print("failed to remove %s: %s" % (remove_file_path, error))
            continue
        print("rm " + remove_file_path)
    

    相关文章

      网友评论

        本文标题:remove bad images

        本文链接:https://www.haomeiwen.com/subject/kxxdhctx.html