美文网首页
remove bad images

remove bad images

作者: 狼无雨雪 | 来源:发表于2019-12-23 17:06 被阅读0次
    
    #remove bad images
    #encoding:utf-8 
    ## Used to find and delete bad images (unreadable, wrong number of dimensions, oversized, or not 3-channel)
    import os
    import re
    import sys
    from skimage import io,transform
    from PIL import Image
    # from PIL import ImageFile
    # ImageFile.LOAD_TRUNCATED_IMAGES = True
    # Show Pillow's pixel limit as shipped; per the Pillow documentation, images
    # above this limit trigger a decompression-bomb warning when opened.
    default_pixel_limit = Image.MAX_IMAGE_PIXELS
    print(1, "Warning:")
    print(default_pixel_limit)

    # Setting the limit to None switches that check off (Pillow documentation),
    # so very large images can be opened without the warning.
    Image.MAX_IMAGE_PIXELS = None
    print(2, "No warning:")
    print(Image.MAX_IMAGE_PIXELS)
    
    # Directory to scan, taken from the first command-line argument.
    # Exit with a usage message instead of an opaque IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <image_directory>")
    input_path = sys.argv[1]


    # File names (not full paths) flagged for deletion, grouped by reason.
    remove_list = []          # 3-D arrays that are too large or have more than 3 channels
    two_dimension = []        # arrays with exactly two dimensions
    not_three_dimension = []  # arrays whose number of dimensions is neither 2 nor 3
    truncated_image = []      # files that could not be read at all
    file_list = os.listdir(input_path)
    for index, file_name in enumerate(file_list):
        file_path = os.path.join(input_path, file_name)
        print("is reading: ",index,  file_path)

        # Only regular files whose *name* ends in ".jpg" are inspected.  The
        # previous regex (".jpg" with an unescaped dot, searched in the whole
        # path) also matched names such as "xjpg" or "a.jpg.bak" and any file
        # below a directory whose path happened to match.
        if not file_name.endswith(".jpg") or not os.path.isfile(file_path):
            continue

        try:
            shape = io.imread(file_path).shape
        except Exception as e:
            # Any read failure marks the file as unreadable.  Report the actual
            # error; no image (and therefore no shape) is available here.
            truncated_image.append(file_name)
            print("image is truncated", file_name)
            print("image is truncated", e)
            continue

        if len(shape) == 2:
            print("two dimension", file_name)
            print("two dimension", shape)
            two_dimension.append(file_name)
        elif len(shape) != 3:
            print("not three dimension", file_name)
            print("not three dimension", shape)
            not_three_dimension.append(file_name)
        # 178956970 equals twice Pillow's default MAX_IMAGE_PIXELS (89478485).
        # NOTE(review): the product below also includes the channel count, so it
        # is an element count rather than a pixel count -- confirm this is intended.
        elif (shape[0] * shape[1] * shape[2] >= 178956970) or (shape[2] > 3):
            print("images over pixels or not RGB", file_name)
            print("images over pixels or not RGB", shape)
            remove_list.append(file_name)

    # Summary of everything that was flagged, one list per reason.
    print("truncated_image", truncated_image)
    print("two_dimension", two_dimension)
    print("not_three_dimension", not_three_dimension)
    print("remove_list", remove_list)
    
    # Delete every flagged file, processing the lists in this fixed order:
    # two-dimensional, other non-3-D, oversized / non-RGB, then unreadable.
    flagged_groups = (
        two_dimension,
        not_three_dimension,
        remove_list,
        truncated_image,
    )
    for flagged_names in flagged_groups:
        for flagged_name in flagged_names:
            doomed_path = os.path.join(input_path, flagged_name)
            os.remove(doomed_path)
            # Echo the deletion in shell-like form.
            print("rm " + doomed_path)
    

    相关文章

      网友评论

          本文标题:remove bad images

          本文链接:https://www.haomeiwen.com/subject/aqftoctx.html