美文网首页
kaggle猫狗大战

kaggle猫狗大战

作者: poteman | 来源:发表于2019-08-05 09:28 被阅读0次
    • 下载数据
    # Download the cats_and_dogs_filtered.zip archive to /tmp (IPython shell escape).
    # NOTE: --no-check-certificate turns off TLS certificate verification for this download.
    !wget --no-check-certificate \
      https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
      -O /tmp/cats_and_dogs_filtered.zip
    
    • 解压数据
    import os
    import zipfile
    
    # Extract the downloaded archive into /tmp. The context manager closes
    # the zip file even if extraction raises part-way through.
    local_zip = '/tmp/cats_and_dogs_filtered.zip'

    with zipfile.ZipFile(local_zip, 'r') as zip_ref:
        zip_ref.extractall('/tmp')
    
    • 查看数据
    # List the contents of the extracted training directory (IPython shell escape).
    !ls /tmp/cats_and_dogs_filtered/train
    
    • 数据路径
    # Directory layout of the extracted dataset:
    #   <base_dir>/{train,validation}/{cats,dogs}
    base_dir = '/tmp/cats_and_dogs_filtered'

    # One directory per data split.
    train_dir, validation_dir = (
        os.path.join(base_dir, split) for split in ('train', 'validation')
    )

    # One sub-directory per class inside each split.
    train_cats_dir, train_dogs_dir = (
        os.path.join(train_dir, label) for label in ('cats', 'dogs')
    )
    validation_cats_dir, validation_dogs_dir = (
        os.path.join(validation_dir, label) for label in ('cats', 'dogs')
    )
    
    • 查看数据情况
    # File names of the training images, kept for the later cells that
    # build full image paths from them.
    train_cat_fnames = os.listdir(train_cats_dir)
    train_dog_fnames = os.listdir(train_dogs_dir)

    # Show the first ten file names of each class.
    print(train_cat_fnames[:10])
    print(train_dog_fnames[:10])

    # Count the images in every split/class directory, then report them.
    n_train_cats = len(os.listdir(train_cats_dir))
    n_train_dogs = len(os.listdir(train_dogs_dir))
    n_valid_cats = len(os.listdir(validation_cats_dir))
    n_valid_dogs = len(os.listdir(validation_dogs_dir))

    print('total training cat images :', n_train_cats)
    print('total training dog images :', n_train_dogs)

    print('total validation cat images :', n_valid_cats)
    print('total validation dog images :', n_valid_dogs)
    
    • 查看图片
    %matplotlib inline
    
    import matplotlib.image as mpimg
    import matplotlib.pyplot as plt
    
    # Show 8 cat and 8 dog training images in a 4x4 grid.
    nrows, ncols = 4, 4

    # Running index into the file-name lists; each pass advances it by 8.
    pic_index = 0

    # Reuse the current figure and make room for 4x4 inch cells.
    fig = plt.gcf()
    fig.set_size_inches(ncols * 4, nrows * 4)

    pic_index += 8
    window = slice(pic_index - 8, pic_index)

    next_cat_pix = [os.path.join(train_cats_dir, fname)
                    for fname in train_cat_fnames[window]]
    next_dog_pix = [os.path.join(train_dogs_dir, fname)
                    for fname in train_dog_fnames[window]]

    # Subplot positions are 1-based, so enumerate from 1.
    for position, img_path in enumerate(next_cat_pix + next_dog_pix, start=1):
      sp = plt.subplot(nrows, ncols, position)
      sp.axis('Off')  # hide axes and gridlines
      plt.imshow(mpimg.imread(img_path))

    plt.show()
    
    • 建立模型
    import tensorflow as tf
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
    
    # Small CNN: three Conv2D + MaxPooling2D stages (16, 32, 64 filters),
    # then a 512-unit dense layer and a single sigmoid output unit.
    model = Sequential()

    # First stage declares the expected input: 150x150 images with 3 channels.
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(MaxPooling2D(2, 2))

    # Remaining convolution stages.
    for n_filters in (32, 64):
      model.add(Conv2D(n_filters, (3, 3), activation='relu'))
      model.add(MaxPooling2D(2, 2))

    # Classifier head.
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.summary()
    
    • 模型编译
    from tensorflow.keras.optimizers import RMSprop
    
    # Compile for binary classification. `learning_rate` replaces the
    # deprecated `lr` keyword, which current Keras optimizers reject.
    # The metric is named 'acc' because the later plotting cell reads
    # history.history['acc'] and history.history['val_acc'].
    model.compile(optimizer=RMSprop(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['acc'])
    
    • 构建数据生成器
    from tensorflow.keras.preprocessing.image import ImageDataGenerator
    
    # Both generators only multiply pixel values by 1/255; no augmentation.
    train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
    valid_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

    # Settings shared by the training and validation flows: batches of 20
    # images resized to 150x150, with binary labels.
    flow_options = {
        'batch_size': 20,
        'class_mode': 'binary',
        'target_size': (150, 150),
    }

    train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
    valid_generator = valid_datagen.flow_from_directory(validation_dir, **flow_options)
    
    • 训练模型
    # Train for 5 epochs of 100 steps each, validating on 50 steps per epoch.
    # Model.fit accepts the generator directly; Model.fit_generator is
    # deprecated and no longer available in current Keras.
    history = model.fit(train_generator,
                        validation_data=valid_generator,
                        steps_per_epoch=100,
                        epochs=5,
                        validation_steps=50,
                        verbose=1)
    
    • 使用模型进行预测
    import numpy as np
    
    from google.colab import files
    from keras.preprocessing import image
    
    # Upload one or more images through the Colab file picker and classify
    # each of them with the trained model.
    uploaded = files.upload()

    for fn in uploaded:
      # Uploaded files are read back from /content/<file name>.
      path = '/content/' + fn
      img = image.load_img(path, target_size=(150, 150))

      x = image.img_to_array(img)
      # Apply the same 1/255 rescaling that the training generator applied;
      # without it the model sees inputs on a different scale than it was
      # trained on.
      x = x / 255.0
      # Add the batch dimension expected by model.predict.
      images = np.expand_dims(x, axis=0)

      classes = model.predict(images, batch_size=10)

      print(classes[0])

      # A sigmoid output above 0.5 is reported as dog, otherwise as cat.
      if classes[0] > 0.5:
        print(fn + " is a dog")
      else:
        print(fn + " is a cat")
    
    • 查看中间层
    import numpy as np
    import random
    from tensorflow.keras.preprocessing.image import img_to_array, load_img
    
    # Build a model that takes an image as input and returns the output of
    # every layer of the trained model after the first one.
    visualized_layers = model.layers[1:]
    successive_outputs = [layer.output for layer in visualized_layers]
    visualization_model = tf.keras.models.Model(inputs=model.input,
                                                outputs=successive_outputs)

    # Pick a random cat or dog image from the training set.
    cat_img_files = [os.path.join(train_cats_dir, f) for f in train_cat_fnames]
    dog_img_files = [os.path.join(train_dogs_dir, f) for f in train_dog_fnames]

    img_path = random.choice(cat_img_files + dog_img_files)
    img = load_img(img_path, target_size=(150, 150))

    x = img_to_array(img)            # array with shape (150, 150, 3)
    x = x.reshape((1,) + x.shape)    # array with shape (1, 150, 150, 3)

    # Rescale by 1/255, matching the training generator.
    x /= 255.0

    # Run the image through the network to obtain every intermediate
    # representation for it.
    successive_feature_maps = visualization_model.predict(x)

    # Layer names for the plot titles. They must come from the same layers
    # as the outputs above; taking them from all of model.layers would label
    # every plot with the name of the preceding layer.
    layer_names = [layer.name for layer in visualized_layers]

    # -----------------------------------------------------------------------
    # Display the representations
    # -----------------------------------------------------------------------
    for layer_name, feature_map in zip(layer_names, successive_feature_maps):

      # Only 4-D outputs (the conv / maxpool layers) can be tiled as images;
      # skip the flattened and fully-connected layers.
      if len(feature_map.shape) != 4:
        continue

      n_features = feature_map.shape[-1]  # number of channels in the feature map
      size       = feature_map.shape[ 1]  # feature map shape (1, size, size, n_features)

      # All channels are tiled side by side in this matrix.
      display_grid = np.zeros((size, size * n_features))

      # Postprocess each channel to be visually palatable: standardize it,
      # then map it around mid-gray and clip to the 0..255 range.
      for i in range(n_features):
        channel = feature_map[0, :, :, i]
        channel = channel - channel.mean()
        spread = channel.std()
        # A constant channel has zero spread; dividing by it would fill the
        # tile with NaNs, so leave such a channel at the centered value.
        if spread > 0:
          channel = channel / spread
        channel = channel * 64 + 128
        channel = np.clip(channel, 0, 255).astype('uint8')
        display_grid[:, i * size : (i + 1) * size] = channel

      # Display the grid: 20 inches wide, one tile high.
      scale = 20. / n_features
      plt.figure( figsize=(scale * n_features, scale) )
      plt.title ( layer_name )
      plt.grid  ( False )
      plt.imshow( display_grid, aspect='auto', cmap='viridis' )
    
    • 准确率和loss随着epoch的变化情况
    #-----------------------------------------------------------
    # Retrieve the per-epoch results recorded during training for
    # the training and validation data
    #-----------------------------------------------------------
    acc      = history.history[     'acc' ]
    val_acc  = history.history[ 'val_acc' ]
    loss     = history.history[    'loss' ]
    val_loss = history.history['val_loss' ]

    epochs   = range(len(acc)) # One x-axis point per recorded epoch

    #------------------------------------------------
    # Plot training and validation accuracy per epoch
    #------------------------------------------------
    plt.plot  ( epochs,     acc )
    plt.plot  ( epochs, val_acc )
    plt.title ('Training and validation accuracy')
    plt.figure() # Start a new figure so the loss curves get their own plot

    #------------------------------------------------
    # Plot training and validation loss per epoch
    #------------------------------------------------
    plt.plot  ( epochs,     loss )
    plt.plot  ( epochs, val_loss )
    plt.title ('Training and validation loss')
    

    【参考资料】
    1. Google Colab

    相关文章

      网友评论

          本文标题:kaggle猫狗大战

          本文链接:https://www.haomeiwen.com/subject/ulfpdctx.html