美文网首页TensorFlowkeras深度学习模型
3、数据预处理、优化、可视化

3、数据预处理、优化、可视化

作者: 上行彩虹人 | 来源:发表于2019-07-07 17:15 被阅读0次

    3.1 图像数据标准化

    显示数据
    绘制一个未经标准化的图像
    创建一个3*3的图像表格

    ax = plt.subplot(nrows, ncols, index)
    

    plt.subplot 用于划分子图:三个参数可以连写(如 331),也可以用逗号分隔(如 3, 3, 1)。
    前两个参数分别表示行数和列数,第三个参数表示当前子图的序号(从 1 开始)。
    也可以使用:

    # Alternative: build the whole 2x2 grid in one call; `axes` is a 2-D array of Axes.
    # NOTE: `t` and `s` (x/y sample data) must already be defined before running this.
    fig, axes = plt.subplots(2, 2)
    axes[0, 0].plot(t, s, 'r*')       # top-left: red star markers
    axes[0, 1].plot(t * 2, s, 'b--')  # top-right: blue dashed line
    fig.show()
    

    导入数据

    from keras.datasets import mnist
    import matplotlib.pyplot as plt

    # Load the MNIST train/test splits as (images, labels) pairs.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Show the first nine raw (not yet standardized) training images in a 3x3 grid.
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)  # subplot indices are 1-based
        # Draw ticks and tick labels in white on both axes.
        ax.tick_params(axis='both', colors='white')
        ax.imshow(x_train[i], cmap=plt.get_cmap('gray'))

    # Adjust the layout once, after every subplot and image exists, instead of
    # recomputing it on each iteration (and before the last image is drawn).
    plt.tight_layout()
    plt.show()
    
    未处理

    接下来使用ImageDataGenerator对该图进行特征标准化处理。

    from keras.preprocessing.image import ImageDataGenerator
    from keras import backend as K
    # Use channels-first tensors. `set_image_data_format('channels_first')` is the
    # Keras 2 replacement for the legacy `set_image_dim_ordering('th')`, which is
    # no longer available in later Keras releases.
    K.set_image_data_format('channels_first')

    # Add an explicit single-channel axis in front of the 28x28 image.
    x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)  # (60000,1,28,28)
    x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)

    # Convert to float32 (both arrays are numpy.ndarray) so the statistics
    # below are computed in floating point.
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')

    # Define the data preparation.
    # NOTE(review): feature-wise and sample-wise statistics are both enabled, so
    # the two transforms are stacked; confirm that this combination is intended.
    datagen = ImageDataGenerator(
        featurewise_center=True,             # subtract the dataset mean
        featurewise_std_normalization=True,  # divide by the dataset std
        samplewise_center=True,              # subtract each sample's own mean
        samplewise_std_normalization=True,   # divide by each sample's own std
    )

    # Fit the dataset-wide statistics (needed by the feature-wise options).
    datagen.fit(x_train)

    # Only one batch of nine images is needed for the preview, so take the first
    # batch from the iterator instead of looping and breaking.
    x_batch, y_batch = next(datagen.flow(x_train, y_train, batch_size=9))
    print(x_batch.shape)
    print(type(x_batch))

    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)  # subplot indices are 1-based
        # Draw ticks and tick labels in white on both axes.
        ax.tick_params(axis='both', colors='white')
        # Drop the channel axis again so the image can be shown as 28x28.
        ax.imshow(x_batch[i].reshape(28, 28), cmap=plt.get_cmap('gray'))

    # Adjust the layout once, after all nine subplots exist.
    plt.tight_layout()
    plt.show()
    
    处理后图片

    3.2 序列扩充

    定义需要填充的序列

    from keras.preprocessing.sequence import pad_sequences

    # Three sequences of different lengths (4, 3 and 1) used by the padding demos.
    sequences = [[1, 2, 3, 4], [5, 6, 7], [8]]
    

    默认填充

    # Default padding: with no extra arguments, shorter sequences are filled
    # with zeros at the front (see the printed result).
    padded = pad_sequences(sequences)
    print(padded)
    

    [[1 2 3 4]
    [0 5 6 7]
    [0 0 0 8]]
    后填充

    # Post padding: the zeros are appended after each sequence instead.
    padded_post = pad_sequences(sequences, padding='post')
    print(padded_post)
    

    [[1 2 3 4]
    [5 6 7 0]
    [8 0 0 0]]
    截断填充

    # Truncate to at most three items, dropping values from the front ...
    padded_maxlen_pre = pad_sequences(sequences, maxlen=3, truncating='pre')
    print(padded_maxlen_pre)

    # ... or from the back of sequences that are too long.
    padded_maxlen_post = pad_sequences(sequences, maxlen=3, truncating='post')
    print(padded_maxlen_post)
    

    [[2 3 4]
    [5 6 7]
    [0 0 8]]
    [[1 2 3]
    [5 6 7]
    [0 0 8]]
    非默认值填充

    # Custom fill value: pad with 1 instead of the default 0.
    padded_value = pad_sequences(sequences, value=1.0)
    print(padded_value)
    

    [[1 2 3 4]
    [1 5 6 7]
    [1 1 1 8]]

    3.3 示例通用代码

    from __future__ import print_function
    import keras
    from keras.datasets import mnist
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.optimizers import SGD

    # Training hyper-parameters.
    batch_size = 128
    num_classes = 10
    epochs = 20

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Flatten every image into a 784-long vector. The sample count is taken from
    # the arrays themselves rather than hard-coded as 60000 / 10000.
    x_train = x_train.reshape(x_train.shape[0], 784)
    x_test = x_test.reshape(x_test.shape[0], 784)
    # Convert to float32 and scale the pixel values by 1/255.
    x_train = x_train.astype('float32') / 255.
    x_test = x_test.astype('float32') / 255.

    # One-hot encode the integer class labels.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    # Two hidden ReLU layers of 512 units, each followed by 20% dropout, and a
    # softmax output layer with one unit per class.
    model = Sequential()
    model.add(Dense(512, activation='relu', input_shape=(784,)))
    model.add(Dropout(0.2))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    model.summary()

    # SGD with Nesterov momentum and a small learning-rate decay.
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    # Train with the `epochs` setting defined above (previously the literal 20
    # was repeated here, so changing `epochs` had no effect). The test split is
    # used as validation data; `history` keeps the per-epoch metrics.
    history = model.fit(x_train, y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=1,
                        validation_data=(x_test, y_test))
    

    计算模型的准确率和损失

    # List the metric names that were recorded during training.
    recorded_metrics = history.history.keys()
    print(recorded_metrics)
    

    dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

    import matplotlib.pyplot as plt

    # The accuracy metric is stored as 'acc'/'val_acc' by older Keras releases
    # and as 'accuracy'/'val_accuracy' from Keras 2.3 on; use whichever key this
    # history actually contains instead of failing with a KeyError.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'

    # Plot training and validation accuracy per epoch.
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')  # legend labels for the two curves
    plt.show()
    
    准确率变化

    相关文章

      网友评论

        本文标题:3、数据预处理、优化、可视化

        本文链接:https://www.haomeiwen.com/subject/mdzbhctx.html