3.1 图像数据标准化
显示数据
绘制一个未经标准化的图像
创建一个3*3的图像表格
ax = plt.subplot(nrows, ncols, index)
分割子图:三个参数可以连写成一个三位整数(例如 331),也可以用逗号分隔(例如 3, 3, 1)。
前两个参数表示行数和列数,第三个参数表示当前子图的编号(从 1 开始,按行优先顺序计数)。
也可以使用:
# plt.subplots creates the figure and a 2x2 array of axes in a single call.
figure, ax = plt.subplots(nrows=2, ncols=2)
ax[0, 0].plot(t, s, 'r*')       # top-left panel: red star markers
ax[0, 1].plot(t * 2, s, 'b--')  # top-right panel: blue dashed line
figure.show()
导入数据
from keras.datasets import mnist
import matplotlib.pyplot as plt
# Load the MNIST training and test splits.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Show the first nine raw (unstandardized) training images in a 3x3 grid.
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)  # subplot indices are 1-based
    # Paint ticks and tick labels white (presumably so they blend into the
    # figure background and only the digits stand out).
    ax.tick_params(axis='both', colors='white')
    ax.imshow(x_train[i], cmap=plt.get_cmap('gray'))

# Adjust the spacing once, after every subplot exists, rather than on each
# loop iteration.
plt.tight_layout()
plt.show()
未处理
接下来使用ImageDataGenerator对该图进行特征标准化处理。
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
# Use channels-first tensors: (samples, channels, rows, cols).
# `set_image_dim_ordering('th')` is the legacy Keras 1 spelling and is no
# longer available in later Keras releases; this is the equivalent call.
K.set_image_data_format('channels_first')

# Insert the single grayscale channel axis: (N, 28, 28) -> (N, 1, 28, 28).
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)  # (60000, 1, 28, 28)
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)

# Convert the numpy arrays to float32 before standardizing.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Define the data preparation.
# NOTE(review): enabling the samplewise and featurewise options together is
# unusual -- confirm this combination is what the demo intends.
datagen = ImageDataGenerator(
    featurewise_center=True,             # zero-center using the dataset mean
    featurewise_std_normalization=True,  # divide by the dataset std
    samplewise_center=True,              # zero-center each individual sample
    samplewise_std_normalization=True,   # divide each sample by its own std
)

# Fit the generator's parameters from the training data.
datagen.fit(x_train)

# Only one batch of nine images is needed, so pull it with next() instead of
# entering a for-loop and breaking out after the first iteration.
x_batch, y_batch = next(datagen.flow(x_train, y_train, batch_size=9))
print(x_batch.shape)
print(type(x_batch))

# Plot the standardized batch in the same 3x3 layout as the raw images.
for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    ax.tick_params(axis='both', colors='white')
    # Drop the channel axis again so imshow receives a 2-D (28, 28) image.
    ax.imshow(x_batch[i].reshape(28, 28), cmap=plt.get_cmap('gray'))
plt.tight_layout()
plt.show()
处理后图片
3.2 序列扩充
定义需要填充的序列
from keras.preprocessing.sequence import pad_sequences
# Variable-length integer sequences used as input for the padding examples.
sequences = [[1, 2, 3, 4], [5, 6, 7], [8]]
默认填充
# Default padding: zeros are inserted in front of the shorter sequences so
# that every row is as long as the longest one ('pre' is the default mode).
padded = pad_sequences(sequences, padding='pre')
print(padded)
[[1 2 3 4]
[0 5 6 7]
[0 0 0 8]]
后填充
# Post-padding: zeros are appended after each sequence instead of in front.
padded_post = pad_sequences(
    sequences,
    padding='post',
)
print(padded_post)
[[1 2 3 4]
[5 6 7 0]
[8 0 0 0]]
截断填充
# Limit every row to three items; sequences longer than that are truncated.
# 'pre' drops items from the front of a long sequence, 'post' from its end.
padded_maxlen_pre = pad_sequences(sequences, maxlen=3, truncating='pre')
padded_maxlen_post = pad_sequences(sequences, maxlen=3, truncating='post')

# Print in the same order as before: front-truncated first, then end-truncated.
print(padded_maxlen_pre)
print(padded_maxlen_post)
[[2 3 4]
[5 6 7]
[0 0 8]]
[[1 2 3]
[5 6 7]
[0 0 8]]
非默认值填充
# Custom fill value: pad with 1 instead of the default 0.
padded_value = pad_sequences(
    sequences,
    value=1.0,
)
print(padded_value)
[[1 2 3 4]
[1 5 6 7]
[1 1 1 8]]
3.3 示例通用代码
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.optimizers import SGD
# Training hyper-parameters.
batch_size = 128
num_classes = 10
epochs = 20

# Load MNIST and flatten each image into a 784-element vector. The sample
# count is taken from the array itself rather than hard-coded.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 784)
x_test = x_test.reshape(x_test.shape[0], 784)

# Convert to float32 and divide by 255 to rescale the pixel values.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

# One-hot encode the integer class labels.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Multilayer perceptron: two 512-unit ReLU layers, each followed by dropout,
# and a softmax output layer with one unit per class.
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

# SGD with Nesterov momentum and a small learning-rate decay.
# NOTE(review): newer Keras releases spell `lr` as `learning_rate`; `lr` is
# kept to match the Keras version the rest of this tutorial targets.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Train, using the test split as validation data. The epoch count comes from
# the `epochs` variable above instead of a duplicated literal.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_data=(x_test, y_test),
)
计算模型的准确率和损失
# List the names of the metrics stored in the History object returned by fit().
print(history.history.keys())
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
import matplotlib.pyplot as plt
# Older Keras releases store accuracy under 'acc'/'val_acc', newer ones under
# 'accuracy'/'val_accuracy'. Use whichever key this History object contains so
# the plot does not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# Plot training and validation accuracy per epoch.
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')  # label the two curves
plt.show()
准确率变化
网友评论