import tensorflow as tf
import h5py
import numpy as np
INPUT_NODE = 12288  # input feature size — presumably a flattened 64x64x3 image; TODO confirm against dataset
OUTPUT_NODE = 1     # single sigmoid output for binary classification
LAYER_DIMS = [12288, 20, 7, 5, 1]  # layer sizes: input, three hidden layers, output
# Parameter initialization
def initialize_parameters(layer_dims):
    """Create TF variables for the weights and biases of every layer.

    Args:
        layer_dims: list of layer sizes, e.g. [n_x, h1, ..., n_y].

    Returns:
        dict mapping 'W1'..'W{L-1}' and 'b1'..'b{L-1}' to tf.Variable.

    NOTE(review): the original used stddev=1, which with a fan-in of 12288
    drives the tanh pre-activations deep into saturation and stalls
    learning — the likely cause of the "loss barely decreases" symptom
    noted at the bottom of the file. Fan-in-scaled (He-style)
    initialization keeps activations in a trainable range.
    """
    tf.set_random_seed(1)  # reproducible initialization
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        # Scale stddev by sqrt(2 / fan_in) so early gradients don't vanish.
        parameters['W' + str(l)] = tf.Variable(
            tf.random_normal([layer_dims[l - 1], layer_dims[l]],
                             stddev=np.sqrt(2.0 / layer_dims[l - 1])),
            name='w' + str(l))
        # Biases start at zero, one per unit in layer l.
        parameters['b' + str(l)] = tf.Variable(
            tf.zeros([layer_dims[l]]), name='b' + str(l))
    return parameters
# Forward propagation
def inference(input_tensor, parameters):
    """Forward pass: tanh hidden layers followed by a sigmoid output.

    Args:
        input_tensor: batch of input rows, shape (batch, n_x).
        parameters: dict of 'W<l>'/'b<l>' variables from initialize_parameters.

    Returns:
        Tensor of sigmoid activations, shape (batch, 1).
    """
    num_layers = len(parameters) // 2  # two entries (W, b) per layer
    activation = input_tensor
    # Hidden layers 1..L-1 apply an affine transform then tanh.
    for layer in range(1, num_layers):
        z = tf.matmul(activation, parameters['W' + str(layer)]) \
            + parameters['b' + str(layer)]
        activation = tf.nn.tanh(z)
    # Final layer uses sigmoid for the binary prediction.
    z_out = tf.matmul(activation, parameters['W' + str(num_layers)]) \
        + parameters['b' + str(num_layers)]
    return tf.nn.sigmoid(z_out)
def train(X, Y, learning_rate=0.0075, num_iterations=2500, print_cost=False):
    """Train the network with full-batch gradient descent and checkpoint it.

    Args:
        X: training features, shape (m, INPUT_NODE), values in [0, 1].
        Y: training labels, shape (m, OUTPUT_NODE), values 0/1.
        learning_rate: SGD step size.
        num_iterations: number of full-batch update steps.
        print_cost: NOTE(review) — kept for interface compatibility; the
            original printed unconditionally every 100 steps, so this flag
            is intentionally still unused.

    Side effects:
        Saves the trained model to ./model/model.ckpt.
    """
    # Start from a clean graph so repeated calls (or a later predict() in
    # the same process) don't create duplicate variable names, which would
    # break Saver name-based restore.
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    parameters = initialize_parameters(LAYER_DIMS)
    y = inference(x, parameters)
    # Binary cross-entropy; clip keeps log() away from 0.
    loss = -tf.reduce_mean(
        tf.log(tf.clip_by_value(y, 1e-10, 1.0)) * y_
        + tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0)) * (1 - y_))
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    # Use the label placeholder rather than baking the numpy array Y into
    # the graph as a constant (the original passed Y directly).
    acc = evaluate(y_, y)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(num_iterations):
            sess.run(train_step, feed_dict={x: X, y_: Y})
            if i % 100 == 0:
                total_loss = sess.run(loss, feed_dict={x: X, y_: Y})
                train_acc = sess.run(acc, feed_dict={x: X, y_: Y})
                print('Cost after iteration %i: %f, train accuracy is %f'
                      % (i, total_loss, train_acc))
        saver.save(sess, './model/model.ckpt')
def predict(X, Y):
    """Restore the checkpointed model and report accuracy on (X, Y).

    Args:
        X: features, shape (m, INPUT_NODE), values in [0, 1].
        Y: labels, shape (m, OUTPUT_NODE), values 0/1.
    """
    # Rebuild the graph from scratch: if train() already ran in this
    # process, reusing its graph would create renamed duplicate variables
    # (w1_1, ...) and Saver.restore would fail to match checkpoint names.
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    parameters = initialize_parameters(LAYER_DIMS)
    y = inference(x, parameters)
    test_acc = evaluate(Y, y)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore overwrites the freshly initialized variables by name.
        saver.restore(sess, './model/model.ckpt')
        print('test accuracy is %f' % (sess.run(test_acc, feed_dict={x: X, y_: Y})))
def evaluate(Y, y):
    """Build an accuracy op for thresholded sigmoid outputs.

    Args:
        Y: ground-truth labels (tensor or array, 0/1 values).
        y: sigmoid activations in [0, 1], same shape as Y.

    Returns:
        Scalar tensor: fraction of predictions matching the labels.
    """
    # Threshold at 0.5: probabilities below become 0.0, otherwise 1.0.
    predicted = tf.where(y < 0.5, x=tf.zeros_like(y), y=tf.ones_like(y))
    correct = tf.equal(tf.cast(Y, tf.float32), predicted)
    return tf.reduce_mean(tf.cast(correct, tf.float32))
def load_data():
    """Load the cat/non-cat HDF5 datasets from ./datasets/.

    Returns:
        Tuple (train_x, train_y, test_x, test_y, classes); the label
        arrays are reshaped to (1, m).
    """
    # Context managers guarantee the HDF5 handles are closed — the
    # original opened both files and never closed them.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train labels
    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test labels
        classes = np.array(test_dataset["list_classes"][:])  # class name list
    # Reshape labels from (m,) to (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
if __name__ == '__main__':
    # Load raw images and labels from the HDF5 datasets.
    train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
    # Flatten each image into a single row vector — presumably (64, 64, 3)
    # -> 12288 features to match INPUT_NODE; TODO confirm against dataset.
    train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1)
    test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1)
    # Standardize data to have feature values between 0 and 1.
    train_x = train_x_flatten / 255.
    test_x = test_x_flatten / 255.
    print("train_x's shape: " + str(train_x.shape))
    print("test_x's shape: " + str(test_x.shape))
    # Labels come back as (1, m); transpose to (m, 1) to match y_.
    train(train_x, train_y.T)
# Note (translated from the original author): with this network structure the
# loss barely decreases; a two-layer version shows a much clearer decrease.
# (Reader comments followed at this point in the original post.)