(4) TensorFlow: Cat vs. Dog Classification

Author: 计算机视觉__掉队选手 | Published 2019-03-24 19:36

    The cats-vs-dogs dataset

    We train an AlexNet on the Kaggle cats-vs-dogs dataset, which contains 25,000 training images and 12,500 test images of cats and dogs.
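
    The Kaggle training filenames encode the class (cat.0.jpg, dog.0.jpg, ...), which is how main.py below derives the labels. A minimal sketch of that mapping:

    def label_from_filename(file_name, classes=('cat', 'dog')):
        # returns 0 for cat.*.jpg and 1 for dog.*.jpg
        for i, c in enumerate(classes):
            if c in file_name:
                return i
        raise ValueError("unrecognized file name: " + file_name)

    print(label_from_filename('cat.123.jpg'))   # 0
    print(label_from_filename('dog.42.jpg'))    # 1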

    Code overview

    1. alexnet.py: defines the AlexNet network
    2. datagenerator.py: preprocesses the dataset and defines the input pipeline
    3. validate_image.py: runs inference on a test image
    4. main.py: entry point; trains the network on the training set
    alexnet.py

    import tensorflow as tf
    def alexnet(x,keep_prob,num_classes):
        #conv1
        with tf.name_scope('conv1') as scope:
            kernel = tf.Variable(tf.truncated_normal([11,11,3,96],dtype=tf.float32,stddev=1e-1),name='weights')
            conv = tf.nn.conv2d(x,kernel,[1,4,4,1],padding='SAME')
            biases = tf.Variable(tf.constant(0.0,shape=[96],dtype=tf.float32),trainable=True,name='biases')
            bias = tf.nn.bias_add(conv,biases)
            conv1 = tf.nn.relu(bias,name=scope)
        #lrn1 (local response normalization)
        with tf.name_scope('lrn1') as scope:
            lrn1 = tf.nn.local_response_normalization(conv1,alpha=1e-4,beta=0.75,depth_radius=2,bias=2.0)
        #pool1
        with tf.name_scope('pool1') as scope:
            pool1 = tf.nn.max_pool(lrn1,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')
        #conv2: grouped convolution (two channel groups), mirroring the original two-GPU AlexNet
        with tf.name_scope('conv2') as scope:
            pool1_groups = tf.split(axis=3,value=pool1,num_or_size_splits=2)
            kernel = tf.Variable(tf.truncated_normal([5,5,48,256],dtype=tf.float32,stddev=1e-1),name='weights')
            kernel_groups = tf.split(axis=3,value=kernel,num_or_size_splits=2)
            conv_up = tf.nn.conv2d(pool1_groups[0],kernel_groups[0],[1,1,1,1],padding='SAME')
            conv_down = tf.nn.conv2d(pool1_groups[1],kernel_groups[1],[1,1,1,1],padding='SAME')
            biases = tf.Variable(tf.constant(0.0,shape=[256],dtype=tf.float32),trainable=True,name='biases')
            biases_groups = tf.split(axis=0,value=biases,num_or_size_splits=2)
            bias_up = tf.nn.bias_add(conv_up,biases_groups[0])
            bias_down = tf.nn.bias_add(conv_down,biases_groups[1])
            bias = tf.concat(axis=3,values=[bias_up,bias_down])
            conv2 = tf.nn.relu(bias,name=scope)
        #lrn2
        with tf.name_scope('lrn2') as scope:
            lrn2 = tf.nn.local_response_normalization(conv2,alpha=1e-4,beta=0.75,depth_radius=2,bias=2.0)
        #pool2
        with tf.name_scope('pool2') as scope:
            pool2 = tf.nn.max_pool(lrn2,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')
        #conv3
        with tf.name_scope('conv3') as scope:
            kernel = tf.Variable(tf.truncated_normal([3,3,256,384],dtype=tf.float32,stddev=1e-1),name='weights')
            conv = tf.nn.conv2d(pool2,kernel,[1,1,1,1],padding='SAME')
            biases = tf.Variable(tf.constant(0.0,shape=[384],dtype=tf.float32),trainable=True,name='biases')
            bias = tf.nn.bias_add(conv,biases)
            conv3 = tf.nn.relu(bias,name=scope)
        
        with tf.name_scope("conv4") as scope:
            conv3_groups = tf.split(axis=3,value=conv3,num_or_size_splits=2)
            kernel = tf.Variable(tf.truncated_normal([3,3,192,384],dtype=tf.float32,stddev=1e-1),name='weights')
            kernel_groups = tf.split(axis=3,value=kernel,num_or_size_splits=2)
            conv_up = tf.nn.conv2d(conv3_groups[0],kernel_groups[0],[1,1,1,1],padding="SAME")
            conv_down = tf.nn.conv2d(conv3_groups[1],kernel_groups[1],[1,1,1,1],padding="SAME")
            biases = tf.Variable(tf.constant(0.0,shape=[384],dtype=tf.float32),trainable=True,name='biases')
            biases_groups = tf.split(axis=0,value=biases,num_or_size_splits=2)
            bias_up = tf.nn.bias_add(conv_up,biases_groups[0])
            bias_down = tf.nn.bias_add(conv_down,biases_groups[1])
            bias = tf.concat(axis=3,values=[bias_up,bias_down])
            conv4 = tf.nn.relu(bias,name=scope)
    
        with tf.name_scope("conv5") as scope:
            conv4_groups = tf.split(axis=3,value=conv4,num_or_size_splits=2)
            kernel = tf.Variable(tf.truncated_normal([3,3,192,256],dtype=tf.float32,stddev=1e-1),name='weights')
            kernel_groups = tf.split(axis=3,value=kernel,num_or_size_splits=2)
            conv_up = tf.nn.conv2d(conv4_groups[0],kernel_groups[0],[1,1,1,1],padding='SAME')
            conv_down = tf.nn.conv2d(conv4_groups[1],kernel_groups[1],[1,1,1,1],padding='SAME')
            biases = tf.Variable(tf.constant(0.0,shape=[256],dtype=tf.float32),trainable=True,name='biases')
            biases_groups = tf.split(axis=0,value=biases,num_or_size_splits=2)
            bias_up = tf.nn.bias_add(conv_up,biases_groups[0])
            bias_down = tf.nn.bias_add(conv_down,biases_groups[1])
            bias  = tf.concat(axis=3,values=[bias_up,bias_down])
            conv5 = tf.nn.relu(bias,name=scope)
    
        with tf.name_scope("pool5") as scope:
            pool5 = tf.nn.max_pool(conv5,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')
        with tf.name_scope("flattened6") as scope:
            flattened = tf.reshape(pool5,shape=[-1,6*6*256])
        with tf.name_scope("fc6") as scope:
            weights = tf.Variable(tf.truncated_normal([6*6*256,4096],dtype=tf.float32,stddev=1e-1),name='weights')
            biases = tf.Variable(tf.constant(0.0,shape=[4096],dtype=tf.float32),trainable=True,name='biases')
            bias = tf.nn.xw_plus_b(flattened,weights,biases)
            fc6 = tf.nn.relu(bias)
    
        with tf.name_scope("dropout6") as scope:
            dropout6 = tf.nn.dropout(fc6,keep_prob)
        
        with tf.name_scope("fc7") as scope:
            weights = tf.Variable(tf.truncated_normal([4096,4096],dtype=tf.float32,stddev=1e-1),name='weights')
            biases = tf.Variable(tf.constant(0.0,shape=[4096],dtype=tf.float32),trainable=True,name='biases')
            bias = tf.nn.xw_plus_b(dropout6,weights,biases)
            fc7 = tf.nn.relu(bias)
        with tf.name_scope("dropout7") as scope:
            dropout7 = tf.nn.dropout(fc7,keep_prob)
    
        with tf.name_scope("fc8") as scope:
            weights = tf.Variable(tf.truncated_normal([4096,num_classes],dtype=tf.float32,stddev=1e-1),name='weights')
            biases = tf.Variable(tf.constant(0.0,shape=[num_classes],dtype=tf.float32),trainable=True,name='biases')
            fc8 = tf.nn.xw_plus_b(dropout7,weights,biases)
            
        return fc8
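
    A quick shape check of the graph above (a minimal TF 1.x sketch, using only what alexnet.py defines):

    import tensorflow as tf
    from alexnet import alexnet

    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    keep_prob = tf.placeholder(tf.float32)
    logits = alexnet(x, keep_prob, num_classes=2)
    print(logits.shape)   # expected: (?, 2)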
    
    

    datagenerator.py

    import numpy as np
    import tensorflow as tf
    from tensorflow.python.framework import dtypes
    from tensorflow.python.framework.ops import convert_to_tensor
    # ImageNet RGB channel means, subtracted before the RGB->BGR flip in _parse_function_train
    VGG_MEAN = tf.constant([123.68,116.779,103.939],dtype=tf.float32)
    # Wraps image paths and labels in a tf.data input pipeline
    class ImageDataGenerator(object):
        def __init__(self,images,labels,batch_size,num_classes,image_format='jpg',shuffle=True):
            self.img_paths = images
            self.labels = labels
            self.data_size = len(self.labels)
            self.num_classes = num_classes
            self.image_format = image_format
            if shuffle:
                self._shuffle_lists()
    
            self.img_paths = convert_to_tensor(self.img_paths,dtype=dtypes.string)
            self.labels = convert_to_tensor(self.labels,dtype=dtypes.int32)
            data = tf.data.Dataset.from_tensor_slices((self.img_paths,self.labels))
            data = data.map(self._parse_function_train)
            data = data.batch(batch_size)
            self.data = data
    
        def _shuffle_lists(self):
            path = self.img_paths
            labels = self.labels
            permutation = np.random.permutation(self.data_size)
            self.img_paths = []
            self.labels = []
            for i in permutation:
                self.img_paths.append(path[i])
                self.labels.append(labels[i])
        def _parse_function_train(self,filename,label):
            one_hot = tf.one_hot(label,self.num_classes)
            img_string = tf.read_file(filename)
            if self.image_format == "jpg":
                img_decoded = tf.image.decode_jpeg(img_string,channels=3)
            elif self.image_format == "png":
                img_decoded = tf.image.decode_png(img_string,channels=3)
            else:
                raise ValueError("Unsupported image format: " + self.image_format)
            img_resized = tf.image.resize_images(img_decoded,[227,227])
            img_centered  = tf.subtract(img_resized,VGG_MEAN)
            img_bgr = img_centered[:,:,::-1]
            return img_bgr,one_hot
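
    A hypothetical smoke test for the pipeline (the two image paths are placeholders, not files that ship with the post):

    import tensorflow as tf
    from datagenerator import ImageDataGenerator

    gen = ImageDataGenerator(images=['cat.0.jpg','dog.0.jpg'],labels=[0,1],
                             batch_size=2,num_classes=2)
    iterator = gen.data.make_one_shot_iterator()
    images,onehot_labels = iterator.get_next()
    with tf.Session() as sess:
        imgs,labels = sess.run([images,onehot_labels])
        print(imgs.shape,labels)   # (2, 227, 227, 3) and the one-hot labels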
    
    

    main.py

    import numpy as np
    #from VGG16_model import vgg16   # only needed for the commented-out VGG16 path below
    import os
    import tensorflow as tf
    from alexnet import alexnet
    from datagenerator import ImageDataGenerator
    from datetime import datetime
    import glob
    from tensorflow.data import Iterator
    def main():
        # hyperparameters
        learning_rate = 1e-3
        num_epochs = 1
        train_batch_size = 8
        dropout_rate = 0.5
        num_classes = 2
        #format_size = [120,120]
        display_step = 20
        filewriter_path = './tsboard/'
        checkpoint_path = './checkpoints/'
        file_name_of_class = ['cat','dog']
        image_format = "jpg"
        train_dataset_paths="/home/dataset/kaggle/train/"
        # build lists of training image paths and labels
        train_image_paths = []
        train_labels = []
        train_image_paths = np.array(glob.glob(train_dataset_paths+'*.'+image_format)).tolist()
        print("train_image_length:",len(train_image_paths))
        for image_path in train_image_paths:
            image_file_name = image_path.split('/')[-1]
            for i in range(num_classes):
                if file_name_of_class[i] in image_file_name:
                    train_labels.append(i)
                    break
        # build the input pipeline
        train_data = ImageDataGenerator(
            images = train_image_paths,
            labels = train_labels,
            batch_size = train_batch_size,
            num_classes = num_classes,
            image_format = image_format,
            shuffle = True)
        # define the iterator
        print(train_data.data.output_types,train_data.data.output_shapes)
        with tf.name_scope("input"):
            train_iterator = Iterator.from_structure(train_data.data.output_types,train_data.data.output_shapes)
            training_initializer = train_iterator.make_initializer(train_data.data)
            train_next_batch =  train_iterator.get_next()
        x = tf.placeholder(tf.float32,[None,227,227,3]) 
        y = tf.placeholder(tf.float32,[None,num_classes])
        keep_prob = tf.placeholder(tf.float32)
        # build the AlexNet graph
        logits = alexnet(x,keep_prob,num_classes)
        # alternative: VGG16 (commented out)
        #x = tf.image.resize_images(x,format_size)
        #logits = vgg16(x,num_classes,isTrain=True,keep_prob=0.6)
        with tf.name_scope("loss"):
            loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels=y))
        with tf.name_scope('optimizer'):
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
            train_op = optimizer.minimize(loss_op)
        train_prediction = tf.nn.softmax(logits)
        init = tf.global_variables_initializer()
        '''
        #tensorboard
        tf.summary.scalar('loss',loss_op)
        merged_summary = tf.summary.merge_all()
        writer = tf.summary.FileWriter(filewriter_path)
        '''
        saver = tf.train.Saver()
        train_batches_per_epoch = int(np.floor(train_data.data_size/train_batch_size))
        print(train_data.data_size)
        print(train_batches_per_epoch)
        with tf.Session() as sess:
            sess.run(init)
            #writer.add_graph(sess.graph)
            print("{}: start training...".format(datetime.now()))
            print("{}: openning tensorboard at --logdir{}".format(datetime.now(),filewriter_path))
            for epoch in range(num_epochs):
                sess.run(training_initializer)
                print("{}:epoch number:{} start".format(datetime.now(),epoch+1))
                for step in range(500):   # only the first 500 batches per epoch; use train_batches_per_epoch for a full pass
                    img_batch,label_batch = sess.run(train_next_batch)
                    loss,_,predictions = sess.run([loss_op,train_op,train_prediction],feed_dict={x:img_batch,y:label_batch,keep_prob:dropout_rate})
                    if step % display_step == 0:
                        print("{}:loss={}".format(datetime.now(),loss))
                        print("accuracy = {}".format(accuracy(predictions,label_batch)))
                        #s = sess.run(merged_summary,feed_dict={x:img_batch,y:label_batch,keep_prob:1.})
                        #writer.add_summary(s,epoch*train_batches_per_epoch+step)
                #save model
                print("{}:saving checkpoint of model...".format(datetime.now()))
                checkpoint_name = os.path.join(checkpoint_path,'model_epoch' + str(epoch+1)+'.ckpt')
                save_path = saver.save(sess,checkpoint_name)
                print("{}:epoch number:{} end".format(datetime.now(),epoch+1))
    def accuracy(predictions,labels):
        return 100.0*np.sum(np.argmax(predictions,1)==np.argmax(labels,1))/predictions.shape[0]
        
    
    if __name__ == "__main__":
    
        main()
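
    To make the accuracy helper concrete, a toy example (values invented for illustration):

    import numpy as np
    predictions = np.array([[0.9,0.1],[0.2,0.8],[0.6,0.4]])
    labels = np.array([[1,0],[0,1],[0,1]])
    # argmax agrees on the first two rows only -> 2 of 3 correct
    print(100.0*np.sum(np.argmax(predictions,1)==np.argmax(labels,1))/predictions.shape[0])   # 66.66...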
    

    validate_image.py

    import tensorflow as tf
    #from VGG16_model import vgg16   # only needed for the commented-out VGG16 path below
    from alexnet import alexnet
    import matplotlib.pyplot as plt
    class_name = ['cat','dog']
    def test_image(path_image,num_class):
        img_string = tf.read_file(path_image)
        img_decoded = tf.image.decode_jpeg(img_string,channels=3)   # test images are jpg, so decode as jpeg
        img_resized = tf.image.resize_images(img_decoded,[227,227]) # alexnet expects 227x227 input (see the 6*6*256 flatten)
        img_resized = tf.reshape(img_resized,shape=[1,227,227,3])
        fc8 = alexnet(img_resized,1,2)
        #vgg = vgg16(img_resized,num_class,return_all=True)
        #score = tf.nn.softmax(vgg[-3])
        score = tf.nn.softmax(fc8)
        print("score:",score)
        max = tf.argmax(score,1)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess,'./checkpoints/model_epoch1.ckpt')
            #print(sess.run(vgg))
            #print("score",sess.run(score))
            prob = sess.run(max)[0]
            plt.imshow(img_decoded.eval())
            plt.title("class:"+class_name[prob])
            plt.show()
    if __name__ == "__main__":
        test_image("/home/dataset/kaggle/test1/3572.jpg",num_class=2)
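
    One caveat: the training pipeline mean-centers with VGG_MEAN and converts RGB to BGR, while test_image above feeds raw resized pixels into the network; inference should match the training preprocessing. A sketch reusing the constant from datagenerator.py:

    import tensorflow as tf
    from datagenerator import VGG_MEAN

    def preprocess_for_alexnet(img_decoded):
        # mirror _parse_function_train: resize, mean-center, flip RGB -> BGR
        img_resized = tf.image.resize_images(img_decoded,[227,227])
        img_centered = tf.subtract(img_resized,VGG_MEAN)
        return img_centered[:,:,::-1]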
    
