美文网首页
TensorFlow(6)kaggle Digit Recogn

TensorFlow(6)kaggle Digit Recognizer

作者: 操作系统 | 来源:发表于2017-08-05 12:53 被阅读0次

    下载、读取并展示数据

    下载

    读取

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import tensorflow as tf
    
    # Load the Kaggle training data from the working directory.
    data = pd.read_csv("train.csv")
    data.head(3)

    # Every column after the first is extracted as the features.
    dataset = data.iloc[:, 1:]
    dataset.head()

    # The first column is extracted as the label.
    label = data.iloc[:, 0]
    label.head()

    # Convert the pandas objects to NumPy arrays.  `.values` is used because
    # `as_matrix()` was deprecated in pandas 0.23 and removed in pandas 1.0,
    # while `.values` is available in old and new pandas releases alike.
    trainset = dataset.values
    labelset = label.values

    # Pick 5 random rows and display each one as a 28x28 grayscale image.
    nsample = 5
    randidx = np.random.randint(trainset.shape[0], size=nsample)
    for i in randidx:
        curr_img = np.reshape(trainset[i, :], (28, 28))  # 28 by 28 matrix
        curr_label = labelset[i]  # label
        # Build the caption once; it serves as both plot title and log line.
        caption = "" + str(i) + "th Training Data" + "Label is " + str(curr_label)
        plt.matshow(curr_img, cmap=plt.get_cmap('gray'))
        plt.title(caption)
        print(caption)
        plt.show()
    

    图片展示

    数据预处理

    from sklearn import preprocessing
    # Label preprocessing: convert the integer labels to one-hot vectors.
    ohe = preprocessing.OneHotEncoder()
    # Fit on the ten digits 0-9 explicitly rather than on the data itself.
    ohe.fit([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]])
    # The encoder expects a 2-D column vector.  reshape(-1, 1) infers the row
    # count instead of hard-coding 42000, so any number of samples works and
    # the array is no longer reshaped in place.
    labelset = labelset.reshape(-1, 1)
    # transform() output is converted to a dense array with toarray().
    labelset = ohe.transform(labelset).toarray()


    label.head()
    labelset[0:4, :]

    # Feature preprocessing: scale into the 0-1 range.
    min_max_scaler = preprocessing.MinMaxScaler()
    trainset = min_max_scaler.fit_transform(trainset)
    trainset[1, :]
    

    数据拟合

    采用逻辑回归模型

    # Placeholders: x receives rows of 784 features and y rows of 10 one-hot
    # label values.  None leaves the batch dimension unconstrained.
    x = tf.placeholder("float", [None, 784])
    y = tf.placeholder("float", [None, 10])
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    # LOGISTIC REGRESSION MODEL
    logits = tf.matmul(x, W) + b
    actv = tf.nn.softmax(logits)
    # COST FUNCTION
    # The cross-entropy is computed from the logits instead of tf.log(actv):
    # when a softmax output underflows to 0, tf.log returns -inf and the cost
    # can turn into NaN.  The fused op evaluates the same quantity stably.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    # OPTIMIZER
    learning_rate = 0.01
    optm = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

    # PREDICTION: True where the predicted class matches the labelled class.
    pred = tf.equal(tf.argmax(actv, 1), tf.argmax(y, 1))
    # ACCURACY: fraction of matching predictions.
    accr = tf.reduce_mean(tf.cast(pred, "float"))
    # INITIALIZER
    init = tf.global_variables_initializer()

    training_epochs = 50
    batch_size = 100
    display_step = 5
    # SESSION
    # The session is kept open so `sess` stays usable after this block;
    # call sess.close() once it is no longer needed.
    sess = tf.Session()
    sess.run(init)
    # The number of full batches is the same for every epoch, so compute it
    # once.  Rows beyond the last full batch are not used for training.
    num_batch = trainset.shape[0] // batch_size
    # MINI-BATCH LEARNING
    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(num_batch):
            batch_xs = trainset[batch_size*i:batch_size*(i+1), :]
            batch_ys = labelset[batch_size*i:batch_size*(i+1)]
            feeds = {x: batch_xs, y: batch_ys}
            # A single run applies the update and fetches the batch cost,
            # avoiding a second forward pass.  The fetched value is the cost
            # the gradient step was computed from (the earlier version
            # re-evaluated the cost after the update).
            _, batch_cost = sess.run([optm, cost], feed_dict=feeds)
            avg_cost += batch_cost/num_batch
        # DISPLAY
        if epoch % display_step == 0:
            # NOTE: train_acc is measured on the last mini-batch of the epoch
            # only, not on the whole training set.
            feeds_train = {x: batch_xs, y: batch_ys}
            train_acc = sess.run(accr, feed_dict=feeds_train)
            print ("Epoch: %03d/%03d cost: %.9f train_acc: %.3f "
                   % (epoch, training_epochs, avg_cost, train_acc))
    print ("DONE")
    

    将数据集与MNIST数据集合并

    # Stack the arrays prepared above with two further train/test splits,
    # features and labels in the same order so the rows stay aligned.
    # NOTE(review): trainimg, trainlabel, testimg and testlabel are not
    # defined in this article; presumably the MNIST arrays -- confirm.
    feature_parts = (trainset, trainimg, testimg)
    label_parts = (labelset, trainlabel, testlabel)
    trainbig = np.vstack(feature_parts)
    labelbig = np.vstack(label_parts)
    trainbig.shape
    # (107000, 784)
    labelbig.shape
    # presumably (107000, 10); the earlier note read (10700,10), likely a typo
    # Persist both merged arrays as .npy files.
    np.save("trainbig.npy", trainbig)
    np.save("labelbig.npy", labelbig)
    

    相关文章

      网友评论

          本文标题:TensorFlow(6)kaggle Digit Recogn

          本文链接:https://www.haomeiwen.com/subject/zauilxtx.html