美文网首页计算机杂谈
百度深度学习框架PaddlePaddle初体验---线性回归

百度深度学习框架PaddlePaddle初体验---线性回归

作者: 张照博 | 来源:发表于2020-02-15 21:01 被阅读0次

    正文之前

    在家闲着也是闲着,看论文的间隙,学习一下新鲜的深度学习框架也不错。之前参加的几次学术会议上,百度都在卖力地推广这个深度学习框架。其实我最青睐的就是它的中文文档,我可太爱了~

    原文:https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/beginners_guide/basics/fit_a_line/README.cn.html

    正文

    没啥好说的,从头看起,照着官方文档实现了线性回归的房价预测,代码如下:

    
    from __future__ import print_function
    import paddle
    import paddle.fluid as fluid
    import numpy
    import math
    import sys
    
    BATCH_SIZE = 20

    # Training reader: the UCI housing training split goes through a shuffle
    # reader (buf_size=500) and is then grouped into batches of BATCH_SIZE.
    _train_samples = paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500)
    train_reader = paddle.batch(_train_samples, batch_size=BATCH_SIZE)

    # Test reader: built the same way from the UCI housing test split.
    _test_samples = paddle.reader.shuffle(
        paddle.dataset.uci_housing.test(), buf_size=500)
    test_reader = paddle.batch(_test_samples, batch_size=BATCH_SIZE)
    
    # Network definition: a single fully connected layer that maps the
    # 13-element input `x` to one output value, compared against label `y`.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32') # shape and dtype of the input
    y = fluid.layers.data(name='y', shape=[1], dtype='float32') # shape and dtype of the output (label)
    y_predict = fluid.layers.fc(input=x, size=1) # fully connected layer connecting input to output
    
    
    # Handles to the default main and startup programs.
    # NOTE: the variable is spelled `starup_program` (sic); later code refers
    # to it under this exact spelling.
    main_program = fluid.default_main_program()
    starup_program = fluid.default_startup_program()
    
    # Squared error between the predicted output and the label
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    # Mean of the squared error gives the average loss
    avg_loss = fluid.layers.mean(cost)
    
    # The test program is cloned here, before minimize() is called below.
    # NOTE(review): presumably so the test program carries no optimizer
    # operators -- confirm against the Program.clone documentation.
    test_program = main_program.clone(for_test=True)
    
    # Plain SGD with a fixed learning rate, minimizing the average loss.
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_loss)
    
    
    # Choose where the executors run: GPU 0 when use_cuda is set, otherwise CPU.
    use_cuda = False
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    # An executor accepts a program and, based on the feed map (input mapping)
    # and the fetch list (results to retrieve), adds data-input and
    # result-fetch operators to that program. Call run(...) to execute a
    # program and close() to shut the executor down.
    exe = fluid.Executor(place)

    # Upper bound on the number of passes over the training data.
    num_epochs = 100
    
    def train_test(executor, program, reader, feeder, fetch_list):
        """Run ``program`` over every batch from ``reader`` and average the results.

        Args:
            executor: object whose ``run(program=..., feed=..., fetch_list=...)``
                returns one indexable value per fetch target.
            program: program handed through to ``executor.run``.
            reader: zero-argument callable returning an iterable of batches.
            feeder: object whose ``feed(batch)`` builds the feed for one batch.
            fetch_list: targets to fetch; one average is returned per target.

        Returns:
            A list holding, for each fetched value, the mean over all batches
            of its first element. Each batch carries equal weight, whatever
            its size, so this is a per-batch mean rather than a per-sample one.

        Raises:
            ValueError: if ``reader`` yields no batches.
        """
        totals = None
        batch_count = 0
        for batch in reader():
            outs = executor.run(program=program,
                                feed=feeder.feed(batch),
                                fetch_list=fetch_list)
            if totals is None:
                # Size the accumulators from the first result so that every
                # fetched value is averaged, not only the first one.
                totals = [0] * len(outs)
            # Add the first element of each fetched value to its running total.
            totals = [total + out[0] for total, out in zip(totals, outs)]
            # Count batches (not samples).
            batch_count += 1
        if batch_count == 0:
            raise ValueError("reader yielded no batches; cannot average")
        return [total / batch_count for total in totals]
    
    
    %matplotlib inline
    # Directory the inference model is saved to (set to None to skip saving).
    params_dirname = "fit_a_line.inference.model"
    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
    # Run the startup program once before training.
    exe.run(starup_program)
    train_prompt = "train cost"
    test_prompt = "test cost"
    from paddle.utils.plot import Ploter
    plot_prompt = Ploter(train_prompt, test_prompt)

    # Global batch counter across all epochs.
    step = 0

    # Separate executor used only for evaluating the test program.
    exe_test = fluid.Executor(place)

    # Set once the test loss drops below the target; a bare `break` would only
    # leave the inner batch loop and training would resume in the next epoch.
    reached_target = False

    for pass_id in range(num_epochs):
        for data_train in train_reader():
            avg_loss_value, = exe.run(
                main_program,
                feed=feeder.feed(data_train),
                fetch_list=[avg_loss]
            )
            if step % 10 == 0:  # record and print the training loss every 10 batches
                plot_prompt.append(train_prompt, step, avg_loss_value[0])
                plot_prompt.plot()
                print("%s, Step %d, Cost %f" %
                      (train_prompt, step, avg_loss_value[0]))
            if step % 100 == 0:  # record and print the test loss every 100 batches
                test_metics = train_test(executor=exe_test,
                                         program=test_program,
                                         reader=test_reader,
                                         fetch_list=[avg_loss.name],
                                         feeder=feeder)
                plot_prompt.append(test_prompt, step, test_metics[0])
                plot_prompt.plot()
                print("%s, Step %d, Cost %f" %
                      (test_prompt, step, test_metics[0]))
                if test_metics[0] < 10.0:  # test loss is low enough: stop training
                    reached_target = True
                    break

            step += 1

            if math.isnan(float(avg_loss_value[0])):
                sys.exit("got NaN loss, training failed.")

        # Save the trained parameters to the path given above. This runs once
        # per epoch (and once more on early stop) instead of after every
        # batch, which rewrote the model on disk on each step.
        if params_dirname is not None:
            fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe)

        if reached_target:
            break
    
    
    # (输出)train cost, Step 1480, Cost 30.048767
    # A separate scope and a dedicated executor, both used by the inference
    # code that follows.
    inference_scope = fluid.core.Scope()
    infer_exe = fluid.Executor(place)
    
    def save_result(points1, points2):
        """Plot predictions against ground truth and save the figure as a PNG.

        Args:
            points1: sequence of predicted values (drawn in red, labelled
                'predictions').
            points2: sequence of ground-truth values (drawn in green, labelled
                'GT'); plotted against the same indices as ``points1``.

        The image is written to ``image/prediction_gt.png``; the ``image``
        directory is created first if it does not exist yet.
        """
        import os
        import matplotlib
        # Select the Agg backend before pyplot is imported.
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        output_path = 'image/prediction_gt.png'
        x1 = list(range(len(points1)))
        # Dashed lines carry the legend labels ...
        plt.plot(x1, points1, 'r--', label='predictions')
        plt.plot(x1, points2, 'g--', label='GT')
        # ... and a second pass overlays solid lines with point markers.
        plt.plot(x1, points1, 'ro-', x1, points2, 'g+-')
        plt.title('predictions VS GT')
        plt.legend()
        # savefig does not create missing directories, so make sure it exists.
        output_dir = os.path.dirname(output_path)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
        plt.savefig(output_path)
    
    # Inference: load the saved model inside its own scope, run it on the first
    # batch of the test set, then print and plot predictions against the labels.
    with fluid.scope_guard(inference_scope):
        # Load the model saved under params_dirname; this yields the inference
        # program, the names of its feed targets and its fetch targets.
        [inference_program, feed_target_names, 
         fetch_targets] = fluid.io.load_inference_model(params_dirname, infer_exe)
        # Prepare the test set (batched, not shuffled here)
        infer_reader = paddle.batch(
            paddle.dataset.uci_housing.test(), batch_size = BATCH_SIZE)
        
        # Only the first batch of the test set is used.
        infer_data = next(infer_reader())
        
        # Show the features and the label of the first sample.
        print(infer_data[0][0])
        print(infer_data[0][1])
    
        infer_feat = numpy.array(
            [data[0] for data in infer_data]).astype("float32") # extract the features from the test batch
        infer_label = numpy.array(
            [data[1] for data in infer_data]).astype("float32") # extract the labels from the test batch
        
        # Sanity check that the loaded model expects the input named 'x'.
        # NOTE(review): an assert is skipped under `python -O`; consider an
        # explicit raise if this check must always run.
        assert feed_target_names[0] == 'x'
        # NOTE(review): infer_feat is already a numpy array, so the extra
        # numpy.array(...) call below looks redundant.
        results = infer_exe.run(inference_program,
                                feed={feed_target_names[0]: numpy.array(infer_feat)},
                                fetch_list=fetch_targets) # run the prediction
       
        # Print the predictions and the labels, then visualize the result:
        print("infer results: (House Price)")
        for idx, val in enumerate(results[0]):
            print("%d: %.2f" % (idx, val)) # print the predicted value
    
        print("\nground truth:")
        for idx, val in enumerate(infer_label):
            print("%d: %.2f" % (idx, val)) # print the label value
    
        save_result(results[0], infer_label) # save the figure
        
    
    [ 0.42616306 -0.11363636  0.25525005 -0.06916996  0.28457807 -0.14440207
      0.17327599 -0.19893267  0.62828665  0.49191383  0.18558153 -0.0686218
      0.40637243]
    [8.5]
    infer results: (House Price)
    0: 13.95
    1: 14.37
    2: 13.85
    3: 15.86
    4: 14.36
    5: 15.01
    6: 14.24
    7: 13.94
    8: 11.17
    9: 14.05
    10: 10.63
    11: 12.58
    12: 13.36
    13: 12.90
    14: 12.77
    15: 14.03
    16: 15.73
    17: 15.54
    18: 15.77
    19: 13.48
    
    ground truth:
    0: 8.50
    1: 5.00
    2: 11.90
    3: 27.90
    4: 17.20
    5: 27.50
    6: 15.00
    7: 17.20
    8: 17.90
    9: 16.30
    10: 7.00
    11: 7.20
    12: 7.50
    13: 10.40
    14: 8.80
    15: 8.40
    16: 16.70
    17: 14.20
    18: 20.80
    19: 13.40
    

    正文之后

    好久没写过简书了,今天看到了那啥日更挑战,就来注水吧~

    相关文章

      网友评论

        本文标题:百度深度学习框架PaddlePaddle初体验---线性回归

        本文链接:https://www.haomeiwen.com/subject/kaoyfhtx.html