美文网首页编程
Python基础学习16

Python基础学习16

作者: ericblue | 来源:发表于2019-02-21 08:57 被阅读0次

    Estimator算法程序官网介绍
    数据集说明:
    鸢尾花数据集包含四个特征和一个标签。这四个特征确定了单株鸢尾花的下列植物学特征:

    • 花萼长度
    • 花萼宽度
    • 花瓣长度
    • 花瓣宽度

    模型会将这些特征表示为 float32 数值数据。

    该标签确定了鸢尾花品种,品种必须是下列任意一种:

    • 山鸢尾 (0)
    • 变色鸢尾 (1)
    • 维吉尼亚鸢尾 (2)

    模型会将该标签表示为 int32 分类数据。

    下表显示了数据集中的三个样本:
    花萼长度 花萼宽度 花瓣长度 花瓣宽度 品种(标签)
    5.1 3.3 1.7 0.5 0(山鸢尾)
    5.0 2.3 3.3 1.0 1(变色鸢尾)
    6.4 2.8 5.6 2.2 2(维吉尼亚鸢尾)

    实际数据集存储格式

    样例premade_estimator.py源码

    #  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
    #
    #  Licensed under the Apache License, Version 2.0 (the "License");
    #  you may not use this file except in compliance with the License.
    #  You may obtain a copy of the License at
    #
    #   http://www.apache.org/licenses/LICENSE-2.0
    #
    #  Unless required by applicable law or agreed to in writing, software
    #  distributed under the License is distributed on an "AS IS" BASIS,
    #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    #  See the License for the specific language governing permissions and
    #  limitations under the License.
    """An Example of a DNNClassifier for the Iris dataset."""
    from __future__ import absolute_import
    from __future__ import division
    from __future__ import print_function
    
    import argparse
    import tensorflow as tf
    
    import iris_data
    
    
    # Command-line options that control the training run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--batch_size',
        type=int,
        default=100,
        help='batch size',
    )
    parser.add_argument(
        '--train_steps',
        type=int,
        default=1000,
        help='number of training steps',
    )
    
    def main(argv):
        """Train, evaluate and sample-predict a DNNClassifier on the Iris data.

        Args:
            argv: Full argument vector. The first element is skipped and the
                remainder is parsed by the module-level ``parser``.
        """
        flags = parser.parse_args(argv[1:])

        # Load both splits as (features, labels) pairs.
        (train_x, train_y), (test_x, test_y) = iris_data.load_data()

        # One numeric feature column per feature key of the training data.
        my_feature_columns = [
            tf.feature_column.numeric_column(key=name)
            for name in train_x.keys()
        ]

        # Two hidden layers of 10 units each; the output layer chooses
        # between 3 classes.
        classifier = tf.estimator.DNNClassifier(
            feature_columns=my_feature_columns,
            hidden_units=[10, 10],
            n_classes=3)

        def train_fn():
            # Input pipeline used for training.
            return iris_data.train_input_fn(train_x, train_y, flags.batch_size)

        def eval_fn():
            # Input pipeline used for evaluation on the test split.
            return iris_data.eval_input_fn(test_x, test_y, flags.batch_size)

        # Train the model for the requested number of steps.
        classifier.train(input_fn=train_fn, steps=flags.train_steps)

        # Evaluate the model and report its accuracy.
        eval_result = classifier.evaluate(input_fn=eval_fn)
        print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

        # Three hand-written samples, with the species expected for each.
        expected = ['Setosa', 'Versicolor', 'Virginica']
        predict_x = {
            'SepalLength': [5.1, 5.9, 6.9],
            'SepalWidth': [3.3, 3.0, 3.1],
            'PetalLength': [1.7, 4.2, 5.4],
            'PetalWidth': [0.5, 1.5, 2.1],
        }

        def predict_fn():
            # Prediction has no labels, so only the features are supplied.
            return iris_data.eval_input_fn(predict_x,
                                           labels=None,
                                           batch_size=flags.batch_size)

        predictions = classifier.predict(input_fn=predict_fn)

        template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

        for prediction, expected_name in zip(predictions, expected):
            predicted_id = prediction['class_ids'][0]
            percent = 100 * prediction['probabilities'][predicted_id]
            print(template.format(iris_data.SPECIES[predicted_id],
                                  percent, expected_name))
    
    
    # Script entry point: only runs when the file is executed directly.
    # Sets TensorFlow logging verbosity to INFO, then passes `main` to
    # tf.app.run.
    if __name__ == '__main__':
        tf.logging.set_verbosity(tf.logging.INFO)
        tf.app.run(main)
    

    iris_data.py源码

    import pandas as pd
    import tensorflow as tf
    
    # Remote locations of the Iris training and test CSV files.
    TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
    TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

    # Column names applied to the CSV files, in order.
    CSV_COLUMN_NAMES = [
        'SepalLength',
        'SepalWidth',
        'PetalLength',
        'PetalWidth',
        'Species',
    ]

    # Species names, in list order.
    SPECIES = [
        'Setosa',
        'Versicolor',
        'Virginica',
    ]
    
    def maybe_download():
        """Fetch the training and test CSV files with ``tf.keras.utils.get_file``.

        Returns:
            A ``(train_path, test_path)`` tuple holding the value returned by
            ``get_file`` for each URL.
        """
        paths = []
        for url in (TRAIN_URL, TEST_URL):
            # The file name is whatever follows the last '/' in the URL.
            file_name = url.rsplit('/', 1)[-1]
            paths.append(tf.keras.utils.get_file(file_name, url))

        train_path, test_path = paths
        return train_path, test_path
    
    def load_data(y_name='Species'):
        """Returns the iris dataset as (train_x, train_y), (test_x, test_y).

        Args:
            y_name: Name of the column to split off as the label.

        Returns:
            Two ``(features, labels)`` pairs, training first and test second.
            Each ``features`` is the loaded frame with the label column
            removed; each ``labels`` is that removed column.
        """
        splits = []
        for csv_path in maybe_download():
            # Read the CSV, replacing its first row with our column names.
            frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
            # pop() removes the label column from the frame in place, so the
            # frame is left holding only the feature columns.
            labels = frame.pop(y_name)
            splits.append((frame, labels))

        train_split, test_split = splits
        return train_split, test_split
    
    
    def train_input_fn(features, labels, batch_size):
        """Build the input dataset used for training.

        Args:
            features: Mapping-like collection of feature columns; it is
                converted with ``dict(features)``.
            labels: Labels paired with the features.
            batch_size: Number of examples per batch.

        Returns:
            A dataset of ``(features, labels)`` slices that is shuffled with a
            buffer of 1000, repeated without a count, and batched.
        """
        examples = (dict(features), labels)
        dataset = tf.data.Dataset.from_tensor_slices(examples)

        dataset = dataset.shuffle(1000)
        dataset = dataset.repeat()
        return dataset.batch(batch_size)
    
    
    def eval_input_fn(features, labels, batch_size):
        """An input function for evaluation or prediction.

        Args:
            features: Mapping-like collection of feature columns; it is
                converted with ``dict(features)``.
            labels: Labels paired with the features, or ``None`` to build a
                features-only dataset (as used for prediction).
            batch_size: Number of examples per batch. Must not be ``None``.

        Returns:
            A batched dataset of feature slices, or of ``(features, labels)``
            slices when ``labels`` is given.

        Raises:
            ValueError: If ``batch_size`` is ``None``.
        """
        # Validate with a real exception before doing any work: an `assert`
        # is stripped under `python -O`, which would let None reach batch().
        if batch_size is None:
            raise ValueError("batch_size must not be None")

        features = dict(features)
        # Without labels, the dataset is built from the features alone.
        inputs = features if labels is None else (features, labels)

        dataset = tf.data.Dataset.from_tensor_slices(inputs)
        return dataset.batch(batch_size)
    
    
    # The remainder of this file contains a simple example of a csv parser,
    #     implemented using the `Dataset` class.
    
    # `tf.decode_csv` sets the types of the outputs to match the examples given in
    #     the `record_defaults` argument.
    # One default per entry of CSV_COLUMN_NAMES: four floats, then one int.
    CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]
    
    def _parse_line(line):
        """Decode one CSV text line into a ``(features, label)`` pair.

        Args:
            line: A single line of CSV text.

        Returns:
            A tuple of the feature dict, keyed by column name, and the value
            of the 'Species' column.
        """
        # Decode the line; CSV_TYPES supplies the per-field defaults.
        values = tf.decode_csv(line, record_defaults=CSV_TYPES)
        by_name = dict(zip(CSV_COLUMN_NAMES, values))

        # Split the label column away from the feature columns.
        label = by_name['Species']
        features = {
            name: value
            for name, value in by_name.items()
            if name != 'Species'
        }
        return features, label
    
    
    def csv_input_fn(csv_path, batch_size):
        """Build a training-style dataset directly from a CSV file.

        Args:
            csv_path: Path of the CSV file to read.
            batch_size: Number of examples per batch.

        Returns:
            A dataset of parsed ``(features, label)`` examples that is
            shuffled with a buffer of 1000, repeated without a count, and
            batched.
        """
        text_lines = tf.data.TextLineDataset(csv_path)
        # Drop the first line of the file (presumably the header row).
        data_lines = text_lines.skip(1)

        # Turn each text line into a (features, label) pair.
        examples = data_lines.map(_parse_line)

        examples = examples.shuffle(1000)
        examples = examples.repeat()
        return examples.batch(batch_size)
    
    

    相关文章

      网友评论

        本文标题:Python基础学习16

        本文链接:https://www.haomeiwen.com/subject/fdppsqtx.html