【Titanic】

Author: 唯师默蓝 | Published 2020-02-19 00:30
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    import numpy as np
    import sklearn
    import pandas as pd
    import os
    import sys
    import time
    import tensorflow as tf
    from tensorflow import keras
    
    # Show all columns
    pd.set_option('display.max_columns', None)
    # Show all rows
    pd.set_option('display.max_rows', None)
    # Set the display width of values to 100 (default is 50)
    pd.set_option('max_colwidth', 100)
    # print(tf.__version__)
    # print(sys.version_info)
    # for module in np, pd, sklearn, tf, keras:
    #     print(module.__name__, module.__version__)
    train_file="./data1/train.csv"
    eval_file="./data1/eval.csv"
    train_df=pd.read_csv(train_file)
    eval_df=pd.read_csv(eval_file)
    
    
    y_train=train_df.pop('survived')
    y_eval=eval_df.pop('survived')
    # print(train_df)
    # print(eval_df.head())
    # print(y_train.head())
    # print(y_eval.head())
    # train_df.age.hist(bins=20)  # split all age values into 20 bins
    # plt.show()
    
    # Show the sex distribution as a horizontal bar chart (kind='barh' is horizontal, kind='bar' is vertical)
    # train_df["sex"].value_counts().plot(kind='barh')
    # plt.show()
    # # Show the passenger class distribution
    # train_df['class'].value_counts().plot(kind='barh')
    # plt.show()
    
    # Concatenate the popped-off y_train back onto train_df, group by sex,
    # and compute the survival probability for each sex
    sex_Prob = pd.concat([train_df, y_train], axis=1).groupby('sex').survived.mean()
    # Plot the result as a horizontal bar chart
    sex_Prob.plot(kind="barh")
    plt.show()
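    # A similar check (assumed, not in the original post): survival probability grouped
    # by passenger class, which the commented-out exploration above also looks at.
    pd.concat([train_df, y_train], axis=1).groupby('class').survived.mean().plot(kind='barh')
    plt.show()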
    
    # If a feature is discrete (categorical), feature_column can one-hot encode it.
    # If a feature is continuous (numeric), feature_column can bucketize it into discrete features
    # (see the bucketized_column sketch after the two loops below).
    # Classify the nine features as categorical or numeric:
    categorical_columns = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
    numeric_columns = ['age', 'fare']
    feature_columns=[]
    for categorical_column in categorical_columns:
        vocab = train_df[categorical_column].unique()  # all possible values of this column
        # tf.feature_column.indicator_column() one-hot encodes the categorical column
        feature_columns.append(
            tf.feature_column.indicator_column(
                tf.feature_column.categorical_column_with_vocabulary_list(categorical_column,vocab)
            )
        )
    
    # Numeric (continuous) columns
    for numeric_column in numeric_columns:
        feature_columns.append(
            tf.feature_column.numeric_column(numeric_column, dtype=tf.float32)
        )
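    # Illustrative sketch (assumed, not part of the original pipeline): the bucketizing
    # mentioned above can be done with tf.feature_column.bucketized_column, which turns
    # a continuous column such as 'age' into one-hot bucket indicators. The boundary
    # values below are arbitrary examples; the column is not added to feature_columns here.
    age_buckets = tf.feature_column.bucketized_column(
        tf.feature_column.numeric_column('age', dtype=tf.float32),
        boundaries=[18, 25, 30, 40, 50, 60]
    )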
    
    # Build a tf.data.Dataset
    # data_df -> x (features)
    # label_df -> y (labels)
    def make_dataset(data_df,label_df,epochs=10,shuffle=True, batch_size=32):
        dataset = tf.data.Dataset.from_tensor_slices(
            (dict(data_df),label_df)
        )
        if shuffle:
            dataset = dataset.shuffle(10000)
        dataset = dataset.repeat(epochs).batch(batch_size)
        return dataset
    train_dataset = make_dataset(train_df, y_train, batch_size=5)
    
    # keras.layers.DenseFeatures converts feature_columns into dense tensors for a batch of examples
    for x, y in train_dataset.take(1):
        age_column = feature_columns[7]     # the 'age' numeric column (the 7 categorical columns come first)
        gender_column = feature_columns[0]  # the 'sex' indicator column
        print(keras.layers.DenseFeatures(age_column)(x).numpy())
        print(keras.layers.DenseFeatures(gender_column)(x).numpy())
    
    
    for x,y in train_dataset.take(1):
        print(keras.layers.DenseFeatures(feature_columns)(x).numpy())
    
    # Two hidden layers; the output layer has 2 softmax units because the integer labels
    # (survived = 0 or 1) are paired with sparse_categorical_crossentropy below.
    model = keras.models.Sequential([
        keras.layers.DenseFeatures(feature_columns),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(2, activation='softmax'),
    ])
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=keras.optimizers.SGD(learning_rate=0.01),
                  metrics=['accuracy'])
    train_dataset = make_dataset(train_df, y_train, epochs=100)
    eval_dataset = make_dataset(eval_df, y_eval, epochs=1, shuffle=False)
    
    history=model.fit(
        train_dataset,
        validation_data=eval_dataset,
        steps_per_epoch=20,
        validation_steps=8,
        epochs=100)
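    # Optional follow-up (assumed, not part of the original post): visualize the training
    # curves recorded in history and evaluate the trained model on the eval set.
    pd.DataFrame(history.history).plot(figsize=(8, 5))
    plt.grid(True)
    plt.show()
    model.evaluate(make_dataset(eval_df, y_eval, epochs=1, shuffle=False))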
    
    
    
