美文网首页
波士顿房价预测

波士顿房价预测

作者: 一天天111 | 来源:发表于2019-10-29 10:50 被阅读0次
    #!/usr/bin/env python3
    # -*- coding: utf-8 -*-
    """
    Created on Tue Oct 29 10:39:29 2019
    @author: liyili2
    """
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import  ShuffleSplit
    from sklearn.metrics import make_scorer
    from sklearn.model_selection import GridSearchCV
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.metrics import r2_score
    
    # Load the housing table. The 'MEDV' column is the prediction target and
    # every remaining column is used as a model feature.
    boston = pd.read_csv("/Users/liyili2/Downloads/datas/kaggle/housing.csv")
    features = boston.drop(columns=['MEDV'])
    price = boston['MEDV']
    row_count, column_count = boston.shape
    print("波士顿房价数据有{} 行 points with {} variables each.".format(row_count, column_count))



    '''基础统计运算'''
    '''因变量基本统计量查看'''
    # Descriptive statistics of the target variable. The two bare strings
    # above are the original section headings and have no runtime effect.
    # Each statistic is evaluated lazily, right before its line is printed.
    for label, statistic in (
        ("房价最小值是:", price.min),
        ("房价最大值是:", price.max),
        ("房价均值是:", price.mean),
        ("房价中位数是:", price.median),
        ("房价标准差是:", price.std),
    ):
        print(label, statistic())
    
    '''建模'''
    def performance_metric(y, y_predict):
        """Score predictions against true values with ``r2_score``.

        Args:
            y: The true target values.
            y_predict: The predicted target values, aligned with ``y``.

        Returns:
            Whatever ``r2_score(y, y_predict)`` returns (the R^2 score).
        """
        return r2_score(y, y_predict)
     
    # Sanity-check the metric on a small hand-written example.
    demo_true = [3, -0.5, 2, 7, 4.2]
    demo_predicted = [2.5, 0.0, 2.1, 7.8, 5.3]
    score = performance_metric(demo_true, demo_predicted)
    print("Model has a coefficient of determination, R^2, of {:.3f}.".format(score))


    # Hold out 20% of the rows for testing; the fixed random_state keeps the
    # shuffle reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        price,
        test_size=0.2,
        random_state=17,
    )
    print("Training and testing split was successful.")
    
    def fit_model(X, y):
        """Grid-search ``max_depth`` for a decision tree regressor.

        Candidate depths 1 through 10 are compared with shuffle-split
        cross-validation, scored by ``performance_metric``.

        Args:
            X: Training features; passed unchanged to ``GridSearchCV.fit``.
            y: Training targets aligned with ``X``.

        Returns:
            The ``best_estimator_`` of the fitted grid search.
        """
        # sklearn.model_selection.ShuffleSplit takes the number of splits as
        # its first argument (the legacy sklearn.cross_validation version took
        # the sample count). Passing X.shape[0] positionally therefore asked
        # for one split per training row, multiplying the number of fits for
        # no benefit. Ten splits is the library default, stated explicitly.
        cv_sets = ShuffleSplit(n_splits=10, random_state=0)
        regressor = DecisionTreeRegressor()
        params = {'max_depth': list(range(1, 11))}
        # Wrap the metric so GridSearchCV can use it (higher is better).
        scoring_fnc = make_scorer(performance_metric)
        grid = GridSearchCV(regressor, params, scoring=scoring_fnc, cv=cv_sets)
        grid = grid.fit(X, y)
        return grid.best_estimator_
     
        
    # Fit on the training split and report the depth picked by the search.
    reg = fit_model(X_train, y_train)
    print ("Parameter 'max_depth' is {} for the optimal model.".format(reg.get_params()['max_depth']))

    # One row of feature values per client.
    # NOTE(review): values are presumably listed in the same column order as
    # `features` -- confirm against the CSV header.
    client_data = [[5, 17, 15], # Client 1
                   [4, 32, 22], # Client 2
                   [8, 3, 12]]  # Client 3
    '''预测'''
    # Prediction. The rows are wrapped in a DataFrame carrying the training
    # column names so the estimator receives the same kind of input it was
    # fitted on.
    client_frame = pd.DataFrame(client_data, columns=features.columns)
    # The loop variable must not be called `price`: that would rebind the
    # module-level target Series of the same name.
    for i, predicted_price in enumerate(reg.predict(client_frame)):
        print ("Predicted selling price for Client {}'s home: ${:,.2f}".format(i+1, predicted_price))
    
    结果:
    波士顿房价数据有489 行 points with 4 variables each.
    房价最小值是: 105000.0
    房价最大值是: 1024800.0
    房价均值是: 454342.9447852761
    房价中位数是: 438900.0
    房价标准差是: 165340.27765266786
    Model has a coefficient of determination, R^2, of 0.923.
    Training and testing split was successful.
    

    相关文章

      网友评论

          本文标题:波士顿房价预测

          本文链接:https://www.haomeiwen.com/subject/ikywvctx.html