第五次尝试

作者: Evas77 | 来源:发表于2017-10-06 15:20 被阅读3次
    import scipy.stats
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    %config InlineBackend.figure_format = 'retina'
    
    # Handle to SciPy's normal distribution; called below without loc/scale,
    # so SciPy's defaults (the standard normal) apply.
    standard_norm = scipy.stats.norm
    
    # Evaluation grid from -4 up to (but not including) 4 in steps of 0.01.
    x = np.arange(-4, 4, 0.01)
    # Plot the probability density over the grid.
    plt.plot(x, standard_norm.pdf(x))
    plt.show()
    
    output_1_0.png
    # Handle to SciPy's Student's t distribution.
    t_dist = scipy.stats.t
    
    # NOTE(review): this call relies on the `x` grid created in the previous
    # cell; the assignment two lines below recreates the same grid.
    plt.plot(x, standard_norm.pdf(x), label='standard normal')
    
    x = np.arange(-4, 4, 0.01)
    # t density with 1 degree of freedom, drawn on the same axes for comparison.
    plt.plot(x, t_dist.pdf(x, df=1), label='t distribution')
    
    plt.legend()
    plt.show()
    
    output_2_0.png
    # header=None: the first row of the file is read as data, not column names.
    house = pd.read_csv('house_size.csv', header=None)
    
    # Keep only the first column of the table.
    house_size = house.iloc[:,0]
    
    print(list(house_size))
    
    [314, 119, 217, 326, 342, 318, 130, 465, 383, 396, 507, 283, 250, 326, 279, 363, 229, 303, 367, 246, 247, 262, 209, 294, 112, 249, 354, 355, 272, 277, 377, 411, 223, 232, 445, 333, 336, 349, 611, 516, 233, 275, 395, 241, 127, 228, 305, 321, 235, 226, 288, 503, 305, 280, 318, 281, 227, 279, 171, 290, 336, 284, 380, 314, 316, 476, 309, 293, 160, 300, 319, 396, 275, 212, 344, 305, 280, 331, 359, 283, 136, 322, 359, 202, 188, 187, 457, 340, 262, 288, 318, 381, 289, 205, 373, 200, 320, 213, 261, 357]
    
    # Standard deviation used as the known population value in the
    # z-based interval computed below.
    pop_std = 86
    
    sample_mean = house_size.mean()
    # Bare expression: the notebook displays its value as the cell output.
    sample_mean
    
    300.85
    
    # Number of observations in the sample.
    sample_size = len(house_size)
    sample_size
    
    100
    
    # isf(0.025) is the point with upper-tail probability 0.025; leaving 2.5%
    # in each tail gives the two-sided critical value.
    z_score = scipy.stats.norm.isf(0.025)  # 95% confidence level
    z_score
    
    1.9599639845400545
    
    # Margin of error: critical value times the standard error of the mean
    # (pop_std / sqrt(n)).
    margin_error = z_score * pop_std / np.sqrt(sample_size)
    margin_error
    
    16.855690267044469
    
    # The interval is centred on the sample mean and extends one margin of
    # error to either side.
    lower_limit = sample_mean - margin_error
    upper_limit = sample_mean + margin_error
    
    # '%%' prints a literal percent sign; limits are shown to one decimal place.
    print('95%% Confidence Interval: ( %.1f, %.1f)' % (lower_limit, upper_limit))
    
    95% Confidence Interval: ( 284.0, 317.7)
    
    def ci_z(data, pop_std, confidence):
        """Return the z-based confidence interval for the mean of ``data``.

        The interval is ``mean(data) +/- z * pop_std / sqrt(len(data))``, where
        ``z`` is the standard-normal value whose upper-tail probability is
        ``(1 - confidence) / 2``.

        Args:
            data: Sized collection of numeric observations (for example a
                list, NumPy array or pandas Series). Must not be empty.
            pop_std: Population standard deviation, treated as known.
            confidence: Confidence level as a fraction in ``[0, 1]``, e.g.
                ``0.95``. A value of ``1`` yields ``(-inf, inf)``.

        Returns:
            Tuple ``(lower_limit, upper_limit)``.

        Raises:
            ValueError: If ``data`` is empty or ``confidence`` lies outside
                ``[0, 1]`` (previously these produced NaN or an inverted
                interval without any error).
        """
        sample_size = len(data)
        if sample_size == 0:
            raise ValueError('data must contain at least one observation')
        # Written as a negated range check so that NaN is rejected as well.
        if not 0 <= confidence <= 1:
            raise ValueError(
                'confidence must be between 0 and 1, got %r' % (confidence,)
            )

        sample_mean = np.mean(data)

        # Two-sided interval: split the leftover probability over both tails.
        alpha = (1 - confidence) / 2
        z_score = scipy.stats.norm.isf(alpha)

        # Margin of error: critical value times the standard error of the mean.
        ME = z_score * pop_std / np.sqrt(sample_size)

        lower_limit = sample_mean - ME
        upper_limit = sample_mean + ME

        return (lower_limit, upper_limit)
    
    # 90% confidence interval for the mean of house_size.
    ci_z(house_size, pop_std, 0.90)
    
    (286.70425880821733, 314.99574119178271)
    
    # 95% confidence interval; matches the limits computed step by step above.
    ci_z(house_size, pop_std, 0.95)
    
    (283.99430973295557, 317.70569026704447)
    
    # 99% confidence interval: a higher confidence level gives a wider interval.
    ci_z(house_size, pop_std, 0.99)
    
    (278.69786798947951, 323.00213201052054)
    
    # 100% confidence: the critical value is infinite, so the interval is unbounded.
    ci_z(house_size, pop_std, 1)
    
    (-inf, inf)
    
    np.random.choice(house_size, size=10)  # draw 10 values at random from house_size; values may repeat (sampling with replacement)
    
    array([112, 294, 228, 283, 318, 355, 233, 277, 300, 395], dtype=int64)
    
    def bootstrap_mean(data):
        """Draw one bootstrap resample of ``data`` and return its mean.

        The resample has as many elements as ``data`` and is drawn with
        ``np.random.choice`` using its default settings, i.e. with replacement.
        """
        n_obs = len(data)
        resample = np.random.choice(data, size=n_obs)
        return np.mean(resample)
    
    def draw_bootstrap(data, times=1):
        """Return the means of ``times`` bootstrap resamples of ``data``.

        Args:
            data: Observations to resample; passed unchanged to
                ``bootstrap_mean``.
            times: Number of resamples to draw (default 1).

        Returns:
            Array of length ``times`` holding one resample mean per entry, in
            the order the resamples were drawn.
        """
        # Allocate the result first, exactly one slot per resample.
        resample_means = np.empty(times)
        # Fill every slot with the mean of an independent resample.
        resample_means[:] = [bootstrap_mean(data) for _ in range(times)]
        return resample_means
    
    # Means of 10000 bootstrap resamples of the house-size data.
    bs_mean = draw_bootstrap(house_size, 10000)
    # density=True scales the bars so the histogram integrates to 1. It
    # replaces the `normed` keyword, which Matplotlib removed in version 3.1
    # (passing it now raises an error).
    plt.hist(bs_mean, bins=50, density=True, rwidth=0.9)
    plt.show()
    
    output_21_0.png
    # 2.5th and 97.5th percentiles of the bootstrap means: the bounds enclosing
    # the middle 95% of them (a percentile bootstrap interval for the mean).
    np.percentile(bs_mean, [2.5, 97.5])
    
    array([ 283.62   ,  318.49025])
    
    作业确实看不懂。需要请教!!
    

    相关文章

      网友评论

        本文标题:第五次尝试

        本文链接:https://www.haomeiwen.com/subject/sifryxtx.html