import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Sample table: 50 rows of random integers per column. randint's upper bound
# is exclusive, so height is in 160..189, weight in 45..89 and sex in {0, 1}.
df = DataFrame({
    column: np.random.randint(low, high, size=50)
    for column, (low, high) in (
        ('height', (160, 190)),
        ('weight', (45, 90)),
        ('sex', (0, 2)),
    )
})
# Bare expression: in a notebook cell this displays the table.
df
# Largest value of each column.
df.max()
data:image/s3,"s3://crabby-images/16308/163082a42cee5586db60300cbabd5b979c232415" alt=""
53.png
# Smallest value of each column (one entry per column).
df.min()
data:image/s3,"s3://crabby-images/2502a/2502a96bc2eb69002c87779da75f164ac03d2dfe" alt=""
37.png
# Bare expression: in a notebook cell this displays df, which is still the
# original, unscaled table at this point.
df
data:image/s3,"s3://crabby-images/264a8/264a8652d877a6a5c80681f7424b2d2655c97a8b" alt=""
24.png
data:image/s3,"s3://crabby-images/e87ce/e87ceee02b494644e6c41f74ccf26936cb6cf058" alt=""
57.png
# Min-max scaling: remove the order-of-magnitude differences between the
# columns by mapping each one linearly onto the range 0..1:
#     (x - column_min) / (column_max - column_min)
df2 = df.sub(df.min()).div(df.max() - df.min())
# Summary statistics of the scaled table.
df2.describe()
45.png
data:image/s3,"s3://crabby-images/65926/6592682c292c187b7273f0b635017974f1e3e19e" alt=""
15.png
# Z-score standardization, column by column: z = (x - μ) / σ
#   μ is the column mean (df.mean())
#   σ is the column standard deviation (df.std())
# This recentres and rescales each column; it does not change the shape of
# its distribution.
df3 = df.sub(df.mean()).div(df.std())
df3
# Each column of df3 now has mean 0 and standard deviation 1.
49.png
# Summary statistics of the standardized table; the mean row should be ~0
# (up to floating-point rounding) and the std row 1 for every column.
df3.describe()
data:image/s3,"s3://crabby-images/78905/789057422b84225888488b2e418f33a64b4e7d5f" alt=""
33.png
网友评论