# 房价与房屋尺寸关系的线性拟合 (linear fit of house price against house size)
#建立工程并导入sklearn包
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
# Load the training data: each non-empty line of prices.txt is "area,price"
# (two comma-separated integers).
datasets_X = []
datasets_Y = []
# A context manager closes the file even if a line fails to parse.
with open('sklearn/prices.txt', 'r') as fr:
    for line in fr:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline) instead of
            # crashing in int('').
            continue
        items = line.split(',')
        datasets_X.append(int(items[0]))
        datasets_Y.append(int(items[1]))
length = len(datasets_X)
# Reshape the features into a column: one row per sample, one feature.
datasets_X = np.array(datasets_X).reshape([length, 1])
datasets_Y = np.array(datasets_Y)
# Use the array reductions so the bounds are scalars. The builtin
# min()/max() on a 2-D array return 1-element arrays, and NumPy has
# deprecated implicitly converting such arrays to scalars.
minX = datasets_X.min()
maxX = datasets_X.max()
# Areas from minX up to (but not including) maxX in steps of 1; the fitted
# line is evaluated on this grid for plotting.
X = np.arange(minX, maxX).reshape([-1, 1])
# Fit an ordinary least-squares line to the samples.
linear = linear_model.LinearRegression()
linear.fit(datasets_X, datasets_Y)
# Visualise: raw samples in red, fitted line in blue.
plt.scatter(datasets_X, datasets_Y, color='red')
plt.plot(X, linear.predict(X), color='blue')
plt.xlabel('Area')
plt.ylabel('Price')
plt.show()
# 多项式回归 (polynomial regression)
#建立工程并导入sklearn包
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
# Load the training data: each non-empty line of prices.txt is "area,price"
# (two comma-separated integers).
datasets_X = []
datasets_Y = []
# A context manager closes the file even if a line fails to parse.
with open('sklearn/prices.txt', 'r') as fr:
    for line in fr:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline) instead of
            # crashing in int('').
            continue
        items = line.split(',')
        datasets_X.append(int(items[0]))
        datasets_Y.append(int(items[1]))
length = len(datasets_X)
# Reshape the features into a column: one row per sample, one feature.
datasets_X = np.array(datasets_X).reshape([length, 1])
datasets_Y = np.array(datasets_Y)
# Use the array reductions so the bounds are scalars. The builtin
# min()/max() on a 2-D array return 1-element arrays, and NumPy has
# deprecated implicitly converting such arrays to scalars.
minX = datasets_X.min()
maxX = datasets_X.max()
# Areas from minX up to (but not including) maxX in steps of 1; the fitted
# curve is evaluated on this grid for plotting.
X = np.arange(minX, maxX).reshape([-1, 1])
# Expand the single feature into degree-2 polynomial terms, then fit a
# linear model on the expanded features.
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(datasets_X)
lin_reg_2 = linear_model.LinearRegression()
lin_reg_2.fit(X_poly, datasets_Y)
# Visualise: raw samples in red, fitted curve in blue.
plt.scatter(datasets_X, datasets_Y, color='red')
# The transformer was already fitted on the training data, so only
# transform() the plotting grid rather than re-fitting it here.
plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue')
plt.xlabel('Area')
plt.ylabel('Price')
plt.show()
# 岭回归 (ridge regression)
#建立工程,导入sklearn相关工具包
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
# Load the data set from a text file and echo it for inspection.
data = np.genfromtxt('sklearn/ridge.txt')
print(data)
# Plot the whole of column 4 (used as the regression target below).
# NOTE(review): no plt.show()/plt.figure() follows, so this curve ends up on
# the same axes as the "real"/"predict" curves drawn at the end - confirm
# that the overlay is intended.
plt.plot(data[:, 4])
# Data preparation: columns 0-3 are the features, column 4 is the target.
X = data[:, :4]
y = data[:, 4]
# Expand the four features into polynomial terms up to degree 6.
poly = PolynomialFeatures(6)
X = poly.fit_transform(X)
# Split into training and test sets: 30% held out, fixed seed so the split
# is reproducible.
train_set_X, test_set_X, train_set_y, test_set_y = train_test_split(
    X, y, test_size=0.3, random_state=0)
# Create the ridge regressor and train it on the training split.
clf = Ridge(alpha=1.0, fit_intercept=True)
clf.fit(train_set_X, train_set_y)
# Report the score on the held-out split. The bare expression used before
# computed this value and discarded it, so a script run never showed it.
print(clf.score(test_set_X, test_set_y))
# Plot the fitted curve against the real values for samples 200..299.
start = 200
end = 300
y_pre = clf.predict(X)
time = np.arange(start, end)
plt.plot(time, y[start:end], 'b', label="real")
plt.plot(time, y_pre[start:end], 'r', label='predict')
plt.legend(loc='upper left')
plt.show()
# 网友评论 (reader comments)