1. Matplotlib
import matplotlib.pyplot as plt

# NOTE: x, y, y1, y2, a, b and c are placeholders to be supplied by the reader.
plt.figure(figsize=(a, b))
plt.subplot(2, 2, 1)
# Hollow scatter plot: an empty face with an explicit edge colour.
# (Passing c='' is rejected by recent Matplotlib releases, and without an
# explicit edge colour the edge would inherit the transparent face.)
plt.scatter(x, y, marker='o', facecolors='none', edgecolors='b', label='xxx')
# Line plot
plt.plot(x, y, color='b', label='xxx')
# Filled region between the two curves y1 and y2
# (filling between y1 and itself would draw nothing).
plt.fill_between(x, y1, y2, facecolor='pink', label='')
plt.legend(bbox_to_anchor=(a, b), loc=c)
plt.show()
2. LinearRegression
class PolynomialFeature(object):
    """
    Polynomial feature transformer.

    Expands every input sample into all monomials of its components up to
    (and including) a given total degree, preceded by a constant column of
    ones.

    Example
    =======
    x =
    [[a, b],
     [c, d]]

    y = PolynomialFeature(degree=2).transform(x)
    y =
    [[1, a, b, a^2, a * b, b^2],
     [1, c, d, c^2, c * d, d^2]]
    """

    def __init__(self, degree=2):
        """
        Construct polynomial features.

        Parameters
        ----------
        degree : int
            Maximum total degree of the generated monomials.

        Raises
        ------
        AssertionError
            If ``degree`` is not an ``int``.
        """
        assert isinstance(degree, int)
        self.degree = degree

    def transform(self, x):
        """
        Transform an input array into polynomial features.

        Parameters
        ----------
        x : (sample_size, n) array_like
            Input array. A 1-D input of length ``sample_size`` is treated as
            ``sample_size`` samples with a single component each.

        Returns
        -------
        output : (sample_size, C(n + degree, degree)) ndarray
            Polynomial features: a column of ones followed by every product
            of 1..degree input components (combinations with replacement).
        """
        # Accept lists/tuples as well as ndarrays.
        x = np.asarray(x)
        if x.ndim == 1:
            x = x[:, None]
        # Iterating the transposed array yields one row per input component.
        columns = x.transpose()
        features = [np.ones(len(x))]
        for degree in range(1, self.degree + 1):
            for items in itertools.combinations_with_replacement(columns, degree):
                # Element-wise product of the selected components.
                features.append(functools.reduce(np.multiply, items))
        return np.asarray(features).transpose()
class Regression(object):
    """
    Base class for regressors.
    """
    pass


class LinearRegression(Regression):
    """
    Linear regression model.

    y = X @ w
    t ~ N(t|X @ w, var)
    """

    def fit(self, X: np.ndarray, t: np.ndarray):
        """
        Perform least squares fitting.

        Stores the fitted weights in ``self.w`` and the mean squared
        training residual in ``self.var``.

        Parameters
        ----------
        X : (N, D) np.ndarray
            Training independent variable.
        t : (N,) np.ndarray
            Training dependent variable.
        """
        # The pseudo-inverse gives the least-squares solution.
        self.w = np.linalg.pinv(X) @ t
        self.var = np.mean(np.square(X @ self.w - t))

    def predict(self, X: np.ndarray, return_std: bool = False):
        """
        Make predictions for the given input.

        Parameters
        ----------
        X : (N, D) np.ndarray
            Samples whose output is to be predicted.
        return_std : bool, optional
            If True, also return the standard deviation of each prediction.

        Returns
        -------
        y : (N,) np.ndarray
            Prediction for each sample.
        y_std : (N,) np.ndarray
            Standard deviation of each prediction; only returned when
            ``return_std`` is True.
        """
        y = X @ self.w
        if return_std:
            # The same sqrt(var) is broadcast to the shape of the predictions.
            y_std = np.sqrt(self.var) + np.zeros_like(y)
            return y, y_std
        return y
- 先创建数据(x_train、y_train、x_test、y_test)
# Inputs: 100 evenly spaced points on [0, 1]; train and test share the same grid.
x_test = x_train = np.linspace(0, 1, 100)
# Training targets: sin(2*pi*x) plus Gaussian noise with standard deviation 0.25.
y_train = np.sin(2 * np.pi * x_train) + np.random.normal(scale=0.25, size=x_train.shape)
# Test targets: the noise-free curve.
y_test = np.sin(2 * np.pi * x_test)
- 利用PolynomialFeature类对数据进行多项式特征变换
# `n` is the polynomial degree chosen by the reader (it is not defined in this snippet).
# The same expansion is applied to the training and test inputs so that both
# end up with the same number of feature columns.
x_train_poly = PolynomialFeature(degree=n).transform(x_train)
x_test_poly = PolynomialFeature(degree=n).transform(x_test)
- 创建一个LinearRegression类的实例,利用训练数据进行训练,得到权重w
# Create the model and fit it on the expanded training data;
# fit() stores the learned weights in lr.w and the residual variance in lr.var.
lr = LinearRegression()
lr.fit(x_train_poly,y_train)
- 利用训练好的参数,对测试数据进行预测
# Predict on the test inputs with the fitted weights.
# x_test_poly is expected to be x_test expanded with the same PolynomialFeature
# degree used for training, so that its columns line up with lr.w.
y_pred = lr.predict(x_test_poly)
3. Conclusion
当采样数据较少时,高阶多项式模型容易出现过拟合。增大采样数量或加入正则化项,可以有效缓解这个问题。
网友评论