二元线性回归
import random

import numpy as np

#import pandas as pd
def cost_function(k1, k0, x, y):
    """Half mean-squared-error cost of the line y = k1*x + k0 on samples (x, y)."""
    xs = np.array(x)
    ys = np.array(y)
    residuals = k1 * xs + k0 - ys
    # 1/(2n) * sum of squared residuals
    return 1 / (2 * len(xs)) * np.sum(residuals ** 2)
def gradient(k1, k0, x, y):
    """Partial derivatives of the half-MSE cost, returned as (grad_k0, grad_k1)."""
    xs = np.array(x)
    ys = np.array(y)
    err = k1 * xs + k0 - ys
    n = len(xs)
    # d(cost)/dk0 = mean(err); d(cost)/dk1 = mean(err * x)
    return np.sum(err) / n, np.sum(err * xs) / n
def linear_regression_binary(x, y, epochs, lr, k0, k1):
    """Fit y = k1*x + k0 by batch gradient descent.

    Args:
        x, y: sample abscissas and ordinates (sequences of floats).
        epochs: number of gradient-descent iterations.
        lr: learning rate.
        k0, k1: initial intercept and slope.

    Returns:
        (best_k0, best_k1, best_cost) — the parameters with the lowest cost
        observed over all epochs (not necessarily the final parameters).
    """
    best_k0 = None
    best_k1 = None
    best_cost = np.inf
    for epoch in range(epochs):
        cost = cost_function(k1, k0, x, y)
        # BUG FIX: the original used %d for k0/k1/cost, silently truncating
        # the float values in the progress output.
        print("epoch: %d, k0: %f, k1: %f, cost: %f" % (epoch, k0, k1, cost))
        # Track the best parameters seen so far, since gradient descent with a
        # fixed learning rate is not guaranteed to improve monotonically.
        if cost <= best_cost:
            best_cost = cost
            best_k0 = k0
            best_k1 = k1
        grad_k0, grad_k1 = gradient(k1, k0, x, y)
        k0 = k0 - lr * grad_k0
        k1 = k1 - lr * grad_k1
    return best_k0, best_k1, best_cost
def gen_sample_data():
    """Generate 100 noisy samples from a randomly chosen line y = w*x + b.

    BUG FIX: the module uses the `random` stdlib module here but never
    imported it, so this raised NameError at runtime; `import random` has
    been added to the file's imports.

    Returns:
        (x_list, y_list, w, b): the sample coordinates plus the true slope
        and intercept used to generate them.  The additive noise term is
        random.random() * random.randint(-1, 1), i.e. strictly within (-1, 1).
    """
    w = random.randint(0, 10) + random.random()  # slope in [0, 11); random.random() is [0, 1)
    b = random.randint(0, 5) + random.random()   # intercept in [0, 6)
    num_samples = 100
    x_list = []
    y_list = []
    for _ in range(num_samples):
        x = random.randint(0, 100) * random.random()
        y = w * x + b + random.random() * random.randint(-1, 1)
        x_list.append(x)
        y_list.append(y)
    return x_list, y_list, w, b
#%%
if __name__ == '__main__':
    # Build a synthetic dataset; the true slope/intercept also serve as the
    # initial guess handed to gradient descent.
    x_list, y_list, k1, k0 = gen_sample_data()
    print(x_list)
    print(y_list)
    #%%
    k0, k1, cost = linear_regression_binary(x_list, y_list, 30, 0.0002, k1, k0)
多元线性回归
其中用到以下 NumPy 操作:
x_T = np.transpose(x) # 转置
x0 =np.ones((x.shape[0]))
x = np.column_stack((x,x0)) # 在矩阵x后面加新列x0
x = np.dot(x,y) # 若都为矩阵 则为矩阵乘法,若都为一维向量,则求点乘(内积)
import numpy as np
#import pandas as pd
'''
这里的向量k最后一个元素是k0,即截距,这是因为在给定的x的最后一位
都加了元素1,以便在计算时,符合所有k,包括k0。
'''
def cost_function(k, x, y):
    """Half-MSE cost for multivariate linear regression.

    A column of ones is appended to x, so the LAST element of k is the
    intercept k0 (as the module docstring above the function explains).

    Args:
        k: parameter vector of length n_features + 1 (last entry = intercept).
        x: sample matrix of shape (n_samples, n_features); lists are accepted.
        y: target vector of length n_samples.

    Returns:
        Scalar cost 1/(2n) * sum((x_aug @ k - y)**2).
    """
    # ROBUSTNESS FIX: the original called x.shape on the raw argument, which
    # raises AttributeError when x is a plain Python list.
    x = np.asarray(x)
    x0 = np.ones((x.shape[0]))
    x = np.column_stack((x, x0))
    x_T = np.transpose(x)
    y = np.array(y)
    k = np.array(k)
    cost = 1 / (2 * len(x)) * np.sum((np.dot(k, x_T) - y) ** 2)
    return cost
def gradient(k, x, y):
    """Gradient of the half-MSE cost with respect to the parameter vector k.

    A column of ones is appended to x, so the last component of the returned
    gradient corresponds to the intercept.

    Args:
        k: parameter vector of length n_features + 1 (last entry = intercept).
        x: sample matrix of shape (n_samples, n_features); lists are accepted.
        y: target vector of length n_samples.

    Returns:
        Gradient vector (x_aug.T @ (x_aug @ k - y)) / n, same length as k.
    """
    # ROBUSTNESS FIX: the original called x.shape on the raw argument, which
    # raises AttributeError when x is a plain Python list.
    x = np.asarray(x)
    x0 = np.ones((x.shape[0]))
    x = np.column_stack((x, x0))
    y = np.array(y)
    k = np.array(k)
    x_T = np.transpose(x)
    grad_k = np.dot((np.dot(k, x_T) - y), x) / len(x)
    return grad_k
def linear_regression_Non_binary(x, y, epochs, lr, k):
    """Fit a multivariate linear model by batch gradient descent.

    Args:
        x, y: sample matrix and target vector.
        epochs: number of gradient-descent iterations.
        lr: learning rate.
        k: initial parameter vector (its last element is the intercept).

    Returns:
        (best_k, best_cost) — the parameter vector with the lowest cost
        observed over all epochs (not necessarily the final one).
    """
    best_k = None
    best_cost = np.inf
    k = np.array(k)
    for epoch in range(epochs):
        cost = cost_function(k, x, y)
        # BUG FIX: the original '"%d" % k' raises TypeError for any k with
        # more than one element (and truncated the floats otherwise); %s
        # renders the whole parameter vector.
        print("epoch: %d, k: %s, cost: %f" % (epoch, k, cost))
        # Track the best parameters seen, since a fixed learning rate does
        # not guarantee monotone improvement.
        if cost <= best_cost:
            best_cost = cost
            best_k = k
        grad_k = gradient(k, x, y)
        k = k - lr * grad_k
    return best_k, best_cost
网友评论