# File name:
# Description:
# Author: zeng
# Time: 2019/11/21
# Change Activity:
import numpy as np
import matplotlib.pyplot as plt
seed = 1
def zero_mean(data):  # zero-center the data (subtract the column means)
    mean = np.mean(data, axis=0)
    return data - mean
def normalize(data):  # normalize by the global maximum
    m = np.max(data)
    return data / m
def norm_2(vector):  # Euclidean (L2) norm
    return np.sqrt(np.sum(vector**2))
def theta(vector1, vector2):  # cosine of the angle between two vectors
    modulus1 = norm_2(vector1)
    modulus2 = norm_2(vector2)
    return np.dot(vector1, vector2) / (modulus1 * modulus2)
class PcaByNet:
"""
A neural network based PCA (Oja PCA), it is a Online Learning Model.
"""
    def __init__(self, n_features):
        self.y = None                               # network output
        self.weight = np.random.randn(n_features)   # randomly initialized weight vector
        self.norm_recorder = []                     # weight norm at each iteration
        self.theta_recorder = []                    # cosine of the angle to the reference eigenvector at each iteration
    def fit(self, data, v, max_epochs=1000, lr_rate=1e-3, e=1e-5):  # training loop
        n_sample = len(data)
        for epoch in range(max_epochs):
            for i in range(n_sample):
                x = data[i]
                w = self.weight
                y = np.matmul(x, w)
                change = lr_rate * (y*x - y*y*w)                # Oja's rule weight update
                self.weight = w + change                        # apply the update
                self.y = np.matmul(x, self.weight)              # update the output
                error = np.abs(norm_2(self.weight) - 1)         # how far the (updated) weight norm is from 1
                self.norm_recorder.append(norm_2(self.weight))  # record the weight norm
                self.theta_recorder.append(theta(self.weight, v))  # record cosine of the angle to v
                if error < e:
                    print('Converged; stopping at iteration:', epoch * n_sample + i, epoch, i)
                    return epoch * n_sample + i                 # number of iterations used
        print('Stopped: did not converge within max_epochs')
        return max_epochs * n_sample                            # number of iterations used
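# Classic PCA via eigendecomposition of the covariance matrix; used in the demo
# below as the reference solution the network's weight vector is compared against.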
class PCA:
    def __init__(self, n_components=1):
        self.n_components = n_components
        self.cov = None
        self.e_values = None
        self.e_vectors = None
    def fit(self, data):
        # data = self.zero_mean(data)
        cov = np.cov(data, rowvar=False)  # rowvar=False: each row of data is one sample; if True, each column is one sample
        self.cov = cov
        e_values, e_vectors = np.linalg.eig(cov)
        e_vectors = np.transpose(e_vectors)  # note: eig returns eigenvectors as columns, so transpose to get them as rows
        # sort eigenvalues (and their eigenvectors) in ascending order
        index = sorted(range(len(e_values)), key=lambda k: e_values[k])
        e_values = np.array(sorted(e_values))
        e_vectors = np.array([e_vectors[k] for k in index])
        self.e_values = e_values
        self.e_vectors = e_vectors
        return e_values[-self.n_components:], e_vectors[-self.n_components:]  # the largest n_components eigenvalues and their eigenvectors
def data_generator(n_samples, limit=20):
    np.random.seed(seed)
    x = np.random.randint(limit, size=(n_samples, 1))
    y = 2*x
    noise1 = np.random.randn(n_samples, 1)
    noise2 = np.random.randn(n_samples, 1)
    x = x + noise1
    y = y + noise2
    data = np.hstack((x, y))
    return data
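# Note: the generated samples scatter around the line y = 2x, so the leading
# principal direction should come out roughly along (1, 2)/sqrt(5) ≈ (0.447, 0.894), up to sign.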
if __name__ == '__main__':
    ds = data_generator(20)
    # initialize the network model
    pca = PcaByNet(2)
    ds = normalize(ds)
    ds_pro = zero_mean(ds)
    # principal components via the matrix (eigendecomposition) method
    model = PCA()
    e, v = model.fit(ds_pro)
    print('PCA via matrix method')
    print(f'Eigenvalues\t{e}')
    print(f'Eigenvectors\t{v}')
    # principal component via the network
    pca.fit(ds_pro, v[0], 1000, 0.01, 1e-4)  # hyperparameters for the normalized data
    # pca.fit(ds_pro, v[0], 1000, 0.001, 1e-4)  # hyperparameters for un-normalized data
    print('PCA via neural-network method')
    print(f'Eigenvector\t{pca.weight}')
    # scatter plot of the (preprocessed) data
    x1 = ds_pro[:, 0]
    x2 = ds_pro[:, 1]
    plt.subplot(131)
    plt.scatter(x1, x2)
    # reference line and coordinate axes
    plt.plot([-0.5, 0.5], [-1, 1])  # slanted reference line along the data direction (slope 2)
    plt.plot([-1, 1], [0, 0], c='black')  # horizontal axis
    plt.plot([0, 0], [-1, 1], c='black')  # vertical axis
    # plot the principal directions found by both methods
    plt.plot([0, pca.weight[0]], [0, pca.weight[1]], c='red', label='neural net')
    plt.plot([0, v[0, 0]], [0, v[0, 1]], c='green', label='matrix')
    plt.legend()
    # weight norm over iterations
    plt.subplot(132)
    plt.plot(pca.norm_recorder)
    # cosine of the angle to the reference eigenvector over iterations
    plt.subplot(133)
    plt.plot(pca.theta_recorder)
    plt.show()