SVD 分解
例如在推荐系统中,如果我们为每一个商品建立一个维度,那么数据样本的维度就会上万,以当前的计算能力还是吃不消的。这就需要我们对数据进行降维。降维技术要尽可能保留数据结构;由于降维后数据结构被保留下来,获得的结果是原始数据空间的可靠近似。
我们先来看 PCA 是如何实现降维的。
假设我们的数据集为 $X \in \mathbb{R}^{n \times m}$,也就是有 n 个样本,每一个样本有 m 个特征(也就是有 m 个维度),PCA 就是将这个 $n \times m$ 矩阵投射到更小的子空间 $\mathbb{R}^{n \times k}$($k < m$)上。
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt
# Load the source picture and derive an 'LA' (luminance + alpha) copy of it.
img = Image.open(fp='images/naruto.png')
imggray = img.convert(mode='LA')

# Preview the original picture on a 9x6 inch canvas.
plt.figure(figsize=(9, 6))
plt.imshow(X=img);
![](https://img.haomeiwen.com/i8207483/39fd1d26b326a736.png)
# Turn band 0 (luminance) of the grayscale image into a 2-D float array of
# shape (height, width).  np.asarray reads the pixel buffer directly, which
# avoids building an intermediate Python list of every pixel and the in-place
# `.shape` assignment.  A plain ndarray is used instead of np.matrix, which
# NumPy no longer recommends; np.linalg.svd and plt.imshow accept it as-is.
imgmat = np.asarray(imggray.getchannel(0), dtype=float)

# Show the grayscale matrix that is about to be decomposed.
plt.figure(figsize=(9, 6))
plt.imshow(imgmat, cmap='gray');
![](https://img.haomeiwen.com/i8207483/67d51e889a71fb7f.png)
# Full SVD of the image matrix: imgmat = U @ diag(sigma) @ V.
# NOTE: np.linalg.svd returns the right singular vectors already transposed,
# so `V` holds them as rows; `sigma` is a 1-D array sorted in descending order.
U, sigma, V = np.linalg.svd(imgmat)
print(sigma.shape)

# Rank-1 approximation: keep only the largest singular value together with
# its left/right singular vectors.  np.asarray + `@` replaces the discouraged
# np.matrix class (whose `*` operator means matrix multiplication).
reconstimg = np.asarray(U)[:, :1] @ np.diag(sigma[:1]) @ np.asarray(V)[:1, :]
plt.imshow(reconstimg, cmap='gray');
(180,)
![](https://img.haomeiwen.com/i8207483/4a0b3145b864afea.png)
# Small demo of np.diag: a 1-D vector becomes the main diagonal of a square
# matrix whose remaining entries are zero.
a = np.array((1, 2, 3))
a_diag = np.diag(a, k=0)
print(a_diag)
[[1 0 0]
[0 2 0]
[0 0 3]]
# Rank-i approximations for i = 2 and 3, shown one figure per rank.
for i in range(2, 4):
    # Keep the first i singular triplets: U[:, :i] @ diag(sigma[:i]) @ V[:i, :].
    # np.asarray + `@` is used instead of the discouraged np.matrix class.
    reconstimg = np.asarray(U)[:, :i] @ np.diag(sigma[:i]) @ np.asarray(V)[:i, :]
    plt.imshow(reconstimg, cmap='gray')
    title = "n = %s" % i
    plt.title(title)
    plt.show()
![](https://img.haomeiwen.com/i8207483/94f2d40ab71f80fd.png)
![](https://img.haomeiwen.com/i8207483/60fe67d24419e48c.png)
# Rank-i approximations for i = 10 and 11, shown one figure per rank.
for i in range(10, 12):
    # Keep the first i singular triplets: U[:, :i] @ diag(sigma[:i]) @ V[:i, :].
    # np.asarray + `@` is used instead of the discouraged np.matrix class.
    reconstimg = np.asarray(U)[:, :i] @ np.diag(sigma[:i]) @ np.asarray(V)[:i, :]
    # Shapes of the full factors, printed on every pass for reference.
    print(U.shape)
    print(sigma.shape)
    print(V.shape)
    plt.imshow(reconstimg, cmap='gray')
    title = "n = %s" % i
    plt.title(title)
    plt.show()
(180, 180)
(180,)
(180, 180)
![](https://img.haomeiwen.com/i8207483/afbe2bd8162015e4.png)
[图片上传失败...(image-fe3053-1580739613951)]
(180, 180)
(180,)
(180, 180)
![](https://img.haomeiwen.com/i8207483/414e82668b26a9d2.png)
# Reload the picture and shrink it to a 28x28 thumbnail in one expression.
img = Image.open('images/naruto.png').resize(size=(28, 28))
plt.imshow(img)
print(img)
<PIL.Image.Image image mode=RGBA size=28x28 at 0x1271D9490>
![](https://img.haomeiwen.com/i8207483/e452154f8d7b0d9e.png)
# Flatten band 0 of `img` into a 1-D float vector (one entry per pixel).
img_nparray = np.array(list(img.getdata(band=0)), dtype=float)
print(np.shape(img_nparray))
(784,)
# Draw 784 samples from a normal distribution and report their summary
# statistics (mean first, then variance, maximum and minimum).
# The distribution parameters get their own names so that the singular-value
# array `sigma` returned by np.linalg.svd is not overwritten by a scalar.
noise_mu, noise_sigma = 0, 1
s = np.random.normal(noise_mu, noise_sigma, 784)

mean_s = np.mean(s)
print(mean_s)
var_s = np.var(s)
max_s = np.max(s)
min_s = np.min(s)
print(var_s)
print(max_s)
print(min_s)
0.06221887461516067
1.0457062296216892
3.2441223180124705
-3.5929682195036348
from pylab import imshow, show, get_cmap
# Render a 28x28 grid of uniform random values with the "Spectral" colormap.
Z = np.random.random(size=(28, 28))  # Test data
plt.imshow(
    X=Z,
    cmap=get_cmap("Spectral"),
    interpolation='nearest',
)
plt.show()
![](https://img.haomeiwen.com/i8207483/5f4b8a7707846e0d.png)
网友评论