一、参考
单边雅各宾(Jacobi)变换的复数形式推导;
对称矩阵的特征值(EVD)计算的雅各宾(Jacobi)方法;
单边雅各宾(Jacobi)法求解矩阵奇异值分解(SVD);
An FPGA Implementation of the Hestenes-Jacobi Algorithm for Singular Value Decomposition
On an Implementation of Two-Sided Jacobi Method
二、理论
首先要解决双边的Givens
变换的求解:
![](https://img.haomeiwen.com/i19536936/08dde3f2a7fa9204.png)
![](https://img.haomeiwen.com/i19536936/2c2fef62ebf23bdc.png)
三、核心代码
template <typename _T>
__inline__ __host__ __device__ void cu_Givens_device(
_T alpha, _T beta,
_T &c, _T &s)
{
if (alpha == 0)
{
c = 0;
s = 1;
}
else if (beta == 0)
{
c = 1;
s = 0;
}
else if (abs(beta) > abs(alpha))
{
_T tau = -alpha / beta;
s = 1 / sqrt(1 + tau * tau);
c = s * tau;
}
else
{
_T tau = -beta / alpha;
c = 1 / sqrt(1 + tau * tau);
s = c * tau;
}
return;
}
/** ***************************************************************************
* @brief 计算单次Jacobi左旋转
*
* @tparam _T
* @param[inout] pA 矩阵数据
* @param[in] c
* @param[in] s
* @param[in] i 参与计算的第i行
* @param[in] k 参与计算的第k行
* @param[in] m 矩阵的行数
* @param[in] n 矩阵的列数
* @return __inline__
* ************************************************************************** */
template <typename _T>
__inline__ __host__ __device__ void cu_GivensMatMulLeft_device(
_T *pA, _T c, _T s,
int i, int k, int m, int n)
{
for (int j = 0; j < n; j++)
{
_T a = pA[i * n + j];
_T b = pA[k * n + j];
pA[i * n + j] = c * a - s * b;
pA[k * n + j] = s * a + c * b;
}
return;
}
/** ***************************************************************************
* @brief 计算单次Jacobi右旋转
*
* @tparam _T
* @param[inout] pA 矩阵数据
* @param[in] c
* @param[in] s
* @param[in] i 参与计算的第i列
* @param[in] k 参与计算的第k列
* @param[in] m 矩阵的行数
* @param[in] n 矩阵的列数
* @return __inline__
* ************************************************************************** */
template <typename _T>
__inline__ __host__ __device__ void cu_GivensMatMulRight_device(
_T *pA, _T c, _T s,
int i, int k, int m, int n)
{
for (int j = 0; j < m; j++)
{
_T a = pA[j * n + i];
_T b = pA[j * n + k];
pA[j * n + i] = c * a - s * b;
pA[j * n + k] = s * a + c * b;
}
return;
}
/** ***************************************************************************
* @brief 双边Jacobi法解算两边的的旋转矩阵
*
* @tparam _T
* @param[in] w 左上元素
* @param[in] x 右上元素
* @param[in] y 左下元素
* @param[in] z 右下元素
* @param[out] c1
* @param[out] s1
* @param[out] c2
* @param[out] s2
* @return __inline__
* ************************************************************************** */
template <typename _T>
__inline__ __host__ __device__ void cu_2SideJacobi_device(
_T w, _T x, _T y, _T z,
_T &c1, _T &s1, _T &c2, _T &s2)
{
_T u_1 = z - w;
_T u_2 = y + x;
_T v_1 = z + w;
_T v_2 = y - x;
_T cos_1;
_T sin_1;
_T cos_2;
_T sin_2;
_T pho{};
_T tao{};
_T eps = 1e-12;
if (abs(u_2) < eps * abs(u_1))
{
cos_1 = 1;
sin_1 = 0;
}
else
{
pho = u_1 / u_2;
if (pho >= 0)
{
tao = 1. / (pho + sqrt(1 + pho * pho));
}
else
{
tao = 1. / (pho - sqrt(1 + pho * pho));
}
cos_1 = 1 / sqrt(1 + tao * tao);
sin_1 = tao * cos_1;
}
if (abs(v_2) < eps * abs(v_1))
{
cos_2 = 1;
sin_2 = 0;
}
else
{
pho = v_1 / v_2;
if (pho >= 0)
{
tao = 1. / (pho + sqrt(1 + pho * pho));
}
else
{
tao = 1. / (pho - sqrt(1 + pho * pho));
}
cos_2 = 1 / sqrt(1 + tao * tao);
sin_2 = tao * cos_2;
}
_T ss12 = sin_1 * sin_2;
_T cc12 = cos_1 * cos_2;
_T sc12 = sin_1 * cos_2;
_T cs12 = cos_1 * sin_2;
c1 = cc12 + ss12;
s1 = sc12 - cs12;
c2 = cc12 - ss12;
s2 = sc12 + cs12;
return;
}
template <typename _T>
__inline__ __host__ __device__ void cu_Over2SideJacobiSVD_device(
_T *A, _T *U, _T *V, _T *S,
int m, int n, int &rank)
{
_T w, x, y, z,
c1, s1, c2, s2;
int rots = 1;
_T eps = 1e-12;
// 在这儿先把m>n的部分化为0
while (rots > 0 && m > n)
{
rots = 0;
for (int i = 0; i < m - n; i++)
{
for (int j = 0; j < n; j++)
{
_T alpha = A[i * n + j];
_T beta = A[(i + n) * n + j];
if (abs(beta) > eps * abs(alpha))
{
rots++;
cu_Givens_device(alpha, beta, c1, s1);
cu_GivensMatMulLeft_device(A, c1, s1, i, j, m, n);
cu_GivensMatMulLeft_device(U, c1, s1, i, j, m, n);
}
}
}
}
// 这儿是执行SVD分解
rots = 1;
while (rots > 0)
{
rots = 0;
for (int i = 0; i < n - 1; i++)
{
for (int j = i + 1; j < n; j++)
{
w = A[i * n + i];
x = A[i * n + j];
y = A[j * n + i];
z = A[j * n + j];
// _T eps_wz = eps * sqrt(abs(w * z));
_T eps_wz = eps * (abs(w) + abs(z));
if (abs(x) > eps_wz ||
abs(y) > eps_wz)
{
rots++;
cu_2SideJacobi_device(w, x, y, z, c1, s1, c2, s2);
cu_GivensMatMulLeft_device(A, c1, s1, i, j, m, n);
cu_GivensMatMulRight_device(A, c2, s2, i, j, m, n);
#ifdef _DEBUG
cout << "A = [" << endl;
for (size_t yy = 0; yy < n; yy++)
{
for (size_t xx = 0; xx < n; xx++)
{
cout << A[yy * n + xx] << " ";
}
cout << ";" << endl;
}
cout << "]" << endl;
#endif
cu_GivensMatMulLeft_device(U, c1, s1, i, j, m, n);
cu_GivensMatMulRight_device(V, c2, s2, i, j, m, n);
}
}
}
}
// 计算S
for (int i = 0; i < n; i++)
{
S[i] = A[i * n + i];
}
// 计算rank
rank = 0;
for (int i = 0; i < n; i++)
{
if (abs(S[i]) > eps)
{
rank++;
}
}
return;
}
四、一个简单测试代码
这个测试代码是把它当cpu代码执行的,主要验证精度,不影响在cuda上调用。
int main(int argc, char const *argv[])
{
const int n = 3;
const int m = 3;
/* code */
double A[m][n] = {
{1e40, 1e29, 1e19},
{1e29, 1e20, 1e9},
{1e19, 1e9, 1},
// {10, 11, 12},
// {13, 14, 15}
};
double U[m][n] =
{
{1, 0, 0},
{0, 1, 0},
{0, 0, 1}};
double V[m][n] =
{
{1, 0, 0},
{0, 1, 0},
{0, 0, 1}};
double S[n] = {0, 0, 0};
int rank{};
// 对A先标准化
cu_Over2SideJacobiSVD_device(
(double *)A, (double *)U, (double *)V, (double *)S,
m, n, rank);
std::cout << "U:\n";
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
{
std::cout << U[i][j] << " ";
}
}
std::cout << "\nV:\n";
for (int i = 0; i < m; i++)
{
for (int j = 0; j < n; j++)
{
std::cout << V[i][j] << " ";
}
}
std::cout << "\nS:\n";
for (int i = 0; i < n; i++)
{
std::cout << S[i] << " ";
}
std::cout << "\nrank:\n"
<< rank << std::endl;
return 0;
}
五、执行结果
A = [
1e+40 1.75922e+13 1e+19 ;
1.75932e+13 9.9e+19 9e+08 ;
1e+19 9e+08 1 ;
]
A = [
1e+40 1.75923e+13 1e+19 ;
1.75933e+13 9.9e+19 0 ;
1e+19 9.01653e-12 0.991818 ;
]
U:
1 1e-11 0 -1e-11 1 9.09091e-12 9.09091e-23 -9.09091e-12 1
V:
1 -1e-11 9.09091e-23 1e-11 1 -9.09091e-12 0 9.09091e-12 1
S:
1e+40 9.9e+19 0.991818
rank:
3
和之前的单边发对比,精度略有降低,主要差异为0.991818
的单边法和标准值都为0.9818182
,但是精度仍然高于matlab
直接调用的eig
函数,低于matlab
的svd
方法,也低于opencv
的svd
方法。看起来还是的实际使用单边法。
网友评论