126 QRFactorization() =
default;
130 void compute(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A)
171 const int k = std::min(m, n);
180 for (
int i = 0, ii = 0; i < k; ++i, ii += row_stride + col_stride) {
182 tau[i] = gen_reflector(m - i, A[ii], A + ii + row_stride, row_stride);
186 apply_reflector(m - i, n - i - 1, A + ii, row_stride, math::adjoint(tau[i]),
187 A + ii + col_stride, row_stride, col_stride);
192 void compute(
int rows,
int cols, value_type* A, storage_order order = row_major)
194 int row_stride = (order == row_major ? cols : 1);
195 int col_stride = (order == row_major ? 1 : rows);
196 compute(rows, cols, row_stride, col_stride, A);
200 void factorize(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A)
216 compute(rows, cols, row_stride, col_stride, A);
221 int k = std::min(m, n);
223 this->row_stride = row_stride;
224 this->col_stride = col_stride;
232 for (
int i = 0, ia = 0; i < m; ++i, ia += row_stride)
233 for (
int j = k, ja = k * col_stride; j < n; ++j, ja += col_stride)
234 q[ia + ja] = (i == j ? math::identity<value_type>() : math::zero<value_type>());
236 for (
int i = k - 1, ic = i * col_stride, ii = i * (row_stride + col_stride);
237 i >= 0; --i, ic -= col_stride, ii -= row_stride + col_stride) {
240 apply_reflector(m - i, n - i - 1, r + ii, row_stride, tau[i], &q[ii + col_stride], row_stride, col_stride);
244 for (
int j = 0, jr = 0; j < i; ++j, jr += row_stride)
245 q[jr + ic] = math::zero<value_type>();
247 q[ii] = math::identity<value_type>() - tau[i];
249 for (
int j = i + 1, jr = j * row_stride; j < m; ++j, jr += row_stride)
250 q[jr + ic] = -tau[i] * r[jr + ic];
254 void factorize(
int rows,
int cols, value_type* A, storage_order order = row_major)
256 int row_stride = (order == row_major ? cols : 1);
257 int col_stride = (order == row_major ? 1 : rows);
258 factorize(rows, cols, row_stride, col_stride, A);
262 value_type R(
int i,
int j)
const
265 return math::zero<value_type>();
266 return r[i * row_stride + j * col_stride];
270 value_type Q(
int i,
int j)
const
272 return q[i * row_stride + j * col_stride];
276 void solve(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A,
277 const value_type* b, value_type* x,
bool computed =
false)
280 std::copy(b, b + rows, f.begin());
287 compute(rows, cols, row_stride, col_stride, A);
289 for (
int i = 0, ii = 0; i < cols; ++i, ii += row_stride + col_stride)
290 apply_reflector(rows - i, 1, r + ii, row_stride, math::adjoint(tau[i]), &f[i], 1, 1);
292 std::copy(f.begin(), f.begin() + cols, x);
294 for (
int i = cols, ia = (cols - 1) * col_stride; i-- > 0; ia -= col_stride) {
295 value_type rii = r[i * (row_stride + col_stride)];
296 if (math::is_zero(rii))
298 x[i] = math::inverse(rii) * x[i];
300 for (
int j = 0, ja = 0; j < i; ++j, ja += row_stride)
301 x[j] -= r[ia + ja] * x[i];
309 for (
int i = 0, n = cols * rows; i < n; ++i)
310 A[i] = math::adjoint(A[i]);
311 compute(cols, rows, col_stride, row_stride, A);
314 for (
int i = 0, ia = 0; i < rows; ++i, ia += col_stride) {
315 value_type rii = math::adjoint(r[i * (row_stride + col_stride)]);
316 if (math::is_zero(rii))
318 f[i] = math::inverse(rii) * f[i];
320 for (
int j = i + 1, ja = j * row_stride; j < rows; ++j, ja += row_stride)
321 f[j] -= math::adjoint(r[ia + ja]) * f[i];
324 std::copy(f.begin(), f.end(), x);
325 std::fill(x + rows, x + cols, math::zero<value_type>());
327 for (
int i = rows; i-- > 0;) {
328 int ii = i * (col_stride + row_stride);
329 apply_reflector(cols - i, 1, r + ii, col_stride, tau[i], x + i, 1, 1);
334 void solve(
int rows,
int cols, value_type* A,
const value_type* b, value_type* x,
335 storage_order order = row_major,
bool computed =
false)
337 int row_stride = (order == row_major ? cols : 1);
338 int col_stride = (order == row_major ? 1 : rows);
339 solve(rows, cols, row_stride, col_stride, A, b, x, computed);
344 return sizeof(value_type) * (tau.size() + f.size() + q.size());
349 typedef typename math::scalar_of<value_type>::type scalar_type;
351 static scalar_type sqr(scalar_type x) {
return x * x; }
358 value_type* r =
nullptr;
359 std::vector<value_type> tau, f;
360 std::vector<value_type> q;
362 static value_type gen_reflector(
int order, value_type& alpha, value_type* x,
int stride)
406 value_type tau = math::zero<value_type>();
411 scalar_type xnorm2 = 0;
412 for (
int i = 0, ii = 0; i < n; ++i, ii += stride)
413 xnorm2 += sqr(math::norm(x[ii]));
415 if (math::is_zero(xnorm2))
418 scalar_type beta = -std::abs(sqrt(sqr(math::norm(alpha)) + xnorm2));
419 if (Alina::detail::real(alpha) < 0)
422 tau = math::identity<value_type>() - math::inverse(beta) * alpha;
423 alpha = math::inverse(alpha - beta * math::identity<value_type>());
425 for (
int i = 0, ii = 0; i < n; ++i, ii += stride)
426 x[ii] = alpha * x[ii];
428 alpha = beta * math::identity<value_type>();
433 apply_reflector(
int m,
int n,
const value_type* v,
int v_stride, value_type tau,
434 value_type* C,
int row_stride,
int col_stride)
476 if (math::is_zero(tau))
480 for (
int i = 0, ia = 0; i < n; ++i, ia += col_stride) {
481 value_type s = math::adjoint(C[ia]);
482 for (
int j = 1, jv = v_stride, ja = row_stride; j < m; ++j, jv += v_stride, ja += row_stride) {
483 s += math::adjoint(C[ja + ia]) * v[jv];
486 s = tau * math::adjoint(s);
488 for (
int j = 1, jv = v_stride, ja = row_stride; j < m; ++j, jv += v_stride, ja += row_stride) {
489 C[ja + ia] -= v[jv] * s;
499class QRFactorization<value_type, typename std::enable_if<math::is_static_matrix<value_type>::value>
::type>
503 typedef typename Alina::math::rhs_of<value_type>::type rhs_type;
507 void compute(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A)
517 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
518 base.compute(rows * M, cols * N, 1, rows * M, buf.data());
521 void factorize(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A)
531 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
532 base.factorize(m, n, 1, m, buf.data());
535 void factorize(
int rows,
int cols, value_type* A, storage_order order = row_major)
537 int row_stride = (order == row_major ? cols : 1);
538 int col_stride = (order == row_major ? 1 : rows);
539 factorize(rows, cols, row_stride, col_stride, A);
542 value_type R(
int i,
int j)
const
550 v = math::zero<value_type>();
553 for (
int ii = 0; ii < N; ++ii)
554 for (
int jj = 0; jj < M; ++jj)
555 v(ii, jj) = base.R(i * N + ii, j * M + jj);
562 value_type Q(
int i,
int j)
const
569 for (
int ii = 0; ii < N; ++ii)
570 for (
int jj = 0; jj < M; ++jj)
571 v(ii, jj) = base.Q(i * N + ii, j * M + jj);
577 void solve(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A,
578 const rhs_type* f, rhs_type* x,
bool computed =
false)
588 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
589 base.solve(m, n, 1, m, buf.data(),
590 reinterpret_cast<const scalar_type*
>(f),
591 reinterpret_cast<scalar_type*
>(x),
595 void solve(
int rows,
int cols, value_type* A,
const rhs_type* f, rhs_type* x,
596 storage_order order = row_major,
bool computed =
false)
598 int row_stride = (order == row_major ? cols : 1);
599 int col_stride = (order == row_major ? 1 : rows);
600 solve(rows, cols, row_stride, col_stride, A, f, x, computed);
605 return base.bytes() +
sizeof(scalar_type) * buf.size();
610 typedef typename Alina::math::scalar_of<value_type>::type scalar_type;
615 QRFactorization<scalar_type> base;
616 std::vector<scalar_type> buf;
618 void copy_to_scalar_buf(
int rows,
int cols,
int row_stride,
int col_stride, value_type* A)
623 buf.resize(M * rows * N * cols);
625 const int scalar_rows = M * rows;
627 for (
int i = 0, ib = 0; i < rows; ++i)
628 for (
int ii = 0; ii < M; ++ii, ++ib)
629 for (
int j = 0, jb = 0; j < cols; ++j)
630 for (
int jj = 0; jj < N; ++jj, jb += scalar_rows)
631 buf[ib + jb] = A[i * row_stride + j * col_stride](ii, jj);