Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
QRFactorizationImpl.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* QRFactorizationImpl.h (C) 2000-2026 */
9/* */
10/* QR factorization of a dense matrix. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_QRFACTORIZATIONIMPL_H
13#define ARCCORE_ALINA_QRFACTORIZATIONIMPL_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25/*
26 * This is a port of ZGEQR2 procedure from LAPACK and its dependencies.
27 * The original code included the following copyright notice:
28 * \verbatim
29 Copyright (c) 1992-2013 The University of Tennessee and The University
30 of Tennessee Research Foundation. All rights
31 reserved.
32 Copyright (c) 2000-2013 The University of California Berkeley. All
33 rights reserved.
34 Copyright (c) 2006-2013 The University of Colorado Denver. All rights
35 reserved.
36
37 $COPYRIGHT$
38
39 Additional copyrights may follow
40
41 $HEADER$
42
43 Redistribution and use in source and binary forms, with or without
44 modification, are permitted provided that the following conditions are
45 met:
46
47 - Redistributions of source code must retain the above copyright
48 notice, this list of conditions and the following disclaimer.
49
50 - Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer listed
52 in this license in the documentation and/or other materials
53 provided with the distribution.
54
55 - Neither the name of the copyright holders nor the names of its
56 contributors may be used to endorse or promote products derived from
57 this software without specific prior written permission.
58
59 The copyright holders provide no reassurances that the source code
60 provided does not infringe any patent, copyright, or any other
61 intellectual property rights of third parties. The copyright holders
62 disclaim any liability to any recipient for claims brought against
63 recipient by any third party for infringement of that parties
64 intellectual property rights.
65
66 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
67 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
68 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
69 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
70 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
71 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
72 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
73 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
74 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
75 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
76 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
77 * \endverbatim
78 */
79/*---------------------------------------------------------------------------*/
80/*---------------------------------------------------------------------------*/
81
82#include <vector>
83#include <complex>
84#include <cmath>
85
86#include "arccore/alina/AlinaUtils.h"
87#include "arccore/alina/ValueTypeInterface.h"
88
89/*---------------------------------------------------------------------------*/
90/*---------------------------------------------------------------------------*/
91
92namespace Arcane::Alina::detail
93{
94
95/*---------------------------------------------------------------------------*/
96/*---------------------------------------------------------------------------*/
97
98enum storage_order
99{
100 row_major,
101 col_major
102};
103
104template <class T>
105inline T real(T a)
106{
107 return a;
108}
109
110template <class T>
111inline T real(std::complex<T> a)
112{
113 return std::real(a);
114}
115
116/*---------------------------------------------------------------------------*/
117/*---------------------------------------------------------------------------*/
118/*
119 * \brief In-place QR factorization of a dense matrix.
120 */
121template <typename value_type, class Enable = void>
122class QRFactorization
123{
124 public:
125
126 QRFactorization() = default;
127
128 public:
129
130 void compute(int rows, int cols, int row_stride, int col_stride, value_type* A)
131 {
132 /*
133 * Ported from ZGEQR2
134 * ==================
135 *
136 * Computes a QR factorization of an matrix A:
137 * A = Q * R.
138 *
139 * Arguments
140 * =========
141 *
142 * rows The number of rows of the matrix A.
143 * cols The number of columns of the matrix A.
144 *
145 * A On entry, the rows by cols matrix A.
146 * On exit, the elements on and above the diagonal of the
147 * array contain the min(m,n) by n upper trapezoidal
148 * matrix R (R is upper triangular if m >= n); the
149 * elements below the diagonal, with the array TAU,
150 * represent the unitary matrix Q as a product of
151 * elementary reflectors (see Further Details).
152 *
153 * Further Details
154 * ===============
155 *
156 * The matrix Q is represented as a product of elementary reflectors
157 *
158 * Q = H(1) H(2) . . . H(k), where k = min(m,n).
159 *
160 * Each H(i) has the form
161 *
162 * H(i) = I - tau * v * v'
163 *
164 * where tau is a value_type scalar, and v is a value_type vector
165 * with v[0:i) = 0 and v[i] = 1; v[i:m) is stored on exit in
166 * A[i+1:m)[i], and tau in tau[i].
167 * ==============================================================
168 */
169 const int m = rows;
170 const int n = cols;
171 const int k = std::min(m, n);
172
173 if (k <= 0)
174 return;
175
176 r = A;
177
178 tau.resize(k);
179
180 for (int i = 0, ii = 0; i < k; ++i, ii += row_stride + col_stride) {
181 // Generate elementary reflector H(i) to annihilate A[i+1:m)[i]
182 tau[i] = gen_reflector(m - i, A[ii], A + ii + row_stride, row_stride);
183
184 if (i + 1 < n) {
185 // Apply H(i)' to A[i:m)[i+1:n) from the left
186 apply_reflector(m - i, n - i - 1, A + ii, row_stride, math::adjoint(tau[i]),
187 A + ii + col_stride, row_stride, col_stride);
188 }
189 }
190 }
191
192 void compute(int rows, int cols, value_type* A, storage_order order = row_major)
193 {
194 int row_stride = (order == row_major ? cols : 1);
195 int col_stride = (order == row_major ? 1 : rows);
196 compute(rows, cols, row_stride, col_stride, A);
197 }
198
199 // Computes Q explicitly.
200 void factorize(int rows, int cols, int row_stride, int col_stride, value_type* A)
201 {
202 /*
203 * Ported from ZUNG2R
204 * ==================
205 *
206 * Generates an m by n matrix Q with orthonormal columns, which is
207 * defined as the first n columns of a product of k elementary
208 * reflectors of order m
209 *
210 * Q = H(1) H(2) . . . H(k)
211 *
212 * as returned by compute() [ZGEQR2].
213 *
214 * ==============================================================
215 */
216 compute(rows, cols, row_stride, col_stride, A);
217
218 m = rows;
219 n = cols;
220
221 int k = std::min(m, n);
222
223 this->row_stride = row_stride;
224 this->col_stride = col_stride;
225
226 q.resize(m * n);
227
228 // Initialise columns k+1:n to zero.
229 // [In the original code these were initialized to the columns of
230 // the unit matrix, but since k = min(n,m), the main diagonal is
231 // never seen here].
232 for (int i = 0, ia = 0; i < m; ++i, ia += row_stride)
233 for (int j = k, ja = k * col_stride; j < n; ++j, ja += col_stride)
234 q[ia + ja] = (i == j ? math::identity<value_type>() : math::zero<value_type>());
235
236 for (int i = k - 1, ic = i * col_stride, ii = i * (row_stride + col_stride);
237 i >= 0; --i, ic -= col_stride, ii -= row_stride + col_stride) {
238 // Apply H(i) to A[i:m)[i+1:n) from the left
239 if (i < n - 1)
240 apply_reflector(m - i, n - i - 1, r + ii, row_stride, tau[i], &q[ii + col_stride], row_stride, col_stride);
241
242 // Copy i-th reflector (including zeros and unit diagonal)
243 // to the column of Q to be processed next
244 for (int j = 0, jr = 0; j < i; ++j, jr += row_stride)
245 q[jr + ic] = math::zero<value_type>();
246
247 q[ii] = math::identity<value_type>() - tau[i];
248
249 for (int j = i + 1, jr = j * row_stride; j < m; ++j, jr += row_stride)
250 q[jr + ic] = -tau[i] * r[jr + ic];
251 }
252 }
253
254 void factorize(int rows, int cols, value_type* A, storage_order order = row_major)
255 {
256 int row_stride = (order == row_major ? cols : 1);
257 int col_stride = (order == row_major ? 1 : rows);
258 factorize(rows, cols, row_stride, col_stride, A);
259 }
260
261 // Returns element of the matrix R.
262 value_type R(int i, int j) const
263 {
264 if (j < i)
265 return math::zero<value_type>();
266 return r[i * row_stride + j * col_stride];
267 }
268
269 // Returns element of the matrix Q.
270 value_type Q(int i, int j) const
271 {
272 return q[i * row_stride + j * col_stride];
273 }
274
275 // Solves the system Q R x = f
276 void solve(int rows, int cols, int row_stride, int col_stride, value_type* A,
277 const value_type* b, value_type* x, bool computed = false)
278 {
279 f.resize(rows);
280 std::copy(b, b + rows, f.begin());
281
282 if (rows >= cols) {
283 // We are solving overdetermined (tall) system Ax = f by
284 // writing the matrix A as A = QR and solving for x as
285 // x = R^-1 Q^-1 f = R^-1 Q^T f.
286 if (!computed)
287 compute(rows, cols, row_stride, col_stride, A);
288
289 for (int i = 0, ii = 0; i < cols; ++i, ii += row_stride + col_stride)
290 apply_reflector(rows - i, 1, r + ii, row_stride, math::adjoint(tau[i]), &f[i], 1, 1);
291
292 std::copy(f.begin(), f.begin() + cols, x);
293
294 for (int i = cols, ia = (cols - 1) * col_stride; i-- > 0; ia -= col_stride) {
295 value_type rii = r[i * (row_stride + col_stride)];
296 if (math::is_zero(rii))
297 continue;
298 x[i] = math::inverse(rii) * x[i];
299
300 for (int j = 0, ja = 0; j < i; ++j, ja += row_stride)
301 x[j] -= r[ia + ja] * x[i];
302 }
303 }
304 else {
305 // We are solving underdetermined (wide) system Ax = f by
306 // writing the matrix A^T as A^T = QR and solving for x as
307 // x = Q^-T R^-T f = Q R^-T f.
308 if (!computed) {
309 for (int i = 0, n = cols * rows; i < n; ++i)
310 A[i] = math::adjoint(A[i]);
311 compute(cols, rows, col_stride, row_stride, A);
312 }
313
314 for (int i = 0, ia = 0; i < rows; ++i, ia += col_stride) {
315 value_type rii = math::adjoint(r[i * (row_stride + col_stride)]);
316 if (math::is_zero(rii))
317 continue;
318 f[i] = math::inverse(rii) * f[i];
319
320 for (int j = i + 1, ja = j * row_stride; j < rows; ++j, ja += row_stride)
321 f[j] -= math::adjoint(r[ia + ja]) * f[i];
322 }
323
324 std::copy(f.begin(), f.end(), x);
325 std::fill(x + rows, x + cols, math::zero<value_type>());
326
327 for (int i = rows; i-- > 0;) {
328 int ii = i * (col_stride + row_stride);
329 apply_reflector(cols - i, 1, r + ii, col_stride, tau[i], x + i, 1, 1);
330 }
331 }
332 }
333
334 void solve(int rows, int cols, value_type* A, const value_type* b, value_type* x,
335 storage_order order = row_major, bool computed = false)
336 {
337 int row_stride = (order == row_major ? cols : 1);
338 int col_stride = (order == row_major ? 1 : rows);
339 solve(rows, cols, row_stride, col_stride, A, b, x, computed);
340 }
341
342 size_t bytes()
343 {
344 return sizeof(value_type) * (tau.size() + f.size() + q.size());
345 }
346
347 private:
348
349 typedef typename math::scalar_of<value_type>::type scalar_type;
350
351 static scalar_type sqr(scalar_type x) { return x * x; }
352
353 int m = 0;
354 int n = 0;
355 int row_stride = 0;
356 int col_stride = 0;
357
358 value_type* r = nullptr;
359 std::vector<value_type> tau, f;
360 std::vector<value_type> q;
361
362 static value_type gen_reflector(int order, value_type& alpha, value_type* x, int stride)
363 {
364 /*
365 * Ported from ZLARFG
366 * ==================
367 *
368 * Generates a value_type elementary reflector H of order n, such
369 * that
370 *
371 * H' * ( alpha ) = ( beta ), H' * H = I.
372 * ( x ) ( 0 )
373 *
374 * where alpha and beta are scalars, with beta real, and x is an
375 * (n-1)-element value_type vector. H is represented in the form
376 *
377 * H = I - tau * ( 1 ) * ( 1 v' ) ,
378 * ( v )
379 *
380 * where tau is a value_type scalar and v is a value_type
381 * (n-1)-element vector. Note that H is not hermitian.
382 *
383 * If the elements of x are all zero and alpha is real,
384 * then tau = 0 and H is taken to be the unit matrix.
385 *
386 * Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 .
387 *
388 * Arguments
389 * =========
390 *
391 * order The order of the elementary reflector.
392 *
393 * alpha On entry, the value alpha.
394 * On exit, it is overwritten with the value beta.
395 *
396 * x dimension (1+(order-2)*abs(stride))
397 * On entry, the vector x.
398 * On exit, it is overwritten with the vector v.
399 *
400 * stride The increment between elements of x.
401 *
402 * Returns the value tau.
403 *
404 * ==============================================================
405 */
406 value_type tau = math::zero<value_type>();
407 if (order <= 1)
408 return tau;
409 int n = order - 1;
410
411 scalar_type xnorm2 = 0;
412 for (int i = 0, ii = 0; i < n; ++i, ii += stride)
413 xnorm2 += sqr(math::norm(x[ii]));
414
415 if (math::is_zero(xnorm2))
416 return tau;
417
418 scalar_type beta = -std::abs(sqrt(sqr(math::norm(alpha)) + xnorm2));
419 if (Alina::detail::real(alpha) < 0)
420 beta = -beta;
421
422 tau = math::identity<value_type>() - math::inverse(beta) * alpha;
423 alpha = math::inverse(alpha - beta * math::identity<value_type>());
424
425 for (int i = 0, ii = 0; i < n; ++i, ii += stride)
426 x[ii] = alpha * x[ii];
427
428 alpha = beta * math::identity<value_type>();
429 return tau;
430 }
431
432 static void
433 apply_reflector(int m, int n, const value_type* v, int v_stride, value_type tau,
434 value_type* C, int row_stride, int col_stride)
435 {
436 /*
437 * Ported from ZLARF
438 * =================
439 *
440 * Applies an elementary reflector H to an m-by-n matrix C from
441 * the left. H is represented in the form
442 *
443 * H = I - v * tau * v'
444 *
445 * where tau is a value_type scalar and v is a value_type vector.
446 *
447 * If tau = 0, then H is taken to be the unit matrix.
448 *
449 * To apply H' (the conjugate transpose of H), supply adjoint(tau)
450 * instead of tau.
451 *
452 * Arguments
453 * =========
454 *
455 * m The number of rows of the matrix C.
456 *
457 * n The number of columns of the matrix C.
458 *
459 * v The vector v in the representation of H.
460 * v is not used if tau = 0.
461 * The value of v[0] is ignored and assumed to be 1.
462 *
463 * v_stride The increment between elements of v.
464 *
465 * tau The value tau in the representation of H.
466 *
467 * C On entry, the m-by-n matrix C.
468 * On exit, C is overwritten by the matrix H * C.
469 *
470 * row_stride The increment between the rows of C.
471 * col_stride The increment between the columns of C.
472 *
473 * ==============================================================
474 */
475
476 if (math::is_zero(tau))
477 return;
478
479 // w = C` * v; C -= tau * v * w`
480 for (int i = 0, ia = 0; i < n; ++i, ia += col_stride) {
481 value_type s = math::adjoint(C[ia]);
482 for (int j = 1, jv = v_stride, ja = row_stride; j < m; ++j, jv += v_stride, ja += row_stride) {
483 s += math::adjoint(C[ja + ia]) * v[jv];
484 }
485
486 s = tau * math::adjoint(s);
487 C[ia] -= s;
488 for (int j = 1, jv = v_stride, ja = row_stride; j < m; ++j, jv += v_stride, ja += row_stride) {
489 C[ja + ia] -= v[jv] * s;
490 }
491 }
492 }
493};
494
495/*---------------------------------------------------------------------------*/
496/*---------------------------------------------------------------------------*/
497
498template <class value_type>
499class QRFactorization<value_type, typename std::enable_if<math::is_static_matrix<value_type>::value>::type>
500{
501 public:
502
503 typedef typename Alina::math::rhs_of<value_type>::type rhs_type;
504
505 QRFactorization() {}
506
507 void compute(int rows, int cols, int row_stride, int col_stride, value_type* A)
508 {
511
512 m = rows;
513 n = cols;
514
515 r = A;
516
517 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
518 base.compute(rows * M, cols * N, 1, rows * M, buf.data());
519 }
520
521 void factorize(int rows, int cols, int row_stride, int col_stride, value_type* A)
522 {
525
526 m = rows * M;
527 n = cols * N;
528
529 r = A;
530
531 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
532 base.factorize(m, n, 1, m, buf.data());
533 }
534
535 void factorize(int rows, int cols, value_type* A, storage_order order = row_major)
536 {
537 int row_stride = (order == row_major ? cols : 1);
538 int col_stride = (order == row_major ? 1 : rows);
539 factorize(rows, cols, row_stride, col_stride, A);
540 }
541
542 value_type R(int i, int j) const
543 {
546
547 value_type v;
548
549 if (j < i) {
550 v = math::zero<value_type>();
551 }
552 else {
553 for (int ii = 0; ii < N; ++ii)
554 for (int jj = 0; jj < M; ++jj)
555 v(ii, jj) = base.R(i * N + ii, j * M + jj);
556 }
557
558 return v;
559 }
560
561 // Returns element of the matrix Q.
562 value_type Q(int i, int j) const
563 {
566
567 value_type v;
568
569 for (int ii = 0; ii < N; ++ii)
570 for (int jj = 0; jj < M; ++jj)
571 v(ii, jj) = base.Q(i * N + ii, j * M + jj);
572
573 return v;
574 }
575
576 // Solves the system Q R x = f
577 void solve(int rows, int cols, int row_stride, int col_stride, value_type* A,
578 const rhs_type* f, rhs_type* x, bool computed = false)
579 {
582
583 m = rows * M;
584 n = cols * N;
585
586 r = A;
587
588 copy_to_scalar_buf(rows, cols, row_stride, col_stride, A);
589 base.solve(m, n, 1, m, buf.data(),
590 reinterpret_cast<const scalar_type*>(f),
591 reinterpret_cast<scalar_type*>(x),
592 computed);
593 }
594
595 void solve(int rows, int cols, value_type* A, const rhs_type* f, rhs_type* x,
596 storage_order order = row_major, bool computed = false)
597 {
598 int row_stride = (order == row_major ? cols : 1);
599 int col_stride = (order == row_major ? 1 : rows);
600 solve(rows, cols, row_stride, col_stride, A, f, x, computed);
601 }
602
603 size_t bytes() const
604 {
605 return base.bytes() + sizeof(scalar_type) * buf.size();
606 }
607
608 private:
609
610 typedef typename Alina::math::scalar_of<value_type>::type scalar_type;
611
612 int m, n;
613 value_type* r;
614
615 QRFactorization<scalar_type> base;
616 std::vector<scalar_type> buf;
617
618 void copy_to_scalar_buf(int rows, int cols, int row_stride, int col_stride, value_type* A)
619 {
622
623 buf.resize(M * rows * N * cols);
624
625 const int scalar_rows = M * rows;
626
627 for (int i = 0, ib = 0; i < rows; ++i)
628 for (int ii = 0; ii < M; ++ii, ++ib)
629 for (int j = 0, jb = 0; j < cols; ++j)
630 for (int jj = 0; jj < N; ++jj, jb += scalar_rows)
631 buf[ib + jb] = A[i * row_stride + j * col_stride](ii, jj);
632 }
633};
634
635/*---------------------------------------------------------------------------*/
636/*---------------------------------------------------------------------------*/
637
638} // namespace Arcane::Alina::detail
639
640/*---------------------------------------------------------------------------*/
641/*---------------------------------------------------------------------------*/
642
643#endif
Number of columns for statically sized matrix types.
Number of rows for statically sized matrix types.