Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
Relaxation.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Relaxation.h (C) 2000-2026 */
9/* */
10/* Various relaxation algorithms. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_RELAXATION_H
13#define ARCCORE_ALINA_RELAXATION_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include "arccore/alina/BackendInterface.h"
27#include "arccore/alina/Adapters.h"
28#include "arccore/alina/ValueTypeInterface.h"
29#include "arccore/alina/QRFactorizationImpl.h"
30#include "arccore/alina/BuiltinBackend.h"
31#include "arccore/alina/DenseMatrixInverseImpl.h"
32#include "arccore/alina/BackendInterface.h"
33#include "arccore/alina/ILUSolverImpl.h"
34#include "arccore/alina/ValueTypeInterface.h"
35#include "arccore/alina/RelaxationBase.h"
36
37#include <deque>
38#include <queue>
39
40/*---------------------------------------------------------------------------*/
41/*---------------------------------------------------------------------------*/
42
43namespace Arcane::Alina
44{
45
46/*---------------------------------------------------------------------------*/
47/*---------------------------------------------------------------------------*/
51template <class BlockBackend, template <class> class Relax>
53{
54 typedef typename BlockBackend::value_type BlockType;
55
56 template <class Backend>
57 class type
58 {
59 public:
60
61 typedef Backend backend_type;
62
63 typedef Relax<BlockBackend> Base;
64
65 typedef typename Backend::matrix matrix;
66 typedef typename Backend::vector vector;
67 typedef typename Base::params params;
68 typedef typename Backend::params backend_params;
69
70 typedef typename Backend::value_type value_type;
71 typedef typename Backend::col_type col_type;
72 typedef typename Backend::ptr_type ptr_type;
73 typedef typename BuiltinBackend<value_type, col_type, ptr_type>::matrix build_matrix;
74
    //! Builds the block-valued smoother from a scalar matrix \a A.
    //
    // The scalar matrix is regrouped into a matrix of BlockType blocks
    // (via adapter::block_matrix) before being handed to the wrapped
    // relaxation. NOTE(review): the temporary shared_ptr is dereferenced and
    // only the copy taken by `base` survives — assumes Base copies the
    // matrix rather than keeping a reference; TODO confirm.
    template <class Matrix>
    type(const Matrix& A,
         const params& prm = params(),
         const backend_params& bprm = backend_params())
    : base(*std::make_shared<CSRMatrix<BlockType, col_type, ptr_type>>(adapter::block_matrix<BlockType>(A)), prm, bprm)
    , nrows(backend::nbRow(A) / math::static_rows<BlockType>::value) // number of block rows
    {}
82
    //! Pre-smoothing step: reinterpret the scalar vectors as block-valued
    //! vectors and forward to the wrapped relaxation.
    template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
    void apply_pre(const Matrix& A,
                   const VectorRHS& rhs,
                   VectorX& x,
                   VectorTMP& tmp) const
    {
      auto F = backend::reinterpret_as_rhs<BlockType>(rhs);
      auto X = backend::reinterpret_as_rhs<BlockType>(x);
      auto T = backend::reinterpret_as_rhs<BlockType>(tmp);
      base.apply_pre(A, F, X, T);
    }

    //! Post-smoothing step: same block reinterpretation as apply_pre.
    template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
    void apply_post(const Matrix& A,
                    const VectorRHS& rhs,
                    VectorX& x,
                    VectorTMP& tmp) const
    {
      auto F = backend::reinterpret_as_rhs<BlockType>(rhs);
      auto X = backend::reinterpret_as_rhs<BlockType>(x);
      auto T = backend::reinterpret_as_rhs<BlockType>(tmp);
      base.apply_post(A, F, X, T);
    }

    //! Standalone application of the smoother (no pre/post split).
    template <class Matrix, class Vec1, class Vec2>
    void apply(const Matrix& A, const Vec1& rhs, Vec2&& x) const
    {
      auto F = backend::reinterpret_as_rhs<BlockType>(rhs);
      auto X = backend::reinterpret_as_rhs<BlockType>(x);
      base.apply(A, F, X);
    }
114
    //! Reference to the system matrix held by the wrapped relaxation.
    const matrix& system_matrix() const
    {
      return base.system_matrix();
    }

    //! Shared pointer to the system matrix held by the wrapped relaxation.
    std::shared_ptr<matrix> system_matrix_ptr() const
    {
      return base.system_matrix_ptr();
    }

    //! Memory footprint (in bytes) of the wrapped relaxation.
    size_t bytes() const
    {
      return base.bytes();
    }

   private:

    //! The wrapped block-valued relaxation.
    Base base;
    //! Number of block rows of the system matrix.
    size_t nrows;
134 };
135};
136
137/*---------------------------------------------------------------------------*/
138/*---------------------------------------------------------------------------*/
142template <class Backend, template <class> class Relax>
143class RelaxationAsPreconditioner
144{
145 public:
146
147 typedef Backend backend_type;
148
149 typedef Relax<Backend> smoother;
150
151 typedef typename Backend::matrix matrix;
152 typedef typename Backend::vector vector;
153 typedef typename smoother::params params;
154 typedef typename Backend::params backend_params;
155
156 typedef typename Backend::value_type value_type;
157 typedef typename Backend::col_type col_type;
158 typedef typename Backend::ptr_type ptr_type;
159 typedef typename BuiltinBackend<value_type, col_type, ptr_type>::matrix build_matrix;
160
161 template <class Matrix>
162 RelaxationAsPreconditioner(const Matrix& M,
163 const params& prm = params(),
164 const backend_params& bprm = backend_params())
165 : prm(prm)
166 {
167 init(std::make_shared<build_matrix>(M), bprm);
168 }
169
170 RelaxationAsPreconditioner(std::shared_ptr<build_matrix> M,
171 const params& prm = params(),
172 const backend_params& bprm = backend_params())
173 : prm(prm)
174 {
175 init(M, bprm);
176 }
177
178 template <class Vec1, class Vec2>
179 void apply(const Vec1& rhs, Vec2&& x) const
180 {
181 S->apply(*A, rhs, x);
182 }
183
184 const matrix& system_matrix() const
185 {
186 return *A;
187 }
188
189 std::shared_ptr<matrix> system_matrix_ptr() const
190 {
191 return A;
192 }
193
194 size_t bytes() const
195 {
196 size_t b = 0;
197
198 if (A)
199 b += backend::bytes(*A);
200 if (S)
201 b += backend::bytes(*S);
202
203 return b;
204 }
205
206 private:
207
208 params prm;
209
210 std::shared_ptr<matrix> A;
211 std::shared_ptr<smoother> S;
212
213 void init(std::shared_ptr<build_matrix> M, const backend_params& bprm)
214 {
215 A = Backend::copy_matrix(M, bprm);
216 S = std::make_shared<smoother>(*M, prm, bprm);
217 }
218
219 friend std::ostream& operator<<(std::ostream& os, const RelaxationAsPreconditioner& p)
220 {
221 os << "Relaxation as preconditioner" << std::endl;
222 os << " Unknowns: " << backend::nbRow(p.system_matrix()) << std::endl;
223 os << " Nonzeros: " << backend::nonzeros(p.system_matrix()) << std::endl;
224 os << " Memory: " << human_readable_memory(p.bytes()) << std::endl;
225
226 return os;
227 }
228};
229
230/*---------------------------------------------------------------------------*/
231/*---------------------------------------------------------------------------*/
243template <class Backend>
245: public RelaxationBase
246{
247 public:
248
249 typedef typename Backend::value_type value_type;
250 typedef typename Backend::vector vector;
251
252 typedef typename math::scalar_of<value_type>::type scalar_type;
253
255 struct params
256 {
259
261 // use boosting factor for a more conservative upper bound estimate
262 // See: Adams, Brezina, Hu, Tuminaro,
263 // PARALLEL MULTIGRID SMOOTHING: POLYNOMIAL VERSUS
264 // GAUSS-SEIDEL, J. Comp. Phys. 188 (2003) 593-610.
265 //
266 double higher = 1.0;
267
269 double lower = 1.0 / 30.0;
270
271 // Number of power iterations to apply for the spectral radius
272 // estimation. When 0, use Gershgorin disk theorem to estimate
273 // spectral radius.
274 Int32 power_iters = 0;
275
276 // Scale the system matrix
277 bool scale = false;
278
279 params() = default;
280
281 params(const PropertyTree& p)
282 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, degree)
283 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, higher)
284 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, lower)
285 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, power_iters)
286 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, scale)
287 {
288 p.check_params({ "degree", "higher", "lower", "power_iters", "scale" });
289 }
290
291 void get(PropertyTree& p, const std::string& path) const
292 {
293 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, degree);
294 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, higher);
295 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, lower);
296 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, power_iters);
297 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, scale);
298 }
299 } prm;
300
  //! Builds the Chebyshev smoother for matrix \a A.
  //
  // Estimates the spectral radius of A (optionally of the Jacobi-scaled
  // matrix when prm.scale is set), then derives the ellipse parameters
  // (centre d, semi-major axis c) used by the Chebyshev recurrence in solve().
  template <class Matrix>
  ChebyshevRelaxation(const Matrix& A, const params& prm,
                      const typename Backend::params& backend_prm)
  : prm(prm)
  , p(Backend::create_vector(backend::nbRow(A), backend_prm))
  , r(Backend::create_vector(backend::nbRow(A), backend_prm))
  {
    scalar_type hi, lo;

    //using spectral_radius;

    if (prm.scale) {
      // Keep the inverted diagonal to scale residuals during the iterations.
      M = Backend::copy_vector(diagonal(A, /*invert*/ true), backend_prm);
      hi = spectral_radius<true>(A, prm.power_iters);
    }
    else {
      hi = spectral_radius<false>(A, prm.power_iters);
    }

    // Bracket the smoothed part of the spectrum: [lower*rho, higher*rho].
    lo = hi * prm.lower;
    hi *= prm.higher;

    // Centre of ellipse containing the eigenvalues of A:
    d = 0.5 * (hi + lo);

    // Semi-major axis of ellipse containing the eigenvalues of A:
    c = 0.5 * (hi - lo);
  }
330
  //! Pre-smoothing step: one Chebyshev cycle (tmp is unused).
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP&) const
  {
    solve(A, rhs, x);
  }

  //! Post-smoothing step: identical to the pre-smoothing step.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP&) const
  {
    solve(A, rhs, x);
  }

  //! Standalone application: start from x = 0, then run one Chebyshev cycle.
  template <class Matrix, class VectorRHS, class VectorX>
  void apply(const Matrix& A, const VectorRHS& rhs, VectorX& x) const
  {
    backend::clear(x);
    solve(A, rhs, x);
  }

  //! Memory footprint of the work vectors (and scaling diagonal if present).
  size_t bytes() const
  {
    size_t b = backend::bytes(*p) + backend::bytes(*r);
    if (prm.scale)
      b += backend::bytes(*M);
    return b;
  }
357
 private:

  //! Inverted diagonal of A, only allocated when prm.scale is set.
  std::shared_ptr<typename Backend::matrix_diagonal> M;
  //! Work vectors (search direction p and residual r); mutable because
  //! solve() is const but reuses them between calls.
  mutable std::shared_ptr<vector> p, r;

  //! Ellipse parameters of the eigenvalue enclosure (see constructor).
  scalar_type c, d;

  //! Runs prm.degree iterations of the Chebyshev recurrence on x.
  //
  // Each iteration computes the (optionally diagonally scaled) residual,
  // updates the recurrence coefficients alpha/beta (special-cased for the
  // first two iterations), and accumulates the correction into x.
  template <class Matrix, class VectorB, class VectorX>
  void solve(const Matrix& A, const VectorB& b, VectorX& x) const
  {
    static const scalar_type one = math::identity<scalar_type>();
    static const scalar_type zero = math::zero<scalar_type>();

    scalar_type alpha = zero, beta = zero;

    for (unsigned k = 0; k < prm.degree; ++k) {
      // r = b - A x
      backend::residual(b, A, x, *r);

      if (prm.scale)
        backend::vmul(one, *M, *r, zero, *r);

      // Chebyshev recurrence coefficients.
      if (k == 0) {
        alpha = math::inverse(d);
        beta = zero;
      }
      else if (k == 1) {
        alpha = 2 * d * math::inverse(2 * d * d - c * c);
        beta = alpha * d - one;
      }
      else {
        alpha = math::inverse(d - 0.25 * alpha * c * c);
        beta = alpha * d - one;
      }

      // p = alpha*r + beta*p;  x += p
      backend::axpby(alpha, *r, beta, *p);
      backend::axpby(one, *p, one, x);
    }
  }
396};
397
398/*---------------------------------------------------------------------------*/
399/*---------------------------------------------------------------------------*/
406template <class Backend>
408: public RelaxationBase
409{
410 typedef typename Backend::value_type value_type;
411 typedef typename math::scalar_of<value_type>::type scalar_type;
412
414 struct params
415 {
417 scalar_type damping = 0.72;
418
419 params() = default;
420
421 params(scalar_type damping)
423 {}
424
425 params(const PropertyTree& p)
426 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, damping)
427 {
428 p.check_params({ "damping" });
429 }
430
431 void get(PropertyTree& p, const std::string& path) const
432 {
433 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, damping);
434 }
435 } prm;
436
437 std::shared_ptr<typename Backend::matrix_diagonal> dia;
438
440
445 template <class Matrix>
447 const params& prm,
448 const typename Backend::params& backend_prm)
449 : prm(prm)
450 , dia(Backend::copy_vector(diagonal(A, true), backend_prm))
451 {}
452
454
  //! Pre-smoothing step: x += damping * D^{-1} (rhs - A x).
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    backend::vmul(prm.damping, *dia, tmp, math::identity<scalar_type>(), x);
  }

  //! Post-smoothing step: identical to the pre-smoothing step.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    backend::vmul(prm.damping, *dia, tmp, math::identity<scalar_type>(), x);
  }

  //! Standalone application from a zero initial guess: x = D^{-1} rhs
  //! (no damping, matching a single undamped Jacobi step from x = 0).
  template <class Matrix, class VectorRHS, class VectorX>
  void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
  {
    backend::vmul(math::identity<scalar_type>(), *dia, rhs, math::zero<scalar_type>(), x);
  }

  //! Memory footprint of the stored inverted diagonal.
  size_t bytes() const
  {
    return backend::bytes(*dia);
  }
493};
494
495/*---------------------------------------------------------------------------*/
496/*---------------------------------------------------------------------------*/
507template <class Backend>
509: public RelaxationBase
510{
512 struct params
513 {
515 bool serial;
516
517 params()
518 : serial(false)
519 {}
520
521 params(const PropertyTree& p)
522 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, serial)
523 {
524 p.check_params({ "serial" });
525 }
526
527 void get(PropertyTree& p, const std::string& path) const
528 {
529 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, serial);
530 }
531 };
532
533 bool is_serial;
534
  //! Builds the Gauss-Seidel smoother for matrix \a A.
  //
  // Falls back to the sequential sweep when requested by prm.serial or when
  // fewer than 4 threads are available (level scheduling would not pay off);
  // otherwise precomputes level-scheduled forward and backward sweeps.
  template <class Matrix>
  GaussSeidelRelaxation(const Matrix& A, const params& prm, const typename Backend::params&)
  : is_serial(prm.serial || ConcurrencyBase::maxAllowedThread() < 4)
  {
    if (!is_serial) {
      forward = std::make_shared<parallel_sweep<true>>(A);
      backward = std::make_shared<parallel_sweep<false>>(A);
    }
  }
545
  //! Pre-smoothing step: one forward Gauss-Seidel sweep.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP&) const
  {
    if (is_serial)
      serial_sweep(A, rhs, x, true);
    else
      forward->sweep(rhs, x);
  }

  //! Post-smoothing step: one backward Gauss-Seidel sweep.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP&) const
  {
    if (is_serial)
      serial_sweep(A, rhs, x, false);
    else
      backward->sweep(rhs, x);
  }

  //! Standalone application: symmetric sweep (forward then backward)
  //! starting from x = 0.
  template <class Matrix, class VectorRHS, class VectorX>
  void apply(const Matrix& A, const VectorRHS& rhs, VectorX& x) const
  {
    backend::clear(x);
    if (is_serial) {
      serial_sweep(A, rhs, x, true);
      serial_sweep(A, rhs, x, false);
    }
    else {
      forward->sweep(rhs, x);
      backward->sweep(rhs, x);
    }
  }

  //! Memory footprint of the precomputed parallel sweeps (zero when serial).
  size_t bytes() const
  {
    size_t b = 0;
    if (forward)
      b += forward->bytes();
    if (backward)
      b += backward->bytes();
    return b;
  }
589
590 private:
591
592 template <class Matrix, class VectorRHS, class VectorX>
593 static void serial_sweep(const Matrix& A, const VectorRHS& rhs, VectorX& x, bool forward)
594 {
595 typedef typename backend::value_type<Matrix>::type val_type;
596 typedef typename math::rhs_of<val_type>::type rhs_type;
597
598 const ptrdiff_t n = backend::nbRow(A);
599
600 const ptrdiff_t beg = forward ? 0 : n - 1;
601 const ptrdiff_t end = forward ? n : -1;
602 const ptrdiff_t inc = forward ? 1 : -1;
603
604 for (ptrdiff_t i = beg; i != end; i += inc) {
605 val_type D = math::identity<val_type>();
606 rhs_type X;
607 X = rhs[i];
608
609 for (auto a = backend::row_begin(A, i); a; ++a) {
610 ptrdiff_t c = a.col();
611 val_type v = a.value();
612
613 if (c == i)
614 D = v;
615 else
616 X -= v * x[c];
617 }
618
619 x[i] = math::inverse(D) * X;
620 }
621 }
622
623 template <bool forward>
624 struct parallel_sweep
625 {
626 typedef typename Backend::value_type value_type;
627 typedef typename math::rhs_of<value_type>::type rhs_type;
628
629 struct task
630 {
631 ptrdiff_t beg, end;
632 task(ptrdiff_t beg, ptrdiff_t end)
633 : beg(beg)
634 , end(end)
635 {}
636 };
637
638 int nthreads = 0;
639
640 // thread-specific storage:
646
    //! Precomputes a level-scheduled Gauss-Seidel sweep of \a A.
    //
    // Rows are partitioned into dependency levels (rows in the same level
    // can be updated concurrently), each level is split into one task per
    // thread, and the matrix data is finally copied into per-thread arrays
    // for cache/NUMA locality.
    template <class Matrix>
    parallel_sweep(const Matrix& A)
    : nthreads(ConcurrencyBase::maxAllowedThread())
    , tasks(nthreads)
    , ptr(nthreads)
    , col(nthreads)
    , val(nthreads)
    , ord(nthreads)
    {

      ptrdiff_t n = backend::nbRow(A);
      ptrdiff_t nlev = 0;

      UniqueArray<ptrdiff_t> level(n, 0);
      UniqueArray<ptrdiff_t> order(n, 0);

      // 1. split rows into levels.
      // A row's level is 1 + the maximum level among the rows it depends on
      // (strictly lower columns for the forward sweep, strictly upper for
      // the backward sweep).
      ptrdiff_t beg = forward ? 0 : n - 1;
      ptrdiff_t end = forward ? n : -1;
      ptrdiff_t inc = forward ? 1 : -1;

      for (ptrdiff_t i = beg; i != end; i += inc) {
        ptrdiff_t l = level[i];

        for (auto a = backend::row_begin(A, i); a; ++a) {
          ptrdiff_t c = a.col();

          if (forward) {
            if (c >= i)
              continue;
          }
          else {
            if (c <= i)
              continue;
          }

          l = std::max(l, level[c] + 1);
        }

        level[i] = l;
        nlev = std::max(nlev, l + 1);
      }

      // 2. reorder matrix rows.
      // Counting sort by level: `start` becomes the offset of each level in
      // `order`, and `order` lists row indices grouped by level.
      UniqueArray<ptrdiff_t> start(nlev + 1, 0);

      for (ptrdiff_t i = 0; i < n; ++i)
        ++start[level[i] + 1];

      std::partial_sum(start.begin(), start.end(), start.begin());

      for (ptrdiff_t i = 0; i < n; ++i)
        order[start[level[i]]++] = i;

      // The fill loop above advanced each start[lev] to the next level's
      // offset; rotate right and reset start[0] to recover the offsets.
      std::rotate(start.begin(), start.end() - 1, start.end());
      start[0] = 0;

      // 3. Organize matrix rows into tasks.
      // Each level is split into nthreads tasks.
      UniqueArray<ptrdiff_t> thread_rows(nthreads, 0);
      UniqueArray<ptrdiff_t> thread_cols(nthreads, 0);

      // TODO: Use a grain size of 1 to use all threads
      arccoreParallelFor(0, nthreads, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
        for (ptrdiff_t tid = begin; tid < (begin + size); ++tid) {
          for (ptrdiff_t lev = 0; lev < nlev; ++lev) {
            // split each level into tasks.
            ptrdiff_t lev_size = start[lev + 1] - start[lev];
            ptrdiff_t chunk_size = (lev_size + nthreads - 1) / nthreads;

            ptrdiff_t beg = std::min(tid * chunk_size, lev_size);
            ptrdiff_t end = std::min(beg + chunk_size, lev_size);

            beg += start[lev];
            end += start[lev];

            tasks[tid].push_back(task(beg, end));

            // count rows and nonzeros in the current task
            thread_rows[tid] += end - beg;
            for (ptrdiff_t i = beg; i < end; ++i) {
              ptrdiff_t j = order[i];
              thread_cols[tid] += backend::row_nonzeros(A, j);
            }
          }
        }
      });

      // 4. reorganize matrix data for better cache and NUMA locality.
      // Each thread copies the rows of its own tasks into thread-local CSR
      // arrays (ptr/col/val) plus the original row index in `ord`; task
      // bounds are rewritten to index into this local storage.
      arccoreParallelFor(0, nthreads, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
        for (ptrdiff_t tid = begin; tid < (begin + size); ++tid) {
          col[tid].reserve(thread_cols[tid]);
          val[tid].reserve(thread_cols[tid]);
          ord[tid].reserve(thread_rows[tid]);
          ptr[tid].reserve(thread_rows[tid] + 1);
          ptr[tid].push_back(0);
          for (task& t : tasks[tid]) {
            ptrdiff_t loc_beg = ptr[tid].size() - 1;
            ptrdiff_t loc_end = loc_beg;

            for (ptrdiff_t r = t.beg; r < t.end; ++r, ++loc_end) {
              ptrdiff_t i = order[r];

              ord[tid].push_back(i);

              for (auto a = backend::row_begin(A, i); a; ++a) {
                col[tid].push_back(a.col());
                val[tid].push_back(a.value());
              }

              ptr[tid].push_back(col[tid].size());
            }

            t.beg = loc_beg;
            t.end = loc_end;
          }
        }
      });
    }
766
    //! Runs the precomputed level-scheduled sweep: x[i] <- D^{-1}(rhs[i] - sum).
    //
    // NOTE(review): currently iterates the per-thread task lists from a
    // single thread (see note below), so no synchronization is needed yet;
    // a parallel version requires a barrier between levels.
    template <class Vector1, class Vector2>
    void sweep(const Vector1& rhs, Vector2& x) const
    {
      // NOTE GG: This part was using OpenMP in the original implementation
      for (ptrdiff_t tid = 0; tid < nthreads; ++tid) {
        for (const task& t : tasks[tid]) {
          for (ptrdiff_t r = t.beg; r < t.end; ++r) {
            // Row data lives in the thread-local CSR copy built by the
            // constructor; `ord` maps back to the original row index.
            ptrdiff_t i = ord[tid][r];
            ptrdiff_t beg = ptr[tid][r];
            ptrdiff_t end = ptr[tid][r + 1];

            value_type D = math::identity<value_type>();
            rhs_type X;
            X = rhs[i];

            for (ptrdiff_t j = beg; j < end; ++j) {
              ptrdiff_t c = col[tid][j];
              value_type v = val[tid][j];

              if (c == i)
                D = v;
              else
                X -= v * x[c];
            }

            x[i] = math::inverse(D) * X;
          }

          // each task corresponds to a level, so we need
          // to synchronize across threads at this point:
          // TODO: Thread barrier
        }
      }
    }
801
802 size_t bytes() const
803 {
804 size_t b = 0;
805
806 for (int i = 0; i < nthreads; ++i) {
807 b += sizeof(task) * tasks[i].size();
808 b += backend::bytes(ptr[i]);
809 b += backend::bytes(col[i]);
810 b += backend::bytes(val[i]);
811 b += backend::bytes(ord[i]);
812 }
813
814 return b;
815 }
816 };
817
818 std::shared_ptr<parallel_sweep<true>> forward;
819 std::shared_ptr<parallel_sweep<false>> backward;
820};
821
822/*---------------------------------------------------------------------------*/
823/*---------------------------------------------------------------------------*/
831template <class Backend>
833: public RelaxationBase
834{
835 typedef typename Backend::value_type value_type;
836 typedef typename Backend::col_type col_type;
837 typedef typename Backend::ptr_type ptr_type;
838 typedef typename Backend::vector vector;
839 typedef typename Backend::matrix matrix;
840 typedef typename Backend::matrix_diagonal matrix_diagonal;
841
842 typedef typename math::scalar_of<value_type>::type scalar_type;
843 typedef Impl::ILUSolver<Backend> ilu_solve;
844
846 struct params
847 {
849 scalar_type damping;
850
853
854 params()
855 : damping(1)
856 {}
857
858 params(const PropertyTree& p)
859 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, damping)
860 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, solve)
861 {
862 p.check_params({ "damping", "solve" }, { "k" });
863 }
864
865 void get(PropertyTree& p, const std::string& path) const
866 {
867 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, damping);
868 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, solve);
869 }
870 } prm;
871
  //! Computes the ILU(0) factorization of \a A (CSR, sorted rows assumed).
  //
  // Produces strict lower (L) and strict upper (U) factors with the sparsity
  // of A plus the inverted diagonal D, then builds the triangular solver.
  // NOTE(review): accesses A.ptr/A.col/A.val directly — assumes the builtin
  // CSR matrix type with a diagonal entry present in every row.
  template <class Matrix>
  ILU0Relaxation(const Matrix& A, const params& prm, const typename Backend::params& bprm)
  : prm(prm)
  {
    typedef typename BuiltinBackend<value_type, col_type, ptr_type>::matrix build_matrix;
    const size_t n = backend::nbRow(A);

    // First pass: count strictly-lower / strictly-upper nonzeros.
    size_t Lnz = 0, Unz = 0;

    for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
      ptrdiff_t row_beg = A.ptr[i];
      ptrdiff_t row_end = A.ptr[i + 1];

      for (ptrdiff_t j = row_beg; j < row_end; ++j) {
        ptrdiff_t c = A.col[j];
        if (c < i)
          ++Lnz;
        else if (c > i)
          ++Unz;
      }
    }

    auto L = std::make_shared<build_matrix>();
    auto U = std::make_shared<build_matrix>();

    L->set_size(n, n);
    L->set_nonzeros(Lnz);
    L->ptr[0] = 0;
    U->set_size(n, n);
    U->set_nonzeros(Unz);
    U->ptr[0] = 0;

    size_t Lhead = 0;
    size_t Uhead = 0;

    auto D = std::make_shared<numa_vector<value_type>>(n, false);

    // work[c] points at the storage of column c of the current row
    // (in L, D or U), or is null when the column is absent.
    std::vector<value_type*> work(n, NULL);

    for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
      ptrdiff_t row_beg = A.ptr[i];
      ptrdiff_t row_end = A.ptr[i + 1];

      // Scatter row i of A into L / D / U and record positions in `work`.
      for (ptrdiff_t j = row_beg; j < row_end; ++j) {
        ptrdiff_t c = A.col[j];
        value_type v = A.val[j];

        if (c < i) {
          L->col[Lhead] = c;
          L->val[Lhead] = v;
          work[c] = L->val + Lhead;
          ++Lhead;
        }
        else if (c == i) {
          (*D)[i] = v;
          work[c] = &(*D)[i];
        }
        else {
          U->col[Uhead] = c;
          U->val[Uhead] = v;
          work[c] = U->val + Uhead;
          ++Uhead;
        }
      }

      L->ptr[i + 1] = Lhead;
      U->ptr[i + 1] = Uhead;

      // Eliminate the lower part of row i using the already-factored rows.
      for (ptrdiff_t j = row_beg; j < row_end; ++j) {
        ptrdiff_t c = A.col[j];

        // Exit if diagonal is reached
        if (c >= i) {
          precondition(c == i, "No diagonal value in system matrix");
          precondition(!math::is_zero((*D)[i]), "Zero pivot in ILU");

          (*D)[i] = math::inverse((*D)[i]);
          break;
        }

        // Compute the multiplier for jrow
        value_type tl = (*work[c]) * (*D)[c];
        *work[c] = tl;

        // Perform linear combination
        for (ptrdiff_t k = U->ptr[c]; k < static_cast<ptrdiff_t>(U->ptr[c + 1]); ++k) {
          value_type* w = work[U->col[k]];
          if (w)
            *w -= tl * U->val[k];
        }
      }

      // Get rid of zeros in the factors
      Lhead = L->ptr[i];
      Uhead = U->ptr[i];

      for (ptrdiff_t j = Lhead, e = L->ptr[i + 1]; j < e; ++j) {
        auto v = L->val[j];
        if (!math::is_zero(v)) {
          L->col[Lhead] = L->col[j];
          L->val[Lhead] = v;
          ++Lhead;
        }
      }

      for (ptrdiff_t j = Uhead, e = U->ptr[i + 1]; j < e; ++j) {
        auto v = U->val[j];
        if (!math::is_zero(v)) {
          U->col[Uhead] = U->col[j];
          U->val[Uhead] = v;
          ++Uhead;
        }
      }
      L->ptr[i + 1] = Lhead;
      U->ptr[i + 1] = Uhead;

      // Refresh work
      for (ptrdiff_t j = row_beg; j < row_end; ++j)
        work[A.col[j]] = NULL;
    }

    // Shrink to the compacted (zero-free) sizes.
    L->setNbNonZero(Lhead);
    U->setNbNonZero(Uhead);

    ilu = std::make_shared<ilu_solve>(L, U, D, prm.solve, bprm);
  }
999
  //! Pre-smoothing step: x += damping * (LU)^{-1} (rhs - A x).
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    ilu->solve(tmp);
    backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
  }

  //! Post-smoothing step: identical to the pre-smoothing step.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    ilu->solve(tmp);
    backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
  }

  //! Standalone application: x = (LU)^{-1} rhs (no damping).
  template <class Matrix, class VectorRHS, class VectorX>
  void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
  {
    backend::copy(rhs, x);
    ilu->solve(x);
  }

  //! Memory footprint of the triangular solver (factors included).
  size_t bytes() const
  {
    return ilu->bytes();
  }
1030
1031 private:
1032
1033 std::shared_ptr<ilu_solve> ilu;
1034};
1035
1036/*---------------------------------------------------------------------------*/
1037/*---------------------------------------------------------------------------*/
1041template <class Backend>
1043: public RelaxationBase
1044{
1045 typedef typename Backend::value_type value_type;
1046 typedef typename Backend::col_type col_type;
1047 typedef typename Backend::ptr_type ptr_type;
1048 typedef typename Backend::matrix matrix;
1049 typedef typename Backend::matrix_diagonal matrix_diagonal;
1050 typedef typename Backend::vector vector;
1051
1052 typedef typename math::scalar_of<value_type>::type scalar_type;
1053
1054 typedef Impl::ILUSolver<Backend> ilu_solve;
1055
1057 struct params
1058 {
1060 int k;
1061
1063 scalar_type damping;
1064
1067
1068 params()
1069 : k(1)
1070 , damping(1)
1071 {}
1072
1073 params(const PropertyTree& p)
1074 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, k)
1075 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, damping)
1076 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, solve)
1077 {
1078 p.check_params({ "k", "damping", "solve" });
1079 }
1080
1081 void get(PropertyTree& p, const std::string& path) const
1082 {
1083 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, k);
1084 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, damping);
1085 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, solve);
1086 }
1087 } prm;
1088
  //! Computes the ILU(k) factorization of \a A.
  //
  // Each row is built in a sparse accumulator (`sparse_vector`): lower
  // entries are eliminated in column order via the priority queue, fill-in
  // above level prm.k is dropped, and the result is split into strict lower
  // (L), inverted diagonal (D) and strict upper (U) parts.
  template <class Matrix>
  ILUKRelaxation(const Matrix& A, const params& prm, const typename Backend::params& bprm)
  : prm(prm)
  {
    typedef typename BuiltinBackend<value_type, col_type, ptr_type>::matrix build_matrix;

    const size_t n = backend::nbRow(A);

    size_t Anz = backend::nonzeros(A);

    // CSR arrays for the factors; Anz/3 is a heuristic initial capacity.
    std::vector<ptrdiff_t> Lptr;
    Lptr.reserve(n + 1);
    Lptr.push_back(0);
    std::vector<ptrdiff_t> Lcol;
    Lcol.reserve(Anz / 3);
    std::vector<value_type> Lval;
    Lval.reserve(Anz / 3);

    std::vector<ptrdiff_t> Uptr;
    Uptr.reserve(n + 1);
    Uptr.push_back(0);
    std::vector<ptrdiff_t> Ucol;
    Ucol.reserve(Anz / 3);
    std::vector<value_type> Uval;
    Uval.reserve(Anz / 3);

    // Fill level of each U entry (needed to level future fill-in).
    std::vector<int> Ulev;
    Ulev.reserve(Anz / 3);

    auto D = std::make_shared<numa_vector<value_type>>(n, false);

    sparse_vector w(n, prm.k);

    for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
      w.reset(i);

      // Scatter row i of A into the accumulator (level 0 entries).
      for (auto a = backend::row_begin(A, i); a; ++a) {
        w.add(a.col(), a.value(), 0);
      }

      // Eliminate lower entries in ascending column order; each elimination
      // may introduce fill-in at level max(lev_a, lev_u) + 1.
      while (!w.q.empty()) {
        nonzero& a = w.next_nonzero();
        a.val = a.val * (*D)[a.col];

        for (ptrdiff_t j = Uptr[a.col], e = Uptr[a.col + 1]; j < e; ++j) {
          int lev = std::max(a.lev, Ulev[j]) + 1;
          w.add(Ucol[j], -a.val * Uval[j], lev);
        }
      }

      w.sort();

      // Split the accumulated row into L / D / U.
      for (const nonzero& e : w.nz) {
        if (e.col < i) {
          Lcol.push_back(e.col);
          Lval.push_back(e.val);
        }
        else if (e.col == i) {
          (*D)[i] = math::inverse(e.val);
        }
        else {
          Ucol.push_back(e.col);
          Uval.push_back(e.val);
          Ulev.push_back(e.lev);
        }
      }

      Lptr.push_back(Lcol.size());
      Uptr.push_back(Ucol.size());
    }

    ilu = std::make_shared<ilu_solve>(
      std::make_shared<build_matrix>(n, n, Lptr, Lcol, Lval),
      std::make_shared<build_matrix>(n, n, Uptr, Ucol, Uval),
      D, prm.solve, bprm);
  }
1166
  //! Pre-smoothing step: x += damping * (LU)^{-1} (rhs - A x).
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    ilu->solve(tmp);
    backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
  }

  //! Post-smoothing step: identical to the pre-smoothing step.
  template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
  void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
  {
    backend::residual(rhs, A, x, tmp);
    ilu->solve(tmp);
    backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
  }

  //! Standalone application: x = (LU)^{-1} rhs (no damping).
  template <class Matrix, class VectorRHS, class VectorX>
  void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
  {
    backend::copy(rhs, x);
    ilu->solve(x);
  }

  //! Memory footprint of the triangular solver (factors included).
  size_t bytes() const
  {
    return ilu->bytes();
  }
1196
1197 private:
1198
1199 std::shared_ptr<ilu_solve> ilu;
1200
1201 struct nonzero
1202 {
1203 ptrdiff_t col;
1204 value_type val;
1205 int lev;
1206
1207 nonzero()
1208 : col(-1)
1209 {}
1210
1211 nonzero(ptrdiff_t col, const value_type& val, int lev)
1212 : col(col)
1213 , val(val)
1214 , lev(lev)
1215 {}
1216
1217 friend bool operator<(const nonzero& a, const nonzero& b)
1218 {
1219 return a.col < b.col;
1220 }
1221 };
1222
  //! Sparse accumulator for one row of the ILU(k) factorization.
  //
  // Holds the row's nonzeros in `nz` (a deque: push_back never invalidates
  // references, which next_nonzero() relies on), a column -> position map in
  // `idx`, and a min-priority queue `q` of the positions of entries left of
  // the diagonal, ordered by column, driving the elimination order.
  struct sparse_vector
  {
    //! Orders queue entries so that the smallest column is popped first.
    struct comp_indices
    {
      const std::deque<nonzero>& nz;

      comp_indices(const std::deque<nonzero>& nz)
      : nz(nz)
      {}

      // Greater-than comparison turns std::priority_queue (a max-heap)
      // into a min-heap on column index.
      bool operator()(int a, int b) const
      {
        return nz[a].col > nz[b].col;
      }
    };

    typedef std::priority_queue<int, std::vector<int>, comp_indices>
    priority_queue;

    //! Maximum allowed fill level (entries above it are dropped).
    int lfil;

    //! Nonzeros of the current row, in insertion order until sort().
    std::deque<nonzero> nz;
    //! idx[col] = position of col in nz, or -1 when absent.
    std::vector<ptrdiff_t> idx;
    //! Positions of sub-diagonal entries, pending elimination.
    priority_queue q;

    //! Index of the current row (entries with col < dia are queued).
    ptrdiff_t dia;

    sparse_vector(size_t n, int lfil)
    : lfil(lfil)
    , idx(n, -1)
    , q(comp_indices(nz))
    , dia(0)
    {}

    //! Adds (or accumulates into) entry \a col with fill level \a lev.
    void add(ptrdiff_t col, const value_type& val, int lev)
    {
      if (idx[col] < 0) {
        // New entry: kept only if its level is within the fill limit.
        if (lev <= lfil) {
          int p = nz.size();
          idx[col] = p;
          nz.push_back(nonzero(col, val, lev));
          if (col < dia)
            q.push(p);
        }
      }
      else {
        // Existing entry: accumulate and keep the smallest level.
        nonzero& a = nz[idx[col]];
        a.val += val;
        a.lev = std::min(a.lev, lev);
      }
    }

    typename std::deque<nonzero>::iterator begin()
    {
      return nz.begin();
    }

    typename std::deque<nonzero>::iterator end()
    {
      return nz.end();
    }

    //! Pops and returns the queued entry with the smallest column index.
    nonzero& next_nonzero()
    {
      int p = q.top();
      q.pop();
      return nz[p];
    }

    //! Sorts the accumulated entries by column index.
    void sort()
    {
      std::sort(nz.begin(), nz.end());
    }

    //! Clears the accumulator and starts a new row \a d.
    void reset(ptrdiff_t d)
    {
      for (const nonzero& e : nz)
        idx[e.col] = -1;
      nz.clear();
      dia = d;
    }
  };
1305};
1306
1307/*---------------------------------------------------------------------------*/
1308/*---------------------------------------------------------------------------*/
1309
1310namespace detail
1311{
1312 template <class Matrix>
1313 std::shared_ptr<Matrix> symb_product(const Matrix& A, const Matrix& B)
1314 {
1315 auto C = std::make_shared<Matrix>();
1316
1317 C->set_size(A.nbRow(), B.ncols);
1318
1319 auto A_ptr = A.ptr.data();
1320 auto A_col = A.col.data();
1321 auto B_ptr = B.ptr.data();
1322 auto B_col = B.col.data();
1323 auto C_ptr = C->ptr.data();
1324 C_ptr[0] = 0;
1325
1326 arccoreParallelFor(0, A.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1327 std::vector<ptrdiff_t> marker(B.ncols, -1);
1328
1329 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
1330 ptrdiff_t C_cols = 0;
1331 for (ptrdiff_t ja = A_ptr[ia], ea = A_ptr[ia + 1]; ja < ea; ++ja) {
1332 ptrdiff_t ca = A_col[ja];
1333
1334 for (ptrdiff_t jb = B_ptr[ca], eb = B_ptr[ca + 1]; jb < eb; ++jb) {
1335 ptrdiff_t cb = B_col[jb];
1336 if (marker[cb] != ia) {
1337 marker[cb] = ia;
1338 ++C_cols;
1339 }
1340 }
1341 }
1342 C_ptr[ia + 1] = C_cols;
1343 }
1344 });
1345
1346 C->set_nonzeros(C->scan_row_sizes(), /*need_values = */ false);
1347 auto C_col = C->col.data();
1348
1349 arccoreParallelFor(0, A.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1350 std::vector<ptrdiff_t> marker(B.ncols, -1);
1351
1352 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
1353 ptrdiff_t row_beg = C_ptr[ia];
1354 ptrdiff_t row_end = row_beg;
1355
1356 for (ptrdiff_t ja = A_ptr[ia], ea = A_ptr[ia + 1]; ja < ea; ++ja) {
1357 ptrdiff_t ca = A_col[ja];
1358
1359 for (ptrdiff_t jb = B_ptr[ca], eb = B_ptr[ca + 1]; jb < eb; ++jb) {
1360 ptrdiff_t cb = B_col[jb];
1361
1362 if (marker[cb] < row_beg) {
1363 marker[cb] = row_end;
1364 C_col[row_end] = cb;
1365 ++row_end;
1366 }
1367 }
1368 }
1369
1370 std::sort(C_col + row_beg, C_col + row_end);
1371 }
1372 });
1373
1374 return C;
1375 }
1376
1377} // namespace detail
1378
1379/*---------------------------------------------------------------------------*/
1380/*---------------------------------------------------------------------------*/
1384template <class Backend>
1386: public RelaxationBase
1387{
1388 typedef typename Backend::value_type value_type;
1389
1390 typedef ILU0Relaxation<Backend> Base;
1391
// Parameters of the ILU(p) smoother.  Inherits the ILU0 parameters
// (damping, triangular-solve options) and adds the pattern power `k`.
1393 struct params : Base::params
1394 {
1395 typedef typename Base::params BasePrm;
1396
// Pattern power: the factorization is computed on the symbolic sparsity
// pattern of A^(k+1); k == 0 falls back to plain ILU0 on A.
1398 int k;
1399
1400 params()
1401 : k(1)
1402 {}
1403
// Import parameters from a property tree; unknown keys are rejected.
1404 params(const PropertyTree& p)
1405 : BasePrm(p)
1406 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, k)
1407 {
1408 p.check_params( { "k", "damping", "solve" });
1409 }
1410
// Export all parameters (including the base-class ones) under `path`.
1411 void get(PropertyTree& p, const std::string& path) const
1412 {
1413 BasePrm::get(p, path);
1414 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, k);
1415 }
1416 } prm;
1417
1419 template <class Matrix>
1420 ILUPRelaxation(const Matrix& A, const params& prm, const typename Backend::params& bprm)
1421 : prm(prm)
1422 {
1423 if (prm.k == 0) {
1424 base = std::make_shared<Base>(A, prm, bprm);
1425 }
1426 else {
1427 auto P = detail::symb_product(A, A);
1428 for (int k = 1; k < prm.k; ++k) {
1429 P = detail::symb_product(*P, A);
1430 }
1431
1432 ptrdiff_t n = backend::nbRow(A);
1433 P->val.resize(P->nbNonZero());
1434
1435 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1436 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1437 ptrdiff_t p_beg = P->ptr[i];
1438 ptrdiff_t p_end = P->ptr[i + 1];
1439 ptrdiff_t a_beg = A.ptr[i];
1440 ptrdiff_t a_end = A.ptr[i + 1];
1441
1442 std::fill(P->val + p_beg, P->val + p_end, math::zero<value_type>());
1443
1444 for (ptrdiff_t ja = a_beg, ea = a_end, jp = p_beg, ep = p_end; ja < ea; ++ja) {
1445 ptrdiff_t ca = A.col[ja];
1446 while (jp < ep && P->col[jp] < ca)
1447 ++jp;
1448 if (P->col[jp] == ca)
1449 P->val[jp] = A.val[ja];
1450 }
1451 }
1452 });
1453
1454 base = std::make_shared<Base>(*P, prm, bprm);
1455 }
1456 }
1457
1459 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1460 void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1461 {
1462 base->apply_pre(A, rhs, x, tmp);
1463 }
1464
1466 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1467 void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1468 {
1469 base->apply_post(A, rhs, x, tmp);
1470 }
1471
1472 template <class Matrix, class VectorRHS, class VectorX>
1473 void apply(const Matrix& A, const VectorRHS& rhs, VectorX& x) const
1474 {
1475 base->apply(A, rhs, x);
1476 }
1477
1478 size_t bytes() const
1479 {
1480 return base->bytes();
1481 }
1482
1483 private:
1484
1485 std::shared_ptr<Base> base;
1486};
1487
1488/*---------------------------------------------------------------------------*/
1489/*---------------------------------------------------------------------------*/
1498template <class Backend>
1500: public RelaxationBase
1501{
1502 typedef typename Backend::value_type value_type;
1503 typedef typename Backend::col_type col_type;
1504 typedef typename Backend::ptr_type ptr_type;
1505 typedef typename Backend::matrix matrix;
1506 typedef typename Backend::matrix_diagonal matrix_diagonal;
1507 typedef typename Backend::vector vector;
1508
1509 typedef typename math::scalar_of<value_type>::type scalar_type;
1510
1511 typedef Impl::ILUSolver<Backend> ilu_solve;
1512
// Parameters of the ILUT (dual-threshold incomplete LU) smoother.
1514 struct params
1515 {
// Fill factor: each L/U part of a row keeps at most p times the number
// of nonzeros the corresponding part of A has.
1517 double p = 2.0;
1518
// Drop tolerance, relative to the magnitude of the current row.
1520 double tau = 1.0e-2;
1521
// Damping factor applied to the correction in apply_pre/apply_post.
1523 double damping = 1.0;
1524
// NOTE(review): the declaration of the `solve` member (parameters of
// the sparse triangular solver, imported below and passed to the
// ilu_solve constructor) is not visible in this extraction -- verify
// against the upstream source.
1527
1528 params() = default;
1529
// Import parameters from a property tree; unknown keys are rejected.
1530 params(const PropertyTree& p)
1531 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, p)
1532 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, tau)
1533 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, damping)
1534 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, solve)
1535 {
1536 p.check_params( { "p", "tau", "damping", "solve" });
1537 }
1538
// Export the parameters under `path`.  The member `p` shadows the tree
// argument `p`, hence the local copy `p2` used with the export macro.
// NOTE(review): the macro presumably derives the key from the variable
// name, which would export under "p2" while the import key is "p" --
// confirm the round-trip.
1539 void get(PropertyTree& p, const std::string& path) const
1540 {
1541 double p2 = this->p;
1542 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, p2);
1543 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, tau);
1544 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, damping);
1545 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, solve);
1546 }
1547 } prm;
1548
// Constructs the ILUT smoother for the system matrix: IKJ-ordered
// incomplete LU with dual dropping (Saad's ILUT).  Per row, entries not
// exceeding a relative tolerance are dropped, and only the largest
// len*p entries of each L/U part are kept.
1550 template <class Matrix>
1551 ILUTRelaxation(const Matrix& A, const params& prm, const typename Backend::params& bprm)
1552 : prm(prm)
1553 {
1554 const size_t n = backend::nbRow(A);
1555
// First pass over A: estimate the storage of L and U from the fill
// factor p and the per-row sub-/super-diagonal counts.
1556 size_t Lnz = 0, Unz = 0;
1557
1558 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
1559 ptrdiff_t row_beg = A.ptr[i];
1560 ptrdiff_t row_end = A.ptr[i + 1];
1561
1562 int lenL = 0, lenU = 0;
1563 for (ptrdiff_t j = row_beg; j < row_end; ++j) {
1564 ptrdiff_t c = A.col[j];
1565 if (c < i)
1566 ++lenL;
1567 else if (c > i)
1568 ++lenU;
1569 }
1570
1571 Lnz += static_cast<size_t>(lenL * prm.p);
1572 Unz += static_cast<size_t>(lenU * prm.p);
1573 }
1574
1575 auto L = std::make_shared<build_matrix>();
1576 auto U = std::make_shared<build_matrix>();
1577
1578 L->set_size(n, n);
1579 L->set_nonzeros(Lnz);
1580 L->ptr[0] = 0;
1581 U->set_size(n, n);
1582 U->set_nonzeros(Unz);
1583 U->ptr[0] = 0;
1584
// D receives the inverted diagonal of U (see sparse_vector::move_to).
1585 auto D = std::make_shared<numa_vector<value_type>>(n, false);
1586
// Sparse accumulator for the current working row.
1587 sparse_vector w(n);
1588
1589 for (ptrdiff_t i = 0, Lhead = 0, Uhead = 0; i < static_cast<ptrdiff_t>(n); ++i) {
1590 w.dia = i;
1591
1592 int lenL = 0;
1593 int lenU = 0;
1594
1595 scalar_type tol = math::zero<scalar_type>();
1596
// Scatter row i of A into the accumulator; sum the entry magnitudes.
1597 for (auto a = backend::row_begin(A, i); a; ++a) {
1598 w[a.col()] = a.value();
1599 tol += math::norm(a.value());
1600
1601 if (a.col() < i)
1602 ++lenL;
1603 if (a.col() > i)
1604 ++lenU;
1605 }
// Relative drop tolerance: row magnitude scaled by tau over the number
// of off-diagonal entries.
// NOTE(review): divides by zero when row i holds only a diagonal entry
// (lenL + lenU == 0) -- confirm such rows cannot occur here.
1606 tol *= prm.tau / (lenL + lenU);
1607
// Eliminate sub-diagonal entries in increasing column order (IKJ ILU):
// scale by the stored inverted pivot, then subtract the corresponding
// row of U.  Entries at or below tol propagate no update.
1608 while (!w.q.empty()) {
1609 ptrdiff_t k = w.next_nonzero();
1610 w[k] = w[k] * (*D)[k];
1611 value_type wk = w[k];
1612
1613 if (math::norm(wk) > tol) {
1614 for (ptrdiff_t j = U->ptr[k]; j < static_cast<ptrdiff_t>(U->ptr[k + 1]); ++j)
1615 w[U->col[j]] -= wk * U->val[j];
1616 }
1617 }
1618
// Apply the dual dropping rule and append the surviving entries of the
// row to L, U and D.
1619 w.move_to(
1620 static_cast<int>(lenL * prm.p),
1621 static_cast<int>(lenU * prm.p),
1622 tol, Lhead, *L, Uhead, *U, *D);
1623
1624 L->ptr[i + 1] = Lhead;
1625 U->ptr[i + 1] = Uhead;
1626 }
1627
1628 L->setNbNonZero(L->ptr[n]);
1629 U->setNbNonZero(U->ptr[n]);
1630
1631 ilu = std::make_shared<ilu_solve>(L, U, D, prm.solve, bprm);
1632 }
1633
1635 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1636 void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1637 {
1638 backend::residual(rhs, A, x, tmp);
1639 ilu->solve(tmp);
1640 backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
1641 }
1642
1644 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1645 void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1646 {
1647 backend::residual(rhs, A, x, tmp);
1648 ilu->solve(tmp);
1649 backend::axpby(prm.damping, tmp, math::identity<scalar_type>(), x);
1650 }
1651
1652 template <class Matrix, class VectorRHS, class VectorX>
1653 void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
1654 {
1655 backend::copy(rhs, x);
1656 ilu->solve(x);
1657 }
1658
1659 size_t bytes() const
1660 {
1661 return ilu->bytes();
1662 }
1663
1664 private:
1665
1666 typedef typename BuiltinBackend<value_type, col_type, ptr_type>::matrix build_matrix;
1667 std::shared_ptr<ilu_solve> ilu;
1668
1669 struct sparse_vector
1670 {
1671 struct nonzero
1672 {
1673 ptrdiff_t col;
1674 value_type val;
1675
1676 nonzero()
1677 : col(-1)
1678 {}
1679
1680 nonzero(ptrdiff_t col, const value_type& val = math::zero<value_type>())
1681 : col(col)
1682 , val(val)
1683 {}
1684 };
1685
1686 struct comp_indices
1687 {
1688 const std::vector<nonzero>& nz;
1689
1690 comp_indices(const std::vector<nonzero>& nz)
1691 : nz(nz)
1692 {}
1693
1694 bool operator()(int a, int b) const
1695 {
1696 return nz[a].col > nz[b].col;
1697 }
1698 };
1699
1700 typedef std::priority_queue<int, std::vector<int>, comp_indices> priority_queue;
1701
1702 std::vector<nonzero> nz;
1703 std::vector<ptrdiff_t> idx;
1704 priority_queue q;
1705
1706 ptrdiff_t dia;
1707
1708 sparse_vector(size_t n)
1709 : idx(n, -1)
1710 , q(comp_indices(nz))
1711 , dia(0)
1712 {
1713 nz.reserve(16);
1714 }
1715
1716 value_type operator[](ptrdiff_t i) const
1717 {
1718 if (idx[i] >= 0)
1719 return nz[idx[i]].val;
1720 return math::zero<value_type>();
1721 }
1722
1723 value_type& operator[](ptrdiff_t i)
1724 {
1725 if (idx[i] == -1) {
1726 int p = nz.size();
1727 idx[i] = p;
1728 nz.push_back(nonzero(i));
1729 if (i < dia)
1730 q.push(p);
1731 }
1732 return nz[idx[i]].val;
1733 }
1734
1735 typename std::vector<nonzero>::iterator begin()
1736 {
1737 return nz.begin();
1738 }
1739
1740 typename std::vector<nonzero>::iterator end()
1741 {
1742 return nz.end();
1743 }
1744
1745 ptrdiff_t next_nonzero()
1746 {
1747 int p = q.top();
1748 q.pop();
1749 return nz[p].col;
1750 }
1751
1752 struct higher_than
1753 {
1754 scalar_type tol;
1755 ptrdiff_t dia;
1756
1757 higher_than(scalar_type tol, ptrdiff_t dia)
1758 : tol(tol)
1759 , dia(dia)
1760 {}
1761
1762 bool operator()(const nonzero& v) const
1763 {
1764 return v.col == dia || math::norm(v.val) > tol;
1765 }
1766 };
1767
1768 struct L_first
1769 {
1770 ptrdiff_t dia;
1771
1772 L_first(ptrdiff_t dia)
1773 : dia(dia)
1774 {}
1775
1776 bool operator()(const nonzero& v) const
1777 {
1778 return v.col < dia;
1779 }
1780 };
1781
1782 struct by_abs_val
1783 {
1784 ptrdiff_t dia;
1785
1786 by_abs_val(ptrdiff_t dia)
1787 : dia(dia)
1788 {}
1789
1790 bool operator()(const nonzero& a, const nonzero& b) const
1791 {
1792 if (a.col == dia)
1793 return true;
1794 if (b.col == dia)
1795 return false;
1796
1797 return math::norm(a.val) > math::norm(b.val);
1798 }
1799 };
1800
1801 struct by_col
1802 {
1803 bool operator()(const nonzero& a, const nonzero& b) const
1804 {
1805 return a.col < b.col;
1806 }
1807 };
1808
// Applies the dual dropping rule to the accumulated row and appends the
// surviving entries to the output factors:
//   - entries not exceeding tol are dropped (the diagonal always kept),
//   - only the lp largest L entries and up largest U entries survive,
//   - the inverted diagonal value is stored in D.
// NOTE(review): original line 1812 is not visible in this extraction; it
// presumably declares the trailing parameter `D` (the inverted-diagonal
// vector written below) -- verify against the upstream source.
1809 void move_to(int lp, int up, scalar_type tol,
1810 ptrdiff_t& Lhead, build_matrix& L,
1811 ptrdiff_t& Uhead, build_matrix& U,
1813 {
1814 typedef typename std::vector<nonzero>::iterator ptr;
1815
1816 ptr b = nz.begin();
1817 ptr e = nz.end();
1818
1819 // Move zeros to back:
1820 e = std::partition(b, e, higher_than(tol, dia));
1821
1822 // Split L and U:
1823 ptr m = std::partition(b, e, L_first(dia));
1824
1825 // Get largest p elements in L and U.
1826 ptr lend = std::min(b + lp, m);
1827 ptr uend = std::min(m + up, e);
1828
1829 if (lend != m)
1830 std::nth_element(b, lend, m, by_abs_val(dia));
1831 if (uend != e)
1832 std::nth_element(m, uend, e, by_abs_val(dia));
1833
1834 // Sort entries by column number
1835 std::sort(b, lend, by_col());
1836 std::sort(m, uend, by_col());
1837
1838 // copy L to the output matrix.
1839 for (ptr a = b; a != lend; ++a) {
1840 L.col[Lhead] = a->col;
1841 L.val[Lhead] = a->val;
1842
1843 ++Lhead;
1844 }
1845
// After the partitions, *m is the diagonal entry (higher_than always
// keeps it, by_abs_val sorts it first).  NOTE(review): this assumes
// every row has a structurally present diagonal -- confirm.
1846 // Store inverted diagonal.
1847 D[dia] = math::inverse(m->val);
1848
1849 if (m != uend) {
1850 ++m;
1851
1852 // copy U to the output matrix.
1853 for (ptr a = m; a != uend; ++a) {
1854 U.col[Uhead] = a->col;
1855 U.val[Uhead] = a->val;
1856
1857 ++Uhead;
1858 }
1859 }
1860
// Clear the accumulator for the next row.
1861 for (const nonzero& e : nz)
1862 idx[e.col] = -1;
1863 nz.clear();
1864 }
1865 };
1866};
1867
1868/*---------------------------------------------------------------------------*/
1869/*---------------------------------------------------------------------------*/
1879template <class Backend>
1881: public RelaxationBase
1882{
1883 typedef typename Backend::value_type value_type;
1884 typedef typename Backend::matrix_diagonal matrix_diagonal;
1885
1886 typedef typename math::scalar_of<value_type>::type scalar_type;
1889
1891 template <class Matrix>
1892 SPAI0Relaxation(const Matrix& A, const params&, const typename Backend::params& backend_prm)
1893 {
1894 const size_t n = backend::nbRow(A);
1895
1896 auto m = std::make_shared<numa_vector<value_type>>(n, false);
1897
1898 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1899 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1900 value_type num = math::zero<value_type>();
1901 scalar_type den = math::zero<scalar_type>();
1902
1903 for (auto a = backend::row_begin(A, i); a; ++a) {
1904 value_type v = a.value();
1905 scalar_type norm_v = math::norm(v);
1906 den += norm_v * norm_v;
1907 if (a.col() == i)
1908 num += v;
1909 }
1910
1911 (*m)[i] = math::inverse(den) * num;
1912 }
1913 });
1914
1915 M = Backend::copy_vector(m, backend_prm);
1916 }
1917
1919 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1920 void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1921 {
1922 static const scalar_type one = math::identity<scalar_type>();
1923 backend::residual(rhs, A, x, tmp);
1924 backend::vmul(one, *M, tmp, one, x);
1925 }
1926
1928 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
1929 void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
1930 {
1931 static const scalar_type one = math::identity<scalar_type>();
1932 backend::residual(rhs, A, x, tmp);
1933 backend::vmul(one, *M, tmp, one, x);
1934 }
1935
1936 template <class Matrix, class VectorRHS, class VectorX>
1937 void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
1938 {
1939 backend::vmul(math::identity<scalar_type>(), *M, rhs, math::zero<scalar_type>(), x);
1940 }
1941
1942 size_t bytes() const
1943 {
1944 return backend::bytes(*M);
1945 }
1946
1947 std::shared_ptr<matrix_diagonal> M;
1948};
1949
1950/*---------------------------------------------------------------------------*/
1951/*---------------------------------------------------------------------------*/
1961template <class Backend>
1963: public RelaxationBase
1964{
1965 typedef typename Backend::value_type value_type;
1966 typedef typename Backend::vector vector;
1967
1968 typedef typename math::scalar_of<value_type>::type scalar_type;
1969
1972
// Constructs the SPAI(1) smoother: a sparse approximate inverse with the
// sparsity pattern of A, computed row by row via local least-squares
// (QR) solves, in parallel over blocks of rows.
1974 template <class Matrix>
1975 SPAI1Relaxation(const Matrix& A, const params&, const typename Backend::params& backend_prm)
1976 {
1977 typedef typename backend::value_type<Matrix>::type value_type;
1978
1979 const size_t n = backend::nbRow(A);
1980 const size_t m = backend::nbColumn(A);
1981
// The approximate inverse reuses the sparsity pattern of A.
1982 auto Ainv = std::make_shared<Matrix>(A);
1983
1984 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
// marker maps a global column to its local index in J (or -1).
1985 std::vector<ptrdiff_t> marker(m, -1);
// I: columns of row i of A; J: union of the columns of the rows of A
// indexed by I.
1986 std::vector<ptrdiff_t> I, J;
// B: dense local system (column-major); ek: unit vector e_i on J.
1987 std::vector<value_type> B, ek;
// NOTE(review): original line 1988 is not visible in this extraction;
// it presumably declares the QR solver `qr` used below -- verify
// against the upstream source.
1989
1990 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1991 ptrdiff_t row_beg = A.ptr[i];
1992 ptrdiff_t row_end = A.ptr[i + 1];
1993
1994 I.assign(A.col + row_beg, A.col + row_end);
1995
// Collect J: every column reachable through one extra row of A.
1996 J.clear();
1997 for (ptrdiff_t j = row_beg; j < row_end; ++j) {
1998 ptrdiff_t c = A.col[j];
1999
2000 for (ptrdiff_t jj = A.ptr[c], ee = A.ptr[c + 1]; jj < ee; ++jj) {
2001 ptrdiff_t cc = A.col[jj];
2002 if (marker[cc] < 0) {
2003 marker[cc] = 1;
2004 J.push_back(cc);
2005 }
2006 }
2007 }
2008 std::sort(J.begin(), J.end());
// Size the dense system and build e_i restricted to J.
2009 B.assign(I.size() * J.size(), math::zero<value_type>());
2010 ek.assign(J.size(), math::zero<value_type>());
2011 for (size_t j = 0; j < J.size(); ++j) {
2012 marker[J[j]] = j;
2013 if (J[j] == static_cast<ptrdiff_t>(i))
2014 ek[j] = math::identity<value_type>();
2015 }
2016
// Gather the dense local system, one column per entry of I, from the
// corresponding rows of A.
2017 for (ptrdiff_t j = row_beg; j < row_end; ++j) {
2018 ptrdiff_t c = A.col[j];
2019
2020 for (auto a = backend::row_begin(A, c); a; ++a)
2021 B[marker[a.col()] + J.size() * (j - row_beg)] = a.value();
2022 }
2023
// Least-squares solve: the solution becomes row i of the approximate
// inverse, written directly into Ainv's value array.
2024 qr.solve(J.size(), I.size(), &B[0], &ek[0], &Ainv->val[row_beg],
2025 Alina::detail::col_major);
2026
// Reset the marker for the next row of this block.
2027 for (size_t j = 0; j < J.size(); ++j)
2028 marker[J[j]] = -1;
2029 }
2030 });
2031
2032 M = Backend::copy_matrix(Ainv, backend_prm);
2033 }
2034
2036 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
2037 void apply_pre(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
2038 {
2039 backend::residual(rhs, A, x, tmp);
2040 backend::spmv(math::identity<scalar_type>(), *M, tmp, math::identity<scalar_type>(), x);
2041 }
2042
2044 template <class Matrix, class VectorRHS, class VectorX, class VectorTMP>
2045 void apply_post(const Matrix& A, const VectorRHS& rhs, VectorX& x, VectorTMP& tmp) const
2046 {
2047 backend::residual(rhs, A, x, tmp);
2048 backend::spmv(math::identity<scalar_type>(), *M, tmp, math::identity<scalar_type>(), x);
2049 }
2050
2051 template <class Matrix, class VectorRHS, class VectorX>
2052 void apply(const Matrix&, const VectorRHS& rhs, VectorX& x) const
2053 {
2054 backend::spmv(math::identity<scalar_type>(), *M, rhs, math::zero<scalar_type>(), x);
2055 }
2056
2057 size_t bytes() const
2058 {
2059 return backend::bytes(*M);
2060 }
2061
2062 std::shared_ptr<typename Backend::matrix> M;
2063};
2064
2065/*---------------------------------------------------------------------------*/
2066/*---------------------------------------------------------------------------*/
2067
2068} // namespace Arcane::Alina
2069
2070/*---------------------------------------------------------------------------*/
2071/*---------------------------------------------------------------------------*/
2072
2073namespace Arcane::Alina::backend
2074{
2075
2076template <class Backend>
2078 typename std::enable_if<(Alina::math::static_rows<typename Backend::value_type>::value > 1)>::type> : std::false_type
2079{};
2080
2081template <class Backend>
2083 typename std::enable_if<
2084 !Backend::provides_row_iterator::value>::type> : std::false_type
2085{};
2086
2087} // namespace Arcane::Alina::backend
2088
2089/*---------------------------------------------------------------------------*/
2090/*---------------------------------------------------------------------------*/
2091
2092#endif
2093
2094/*---------------------------------------------------------------------------*/
2095/*---------------------------------------------------------------------------*/
size_t bytes() const
Memory used in bytes.
Definition Relaxation.h:350
ChebyshevRelaxation(const Matrix &A, const params &prm, const typename Backend::params &backend_prm)
Constructs smoother for the system matrix.
Definition Relaxation.h:303
Solver for sparse triangular systems obtained as a result of an incomplete LU factorization.
Base class for solvers.
Class to handle empty parameters list.
Definition AlinaUtils.h:90
NUMA-aware vector container.
Definition NumaVector.h:42
iterator begin()
Itérateur sur le premier élément du tableau.
Informations de base pour la gestion du multi-threading.
Informations d'exécution d'une boucle.
Matrix class, to be used by user.
Vecteur 1D de données avec sémantique par valeur (style STL).
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Alina::detail::empty_params params
Sparse matrix stored in CSR (Compressed Sparse Row) format.
Definition CSRMatrix.h:98
double lower
Lowest-to-highest eigen value ratio.
Definition Relaxation.h:269
double higher
highest eigen value safety upscaling.
Definition Relaxation.h:266
Int32 degree
Chebyshev polynomial degree.
Definition Relaxation.h:258
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
Definition Relaxation.h:477
DampedJacobiRelaxation(const Matrix &A, const params &prm, const typename Backend::params &backend_prm)
Constructs smoother for the system matrix.
Definition Relaxation.h:446
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
Definition Relaxation.h:462
size_t bytes() const
Memory used in bytes.
Definition Relaxation.h:489
bool serial
Use serial version of the algorithm.
Definition Relaxation.h:515
Gauss-Seidel relaxation.
Definition Relaxation.h:510
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &) const
Apply pre-relaxation.
Definition Relaxation.h:548
size_t bytes() const
Memory used in bytes.
Definition Relaxation.h:580
GaussSeidelRelaxation(const Matrix &A, const params &prm, const typename Backend::params &)
Constructs smoother for the system matrix.
Definition Relaxation.h:537
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &) const
Apply post-relaxation.
Definition Relaxation.h:558
ilu_solve::params solve
Parameters for sparse triangular system solver.
Definition Relaxation.h:852
scalar_type damping
Damping factor.
Definition Relaxation.h:849
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
void apply(const Matrix &, const VectorRHS &rhs, VectorX &x) const
Apply the smoother as a standalone operation.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
ILU0Relaxation(const Matrix &A, const params &prm, const typename Backend::params &bprm)
Constructs smoother for the system matrix.
Definition Relaxation.h:874
size_t bytes() const
Memory used in bytes.
ilu_solve::params solve
Parameters for sparse triangular system solver.
scalar_type damping
Damping factor.
size_t bytes() const
Memory used in bytes.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
ILUKRelaxation(const Matrix &A, const params &prm, const typename Backend::params &bprm)
Constructs smoother for the system matrix.
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
ILUPRelaxation(const Matrix &A, const params &prm, const typename Backend::params &bprm)
Constructs smoother for the system matrix.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
size_t bytes() const
Memory used in bytes.
double tau
Minimum magnitude of non-zero elements relative to the current row norm.
ilu_solve::params solve
Parameters for sparse triangular system solver.
ILUTRelaxation(const Matrix &A, const params &prm, const typename Backend::params &bprm)
Constructs smoother for the system matrix.
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
size_t bytes() const
Memory used in bytes.
Converts input matrix to block format before constructing an AMG smoother.
Definition Relaxation.h:53
Alina::detail::empty_params params
Relaxation parameters.
size_t bytes() const
Memory used in bytes.
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
SPAI0Relaxation(const Matrix &A, const params &, const typename Backend::params &backend_prm)
Constructs smoother for the system matrix.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
Sparse approximate inverse smoother.
Alina::detail::empty_params params
Relaxation parameters.
SPAI1Relaxation(const Matrix &A, const params &, const typename Backend::params &backend_prm)
Constructs smoother for the system matrix.
size_t bytes() const
Memory used in bytes.
void apply_post(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply post-relaxation.
void apply_pre(const Matrix &A, const VectorRHS &rhs, VectorX &x, VectorTMP &tmp) const
Apply pre-relaxation.
Is the relaxation supported by the backend?
Number of rows for statically sized matrix types.