Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
Coarsening.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Coarsening.h (C) 2000-2026 */
9/* */
10/* Coarsening strategies for AMG hierarchy construction. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_COARSENING_H
13#define ARCCORE_ALINA_COARSENING_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include <tuple>
27#include <memory>
28#include <vector>
29#include <numeric>
30#include <limits>
31#include <algorithm>
32#include <cmath>
33
34#include "arccore/alina/BuiltinBackend.h"
35#include "arccore/alina/AlinaUtils.h"
36#include "arccore/alina/QRFactorizationImpl.h"
37#include "arccore/alina/Adapters.h"
38#include "arccore/alina/ValueTypeInterface.h"
39
40#include <mutex>
41
42/*---------------------------------------------------------------------------*/
43/*---------------------------------------------------------------------------*/
44
45namespace Arcane::Alina::detail
46{
47
48/*---------------------------------------------------------------------------*/
49/*---------------------------------------------------------------------------*/
50
54template <class Matrix>
55std::shared_ptr<Matrix> galerkin(const Matrix& A, const Matrix& P, const Matrix& R)
56{
57 return product(R, *product(A, P));
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
65template <class Matrix>
66std::shared_ptr<Matrix> scaled_galerkin(const Matrix& A, const Matrix& P, const Matrix& R, float s)
67{
68 auto a = galerkin(A, P, R);
69 scale(*a, s);
70 return a;
71}
72
73/*---------------------------------------------------------------------------*/
74/*---------------------------------------------------------------------------*/
75
76} // namespace Arcane::Alina::detail
77
78/*---------------------------------------------------------------------------*/
79/*---------------------------------------------------------------------------*/
80
81namespace Arcane::Alina
82{
83
84/*---------------------------------------------------------------------------*/
85/*---------------------------------------------------------------------------*/
86
87struct nullspace_params
88{
90 int cols;
91
93
97 std::vector<double> B;
98
99 nullspace_params()
100 : cols(0)
101 {}
102
104 : cols(p.get("cols", nullspace_params().cols))
105 {
106 double* b = 0;
107 b = p.get("B", b);
108
109 if (b) {
110 Int32 rows = 0;
111 rows = p.get("rows", rows);
112
113 precondition(cols > 0, "Error in nullspace parameters: "
114 "B is set, but cols is not");
115
116 precondition(rows > 0, "Error in nullspace parameters: "
117 "B is set, but rows is not");
118
119 B.assign(b, b + rows * cols);
120 }
121 else {
122 precondition(cols == 0, "Error in nullspace parameters: "
123 "cols > 0, but B is empty");
124 }
125
126 p.check_params( { "cols", "rows", "B" });
127 }
128
129 void get(PropertyTree&, const std::string&) const {}
130};
131
132/*---------------------------------------------------------------------------*/
133/*---------------------------------------------------------------------------*/
150{
152 struct params
153 {
155
162
163 params()
164 : eps_strong(0.08f)
165 {}
166
167 params(const PropertyTree& p)
168 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_strong)
169 {
170 p.check_params( { "eps_strong", "block_size" });
171 }
172
173 void get(PropertyTree& p, const std::string& path) const
174 {
175 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_strong);
176 }
177 };
178
179 static const ptrdiff_t undefined = -1;
180 static const ptrdiff_t removed = -2;
181
183 size_t count;
184
191 std::vector<char> strong_connection;
192
199 std::vector<ptrdiff_t> id;
200
207 template <class Matrix>
208 plain_aggregates(const Matrix& A, const params& prm)
209 : count(0)
210 , strong_connection(backend::nonzeros(A))
211 , id(backend::nbRow(A))
212 {
213 typedef typename backend::value_type<Matrix>::type value_type;
214 typedef typename math::scalar_of<value_type>::type scalar_type;
215
216 scalar_type eps_squared = prm.eps_strong * prm.eps_strong;
217
218 const size_t n = backend::nbRow(A);
219
220 /* 1. Get strong connections */
221 auto dia = diagonal(A);
222 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
223 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
224 value_type eps_dia_i = eps_squared * (*dia)[i];
225
226 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
227 ptrdiff_t c = A.col[j];
228 value_type v = A.val[j];
229
230 strong_connection[j] = (c != i) && (eps_dia_i * (*dia)[c] < v * v);
231 }
232 }
233 });
234
235 /* 2. Get aggregate ids */
236
237 // Remove lonely nodes.
238 size_t max_neib = 0;
239 for (size_t i = 0; i < n; ++i) {
240 ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1];
241 max_neib = std::max<size_t>(max_neib, e - j);
242
243 ptrdiff_t state = removed;
244 for (; j < e; ++j)
245 if (strong_connection[j]) {
246 state = undefined;
247 break;
248 }
249
250 id[i] = state;
251 }
252
253 std::vector<ptrdiff_t> neib;
254 neib.reserve(max_neib);
255
256 // Perform plain aggregation
257 for (size_t i = 0; i < n; ++i) {
258 if (id[i] != undefined)
259 continue;
260
261 // The point is not adjacent to a core of any previous aggregate:
262 // so its a seed of a new aggregate.
263 ptrdiff_t cur_id = static_cast<ptrdiff_t>(count++);
264 id[i] = cur_id;
265
266 // (*) Include its neighbors as well.
267 neib.clear();
268 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
269 ptrdiff_t c = A.col[j];
270 if (strong_connection[j] && id[c] != removed) {
271 id[c] = cur_id;
272 neib.push_back(c);
273 }
274 }
275
276 // Temporarily mark undefined points adjacent to the new aggregate
277 // as members of the aggregate.
278 // If nobody claims them later, they will stay here.
279 for (ptrdiff_t c : neib) {
280 for (ptrdiff_t j = A.ptr[c], e = A.ptr[c + 1]; j < e; ++j) {
281 ptrdiff_t cc = A.col[j];
282 if (strong_connection[j] && id[cc] == undefined)
283 id[cc] = cur_id;
284 }
285 }
286 }
287
288 if (!count)
289 throw error::empty_level();
290
291 // Some of the aggregates could potentially vanish during expansion
292 // step (*) above. We need to exclude those and renumber the rest.
293 std::vector<ptrdiff_t> cnt(count, 0);
294 for (ptrdiff_t i : id)
295 if (i >= 0)
296 cnt[i] = 1;
297 std::partial_sum(cnt.begin(), cnt.end(), cnt.begin());
298
299 if (static_cast<ptrdiff_t>(count) > cnt.back()) {
300 count = cnt.back();
301
302 for (size_t i = 0; i < n; ++i)
303 if (id[i] >= 0)
304 id[i] = cnt[id[i]] - 1;
305 }
306 }
307};
308
309/*---------------------------------------------------------------------------*/
310/*---------------------------------------------------------------------------*/
311
312namespace detail
313{
314 struct skip_negative
315 {
316 const std::vector<ptrdiff_t>& key;
317 int block_size;
318
319 skip_negative(const std::vector<ptrdiff_t>& key, int block_size)
320 : key(key)
321 , block_size(block_size)
322 {}
323
324 bool operator()(ptrdiff_t i, ptrdiff_t j) const
325 {
326 // Cast to unsigned type to keep negative values at the end
327 return static_cast<size_t>(key[i]) / block_size <
328 static_cast<size_t>(key[j]) / block_size;
329 }
330 };
331} // namespace detail
332
333/*---------------------------------------------------------------------------*/
334/*---------------------------------------------------------------------------*/
343template <class Matrix>
344std::shared_ptr<Matrix>
345tentative_prolongation(size_t n,
346 size_t naggr,
347 const std::vector<ptrdiff_t> aggr,
348 nullspace_params& nullspace,
349 int block_size)
350{
351 typedef typename backend::value_type<Matrix>::type value_type;
352 typedef typename backend::col_type<Matrix>::type col_type;
353
354 auto P = std::make_shared<Matrix>();
355
356 ARCCORE_ALINA_TIC("tentative");
357 if (nullspace.cols > 0) {
358 ptrdiff_t nba = naggr / block_size;
359
360 // Sort fine points by aggregate number.
361 // Put points not belonging to any aggregate to the end of the list.
362 std::vector<ptrdiff_t> order(n);
363 for (size_t i = 0; i < n; ++i)
364 order[i] = i;
365 std::stable_sort(order.begin(), order.end(), detail::skip_negative(aggr, block_size));
366 std::vector<ptrdiff_t> aggr_ptr(nba + 1, 0);
367 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
368 ptrdiff_t a = aggr[order[i]];
369 if (a < 0)
370 break;
371 ++aggr_ptr[a / block_size + 1];
372 }
373 std::partial_sum(aggr_ptr.begin(), aggr_ptr.end(), aggr_ptr.begin());
374
375 // Precompute the shape of the prolongation operator.
376 // Each row contains exactly nullspace.cols non-zero entries.
377 // Rows that do not belong to any aggregate are empty.
378 P->set_size(n, nullspace.cols * nba);
379 P->ptr[0] = 0;
380
381 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
382 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
383 P->ptr[i + 1] = aggr[i] < 0 ? 0 : nullspace.cols;
384 }
385 });
386
387 P->scan_row_sizes();
388 P->set_nonzeros();
389
390 // Compute the tentative prolongation operator and null-space vectors
391 // for the coarser level.
392 std::vector<double> Bnew;
393 Bnew.resize(nba * nullspace.cols * nullspace.cols);
394
395 arccoreParallelFor(0, nba, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
396 Alina::detail::QRFactorization<double> qr;
397 std::vector<double> Bpart;
398
399 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
400 auto aggr_beg = aggr_ptr[i];
401 auto aggr_end = aggr_ptr[i + 1];
402 auto d = aggr_end - aggr_beg;
403
404 Bpart.resize(d * nullspace.cols);
405
406 for (ptrdiff_t j = aggr_beg, jj = 0; j < aggr_end; ++j, ++jj) {
407 ptrdiff_t ib = nullspace.cols * order[j];
408 for (int k = 0; k < nullspace.cols; ++k)
409 Bpart[jj + d * k] = nullspace.B[ib + k];
410 }
411
412 qr.factorize(d, nullspace.cols, &Bpart[0], Alina::detail::col_major);
413
414 for (int ii = 0, kk = 0; ii < nullspace.cols; ++ii)
415 for (int jj = 0; jj < nullspace.cols; ++jj, ++kk)
416 Bnew[i * nullspace.cols * nullspace.cols + kk] = qr.R(ii, jj);
417
418 for (ptrdiff_t j = aggr_beg, ii = 0; j < aggr_end; ++j, ++ii) {
419 col_type* c = &P->col[P->ptr[order[j]]];
420 value_type* v = &P->val[P->ptr[order[j]]];
421
422 for (int jj = 0; jj < nullspace.cols; ++jj) {
423 c[jj] = i * nullspace.cols + jj;
424 // TODO: this is just a workaround to make non-scalar value
425 // types compile. Most probably this won't actually work.
426 v[jj] = qr.Q(ii, jj) * math::identity<value_type>();
427 }
428 }
429 }
430 });
431
432 std::swap(nullspace.B, Bnew);
433 }
434 else {
435 P->set_size(n, naggr);
436 P->ptr[0] = 0;
437 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
438 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
439 P->ptr[i + 1] = (aggr[i] >= 0);
440 }
441 });
442
443 P->set_nonzeros(P->scan_row_sizes());
444
445 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
446 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
447 if (aggr[i] >= 0) {
448 P->col[P->ptr[i]] = aggr[i];
449 P->val[P->ptr[i]] = math::identity<value_type>();
450 }
451 }
452 });
453 }
454 ARCCORE_ALINA_TOC("tentative");
455
456 return P;
457}
458
459/*---------------------------------------------------------------------------*/
460/*---------------------------------------------------------------------------*/
470{
471 public:
472
475 {
483
484 params() = default;
485
486 params(const PropertyTree& p)
487 : plain_aggregates::params(p)
488 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, block_size)
489 {
490 p.check_params( { "eps_strong", "block_size" });
491 }
492
493 void get(Alina::PropertyTree& p, const std::string& path) const
494 {
495 plain_aggregates::params::get(p, path);
496 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, block_size);
497 }
498 };
499
500 static const ptrdiff_t undefined = -1;
501 static const ptrdiff_t removed = -2;
502
504 size_t count;
505
507 std::vector<char> strong_connection;
508
510 std::vector<ptrdiff_t> id;
511
513 template <class Matrix>
514 pointwise_aggregates(const Matrix& A, const params& prm, unsigned min_aggregate)
515 : count(0)
516 {
517 if (prm.block_size == 1) {
518 plain_aggregates aggr(A, prm);
519
520 remove_small_aggregates(A.nbRow(), 1, min_aggregate, aggr);
521
522 count = aggr.count;
523 strong_connection.swap(aggr.strong_connection);
524 id.swap(aggr.id);
525 }
526 else {
527 strong_connection.resize(backend::nonzeros(A));
528 id.resize(backend::nbRow(A));
529
530 auto ap = pointwise_matrix(A, prm.block_size);
531 auto& Ap = *ap;
532
533 plain_aggregates pw_aggr(Ap, prm);
534
535 remove_small_aggregates(Ap.nbRow(), prm.block_size, min_aggregate, pw_aggr);
536
537 count = pw_aggr.count * prm.block_size;
538
539 arccoreParallelFor(0, Ap.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
540 std::vector<ptrdiff_t> j(prm.block_size);
541 std::vector<ptrdiff_t> e(prm.block_size);
542
543 for (ptrdiff_t ip = begin; ip < (begin + size); ++ip) {
544 ptrdiff_t ia = ip * prm.block_size;
545
546 for (unsigned k = 0; k < prm.block_size; ++k, ++ia) {
547 id[ia] = prm.block_size * pw_aggr.id[ip] + k;
548
549 j[k] = A.ptr[ia];
550 e[k] = A.ptr[ia + 1];
551 }
552
553 for (ptrdiff_t jp = Ap.ptr[ip], ep = Ap.ptr[ip + 1]; jp < ep; ++jp) {
554 ptrdiff_t cp = Ap.col[jp];
555 bool sp = (cp == ip) || pw_aggr.strong_connection[jp];
556
557 ptrdiff_t col_end = (cp + 1) * prm.block_size;
558
559 for (unsigned k = 0; k < prm.block_size; ++k) {
560 ptrdiff_t beg = j[k];
561 ptrdiff_t end = e[k];
562
563 while (beg < end && A.col[beg] < col_end) {
564 strong_connection[beg] = sp && A.col[beg] != (ia + k);
565 ++beg;
566 }
567
568 j[k] = beg;
569 }
570 }
571 }
572 });
573 }
574 }
575
576 static void remove_small_aggregates(size_t n, unsigned block_size, unsigned min_aggregate,
577 plain_aggregates& aggr)
578 {
579 if (min_aggregate <= 1)
580 return; // nothing to do
581
582 // Count entries in each of the aggregates
583 std::vector<ptrdiff_t> count(aggr.count, 0);
584
585 for (size_t i = 0; i < n; ++i) {
586 ptrdiff_t id = aggr.id[i];
587 if (id != removed)
588 ++count[id];
589 }
590
591 // If any aggregate has less entries than required, remove it.
592 // Renumber the rest of the aggregates to leave no gaps.
593 size_t m = 0;
594 for (size_t i = 0; i < aggr.count; ++i) {
595 if (block_size * count[i] < min_aggregate) {
596 count[i] = removed;
597 }
598 else {
599 count[i] = m++;
600 }
601 }
602
603 // Update aggregate count and aggregate ids.
604 aggr.count = m;
605
606 for (size_t i = 0; i < n; ++i) {
607 ptrdiff_t id = aggr.id[i];
608 if (id != removed)
609 aggr.id[i] = count[id];
610 }
611 }
612};
613
614/*---------------------------------------------------------------------------*/
615/*---------------------------------------------------------------------------*/
638template <class Backend>
639struct AggregationCoarsening
640{
641 typedef pointwise_aggregates Aggregates;
642
644 struct params
645 {
648
651
667
668 params()
669 : over_interp(math::static_rows<typename Backend::value_type>::value == 1 ? 1.5f : 2.0f)
670 {}
671
672 params(const PropertyTree& p)
673 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
674 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
675 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, over_interp)
676 {
677 p.check_params( { "aggr", "nullspace", "over_interp" });
678 }
679
680 void get(PropertyTree& p, const std::string& path) const
681 {
682 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
683 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
684 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, over_interp);
685 }
686 } prm;
687
688 explicit AggregationCoarsening(const params& prm = params())
689 : prm(prm)
690 {}
691
699 template <class Matrix>
700 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
702 {
703 const size_t n = backend::nbRow(A);
704
705 ARCCORE_ALINA_TIC("aggregates");
706 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
707 ARCCORE_ALINA_TOC("aggregates");
708
709 ARCCORE_ALINA_TIC("interpolation");
710 auto P = tentative_prolongation<Matrix>(
711 n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
712 ARCCORE_ALINA_TOC("interpolation");
713
714 return std::make_tuple(P, transpose(*P));
715 }
716
725 template <class Matrix>
726 std::shared_ptr<Matrix>
727 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
728 {
729 return detail::scaled_galerkin(A, P, R, 1 / prm.over_interp);
730 }
731};
732
733/*---------------------------------------------------------------------------*/
734/*---------------------------------------------------------------------------*/
741template <template <class> class Coarsening>
743{
744 template <class Backend>
745 struct type
746 {
747 typedef math::element_of<typename Backend::value_type>::type Scalar;
749 typedef Coarsening<BaseBackend> Base;
750
751 typedef typename Base::params params;
752 Base base;
753
754 type(const params& prm = params())
755 : base(prm) {};
756
757 template <class Matrix>
758 typename std::enable_if<backend::coarsening_is_supported<BaseBackend, Coarsening>::value &&
760 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
761 transfer_operators(const Matrix& B)
762 {
763 typedef typename backend::value_type<Matrix>::type Block;
764 auto T = base.transfer_operators(*adapter::unblock_matrix(B));
765
766 auto& P = *std::get<0>(T);
767 auto& R = *std::get<1>(T);
768
769 sort_rows(P);
770 sort_rows(R);
771
772 return std::make_tuple(
773 std::make_shared<Matrix>(adapter::block_matrix<Block>(P)),
774 std::make_shared<Matrix>(adapter::block_matrix<Block>(R)));
775 }
776
777 template <class Matrix>
778 typename std::enable_if<backend::coarsening_is_supported<BaseBackend, Coarsening>::value &&
780 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
781 transfer_operators(const Matrix& A)
782 {
783 return base.transfer_operators(A);
784 }
785
786 template <class Matrix>
787 typename std::enable_if<!backend::coarsening_is_supported<BaseBackend, Coarsening>::value,
788 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
789 transfer_operators(const Matrix&)
790 {
791 throw std::logic_error("The coarsening is not supported by the backend");
792 }
793
794 template <class Matrix>
795 std::shared_ptr<Matrix>
796 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
797 {
798 return base.coarse_operator(A, P, R);
799 }
800 };
801};
802
803/*---------------------------------------------------------------------------*/
804/*---------------------------------------------------------------------------*/
805
806// Create rigid body modes from coordinate vector.
807// To be used as near-nullspace vectors with aggregation coarsening
808// for 2D or 3D elasticity problems.
809// The output matrix B may be transposed on demand
810// (to be used as a set of deflation vectors).
811template <class Vector>
812int rigid_body_modes(int ndim, const Vector& coo, std::vector<double>& B, bool transpose = false)
813{
814 precondition(ndim == 2 || ndim == 3, "Only 2D or 3D problems are supported");
815 precondition(coo.size() % ndim == 0, "Coordinate vector size should be divisible by ndim");
816
817 size_t n = coo.size();
818 int nmodes = (ndim == 2 ? 3 : 6);
819 B.resize(n * nmodes, 0.0);
820
821 const int stride1 = transpose ? 1 : nmodes;
822 const int stride2 = transpose ? n : 1;
823
824 double sn = 1 / sqrt(n);
825
826 if (ndim == 2) {
827 for (size_t i = 0; i < n; ++i) {
828 size_t nod = i / ndim;
829 size_t dim = i % ndim;
830
831 double x = coo[nod * 2 + 0];
832 double y = coo[nod * 2 + 1];
833
834 // Translation
835 B[i * stride1 + dim * stride2] = sn;
836
837 // Rotation
838 switch (dim) {
839 case 0:
840 B[i * stride1 + 2 * stride2] = -y;
841 break;
842 case 1:
843 B[i * stride1 + 2 * stride2] = x;
844 break;
845 }
846 }
847 }
848 else if (ndim == 3) {
849 for (size_t i = 0; i < n; ++i) {
850 size_t nod = i / ndim;
851 size_t dim = i % ndim;
852
853 double x = coo[nod * 3 + 0];
854 double y = coo[nod * 3 + 1];
855 double z = coo[nod * 3 + 2];
856
857 // Translation
858 B[i * stride1 + dim * stride2] = sn;
859
860 // Rotation
861 switch (dim) {
862 case 0:
863 B[i * stride1 + 3 * stride2] = y;
864 B[i * stride1 + 5 * stride2] = z;
865 break;
866 case 1:
867 B[i * stride1 + 3 * stride2] = -x;
868 B[i * stride1 + 4 * stride2] = -z;
869 break;
870 case 2:
871 B[i * stride1 + 4 * stride2] = y;
872 B[i * stride1 + 5 * stride2] = -x;
873 break;
874 }
875 }
876 }
877
878 // Orthonormalization
879 std::array<double, 6> dot;
880 for (int i = ndim; i < nmodes; ++i) {
881 std::fill(dot.begin(), dot.end(), 0.0);
882 for (size_t j = 0; j < n; ++j) {
883 for (int k = 0; k < i; ++k)
884 dot[k] += B[j * stride1 + k * stride2] * B[j * stride1 + i * stride2];
885 }
886 double s = 0.0;
887 for (size_t j = 0; j < n; ++j) {
888 for (int k = 0; k < i; ++k)
889 B[j * stride1 + i * stride2] -= dot[k] * B[j * stride1 + k * stride2];
890 s += B[j * stride1 + i * stride2] * B[j * stride1 + i * stride2];
891 }
892 s = sqrt(s);
893 for (size_t j = 0; j < n; ++j)
894 B[j * stride1 + i * stride2] /= s;
895 }
896
897 return nmodes;
898}
899
900/*---------------------------------------------------------------------------*/
901/*---------------------------------------------------------------------------*/
908template <class Backend>
909struct RugeStubenCoarsening
910{
912 struct params
913 {
923 float eps_strong = 0.25f;
924
936 bool do_trunc = true;
937
939 float eps_trunc = 0.2f;
940
941 params() = default;
942
943 params(const PropertyTree& p)
944 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_strong)
945 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, do_trunc)
946 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_trunc)
947 {
948 p.check_params( { "eps_strong", "do_trunc", "eps_trunc" });
949 }
950
951 void get(PropertyTree& p, const std::string& path) const
952 {
953 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_strong);
954 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, do_trunc);
955 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_trunc);
956 }
957 } prm;
958
959 explicit RugeStubenCoarsening(const params& prm = params())
960 : prm(prm)
961 {}
962
963 template <class Matrix>
964 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
965 transfer_operators(const Matrix& A) const
966 {
967 typedef typename backend::value_type<Matrix>::type Val;
968 typedef typename backend::col_type<Matrix>::type Col;
969 typedef typename backend::ptr_type<Matrix>::type Ptr;
970 typedef typename math::scalar_of<Val>::type Scalar;
971
972 const size_t n = backend::nbRow(A);
973
974 static const Scalar eps = Alina::detail::eps<Scalar>(1);
975
976 static const Val zero = math::zero<Val>();
977
978 std::vector<char> cf(n, 'U');
979 CSRMatrix<char, Col, Ptr> S;
980
981 ARCCORE_ALINA_TIC("C/F split");
982 connect(A, prm.eps_strong, S, cf);
983 cfsplit(A, S, cf);
984 ARCCORE_ALINA_TOC("C/F split");
985
986 ARCCORE_ALINA_TIC("interpolation");
987 size_t nc = 0;
988 std::vector<ptrdiff_t> cidx(n);
989 for (size_t i = 0; i < n; ++i)
990 if (cf[i] == 'C')
991 cidx[i] = static_cast<ptrdiff_t>(nc++);
992
993 if (!nc)
994 throw error::empty_level();
995
996 auto P = std::make_shared<Matrix>();
997 P->set_size(n, nc, true);
998
999 std::vector<Val> Amin, Amax;
1000
1001 if (prm.do_trunc) {
1002 Amin.resize(n);
1003 Amax.resize(n);
1004 }
1005
1006 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1007 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1008 if (cf[i] == 'C') {
1009 ++P->ptr[i + 1];
1010 continue;
1011 }
1012
1013 if (prm.do_trunc) {
1014 Val amin = zero, amax = zero;
1015
1016 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1017 if (!S.val[j] || cf[A.col[j]] != 'C')
1018 continue;
1019
1020 amin = std::min(amin, A.val[j]);
1021 amax = std::max(amax, A.val[j]);
1022 }
1023
1024 Amin[i] = (amin *= prm.eps_trunc);
1025 Amax[i] = (amax *= prm.eps_trunc);
1026
1027 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1028 if (!S.val[j] || cf[A.col[j]] != 'C')
1029 continue;
1030
1031 if (A.val[j] < amin || amax < A.val[j])
1032 ++P->ptr[i + 1];
1033 }
1034 }
1035 else {
1036 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1037 if (S.val[j] && cf[A.col[j]] == 'C')
1038 ++P->ptr[i + 1];
1039 }
1040 }
1041 });
1042
1043 P->set_nonzeros(P->scan_row_sizes());
1044
1045 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1046 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1047 ptrdiff_t row_head = P->ptr[i];
1048
1049 if (cf[i] == 'C') {
1050 P->col[row_head] = cidx[i];
1051 P->val[row_head] = math::identity<Val>();
1052 continue;
1053 }
1054
1055 Val dia = zero;
1056 Val a_num = zero, a_den = zero;
1057 Val b_num = zero, b_den = zero;
1058 Val d_neg = zero, d_pos = zero;
1059
1060 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1061 ptrdiff_t c = A.col[j];
1062 Val v = A.val[j];
1063
1064 if (c == i) {
1065 dia = v;
1066 continue;
1067 }
1068
1069 if (v < zero) {
1070 a_num += v;
1071 if (S.val[j] && cf[c] == 'C') {
1072 a_den += v;
1073 if (prm.do_trunc && Amin[i] < v)
1074 d_neg += v;
1075 }
1076 }
1077 else {
1078 b_num += v;
1079 if (S.val[j] && cf[c] == 'C') {
1080 b_den += v;
1081 if (prm.do_trunc && v < Amax[i])
1082 d_pos += v;
1083 }
1084 }
1085 }
1086
1087 Scalar cf_neg = 1;
1088 Scalar cf_pos = 1;
1089
1090 if (prm.do_trunc) {
1091 if (math::norm(static_cast<Val>(a_den - d_neg)) > eps)
1092 cf_neg = math::norm(a_den) / math::norm(static_cast<Val>(a_den - d_neg));
1093
1094 if (math::norm(static_cast<Val>(b_den - d_pos)) > eps)
1095 cf_pos = math::norm(b_den) / math::norm(static_cast<Val>(b_den - d_pos));
1096 }
1097
1098 if (zero < b_num && math::norm(b_den) < eps)
1099 dia += b_num;
1100
1101 Scalar alpha = math::norm(a_den) > eps ? -cf_neg * math::norm(a_num) / (math::norm(dia) * math::norm(a_den)) : 0;
1102 Scalar beta = math::norm(b_den) > eps ? -cf_pos * math::norm(b_num) / (math::norm(dia) * math::norm(b_den)) : 0;
1103
1104 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1105 ptrdiff_t c = A.col[j];
1106 Val v = A.val[j];
1107
1108 if (!S.val[j] || cf[c] != 'C')
1109 continue;
1110 if (prm.do_trunc && Amin[i] <= v && v <= Amax[i])
1111 continue;
1112
1113 P->col[row_head] = cidx[c];
1114 P->val[row_head] = (v < zero ? alpha : beta) * v;
1115 ++row_head;
1116 }
1117 }
1118 });
1119 ARCCORE_ALINA_TOC("interpolation");
1120
1121 return std::make_tuple(P, transpose(*P));
1122 }
1123
1124 template <class Matrix>
1125 std::shared_ptr<Matrix>
1126 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1127 {
1128 return detail::galerkin(A, P, R);
1129 }
1130
1131 private:
1132
1133 //-------------------------------------------------------------------
1134 // On return S will hold both strong connection matrix (in S.val, which
1135 // is piggybacking A.ptr and A.col), and its transposition (in S.ptr
1136 // and S.val).
1137 //
1138 // Variables that have no positive connections are marked as F(ine).
1139 //-------------------------------------------------------------------
1140 template <typename Val, typename Col, typename Ptr>
1141 static void connect(CSRMatrix<Val, Col, Ptr> const& A, float eps_strong,
1142 CSRMatrix<char, Col, Ptr>& S,
1143 std::vector<char>& cf)
1144 {
1145 typedef typename math::scalar_of<Val>::type Scalar;
1146
1147 const size_t n = backend::nbRow(A);
1148 const size_t nnz = backend::nonzeros(A);
1149 const Scalar eps = Alina::detail::eps<Scalar>(1);
1150
1151 S.setNbRow(n);
1152 S.ncols = n;
1153 S.ptr.resize(n + 1);
1154 S.val.resize(nnz);
1155 S.ptr[0] = 0;
1156
1157 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1158 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1159
1160 S.ptr[i + 1] = 0;
1161
1162 Val a_min = math::zero<Val>();
1163
1164 for (auto a = backend::row_begin(A, i); a; ++a)
1165 if (a.col() != i)
1166 a_min = std::min(a_min, a.value());
1167
1168 if (math::norm(a_min) < eps) {
1169 cf[i] = 'F';
1170 continue;
1171 }
1172
1173 a_min *= eps_strong;
1174
1175 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1176 S.val[j] = (A.col[j] != i && A.val[j] < a_min);
1177 }
1178 });
1179
1180 // Transposition of S:
1181 for (size_t i = 0; i < nnz; ++i)
1182 if (S.val[i])
1183 ++(S.ptr[A.col[i] + 1]);
1184
1185 S.scan_row_sizes();
1186 S.col.resize(S.ptr[n]);
1187
1188 for (size_t i = 0; i < n; ++i)
1189 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1190 if (S.val[j])
1191 S.col[S.ptr[A.col[j]]++] = i;
1192
1193 std::rotate(S.ptr.data(), S.ptr.data() + n, S.ptr.data() + n + 1);
1194 S.ptr[0] = 0;
1195 }
1196
1197 // Split variables into C(oarse) and F(ine) sets.
1198 template <typename Val, typename Col, typename Ptr>
1199 static void cfsplit(CSRMatrix<Val, Col, Ptr> const& A,
1200 CSRMatrix<char, Col, Ptr> const& S,
1201 std::vector<char>& cf)
1202 {
1203 const size_t n = A.nbRow();
1204
1205 std::vector<Col> lambda(n);
1206
1207 // Initialize lambdas:
1208 for (size_t i = 0; i < n; ++i) {
1209 Col temp = 0;
1210 for (Ptr j = S.ptr[i], e = S.ptr[i + 1]; j < e; ++j)
1211 temp += (cf[S.col[j]] == 'U' ? 1 : 2);
1212 lambda[i] = temp;
1213 }
1214
1215 // Keep track of variable groups with equal lambda values.
1216 // ptr - start of a group;
1217 // cnt - size of a group;
1218 // i2n - variable number;
1219 // n2i - variable position in a group.
1220 std::vector<Ptr> ptr(n + 1, 0);
1221 std::vector<Ptr> cnt(n, 0);
1222 std::vector<Ptr> i2n(n);
1223 std::vector<Ptr> n2i(n);
1224
1225 for (size_t i = 0; i < n; ++i)
1226 ++ptr[lambda[i] + 1];
1227
1228 std::partial_sum(ptr.begin(), ptr.end(), ptr.begin());
1229
1230 for (size_t i = 0; i < n; ++i) {
1231 Col lam = lambda[i];
1232 Ptr idx = ptr[lam] + cnt[lam]++;
1233 i2n[idx] = i;
1234 n2i[i] = idx;
1235 }
1236
1237 // Process variables by decreasing lambda value.
1238 // 1. The variable with maximum value of lambda becomes next C-variable.
1239 // 2. Its neighbours from S' become F-variables.
1240 // 3. Keep lambda values in sync.
1241 for (size_t top = n; top-- > 0;) {
1242 Ptr i = i2n[top];
1243 Col lam = lambda[i];
1244
1245 if (lam == 0) {
1246 std::replace(cf.begin(), cf.end(), 'U', 'C');
1247 break;
1248 }
1249
1250 // Remove the variable from its group.
1251 --cnt[lam];
1252
1253 if (cf[i] == 'F')
1254 continue;
1255
1256 // Mark the variable as 'C'.
1257 cf[i] = 'C';
1258
1259 // Its neighbours from S' become F-variables.
1260 for (Ptr j = S.ptr[i], e = S.ptr[i + 1]; j < e; ++j) {
1261 Col c = S.col[j];
1262
1263 if (cf[c] != 'U')
1264 continue;
1265
1266 cf[c] = 'F';
1267
1268 // Increase lambdas of the newly created F's neighbours.
1269 for (Ptr aj = A.ptr[c], ae = A.ptr[c + 1]; aj < ae; ++aj) {
1270 if (!S.val[aj])
1271 continue;
1272
1273 Col ac = A.col[aj];
1274 Col lam_a = lambda[ac];
1275
1276 if (cf[ac] != 'U' || static_cast<size_t>(lam_a) + 1 >= n)
1277 continue;
1278
1279 Ptr old_pos = n2i[ac];
1280 Ptr new_pos = ptr[lam_a] + cnt[lam_a] - 1;
1281
1282 n2i[i2n[old_pos]] = new_pos;
1283 n2i[i2n[new_pos]] = old_pos;
1284
1285 std::swap(i2n[old_pos], i2n[new_pos]);
1286
1287 --cnt[lam_a];
1288 ++cnt[lam_a + 1];
1289 ptr[lam_a + 1] = ptr[lam_a] + cnt[lam_a];
1290
1291 lambda[ac] = lam_a + 1;
1292 }
1293 }
1294
1295 // Decrease lambdas of the newly created C's neighbours.
1296 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; j++) {
1297 if (!S.val[j])
1298 continue;
1299
1300 Col c = A.col[j];
1301 Col lam = lambda[c];
1302
1303 if (cf[c] != 'U' || lam == 0)
1304 continue;
1305
1306 Ptr old_pos = n2i[c];
1307 Ptr new_pos = ptr[lam];
1308
1309 n2i[i2n[old_pos]] = new_pos;
1310 n2i[i2n[new_pos]] = old_pos;
1311
1312 std::swap(i2n[old_pos], i2n[new_pos]);
1313
1314 --cnt[lam];
1315 ++cnt[lam - 1];
1316 ++ptr[lam];
1317 lambda[c] = lam - 1;
1318 }
1319 }
1320 }
1321};
1322
1323/*---------------------------------------------------------------------------*/
1324/*---------------------------------------------------------------------------*/
1331template <class Backend>
1332struct SmoothedAggregationCoarserning
1333{
1334 typedef pointwise_aggregates Aggregates;
1335
1337 struct params
1338 {
1341
1344
1372 float relax = 1.0f;
1373
1374 // Estimate the matrix spectral radius.
1375 // This usually improves convergence rate and results in faster solves,
1376 // but costs some time during setup.
1377 bool estimate_spectral_radius = false;
1378
1379 // Number of power iterations to apply for the spectral radius
1380 // estimation. Use Gershgorin disk theorem when power_iters = 0.
1381 int power_iters = 0;
1382
1383 params() = default;
1384
1385 params(const PropertyTree& p)
1386 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
1387 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
1388 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, relax)
1389 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, estimate_spectral_radius)
1390 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, power_iters)
1391 {
1392 p.check_params({ "aggr", "nullspace", "relax", "estimate_spectral_radius", "power_iters" });
1393 }
1394
1395 void get(PropertyTree& p, const std::string& path) const
1396 {
1397 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
1398 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
1399 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, relax);
1400 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, estimate_spectral_radius);
1401 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, power_iters);
1402 }
1403 };
1404
1405 SmoothedAggregationCoarserning(const params& prm = params())
1406 : prm(prm)
1407 {}
1408
1409 template <class Matrix>
1410 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
1411 transfer_operators(const Matrix& A)
1412 {
1413 typedef typename backend::value_type<Matrix>::type value_type;
1414 typedef typename math::scalar_of<value_type>::type scalar_type;
1415
1416 const size_t n = backend::nbRow(A);
1417
1418 ARCCORE_ALINA_TIC("aggregates");
1419 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
1420 prm.aggr.eps_strong *= 0.5;
1421 ARCCORE_ALINA_TOC("aggregates");
1422
1423 auto P_tent = tentative_prolongation<Matrix>(n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
1424
1425 auto P = std::make_shared<Matrix>();
1426 P->set_size(backend::nbRow(*P_tent), backend::nbColumn(*P_tent), true);
1427
1428 scalar_type omega = prm.relax;
1429 if (prm.estimate_spectral_radius) {
1430 omega *= static_cast<scalar_type>(4.0 / 3) / spectral_radius<true>(A, prm.power_iters);
1431 }
1432 else {
1433 omega *= static_cast<scalar_type>(2.0 / 3);
1434 }
1435
1436 ARCCORE_ALINA_TIC("smoothing");
1437 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1438 std::vector<ptrdiff_t> marker(P->ncols, -1);
1439
1440 // Count number of entries in P.
1441 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1442 for (ptrdiff_t ja = A.ptr[i], ea = A.ptr[i + 1]; ja < ea; ++ja) {
1443 ptrdiff_t ca = A.col[ja];
1444
1445 // Skip weak off-diagonal connections.
1446 if (ca != i && !aggr.strong_connection[ja])
1447 continue;
1448
1449 for (ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca + 1]; jp < ep; ++jp) {
1450 ptrdiff_t cp = P_tent->col[jp];
1451
1452 if (marker[cp] != i) {
1453 marker[cp] = i;
1454 ++(P->ptr[i + 1]);
1455 }
1456 }
1457 }
1458 }
1459 });
1460
1461 P->scan_row_sizes();
1462 P->set_nonzeros();
1463
1464 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1465 std::vector<ptrdiff_t> marker(P->ncols, -1);
1466
1467 // Fill the interpolation matrix.
1468 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1469
1470 // Diagonal of the filtered matrix is the original matrix
1471 // diagonal minus its weak connections.
1472 value_type dia = math::zero<value_type>();
1473 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1474 if (A.col[j] == i || !aggr.strong_connection[j])
1475 dia += A.val[j];
1476 }
1477 if (!math::is_zero(dia))
1478 dia = -omega * math::inverse(dia);
1479
1480 ptrdiff_t row_beg = P->ptr[i];
1481 ptrdiff_t row_end = row_beg;
1482 for (ptrdiff_t ja = A.ptr[i], ea = A.ptr[i + 1]; ja < ea; ++ja) {
1483 ptrdiff_t ca = A.col[ja];
1484
1485 // Skip weak off-diagonal connections.
1486 if (ca != i && !aggr.strong_connection[ja])
1487 continue;
1488
1489 value_type va = (ca == i)
1490 ? static_cast<value_type>(static_cast<scalar_type>(1 - omega) * math::identity<value_type>())
1491 : dia * A.val[ja];
1492
1493 for (ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca + 1]; jp < ep; ++jp) {
1494 ptrdiff_t cp = P_tent->col[jp];
1495 value_type vp = P_tent->val[jp];
1496
1497 if (marker[cp] < row_beg) {
1498 marker[cp] = row_end;
1499 P->col[row_end] = cp;
1500 P->val[row_end] = va * vp;
1501 ++row_end;
1502 }
1503 else {
1504 P->val[marker[cp]] += va * vp;
1505 }
1506 }
1507 }
1508 }
1509 });
1510 ARCCORE_ALINA_TOC("smoothing");
1511
1512 return std::make_tuple(P, transpose(*P));
1513 }
1514
1515 template <class Matrix>
1516 std::shared_ptr<Matrix>
1517 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1518 {
1519 return detail::galerkin(A, P, R);
1520 }
1521
1522 params prm;
1523};
1524
1525/*---------------------------------------------------------------------------*/
1526/*---------------------------------------------------------------------------*/
1533template <class Backend>
1534struct SmoothedAggregationEnergyMinCoarsening
1535{
1536 typedef pointwise_aggregates Aggregates;
1537
1539 struct params
1540 {
1543
1546
1547 params() {}
1548
1549 params(const PropertyTree& p)
1550 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
1551 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
1552 {
1553 p.check_params( { "aggr", "nullspace" });
1554 }
1555
1556 void get(PropertyTree& p, const std::string& path) const
1557 {
1558 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
1559 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
1560 }
1561 } prm;
1562
1563 SmoothedAggregationEnergyMinCoarsening(const params& prm = params())
1564 : prm(prm)
1565 {}
1566
1567 template <class Matrix>
1568 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
1569 transfer_operators(const Matrix& A)
1570 {
1571 typedef typename backend::value_type<Matrix>::type Val;
1572 typedef typename backend::col_type<Matrix>::type Col;
1573 typedef typename backend::ptr_type<Matrix>::type Ptr;
1574 typedef ptrdiff_t Idx;
1575
1576 ARCCORE_ALINA_TIC("aggregates");
1577 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
1578 prm.aggr.eps_strong *= 0.5;
1579 ARCCORE_ALINA_TOC("aggregates");
1580
1581 ARCCORE_ALINA_TIC("interpolation");
1582 auto P_tent = tentative_prolongation<Matrix>(backend::nbRow(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
1583
1584 // Filter the system matrix
1585 CSRMatrix<Val, Col, Ptr> Af;
1586 Af.set_size(backend::nbRow(A), backend::nbColumn(A));
1587 Af.ptr[0] = 0;
1588
1589 std::vector<Val> dia(Af.nbRow());
1590
1591 arccoreParallelFor(0, Af.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1592 for (Idx i = begin; i < (begin + size); ++i) {
1593 Idx row_begin = A.ptr[i];
1594 Idx row_end = A.ptr[i + 1];
1595 Idx row_width = row_end - row_begin;
1596
1597 Val D = math::zero<Val>();
1598 for (Idx j = row_begin; j < row_end; ++j) {
1599 Idx c = A.col[j];
1600 Val v = A.val[j];
1601
1602 if (c == i)
1603 D += v;
1604 else if (!aggr.strong_connection[j]) {
1605 D += v;
1606 --row_width;
1607 }
1608 }
1609
1610 dia[i] = D;
1611 Af.ptr[i + 1] = row_width;
1612 }
1613 });
1614
1615 Af.set_nonzeros(Af.scan_row_sizes());
1616
1617 arccoreParallelFor(0, Af.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1618 for (Idx i = begin; i < (begin + size); ++i) {
1619
1620 Idx row_begin = A.ptr[i];
1621 Idx row_end = A.ptr[i + 1];
1622 Idx row_head = Af.ptr[i];
1623
1624 for (Idx j = row_begin; j < row_end; ++j) {
1625 Idx c = A.col[j];
1626
1627 if (c == i) {
1628 Af.col[row_head] = i;
1629 Af.val[row_head] = dia[i];
1630 ++row_head;
1631 }
1632 else if (aggr.strong_connection[j]) {
1633 Af.col[row_head] = c;
1634 Af.val[row_head] = A.val[j];
1635 ++row_head;
1636 }
1637 }
1638 }
1639 });
1640
1641 std::vector<Val> omega;
1642
1643 auto P = interpolation(Af, dia, *P_tent, omega);
1644 auto R = restriction(Af, dia, *P_tent, omega);
1645 ARCCORE_ALINA_TOC("interpolation");
1646
1647 return std::make_tuple(P, R);
1648 }
1649
1650 template <class Matrix>
1651 std::shared_ptr<Matrix>
1652 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1653 {
1654 return detail::galerkin(A, P, R);
1655 }
1656
1657 private:
1658
1659 template <class AMatrix, typename Val, typename Col, typename Ptr>
1660 static std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
1661 interpolation(const AMatrix& A, const std::vector<Val>& Adia,
1662 const CSRMatrix<Val, Col, Ptr>& P_tent,
1663 std::vector<Val>& omega)
1664 {
1665 const size_t n = backend::nbRow(P_tent);
1666 const size_t nc = backend::nbColumn(P_tent);
1667
1668 auto AP = product(A, P_tent, /*sort rows: */ true);
1669
1670 omega.resize(nc, math::zero<Val>());
1671 std::vector<Val> denum(nc, math::zero<Val>());
1672
1673 // TODO CONCURRENCY: remove these mutex
1674 std::mutex mutex1;
1675 std::mutex mutex2;
1676
1677 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1678 std::vector<ptrdiff_t> marker(nc, -1);
1679
1680 // Compute A * Dinv * AP row by row and compute columnwise
1681 // scalar products necessary for computation of omega. The
1682 // actual results of matrix-matrix product are not stored.
1683 std::vector<Col> adap_col(128);
1684 std::vector<Val> adap_val(128);
1685
1686 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
1687 adap_col.clear();
1688 adap_val.clear();
1689
1690 // Form current row of ADAP matrix.
1691 for (auto a = A.row_begin(ia); a; ++a) {
1692 Col ca = a.col();
1693 Val va = math::inverse(Adia[ca]) * a.value();
1694
1695 for (auto p = AP->row_begin(ca); p; ++p) {
1696 Col c = p.col();
1697 Val v = va * p.value();
1698
1699 if (marker[c] < 0) {
1700 marker[c] = adap_col.size();
1701 adap_col.push_back(c);
1702 adap_val.push_back(v);
1703 }
1704 else {
1705 adap_val[marker[c]] += v;
1706 }
1707 }
1708 }
1709
1710 detail::sort_row(&adap_col[0], &adap_val[0], adap_col.size());
1711
1712 // Update columnwise scalar products (AP,ADAP) and (ADAP,ADAP).
1713 // 1. (AP, ADAP)
1714 for (
1715 Ptr ja = AP->ptr[ia], ea = AP->ptr[ia + 1],
1716 jb = 0, eb = adap_col.size();
1717 ja < ea && jb < eb;) {
1718 Col ca = AP->col[ja];
1719 Col cb = adap_col[jb];
1720
1721 if (ca < cb)
1722 ++ja;
1723 else if (cb < ca)
1724 ++jb;
1725 else /*ca == cb*/ {
1726 Val v = AP->val[ja] * adap_val[jb];
1727 {
1728 std::scoped_lock lock(mutex1);
1729 omega[ca] += v;
1730 ++ja;
1731 ++jb;
1732 }
1733 }
1734 }
1735
1736 // 2. (ADAP, ADAP) (and clear marker)
1737 for (size_t j = 0, e = adap_col.size(); j < e; ++j) {
1738 Col c = adap_col[j];
1739 Val v = adap_val[j];
1740 {
1741 std::scoped_lock lock(mutex2);
1742 denum[c] += v * v;
1743 marker[c] = -1;
1744 }
1745 }
1746 }
1747 });
1748
1749 for (size_t i = 0, m = omega.size(); i < m; ++i)
1750 omega[i] = math::inverse(denum[i]) * omega[i];
1751
1752 // Update AP to obtain P: P = (P_tent - D^-1 A P Omega)
1753 /*
1754 * Here we use the fact that if P(i,j) != 0,
1755 * then with necessity AP(i,j) != 0:
1756 *
1757 * AP(i,j) = sum_k(A_ik P_kj), and A_ii != 0.
1758 */
1759 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1760 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1761 Val dia = math::inverse(Adia[i]);
1762
1763 for (Ptr ja = AP->ptr[i], ea = AP->ptr[i + 1],
1764 jp = P_tent.ptr[i], ep = P_tent.ptr[i + 1];
1765 ja < ea; ++ja) {
1766 Col ca = AP->col[ja];
1767 Val va = -dia * AP->val[ja] * omega[ca];
1768
1769 for (; jp < ep; ++jp) {
1770 Col cp = P_tent.col[jp];
1771 if (cp > ca)
1772 break;
1773
1774 if (cp == ca) {
1775 va += P_tent.val[jp];
1776 break;
1777 }
1778 }
1779
1780 AP->val[ja] = va;
1781 }
1782 }
1783 });
1784
1785 return AP;
1786 }
1787
1788 template <typename AMatrix, typename Val, typename Col, typename Ptr>
1789 static std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
1790 restriction(const AMatrix& A, const std::vector<Val>& Adia,
1791 const CSRMatrix<Val, Col, Ptr>& P_tent,
1792 const std::vector<Val>& omega)
1793 {
1794 const size_t nc = backend::nbColumn(P_tent);
1795
1796 auto R_tent = transpose(P_tent);
1797 sort_rows(*R_tent);
1798
1799 auto RA = product(*R_tent, A, /*sort rows: */ true);
1800
1801 // Compute R = R_tent - Omega R_tent A D^-1.
1802 /*
1803 * Here we use the fact that if R(i,j) != 0,
1804 * then with necessity RA(i,j) != 0:
1805 *
1806 * RA(i,j) = sum_k(R_ik A_kj), and A_jj != 0.
1807 */
1808 arccoreParallelFor(0, nc, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1809 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1810 Val w = omega[i];
1811
1812 for (Ptr ja = RA->ptr[i], ea = RA->ptr[i + 1],
1813 jr = R_tent->ptr[i], er = R_tent->ptr[i + 1];
1814 ja < ea; ++ja) {
1815 Col ca = RA->col[ja];
1816 Val va = -w * math::inverse(Adia[ca]) * RA->val[ja];
1817
1818 for (; jr < er; ++jr) {
1819 Col cr = R_tent->col[jr];
1820 if (cr > ca)
1821 break;
1822
1823 if (cr == ca) {
1824 va += R_tent->val[jr];
1825 break;
1826 }
1827 }
1828
1829 RA->val[ja] = va;
1830 }
1831 }
1832 });
1833
1834 return RA;
1835 }
1836};
1837
1838/*---------------------------------------------------------------------------*/
1839/*---------------------------------------------------------------------------*/
1840
1841} // namespace Arcane::Alina
1842
1843/*---------------------------------------------------------------------------*/
1844/*---------------------------------------------------------------------------*/
1845
1846namespace Arcane::Alina::backend
1847{
1848
1849template <class Backend>
1851 typename std::enable_if<!std::is_arithmetic<typename backend::value_type<Backend>::type>::value>::type> : std::false_type
1852{};
1853
1854/*---------------------------------------------------------------------------*/
1855/*---------------------------------------------------------------------------*/
1856
1857} // namespace Arcane::Alina::backend
1858
1859/*---------------------------------------------------------------------------*/
1860/*---------------------------------------------------------------------------*/
1861
1862#endif
std::vector< char > strong_connection
Strong connectivity matrix.
Definition Coarsening.h:507
std::vector< ptrdiff_t > id
Aggerate id that each fine-level variable belongs to.
Definition Coarsening.h:510
size_t count
Number of aggregates.
Definition Coarsening.h:504
pointwise_aggregates(const Matrix &A, const params &prm, unsigned min_aggregate)
Constructs aggregates for a given matrix.
Definition Coarsening.h:514
Loop execution information.
Matrix class, to be used by user.
Vector class, to be used by user.
__host__ __device__ Real dot(Real2 u, Real2 v)
Dot product of u by v in .
Definition MathUtils.h:94
__host__ __device__ double sqrt(double v)
Square root of v.
Definition Math.h:142
__host__ __device__ Real3x3 transpose(const Real3x3 &t)
Transpose the matrix.
Definition MathUtils.h:265
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applies the lambda function lambda_function concurrently over the iteration interval given by loop_ra...
Definition ParallelFor.h:87
std::int32_t Int32
Signed integer type of 32 bits.
float over_interp
Over-interpolation factor .
Definition Coarsening.h:666
nullspace_params nullspace
Near nullspace parameters.
Definition Coarsening.h:650
Aggregates::params aggr
Aggregation parameters.
Definition Coarsening.h:647
std::tuple< std::shared_ptr< Matrix >, std::shared_ptr< Matrix > > transfer_operators(const Matrix &A)
Creates transfer operators for the given system matrix.
Definition Coarsening.h:701
std::shared_ptr< Matrix > coarse_operator(const Matrix &A, const Matrix &P, const Matrix &R) const
Creates system matrix for the coarser level.
Definition Coarsening.h:727
Apply a scalar coarsening on a block matrix.
Definition Coarsening.h:743
bool do_trunc
Truncate prolongation operator?
Definition Coarsening.h:936
float eps_trunc
Truncation parameter .
Definition Coarsening.h:939
float eps_strong
Parameter defining strong couplings.
Definition Coarsening.h:923
Classic Ruge-Stuben coarsening with direct interpolation.
Definition Coarsening.h:910
Aggregates::params aggr
Aggregation parameters.
nullspace_params nullspace
Near nullspace parameters.
Aggregates::params aggr
Aggregation parameters.
nullspace_params nullspace
Near nullspace parameters.
Is the coarsening supported by the backend?
Number of rows for statically sized matrix types.
std::vector< double > B
Near nullspace vectors.
Definition Coarsening.h:97
int cols
Number of vectors in near nullspace.
Definition Coarsening.h:90
float eps_strong
Parameter defining strong couplings.
Definition Coarsening.h:161
std::vector< ptrdiff_t > id
Aggerate id that each fine-level variable belongs to.
Definition Coarsening.h:199
std::vector< char > strong_connection
Strong connectivity matrix.
Definition Coarsening.h:191
plain_aggregates(const Matrix &A, const params &prm)
Constructs aggregates for a given matrix.
Definition Coarsening.h:208
size_t count
Number of aggregates.
Definition Coarsening.h:183
Int32 block_size
Block size for the system matrix.
Definition Coarsening.h:482