// Coarsening.h — Arcane v4.1.10.0
// (Documentation-viewer chrome that preceded this header has been removed.)
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
//-----------------------------------------------------------------------------
// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
// See the top-level COPYRIGHT file for details.
// SPDX-License-Identifier: Apache-2.0
//-----------------------------------------------------------------------------
/*---------------------------------------------------------------------------*/
/* Coarsening.h                                                (C) 2000-2026 */
/*                                                                           */
/* Coarsening strategies for AMG hierarchy construction.                     */
/*---------------------------------------------------------------------------*/
#ifndef ARCCORE_ALINA_COARSENING_H
#define ARCCORE_ALINA_COARSENING_H
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*
 * This file is based on the work on AMGCL library (version march 2026)
 * which can be found at https://github.com/ddemidov/amgcl.
 *
 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
 * SPDX-License-Identifier: MIT
 */
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <limits>
#include <memory>
#include <mutex>
#include <numeric>
#include <tuple>
#include <vector>

#include "arccore/alina/BuiltinBackend.h"
#include "arccore/alina/AlinaUtils.h"
#include "arccore/alina/QRFactorizationImpl.h"
#include "arccore/alina/Adapters.h"
#include "arccore/alina/ValueTypeInterface.h"

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace Arcane::Alina::detail
{

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
//! Computes the Galerkin coarse-grid operator R * A * P.
template <class Matrix>
std::shared_ptr<Matrix> galerkin(const Matrix& A, const Matrix& P, const Matrix& R)
{
  // Form A*P first, then apply the restriction operator on the left.
  auto AP = product(A, P);
  return product(R, *AP);
}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
//! Computes the Galerkin operator R * A * P and scales it by the factor \a s.
template <class Matrix>
std::shared_ptr<Matrix> scaled_galerkin(const Matrix& A, const Matrix& P, const Matrix& R, float s)
{
  auto coarse = galerkin(A, P, R);
  scale(*coarse, s);
  return coarse;
}
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

} // namespace Arcane::Alina::detail

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace Arcane::Alina
{

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
86struct nullspace_params
87{
89 int cols;
90
92
96 std::vector<double> B;
97
98 nullspace_params()
99 : cols(0)
100 {}
101
103 : cols(p.get("cols", nullspace_params().cols))
104 {
105 double* b = 0;
106 b = p.get("B", b);
107
108 if (b) {
109 Int32 rows = 0;
110 rows = p.get("rows", rows);
111
112 precondition(cols > 0, "Error in nullspace parameters: "
113 "B is set, but cols is not");
114
115 precondition(rows > 0, "Error in nullspace parameters: "
116 "B is set, but rows is not");
117
118 B.assign(b, b + rows * cols);
119 }
120 else {
121 precondition(cols == 0, "Error in nullspace parameters: "
122 "cols > 0, but B is empty");
123 }
124
125 p.check_params( { "cols", "rows", "B" });
126 }
127
128 void get(PropertyTree&, const std::string&) const {}
129};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
149{
151 struct params
152 {
154
161
162 params()
163 : eps_strong(0.08f)
164 {}
165
166 params(const PropertyTree& p)
167 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_strong)
168 {
169 p.check_params( { "eps_strong", "block_size" });
170 }
171
172 void get(PropertyTree& p, const std::string& path) const
173 {
174 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_strong);
175 }
176 };
177
178 static const ptrdiff_t undefined = -1;
179 static const ptrdiff_t removed = -2;
180
182 size_t count;
183
190 std::vector<char> strong_connection;
191
198 std::vector<ptrdiff_t> id;
199
206 template <class Matrix>
207 plain_aggregates(const Matrix& A, const params& prm)
208 : count(0)
209 , strong_connection(backend::nonzeros(A))
210 , id(backend::nbRow(A))
211 {
212 typedef typename backend::value_type<Matrix>::type value_type;
213 typedef typename math::scalar_of<value_type>::type scalar_type;
214
215 scalar_type eps_squared = prm.eps_strong * prm.eps_strong;
216
217 const size_t n = backend::nbRow(A);
218
219 /* 1. Get strong connections */
220 auto dia = diagonal(A);
221 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
222 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
223 value_type eps_dia_i = eps_squared * (*dia)[i];
224
225 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
226 ptrdiff_t c = A.col[j];
227 value_type v = A.val[j];
228
229 strong_connection[j] = (c != i) && (eps_dia_i * (*dia)[c] < v * v);
230 }
231 }
232 });
233
234 /* 2. Get aggregate ids */
235
236 // Remove lonely nodes.
237 size_t max_neib = 0;
238 for (size_t i = 0; i < n; ++i) {
239 ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1];
240 max_neib = std::max<size_t>(max_neib, e - j);
241
242 ptrdiff_t state = removed;
243 for (; j < e; ++j)
244 if (strong_connection[j]) {
245 state = undefined;
246 break;
247 }
248
249 id[i] = state;
250 }
251
252 std::vector<ptrdiff_t> neib;
253 neib.reserve(max_neib);
254
255 // Perform plain aggregation
256 for (size_t i = 0; i < n; ++i) {
257 if (id[i] != undefined)
258 continue;
259
260 // The point is not adjacent to a core of any previous aggregate:
261 // so its a seed of a new aggregate.
262 ptrdiff_t cur_id = static_cast<ptrdiff_t>(count++);
263 id[i] = cur_id;
264
265 // (*) Include its neighbors as well.
266 neib.clear();
267 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
268 ptrdiff_t c = A.col[j];
269 if (strong_connection[j] && id[c] != removed) {
270 id[c] = cur_id;
271 neib.push_back(c);
272 }
273 }
274
275 // Temporarily mark undefined points adjacent to the new aggregate
276 // as members of the aggregate.
277 // If nobody claims them later, they will stay here.
278 for (ptrdiff_t c : neib) {
279 for (ptrdiff_t j = A.ptr[c], e = A.ptr[c + 1]; j < e; ++j) {
280 ptrdiff_t cc = A.col[j];
281 if (strong_connection[j] && id[cc] == undefined)
282 id[cc] = cur_id;
283 }
284 }
285 }
286
287 if (!count)
288 throw error::empty_level();
289
290 // Some of the aggregates could potentially vanish during expansion
291 // step (*) above. We need to exclude those and renumber the rest.
292 std::vector<ptrdiff_t> cnt(count, 0);
293 for (ptrdiff_t i : id)
294 if (i >= 0)
295 cnt[i] = 1;
296 std::partial_sum(cnt.begin(), cnt.end(), cnt.begin());
297
298 if (static_cast<ptrdiff_t>(count) > cnt.back()) {
299 count = cnt.back();
300
301 for (size_t i = 0; i < n; ++i)
302 if (id[i] >= 0)
303 id[i] = cnt[id[i]] - 1;
304 }
305 }
306};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

namespace detail
{
  /*!
   * \brief Comparator ordering indices by (blocked) aggregate id.
   *
   * Indices whose key is negative (not aggregated) sort after all others.
   */
  struct skip_negative
  {
    const std::vector<ptrdiff_t>& key;
    int block_size;

    skip_negative(const std::vector<ptrdiff_t>& key, int block_size)
    : key(key)
    , block_size(block_size)
    {}

    bool operator()(ptrdiff_t i, ptrdiff_t j) const
    {
      // Casting to an unsigned type maps negative keys to huge values,
      // which keeps them at the end of the sorted sequence.
      const size_t ki = static_cast<size_t>(key[i]) / block_size;
      const size_t kj = static_cast<size_t>(key[j]) / block_size;
      return ki < kj;
    }
  };
} // namespace detail

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
342template <class Matrix>
343std::shared_ptr<Matrix>
344tentative_prolongation(size_t n,
345 size_t naggr,
346 const std::vector<ptrdiff_t> aggr,
347 nullspace_params& nullspace,
348 int block_size)
349{
350 typedef typename backend::value_type<Matrix>::type value_type;
351 typedef typename backend::col_type<Matrix>::type col_type;
352
353 auto P = std::make_shared<Matrix>();
354
355 ARCCORE_ALINA_TIC("tentative");
356 if (nullspace.cols > 0) {
357 ptrdiff_t nba = naggr / block_size;
358
359 // Sort fine points by aggregate number.
360 // Put points not belonging to any aggregate to the end of the list.
361 std::vector<ptrdiff_t> order(n);
362 for (size_t i = 0; i < n; ++i)
363 order[i] = i;
364 std::stable_sort(order.begin(), order.end(), detail::skip_negative(aggr, block_size));
365 std::vector<ptrdiff_t> aggr_ptr(nba + 1, 0);
366 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
367 ptrdiff_t a = aggr[order[i]];
368 if (a < 0)
369 break;
370 ++aggr_ptr[a / block_size + 1];
371 }
372 std::partial_sum(aggr_ptr.begin(), aggr_ptr.end(), aggr_ptr.begin());
373
374 // Precompute the shape of the prolongation operator.
375 // Each row contains exactly nullspace.cols non-zero entries.
376 // Rows that do not belong to any aggregate are empty.
377 P->set_size(n, nullspace.cols * nba);
378 P->ptr[0] = 0;
379
380 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
381 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
382 P->ptr[i + 1] = aggr[i] < 0 ? 0 : nullspace.cols;
383 }
384 });
385
386 P->scan_row_sizes();
387 P->set_nonzeros();
388
389 // Compute the tentative prolongation operator and null-space vectors
390 // for the coarser level.
391 std::vector<double> Bnew;
392 Bnew.resize(nba * nullspace.cols * nullspace.cols);
393
394 arccoreParallelFor(0, nba, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
395 Alina::detail::QRFactorization<double> qr;
396 std::vector<double> Bpart;
397
398 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
399 auto aggr_beg = aggr_ptr[i];
400 auto aggr_end = aggr_ptr[i + 1];
401 auto d = aggr_end - aggr_beg;
402
403 Bpart.resize(d * nullspace.cols);
404
405 for (ptrdiff_t j = aggr_beg, jj = 0; j < aggr_end; ++j, ++jj) {
406 ptrdiff_t ib = nullspace.cols * order[j];
407 for (int k = 0; k < nullspace.cols; ++k)
408 Bpart[jj + d * k] = nullspace.B[ib + k];
409 }
410
411 qr.factorize(d, nullspace.cols, &Bpart[0], Alina::detail::col_major);
412
413 for (int ii = 0, kk = 0; ii < nullspace.cols; ++ii)
414 for (int jj = 0; jj < nullspace.cols; ++jj, ++kk)
415 Bnew[i * nullspace.cols * nullspace.cols + kk] = qr.R(ii, jj);
416
417 for (ptrdiff_t j = aggr_beg, ii = 0; j < aggr_end; ++j, ++ii) {
418 col_type* c = &P->col[P->ptr[order[j]]];
419 value_type* v = &P->val[P->ptr[order[j]]];
420
421 for (int jj = 0; jj < nullspace.cols; ++jj) {
422 c[jj] = i * nullspace.cols + jj;
423 // TODO: this is just a workaround to make non-scalar value
424 // types compile. Most probably this won't actually work.
425 v[jj] = qr.Q(ii, jj) * math::identity<value_type>();
426 }
427 }
428 }
429 });
430
431 std::swap(nullspace.B, Bnew);
432 }
433 else {
434 P->set_size(n, naggr);
435 P->ptr[0] = 0;
436 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
437 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
438 P->ptr[i + 1] = (aggr[i] >= 0);
439 }
440 });
441
442 P->set_nonzeros(P->scan_row_sizes());
443
444 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
445 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
446 if (aggr[i] >= 0) {
447 P->col[P->ptr[i]] = aggr[i];
448 P->val[P->ptr[i]] = math::identity<value_type>();
449 }
450 }
451 });
452 }
453 ARCCORE_ALINA_TOC("tentative");
454
455 return P;
456}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
469{
470 public:
471
474 {
482
483 params() = default;
484
485 params(const PropertyTree& p)
486 : plain_aggregates::params(p)
487 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, block_size)
488 {
489 p.check_params( { "eps_strong", "block_size" });
490 }
491
492 void get(Alina::PropertyTree& p, const std::string& path) const
493 {
494 plain_aggregates::params::get(p, path);
495 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, block_size);
496 }
497 };
498
499 static const ptrdiff_t undefined = -1;
500 static const ptrdiff_t removed = -2;
501
503 size_t count;
504
506 std::vector<char> strong_connection;
507
509 std::vector<ptrdiff_t> id;
510
512 template <class Matrix>
513 pointwise_aggregates(const Matrix& A, const params& prm, unsigned min_aggregate)
514 : count(0)
515 {
516 if (prm.block_size == 1) {
517 plain_aggregates aggr(A, prm);
518
519 remove_small_aggregates(A.nbRow(), 1, min_aggregate, aggr);
520
521 count = aggr.count;
522 strong_connection.swap(aggr.strong_connection);
523 id.swap(aggr.id);
524 }
525 else {
526 strong_connection.resize(backend::nonzeros(A));
527 id.resize(backend::nbRow(A));
528
529 auto ap = pointwise_matrix(A, prm.block_size);
530 auto& Ap = *ap;
531
532 plain_aggregates pw_aggr(Ap, prm);
533
534 remove_small_aggregates(Ap.nbRow(), prm.block_size, min_aggregate, pw_aggr);
535
536 count = pw_aggr.count * prm.block_size;
537
538 arccoreParallelFor(0, Ap.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
539 std::vector<ptrdiff_t> j(prm.block_size);
540 std::vector<ptrdiff_t> e(prm.block_size);
541
542 for (ptrdiff_t ip = begin; ip < (begin + size); ++ip) {
543 ptrdiff_t ia = ip * prm.block_size;
544
545 for (unsigned k = 0; k < prm.block_size; ++k, ++ia) {
546 id[ia] = prm.block_size * pw_aggr.id[ip] + k;
547
548 j[k] = A.ptr[ia];
549 e[k] = A.ptr[ia + 1];
550 }
551
552 for (ptrdiff_t jp = Ap.ptr[ip], ep = Ap.ptr[ip + 1]; jp < ep; ++jp) {
553 ptrdiff_t cp = Ap.col[jp];
554 bool sp = (cp == ip) || pw_aggr.strong_connection[jp];
555
556 ptrdiff_t col_end = (cp + 1) * prm.block_size;
557
558 for (unsigned k = 0; k < prm.block_size; ++k) {
559 ptrdiff_t beg = j[k];
560 ptrdiff_t end = e[k];
561
562 while (beg < end && A.col[beg] < col_end) {
563 strong_connection[beg] = sp && A.col[beg] != (ia + k);
564 ++beg;
565 }
566
567 j[k] = beg;
568 }
569 }
570 }
571 });
572 }
573 }
574
575 static void remove_small_aggregates(size_t n, unsigned block_size, unsigned min_aggregate,
576 plain_aggregates& aggr)
577 {
578 if (min_aggregate <= 1)
579 return; // nothing to do
580
581 // Count entries in each of the aggregates
582 std::vector<ptrdiff_t> count(aggr.count, 0);
583
584 for (size_t i = 0; i < n; ++i) {
585 ptrdiff_t id = aggr.id[i];
586 if (id != removed)
587 ++count[id];
588 }
589
590 // If any aggregate has less entries than required, remove it.
591 // Renumber the rest of the aggregates to leave no gaps.
592 size_t m = 0;
593 for (size_t i = 0; i < aggr.count; ++i) {
594 if (block_size * count[i] < min_aggregate) {
595 count[i] = removed;
596 }
597 else {
598 count[i] = m++;
599 }
600 }
601
602 // Update aggregate count and aggregate ids.
603 aggr.count = m;
604
605 for (size_t i = 0; i < n; ++i) {
606 ptrdiff_t id = aggr.id[i];
607 if (id != removed)
608 aggr.id[i] = count[id];
609 }
610 }
611};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
637template <class Backend>
638struct AggregationCoarsening
639{
640 typedef pointwise_aggregates Aggregates;
641
643 struct params
644 {
647
650
666
667 params()
668 : over_interp(math::static_rows<typename Backend::value_type>::value == 1 ? 1.5f : 2.0f)
669 {}
670
671 params(const PropertyTree& p)
672 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
673 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
674 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, over_interp)
675 {
676 p.check_params( { "aggr", "nullspace", "over_interp" });
677 }
678
679 void get(PropertyTree& p, const std::string& path) const
680 {
681 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
682 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
683 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, over_interp);
684 }
685 } prm;
686
687 explicit AggregationCoarsening(const params& prm = params())
688 : prm(prm)
689 {}
690
698 template <class Matrix>
699 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
701 {
702 const size_t n = backend::nbRow(A);
703
704 ARCCORE_ALINA_TIC("aggregates");
705 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
706 ARCCORE_ALINA_TOC("aggregates");
707
708 ARCCORE_ALINA_TIC("interpolation");
709 auto P = tentative_prolongation<Matrix>(
710 n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
711 ARCCORE_ALINA_TOC("interpolation");
712
713 return std::make_tuple(P, transpose(*P));
714 }
715
724 template <class Matrix>
725 std::shared_ptr<Matrix>
726 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
727 {
728 return detail::scaled_galerkin(A, P, R, 1 / prm.over_interp);
729 }
730};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
// Wrapper running a scalar-backend coarsening on block-valued matrices by
// temporarily "unblocking" them.
template <template <class> class Coarsening>
// NOTE(review): the struct declaration line (the wrapper's name) was lost
// during documentation extraction — in upstream AMGCL this wrapper is
// `as_scalar`; confirm the name against the repository.
{
  template <class Backend>
  struct type
  {
    typedef math::element_of<typename Backend::value_type>::type Scalar;
    // NOTE(review): a typedef defining BaseBackend (the scalar backend built
    // on Scalar) was lost during extraction — confirm against the repository.
    typedef Coarsening<BaseBackend> Base;

    typedef typename Base::params params;
    Base base; // the wrapped scalar coarsening instance

    type(const params& prm = params())
    : base(prm) {};

    // Block-valued path: unblock the matrix, run the scalar coarsening,
    // then re-block the resulting transfer operators.
    template <class Matrix>
    typename std::enable_if<backend::coarsening_is_supported<BaseBackend, Coarsening>::value &&
                            // NOTE(review): the second operand of && (the
                            // condition selecting block-valued matrices) was
                            // lost during extraction.
                            std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
    transfer_operators(const Matrix& B)
    {
      typedef typename backend::value_type<Matrix>::type Block;
      auto T = base.transfer_operators(*adapter::unblock_matrix(B));

      auto& P = *std::get<0>(T);
      auto& R = *std::get<1>(T);

      // Re-blocking requires sorted rows.
      sort_rows(P);
      sort_rows(R);

      return std::make_tuple(
          std::make_shared<Matrix>(adapter::block_matrix<Block>(P)),
          std::make_shared<Matrix>(adapter::block_matrix<Block>(R)));
    }

    // Scalar path: forward directly to the wrapped coarsening.
    template <class Matrix>
    typename std::enable_if<backend::coarsening_is_supported<BaseBackend, Coarsening>::value &&
                            // NOTE(review): the second operand of && (the
                            // condition selecting scalar matrices) was lost
                            // during extraction.
                            std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
    transfer_operators(const Matrix& A)
    {
      return base.transfer_operators(A);
    }

    // Fallback when the backend does not support the coarsening at all.
    template <class Matrix>
    typename std::enable_if<!backend::coarsening_is_supported<BaseBackend, Coarsening>::value,
                            std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>>::type
    transfer_operators(const Matrix&)
    {
      throw std::logic_error("The coarsening is not supported by the backend");
    }

    // Coarse operator construction needs no (un)blocking — delegate.
    template <class Matrix>
    std::shared_ptr<Matrix>
    coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
    {
      return base.coarse_operator(A, P, R);
    }
  };
};

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

// Create rigid body modes from coordinate vector.
// To be used as near-nullspace vectors with aggregation coarsening
// for 2D or 3D elasticity problems.
// The output matrix B may be transposed on demand
// (to be used as a set of deflation vectors).
810template <class Vector>
811int rigid_body_modes(int ndim, const Vector& coo, std::vector<double>& B, bool transpose = false)
812{
813 precondition(ndim == 2 || ndim == 3, "Only 2D or 3D problems are supported");
814 precondition(coo.size() % ndim == 0, "Coordinate vector size should be divisible by ndim");
815
816 size_t n = coo.size();
817 int nmodes = (ndim == 2 ? 3 : 6);
818 B.resize(n * nmodes, 0.0);
819
820 const int stride1 = transpose ? 1 : nmodes;
821 const int stride2 = transpose ? n : 1;
822
823 double sn = 1 / sqrt(n);
824
825 if (ndim == 2) {
826 for (size_t i = 0; i < n; ++i) {
827 size_t nod = i / ndim;
828 size_t dim = i % ndim;
829
830 double x = coo[nod * 2 + 0];
831 double y = coo[nod * 2 + 1];
832
833 // Translation
834 B[i * stride1 + dim * stride2] = sn;
835
836 // Rotation
837 switch (dim) {
838 case 0:
839 B[i * stride1 + 2 * stride2] = -y;
840 break;
841 case 1:
842 B[i * stride1 + 2 * stride2] = x;
843 break;
844 }
845 }
846 }
847 else if (ndim == 3) {
848 for (size_t i = 0; i < n; ++i) {
849 size_t nod = i / ndim;
850 size_t dim = i % ndim;
851
852 double x = coo[nod * 3 + 0];
853 double y = coo[nod * 3 + 1];
854 double z = coo[nod * 3 + 2];
855
856 // Translation
857 B[i * stride1 + dim * stride2] = sn;
858
859 // Rotation
860 switch (dim) {
861 case 0:
862 B[i * stride1 + 3 * stride2] = y;
863 B[i * stride1 + 5 * stride2] = z;
864 break;
865 case 1:
866 B[i * stride1 + 3 * stride2] = -x;
867 B[i * stride1 + 4 * stride2] = -z;
868 break;
869 case 2:
870 B[i * stride1 + 4 * stride2] = y;
871 B[i * stride1 + 5 * stride2] = -x;
872 break;
873 }
874 }
875 }
876
877 // Orthonormalization
878 std::array<double, 6> dot;
879 for (int i = ndim; i < nmodes; ++i) {
880 std::fill(dot.begin(), dot.end(), 0.0);
881 for (size_t j = 0; j < n; ++j) {
882 for (int k = 0; k < i; ++k)
883 dot[k] += B[j * stride1 + k * stride2] * B[j * stride1 + i * stride2];
884 }
885 double s = 0.0;
886 for (size_t j = 0; j < n; ++j) {
887 for (int k = 0; k < i; ++k)
888 B[j * stride1 + i * stride2] -= dot[k] * B[j * stride1 + k * stride2];
889 s += B[j * stride1 + i * stride2] * B[j * stride1 + i * stride2];
890 }
891 s = sqrt(s);
892 for (size_t j = 0; j < n; ++j)
893 B[j * stride1 + i * stride2] /= s;
894 }
895
896 return nmodes;
897}

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
907template <class Backend>
908struct RugeStubenCoarsening
909{
911 struct params
912 {
922 float eps_strong = 0.25f;
923
935 bool do_trunc = true;
936
938 float eps_trunc = 0.2f;
939
940 params() = default;
941
942 params(const PropertyTree& p)
943 : ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_strong)
944 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, do_trunc)
945 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_trunc)
946 {
947 p.check_params( { "eps_strong", "do_trunc", "eps_trunc" });
948 }
949
950 void get(PropertyTree& p, const std::string& path) const
951 {
952 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_strong);
953 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, do_trunc);
954 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_trunc);
955 }
956 } prm;
957
958 explicit RugeStubenCoarsening(const params& prm = params())
959 : prm(prm)
960 {}
961
962 template <class Matrix>
963 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
964 transfer_operators(const Matrix& A) const
965 {
966 typedef typename backend::value_type<Matrix>::type Val;
967 typedef typename backend::col_type<Matrix>::type Col;
968 typedef typename backend::ptr_type<Matrix>::type Ptr;
969 typedef typename math::scalar_of<Val>::type Scalar;
970
971 const size_t n = backend::nbRow(A);
972
973 static const Scalar eps = Alina::detail::eps<Scalar>(1);
974
975 static const Val zero = math::zero<Val>();
976
977 std::vector<char> cf(n, 'U');
978 CSRMatrix<char, Col, Ptr> S;
979
980 ARCCORE_ALINA_TIC("C/F split");
981 connect(A, prm.eps_strong, S, cf);
982 cfsplit(A, S, cf);
983 ARCCORE_ALINA_TOC("C/F split");
984
985 ARCCORE_ALINA_TIC("interpolation");
986 size_t nc = 0;
987 std::vector<ptrdiff_t> cidx(n);
988 for (size_t i = 0; i < n; ++i)
989 if (cf[i] == 'C')
990 cidx[i] = static_cast<ptrdiff_t>(nc++);
991
992 if (!nc)
993 throw error::empty_level();
994
995 auto P = std::make_shared<Matrix>();
996 P->set_size(n, nc, true);
997
998 std::vector<Val> Amin, Amax;
999
1000 if (prm.do_trunc) {
1001 Amin.resize(n);
1002 Amax.resize(n);
1003 }
1004
1005 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1006 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1007 if (cf[i] == 'C') {
1008 ++P->ptr[i + 1];
1009 continue;
1010 }
1011
1012 if (prm.do_trunc) {
1013 Val amin = zero, amax = zero;
1014
1015 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1016 if (!S.val[j] || cf[A.col[j]] != 'C')
1017 continue;
1018
1019 amin = std::min(amin, A.val[j]);
1020 amax = std::max(amax, A.val[j]);
1021 }
1022
1023 Amin[i] = (amin *= prm.eps_trunc);
1024 Amax[i] = (amax *= prm.eps_trunc);
1025
1026 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1027 if (!S.val[j] || cf[A.col[j]] != 'C')
1028 continue;
1029
1030 if (A.val[j] < amin || amax < A.val[j])
1031 ++P->ptr[i + 1];
1032 }
1033 }
1034 else {
1035 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1036 if (S.val[j] && cf[A.col[j]] == 'C')
1037 ++P->ptr[i + 1];
1038 }
1039 }
1040 });
1041
1042 P->set_nonzeros(P->scan_row_sizes());
1043
1044 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1045 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1046 ptrdiff_t row_head = P->ptr[i];
1047
1048 if (cf[i] == 'C') {
1049 P->col[row_head] = cidx[i];
1050 P->val[row_head] = math::identity<Val>();
1051 continue;
1052 }
1053
1054 Val dia = zero;
1055 Val a_num = zero, a_den = zero;
1056 Val b_num = zero, b_den = zero;
1057 Val d_neg = zero, d_pos = zero;
1058
1059 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1060 ptrdiff_t c = A.col[j];
1061 Val v = A.val[j];
1062
1063 if (c == i) {
1064 dia = v;
1065 continue;
1066 }
1067
1068 if (v < zero) {
1069 a_num += v;
1070 if (S.val[j] && cf[c] == 'C') {
1071 a_den += v;
1072 if (prm.do_trunc && Amin[i] < v)
1073 d_neg += v;
1074 }
1075 }
1076 else {
1077 b_num += v;
1078 if (S.val[j] && cf[c] == 'C') {
1079 b_den += v;
1080 if (prm.do_trunc && v < Amax[i])
1081 d_pos += v;
1082 }
1083 }
1084 }
1085
1086 Scalar cf_neg = 1;
1087 Scalar cf_pos = 1;
1088
1089 if (prm.do_trunc) {
1090 if (math::norm(static_cast<Val>(a_den - d_neg)) > eps)
1091 cf_neg = math::norm(a_den) / math::norm(static_cast<Val>(a_den - d_neg));
1092
1093 if (math::norm(static_cast<Val>(b_den - d_pos)) > eps)
1094 cf_pos = math::norm(b_den) / math::norm(static_cast<Val>(b_den - d_pos));
1095 }
1096
1097 if (zero < b_num && math::norm(b_den) < eps)
1098 dia += b_num;
1099
1100 Scalar alpha = math::norm(a_den) > eps ? -cf_neg * math::norm(a_num) / (math::norm(dia) * math::norm(a_den)) : 0;
1101 Scalar beta = math::norm(b_den) > eps ? -cf_pos * math::norm(b_num) / (math::norm(dia) * math::norm(b_den)) : 0;
1102
1103 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1104 ptrdiff_t c = A.col[j];
1105 Val v = A.val[j];
1106
1107 if (!S.val[j] || cf[c] != 'C')
1108 continue;
1109 if (prm.do_trunc && Amin[i] <= v && v <= Amax[i])
1110 continue;
1111
1112 P->col[row_head] = cidx[c];
1113 P->val[row_head] = (v < zero ? alpha : beta) * v;
1114 ++row_head;
1115 }
1116 }
1117 });
1118 ARCCORE_ALINA_TOC("interpolation");
1119
1120 return std::make_tuple(P, transpose(*P));
1121 }
1122
1123 template <class Matrix>
1124 std::shared_ptr<Matrix>
1125 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1126 {
1127 return detail::galerkin(A, P, R);
1128 }
1129
1130 private:
1131
1132 //-------------------------------------------------------------------
1133 // On return S will hold both strong connection matrix (in S.val, which
1134 // is piggybacking A.ptr and A.col), and its transposition (in S.ptr
1135 // and S.val).
1136 //
1137 // Variables that have no positive connections are marked as F(ine).
1138 //-------------------------------------------------------------------
1139 template <typename Val, typename Col, typename Ptr>
1140 static void connect(CSRMatrix<Val, Col, Ptr> const& A, float eps_strong,
1141 CSRMatrix<char, Col, Ptr>& S,
1142 std::vector<char>& cf)
1143 {
1144 typedef typename math::scalar_of<Val>::type Scalar;
1145
1146 const size_t n = backend::nbRow(A);
1147 const size_t nnz = backend::nonzeros(A);
1148 const Scalar eps = Alina::detail::eps<Scalar>(1);
1149
1150 S.setNbRow(n);
1151 S.ncols = n;
1152 S.ptr.resize(n + 1);
1153 S.val.resize(nnz);
1154 S.ptr[0] = 0;
1155
1156 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1157 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1158
1159 S.ptr[i + 1] = 0;
1160
1161 Val a_min = math::zero<Val>();
1162
1163 for (auto a = backend::row_begin(A, i); a; ++a)
1164 if (a.col() != i)
1165 a_min = std::min(a_min, a.value());
1166
1167 if (math::norm(a_min) < eps) {
1168 cf[i] = 'F';
1169 continue;
1170 }
1171
1172 a_min *= eps_strong;
1173
1174 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1175 S.val[j] = (A.col[j] != i && A.val[j] < a_min);
1176 }
1177 });
1178
1179 // Transposition of S:
1180 for (size_t i = 0; i < nnz; ++i)
1181 if (S.val[i])
1182 ++(S.ptr[A.col[i] + 1]);
1183
1184 S.scan_row_sizes();
1185 S.col.resize(S.ptr[n]);
1186
1187 for (size_t i = 0; i < n; ++i)
1188 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
1189 if (S.val[j])
1190 S.col[S.ptr[A.col[j]]++] = i;
1191
1192 std::rotate(S.ptr.data(), S.ptr.data() + n, S.ptr.data() + n + 1);
1193 S.ptr[0] = 0;
1194 }
1195
1196 // Split variables into C(oarse) and F(ine) sets.
1197 template <typename Val, typename Col, typename Ptr>
1198 static void cfsplit(CSRMatrix<Val, Col, Ptr> const& A,
1199 CSRMatrix<char, Col, Ptr> const& S,
1200 std::vector<char>& cf)
1201 {
1202 const size_t n = A.nbRow();
1203
1204 std::vector<Col> lambda(n);
1205
1206 // Initialize lambdas:
1207 for (size_t i = 0; i < n; ++i) {
1208 Col temp = 0;
1209 for (Ptr j = S.ptr[i], e = S.ptr[i + 1]; j < e; ++j)
1210 temp += (cf[S.col[j]] == 'U' ? 1 : 2);
1211 lambda[i] = temp;
1212 }
1213
1214 // Keep track of variable groups with equal lambda values.
1215 // ptr - start of a group;
1216 // cnt - size of a group;
1217 // i2n - variable number;
1218 // n2i - vaiable position in a group.
1219 std::vector<Ptr> ptr(n + 1, 0);
1220 std::vector<Ptr> cnt(n, 0);
1221 std::vector<Ptr> i2n(n);
1222 std::vector<Ptr> n2i(n);
1223
1224 for (size_t i = 0; i < n; ++i)
1225 ++ptr[lambda[i] + 1];
1226
1227 std::partial_sum(ptr.begin(), ptr.end(), ptr.begin());
1228
1229 for (size_t i = 0; i < n; ++i) {
1230 Col lam = lambda[i];
1231 Ptr idx = ptr[lam] + cnt[lam]++;
1232 i2n[idx] = i;
1233 n2i[i] = idx;
1234 }
1235
1236 // Process variables by decreasing lambda value.
1237 // 1. The vaiable with maximum value of lambda becomes next C-variable.
1238 // 2. Its neighbours from S' become F-variables.
1239 // 3. Keep lambda values in sync.
1240 for (size_t top = n; top-- > 0;) {
1241 Ptr i = i2n[top];
1242 Col lam = lambda[i];
1243
1244 if (lam == 0) {
1245 std::replace(cf.begin(), cf.end(), 'U', 'C');
1246 break;
1247 }
1248
1249 // Remove tne variable from its group.
1250 --cnt[lam];
1251
1252 if (cf[i] == 'F')
1253 continue;
1254
1255 // Mark the variable as 'C'.
1256 cf[i] = 'C';
1257
1258 // Its neighbours from S' become F-variables.
1259 for (Ptr j = S.ptr[i], e = S.ptr[i + 1]; j < e; ++j) {
1260 Col c = S.col[j];
1261
1262 if (cf[c] != 'U')
1263 continue;
1264
1265 cf[c] = 'F';
1266
1267 // Increase lambdas of the newly created F's neighbours.
1268 for (Ptr aj = A.ptr[c], ae = A.ptr[c + 1]; aj < ae; ++aj) {
1269 if (!S.val[aj])
1270 continue;
1271
1272 Col ac = A.col[aj];
1273 Col lam_a = lambda[ac];
1274
1275 if (cf[ac] != 'U' || static_cast<size_t>(lam_a) + 1 >= n)
1276 continue;
1277
1278 Ptr old_pos = n2i[ac];
1279 Ptr new_pos = ptr[lam_a] + cnt[lam_a] - 1;
1280
1281 n2i[i2n[old_pos]] = new_pos;
1282 n2i[i2n[new_pos]] = old_pos;
1283
1284 std::swap(i2n[old_pos], i2n[new_pos]);
1285
1286 --cnt[lam_a];
1287 ++cnt[lam_a + 1];
1288 ptr[lam_a + 1] = ptr[lam_a] + cnt[lam_a];
1289
1290 lambda[ac] = lam_a + 1;
1291 }
1292 }
1293
1294 // Decrease lambdas of the newly created C's neighbours.
1295 for (Ptr j = A.ptr[i], e = A.ptr[i + 1]; j < e; j++) {
1296 if (!S.val[j])
1297 continue;
1298
1299 Col c = A.col[j];
1300 Col lam = lambda[c];
1301
1302 if (cf[c] != 'U' || lam == 0)
1303 continue;
1304
1305 Ptr old_pos = n2i[c];
1306 Ptr new_pos = ptr[lam];
1307
1308 n2i[i2n[old_pos]] = new_pos;
1309 n2i[i2n[new_pos]] = old_pos;
1310
1311 std::swap(i2n[old_pos], i2n[new_pos]);
1312
1313 --cnt[lam];
1314 ++cnt[lam - 1];
1315 ++ptr[lam];
1316 lambda[c] = lam - 1;
1317 }
1318 }
1319 }
1320};
1321
1322/*---------------------------------------------------------------------------*/
1323/*---------------------------------------------------------------------------*/
// Smoothed-aggregation coarsening (AMGCL-style): the prolongation operator
// is obtained by applying one damped-Jacobi smoothing step to the tentative,
// aggregation-based prolongation.
// NOTE(review): the identifier keeps the historical spelling "Coarserning";
// renaming it would break existing callers.
1330template <class Backend>
1331struct SmoothedAggregationCoarserning
1332{
1333 typedef pointwise_aggregates Aggregates;

 // Parameters controlling aggregation and prolongation smoothing.
 // NOTE(review): the 'aggr' and 'nullspace' member declarations were elided
 // by the documentation extractor (original lines 1334-1345); the import /
 // export macros below reference them.
1336 struct params
1337 {
1340
1343
 // Relaxation (damping) factor applied when smoothing the tentative
 // prolongation; scaled further by the spectral-radius logic below.
1371 float relax = 1.0f;
1372
1373 // Estimate the matrix spectral radius.
1374 // This usually improves convergence rate and results in faster solves,
1375 // but costs some time during setup.
1376 bool estimate_spectral_radius = false;
1377
1378 // Number of power iterations to apply for the spectral radius
1379 // estimation. Use Gershgorin disk theorem when power_iters = 0.
1380 int power_iters = 0;
1381
1382 params() = default;
1383
 // Imports parameters from a property tree; rejects unknown keys.
1384 params(const PropertyTree& p)
1385 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
1386 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
1387 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, relax)
1388 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, estimate_spectral_radius)
1389 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, power_iters)
1390 {
1391 p.check_params({ "aggr", "nullspace", "relax", "estimate_spectral_radius", "power_iters" });
1392 }
1393
 // Exports the current parameter values under the given path.
1394 void get(PropertyTree& p, const std::string& path) const
1395 {
1396 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
1397 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
1398 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, relax);
1399 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, estimate_spectral_radius);
1400 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, power_iters);
1401 }
1402 };
1403
1404 SmoothedAggregationCoarserning(const params& prm = params())
1405 : prm(prm)
1406 {}
1407
 // Builds the transfer operators for the given system matrix and returns
 // the pair (P, transpose(P)). P is one damped-Jacobi smoothing step
 // applied to the tentative prolongation:
 //   P = (I - omega * D^{-1} * A_filtered) * P_tent
 // where A_filtered keeps only strong connections (weak off-diagonal
 // entries are lumped into the diagonal below).
 // Side effect: halves prm.aggr.eps_strong, so each successive call
 // (one per AMG level) uses a weaker strong-coupling threshold.
1408 template <class Matrix>
1409 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
1410 transfer_operators(const Matrix& A)
1411 {
1412 typedef typename backend::value_type<Matrix>::type value_type;
1413 typedef typename math::scalar_of<value_type>::type scalar_type;
1414
1415 const size_t n = backend::nbRow(A);
1416
1417 ARCCORE_ALINA_TIC("aggregates");
1418 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
1419 prm.aggr.eps_strong *= 0.5;
1420 ARCCORE_ALINA_TOC("aggregates");
1421
1422 auto P_tent = tentative_prolongation<Matrix>(n, aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
1423
1424 auto P = std::make_shared<Matrix>();
1425 P->set_size(backend::nbRow(*P_tent), backend::nbColumn(*P_tent), true);
1426
 // Damping factor: relax * (4/3) / rho(A) when the spectral radius is
 // estimated, otherwise the classical relax * (2/3) default.
1427 scalar_type omega = prm.relax;
1428 if (prm.estimate_spectral_radius) {
1429 omega *= static_cast<scalar_type>(4.0 / 3) / spectral_radius<true>(A, prm.power_iters);
1430 }
1431 else {
1432 omega *= static_cast<scalar_type>(2.0 / 3);
1433 }
1434
 // Two-pass CSR construction of P: first count the entries of each row,
 // then fill them. 'marker' (one copy per worker) records which coarse
 // column was last touched while assembling the current row.
1435 ARCCORE_ALINA_TIC("smoothing");
1436 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1437 std::vector<ptrdiff_t> marker(P->ncols, -1);
1438
1439 // Count number of entries in P.
1440 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1441 for (ptrdiff_t ja = A.ptr[i], ea = A.ptr[i + 1]; ja < ea; ++ja) {
1442 ptrdiff_t ca = A.col[ja];
1443
1444 // Skip weak off-diagonal connections.
1445 if (ca != i && !aggr.strong_connection[ja])
1446 continue;
1447
1448 for (ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca + 1]; jp < ep; ++jp) {
1449 ptrdiff_t cp = P_tent->col[jp];
1450
1451 if (marker[cp] != i) {
1452 marker[cp] = i;
1453 ++(P->ptr[i + 1]);
1454 }
1455 }
1456 }
1457 }
1458 });
1459
1460 P->scan_row_sizes();
1461 P->set_nonzeros();
1462
1463 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1464 std::vector<ptrdiff_t> marker(P->ncols, -1);
1465
1466 // Fill the interpolation matrix.
1467 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1468
1469 // Diagonal of the filtered matrix is the original matrix
1470 // diagonal minus its weak connections.
1471 value_type dia = math::zero<value_type>();
1472 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
1473 if (A.col[j] == i || !aggr.strong_connection[j])
1474 dia += A.val[j];
1475 }
1476 if (!math::is_zero(dia))
1477 dia = -omega * math::inverse(dia);
1478
1479 ptrdiff_t row_beg = P->ptr[i];
1480 ptrdiff_t row_end = row_beg;
1481 for (ptrdiff_t ja = A.ptr[i], ea = A.ptr[i + 1]; ja < ea; ++ja) {
1482 ptrdiff_t ca = A.col[ja];
1483
1484 // Skip weak off-diagonal connections.
1485 if (ca != i && !aggr.strong_connection[ja])
1486 continue;
1487
 // Row i of (I - omega * D^{-1} * A_filtered): (1 - omega) on the
 // diagonal, -omega * D^{-1} * A(i,ca) off the diagonal.
1488 value_type va = (ca == i)
1489 ? static_cast<value_type>(static_cast<scalar_type>(1 - omega) * math::identity<value_type>())
1490 : dia * A.val[ja];
1491
1492 for (ptrdiff_t jp = P_tent->ptr[ca], ep = P_tent->ptr[ca + 1]; jp < ep; ++jp) {
1493 ptrdiff_t cp = P_tent->col[jp];
1494 value_type vp = P_tent->val[jp];
1495
 // marker[cp] stores cp's slot in the current row of P; any
 // value below row_beg is stale (belongs to a previous row).
1496 if (marker[cp] < row_beg) {
1497 marker[cp] = row_end;
1498 P->col[row_end] = cp;
1499 P->val[row_end] = va * vp;
1500 ++row_end;
1501 }
1502 else {
1503 P->val[marker[cp]] += va * vp;
1504 }
1505 }
1506 }
1507 }
1508 });
1509 ARCCORE_ALINA_TOC("smoothing");
1510
1511 return std::make_tuple(P, transpose(*P));
1512 }
1513
 // Coarse-level operator: Galerkin triple product R * A * P.
1514 template <class Matrix>
1515 std::shared_ptr<Matrix>
1516 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1517 {
1518 return detail::galerkin(A, P, R);
1519 }
1520
1521 params prm;
1522};
1523
1524/*---------------------------------------------------------------------------*/
1525/*---------------------------------------------------------------------------*/
// Smoothed-aggregation coarsening with energy minimization: the columnwise
// smoothing weights (Omega) are computed from scalar products of A*P_tent
// and A*D^{-1}*A*P_tent, instead of using a single global relaxation factor.
// P and R are built independently (R is not transpose(P) here).
1532template <class Backend>
1533struct SmoothedAggregationEnergyMinCoarsening
1534{
1535 typedef pointwise_aggregates Aggregates;
1536
 // Aggregation and near-nullspace parameters.
 // NOTE(review): the 'aggr' and 'nullspace' member declarations were elided
 // by the documentation extractor (original lines 1540-1545); the import /
 // export macros below reference them.
1538 struct params
1539 {
1542
1545
1546 params() {}
1547
 // Imports parameters from a property tree; rejects unknown keys.
1548 params(const PropertyTree& p)
1549 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, aggr)
1550 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, nullspace)
1551 {
1552 p.check_params( { "aggr", "nullspace" });
1553 }
1554
 // Exports the current parameter values under the given path.
1555 void get(PropertyTree& p, const std::string& path) const
1556 {
1557 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, aggr);
1558 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, nullspace);
1559 }
1560 } prm;
1561
1562 SmoothedAggregationEnergyMinCoarsening(const params& prm = params())
1563 : prm(prm)
1564 {}
1565
 // Builds the transfer operators (P, R) for the given system matrix.
 // Steps: (1) aggregate and build the tentative prolongation; (2) build
 // the filtered matrix Af, keeping only strong connections and lumping
 // weak off-diagonal entries into the diagonal; (3) derive P and R from
 // Af with columnwise energy-minimizing weights (see interpolation() /
 // restriction() below).
 // Side effect: halves prm.aggr.eps_strong, so each successive call
 // (one per AMG level) uses a weaker strong-coupling threshold.
1566 template <class Matrix>
1567 std::tuple<std::shared_ptr<Matrix>, std::shared_ptr<Matrix>>
1568 transfer_operators(const Matrix& A)
1569 {
1570 typedef typename backend::value_type<Matrix>::type Val;
1571 typedef typename backend::col_type<Matrix>::type Col;
1572 typedef typename backend::ptr_type<Matrix>::type Ptr;
1573 typedef ptrdiff_t Idx;
1574
1575 ARCCORE_ALINA_TIC("aggregates");
1576 Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
1577 prm.aggr.eps_strong *= 0.5;
1578 ARCCORE_ALINA_TOC("aggregates");
1579
1580 ARCCORE_ALINA_TIC("interpolation");
1581 auto P_tent = tentative_prolongation<Matrix>(backend::nbRow(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size);
1582
1583 // Filter the system matrix
1584 CSRMatrix<Val, Col, Ptr> Af;
1585 Af.set_size(backend::nbRow(A), backend::nbColumn(A));
1586 Af.ptr[0] = 0;
1587
1588 std::vector<Val> dia(Af.nbRow());
1589
 // Pass 1: per row, accumulate the filtered diagonal (original diagonal
 // plus lumped weak connections) and count the surviving entries.
1590 arccoreParallelFor(0, Af.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1591 for (Idx i = begin; i < (begin + size); ++i) {
1592 Idx row_begin = A.ptr[i];
1593 Idx row_end = A.ptr[i + 1];
1594 Idx row_width = row_end - row_begin;
1595
1596 Val D = math::zero<Val>();
1597 for (Idx j = row_begin; j < row_end; ++j) {
1598 Idx c = A.col[j];
1599 Val v = A.val[j];
1600
1601 if (c == i)
1602 D += v;
1603 else if (!aggr.strong_connection[j]) {
1604 D += v;
1605 --row_width;
1606 }
1607 }
1608
1609 dia[i] = D;
1610 Af.ptr[i + 1] = row_width;
1611 }
1612 });
1613
1614 Af.set_nonzeros(Af.scan_row_sizes());
1615
 // Pass 2: fill Af with the lumped diagonal and strong off-diagonal
 // entries; column order of A is preserved within each row.
1616 arccoreParallelFor(0, Af.nbRow(), ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1617 for (Idx i = begin; i < (begin + size); ++i) {
1618
1619 Idx row_begin = A.ptr[i];
1620 Idx row_end = A.ptr[i + 1];
1621 Idx row_head = Af.ptr[i];
1622
1623 for (Idx j = row_begin; j < row_end; ++j) {
1624 Idx c = A.col[j];
1625
1626 if (c == i) {
1627 Af.col[row_head] = i;
1628 Af.val[row_head] = dia[i];
1629 ++row_head;
1630 }
1631 else if (aggr.strong_connection[j]) {
1632 Af.col[row_head] = c;
1633 Af.val[row_head] = A.val[j];
1634 ++row_head;
1635 }
1636 }
1637 }
1638 });
1639
 // omega is filled by interpolation() and then reused by restriction().
1640 std::vector<Val> omega;
1641
1642 auto P = interpolation(Af, dia, *P_tent, omega);
1643 auto R = restriction(Af, dia, *P_tent, omega);
1644 ARCCORE_ALINA_TOC("interpolation");
1645
1646 return std::make_tuple(P, R);
1647 }
1648
 // Coarse-level operator: Galerkin triple product R * A * P.
1649 template <class Matrix>
1650 std::shared_ptr<Matrix>
1651 coarse_operator(const Matrix& A, const Matrix& P, const Matrix& R) const
1652 {
1653 return detail::galerkin(A, P, R);
1654 }
1655
1656 private:
1657
 // Builds the prolongation P = P_tent - D^{-1} * (A * P_tent) * Omega,
 // where Omega = diag(omega) holds per-column weights
 //   omega[c] = (AP, ADAP)_c / (ADAP, ADAP)_c
 // with AP = A * P_tent and ADAP = A * D^{-1} * AP (ADAP is computed row
 // by row on the fly and never stored as a matrix).
 // Output parameter 'omega' is resized to nbColumn(P_tent) and filled so
 // restriction() can reuse the same weights. Returns AP, updated in
 // place to hold P.
1658 template <class AMatrix, typename Val, typename Col, typename Ptr>
1659 static std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
1660 interpolation(const AMatrix& A, const std::vector<Val>& Adia,
1661 const CSRMatrix<Val, Col, Ptr>& P_tent,
1662 std::vector<Val>& omega)
1663 {
1664 const size_t n = backend::nbRow(P_tent);
1665 const size_t nc = backend::nbColumn(P_tent);
1666
1667 auto AP = product(A, P_tent, /*sort rows: */ true);
1668
1669 omega.resize(nc, math::zero<Val>());
1670 std::vector<Val> denum(nc, math::zero<Val>());
1671
 // omega and denum are shared accumulators across workers; the mutexes
 // serialize the updates. The TODO below suggests replacing them
 // (e.g. with per-thread partial sums) — left as-is here.
1672 // TODO CONCURRENCY: remove these mutex
1673 std::mutex mutex1;
1674 std::mutex mutex2;
1675
1676 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1677 std::vector<ptrdiff_t> marker(nc, -1);
1678
1679 // Compute A * Dinv * AP row by row and compute columnwise
1680 // scalar products necessary for computation of omega. The
1681 // actual results of matrix-matrix product are not stored.
1682 std::vector<Col> adap_col(128);
1683 std::vector<Val> adap_val(128);
1684
1685 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
1686 adap_col.clear();
1687 adap_val.clear();
1688
1689 // Form current row of ADAP matrix.
1690 for (auto a = A.row_begin(ia); a; ++a) {
1691 Col ca = a.col();
1692 Val va = math::inverse(Adia[ca]) * a.value();
1693
1694 for (auto p = AP->row_begin(ca); p; ++p) {
1695 Col c = p.col();
1696 Val v = va * p.value();
1697
1698 if (marker[c] < 0) {
1699 marker[c] = adap_col.size();
1700 adap_col.push_back(c);
1701 adap_val.push_back(v);
1702 }
1703 else {
1704 adap_val[marker[c]] += v;
1705 }
1706 }
1707 }
1708
 // Sort the assembled row by column so it can be merge-joined
 // against the (already sorted) corresponding row of AP.
1709 detail::sort_row(&adap_col[0], &adap_val[0], adap_col.size());
1710
1711 // Update columnwise scalar products (AP,ADAP) and (ADAP,ADAP).
1712 // 1. (AP, ADAP)
1713 for (
1714 Ptr ja = AP->ptr[ia], ea = AP->ptr[ia + 1],
1715 jb = 0, eb = adap_col.size();
1716 ja < ea && jb < eb;) {
1717 Col ca = AP->col[ja];
1718 Col cb = adap_col[jb];
1719
1720 if (ca < cb)
1721 ++ja;
1722 else if (cb < ca)
1723 ++jb;
1724 else /*ca == cb*/ {
1725 Val v = AP->val[ja] * adap_val[jb];
1726 {
1727 std::scoped_lock lock(mutex1);
1728 omega[ca] += v;
1729 ++ja;
1730 ++jb;
1731 }
1732 }
1733 }
1734
1735 // 2. (ADAP, ADAP) (and clear marker)
1736 for (size_t j = 0, e = adap_col.size(); j < e; ++j) {
1737 Col c = adap_col[j];
1738 Val v = adap_val[j];
1739 {
1740 std::scoped_lock lock(mutex2);
1741 denum[c] += v * v;
1742 marker[c] = -1;
1743 }
1744 }
1745 }
1746 });
1747
 // Finalize the per-column weights: omega[c] /= denum[c].
1748 for (size_t i = 0, m = omega.size(); i < m; ++i)
1749 omega[i] = math::inverse(denum[i]) * omega[i];
1750
1751 // Update AP to obtain P: P = (P_tent - D^-1 A P Omega)
1752 /*
1753 * Here we use the fact that if P(i,j) != 0,
1754 * then with necessity AP(i,j) != 0:
1755 *
1756 * AP(i,j) = sum_k(A_ik P_kj), and A_ii != 0.
1757 */
1758 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1759 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1760 Val dia = math::inverse(Adia[i]);
1761
1762 for (Ptr ja = AP->ptr[i], ea = AP->ptr[i + 1],
1763 jp = P_tent.ptr[i], ep = P_tent.ptr[i + 1];
1764 ja < ea; ++ja) {
1765 Col ca = AP->col[ja];
1766 Val va = -dia * AP->val[ja] * omega[ca];
1767
 // Merge-join: add P_tent(i, ca) if that entry exists (both
 // rows are sorted by column, so jp only moves forward).
1768 for (; jp < ep; ++jp) {
1769 Col cp = P_tent.col[jp];
1770 if (cp > ca)
1771 break;
1772
1773 if (cp == ca) {
1774 va += P_tent.val[jp];
1775 break;
1776 }
1777 }
1778
1779 AP->val[ja] = va;
1780 }
1781 }
1782 });
1783
1784 return AP;
1785 }
1786
 // Builds the restriction R = R_tent - Omega * R_tent * A * D^{-1},
 // with R_tent = transpose(P_tent) and the per-column weights 'omega'
 // computed by interpolation(). Returns RA = R_tent * A, updated in
 // place to hold R.
1787 template <typename AMatrix, typename Val, typename Col, typename Ptr>
1788 static std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
1789 restriction(const AMatrix& A, const std::vector<Val>& Adia,
1790 const CSRMatrix<Val, Col, Ptr>& P_tent,
1791 const std::vector<Val>& omega)
1792 {
1793 const size_t nc = backend::nbColumn(P_tent);
1794
1795 auto R_tent = transpose(P_tent);
1796 sort_rows(*R_tent);
1797
1798 auto RA = product(*R_tent, A, /*sort rows: */ true);
1799
1800 // Compute R = R_tent - Omega R_tent A D^-1.
1801 /*
1802 * Here we use the fact that if R(i,j) != 0,
1803 * then with necessity RA(i,j) != 0:
1804 *
1805 * RA(i,j) = sum_k(R_ik A_kj), and A_jj != 0.
1806 */
1807 arccoreParallelFor(0, nc, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
1808 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1809 Val w = omega[i];
1810
1811 for (Ptr ja = RA->ptr[i], ea = RA->ptr[i + 1],
1812 jr = R_tent->ptr[i], er = R_tent->ptr[i + 1];
1813 ja < ea; ++ja) {
1814 Col ca = RA->col[ja];
1815 Val va = -w * math::inverse(Adia[ca]) * RA->val[ja];
1816
 // Merge-join: add R_tent(i, ca) if that entry exists (both
 // rows are sorted by column, so jr only moves forward).
1817 for (; jr < er; ++jr) {
1818 Col cr = R_tent->col[jr];
1819 if (cr > ca)
1820 break;
1821
1822 if (cr == ca) {
1823 va += R_tent->val[jr];
1824 break;
1825 }
1826 }
1827
1828 RA->val[ja] = va;
1829 }
1830 }
1831 });
1832
1833 return RA;
1834 }
1835};
1836
1837/*---------------------------------------------------------------------------*/
1838/*---------------------------------------------------------------------------*/
1839
1840} // namespace Arcane::Alina
1841
1842/*---------------------------------------------------------------------------*/
1843/*---------------------------------------------------------------------------*/
1844
1845namespace Arcane::Alina::backend
1846{
1847
1848template <class Backend>
1850 typename std::enable_if<!std::is_arithmetic<typename backend::value_type<Backend>::type>::value>::type> : std::false_type
1851{};
1852
1853/*---------------------------------------------------------------------------*/
1854/*---------------------------------------------------------------------------*/
1855
1856} // namespace Arcane::Alina::backend
1857
1858/*---------------------------------------------------------------------------*/
1859/*---------------------------------------------------------------------------*/
1860
1861#endif
std::vector< char > strong_connection
Strong connectivity matrix.
Definition Coarsening.h:506
std::vector< ptrdiff_t > id
Aggregate id that each fine-level variable belongs to.
Definition Coarsening.h:509
size_t count
Number of aggregates.
Definition Coarsening.h:503
pointwise_aggregates(const Matrix &A, const params &prm, unsigned min_aggregate)
Constructs aggregates for a given matrix.
Definition Coarsening.h:513
Informations d'exécution d'une boucle.
Matrix class, to be used by user.
Vector class, to be used by user.
__host__ __device__ Real dot(Real2 u, Real2 v)
Produit scalaire de u par v dans .
Definition MathUtils.h:96
__host__ __device__ double sqrt(double v)
Racine carrée de v.
Definition Math.h:135
__host__ __device__ Real3x3 transpose(const Real3x3 &t)
Transpose la matrice.
Definition MathUtils.h:258
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
float over_interp
Over-interpolation factor .
Definition Coarsening.h:665
nullspace_params nullspace
Near nullspace parameters.
Definition Coarsening.h:649
Aggregates::params aggr
Aggregation parameters.
Definition Coarsening.h:646
std::tuple< std::shared_ptr< Matrix >, std::shared_ptr< Matrix > > transfer_operators(const Matrix &A)
Creates transfer operators for the given system matrix.
Definition Coarsening.h:700
std::shared_ptr< Matrix > coarse_operator(const Matrix &A, const Matrix &P, const Matrix &R) const
Creates system matrix for the coarser level.
Definition Coarsening.h:726
Apply a scalar coarsening on a block matrix.
Definition Coarsening.h:742
bool do_trunc
Truncate prolongation operator?
Definition Coarsening.h:935
float eps_trunc
Truncation parameter .
Definition Coarsening.h:938
float eps_strong
Parameter defining strong couplings.
Definition Coarsening.h:922
Classic Ruge-Stuben coarsening with direct interpolation.
Definition Coarsening.h:909
Aggregates::params aggr
Aggregation parameters.
nullspace_params nullspace
Near nullspace parameters.
Aggregates::params aggr
Aggregation parameters.
nullspace_params nullspace
Near nullspace parameters.
Is the coarsening supported by the backend?
Number of rows for statically sized matrix types.
std::vector< double > B
Near nullspace vectors.
Definition Coarsening.h:96
int cols
Number of vectors in near nullspace.
Definition Coarsening.h:89
float eps_strong
Parameter defining strong couplings.
Definition Coarsening.h:160
std::vector< ptrdiff_t > id
Aggregate id that each fine-level variable belongs to.
Definition Coarsening.h:198
std::vector< char > strong_connection
Strong connectivity matrix.
Definition Coarsening.h:190
plain_aggregates(const Matrix &A, const params &prm)
Constructs aggregates for a given matrix.
Definition Coarsening.h:207
size_t count
Number of aggregates.
Definition Coarsening.h:182
Int32 block_size
Block size for the system matrix.
Definition Coarsening.h:481