Arcane  4.1.11.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
DistributedSubDomainDeflation.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* DistributedSubDomainDeflation.h (C) 2000-2026 */
9/* */
10/* Distributed solver based on subdomain deflation. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_DISTRIBUTEDSUBDOMAINDEFLATION_H
13#define ARCCORE_ALINA_DISTRIBUTEDSUBDOMAINDEFLATION_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include "arccore/alina/BuiltinBackend.h"
27#include "arccore/alina/Adapters.h"
28#include "arccore/alina/MessagePassingUtils.h"
29#include "arccore/alina/DistributedSkylineLUDirectSolver.h"
30#include "arccore/alina/DistributedInnerProduct.h"
31#include "arccore/alina/DistributedMatrix.h"
32
33#include <vector>
34#include <algorithm>
35#include <numeric>
36#include <memory>
37#include <functional>
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
42namespace Arcane::Alina
43{
44
45/*---------------------------------------------------------------------------*/
46/*---------------------------------------------------------------------------*/
47
/*!
 * \brief Piecewise-constant deflation vectors.
 *
 * Provides \a block_size deflation vectors; row \a row contributes 1 to
 * deflation vector \a j exactly when <tt>row % block_size == j</tt>, and 0
 * otherwise.
 *
 * NOTE(review): the struct declaration line was missing from the extracted
 * source and has been reconstructed from the AMGCL original.
 */
struct constant_deflation
{
  const int block_size;

  /// Constructor. \a block_size is the number of deflation vectors.
  constant_deflation(int block_size = 1)
  : block_size(block_size)
  {}

  /// Number of deflation vectors.
  int dim() const
  {
    return block_size;
  }

  /// Value (0 or 1) of deflation vector \a j at row \a row.
  int operator()(ptrdiff_t row, int j) const
  {
    return row % block_size == j;
  }
};
71
72/*---------------------------------------------------------------------------*/
73/*---------------------------------------------------------------------------*/
74
/*!
 * \brief Matrix adaptor that applies the subdomain-deflation projection
 * after every matrix operation.
 *
 * Wraps a matrix \a A together with a deflation object \a S (the
 * DistributedSubDomainDeflation instance). Each matrix-vector product and
 * residual computation is followed by S.project() on the output vector, so
 * the iterative solver effectively works with the projected operator.
 * Both references are only borrowed: \a S and \a A must outlive this object.
 */
template <class SDD, class Matrix>
struct sdd_projected_matrix
{
  typedef typename SDD::value_type value_type;

  const SDD& S;    // deflation object supplying project()
  const Matrix& A; // underlying (distributed) matrix

  sdd_projected_matrix(const SDD& S, const Matrix& A)
  : S(S)
  , A(A)
  {}

  //! y = alpha * A * x + beta * y, followed by the deflation projection of y.
  template <class T, class Vec1, class Vec2>
  void mul(T alpha, const Vec1& x, T beta, Vec2& y) const
  {
    ARCCORE_ALINA_TIC("top/spmv");
    backend::spmv(alpha, A, x, beta, y);
    ARCCORE_ALINA_TOC("top/spmv");

    S.project(y);
  }

  //! r = f - A * x, followed by the deflation projection of r.
  template <class Vec1, class Vec2, class Vec3>
  void residual(const Vec1& f, const Vec2& x, Vec3& r) const
  {
    ARCCORE_ALINA_TIC("top/residual");
    backend::residual(f, A, x, r);
    ARCCORE_ALINA_TOC("top/residual");

    S.project(r);
  }
};
108
109/*---------------------------------------------------------------------------*/
110/*---------------------------------------------------------------------------*/
111
112template <class SDD, class Matrix>
113sdd_projected_matrix<SDD, Matrix> make_sdd_projected_matrix(const SDD& S, const Matrix& A)
114{
116}
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120
126template <class LocalPrecond,
127 class IterativeSolver,
128 class DirectSolver = DistributedSkylineLUDirectSolver<typename LocalPrecond::backend_type>>
129class DistributedSubDomainDeflation
130{
131 public:
132
133 typedef typename LocalPrecond::backend_type backend_type;
134 typedef typename backend_type::params backend_params;
135
136 struct params
137 {
138 typename LocalPrecond::params local;
139 typename IterativeSolver::params isolver;
140 typename DirectSolver::params dsolver;
141
142 // Number of deflation vectors.
143 Int32 num_def_vec = 0;
144
145 // Value of deflation vector at the given row and column.
146 std::function<double(ptrdiff_t, unsigned)> def_vec;
147
148 params() {}
149
150 params(const PropertyTree& p)
151 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, local)
152 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, isolver)
153 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, dsolver)
154 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, num_def_vec)
155 {
156 void* ptr = 0;
157 ptr = p.get("def_vec", ptr);
158
159 precondition(ptr, "Error in subdomain_deflation parameters: def_vec is not set");
160
161 def_vec = *static_cast<std::function<double(ptrdiff_t, unsigned)>*>(ptr);
162
163 p.check_params({ "local", "isolver", "dsolver", "num_def_vec", "def_vec" });
164 }
165
166 void get(PropertyTree& p, const std::string& path) const
167 {
168 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, local);
169 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, isolver);
170 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, dsolver);
171 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, num_def_vec);
172 }
173 };
174
175 typedef typename backend_type::value_type value_type;
176 typedef typename math::scalar_of<value_type>::type scalar_type;
177 typedef typename backend_type::matrix bmatrix;
178 using col_type = backend_type::col_type;
179 using ptr_type = backend_type::ptr_type;
180 typedef typename backend_type::vector vector;
181 typedef DistributedMatrix<backend_type> matrix;
182
183 template <class Matrix>
184 DistributedSubDomainDeflation(mpi_communicator comm,
185 const Matrix& Astrip,
186 const params& prm = params(),
187 const backend_params& bprm = backend_params())
188 : comm(comm)
189 , nrows(backend::nbRow(Astrip))
190 , ndv(prm.num_def_vec)
191 , dv_start(comm.size + 1, 0)
192 , Z(ndv)
193 , q(backend_type::create_vector(nrows, bprm))
194 , S(nrows, prm.isolver, bprm, DistributedInnerProduct(comm))
195 {
196 A = std::make_shared<matrix>(comm, Astrip, nrows);
197 init(prm, bprm);
198 }
199
201 std::shared_ptr<matrix> A,
202 const params& prm = params(),
203 const backend_params& bprm = backend_params())
204 : comm(comm)
205 , nrows(A->loc_rows())
206 , ndv(prm.num_def_vec)
207 , A(A)
208 , dv_start(comm.size + 1, 0)
209 , Z(ndv)
210 , q(backend_type::create_vector(nrows, bprm))
211 , S(nrows, prm.isolver, bprm, DistributedInnerProduct(comm))
212 {
213 init(prm, bprm);
214 }
215
216 void init(const params& prm = params(),
217 const backend_params& bprm = backend_params())
218 {
219 ARCCORE_ALINA_TIC("setup deflation");
220 using build_matrix = CSRMatrix<value_type, col_type, ptr_type>;
221
222 // Lets see how many deflation vectors are there.
223 std::vector<ptrdiff_t> dv_size(comm.size);
224 MPI_Allgather(&ndv, 1, mpi_datatype<ptrdiff_t>(), &dv_size[0], 1, mpi_datatype<ptrdiff_t>(), comm);
225
226 std::partial_sum(dv_size.begin(), dv_size.end(), dv_start.begin() + 1);
227 nz = dv_start.back();
228
229 df.resize(ndv);
230 dx.resize(ndv);
231 dd = backend_type::create_vector(ndv, bprm);
232
233 auto az_loc = std::make_shared<build_matrix>();
234 auto az_rem = std::make_shared<build_matrix>();
235
236 auto a_loc = A->local();
237 auto a_rem = A->remote();
238
239 const CommunicationPattern<backend_type>& Acp = A->cpat();
240
241 // Fill deflation vectors.
242 ARCCORE_ALINA_TIC("copy deflation vectors");
243 {
244 std::vector<value_type> z(nrows);
245 for (int j = 0; j < ndv; ++j) {
246 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
247 for (ptrdiff_t i = begin; i < (begin + size); ++i)
248 z[i] = prm.def_vec(i, j);
249 });
250 Z[j] = backend_type::copy_vector(z, bprm);
251 }
252 }
253 ARCCORE_ALINA_TOC("copy deflation vectors");
254
255 ARCCORE_ALINA_TIC("first pass");
256 az_loc->set_size(nrows, ndv, true);
257 az_loc->set_nonzeros(nrows * dv_size[comm.rank]);
258 az_rem->set_size(nrows, 0, true);
259 // 1. Build local part of AZ matrix.
260 // 2. Count remote nonzeros
261 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
262 std::vector<ptrdiff_t> marker(Acp.recv.nbr.size(), -1);
263
264 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
265 ptrdiff_t az_loc_head = i * ndv;
266 az_loc->ptr[i + 1] = az_loc_head + ndv;
267
268 for (ptrdiff_t j = 0; j < ndv; ++j) {
269 az_loc->col[az_loc_head + j] = j;
270 az_loc->val[az_loc_head + j] = math::zero<value_type>();
271 }
272
273 for (ptrdiff_t j = a_loc->ptr[i], e = a_loc->ptr[i + 1]; j < e; ++j) {
274 ptrdiff_t c = a_loc->col[j];
275 value_type v = a_loc->val[j];
276
277 for (ptrdiff_t j = 0; j < ndv; ++j)
278 az_loc->val[az_loc_head + j] += v * prm.def_vec(c, j);
279 }
280
281 for (ptrdiff_t j = a_rem->ptr[i], e = a_rem->ptr[i + 1]; j < e; ++j) {
282 int d = Acp.domain(a_rem->col[j]);
283
284 if (marker[d] != i) {
285 marker[d] = i;
286 az_rem->ptr[i + 1] += dv_size[d];
287 }
288 }
289 }
290 });
291
292 az_rem->set_nonzeros(az_rem->scan_row_sizes());
293 ARCCORE_ALINA_TOC("first pass");
294
295 // Create local preconditioner.
296 ARCCORE_ALINA_TIC("local preconditioner");
297 P = std::make_shared<LocalPrecond>(*a_loc, prm.local, bprm);
298 ARCCORE_ALINA_TOC("local preconditioner");
299
300 A->set_local(P->system_matrix_ptr());
301 A->move_to_backend(bprm);
302
303 ARCCORE_ALINA_TIC("remote(A*Z)");
304 /* Construct remote part of AZ */
305 // Exchange deflation vectors
306 std::vector<ptrdiff_t> zrecv_ptr(Acp.recv.nbr.size() + 1, 0);
307 std::vector<ptrdiff_t> zcol_ptr;
308 zcol_ptr.reserve(Acp.recv.count() + 1);
309 zcol_ptr.push_back(0);
310
311 for (size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
312 ptrdiff_t ncols = Acp.recv.ptr[i + 1] - Acp.recv.ptr[i];
313 ptrdiff_t nvecs = dv_size[Acp.recv.nbr[i]];
314 ptrdiff_t size = nvecs * ncols;
315 zrecv_ptr[i + 1] = zrecv_ptr[i] + size;
316
317 for (ptrdiff_t j = 0; j < ncols; ++j)
318 zcol_ptr.push_back(zcol_ptr.back() + nvecs);
319 }
320
321 std::vector<value_type> zrecv(zrecv_ptr.back());
322 std::vector<value_type> zsend(Acp.send.count() * ndv);
323
324 for (size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
325 ptrdiff_t begin = zrecv_ptr[i];
326 ptrdiff_t size = zrecv_ptr[i + 1] - begin;
327
328 Acp.recv.req[i] = comm.doIReceive(&zrecv[begin], size, Acp.recv.nbr[i], tag_exc_vals);
329 }
330
331 for (size_t i = 0, k = 0; i < Acp.send.count(); ++i)
332 for (ptrdiff_t j = 0; j < ndv; ++j, ++k)
333 zsend[k] = prm.def_vec(Acp.send.col[i], j);
334
335 for (size_t i = 0; i < Acp.send.nbr.size(); ++i)
336 Acp.send.req[i] = comm.doISend(&zsend[ndv * Acp.send.ptr[i]], ndv * (Acp.send.ptr[i + 1] - Acp.send.ptr[i]),
337 Acp.send.nbr[i], tag_exc_vals);
338
339 comm.waitAll(Acp.recv.req);
340 comm.waitAll(Acp.send.req);
341
342 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
343 std::vector<ptrdiff_t> marker(nz, -1);
344
345 // AZ_rem = Arem * Z
346 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
347 ptrdiff_t az_rem_head = az_rem->ptr[i];
348 ptrdiff_t az_rem_tail = az_rem_head;
349
350 for (auto a = backend::row_begin(*a_rem, i); a; ++a) {
351 ptrdiff_t c = a.col();
352 value_type v = a.value();
353
354 // Domain the column belongs to
355 ptrdiff_t d = Acp.recv.nbr[std::upper_bound(Acp.recv.ptr.begin(), Acp.recv.ptr.end(), c) -
356 Acp.recv.ptr.begin() - 1];
357
358 value_type* zval = &zrecv[zcol_ptr[c]];
359 for (ptrdiff_t j = 0, k = dv_start[d]; j < dv_size[d]; ++j, ++k) {
360 if (marker[k] < az_rem_head) {
361 marker[k] = az_rem_tail;
362 az_rem->col[az_rem_tail] = k;
363 az_rem->val[az_rem_tail] = v * zval[j];
364 ++az_rem_tail;
365 }
366 else {
367 az_rem->val[marker[k]] += v * zval[j];
368 }
369 }
370 }
371 }
372 });
373 ARCCORE_ALINA_TOC("remote(A*Z)");
374
375 /* Build solver for the deflated matrix E. */
376 ARCCORE_ALINA_TIC("assemble E");
377
378 // Count nonzeros in E.
379 std::vector<int> nbrs; // processes we are talking to
380 nbrs.reserve(1 + Acp.send.nbr.size() + Acp.recv.nbr.size());
381 std::set_union(
382 Acp.send.nbr.begin(), Acp.send.nbr.end(),
383 Acp.recv.nbr.begin(), Acp.recv.nbr.end(),
384 std::back_inserter(nbrs));
385 nbrs.push_back(comm.rank);
386
387 build_matrix E;
388 E.set_size(ndv, nz, false);
389
390 {
391 ptrdiff_t nnz = 0;
392 for (int j : nbrs)
393 nnz += dv_size[j];
394 for (int k = 0; k <= ndv; ++k)
395 E.ptr[k] = k * nnz;
396 }
397 E.setNbNonZero(E.ptr[ndv]);
398 E.set_nonzeros(E.ptr[ndv]);
399
400 // Build local strip of E.
401 int nthreads = ConcurrencyBase::maxAllowedThread();
402 multi_array<value_type, 3> erow(nthreads, ndv, nz);
403 std::fill_n(erow.data(), erow.size(), 0);
404
405 {
406 ptrdiff_t dv_offset = dv_start[comm.rank];
407 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
408 const int tid = TaskFactory::currentTaskThreadIndex();
409 std::vector<value_type> z(ndv);
410
411 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
412 for (ptrdiff_t j = 0; j < ndv; ++j)
413 z[j] = prm.def_vec(i, j);
414
415 for (ptrdiff_t k = az_loc->ptr[i], e = az_loc->ptr[i + 1]; k < e; ++k) {
416 ptrdiff_t c = az_loc->col[k] + dv_offset;
417 value_type v = az_loc->val[k];
418
419 for (ptrdiff_t j = 0; j < ndv; ++j)
420 erow(tid, j, c) += v * z[j];
421 }
422
423 for (ptrdiff_t k = az_rem->ptr[i], e = az_rem->ptr[i + 1]; k < e; ++k) {
424 ptrdiff_t c = az_rem->col[k];
425 value_type v = az_rem->val[k];
426
427 for (ptrdiff_t j = 0; j < ndv; ++j)
428 erow(tid, j, c) += v * z[j];
429 }
430 }
431 });
432 }
433
434 for (int i = 0; i < ndv; ++i) {
435 int row_head = E.ptr[i];
436 for (int j : nbrs) {
437 for (int k = 0; k < dv_size[j]; ++k) {
438 int c = dv_start[j] + k;
439 value_type v = math::zero<value_type>();
440 for (int t = 0; t < nthreads; ++t)
441 v += erow(t, i, c);
442
443 E.col[row_head] = c;
444 E.val[row_head] = v;
445
446 ++row_head;
447 }
448 }
449 }
450 ARCCORE_ALINA_TOC("assemble E");
451
452 ARCCORE_ALINA_TIC("factorize E");
453 this->E = std::make_shared<DirectSolver>(comm, E, prm.dsolver);
454 ARCCORE_ALINA_TOC("factorize E");
455
456 ARCCORE_ALINA_TIC("finish(A*Z)");
457 AZ = std::make_shared<matrix>(comm, az_loc, az_rem);
458 AZ->move_to_backend(bprm);
459 ARCCORE_ALINA_TOC("finish(A*Z)");
460 ARCCORE_ALINA_TOC("setup deflation");
461 }
462
463 template <class Vec1, class Vec2>
464 void apply(const Vec1& rhs, Vec2&& x) const
465 {
466 size_t iters;
467 double error;
468 backend::clear(x);
469 std::tie(iters, error) = (*this)(rhs, x);
470 }
471
472 std::shared_ptr<matrix> system_matrix_ptr() const
473 {
474 return A;
475 }
476
477 const matrix& system_matrix() const
478 {
479 return *A;
480 }
481
482 template <class Matrix, class Vec1, class Vec2>
483 std::tuple<size_t, value_type> operator()(
484 const Matrix& A, const Vec1& rhs, Vec2&& x) const
485 {
486 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*this, A), *P, rhs, x);
487 postprocess(rhs, x);
488 return cnv;
489 }
490
491 template <class Vec1, class Vec2>
492 std::tuple<size_t, value_type>
493 operator()(const Vec1& rhs, Vec2&& x) const
494 {
495 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*this, *A), *P, rhs, x);
496 postprocess(rhs, x);
497 return cnv;
498 }
499
500 size_t size() const
501 {
502 return nrows;
503 }
504
505 template <class Vector>
506 void project(Vector& x) const
507 {
508 const auto one = math::identity<scalar_type>();
509
510 ARCCORE_ALINA_TIC("project");
511
512 ARCCORE_ALINA_TIC("local inner product");
513 for (ptrdiff_t j = 0; j < ndv; ++j)
514 df[j] = backend::inner_product(x, *Z[j]);
515 ARCCORE_ALINA_TOC("local inner product");
516
517 coarse_solve(df, dx);
518
519 ARCCORE_ALINA_TIC("spmv");
520 backend::copy(dx, *dd);
521 backend::spmv(-one, *AZ, *dd, one, x);
522 ARCCORE_ALINA_TOC("spmv");
523
524 ARCCORE_ALINA_TOC("project");
525 }
526
527 private:
528
529 static const int tag_exc_vals = 2011;
530 static const int tag_exc_dmat = 3011;
531 static const int tag_exc_dvec = 4011;
532 static const int tag_exc_lnnz = 5011;
533
534 mpi_communicator comm;
535 ptrdiff_t nrows, ndv, nz;
536
537 std::shared_ptr<matrix> A, AZ;
538 std::shared_ptr<LocalPrecond> P;
539
540 mutable std::vector<value_type> df, dx;
541 std::vector<ptrdiff_t> dv_start;
542
543 std::vector<std::shared_ptr<vector>> Z;
544
545 std::shared_ptr<DirectSolver> E;
546
547 std::shared_ptr<vector> q;
548 std::shared_ptr<vector> dd;
549
550 IterativeSolver S;
551
552 void coarse_solve(std::vector<value_type>& f, std::vector<value_type>& x) const
553 {
554 ARCCORE_ALINA_TIC("coarse solve");
555 (*E)(f, x);
556 ARCCORE_ALINA_TOC("coarse solve");
557 }
558
559 template <class Vec1, class Vec2>
560 void postprocess(const Vec1& rhs, Vec2& x) const
561 {
562 const auto one = math::identity<scalar_type>();
563
564 ARCCORE_ALINA_TIC("postprocess");
565
566 // q = rhs - Ax
567 backend::copy(rhs, *q);
568 backend::spmv(-one, *A, x, one, *q);
569
570 // df = transp(Z) * (rhs - Ax)
571 ARCCORE_ALINA_TIC("local inner product");
572 for (ptrdiff_t j = 0; j < ndv; ++j)
573 df[j] = backend::inner_product(*q, *Z[j]);
574 ARCCORE_ALINA_TOC("local inner product");
575
576 // dx = inv(E) * df
577 coarse_solve(df, dx);
578
579 // x += Z * dx
580 backend::lin_comb(ndv, dx, Z, one, x);
581
582 ARCCORE_ALINA_TOC("postprocess");
583 }
584};
585
586/*---------------------------------------------------------------------------*/
587/*---------------------------------------------------------------------------*/
588
589} // namespace Arcane::Alina
590
591/*---------------------------------------------------------------------------*/
592/*---------------------------------------------------------------------------*/
593
594namespace Arcane::Alina::backend
595{
596
597template <class SDD, class Matrix,
598 class Alpha, class Beta, class Vec1, class Vec2>
599struct spmv_impl<Alpha, sdd_projected_matrix<SDD, Matrix>, Vec1, Beta, Vec2>
600{
602
603 static void apply(Alpha alpha, const M& A, const Vec1& x, Beta beta, Vec2& y)
604 {
605 A.mul(alpha, x, beta, y);
606 }
607};
608
609template <class SDD, class Matrix, class Vec1, class Vec2, class Vec3>
610struct residual_impl<sdd_projected_matrix<SDD, Matrix>, Vec1, Vec2, Vec3>
611{
613
614 static void apply(const Vec1& rhs, const M& A, const Vec2& x, Vec3& r)
615 {
616 A.residual(rhs, x, r);
617 }
618};
619
620/*---------------------------------------------------------------------------*/
621/*---------------------------------------------------------------------------*/
622
623} // namespace Arcane::Alina::backend
624
625/*---------------------------------------------------------------------------*/
626/*---------------------------------------------------------------------------*/
627
628#endif
Distributed Matrix using message passing.
Distributed solver based on subdomain deflation.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Matrix class, to be used by user.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Inner product for distributed vectors.
Implementation for residual error computation.
Implementation for matrix-vector product.
constant_deflation(int block_size=1)
Constructor.
Convenience wrapper around MPI_Comm.