Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
DistributedSubDomainDeflation.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* DistributedSubDomainDeflation.h (C) 2000-2026 */
9/* */
10/* Distributed solver based on subdomain deflation. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_DISTRIBUTEDSUBDOMAINDEFLATION_H
13#define ARCCORE_ALINA_DISTRIBUTEDSUBDOMAINDEFLATION_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the AMGCL library (version of March 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include "arccore/alina/BuiltinBackend.h"
27#include "arccore/alina/Adapters.h"
28#include "arccore/alina/MessagePassingUtils.h"
29#include "arccore/alina/DistributedSkylineLUDirectSolver.h"
30#include "arccore/alina/DistributedInnerProduct.h"
31#include "arccore/alina/DistributedMatrix.h"
32
33#include <vector>
34#include <algorithm>
35#include <numeric>
36#include <memory>
37#include <functional>
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
42namespace Arcane::Alina
43{
44
45/*---------------------------------------------------------------------------*/
46/*---------------------------------------------------------------------------*/
47
/// Constant deflation: one piecewise-constant deflation vector per
/// scalar component of a (block) unknown.
///
/// Deflation vector j has value 1 on rows whose component index
/// (row % block_size) equals j, and 0 elsewhere.
struct constant_deflation
{
  const int block_size;

  /// @param block_size Number of scalar components per grid node
  ///        (1 for scalar problems).
  constant_deflation(int block_size = 1)
  : block_size(block_size)
  {}

  /// Number of deflation vectors (= block_size).
  int dim() const
  {
    return block_size;
  }

  /// Value of deflation vector j at the given row (0 or 1).
  int operator()(ptrdiff_t row, int j) const
  {
    return row % block_size == j;
  }
};
71
72/*---------------------------------------------------------------------------*/
73/*---------------------------------------------------------------------------*/
74
75template <class SDD, class Matrix>
76struct sdd_projected_matrix
77{
78 typedef typename SDD::value_type value_type;
79
80 const SDD& S;
81 const Matrix& A;
82
83 sdd_projected_matrix(const SDD& S, const Matrix& A)
84 : S(S)
85 , A(A)
86 {}
87
88 template <class T, class Vec1, class Vec2>
89 void mul(T alpha, const Vec1& x, T beta, Vec2& y) const
90 {
91 ARCCORE_ALINA_TIC("top/spmv");
92 backend::spmv(alpha, A, x, beta, y);
93 ARCCORE_ALINA_TOC("top/spmv");
94
95 S.project(y);
96 }
97
98 template <class Vec1, class Vec2, class Vec3>
99 void residual(const Vec1& f, const Vec2& x, Vec3& r) const
100 {
101 ARCCORE_ALINA_TIC("top/residual");
102 backend::residual(f, A, x, r);
103 ARCCORE_ALINA_TOC("top/residual");
104
105 S.project(r);
106 }
107};
108
109/*---------------------------------------------------------------------------*/
110/*---------------------------------------------------------------------------*/
111
112template <class SDD, class Matrix>
113sdd_projected_matrix<SDD, Matrix> make_sdd_projected_matrix(const SDD& S, const Matrix& A)
114{
116}
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120
126template <class LocalPrecond,
127 class IterativeSolver,
128 class DirectSolver = DistributedSkylineLUDirectSolver<typename LocalPrecond::backend_type::value_type>>
129class DistributedSubDomainDeflation
130{
131 public:
132
133 typedef typename LocalPrecond::backend_type backend_type;
134 typedef typename backend_type::params backend_params;
135
136 struct params
137 {
138 typename LocalPrecond::params local;
139 typename IterativeSolver::params isolver;
140 typename DirectSolver::params dsolver;
141
142 // Number of deflation vectors.
143 Int32 num_def_vec = 0;
144
145 // Value of deflation vector at the given row and column.
146 std::function<double(ptrdiff_t, unsigned)> def_vec;
147
148 params() {}
149
150 params(const PropertyTree& p)
151 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, local)
152 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, isolver)
153 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, dsolver)
154 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, num_def_vec)
155 {
156 void* ptr = 0;
157 ptr = p.get("def_vec", ptr);
158
159 precondition(ptr, "Error in subdomain_deflation parameters: def_vec is not set");
160
161 def_vec = *static_cast<std::function<double(ptrdiff_t, unsigned)>*>(ptr);
162
163 p.check_params({ "local", "isolver", "dsolver", "num_def_vec", "def_vec" });
164 }
165
166 void get(PropertyTree& p, const std::string& path) const
167 {
168 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, local);
169 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, isolver);
170 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, dsolver);
171 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, num_def_vec);
172 }
173 };
174
175 typedef typename backend_type::value_type value_type;
176 typedef typename math::scalar_of<value_type>::type scalar_type;
177 typedef typename backend_type::matrix bmatrix;
178 typedef typename backend_type::vector vector;
179 typedef DistributedMatrix<backend_type> matrix;
180
181 template <class Matrix>
182 DistributedSubDomainDeflation(mpi_communicator comm,
183 const Matrix& Astrip,
184 const params& prm = params(),
185 const backend_params& bprm = backend_params())
186 : comm(comm)
187 , nrows(backend::nbRow(Astrip))
188 , ndv(prm.num_def_vec)
189 , dv_start(comm.size + 1, 0)
190 , Z(ndv)
191 , q(backend_type::create_vector(nrows, bprm))
192 , S(nrows, prm.isolver, bprm, DistributedInnerProduct(comm))
193 {
194 A = std::make_shared<matrix>(comm, Astrip, nrows);
195 init(prm, bprm);
196 }
197
199 std::shared_ptr<matrix> A,
200 const params& prm = params(),
201 const backend_params& bprm = backend_params())
202 : comm(comm)
203 , nrows(A->loc_rows())
204 , ndv(prm.num_def_vec)
205 , A(A)
206 , dv_start(comm.size + 1, 0)
207 , Z(ndv)
208 , q(backend_type::create_vector(nrows, bprm))
209 , S(nrows, prm.isolver, bprm, DistributedInnerProduct(comm))
210 {
211 init(prm, bprm);
212 }
213
214 void init(const params& prm = params(),
215 const backend_params& bprm = backend_params())
216 {
217 ARCCORE_ALINA_TIC("setup deflation");
218 typedef CSRMatrix<value_type, ptrdiff_t> build_matrix;
219
220 // Lets see how many deflation vectors are there.
221 std::vector<ptrdiff_t> dv_size(comm.size);
222 MPI_Allgather(&ndv, 1, mpi_datatype<ptrdiff_t>(), &dv_size[0], 1, mpi_datatype<ptrdiff_t>(), comm);
223
224 std::partial_sum(dv_size.begin(), dv_size.end(), dv_start.begin() + 1);
225 nz = dv_start.back();
226
227 df.resize(ndv);
228 dx.resize(ndv);
229 dd = backend_type::create_vector(ndv, bprm);
230
231 auto az_loc = std::make_shared<build_matrix>();
232 auto az_rem = std::make_shared<build_matrix>();
233
234 auto a_loc = A->local();
235 auto a_rem = A->remote();
236
237 const CommunicationPattern<backend_type>& Acp = A->cpat();
238
239 // Fill deflation vectors.
240 ARCCORE_ALINA_TIC("copy deflation vectors");
241 {
242 std::vector<value_type> z(nrows);
243 for (int j = 0; j < ndv; ++j) {
244 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
245 for (ptrdiff_t i = begin; i < (begin + size); ++i)
246 z[i] = prm.def_vec(i, j);
247 });
248 Z[j] = backend_type::copy_vector(z, bprm);
249 }
250 }
251 ARCCORE_ALINA_TOC("copy deflation vectors");
252
253 ARCCORE_ALINA_TIC("first pass");
254 az_loc->set_size(nrows, ndv, true);
255 az_loc->set_nonzeros(nrows * dv_size[comm.rank]);
256 az_rem->set_size(nrows, 0, true);
257 // 1. Build local part of AZ matrix.
258 // 2. Count remote nonzeros
259 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
260 std::vector<ptrdiff_t> marker(Acp.recv.nbr.size(), -1);
261
262 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
263 ptrdiff_t az_loc_head = i * ndv;
264 az_loc->ptr[i + 1] = az_loc_head + ndv;
265
266 for (ptrdiff_t j = 0; j < ndv; ++j) {
267 az_loc->col[az_loc_head + j] = j;
268 az_loc->val[az_loc_head + j] = math::zero<value_type>();
269 }
270
271 for (ptrdiff_t j = a_loc->ptr[i], e = a_loc->ptr[i + 1]; j < e; ++j) {
272 ptrdiff_t c = a_loc->col[j];
273 value_type v = a_loc->val[j];
274
275 for (ptrdiff_t j = 0; j < ndv; ++j)
276 az_loc->val[az_loc_head + j] += v * prm.def_vec(c, j);
277 }
278
279 for (ptrdiff_t j = a_rem->ptr[i], e = a_rem->ptr[i + 1]; j < e; ++j) {
280 int d = Acp.domain(a_rem->col[j]);
281
282 if (marker[d] != i) {
283 marker[d] = i;
284 az_rem->ptr[i + 1] += dv_size[d];
285 }
286 }
287 }
288 });
289
290 az_rem->set_nonzeros(az_rem->scan_row_sizes());
291 ARCCORE_ALINA_TOC("first pass");
292
293 // Create local preconditioner.
294 ARCCORE_ALINA_TIC("local preconditioner");
295 P = std::make_shared<LocalPrecond>(*a_loc, prm.local, bprm);
296 ARCCORE_ALINA_TOC("local preconditioner");
297
298 A->set_local(P->system_matrix_ptr());
299 A->move_to_backend(bprm);
300
301 ARCCORE_ALINA_TIC("remote(A*Z)");
302 /* Construct remote part of AZ */
303 // Exchange deflation vectors
304 std::vector<ptrdiff_t> zrecv_ptr(Acp.recv.nbr.size() + 1, 0);
305 std::vector<ptrdiff_t> zcol_ptr;
306 zcol_ptr.reserve(Acp.recv.count() + 1);
307 zcol_ptr.push_back(0);
308
309 for (size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
310 ptrdiff_t ncols = Acp.recv.ptr[i + 1] - Acp.recv.ptr[i];
311 ptrdiff_t nvecs = dv_size[Acp.recv.nbr[i]];
312 ptrdiff_t size = nvecs * ncols;
313 zrecv_ptr[i + 1] = zrecv_ptr[i] + size;
314
315 for (ptrdiff_t j = 0; j < ncols; ++j)
316 zcol_ptr.push_back(zcol_ptr.back() + nvecs);
317 }
318
319 std::vector<value_type> zrecv(zrecv_ptr.back());
320 std::vector<value_type> zsend(Acp.send.count() * ndv);
321
322 for (size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
323 ptrdiff_t begin = zrecv_ptr[i];
324 ptrdiff_t size = zrecv_ptr[i + 1] - begin;
325
326 Acp.recv.req[i] = comm.doIReceive(&zrecv[begin], size, Acp.recv.nbr[i], tag_exc_vals);
327 }
328
329 for (size_t i = 0, k = 0; i < Acp.send.count(); ++i)
330 for (ptrdiff_t j = 0; j < ndv; ++j, ++k)
331 zsend[k] = prm.def_vec(Acp.send.col[i], j);
332
333 for (size_t i = 0; i < Acp.send.nbr.size(); ++i)
334 Acp.send.req[i] = comm.doISend(&zsend[ndv * Acp.send.ptr[i]], ndv * (Acp.send.ptr[i + 1] - Acp.send.ptr[i]),
335 Acp.send.nbr[i], tag_exc_vals);
336
337 comm.waitAll(Acp.recv.req);
338 comm.waitAll(Acp.send.req);
339
340 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
341 std::vector<ptrdiff_t> marker(nz, -1);
342
343 // AZ_rem = Arem * Z
344 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
345 ptrdiff_t az_rem_head = az_rem->ptr[i];
346 ptrdiff_t az_rem_tail = az_rem_head;
347
348 for (auto a = backend::row_begin(*a_rem, i); a; ++a) {
349 ptrdiff_t c = a.col();
350 value_type v = a.value();
351
352 // Domain the column belongs to
353 ptrdiff_t d = Acp.recv.nbr[std::upper_bound(Acp.recv.ptr.begin(), Acp.recv.ptr.end(), c) -
354 Acp.recv.ptr.begin() - 1];
355
356 value_type* zval = &zrecv[zcol_ptr[c]];
357 for (ptrdiff_t j = 0, k = dv_start[d]; j < dv_size[d]; ++j, ++k) {
358 if (marker[k] < az_rem_head) {
359 marker[k] = az_rem_tail;
360 az_rem->col[az_rem_tail] = k;
361 az_rem->val[az_rem_tail] = v * zval[j];
362 ++az_rem_tail;
363 }
364 else {
365 az_rem->val[marker[k]] += v * zval[j];
366 }
367 }
368 }
369 }
370 });
371 ARCCORE_ALINA_TOC("remote(A*Z)");
372
373 /* Build solver for the deflated matrix E. */
374 ARCCORE_ALINA_TIC("assemble E");
375
376 // Count nonzeros in E.
377 std::vector<int> nbrs; // processes we are talking to
378 nbrs.reserve(1 + Acp.send.nbr.size() + Acp.recv.nbr.size());
379 std::set_union(
380 Acp.send.nbr.begin(), Acp.send.nbr.end(),
381 Acp.recv.nbr.begin(), Acp.recv.nbr.end(),
382 std::back_inserter(nbrs));
383 nbrs.push_back(comm.rank);
384
385 build_matrix E;
386 E.set_size(ndv, nz, false);
387
388 {
389 ptrdiff_t nnz = 0;
390 for (int j : nbrs)
391 nnz += dv_size[j];
392 for (int k = 0; k <= ndv; ++k)
393 E.ptr[k] = k * nnz;
394 }
395 E.setNbNonZero(E.ptr[ndv]);
396 E.set_nonzeros(E.ptr[ndv]);
397
398 // Build local strip of E.
399 int nthreads = ConcurrencyBase::maxAllowedThread();
400 multi_array<value_type, 3> erow(nthreads, ndv, nz);
401 std::fill_n(erow.data(), erow.size(), 0);
402
403 {
404 ptrdiff_t dv_offset = dv_start[comm.rank];
405 arccoreParallelFor(0, nrows, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
406 const int tid = TaskFactory::currentTaskThreadIndex();
407 std::vector<value_type> z(ndv);
408
409 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
410 for (ptrdiff_t j = 0; j < ndv; ++j)
411 z[j] = prm.def_vec(i, j);
412
413 for (ptrdiff_t k = az_loc->ptr[i], e = az_loc->ptr[i + 1]; k < e; ++k) {
414 ptrdiff_t c = az_loc->col[k] + dv_offset;
415 value_type v = az_loc->val[k];
416
417 for (ptrdiff_t j = 0; j < ndv; ++j)
418 erow(tid, j, c) += v * z[j];
419 }
420
421 for (ptrdiff_t k = az_rem->ptr[i], e = az_rem->ptr[i + 1]; k < e; ++k) {
422 ptrdiff_t c = az_rem->col[k];
423 value_type v = az_rem->val[k];
424
425 for (ptrdiff_t j = 0; j < ndv; ++j)
426 erow(tid, j, c) += v * z[j];
427 }
428 }
429 });
430 }
431
432 for (int i = 0; i < ndv; ++i) {
433 int row_head = E.ptr[i];
434 for (int j : nbrs) {
435 for (int k = 0; k < dv_size[j]; ++k) {
436 int c = dv_start[j] + k;
437 value_type v = math::zero<value_type>();
438 for (int t = 0; t < nthreads; ++t)
439 v += erow(t, i, c);
440
441 E.col[row_head] = c;
442 E.val[row_head] = v;
443
444 ++row_head;
445 }
446 }
447 }
448 ARCCORE_ALINA_TOC("assemble E");
449
450 ARCCORE_ALINA_TIC("factorize E");
451 this->E = std::make_shared<DirectSolver>(comm, E, prm.dsolver);
452 ARCCORE_ALINA_TOC("factorize E");
453
454 ARCCORE_ALINA_TIC("finish(A*Z)");
455 AZ = std::make_shared<matrix>(comm, az_loc, az_rem);
456 AZ->move_to_backend(bprm);
457 ARCCORE_ALINA_TOC("finish(A*Z)");
458 ARCCORE_ALINA_TOC("setup deflation");
459 }
460
461 template <class Vec1, class Vec2>
462 void apply(const Vec1& rhs, Vec2&& x) const
463 {
464 size_t iters;
465 double error;
466 backend::clear(x);
467 std::tie(iters, error) = (*this)(rhs, x);
468 }
469
470 std::shared_ptr<matrix> system_matrix_ptr() const
471 {
472 return A;
473 }
474
475 const matrix& system_matrix() const
476 {
477 return *A;
478 }
479
480 template <class Matrix, class Vec1, class Vec2>
481 std::tuple<size_t, value_type> operator()(
482 const Matrix& A, const Vec1& rhs, Vec2&& x) const
483 {
484 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*this, A), *P, rhs, x);
485 postprocess(rhs, x);
486 return cnv;
487 }
488
489 template <class Vec1, class Vec2>
490 std::tuple<size_t, value_type>
491 operator()(const Vec1& rhs, Vec2&& x) const
492 {
493 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*this, *A), *P, rhs, x);
494 postprocess(rhs, x);
495 return cnv;
496 }
497
498 size_t size() const
499 {
500 return nrows;
501 }
502
503 template <class Vector>
504 void project(Vector& x) const
505 {
506 const auto one = math::identity<scalar_type>();
507
508 ARCCORE_ALINA_TIC("project");
509
510 ARCCORE_ALINA_TIC("local inner product");
511 for (ptrdiff_t j = 0; j < ndv; ++j)
512 df[j] = backend::inner_product(x, *Z[j]);
513 ARCCORE_ALINA_TOC("local inner product");
514
515 coarse_solve(df, dx);
516
517 ARCCORE_ALINA_TIC("spmv");
518 backend::copy(dx, *dd);
519 backend::spmv(-one, *AZ, *dd, one, x);
520 ARCCORE_ALINA_TOC("spmv");
521
522 ARCCORE_ALINA_TOC("project");
523 }
524
525 private:
526
527 static const int tag_exc_vals = 2011;
528 static const int tag_exc_dmat = 3011;
529 static const int tag_exc_dvec = 4011;
530 static const int tag_exc_lnnz = 5011;
531
532 mpi_communicator comm;
533 ptrdiff_t nrows, ndv, nz;
534
535 std::shared_ptr<matrix> A, AZ;
536 std::shared_ptr<LocalPrecond> P;
537
538 mutable std::vector<value_type> df, dx;
539 std::vector<ptrdiff_t> dv_start;
540
541 std::vector<std::shared_ptr<vector>> Z;
542
543 std::shared_ptr<DirectSolver> E;
544
545 std::shared_ptr<vector> q;
546 std::shared_ptr<vector> dd;
547
548 IterativeSolver S;
549
550 void coarse_solve(std::vector<value_type>& f, std::vector<value_type>& x) const
551 {
552 ARCCORE_ALINA_TIC("coarse solve");
553 (*E)(f, x);
554 ARCCORE_ALINA_TOC("coarse solve");
555 }
556
557 template <class Vec1, class Vec2>
558 void postprocess(const Vec1& rhs, Vec2& x) const
559 {
560 const auto one = math::identity<scalar_type>();
561
562 ARCCORE_ALINA_TIC("postprocess");
563
564 // q = rhs - Ax
565 backend::copy(rhs, *q);
566 backend::spmv(-one, *A, x, one, *q);
567
568 // df = transp(Z) * (rhs - Ax)
569 ARCCORE_ALINA_TIC("local inner product");
570 for (ptrdiff_t j = 0; j < ndv; ++j)
571 df[j] = backend::inner_product(*q, *Z[j]);
572 ARCCORE_ALINA_TOC("local inner product");
573
574 // dx = inv(E) * df
575 coarse_solve(df, dx);
576
577 // x += Z * dx
578 backend::lin_comb(ndv, dx, Z, one, x);
579
580 ARCCORE_ALINA_TOC("postprocess");
581 }
582};
583
584/*---------------------------------------------------------------------------*/
585/*---------------------------------------------------------------------------*/
586
587} // namespace Arcane::Alina
588
589/*---------------------------------------------------------------------------*/
590/*---------------------------------------------------------------------------*/
591
592namespace Arcane::Alina::backend
593{
594
595template <class SDD, class Matrix,
596 class Alpha, class Beta, class Vec1, class Vec2>
597struct spmv_impl<Alpha, sdd_projected_matrix<SDD, Matrix>, Vec1, Beta, Vec2>
598{
600
601 static void apply(Alpha alpha, const M& A, const Vec1& x, Beta beta, Vec2& y)
602 {
603 A.mul(alpha, x, beta, y);
604 }
605};
606
607template <class SDD, class Matrix, class Vec1, class Vec2, class Vec3>
608struct residual_impl<sdd_projected_matrix<SDD, Matrix>, Vec1, Vec2, Vec3>
609{
611
612 static void apply(const Vec1& rhs, const M& A, const Vec2& x, Vec3& r)
613 {
614 A.residual(rhs, x, r);
615 }
616};
617
618/*---------------------------------------------------------------------------*/
619/*---------------------------------------------------------------------------*/
620
621} // namespace Arcane::Alina::backend
622
623/*---------------------------------------------------------------------------*/
624/*---------------------------------------------------------------------------*/
625
626#endif
Distributed Matrix using message passing.
Distributed solver based on subdomain deflation.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Matrix class, to be used by user.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Inner product for distributed vectors.
Implementation for residual error computation.
Implementation for matrix-vector product.
constant_deflation(int block_size=1)
Constructor.
Convenience wrapper around MPI_Comm.