Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
DistributedSolver.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/*---------------------------------------------------------------------------*/
9/*
10 * This file is based on the work on AMGCL library (version march 2026)
11 * which can be found at https://github.com/ddemidov/amgcl.
12 *
13 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
14 * SPDX-License-Identifier: MIT
15 */
16/*---------------------------------------------------------------------------*/
17/*---------------------------------------------------------------------------*/
18
19// Pour Eigen
20#pragma GCC diagnostic ignored "-Wdeprecated-copy"
21#pragma GCC diagnostic ignored "-Wint-in-bool-context"
22
23#include <iostream>
24#include <vector>
25#include <string>
26
27#include <boost/program_options.hpp>
28#include <boost/preprocessor/seq/for_each.hpp>
29
30#include "arccore/alina/BuiltinBackend.h"
31#include "arccore/alina/StaticMatrix.h"
32#include "arccore/alina/Adapters.h"
33
34#if defined(SOLVER_BACKEND_CUDA)
35# include "arccore/alina/CudaBackend.h"
36# include "arccore/alina/relaxation_cusparse_ilu0.h"
37#else
38# ifndef SOLVER_BACKEND_BUILTIN
39# define SOLVER_BACKEND_BUILTIN
40# endif
41#endif
42
43#include "arccore/alina/MessagePassingUtils.h"
44#include "arccore/alina/DistributedPreconditionedSolver.h"
45#include "arccore/alina/DistributedPreconditioner.h"
46#include "arccore/alina/DistributedSolverRuntime.h"
47
48#include "arccore/alina/IO.h"
49#include "arccore/alina/Profiler.h"
50
51#ifndef ARCCORE_ALINA_BLOCK_SIZES
52# define ARCCORE_ALINA_BLOCK_SIZES (3)(4)
53#endif
54
55using namespace Arcane;
56
57namespace math = Alina::math;
58
59//---------------------------------------------------------------------------
60ptrdiff_t
61assemble_poisson3d(Alina::mpi_communicator comm,
62 ptrdiff_t n, int block_size,
63 std::vector<ptrdiff_t>& ptr,
64 std::vector<ptrdiff_t>& col,
65 std::vector<double>& val,
66 std::vector<double>& rhs)
67{
68 ptrdiff_t n3 = n * n * n;
69
70 ptrdiff_t chunk = (n3 + comm.size - 1) / comm.size;
71 if (chunk % block_size != 0) {
72 chunk += block_size - chunk % block_size;
73 }
74 ptrdiff_t row_beg = std::min(n3, chunk * comm.rank);
75 ptrdiff_t row_end = std::min(n3, row_beg + chunk);
76 chunk = row_end - row_beg;
77
78 ptr.clear();
79 ptr.reserve(chunk + 1);
80 col.clear();
81 col.reserve(chunk * 7);
82 val.clear();
83 val.reserve(chunk * 7);
84
85 rhs.resize(chunk);
86 std::fill(rhs.begin(), rhs.end(), 1.0);
87
88 const double h2i = (n - 1) * (n - 1);
89 ptr.push_back(0);
90
91 for (ptrdiff_t idx = row_beg; idx < row_end; ++idx) {
92 ptrdiff_t k = idx / (n * n);
93 ptrdiff_t j = (idx / n) % n;
94 ptrdiff_t i = idx % n;
95
96 if (k > 0) {
97 col.push_back(idx - n * n);
98 val.push_back(-h2i);
99 }
100
101 if (j > 0) {
102 col.push_back(idx - n);
103 val.push_back(-h2i);
104 }
105
106 if (i > 0) {
107 col.push_back(idx - 1);
108 val.push_back(-h2i);
109 }
110
111 col.push_back(idx);
112 val.push_back(6 * h2i);
113
114 if (i + 1 < n) {
115 col.push_back(idx + 1);
116 val.push_back(-h2i);
117 }
118
119 if (j + 1 < n) {
120 col.push_back(idx + n);
121 val.push_back(-h2i);
122 }
123
124 if (k + 1 < n) {
125 col.push_back(idx + n * n);
126 val.push_back(-h2i);
127 }
128
129 ptr.push_back(col.size());
130 }
131
132 return chunk;
133}
134
135//---------------------------------------------------------------------------
136ptrdiff_t
137read_matrix_market(Alina::mpi_communicator comm,
138 const std::string& A_file, const std::string& rhs_file, int block_size,
139 std::vector<ptrdiff_t>& ptr,
140 std::vector<ptrdiff_t>& col,
141 std::vector<double>& val,
142 std::vector<double>& rhs)
143{
144 Alina::IO::mm_reader A_mm(A_file);
145 ptrdiff_t n = A_mm.rows();
146
147 ptrdiff_t chunk = (n + comm.size - 1) / comm.size;
148 if (chunk % block_size != 0) {
149 chunk += block_size - chunk % block_size;
150 }
151
152 ptrdiff_t row_beg = std::min(n, chunk * comm.rank);
153 ptrdiff_t row_end = std::min(n, row_beg + chunk);
154
155 chunk = row_end - row_beg;
156
157 A_mm(ptr, col, val, row_beg, row_end);
158
159 if (rhs_file.empty()) {
160 rhs.resize(chunk);
161 std::fill(rhs.begin(), rhs.end(), 1.0);
162 }
163 else {
164 Alina::IO::mm_reader rhs_mm(rhs_file);
165 rhs_mm(rhs, row_beg, row_end);
166 }
167
168 return chunk;
169}
170
171//---------------------------------------------------------------------------
172ptrdiff_t
173read_binary(Alina::mpi_communicator comm,
174 const std::string& A_file, const std::string& rhs_file, int block_size,
175 std::vector<ptrdiff_t>& ptr,
176 std::vector<ptrdiff_t>& col,
177 std::vector<double>& val,
178 std::vector<double>& rhs)
179{
180 ptrdiff_t n = Alina::IO::crs_size<ptrdiff_t>(A_file);
181
182 ptrdiff_t chunk = (n + comm.size - 1) / comm.size;
183 if (chunk % block_size != 0) {
184 chunk += block_size - chunk % block_size;
185 }
186
187 ptrdiff_t row_beg = std::min(n, chunk * comm.rank);
188 ptrdiff_t row_end = std::min(n, row_beg + chunk);
189
190 chunk = row_end - row_beg;
191
192 Alina::IO::read_crs(A_file, n, ptr, col, val, row_beg, row_end);
193
194 if (rhs_file.empty()) {
195 rhs.resize(chunk);
196 std::fill(rhs.begin(), rhs.end(), 1.0);
197 }
198 else {
199 ptrdiff_t rows, cols;
200 Alina::IO::read_dense(rhs_file, rows, cols, rhs, row_beg, row_end);
201 }
202
203 return chunk;
204}
205
206//---------------------------------------------------------------------------
// Wraps the strip-distributed matrix Astrip into a DistributedMatrix and,
// unless running on a single rank or the 'merge' partitioner was requested,
// repartitions it as A <- J * A * I, where I is the permutation produced by
// the partitioner and J = transpose(I). The RHS vector is permuted the same
// way (rhs <- J * rhs via spmv) so it stays consistent with the new row
// distribution. Returns the (possibly repartitioned) distributed matrix.
207template <class Backend, class Matrix>
208std::shared_ptr<Alina::DistributedMatrix<Backend>>
209partition(Alina::mpi_communicator comm, const Matrix& Astrip,
210 typename Backend::vector& rhs, const typename Backend::params& bprm,
211 Alina::eMatrixPartitionerType ptype, int block_size = 1)
212{
213 auto& prof = Alina::Profiler::globalProfiler();
214 typedef typename Backend::value_type val_type;
215 typedef typename Alina::math::rhs_of<val_type>::type rhs_type;
216 typedef Alina::DistributedMatrix<Backend> DMatrix;
217
218 auto A = std::make_shared<DMatrix>(comm, Astrip);
219
// Nothing to repartition for a single process, and 'merge' means "keep the
// strip distribution as-is".
220 if (comm.size == 1 || ptype == Alina::eMatrixPartitionerType::merge)
221 return A;
222
223 prof.tic("partition");
// NOTE(review): original lines 224 and 226 are missing from this listing —
// presumably the declaration of the property tree `prm` used just below and
// of the partitioner functor `part` (a runtime-configurable wrapper around
// the matrix partitioner) built from it. Restore from version control.
225 prm.put("type", ptype);
227
// I: permutation matrix from the partitioner; J: its transpose.
228 auto I = part(*A, block_size);
229 auto J = transpose(*I);
230 A = product(*J, *product(*A, *I));
231
// The permuted RHS lives wherever the backend keeps vectors.
232#if defined(SOLVER_BACKEND_BUILTIN)
233 Alina::numa_vector<rhs_type> new_rhs(J->loc_rows());
234#elif defined(SOLVER_BACKEND_CUDA)
235 thrust::device_vector<rhs_type> new_rhs(J->loc_rows());
236#endif
237
238 J->move_to_backend(bprm);
239
// new_rhs = 1 * J * rhs + 0 * new_rhs, then swap into the caller's vector.
240 Alina::backend::spmv(1, *J, rhs, 0, new_rhs);
241 rhs.swap(new_rhs);
242 prof.toc("partition");
243
244 return A;
245}
246
247//---------------------------------------------------------------------------
248#if defined(SOLVER_BACKEND_BUILTIN)
// Solves the distributed system using a block-valued builtin backend with
// static BxB value blocks. The scalar CRS input (ptr/col/val) is reinterpreted
// as a block matrix; the scalar RHS `f` is reinterpreted as chunk/B blocks of
// size Bx1. Optionally repartitions the matrix (see partition()) and, when
// "precond.allow_rebuild" is set, rebuilds the preconditioner once before
// solving to exercise that code path. Prints solver info, iteration count and
// final residual on rank 0.
249template <int B>
250void solve_block(Alina::mpi_communicator comm,
251 ptrdiff_t chunk,
252 const std::vector<ptrdiff_t>& ptr,
253 const std::vector<ptrdiff_t>& col,
254 const std::vector<double>& val,
255 const Alina::PropertyTree& prm,
256 const std::vector<double>& f,
257 Alina::eMatrixPartitionerType ptype)
258{
259 auto& prof = Alina::Profiler::globalProfiler();
260 typedef Alina::StaticMatrix<double, B, B> val_type;
261 typedef Alina::StaticMatrix<double, B, 1> rhs_type;
262
263 typedef Alina::BuiltinBackend<val_type> Backend;
264
265 typedef Alina::DistributedMatrix<Backend> DMatrix;
266
// NOTE(review): original lines 266-269 are missing from this listing — they
// contained the multi-line `typedef ...` whose trailing token `Solver;`
// remains below (presumably a distributed preconditioned solver type over
// this Backend). Restore from version control.
270 Solver;
271
272 typename Backend::params bprm;
273
// Reinterpret the scalar RHS as chunk/B blocks of size Bx1 (no copy of the
// underlying doubles beyond the vector construction itself).
274 Alina::numa_vector<rhs_type> rhs(reinterpret_cast<const rhs_type*>(&f[0]),
275 reinterpret_cast<const rhs_type*>(&f[0]) + chunk / B);
276
// Builds (and possibly repartitions) the distributed matrix; also used again
// below when testing preconditioner rebuild. Note it updates `chunk` when
// repartitioning changes the local row count.
277 auto get_distributed_matrix = [&]() {
278 auto t = prof.scoped_tic("distributed matrix");
279
280 std::shared_ptr<DMatrix> A;
281
282 if (ptype != Alina::eMatrixPartitionerType::merge) {
283 A = partition<Backend>(comm,
284 Alina::adapter::block_matrix<val_type>(std::tie(chunk, ptr, col, val)),
285 rhs, bprm, ptype, prm.get("precond.coarsening.aggr.block_size", 1));
286 chunk = A->loc_rows();
287 }
288 else {
289 A = std::make_shared<DMatrix>(
290 comm,
291 Alina::adapter::block_matrix<val_type>(std::tie(chunk, ptr, col, val)));
292 }
293
294 return A;
295 };
296
297 std::shared_ptr<DMatrix> A;
298 std::shared_ptr<Solver> solve;
299
300 {
301 auto t = prof.scoped_tic("setup");
302 A = get_distributed_matrix();
303 solve = std::make_shared<Solver>(comm, A, prm, bprm);
304 }
305
306 if (comm.rank == 0) {
307 std::cout << *solve << std::endl;
308 }
309
// Optional rebuild path: reconstruct the matrix and rebuild the existing
// preconditioner in place, then print the solver info again.
310 if (prm.get("precond.allow_rebuild", false)) {
311 if (comm.rank == 0) {
312 std::cout << "Rebuilding the preconditioner..." << std::endl;
313 }
314
315 {
316 auto t = prof.scoped_tic("rebuild");
317 A = get_distributed_matrix();
318 solve->precond().rebuild(A);
319 }
320
321 if (comm.rank == 0) {
322 std::cout << *solve << std::endl;
323 }
324 }
325
// NOTE(review): original lines 325-326 are missing from this listing —
// presumably the declaration of the solution vector `x` used below (an
// Alina::numa_vector<rhs_type> of chunk/B zero blocks). Restore from
// version control.
327
328 prof.tic("solve");
329 Alina::SolverResult r = (*solve)(rhs, x);
330 prof.toc("solve");
331
332 if (comm.rank == 0) {
333 std::cout << "Iterations: " << r.nbIteration() << std::endl
334 << "Error: " << r.residual() << std::endl
335 << prof << std::endl;
336 }
337}
338#endif
339
340//---------------------------------------------------------------------------
// Solves the distributed system with scalar (double) values, on either the
// builtin backend or the CUDA backend depending on the SOLVER_BACKEND_*
// macro. Mirrors solve_block: optional repartitioning, optional
// preconditioner rebuild when "precond.allow_rebuild" is set, then a single
// solve; prints solver info, iteration count and final residual on rank 0.
341void solve_scalar(Alina::mpi_communicator comm,
342 ptrdiff_t chunk,
343 const std::vector<ptrdiff_t>& ptr,
344 const std::vector<ptrdiff_t>& col,
345 const std::vector<double>& val,
346 const Alina::PropertyTree& prm,
347 const std::vector<double>& f,
348 Alina::eMatrixPartitionerType ptype)
349{
350 auto& prof = Alina::Profiler::globalProfiler();
351#if defined(SOLVER_BACKEND_BUILTIN)
// NOTE(review): original line 353 is missing from this listing — the active
// `using Backend = ...;` alias for the builtin branch (the commented line
// below suggests a BuiltinBackend<double> variant; the sizeof() prints below
// suggest its ptr/col index types may differ from ptrdiff_t). Restore from
// version control.
352 //using Backend = Alina::BuiltinBackend<double>;
354#elif defined(SOLVER_BACKEND_CUDA)
355 using Backend = Alina::backend::cuda<double>;
356#endif
357
// Diagnostic: show how host index/value sizes compare with the backend's.
358 std::cout << "Using scalar solve ptr_size=" << sizeof(ptrdiff_t)
359 << " ptr_type_size=" << sizeof(Backend::ptr_type)
360 << " col_type_size=" << sizeof(Backend::col_type)
361 << " value_type_size=" << sizeof(Backend::value_type)
362 << "\n";
363
364 typedef Alina::DistributedMatrix<Backend> DMatrix;
365
// NOTE(review): original lines 365-366 are missing from this listing —
// presumably the `Solver` typedef used below. Restore from version control.
367
368 typename Backend::params bprm;
369
// Backend-specific setup and RHS transfer.
370#if defined(SOLVER_BACKEND_BUILTIN)
// NOTE(review): original line 371 is missing from this listing — presumably
// the builtin-backend `rhs` vector constructed from `f`. Restore from
// version control.
372#elif defined(SOLVER_BACKEND_CUDA)
373 cusparseCreate(&bprm.cusparse_handle);
374 thrust::device_vector<double> rhs(f);
375#endif
376
// Builds (and possibly repartitions) the distributed matrix; reused for the
// rebuild test below. Updates `chunk` when repartitioning changes the local
// row count.
377 auto get_distributed_matrix = [&]() {
378 auto t = prof.scoped_tic("distributed matrix");
379 std::shared_ptr<DMatrix> A;
380
381 if (ptype != Alina::eMatrixPartitionerType::merge) {
382 A = partition<Backend>(comm,
383 std::tie(chunk, ptr, col, val), rhs, bprm, ptype,
384 prm.get("precond.coarsening.aggr.block_size", 1));
385 chunk = A->loc_rows();
386 }
387 else {
388 A = std::make_shared<DMatrix>(comm, std::tie(chunk, ptr, col, val));
389 }
390
391 return A;
392 };
393
394 std::shared_ptr<DMatrix> A;
395 std::shared_ptr<Solver> solve;
396
397 {
398 auto t = prof.scoped_tic("setup");
399 A = get_distributed_matrix();
400 solve = std::make_shared<Solver>(comm, A, prm, bprm);
401 }
402
403 if (comm.rank == 0) {
404 std::cout << "SolverInfo:\n";
405 std::cout << *solve << std::endl;
406 }
407
// Optional rebuild path: reconstruct the matrix and rebuild the existing
// preconditioner in place, then print the solver info again.
408 if (prm.get("precond.allow_rebuild", false)) {
409 if (comm.rank == 0) {
410 std::cout << "Rebuilding the preconditioner..." << std::endl;
411 }
412
413 {
414 auto t = prof.scoped_tic("rebuild");
415 A = get_distributed_matrix();
416 solve->precond().rebuild(A);
417 }
418
419 if (comm.rank == 0) {
420 std::cout << *solve << std::endl;
421 }
422 }
423
// Zero-initialized solution vector, allocated on the active backend.
424#if defined(SOLVER_BACKEND_BUILTIN)
// NOTE(review): original line 425 is missing from this listing — presumably
// the builtin-backend declaration of `x` (chunk zeros). Restore from
// version control.
426#elif defined(SOLVER_BACKEND_CUDA)
427 thrust::device_vector<double> x(chunk, 0.0);
428#endif
429
430 prof.tic("solve");
431 Alina::SolverResult r = (*solve)(rhs, x);
432 prof.toc("solve");
433
434 if (comm.rank == 0) {
435 std::cout << "Iterations: " << r.nbIteration() << std::endl
436 << "Error: " << r.residual() << std::endl
437 << prof << std::endl;
438 }
439}
440
441//---------------------------------------------------------------------------
// Entry point: initializes MPI, parses command-line options, obtains the
// local strip of the system (MatrixMarket file, binary file, pre-partitioned
// per-rank files, or an assembled 3D Poisson problem), then dispatches to the
// scalar or block solver according to --block-size.
442int main(int argc, char* argv[])
443{
444 auto& prof = Alina::Profiler::globalProfiler();
445
// MPI lifetime is tied to these RAII wrappers.
446 Alina::mpi_init_thread mpi(&argc, &argv);
447 Alina::mpi_communicator comm(MPI_COMM_WORLD);
448
449 if (comm.rank == 0)
450 std::cout << "World size: " << comm.size << std::endl;
451
452 // Read configuration from command line
453 namespace po = boost::program_options;
454 po::options_description desc("Options");
455
456 desc.add_options()("help,h", "show help")("matrix,A",
457 po::value<std::string>(),
458 "System matrix in the MatrixMarket format. "
459 "When not specified, a Poisson problem in 3D unit cube is assembled. ")(
460 "rhs,f",
461 po::value<std::string>()->default_value(""),
462 "The RHS vector in the MatrixMarket format. "
463 "When omitted, a vector of ones is used by default. "
464 "Should only be provided together with a system matrix. ")(
465 "Ap",
466 po::value<std::vector<std::string>>()->multitoken(),
467 "Pre-partitioned matrix (single file per MPI process)")(
468 "fp",
469 po::value<std::vector<std::string>>()->multitoken(),
470 "Pre-partitioned RHS (single file per MPI process)")(
471 "binary,B",
472 po::bool_switch()->default_value(false),
473 "When specified, treat input files as binary instead of as MatrixMarket. "
474 "It is assumed the files were converted to binary format with mm2bin utility. ")(
475 "block-size,b",
476 po::value<int>()->default_value(1),
477 "The block size of the system matrix. "
478 "When specified, the system matrix is assumed to have block-wise structure. "
479 "This usually is the case for problems in elasticity, structural mechanics, "
480 "for coupled systems of PDE (such as Navier-Stokes equations), etc. ")(
481 "partitioner,r",
482 po::value<Alina::eMatrixPartitionerType>()->default_value(
// Default partitioner depends on whether ParMETIS was detected at build time.
483#if defined(ARCCORE_ALINA_HAVE_PARMETIS)
484 Alina::eMatrixPartitionerType::parmetis
485#else
486 Alina::eMatrixPartitionerType::merge
487#endif
488 ),
489 "Repartition the system matrix")(
490 "size,n",
491 po::value<ptrdiff_t>()->default_value(128),
492 "domain size")("prm-file,P",
493 po::value<std::string>(),
494 "Parameter file in json format. ")(
495 "prm,p",
496 po::value<std::vector<std::string>>()->multitoken(),
497 "Parameters specified as name=value pairs. "
498 "May be provided multiple times. Examples:\n"
499 " -p solver.tol=1e-3\n"
500 " -p precond.coarse_enough=300")(
501 "test-rebuild",
502 po::bool_switch()->default_value(false),
503 "When specified, try to rebuild the solver before solving. ");
504
// Bare positional arguments are treated as -p name=value pairs.
505 po::positional_options_description p;
506 p.add("prm", -1);
507
508 po::variables_map vm;
509 po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
510 po::notify(vm);
511
512 if (vm.count("help")) {
513 if (comm.rank == 0)
514 std::cout << desc << std::endl;
515 return 0;
516 }
517
// Solver/preconditioner parameters: JSON file first, then -p overrides.
// NOTE(review): original line 518 is missing from this listing — presumably
// the declaration of `prm` (an Alina::PropertyTree, per the solve_* function
// signatures). Restore from version control.
519 if (vm.count("prm-file")) {
520 prm.read_json(vm["prm-file"].as<std::string>());
521 }
522
523 if (vm.count("prm")) {
524 for (const std::string& v : vm["prm"].as<std::vector<std::string>>()) {
525 prm.putKeyValue(v);
526 }
527 }
528
// Local strip of the system in CRS form; n is the local row count.
529 ptrdiff_t n;
530 std::vector<ptrdiff_t> ptr;
531 std::vector<ptrdiff_t> col;
532 std::vector<double> val;
533 std::vector<double> rhs;
534
535 int block_size = vm["block-size"].as<int>();
536 int aggr_block = prm.get("precond.coarsening.aggr.block_size", 1);
537
538 bool binary = vm["binary"].as<bool>();
539 Alina::eMatrixPartitionerType ptype = vm["partitioner"].as<Alina::eMatrixPartitionerType>();
540
// Input source 1: a global matrix file, strip-read by each rank. Chunks are
// aligned on block_size * aggr_block so neither the matrix blocks nor the
// coarsening aggregates are split across ranks.
541 if (vm.count("matrix")) {
542 prof.tic("read");
543 if (binary) {
544 n = read_binary(comm,
545 vm["matrix"].as<std::string>(),
546 vm["rhs"].as<std::string>(),
547 block_size * aggr_block, ptr, col, val, rhs);
548 }
549 else {
550 n = read_matrix_market(comm,
551 vm["matrix"].as<std::string>(),
552 vm["rhs"].as<std::string>(),
553 block_size * aggr_block, ptr, col, val, rhs);
554 }
555 prof.toc("read");
556 }
// Input source 2: pre-partitioned per-rank files; no repartitioning needed,
// so the partitioner is forced to 'merge'.
557 else if (vm.count("Ap")) {
558 prof.tic("read");
559 ptype = Alina::eMatrixPartitionerType::merge;
560
561 std::vector<std::string> Aparts = vm["Ap"].as<std::vector<std::string>>();
562 comm.check(Aparts.size() == static_cast<size_t>(comm.size),
563 "--Ap should have single entry per MPI process");
564
565 if (binary) {
566 Alina::IO::read_crs(Aparts[comm.rank], n, ptr, col, val);
567 }
568 else {
569 ptrdiff_t m;
570 std::tie(n, m) = Alina::IO::mm_reader(Aparts[comm.rank])(ptr, col, val);
571 }
572
573 if (vm.count("fp")) {
574 std::vector<std::string> fparts = vm["fp"].as<std::vector<std::string>>();
575 comm.check(fparts.size() == static_cast<size_t>(comm.size),
576 "--fp should have single entry per MPI process");
577
578 ptrdiff_t rows;
579 ptrdiff_t cols;
580
581 if (binary) {
582 Alina::IO::read_dense(fparts[comm.rank], rows, cols, rhs);
583 }
584 else {
585 std::tie(rows, cols) = Alina::IO::mm_reader(fparts[comm.rank])(rhs);
586 }
587
588 comm.check(rhs.size() == static_cast<size_t>(n), "Wrong RHS size");
589 }
590 else {
591 rhs.resize(n, 1);
592 }
593 prof.toc("read");
594 }
// Input source 3 (default): assemble the 3D Poisson benchmark problem.
595 else {
596 prof.tic("assemble");
597 n = assemble_poisson3d(comm,
598 vm["size"].as<ptrdiff_t>(),
599 block_size * aggr_block, ptr, col, val, rhs);
600 prof.toc("assemble");
601 }
602
603 if (vm["test-rebuild"].as<bool>()) {
604 prm.put("precond.allow_rebuild", true);
605 }
606
// Dispatch on block size: one solve_block<B> case per entry in
// ARCCORE_ALINA_BLOCK_SIZES (builtin backend only), plus the scalar case.
607 switch (block_size) {
608
609#if defined(SOLVER_BACKEND_BUILTIN)
610#define ARCCORE_ALINA_CALL_BLOCK_SOLVER(z, data, B) \
611 case B: \
612 solve_block<B>(comm, n, ptr, col, val, prm, rhs, ptype); \
613 break;
614
615 BOOST_PP_SEQ_FOR_EACH(ARCCORE_ALINA_CALL_BLOCK_SOLVER, ~, ARCCORE_ALINA_BLOCK_SIZES)
616
617#undef ARCCORE_ALINA_CALL_BLOCK_SOLVER
618#endif
619
620 case 1:
621 solve_scalar(comm, n, ptr, col, val, prm, rhs, ptype);
622 break;
623 default:
624 if (comm.rank == 0)
625 std::cout << "Unsupported block size!" << std::endl;
626 }
627}
Distributed Matrix using message passing.
Iterative solver wrapper for distributed linear systems.
Matrix market reader.
Definition IO.h:52
Result of a solving.
Definition AlinaUtils.h:52
NUMA-aware vector container.
Definition NumaVector.h:42
Matrix class, to be used by user.
Espace de nom pour les fonctions mathématiques.
Definition MathUtils.h:41
__host__ __device__ Real3x3 transpose(const Real3x3 &t)
Transpose la matrice.
Definition MathUtils.h:258
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Alina::detail::empty_params params
Runtime-configurable wrapper around matrix partitioner.
Convenience wrapper around MPI_Comm.
void check(const Condition &cond, const Message &message)
Communicator-wise condition checking.
Convenience wrapper around MPI_Init_threads/MPI_Finalize.