20#pragma GCC diagnostic ignored "-Wdeprecated-copy"
21#pragma GCC diagnostic ignored "-Wint-in-bool-context"
27#include <boost/program_options.hpp>
28#include <boost/preprocessor/seq/for_each.hpp>
30#include "arccore/alina/BuiltinBackend.h"
31#include "arccore/alina/StaticMatrix.h"
32#include "arccore/alina/Adapters.h"
34#if defined(SOLVER_BACKEND_CUDA)
35# include "arccore/alina/CudaBackend.h"
36# include "arccore/alina/relaxation_cusparse_ilu0.h"
38# ifndef SOLVER_BACKEND_BUILTIN
39# define SOLVER_BACKEND_BUILTIN
43#include "arccore/alina/MessagePassingUtils.h"
44#include "arccore/alina/DistributedPreconditionedSolver.h"
45#include "arccore/alina/DistributedPreconditioner.h"
46#include "arccore/alina/DistributedSolverRuntime.h"
48#include "arccore/alina/IO.h"
49#include "arccore/alina/Profiler.h"
51#ifndef ARCCORE_ALINA_BLOCK_SIZES
52# define ARCCORE_ALINA_BLOCK_SIZES (3)(4)
57namespace math = Alina::math;
62 ptrdiff_t n,
int block_size,
63 std::vector<ptrdiff_t>& ptr,
64 std::vector<ptrdiff_t>& col,
65 std::vector<double>& val,
66 std::vector<double>& rhs)
// Assembles this rank's strip of a 3D Poisson problem on an n x n x n grid
// with the standard 7-point finite-difference stencil, in CRS form
// (ptr/col/val), and fills the local RHS with ones.
// NOTE(review): the fused original line numbers jump (66 -> 68, 94 -> 97, ...),
// so this extract is missing lines; the boundary guards (k > 0, j > 0, i > 0,
// i+1 < n, ...) around the off-diagonal push_back calls and the matching
// val.push_back(-h2i) entries are presumably elided — confirm against the
// full file.
// Total number of unknowns: one per grid node.
68 ptrdiff_t n3 = n * n * n;
// Split rows evenly across ranks, rounding the chunk up to a multiple of
// block_size so block boundaries never straddle two processes.
70 ptrdiff_t chunk = (n3 + comm.size - 1) / comm.size;
71 if (chunk % block_size != 0) {
72 chunk += block_size - chunk % block_size;
// Clamp the local row range [row_beg, row_end) to the global size; the last
// rank may receive a short (or empty) chunk.
74 ptrdiff_t row_beg = std::min(n3, chunk * comm.rank);
75 ptrdiff_t row_end = std::min(n3, row_beg + chunk);
76 chunk = row_end - row_beg;
// Reserve for up to 7 nonzeros per row (7-point stencil) to avoid reallocs.
79 ptr.reserve(chunk + 1);
81 col.reserve(chunk * 7);
83 val.reserve(chunk * 7);
// Unit right-hand side (rhs is presumably resized to chunk in an elided line
// — TODO confirm).
86 std::fill(rhs.begin(), rhs.end(), 1.0);
// Inverse squared mesh step: h = 1 / (n - 1) on the unit cube.
88 const double h2i = (n - 1) * (n - 1);
// Walk the local rows; idx linearizes the (i, j, k) grid coordinates.
91 for (ptrdiff_t idx = row_beg; idx < row_end; ++idx) {
92 ptrdiff_t k = idx / (n * n);
93 ptrdiff_t j = (idx / n) % n;
94 ptrdiff_t i = idx % n;
// Neighbors below in z, y, x (guards elided in this extract).
97 col.push_back(idx - n * n);
102 col.push_back(idx - n);
107 col.push_back(idx - 1);
// Diagonal entry of the 7-point Laplacian.
112 val.push_back(6 * h2i);
// Neighbors above in x, y, z.
115 col.push_back(idx + 1);
120 col.push_back(idx + n);
125 col.push_back(idx + n * n);
// Close the current row in CRS.
129 ptr.push_back(col.size());
138 const std::string& A_file,
const std::string& rhs_file,
int block_size,
139 std::vector<ptrdiff_t>& ptr,
140 std::vector<ptrdiff_t>& col,
141 std::vector<double>& val,
142 std::vector<double>& rhs)
// Reads this rank's strip of the system matrix (and optionally the RHS) from
// MatrixMarket files. A_mm / rhs_mm are MatrixMarket reader objects whose
// construction is elided from this extract — TODO confirm against full file.
145 ptrdiff_t n = A_mm.rows();
// Same row-partitioning scheme as assemble_poisson3d: even chunks, rounded
// up to a multiple of block_size, clamped to n.
147 ptrdiff_t chunk = (n + comm.size - 1) / comm.size;
148 if (chunk % block_size != 0) {
149 chunk += block_size - chunk % block_size;
152 ptrdiff_t row_beg = std::min(n, chunk * comm.rank);
153 ptrdiff_t row_end = std::min(n, row_beg + chunk);
155 chunk = row_end - row_beg;
// Extract the local CRS strip [row_beg, row_end) from the matrix reader.
157 A_mm(ptr, col, val, row_beg, row_end);
// No RHS file: default to a vector of ones (resize presumably elided).
159 if (rhs_file.empty()) {
161 std::fill(rhs.begin(), rhs.end(), 1.0);
// Otherwise read the matching RHS strip.
165 rhs_mm(rhs, row_beg, row_end);
174 const std::string& A_file,
const std::string& rhs_file,
int block_size,
175 std::vector<ptrdiff_t>& ptr,
176 std::vector<ptrdiff_t>& col,
177 std::vector<double>& val,
178 std::vector<double>& rhs)
// Binary-format counterpart of read_matrix_market: reads this rank's strip
// of a CRS matrix (and optionally a dense RHS) from files produced by the
// mm2bin utility (see the --binary option help text in main).
180 ptrdiff_t n = Alina::IO::crs_size<ptrdiff_t>(A_file);
// Even row chunks, rounded up to a multiple of block_size, clamped to n —
// identical partitioning to the other readers.
182 ptrdiff_t chunk = (n + comm.size - 1) / comm.size;
183 if (chunk % block_size != 0) {
184 chunk += block_size - chunk % block_size;
187 ptrdiff_t row_beg = std::min(n, chunk * comm.rank);
188 ptrdiff_t row_end = std::min(n, row_beg + chunk);
190 chunk = row_end - row_beg;
// Read only the local row range from the binary CRS file.
192 Alina::IO::read_crs(A_file, n, ptr, col, val, row_beg, row_end);
// Default RHS of ones when no file was given.
194 if (rhs_file.empty()) {
196 std::fill(rhs.begin(), rhs.end(), 1.0);
199 ptrdiff_t rows, cols;
// Read the local strip of the dense RHS vector.
200 Alina::IO::read_dense(rhs_file, rows, cols, rhs, row_beg, row_end);
// Repartitions the naive row-strip distributed matrix with the selected
// partitioner (e.g. ParMETIS) and permutes the RHS accordingly.
// Returns the repartitioned distributed matrix; skips all work when running
// on a single rank or when the partitioner type is 'merge'.
// NOTE(review): several lines are elided in this extract (the parameter list
// head, the 'part'/'J' setup, the builtin-backend branch body) — hedged
// comments below mark what cannot be confirmed from here.
207template <
class Backend,
class Matrix>
208std::shared_ptr<Alina::DistributedMatrix<Backend>>
211 Alina::eMatrixPartitionerType ptype,
int block_size = 1)
213 auto& prof = Alina::Profiler::globalProfiler();
214 typedef typename Backend::value_type val_type;
215 typedef typename Alina::math::rhs_of<val_type>::type rhs_type;
// Wrap the local strip into a distributed matrix over the communicator.
218 auto A = std::make_shared<DMatrix>(comm, Astrip);
// Nothing to repartition on one rank, or when merging is requested
// (the early-return body is elided in this extract).
220 if (comm.size == 1 || ptype == Alina::eMatrixPartitionerType::merge)
223 prof.tic(
"partition");
225 prm.put(
"type", ptype);
// I is the permutation/restriction produced by the partitioner; J is
// presumably its transpose (constructed in an elided line — TODO confirm).
228 auto I = part(*A, block_size);
// Re-distribute the system: A <- J * A * I.
230 A = product(*J, *product(*A, *I));
232#if defined(SOLVER_BACKEND_BUILTIN)
234#elif defined(SOLVER_BACKEND_CUDA)
// CUDA backend: permute the RHS on the device via SpMV with J.
235 thrust::device_vector<rhs_type> new_rhs(J->loc_rows());
238 J->move_to_backend(bprm);
240 Alina::backend::spmv(1, *J, rhs, 0, new_rhs);
242 prof.toc(
"partition");
// Solves the system using a block-valued backend (static block size B).
// Sets up the distributed matrix (optionally repartitioned), builds the
// preconditioned solver, optionally rebuilds the preconditioner to test
// that path, and reports iterations/residual on rank 0.
// NOTE(review): backend typedefs, B, chunk, bprm and the actual solve call
// are elided in this extract — hedged comments mark the gaps.
248#if defined(SOLVER_BACKEND_BUILTIN)
252 const std::vector<ptrdiff_t>& ptr,
253 const std::vector<ptrdiff_t>& col,
254 const std::vector<double>& val,
256 const std::vector<double>& f,
257 Alina::eMatrixPartitionerType ptype)
259 auto& prof = Alina::Profiler::globalProfiler();
// Reinterpret the scalar RHS as block-valued values (B scalars per block);
// chunk / B is the number of local block rows.
275 reinterpret_cast<const rhs_type*
>(&f[0]) + chunk / B);
// Builds the distributed matrix; factored into a lambda because it is
// invoked twice (initial setup and the optional rebuild test below).
277 auto get_distributed_matrix = [&]() {
278 auto t = prof.scoped_tic(
"distributed matrix");
280 std::shared_ptr<DMatrix> A;
// Repartition unless the 'merge' partitioner was chosen; chunk is updated
// to the new local size afterwards.
282 if (ptype != Alina::eMatrixPartitionerType::merge) {
283 A = partition<Backend>(comm,
284 Alina::adapter::block_matrix<val_type>(std::tie(chunk, ptr, col, val)),
285 rhs, bprm, ptype, prm.get(
"precond.coarsening.aggr.block_size", 1));
286 chunk = A->loc_rows();
289 A = std::make_shared<DMatrix>(
291 Alina::adapter::block_matrix<val_type>(std::tie(chunk, ptr, col, val)));
297 std::shared_ptr<DMatrix> A;
298 std::shared_ptr<Solver> solve;
// Time the solver setup separately from the solve itself.
301 auto t = prof.scoped_tic(
"setup");
302 A = get_distributed_matrix();
303 solve = std::make_shared<Solver>(comm, A, prm, bprm);
306 if (comm.rank == 0) {
307 std::cout << *solve << std::endl;
// Optional preconditioner-rebuild exercise (enabled via --test-rebuild,
// which sets precond.allow_rebuild in main).
310 if (prm.get(
"precond.allow_rebuild",
false)) {
311 if (comm.rank == 0) {
312 std::cout <<
"Rebuilding the preconditioner..." << std::endl;
316 auto t = prof.scoped_tic(
"rebuild");
317 A = get_distributed_matrix();
318 solve->precond().rebuild(A);
321 if (comm.rank == 0) {
322 std::cout << *solve << std::endl;
// Report convergence results (r comes from the elided solve call) and the
// accumulated profiler timings.
332 if (comm.rank == 0) {
333 std::cout <<
"Iterations: " << r.nbIteration() << std::endl
334 <<
"Error: " << r.residual() << std::endl
335 << prof << std::endl;
// Scalar (block_size == 1) counterpart of solve_block: builds the
// distributed matrix, sets up the preconditioned solver, optionally tests
// preconditioner rebuild, solves, and reports results on rank 0.
// NOTE(review): the function head, backend typedefs and the solve call are
// elided in this extract — hedged comments mark the gaps.
343 const std::vector<ptrdiff_t>& ptr,
344 const std::vector<ptrdiff_t>& col,
345 const std::vector<double>& val,
347 const std::vector<double>& f,
348 Alina::eMatrixPartitionerType ptype)
350 auto& prof = Alina::Profiler::globalProfiler();
351#if defined(SOLVER_BACKEND_BUILTIN)
354#elif defined(SOLVER_BACKEND_CUDA)
// Diagnostic dump of the backend's index/value type widths (useful when
// checking 32- vs 64-bit index configurations on the device backend).
358 std::cout <<
"Using scalar solve ptr_size=" <<
sizeof(ptrdiff_t)
359 <<
" ptr_type_size=" <<
sizeof(Backend::ptr_type)
360 <<
" col_type_size=" <<
sizeof(Backend::col_type)
361 <<
" value_type_size=" <<
sizeof(Backend::value_type)
370#if defined(SOLVER_BACKEND_BUILTIN)
372#elif defined(SOLVER_BACKEND_CUDA)
// CUDA backend: create the cuSPARSE handle used by backend parameters and
// copy the RHS to the device.
373 cusparseCreate(&bprm.cusparse_handle);
374 thrust::device_vector<double> rhs(f);
// Builds the distributed matrix; a lambda because it is invoked twice
// (initial setup and the optional rebuild test).
377 auto get_distributed_matrix = [&]() {
378 auto t = prof.scoped_tic(
"distributed matrix");
379 std::shared_ptr<DMatrix> A;
// Repartition unless 'merge' was requested; chunk tracks the new local
// row count afterwards.
381 if (ptype != Alina::eMatrixPartitionerType::merge) {
382 A = partition<Backend>(comm,
383 std::tie(chunk, ptr, col, val), rhs, bprm, ptype,
384 prm.get(
"precond.coarsening.aggr.block_size", 1));
385 chunk = A->loc_rows();
388 A = std::make_shared<DMatrix>(comm, std::tie(chunk, ptr, col, val));
394 std::shared_ptr<DMatrix> A;
395 std::shared_ptr<Solver> solve;
// Setup phase, timed separately from the solve.
398 auto t = prof.scoped_tic(
"setup");
399 A = get_distributed_matrix();
400 solve = std::make_shared<Solver>(comm, A, prm, bprm);
403 if (comm.rank == 0) {
404 std::cout <<
"SolverInfo:\n";
405 std::cout << *solve << std::endl;
// Optional preconditioner-rebuild exercise (see --test-rebuild in main).
408 if (prm.get(
"precond.allow_rebuild",
false)) {
409 if (comm.rank == 0) {
410 std::cout <<
"Rebuilding the preconditioner..." << std::endl;
414 auto t = prof.scoped_tic(
"rebuild");
415 A = get_distributed_matrix();
416 solve->precond().rebuild(A);
419 if (comm.rank == 0) {
420 std::cout << *solve << std::endl;
424#if defined(SOLVER_BACKEND_BUILTIN)
426#elif defined(SOLVER_BACKEND_CUDA)
// Device-resident solution vector, zero-initialized.
427 thrust::device_vector<double> x(chunk, 0.0);
// Report convergence (r comes from the elided solve call) and profiler
// timings on rank 0.
434 if (comm.rank == 0) {
435 std::cout <<
"Iterations: " << r.nbIteration() << std::endl
436 <<
"Error: " << r.residual() << std::endl
437 << prof << std::endl;
// Entry point: parses command-line options, obtains the system (assembled
// Poisson problem, MatrixMarket/binary files, or pre-partitioned per-rank
// files), then dispatches to solve_scalar or solve_block<B> according to
// the requested block size.
// NOTE(review): this extract elides lines (MPI init/communicator setup,
// several option-name tokens, switch cases, closing braces) — hedged
// comments mark the gaps.
442int main(
int argc,
char* argv[])
444 auto& prof = Alina::Profiler::globalProfiler();
450 std::cout <<
"World size: " << comm.size << std::endl;
// ---- Command-line options ----------------------------------------------
453 namespace po = boost::program_options;
454 po::options_description desc(
"Options");
456 desc.add_options()(
"help,h",
"show help")(
"matrix,A",
457 po::value<std::string>(),
458 "System matrix in the MatrixMarket format. "
459 "When not specified, a Poisson problem in 3D unit cube is assembled. ")(
461 po::value<std::string>()->default_value(
""),
462 "The RHS vector in the MatrixMarket format. "
463 "When omitted, a vector of ones is used by default. "
464 "Should only be provided together with a system matrix. ")(
466 po::value<std::vector<std::string>>()->multitoken(),
467 "Pre-partitioned matrix (single file per MPI process)")(
469 po::value<std::vector<std::string>>()->multitoken(),
470 "Pre-partitioned RHS (single file per MPI process)")(
472 po::bool_switch()->default_value(
false),
473 "When specified, treat input files as binary instead of as MatrixMarket. "
474 "It is assumed the files were converted to binary format with mm2bin utility. ")(
476 po::value<int>()->default_value(1),
477 "The block size of the system matrix. "
478 "When specified, the system matrix is assumed to have block-wise structure. "
479 "This usually is the case for problems in elasticity, structural mechanics, "
480 "for coupled systems of PDE (such as Navier-Stokes equations), etc. ")(
// Default partitioner is ParMETIS when available, otherwise 'merge'
// (i.e. no repartitioning).
482 po::value<Alina::eMatrixPartitionerType>()->default_value(
483#
if defined(ARCCORE_ALINA_HAVE_PARMETIS)
484 Alina::eMatrixPartitionerType::parmetis
486 Alina::eMatrixPartitionerType::merge
489 "Repartition the system matrix")(
491 po::value<ptrdiff_t>()->default_value(128),
492 "domain size")(
"prm-file,P",
493 po::value<std::string>(),
494 "Parameter file in json format. ")(
496 po::value<std::vector<std::string>>()->multitoken(),
497 "Parameters specified as name=value pairs. "
498 "May be provided multiple times. Examples:\n"
499 " -p solver.tol=1e-3\n"
500 " -p precond.coarse_enough=300")(
502 po::bool_switch()->default_value(
false),
503 "When specified, try to rebuild the solver before solving. ");
505 po::positional_options_description p;
508 po::variables_map vm;
509 po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
// Print usage and exit (the return is presumably elided here).
512 if (vm.count(
"help")) {
514 std::cout << desc << std::endl;
// ---- Solver/preconditioner parameters ----------------------------------
// Parameters come from an optional JSON file plus -p name=value overrides.
519 if (vm.count(
"prm-file")) {
520 prm.read_json(vm[
"prm-file"].as<std::string>());
523 if (vm.count(
"prm")) {
524 for (
const std::string& v : vm[
"prm"].as<std::vector<std::string>>()) {
// ---- Problem input -----------------------------------------------------
530 std::vector<ptrdiff_t> ptr;
531 std::vector<ptrdiff_t> col;
532 std::vector<double> val;
533 std::vector<double> rhs;
535 int block_size = vm[
"block-size"].as<
int>();
536 int aggr_block = prm.get(
"precond.coarsening.aggr.block_size", 1);
538 bool binary = vm[
"binary"].as<
bool>();
539 Alina::eMatrixPartitionerType ptype = vm[
"partitioner"].as<Alina::eMatrixPartitionerType>();
// Case 1: matrix (and optional RHS) from files; chunks are aligned to
// block_size * aggr_block so partitioning keeps blocks intact.
541 if (vm.count(
"matrix")) {
544 n = read_binary(comm,
545 vm[
"matrix"].as<std::string>(),
546 vm[
"rhs"].as<std::string>(),
547 block_size * aggr_block, ptr, col, val, rhs);
550 n = read_matrix_market(comm,
551 vm[
"matrix"].as<std::string>(),
552 vm[
"rhs"].as<std::string>(),
553 block_size * aggr_block, ptr, col, val, rhs);
// Case 2: pre-partitioned input, one file per rank; repartitioning is
// forced off since the distribution is already fixed.
557 else if (vm.count(
"Ap")) {
559 ptype = Alina::eMatrixPartitionerType::merge;
561 std::vector<std::string> Aparts = vm[
"Ap"].as<std::vector<std::string>>();
562 comm.
check(Aparts.size() ==
static_cast<size_t>(comm.size),
563 "--Ap should have single entry per MPI process");
566 Alina::IO::read_crs(Aparts[comm.rank], n, ptr, col, val);
// Optional per-rank RHS files; defaults to ones otherwise (elided).
573 if (vm.count(
"fp")) {
574 std::vector<std::string> fparts = vm[
"fp"].as<std::vector<std::string>>();
575 comm.
check(fparts.size() ==
static_cast<size_t>(comm.size),
576 "--fp should have single entry per MPI process");
582 Alina::IO::read_dense(fparts[comm.rank], rows, cols, rhs);
588 comm.
check(rhs.size() ==
static_cast<size_t>(n),
"Wrong RHS size");
// Case 3: no input given — assemble the 3D Poisson benchmark problem.
596 prof.tic(
"assemble");
597 n = assemble_poisson3d(comm,
598 vm[
"size"].as<ptrdiff_t>(),
599 block_size * aggr_block, ptr, col, val, rhs);
600 prof.toc(
"assemble");
// --test-rebuild maps onto the precond.allow_rebuild parameter consumed
// by solve_scalar / solve_block.
603 if (vm[
"test-rebuild"].as<bool>()) {
604 prm.put(
"precond.allow_rebuild",
true);
// ---- Dispatch on block size --------------------------------------------
// Block sizes are instantiated from the ARCCORE_ALINA_BLOCK_SIZES
// preprocessor sequence (default (3)(4)); the builtin backend expands one
// switch case per size via Boost.Preprocessor.
607 switch (block_size) {
609#if defined(SOLVER_BACKEND_BUILTIN)
610#define ARCCORE_ALINA_CALL_BLOCK_SOLVER(z, data, B) \
612 solve_block<B>(comm, n, ptr, col, val, prm, rhs, ptype); \
615 BOOST_PP_SEQ_FOR_EACH(ARCCORE_ALINA_CALL_BLOCK_SOLVER, ~, ARCCORE_ALINA_BLOCK_SIZES)
617#undef ARCCORE_ALINA_CALL_BLOCK_SOLVER
// block_size == 1 (presumably the default case label is elided).
621 solve_scalar(comm, n, ptr, col, val, prm, rhs, ptype);
625 std::cout <<
"Unsupported block size!" << std::endl;
Distributed Matrix using message passing.
Iterative solver wrapper for distributed linear systems.
Distributed Preconditioner.
NUMA-aware vector container.
Matrix class, to be used by user.
Namespace for mathematical functions.
__host__ __device__ Real3x3 transpose(const Real3x3 &t)
Transposes the matrix.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Alina::detail::empty_params params
Runtime-configurable wrapper around matrix partitioner.
Convenience wrapper around MPI_Comm.
void check(const Condition &cond, const Message &message)
Communicator-wise condition checking.
Convenience wrapper around MPI_Init_threads/MPI_Finalize.