27#if defined(SOLVER_BACKEND_CUDA)
36#include "DomainPartition.h"
40#include <boost/scope_exit.hpp>
42#include <boost/program_options.hpp>
44#include <boost/multi_array.hpp>
45#if defined(SOLVER_BACKEND_CUDA)
46#include "arccore/alina/CudaBackend.h"
47#include "arccore/alina/relaxation_cusparse_ilu0.h"
50#ifndef SOLVER_BACKEND_BUILTIN
51#define SOLVER_BACKEND_BUILTIN
53#include "arccore/alina/BuiltinBackend.h"
57#include "arccore/alina/PreconditionedSolver.h"
58#include "arccore/alina/AMG.h"
59#include "arccore/alina/CoarseningRuntime.h"
60#include "arccore/alina/RelaxationRuntime.h"
61#include "arccore/alina/PreconditionerRuntime.h"
62#include "arccore/alina/DistributedDirectSolverRuntime.h"
63#include "arccore/alina/DistributedSolverRuntime.h"
64#include "arccore/alina/DistributedSubDomainDeflation.h"
65#include "arccore/alina/Adapters.h"
66#include "arccore/alina/Profiler.h"
67#include "AlinaSamplesCommon.h"
71struct partitioned_deflation
74 std::vector<unsigned> domain;
76 partitioned_deflation(boost::array<ptrdiff_t, 2> LO,
77 boost::array<ptrdiff_t, 2> HI,
81 DomainPartition<2> part(LO, HI, nparts);
83 ptrdiff_t nx = HI[0] - LO[0] + 1;
84 ptrdiff_t ny = HI[1] - LO[1] + 1;
86 domain.resize(nx * ny);
87 for (
unsigned p = 0; p < nparts; ++p) {
88 boost::array<ptrdiff_t, 2> lo = part.domain(p).min_corner();
89 boost::array<ptrdiff_t, 2> hi = part.domain(p).max_corner();
91 for (
int j = lo[1]; j <= hi[1]; ++j) {
92 for (
int i = lo[0]; i <= hi[0]; ++i) {
93 domain[(j - LO[1]) * nx + (i - LO[0])] = p;
99 size_t dim()
const {
return nparts; }
101 double operator()(ptrdiff_t i,
unsigned j)
const
103 return domain[i] == j;
107struct linear_deflation
109 std::vector<double> x;
110 std::vector<double> y;
112 linear_deflation(ptrdiff_t chunk,
113 boost::array<ptrdiff_t, 2> lo,
114 boost::array<ptrdiff_t, 2> hi)
116 double hx = 1.0 / (hi[0] - lo[0]);
117 double hy = 1.0 / (hi[1] - lo[1]);
119 ptrdiff_t nx = hi[0] - lo[0] + 1;
120 ptrdiff_t ny = hi[1] - lo[1] + 1;
125 for (ptrdiff_t j = 0; j < ny; ++j) {
126 for (ptrdiff_t i = 0; i < nx; ++i) {
127 x.push_back(i * hx - 0.5);
128 y.push_back(j * hy - 0.5);
133 size_t dim()
const {
return 3; }
135 double operator()(ptrdiff_t i,
unsigned j)
const
149struct bilinear_deflation
152 std::vector<double> v;
154 bilinear_deflation(ptrdiff_t n,
156 boost::array<ptrdiff_t, 2> lo,
157 boost::array<ptrdiff_t, 2> hi)
163 { lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
164 { lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n }
167 for (
int j = 0; j < 2; ++j)
168 for (
int i = 0; i < 2; ++i)
179 v.resize(chunk * nv, 0);
181 double* dv = v.data();
183 ptrdiff_t nx = hi[0] - lo[0] + 1;
184 ptrdiff_t ny = hi[1] - lo[1] + 1;
186 double hx = 1.0 / (nx - 1);
187 double hy = 1.0 / (ny - 1);
189 for (
int j = 0; j < 2; ++j) {
190 for (
int i = 0; i < 2; ++i) {
194 boost::multi_array_ref<double, 2> V(dv, boost::extents[ny][nx]);
196 for (ptrdiff_t jj = 0; jj < ny; ++jj) {
198 double b = std::abs((1 - j) - y);
199 for (ptrdiff_t ii = 0; ii < nx; ++ii) {
202 double a = std::abs((1 - i) - x);
212 size_t dim()
const {
return nv; }
214 double operator()(ptrdiff_t i,
unsigned j)
const
216 return v[j * chunk + i];
220#ifndef SOLVER_BACKEND_CUDA
224 std::vector<double> v;
226 mba_deflation(ptrdiff_t n,
228 boost::array<ptrdiff_t, 2> lo,
229 boost::array<ptrdiff_t, 2> hi)
235 { lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
236 { lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n }
239 for (
int j = 0; j < 2; ++j)
240 for (
int i = 0; i < 2; ++i)
244 v.resize(chunk * nv, 0);
246 double* dv = v.data();
247 std::fill(dv, dv + chunk, 1.0);
250 ptrdiff_t nx = hi[0] - lo[0] + 1;
251 ptrdiff_t ny = hi[1] - lo[1] + 1;
253 double hx = 1.0 / (nx - 1);
254 double hy = 1.0 / (ny - 1);
256 std::array<double, 2> cmin = { -0.01, -0.01 };
257 std::array<double, 2> cmax = { 1.01, 1.01 };
258 std::array<size_t, 2> grid = { 3, 3 };
260 std::array<std::array<double, 2>, 4> coo;
261 std::array<double, 4> val;
263 for (
int j = 0, idx = 0; j < 2; ++j) {
264 for (
int i = 0; i < 2; ++i, ++idx) {
270 for (
int j = 0, idx = 0; j < 2; ++j) {
271 for (
int i = 0; i < 2; ++i, ++idx) {
275 std::fill(val.begin(), val.end(), 0.0);
278 mba::MBA<2> interp(cmin, cmax, grid, coo, val, 8, 1e-8, 0.5, zero);
280 boost::multi_array_ref<double, 2> V(dv, boost::extents[ny][nx]);
282 for (
int jj = 0; jj < ny; ++jj)
283 for (
int ii = 0; ii < nx; ++ii) {
284 std::array<double, 2> p = { ii * hx, jj * hy };
285 V[jj][ii] = interp(p);
293 size_t dim()
const {
return nv; }
295 double operator()(ptrdiff_t i,
unsigned j)
const
297 return v[j * chunk + i];
300 static double zero(
const std::array<double, 2>&)
307struct harmonic_deflation
310 std::vector<double> v;
312 harmonic_deflation(ptrdiff_t n,
314 boost::array<ptrdiff_t, 2> lo,
315 boost::array<ptrdiff_t, 2> hi)
321 { lo[0] > 0 || lo[1] > 0, hi[0] + 1 < n || lo[1] > 0 },
322 { lo[0] > 0 || hi[1] + 1 < n, hi[0] + 1 < n || hi[1] + 1 < n }
325 for (
int j = 0; j < 2; ++j)
326 for (
int i = 0; i < 2; ++i)
337 v.resize(chunk * nv, 0);
338 double* dv = v.data();
340 ptrdiff_t nx = hi[0] - lo[0] + 1;
341 ptrdiff_t ny = hi[1] - lo[1] + 1;
343 std::vector<ptrdiff_t> ptr;
344 std::vector<ptrdiff_t> col;
345 std::vector<double> val;
346 std::vector<double> rhs(chunk, 0.0);
348 ptr.reserve(chunk + 1);
349 col.reserve(chunk * 5);
350 val.reserve(chunk * 5);
354 for (
int j = 0, k = 0; j < ny; ++j) {
355 for (
int i = 0; i < nx; ++i, ++k) {
357 (i == 0 && j == 0) ||
358 (i == 0 && j == ny - 1) ||
359 (i == nx - 1 && j == 0) ||
360 (i == nx - 1 && j == ny - 1)) {
369 col.push_back(k + nx);
372 else if (j == ny - 1) {
373 col.push_back(k - nx);
377 col.push_back(k - nx);
378 val.push_back(-0.25);
380 col.push_back(k + nx);
381 val.push_back(-0.25);
385 col.push_back(k + 1);
388 else if (i == nx - 1) {
389 col.push_back(k - 1);
393 col.push_back(k - 1);
394 val.push_back(-0.25);
396 col.push_back(k + 1);
397 val.push_back(-0.25);
401 ptr.push_back(col.size());
410 solve(Alina::adapter::zero_copy(chunk, ptr.data(), col.data(), val.data()));
412 for (
int j = 0; j < 2; ++j) {
413 for (
int i = 0; i < 2; ++i) {
417 ptrdiff_t idx = i * (nx - 1) + j * (ny - 1) * nx;
430 size_t dim()
const {
return nv; }
432 double operator()(ptrdiff_t i,
unsigned j)
const
434 return v[j * chunk + i];
440 const DomainPartition<2>& part;
441 const std::vector<ptrdiff_t>& dom;
444 const std::vector<ptrdiff_t>& d)
449 ptrdiff_t operator()(ptrdiff_t i, ptrdiff_t j)
const
451 boost::array<ptrdiff_t, 2> p = { { i, j } };
452 std::pair<int, ptrdiff_t> v = part.index(p);
453 return dom[v.first] + v.second;
459 auto& prof = Alina::Profiler::globalProfiler();
464 std::cout <<
"World size: " << world.size << std::endl;
468 std::string deflation_type =
"bilinear";
470 auto coarsening = Alina::eCoarserningType::smoothed_aggregation;
471 auto relaxation = Alina::eRelaxationType::spai0;
472 auto iterative_solver = Alina::eSolverType::bicgstabl;
473 auto direct_solver = Alina::eDistributedDirectSolverType::skyline_lu;
475 bool just_relax =
false;
476 bool symm_dirichlet =
true;
477 std::string problem =
"laplace2d";
478 std::string parameter_file;
479 std::string out_file;
481 namespace po = boost::program_options;
482 po::options_description desc(
"Options");
484 desc.add_options()(
"help,h",
"show help")(
486 po::value<std::string>(&problem)->default_value(problem),
487 "laplace2d, recirc2d")(
489 po::value<bool>(&symm_dirichlet)->default_value(symm_dirichlet),
490 "Use symmetric Dirichlet conditions in laplace2d")(
492 po::value<ptrdiff_t>(&n)->default_value(n),
495 po::value<Alina::eCoarserningType>(&coarsening)->default_value(coarsening),
496 "ruge_stuben, aggregation, smoothed_aggregation, smoothed_aggr_emin")(
498 po::value<Alina::eRelaxationType>(&relaxation)->default_value(relaxation),
499 "gauss_seidel, ilu0, iluk, ilut, damped_jacobi, spai0, spai1, chebyshev")(
501 po::value<Alina::eSolverType>(&iterative_solver)->default_value(iterative_solver),
502 "cg, bicgstab, bicgstabl, gmres")(
504 po::value<Alina::eDistributedDirectSolverType>(&direct_solver)->default_value(direct_solver),
506#ifdef ARCCORE_ALINA_HAVE_PASTIX
511 po::value<std::string>(&deflation_type)->default_value(deflation_type),
512 "constant, partitioned, linear, bilinear, mba, harmonic")(
514 po::value<int>()->default_value(16),
515 "number of partitions for partitioned deflation")(
517 po::value<std::string>(¶meter_file),
518 "parameter file in json format")(
520 po::value<std::vector<std::string>>()->multitoken(),
521 "Parameters specified as name=value pairs. "
522 "May be provided multiple times. Examples:\n"
523 " -p solver.tol=1e-3\n"
524 " -p precond.coarse_enough=300")(
526 po::bool_switch(&just_relax),
527 "Do not create AMG hierarchy, use relaxation as preconditioner")(
529 po::value<std::string>(&out_file),
532 po::variables_map vm;
533 po::store(po::parse_command_line(argc, argv, desc), vm);
536 if (vm.count(
"help")) {
537 std::cout << desc << std::endl;
542 if (vm.count(
"params"))
543 prm.read_json(parameter_file);
545 if (vm.count(
"prm")) {
546 for (
const std::string& v : vm[
"prm"].as<std::vector<std::string>>()) {
551 prm.put(
"isolver.type", iterative_solver);
552 prm.put(
"dsolver.type", direct_solver);
554 const ptrdiff_t n2 = n * n;
555 const double hinv = (n - 1);
556 const double h2i = (n - 1) * (n - 1);
557 const double h = 1 / hinv;
559 boost::array<ptrdiff_t, 2> lo = { { 0, 0 } };
560 boost::array<ptrdiff_t, 2> hi = { { n - 1, n - 1 } };
562 prof.tic(
"partition");
563 DomainPartition<2> part(lo, hi, world.size);
564 ptrdiff_t chunk = part.size(world.rank);
566 std::vector<ptrdiff_t> domain(world.size + 1);
567 MPI_Allgather(&chunk, 1, Alina::mpi_datatype<ptrdiff_t>(),
568 &domain[1], 1, Alina::mpi_datatype<ptrdiff_t>(), world);
569 std::partial_sum(domain.begin(), domain.end(), domain.begin());
571 lo = part.domain(world.rank).min_corner();
572 hi = part.domain(world.rank).max_corner();
573 prof.toc(
"partition");
577 prof.tic(
"deflation");
578 std::function<double(ptrdiff_t,
unsigned)> dv;
581 if (deflation_type ==
"constant") {
584 else if (deflation_type ==
"partitioned") {
585 ndv = vm[
"subparts"].as<
int>();
588 else if (deflation_type ==
"linear") {
592 else if (deflation_type ==
"bilinear") {
596#ifndef SOLVER_BACKEND_CUDA
598 else if (deflation_type ==
"mba") {
604 else if (deflation_type ==
"harmonic") {
610 throw std::runtime_error(
"Unsupported deflation type");
613 prm.put(
"num_def_vec", ndv);
614 prm.put(
"def_vec", &dv);
615 prof.toc(
"deflation");
617 prof.tic(
"assemble");
618 std::vector<ptrdiff_t> ptr;
619 std::vector<ptrdiff_t> col;
620 std::vector<double> val;
621 std::vector<double> rhs;
623 ptr.reserve(chunk + 1);
624 col.reserve(chunk * 5);
625 val.reserve(chunk * 5);
630 if (problem ==
"recirc2d") {
631 const double eps = 1e-5;
633 for (ptrdiff_t j = lo[1]; j <= hi[1]; ++j) {
635 for (ptrdiff_t i = lo[0]; i <= hi[0]; ++i) {
638 if (i == 0 || j == 0 || i + 1 == n || j + 1 == n) {
639 col.push_back(renum(i, j));
642 sin(M_PI * x) + sin(M_PI * y) +
643 sin(13 * M_PI * x) + sin(13 * M_PI * y));
646 double a = -sin(M_PI * x) * cos(M_PI * y) * hinv;
647 double b = sin(M_PI * y) * cos(M_PI * x) * hinv;
650 col.push_back(renum(i, j - 1));
651 val.push_back(-eps * h2i - std::max(b, 0.0));
655 col.push_back(renum(i - 1, j));
656 val.push_back(-eps * h2i - std::max(a, 0.0));
659 col.push_back(renum(i, j));
660 val.push_back(4 * eps * h2i + fabs(a) + fabs(b));
663 col.push_back(renum(i + 1, j));
664 val.push_back(-eps * h2i + std::min(a, 0.0));
668 col.push_back(renum(i, j + 1));
669 val.push_back(-eps * h2i + std::min(b, 0.0));
674 ptr.push_back(col.size());
679 for (ptrdiff_t j = lo[1]; j <= hi[1]; ++j) {
680 for (ptrdiff_t i = lo[0]; i <= hi[0]; ++i) {
681 if (!symm_dirichlet && (i == 0 || j == 0 || i + 1 == n || j + 1 == n)) {
682 col.push_back(renum(i, j));
688 col.push_back(renum(i, j - 1));
693 col.push_back(renum(i - 1, j));
697 col.push_back(renum(i, j));
698 val.push_back(4 * h2i);
701 col.push_back(renum(i + 1, j));
706 col.push_back(renum(i, j + 1));
712 ptr.push_back(col.size());
716 prof.toc(
"assemble");
720#if defined(SOLVER_BACKEND_CUDA)
721 cusparseCreate(&bprm.cusparse_handle);
724 auto f = Backend::copy_vector(rhs, bprm);
725 auto x = Backend::create_vector(chunk, bprm);
727 Alina::backend::clear(*x);
730 prm.put(
"local.class",
"relaxation");
731 prm.put(
"local.type", relaxation);
734 prm.put(
"local.coarsening.type", coarsening);
735 prm.put(
"local.relax.type", relaxation);
743 SDD solve(world, std::tie(chunk, ptr, col, val), prm, bprm);
750 if (world.rank == 0) {
751 std::cout <<
"Iterations: " << r.nbIteration() << std::endl
752 <<
"Error: " << r.residual() << std::endl
753 << prof << std::endl;
758int main(
int argc,
char* argv[])
760 return Arcane::Alina::SampleMainContext::execMain(main2, argc, argv);
Runtime wrapper for distributed direct solvers.
Distributed solver based on subdomain deflation.
Generalized Minimal Residual (GMRES) method.
Convenience class that bundles together a preconditioner and an iterative solver.
Vue d'un tableau d'éléments de type T.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Alina::detail::empty_params params
Smoothed aggregation coarsening.
Pointwise constant deflation vectors.
Convenience wrapper around MPI_Comm.