129class DistributedSubDomainDeflation
133 typedef typename LocalPrecond::backend_type backend_type;
134 typedef typename backend_type::params backend_params;
138 typename LocalPrecond::params local;
139 typename IterativeSolver::params isolver;
140 typename DirectSolver::params dsolver;
143 Int32 num_def_vec = 0;
146 std::function<double(ptrdiff_t,
unsigned)> def_vec;
151 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, local)
152 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, isolver)
153 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, dsolver)
154 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, num_def_vec)
157 ptr = p.get(
"def_vec", ptr);
159 precondition(ptr,
"Error in subdomain_deflation parameters: def_vec is not set");
161 def_vec = *
static_cast<std::function<
double(ptrdiff_t,
unsigned)
>*>(ptr);
163 p.check_params({
"local",
"isolver",
"dsolver",
"num_def_vec",
"def_vec" });
166 void get(
PropertyTree& p,
const std::string& path)
const
168 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, local);
169 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, isolver);
170 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, dsolver);
171 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, num_def_vec);
175 typedef typename backend_type::value_type value_type;
176 typedef typename math::scalar_of<value_type>::type scalar_type;
177 typedef typename backend_type::matrix bmatrix;
178 typedef typename backend_type::vector vector;
181 template <
class Matrix>
185 const backend_params& bprm = backend_params())
187 , nrows(backend::nbRow(Astrip))
188 , ndv(prm.num_def_vec)
189 , dv_start(comm.size + 1, 0)
191 , q(backend_type::create_vector(nrows, bprm))
194 A = std::make_shared<matrix>(comm, Astrip, nrows);
199 std::shared_ptr<matrix> A,
201 const backend_params& bprm = backend_params())
203 , nrows(A->loc_rows())
204 , ndv(prm.num_def_vec)
206 , dv_start(comm.size + 1, 0)
208 , q(backend_type::create_vector(nrows, bprm))
214 void init(
const params& prm = params(),
215 const backend_params& bprm = backend_params())
217 ARCCORE_ALINA_TIC(
"setup deflation");
218 typedef CSRMatrix<value_type, ptrdiff_t> build_matrix;
221 std::vector<ptrdiff_t> dv_size(comm.size);
222 MPI_Allgather(&ndv, 1, mpi_datatype<ptrdiff_t>(), &dv_size[0], 1, mpi_datatype<ptrdiff_t>(), comm);
224 std::partial_sum(dv_size.begin(), dv_size.end(), dv_start.begin() + 1);
225 nz = dv_start.back();
229 dd = backend_type::create_vector(ndv, bprm);
231 auto az_loc = std::make_shared<build_matrix>();
232 auto az_rem = std::make_shared<build_matrix>();
234 auto a_loc = A->local();
235 auto a_rem = A->remote();
237 const CommunicationPattern<backend_type>& Acp = A->cpat();
240 ARCCORE_ALINA_TIC(
"copy deflation vectors");
242 std::vector<value_type> z(nrows);
243 for (
int j = 0; j < ndv; ++j) {
245 for (ptrdiff_t i = begin; i < (begin + size); ++i)
246 z[i] = prm.def_vec(i, j);
248 Z[j] = backend_type::copy_vector(z, bprm);
251 ARCCORE_ALINA_TOC(
"copy deflation vectors");
253 ARCCORE_ALINA_TIC(
"first pass");
254 az_loc->set_size(nrows, ndv,
true);
255 az_loc->set_nonzeros(nrows * dv_size[comm.rank]);
256 az_rem->set_size(nrows, 0,
true);
260 std::vector<ptrdiff_t> marker(Acp.recv.nbr.size(), -1);
262 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
263 ptrdiff_t az_loc_head = i * ndv;
264 az_loc->ptr[i + 1] = az_loc_head + ndv;
266 for (ptrdiff_t j = 0; j < ndv; ++j) {
267 az_loc->col[az_loc_head + j] = j;
268 az_loc->val[az_loc_head + j] = math::zero<value_type>();
271 for (ptrdiff_t j = a_loc->ptr[i], e = a_loc->ptr[i + 1]; j < e; ++j) {
272 ptrdiff_t c = a_loc->col[j];
273 value_type v = a_loc->val[j];
275 for (ptrdiff_t j = 0; j < ndv; ++j)
276 az_loc->val[az_loc_head + j] += v * prm.def_vec(c, j);
279 for (ptrdiff_t j = a_rem->ptr[i], e = a_rem->ptr[i + 1]; j < e; ++j) {
280 int d = Acp.domain(a_rem->col[j]);
282 if (marker[d] != i) {
284 az_rem->ptr[i + 1] += dv_size[d];
290 az_rem->set_nonzeros(az_rem->scan_row_sizes());
291 ARCCORE_ALINA_TOC(
"first pass");
294 ARCCORE_ALINA_TIC(
"local preconditioner");
295 P = std::make_shared<LocalPrecond>(*a_loc, prm.local, bprm);
296 ARCCORE_ALINA_TOC(
"local preconditioner");
298 A->set_local(P->system_matrix_ptr());
299 A->move_to_backend(bprm);
301 ARCCORE_ALINA_TIC(
"remote(A*Z)");
304 std::vector<ptrdiff_t> zrecv_ptr(Acp.recv.nbr.size() + 1, 0);
305 std::vector<ptrdiff_t> zcol_ptr;
306 zcol_ptr.reserve(Acp.recv.count() + 1);
307 zcol_ptr.push_back(0);
309 for (
size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
310 ptrdiff_t ncols = Acp.recv.ptr[i + 1] - Acp.recv.ptr[i];
311 ptrdiff_t nvecs = dv_size[Acp.recv.nbr[i]];
312 ptrdiff_t size = nvecs * ncols;
313 zrecv_ptr[i + 1] = zrecv_ptr[i] + size;
315 for (ptrdiff_t j = 0; j < ncols; ++j)
316 zcol_ptr.push_back(zcol_ptr.back() + nvecs);
319 std::vector<value_type> zrecv(zrecv_ptr.back());
320 std::vector<value_type> zsend(Acp.send.count() * ndv);
322 for (
size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
323 ptrdiff_t begin = zrecv_ptr[i];
324 ptrdiff_t size = zrecv_ptr[i + 1] - begin;
326 Acp.recv.req[i] = comm.doIReceive(&zrecv[begin], size, Acp.recv.nbr[i], tag_exc_vals);
329 for (
size_t i = 0, k = 0; i < Acp.send.count(); ++i)
330 for (ptrdiff_t j = 0; j < ndv; ++j, ++k)
331 zsend[k] = prm.def_vec(Acp.send.col[i], j);
333 for (
size_t i = 0; i < Acp.send.nbr.size(); ++i)
334 Acp.send.req[i] = comm.doISend(&zsend[ndv * Acp.send.ptr[i]], ndv * (Acp.send.ptr[i + 1] - Acp.send.ptr[i]),
335 Acp.send.nbr[i], tag_exc_vals);
337 comm.waitAll(Acp.recv.req);
338 comm.waitAll(Acp.send.req);
341 std::vector<ptrdiff_t> marker(nz, -1);
344 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
345 ptrdiff_t az_rem_head = az_rem->ptr[i];
346 ptrdiff_t az_rem_tail = az_rem_head;
348 for (
auto a = backend::row_begin(*a_rem, i); a; ++a) {
349 ptrdiff_t c = a.col();
350 value_type v = a.value();
353 ptrdiff_t d = Acp.recv.nbr[std::upper_bound(Acp.recv.ptr.begin(), Acp.recv.ptr.end(), c) -
354 Acp.recv.ptr.begin() - 1];
356 value_type* zval = &zrecv[zcol_ptr[c]];
357 for (ptrdiff_t j = 0, k = dv_start[d]; j < dv_size[d]; ++j, ++k) {
358 if (marker[k] < az_rem_head) {
359 marker[k] = az_rem_tail;
360 az_rem->col[az_rem_tail] = k;
361 az_rem->val[az_rem_tail] = v * zval[j];
365 az_rem->val[marker[k]] += v * zval[j];
371 ARCCORE_ALINA_TOC(
"remote(A*Z)");
374 ARCCORE_ALINA_TIC(
"assemble E");
377 std::vector<int> nbrs;
378 nbrs.reserve(1 + Acp.send.nbr.size() + Acp.recv.nbr.size());
380 Acp.send.nbr.begin(), Acp.send.nbr.end(),
381 Acp.recv.nbr.begin(), Acp.recv.nbr.end(),
382 std::back_inserter(nbrs));
383 nbrs.push_back(comm.rank);
386 E.set_size(ndv, nz,
false);
392 for (
int k = 0; k <= ndv; ++k)
395 E.setNbNonZero(E.ptr[ndv]);
396 E.set_nonzeros(E.ptr[ndv]);
400 multi_array<value_type, 3> erow(nthreads, ndv, nz);
401 std::fill_n(erow.data(), erow.size(), 0);
404 ptrdiff_t dv_offset = dv_start[comm.rank];
407 std::vector<value_type> z(ndv);
409 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
410 for (ptrdiff_t j = 0; j < ndv; ++j)
411 z[j] = prm.def_vec(i, j);
413 for (ptrdiff_t k = az_loc->ptr[i], e = az_loc->ptr[i + 1]; k < e; ++k) {
414 ptrdiff_t c = az_loc->col[k] + dv_offset;
415 value_type v = az_loc->val[k];
417 for (ptrdiff_t j = 0; j < ndv; ++j)
418 erow(tid, j, c) += v * z[j];
421 for (ptrdiff_t k = az_rem->ptr[i], e = az_rem->ptr[i + 1]; k < e; ++k) {
422 ptrdiff_t c = az_rem->col[k];
423 value_type v = az_rem->val[k];
425 for (ptrdiff_t j = 0; j < ndv; ++j)
426 erow(tid, j, c) += v * z[j];
432 for (
int i = 0; i < ndv; ++i) {
433 int row_head = E.ptr[i];
435 for (
int k = 0; k < dv_size[j]; ++k) {
436 int c = dv_start[j] + k;
437 value_type v = math::zero<value_type>();
438 for (
int t = 0; t < nthreads; ++t)
448 ARCCORE_ALINA_TOC(
"assemble E");
450 ARCCORE_ALINA_TIC(
"factorize E");
451 this->E = std::make_shared<DirectSolver>(comm, E, prm.dsolver);
452 ARCCORE_ALINA_TOC(
"factorize E");
454 ARCCORE_ALINA_TIC(
"finish(A*Z)");
455 AZ = std::make_shared<matrix>(comm, az_loc, az_rem);
456 AZ->move_to_backend(bprm);
457 ARCCORE_ALINA_TOC(
"finish(A*Z)");
458 ARCCORE_ALINA_TOC(
"setup deflation");
461 template <
class Vec1,
class Vec2>
462 void apply(
const Vec1& rhs, Vec2&& x)
const
467 std::tie(iters, error) = (*this)(rhs, x);
470 std::shared_ptr<matrix> system_matrix_ptr()
const
475 const matrix& system_matrix()
const
480 template <
class Matrix,
class Vec1,
class Vec2>
481 std::tuple<size_t, value_type> operator()(
482 const Matrix& A,
const Vec1& rhs, Vec2&& x)
const
484 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*
this, A), *P, rhs, x);
489 template <
class Vec1,
class Vec2>
490 std::tuple<size_t, value_type>
491 operator()(
const Vec1& rhs, Vec2&& x)
const
493 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*
this, *A), *P, rhs, x);
503 template <
class Vector>
504 void project(Vector& x)
const
506 const auto one = math::identity<scalar_type>();
508 ARCCORE_ALINA_TIC(
"project");
510 ARCCORE_ALINA_TIC(
"local inner product");
511 for (ptrdiff_t j = 0; j < ndv; ++j)
512 df[j] = backend::inner_product(x, *Z[j]);
513 ARCCORE_ALINA_TOC(
"local inner product");
515 coarse_solve(df, dx);
517 ARCCORE_ALINA_TIC(
"spmv");
518 backend::copy(dx, *dd);
519 backend::spmv(-one, *AZ, *dd, one, x);
520 ARCCORE_ALINA_TOC(
"spmv");
522 ARCCORE_ALINA_TOC(
"project");
527 static const int tag_exc_vals = 2011;
528 static const int tag_exc_dmat = 3011;
529 static const int tag_exc_dvec = 4011;
530 static const int tag_exc_lnnz = 5011;
532 mpi_communicator comm;
533 ptrdiff_t nrows, ndv, nz;
535 std::shared_ptr<matrix> A, AZ;
536 std::shared_ptr<LocalPrecond> P;
538 mutable std::vector<value_type> df, dx;
539 std::vector<ptrdiff_t> dv_start;
541 std::vector<std::shared_ptr<vector>> Z;
543 std::shared_ptr<DirectSolver> E;
545 std::shared_ptr<vector> q;
546 std::shared_ptr<vector> dd;
550 void coarse_solve(std::vector<value_type>& f, std::vector<value_type>& x)
const
552 ARCCORE_ALINA_TIC(
"coarse solve");
554 ARCCORE_ALINA_TOC(
"coarse solve");
557 template <
class Vec1,
class Vec2>
558 void postprocess(
const Vec1& rhs, Vec2& x)
const
560 const auto one = math::identity<scalar_type>();
562 ARCCORE_ALINA_TIC(
"postprocess");
565 backend::copy(rhs, *q);
566 backend::spmv(-one, *A, x, one, *q);
569 ARCCORE_ALINA_TIC(
"local inner product");
570 for (ptrdiff_t j = 0; j < ndv; ++j)
571 df[j] = backend::inner_product(*q, *Z[j]);
572 ARCCORE_ALINA_TOC(
"local inner product");
575 coarse_solve(df, dx);
578 backend::lin_comb(ndv, dx, Z, one, x);
580 ARCCORE_ALINA_TOC(
"postprocess");