129class DistributedSubDomainDeflation
133 typedef typename LocalPrecond::backend_type backend_type;
134 typedef typename backend_type::params backend_params;
138 typename LocalPrecond::params local;
139 typename IterativeSolver::params isolver;
140 typename DirectSolver::params dsolver;
143 Int32 num_def_vec = 0;
146 std::function<double(ptrdiff_t,
unsigned)> def_vec;
151 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, local)
152 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, isolver)
153 , ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p, dsolver)
154 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, num_def_vec)
157 ptr = p.get(
"def_vec", ptr);
159 precondition(ptr,
"Error in subdomain_deflation parameters: def_vec is not set");
161 def_vec = *
static_cast<std::function<
double(ptrdiff_t,
unsigned)
>*>(ptr);
163 p.check_params({
"local",
"isolver",
"dsolver",
"num_def_vec",
"def_vec" });
166 void get(
PropertyTree& p,
const std::string& path)
const
168 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, local);
169 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, isolver);
170 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path, dsolver);
171 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, num_def_vec);
175 typedef typename backend_type::value_type value_type;
176 typedef typename math::scalar_of<value_type>::type scalar_type;
177 typedef typename backend_type::matrix bmatrix;
178 using col_type = backend_type::col_type;
179 using ptr_type = backend_type::ptr_type;
180 typedef typename backend_type::vector vector;
183 template <
class Matrix>
187 const backend_params& bprm = backend_params())
189 , nrows(backend::nbRow(Astrip))
190 , ndv(prm.num_def_vec)
191 , dv_start(comm.size + 1, 0)
193 , q(backend_type::create_vector(nrows, bprm))
196 A = std::make_shared<matrix>(comm, Astrip, nrows);
201 std::shared_ptr<matrix> A,
203 const backend_params& bprm = backend_params())
205 , nrows(A->loc_rows())
206 , ndv(prm.num_def_vec)
208 , dv_start(comm.size + 1, 0)
210 , q(backend_type::create_vector(nrows, bprm))
216 void init(
const params& prm = params(),
217 const backend_params& bprm = backend_params())
219 ARCCORE_ALINA_TIC(
"setup deflation");
220 using build_matrix = CSRMatrix<value_type, col_type, ptr_type>;
223 std::vector<ptrdiff_t> dv_size(comm.size);
224 MPI_Allgather(&ndv, 1, mpi_datatype<ptrdiff_t>(), &dv_size[0], 1, mpi_datatype<ptrdiff_t>(), comm);
226 std::partial_sum(dv_size.begin(), dv_size.end(), dv_start.begin() + 1);
227 nz = dv_start.back();
231 dd = backend_type::create_vector(ndv, bprm);
233 auto az_loc = std::make_shared<build_matrix>();
234 auto az_rem = std::make_shared<build_matrix>();
236 auto a_loc = A->local();
237 auto a_rem = A->remote();
239 const CommunicationPattern<backend_type>& Acp = A->cpat();
242 ARCCORE_ALINA_TIC(
"copy deflation vectors");
244 std::vector<value_type> z(nrows);
245 for (
int j = 0; j < ndv; ++j) {
247 for (ptrdiff_t i = begin; i < (begin + size); ++i)
248 z[i] = prm.def_vec(i, j);
250 Z[j] = backend_type::copy_vector(z, bprm);
253 ARCCORE_ALINA_TOC(
"copy deflation vectors");
255 ARCCORE_ALINA_TIC(
"first pass");
256 az_loc->set_size(nrows, ndv,
true);
257 az_loc->set_nonzeros(nrows * dv_size[comm.rank]);
258 az_rem->set_size(nrows, 0,
true);
262 std::vector<ptrdiff_t> marker(Acp.recv.nbr.size(), -1);
264 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
265 ptrdiff_t az_loc_head = i * ndv;
266 az_loc->ptr[i + 1] = az_loc_head + ndv;
268 for (ptrdiff_t j = 0; j < ndv; ++j) {
269 az_loc->col[az_loc_head + j] = j;
270 az_loc->val[az_loc_head + j] = math::zero<value_type>();
273 for (ptrdiff_t j = a_loc->ptr[i], e = a_loc->ptr[i + 1]; j < e; ++j) {
274 ptrdiff_t c = a_loc->col[j];
275 value_type v = a_loc->val[j];
277 for (ptrdiff_t j = 0; j < ndv; ++j)
278 az_loc->val[az_loc_head + j] += v * prm.def_vec(c, j);
281 for (ptrdiff_t j = a_rem->ptr[i], e = a_rem->ptr[i + 1]; j < e; ++j) {
282 int d = Acp.domain(a_rem->col[j]);
284 if (marker[d] != i) {
286 az_rem->ptr[i + 1] += dv_size[d];
292 az_rem->set_nonzeros(az_rem->scan_row_sizes());
293 ARCCORE_ALINA_TOC(
"first pass");
296 ARCCORE_ALINA_TIC(
"local preconditioner");
297 P = std::make_shared<LocalPrecond>(*a_loc, prm.local, bprm);
298 ARCCORE_ALINA_TOC(
"local preconditioner");
300 A->set_local(P->system_matrix_ptr());
301 A->move_to_backend(bprm);
303 ARCCORE_ALINA_TIC(
"remote(A*Z)");
306 std::vector<ptrdiff_t> zrecv_ptr(Acp.recv.nbr.size() + 1, 0);
307 std::vector<ptrdiff_t> zcol_ptr;
308 zcol_ptr.reserve(Acp.recv.count() + 1);
309 zcol_ptr.push_back(0);
311 for (
size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
312 ptrdiff_t ncols = Acp.recv.ptr[i + 1] - Acp.recv.ptr[i];
313 ptrdiff_t nvecs = dv_size[Acp.recv.nbr[i]];
314 ptrdiff_t size = nvecs * ncols;
315 zrecv_ptr[i + 1] = zrecv_ptr[i] + size;
317 for (ptrdiff_t j = 0; j < ncols; ++j)
318 zcol_ptr.push_back(zcol_ptr.back() + nvecs);
321 std::vector<value_type> zrecv(zrecv_ptr.back());
322 std::vector<value_type> zsend(Acp.send.count() * ndv);
324 for (
size_t i = 0; i < Acp.recv.nbr.size(); ++i) {
325 ptrdiff_t begin = zrecv_ptr[i];
326 ptrdiff_t size = zrecv_ptr[i + 1] - begin;
328 Acp.recv.req[i] = comm.doIReceive(&zrecv[begin], size, Acp.recv.nbr[i], tag_exc_vals);
331 for (
size_t i = 0, k = 0; i < Acp.send.count(); ++i)
332 for (ptrdiff_t j = 0; j < ndv; ++j, ++k)
333 zsend[k] = prm.def_vec(Acp.send.col[i], j);
335 for (
size_t i = 0; i < Acp.send.nbr.size(); ++i)
336 Acp.send.req[i] = comm.doISend(&zsend[ndv * Acp.send.ptr[i]], ndv * (Acp.send.ptr[i + 1] - Acp.send.ptr[i]),
337 Acp.send.nbr[i], tag_exc_vals);
339 comm.waitAll(Acp.recv.req);
340 comm.waitAll(Acp.send.req);
343 std::vector<ptrdiff_t> marker(nz, -1);
346 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
347 ptrdiff_t az_rem_head = az_rem->ptr[i];
348 ptrdiff_t az_rem_tail = az_rem_head;
350 for (
auto a = backend::row_begin(*a_rem, i); a; ++a) {
351 ptrdiff_t c = a.col();
352 value_type v = a.value();
355 ptrdiff_t d = Acp.recv.nbr[std::upper_bound(Acp.recv.ptr.begin(), Acp.recv.ptr.end(), c) -
356 Acp.recv.ptr.begin() - 1];
358 value_type* zval = &zrecv[zcol_ptr[c]];
359 for (ptrdiff_t j = 0, k = dv_start[d]; j < dv_size[d]; ++j, ++k) {
360 if (marker[k] < az_rem_head) {
361 marker[k] = az_rem_tail;
362 az_rem->col[az_rem_tail] = k;
363 az_rem->val[az_rem_tail] = v * zval[j];
367 az_rem->val[marker[k]] += v * zval[j];
373 ARCCORE_ALINA_TOC(
"remote(A*Z)");
376 ARCCORE_ALINA_TIC(
"assemble E");
379 std::vector<int> nbrs;
380 nbrs.reserve(1 + Acp.send.nbr.size() + Acp.recv.nbr.size());
382 Acp.send.nbr.begin(), Acp.send.nbr.end(),
383 Acp.recv.nbr.begin(), Acp.recv.nbr.end(),
384 std::back_inserter(nbrs));
385 nbrs.push_back(comm.rank);
388 E.set_size(ndv, nz,
false);
394 for (
int k = 0; k <= ndv; ++k)
397 E.setNbNonZero(E.ptr[ndv]);
398 E.set_nonzeros(E.ptr[ndv]);
402 multi_array<value_type, 3> erow(nthreads, ndv, nz);
403 std::fill_n(erow.data(), erow.size(), 0);
406 ptrdiff_t dv_offset = dv_start[comm.rank];
409 std::vector<value_type> z(ndv);
411 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
412 for (ptrdiff_t j = 0; j < ndv; ++j)
413 z[j] = prm.def_vec(i, j);
415 for (ptrdiff_t k = az_loc->ptr[i], e = az_loc->ptr[i + 1]; k < e; ++k) {
416 ptrdiff_t c = az_loc->col[k] + dv_offset;
417 value_type v = az_loc->val[k];
419 for (ptrdiff_t j = 0; j < ndv; ++j)
420 erow(tid, j, c) += v * z[j];
423 for (ptrdiff_t k = az_rem->ptr[i], e = az_rem->ptr[i + 1]; k < e; ++k) {
424 ptrdiff_t c = az_rem->col[k];
425 value_type v = az_rem->val[k];
427 for (ptrdiff_t j = 0; j < ndv; ++j)
428 erow(tid, j, c) += v * z[j];
434 for (
int i = 0; i < ndv; ++i) {
435 int row_head = E.ptr[i];
437 for (
int k = 0; k < dv_size[j]; ++k) {
438 int c = dv_start[j] + k;
439 value_type v = math::zero<value_type>();
440 for (
int t = 0; t < nthreads; ++t)
450 ARCCORE_ALINA_TOC(
"assemble E");
452 ARCCORE_ALINA_TIC(
"factorize E");
453 this->E = std::make_shared<DirectSolver>(comm, E, prm.dsolver);
454 ARCCORE_ALINA_TOC(
"factorize E");
456 ARCCORE_ALINA_TIC(
"finish(A*Z)");
457 AZ = std::make_shared<matrix>(comm, az_loc, az_rem);
458 AZ->move_to_backend(bprm);
459 ARCCORE_ALINA_TOC(
"finish(A*Z)");
460 ARCCORE_ALINA_TOC(
"setup deflation");
463 template <
class Vec1,
class Vec2>
464 void apply(
const Vec1& rhs, Vec2&& x)
const
469 std::tie(iters, error) = (*this)(rhs, x);
472 std::shared_ptr<matrix> system_matrix_ptr()
const
477 const matrix& system_matrix()
const
482 template <
class Matrix,
class Vec1,
class Vec2>
483 std::tuple<size_t, value_type> operator()(
484 const Matrix& A,
const Vec1& rhs, Vec2&& x)
const
486 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*
this, A), *P, rhs, x);
491 template <
class Vec1,
class Vec2>
492 std::tuple<size_t, value_type>
493 operator()(
const Vec1& rhs, Vec2&& x)
const
495 std::tuple<size_t, value_type> cnv = S(make_sdd_projected_matrix(*
this, *A), *P, rhs, x);
505 template <
class Vector>
506 void project(Vector& x)
const
508 const auto one = math::identity<scalar_type>();
510 ARCCORE_ALINA_TIC(
"project");
512 ARCCORE_ALINA_TIC(
"local inner product");
513 for (ptrdiff_t j = 0; j < ndv; ++j)
514 df[j] = backend::inner_product(x, *Z[j]);
515 ARCCORE_ALINA_TOC(
"local inner product");
517 coarse_solve(df, dx);
519 ARCCORE_ALINA_TIC(
"spmv");
520 backend::copy(dx, *dd);
521 backend::spmv(-one, *AZ, *dd, one, x);
522 ARCCORE_ALINA_TOC(
"spmv");
524 ARCCORE_ALINA_TOC(
"project");
529 static const int tag_exc_vals = 2011;
530 static const int tag_exc_dmat = 3011;
531 static const int tag_exc_dvec = 4011;
532 static const int tag_exc_lnnz = 5011;
534 mpi_communicator comm;
535 ptrdiff_t nrows, ndv, nz;
537 std::shared_ptr<matrix> A, AZ;
538 std::shared_ptr<LocalPrecond> P;
540 mutable std::vector<value_type> df, dx;
541 std::vector<ptrdiff_t> dv_start;
543 std::vector<std::shared_ptr<vector>> Z;
545 std::shared_ptr<DirectSolver> E;
547 std::shared_ptr<vector> q;
548 std::shared_ptr<vector> dd;
552 void coarse_solve(std::vector<value_type>& f, std::vector<value_type>& x)
const
554 ARCCORE_ALINA_TIC(
"coarse solve");
556 ARCCORE_ALINA_TOC(
"coarse solve");
559 template <
class Vec1,
class Vec2>
560 void postprocess(
const Vec1& rhs, Vec2& x)
const
562 const auto one = math::identity<scalar_type>();
564 ARCCORE_ALINA_TIC(
"postprocess");
567 backend::copy(rhs, *q);
568 backend::spmv(-one, *A, x, one, *q);
571 ARCCORE_ALINA_TIC(
"local inner product");
572 for (ptrdiff_t j = 0; j < ndv; ++j)
573 df[j] = backend::inner_product(*q, *Z[j]);
574 ARCCORE_ALINA_TOC(
"local inner product");
577 coarse_solve(df, dx);
580 backend::lin_comb(ndv, dx, Z, one, x);
582 ARCCORE_ALINA_TOC(
"postprocess");