50struct DistributedPMISAggregation
52 typedef typename Backend::value_type value_type;
53 typedef typename math::scalar_of<value_type>::type scalar_type;
56 using build_matrix = Backend::matrix;
57 using col_type = Backend::col_type;
58 using ptr_type = Backend::ptr_type;
60 using bool_matrix = bool_backend::matrix;
68 double eps_strong = 0.08;
76 : ARCCORE_ALINA_PARAMS_IMPORT_CHILD(p,
nullspace)
77 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, eps_strong)
78 , ARCCORE_ALINA_PARAMS_IMPORT_VALUE(p, block_size)
80 p.check_params({
"nullspace",
"eps_strong",
"block_size" });
85 ARCCORE_ALINA_PARAMS_EXPORT_CHILD(p, path,
nullspace);
86 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, eps_strong);
87 ARCCORE_ALINA_PARAMS_EXPORT_VALUE(p, path, block_size);
92 std::shared_ptr<DistributedMatrix<bool_backend>> conn;
93 std::shared_ptr<matrix> p_tent;
95 DistributedPMISAggregation(
const matrix& A, params& prm)
98 ptrdiff_t n = A.loc_rows();
99 std::vector<ptrdiff_t> state(n);
100 std::vector<int> owner(n);
102 if (prm.block_size == 1) {
103 conn = conn_strength(A, prm.eps_strong);
105 ptrdiff_t naggr = aggregates(*conn, state, owner);
106 p_tent = tentative_prolongation(A.comm(), n, naggr, state, owner);
109 typedef typename math::scalar_of<value_type>::type scalar;
110 using sbackend = BuiltinBackend<scalar, col_type, ptr_type>;
112 ptrdiff_t np = n / prm.block_size;
114 assert(np * prm.block_size == n &&
"Matrix size should be divisible by block_size");
116 DistributedMatrix<sbackend> A_pw(A.comm(),
117 pointwise_matrix(*A.local(), prm.block_size),
118 pointwise_matrix(*A.remote(), prm.block_size));
120 auto conn_pw = conn_strength(A_pw, prm.eps_strong);
122 std::vector<ptrdiff_t> state_pw(np);
123 std::vector<int> owner_pw(np);
125 ptrdiff_t naggr = aggregates(*conn_pw, state_pw, owner_pw);
127 conn = std::make_shared<DistributedMatrix<bool_backend>>(A.comm(),
128 expand_conn(*A.local(), *A_pw.local(), *conn_pw->local(), prm.block_size),
129 expand_conn(*A.remote(), *A_pw.remote(), *conn_pw->remote(), prm.block_size));
132 for (ptrdiff_t ip = begin; ip < (begin + size); ++ip) {
133 ptrdiff_t i = ip * prm.block_size;
134 ptrdiff_t s = state_pw[ip];
135 int o = owner_pw[ip];
137 for (
unsigned k = 0; k < prm.block_size; ++k) {
138 state[i + k] = (s < 0) ? s : (s * prm.block_size + k);
144 p_tent = tentative_prolongation(A.comm(), n, naggr * prm.block_size, state, owner);
148 std::shared_ptr<DistributedMatrix<bool_backend>>
149 squared_interface(
const DistributedMatrix<bool_backend>& A)
151 const CommunicationPattern<bool_backend>& C = A.cpat();
153 bool_matrix& A_loc = *A.local();
154 bool_matrix& A_rem = *A.remote();
156 ptrdiff_t A_rows = A.loc_rows();
158 ptrdiff_t A_beg = A.loc_col_shift();
159 ptrdiff_t A_end = A_beg + A_rows;
161 auto a_nbr = remote_rows(C, A,
false);
162 bool_matrix& A_nbr = *a_nbr;
166 std::vector<ptrdiff_t> rem_cols(A_rem.nbNonZero() + A_nbr.nbNonZero());
168 std::copy(A_nbr.col.data(), A_nbr.col.data() + A_nbr.nbNonZero(),
169 std::copy(A_rem.col.data(), A_rem.col.data() + A_rem.nbNonZero(), rem_cols.begin()));
171 std::sort(rem_cols.begin(), rem_cols.end());
172 rem_cols.erase(std::unique(rem_cols.begin(), rem_cols.end()), rem_cols.end());
174 ptrdiff_t n_rem_cols = 0;
175 std::unordered_map<ptrdiff_t, int> rem_idx(2 * rem_cols.size());
176 for (ptrdiff_t c : rem_cols) {
177 if (c >= A_beg && c < A_end)
179 rem_idx[c] = n_rem_cols++;
183 auto s_loc = std::make_shared<bool_matrix>();
184 auto s_rem = std::make_shared<bool_matrix>();
186 bool_matrix& S_loc = *s_loc;
187 bool_matrix& S_rem = *s_rem;
189 S_loc.set_size(A_rows, A_rows,
false);
190 S_rem.set_size(A_rows, 0,
false);
195 ARCCORE_ALINA_TIC(
"analyze");
197 std::vector<ptrdiff_t> loc_marker(A_rows, -1);
198 std::vector<ptrdiff_t> rem_marker(n_rem_cols, -1);
200 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
201 ptrdiff_t loc_cols = 0;
202 ptrdiff_t rem_cols = 0;
204 for (ptrdiff_t ja = A_rem.ptr[ia], ea = A_rem.ptr[ia + 1]; ja < ea; ++ja) {
205 ptrdiff_t ca = C.local_index(A_rem.col[ja]);
207 for (ptrdiff_t jb = A_nbr.ptr[ca], eb = A_nbr.ptr[ca + 1]; jb < eb; ++jb) {
208 ptrdiff_t cb = A_nbr.col[jb];
210 if (cb >= A_beg && cb < A_end) {
213 if (loc_marker[cb] != ia) {
221 if (rem_marker[cb] != ia) {
229 for (ptrdiff_t ja = A_loc.ptr[ia], ea = A_loc.ptr[ia + 1]; ja < ea; ++ja) {
230 ptrdiff_t ca = A_loc.col[ja];
232 for (ptrdiff_t jb = A_rem.ptr[ca], eb = A_rem.ptr[ca + 1]; jb < eb; ++jb) {
233 ptrdiff_t cb = rem_idx[A_rem.col[jb]];
235 if (rem_marker[cb] != ia) {
243 for (ptrdiff_t ja = A_loc.ptr[ia], ea = A_loc.ptr[ia + 1]; ja < ea; ++ja) {
244 ptrdiff_t ca = A_loc.col[ja];
246 for (ptrdiff_t jb = A_loc.ptr[ca], eb = A_loc.ptr[ca + 1]; jb < eb; ++jb) {
247 ptrdiff_t cb = A_loc.col[jb];
249 if (loc_marker[cb] != ia) {
257 S_rem.ptr[ia + 1] = rem_cols;
258 S_loc.ptr[ia + 1] = rem_cols ? loc_cols : 0;
261 ARCCORE_ALINA_TOC(
"analyze");
263 S_loc.set_nonzeros(S_loc.scan_row_sizes(),
false);
264 S_rem.set_nonzeros(S_rem.scan_row_sizes(),
false);
266 ARCCORE_ALINA_TIC(
"compute");
268 std::vector<ptrdiff_t> loc_marker(A_rows, -1);
269 std::vector<ptrdiff_t> rem_marker(n_rem_cols, -1);
271 for (ptrdiff_t ia = begin; ia < (begin + size); ++ia) {
272 ptrdiff_t loc_beg = S_loc.ptr[ia];
273 ptrdiff_t rem_beg = S_rem.ptr[ia];
274 ptrdiff_t loc_end = loc_beg;
275 ptrdiff_t rem_end = rem_beg;
277 if (rem_beg == S_rem.ptr[ia + 1])
280 for (ptrdiff_t ja = A_loc.ptr[ia], ea = A_loc.ptr[ia + 1]; ja < ea; ++ja) {
281 ptrdiff_t ca = A_loc.col[ja];
283 for (ptrdiff_t jb = A_loc.ptr[ca], eb = A_loc.ptr[ca + 1]; jb < eb; ++jb) {
284 ptrdiff_t cb = A_loc.col[jb];
286 if (loc_marker[cb] < loc_beg) {
287 loc_marker[cb] = loc_end;
288 S_loc.col[loc_end] = cb;
293 for (ptrdiff_t jb = A_rem.ptr[ca], eb = A_rem.ptr[ca + 1]; jb < eb; ++jb) {
294 ptrdiff_t gb = A_rem.col[jb];
295 ptrdiff_t cb = rem_idx[gb];
297 if (rem_marker[cb] < rem_beg) {
298 rem_marker[cb] = rem_end;
299 S_rem.col[rem_end] = gb;
305 for (ptrdiff_t ja = A_rem.ptr[ia], ea = A_rem.ptr[ia + 1]; ja < ea; ++ja) {
306 ptrdiff_t ca = C.local_index(A_rem.col[ja]);
308 for (ptrdiff_t jb = A_nbr.ptr[ca], eb = A_nbr.ptr[ca + 1]; jb < eb; ++jb) {
309 ptrdiff_t gb = A_nbr.col[jb];
311 if (gb >= A_beg && gb < A_end) {
312 ptrdiff_t cb = gb - A_beg;
314 if (loc_marker[cb] < loc_beg) {
315 loc_marker[cb] = loc_end;
316 S_loc.col[loc_end] = cb;
321 ptrdiff_t cb = rem_idx[gb];
323 if (rem_marker[cb] < rem_beg) {
324 rem_marker[cb] = rem_end;
325 S_rem.col[rem_end] = gb;
333 ARCCORE_ALINA_TOC(
"compute");
335 return std::make_shared<DistributedMatrix<bool_backend>>(A.comm(), s_loc, s_rem);
339 std::shared_ptr<DistributedMatrix<bool_backend>>
340 conn_strength(
const DistributedMatrix<B>& A, scalar_type eps_strong)
342 typedef typename B::value_type val_type;
343 typedef CSRMatrix<val_type> B_matrix;
345 ARCCORE_ALINA_TIC(
"conn_strength");
346 ptrdiff_t n = A.loc_rows();
348 const B_matrix& A_loc = *A.local();
349 const B_matrix& A_rem = *A.remote();
350 const CommunicationPattern<B>& C = A.cpat();
352 scalar_type eps_squared = eps_strong * eps_strong;
354 auto d = diagonal(A_loc);
355 numa_vector<val_type>& D = *d;
357 std::vector<val_type> D_loc(C.send.count());
358 std::vector<val_type> D_rem(C.recv.count());
360 for (
size_t i = 0, nv = C.send.count(); i < nv; ++i)
361 D_loc[i] = D[C.send.col[i]];
363 C.exchange(&D_loc[0], &D_rem[0]);
365 auto s_loc = std::make_shared<bool_matrix>();
366 auto s_rem = std::make_shared<bool_matrix>();
368 bool_matrix& S_loc = *s_loc;
369 bool_matrix& S_rem = *s_rem;
371 S_loc.set_size(n, n,
true);
372 S_rem.set_size(n, 0,
true);
374 S_loc.val.resize(A_loc.nbNonZero());
375 S_rem.val.resize(A_rem.nbNonZero());
378 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
379 val_type eps_dia_i = eps_squared * D[i];
381 for (ptrdiff_t j = A_loc.ptr[i], e = A_loc.ptr[i + 1]; j < e; ++j) {
382 ptrdiff_t c = A_loc.col[j];
383 val_type v = A_loc.val[j];
385 if ((S_loc.val[j] = (c == i || (eps_dia_i * D[c] < v * v))))
389 for (ptrdiff_t j = A_rem.ptr[i], e = A_rem.ptr[i + 1]; j < e; ++j) {
390 ptrdiff_t c = C.local_index(A_rem.col[j]);
391 val_type v = A_rem.val[j];
393 if ((S_rem.val[j] = (eps_dia_i * D_rem[c] < v * v)))
399 S_loc.setNbNonZero(S_loc.scan_row_sizes());
400 S_rem.setNbNonZero(S_rem.scan_row_sizes());
402 S_loc.col.resize(S_loc.nbNonZero());
403 S_rem.col.resize(S_rem.nbNonZero());
406 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
407 ptrdiff_t loc_head = S_loc.ptr[i];
408 ptrdiff_t rem_head = S_rem.ptr[i];
410 for (ptrdiff_t j = A_loc.ptr[i], e = A_loc.ptr[i + 1]; j < e; ++j)
412 S_loc.col[loc_head++] = A_loc.col[j];
414 for (ptrdiff_t j = A_rem.ptr[i], e = A_rem.ptr[i + 1]; j < e; ++j)
416 S_rem.col[rem_head++] = A_rem.col[j];
419 ARCCORE_ALINA_TOC(
"conn_strength");
421 return std::make_shared<DistributedMatrix<bool_backend>>(A.comm(), s_loc, s_rem);
424 ptrdiff_t aggregates(
const DistributedMatrix<bool_backend>& A,
425 std::vector<ptrdiff_t>& loc_state,
426 std::vector<int>& loc_owner)
428 ARCCORE_ALINA_TIC(
"PMIS");
429 static const int tag_exc_cnt = 4001;
430 static const int tag_exc_pts = 4002;
432 const bool_matrix& A_loc = *A.local();
433 const bool_matrix& A_rem = *A.remote();
435 ptrdiff_t n = A_loc.nbRow();
437 mpi_communicator comm = A.comm();
440 ARCCORE_ALINA_TIC(
"symbolic square");
441 auto S = squared_interface(A);
442 const bool_matrix& S_loc = *S->local();
443 const bool_matrix& S_rem = *S->remote();
444 const CommunicationPattern<bool_backend>& Sp = S->cpat();
445 ARCCORE_ALINA_TOC(
"symbolic square");
448 ptrdiff_t n_undone = 0;
449 std::vector<ptrdiff_t> rem_state(Sp.recv.count(), DistributedPMISAggregation::undone);
450 std::vector<int> rem_owner(Sp.recv.count(), -1);
451 std::vector<ptrdiff_t> send_state(Sp.send.count());
452 std::vector<int> send_owner(Sp.send.count());
455 std::atomic<ptrdiff_t> atomic_n_undone = 0;
457 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
458 ptrdiff_t wl = A_loc.ptr[i + 1] - A_loc.ptr[i];
459 ptrdiff_t wr = S_rem.ptr[i + 1] - S_rem.ptr[i];
462 loc_state[i] = DistributedPMISAggregation::deleted;
466 loc_state[i] = DistributedPMISAggregation::undone;
473 n_undone = n - atomic_n_undone;
476 for (ptrdiff_t i = 0, m = Sp.send.count(); i < m; ++i)
477 send_state[i] = loc_state[Sp.send.col[i]];
478 Sp.exchange(&send_state[0], &rem_state[0]);
480 std::vector<std::vector<ptrdiff_t>> send_pts(Sp.recv.nbr.size());
481 std::vector<ptrdiff_t> recv_pts;
483 UniqueArray<MessagePassing::Request> send_cnt_req(Sp.recv.nbr.size());
484 UniqueArray<MessagePassing::Request> send_pts_req(Sp.recv.nbr.size());
488 std::vector<ptrdiff_t> nbr;
491 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i)
495 for (ptrdiff_t i = 0; i < n; ++i) {
496 if (loc_state[i] != DistributedPMISAggregation::undone)
499 if (S_rem.ptr[i + 1] > S_rem.ptr[i]) {
501 bool selectable =
true;
502 for (ptrdiff_t j = S_rem.ptr[i], e = S_rem.ptr[i + 1]; j < e; ++j) {
504 std::tie(d, c) = Sp.remote_info(S_rem.col[j]);
506 if (rem_state[c] == DistributedPMISAggregation::undone && Sp.recv.nbr[d] > comm.rank) {
515 ptrdiff_t
id = naggr++;
516 loc_owner[i] = comm.rank;
521 for (ptrdiff_t j = A_loc.ptr[i], e = A_loc.ptr[i + 1]; j < e; ++j) {
522 ptrdiff_t c = A_loc.col[j];
524 if (loc_state[c] == DistributedPMISAggregation::undone)
526 loc_owner[c] = comm.rank;
531 for (ptrdiff_t j = A_rem.ptr[i], e = A_rem.ptr[i + 1]; j < e; ++j) {
532 ptrdiff_t c = A_rem.col[j];
534 std::tie(d, k) = Sp.remote_info(c);
538 send_pts[d].push_back(c);
539 send_pts[d].push_back(
id);
543 for (ptrdiff_t j = S_loc.ptr[i], e = S_loc.ptr[i + 1]; j < e; ++j) {
544 ptrdiff_t c = S_loc.col[j];
545 if (c != i && loc_state[c] == DistributedPMISAggregation::undone) {
546 loc_owner[c] = comm.rank;
552 for (ptrdiff_t j = S_rem.ptr[i], e = S_rem.ptr[i + 1]; j < e; ++j) {
553 ptrdiff_t c = S_rem.col[j];
555 std::tie(d, k) = Sp.remote_info(c);
557 if (rem_state[k] == DistributedPMISAggregation::undone) {
559 send_pts[d].push_back(c);
560 send_pts[d].push_back(
id);
566 ptrdiff_t
id = naggr++;
567 loc_owner[i] = comm.rank;
573 for (ptrdiff_t j = A_loc.ptr[i], e = A_loc.ptr[i + 1]; j < e; ++j) {
574 ptrdiff_t c = A_loc.col[j];
576 if (c != i && loc_state[c] != DistributedPMISAggregation::deleted) {
577 if (loc_state[c] == DistributedPMISAggregation::undone)
579 loc_owner[c] = comm.rank;
585 for (ptrdiff_t k : nbr) {
586 for (ptrdiff_t j = A_loc.ptr[k], e = A_loc.ptr[k + 1]; j < e; ++j) {
587 ptrdiff_t c = A_loc.col[j];
588 if (c != k && loc_state[c] == DistributedPMISAggregation::undone) {
589 loc_owner[c] = comm.rank;
599 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i) {
600 int npts = send_pts[i].size();
601 send_cnt_req[i] = comm.doISend(&npts, 1, Sp.recv.nbr[i], tag_exc_cnt);
605 send_pts_req[i] = comm.doISend(&send_pts[i][0], npts, Sp.recv.nbr[i], tag_exc_pts);
608 for (
size_t i = 0; i < Sp.send.nbr.size(); ++i) {
610 comm.doReceive(&npts, 1, Sp.send.nbr[i], tag_exc_cnt);
614 recv_pts.resize(npts);
615 comm.doReceive(&recv_pts[0], npts, Sp.send.nbr[i], tag_exc_pts);
617 for (
int k = 0; k < npts; k += 2) {
618 ptrdiff_t c = recv_pts[k] - Sp.loc_col_shift();
619 ptrdiff_t
id = recv_pts[k + 1];
621 if (loc_state[c] == DistributedPMISAggregation::undone)
624 loc_owner[c] = Sp.send.nbr[i];
629 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i) {
630 int npts = send_pts[i].size();
631 comm.wait(send_cnt_req[i]);
634 comm.wait(send_pts_req[i]);
637 for (ptrdiff_t i = 0, m = Sp.send.count(); i < m; ++i)
638 send_state[i] = loc_state[Sp.send.col[i]];
639 Sp.exchange(&send_state[0], &rem_state[0]);
641 if (0 == comm.reduceSum(n_undone))
647 ARCCORE_ALINA_TIC(
"drop empty aggregates");
648 for (ptrdiff_t i = 0, m = Sp.send.count(); i < m; ++i)
649 send_owner[i] = loc_owner[Sp.send.col[i]];
650 Sp.exchange(&send_owner[0], &rem_owner[0]);
652 std::vector<ptrdiff_t> new_id(naggr + 1, 0);
653 for (ptrdiff_t i = 0; i < n; ++i) {
654 if (loc_owner[i] == comm.rank && loc_state[i] >= 0)
655 new_id[loc_state[i] + 1] = 1;
658 for (
size_t i = 0; i < Sp.recv.count(); ++i) {
659 if (rem_owner[i] == comm.rank && rem_state[i] >= 0)
660 new_id[rem_state[i] + 1] = 1;
663 std::partial_sum(new_id.begin(), new_id.end(), new_id.begin());
665 if (comm.reduceSum(naggr - new_id.back()) > 0) {
666 naggr = new_id.back();
668 for (ptrdiff_t i = 0; i < n; ++i) {
669 if (loc_owner[i] == comm.rank && loc_state[i] >= 0) {
670 loc_state[i] = new_id[loc_state[i]];
674 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i) {
678 for (
auto p = Sp.remote_begin(); p != Sp.remote_end(); ++p) {
679 ptrdiff_t c = p->first;
682 std::tie(d, k) = p->second;
684 if (rem_owner[k] == comm.rank && rem_state[k] >= 0) {
685 send_pts[d].push_back(c);
686 send_pts[d].push_back(new_id[rem_state[k]]);
690 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i) {
691 int npts = send_pts[i].size();
692 send_cnt_req[i] = comm.doISend(&npts, 1, Sp.recv.nbr[i], tag_exc_cnt);
696 send_pts_req[i] = comm.doISend(&send_pts[i][0], npts, Sp.recv.nbr[i], tag_exc_pts);
699 for (
size_t i = 0; i < Sp.send.nbr.size(); ++i) {
701 comm.doReceive(&npts, 1, Sp.send.nbr[i], tag_exc_cnt);
705 recv_pts.resize(npts);
706 comm.doReceive(&recv_pts[0], npts, Sp.send.nbr[i], tag_exc_pts);
708 for (
int k = 0; k < npts; k += 2) {
709 ptrdiff_t c = recv_pts[k] - Sp.loc_col_shift();
710 ptrdiff_t
id = recv_pts[k + 1];
716 for (
size_t i = 0; i < Sp.recv.nbr.size(); ++i) {
717 int npts = send_pts[i].size();
718 comm.wait(send_cnt_req[i]);
721 comm.wait(send_pts_req[i]);
725 ARCCORE_ALINA_TOC(
"drop empty aggregates");
726 ARCCORE_ALINA_TOC(
"PMIS");
731 std::shared_ptr<matrix>
732 tentative_prolongation(mpi_communicator comm, ptrdiff_t n, ptrdiff_t naggr,
733 std::vector<ptrdiff_t>& state, std::vector<int>& owner)
735 auto p_loc = std::make_shared<build_matrix>();
736 auto p_rem = std::make_shared<build_matrix>();
737 build_matrix& P_loc = *p_loc;
738 build_matrix& P_rem = *p_rem;
740 ARCCORE_ALINA_TIC(
"tentative prolongation");
742 if (
int null_cols = prm.nullspace.cols) {
743 ptrdiff_t nba = naggr / prm.block_size;
745 std::vector<ptrdiff_t> fdom = comm.exclusive_sum(n);
746 std::vector<ptrdiff_t> cdom = comm.exclusive_sum(naggr);
748 std::vector<int> scounts(comm.size, 0);
749 std::vector<int> rcounts(comm.size);
754 P_loc.set_size(n, null_cols * nba,
true);
755 P_rem.set_size(n, 0,
true);
758 ptrdiff_t loc_dofs = 0;
760 for (ptrdiff_t i = 0; i < n; ++i) {
761 if (state[i] == DistributedPMISAggregation::deleted)
764 if (owner[i] == comm.rank) {
765 P_loc.ptr[i + 1] = null_cols;
769 P_rem.ptr[i + 1] = null_cols;
776 MPI_Ialltoall(scounts.data(), 1, MPI_INT,
777 rcounts.data(), 1, MPI_INT,
780 P_loc.set_nonzeros(P_loc.scan_row_sizes());
781 P_rem.set_nonzeros(P_rem.scan_row_sizes());
783 MPI_Wait(&req, MPI_STATUS_IGNORE);
787 for (
int i = 0; i < comm.size; ++i) {
794 std::vector<int> send_nbr;
795 send_nbr.reserve(snbr);
796 std::vector<int> recv_nbr;
797 recv_nbr.reserve(rnbr);
798 std::vector<int> send_ptr;
799 send_ptr.reserve(snbr + 1);
800 send_ptr.push_back(0);
801 std::vector<int> recv_ptr;
802 recv_ptr.reserve(rnbr + 1);
803 recv_ptr.push_back(0);
805 for (
int i = 0; i < comm.size; ++i) {
807 send_nbr.push_back(i);
808 send_ptr.push_back(send_ptr.back() + scounts[i]);
811 recv_nbr.push_back(i);
812 recv_ptr.push_back(recv_ptr.back() + rcounts[i]);
816 int send_dofs = send_ptr.back();
817 int recv_dofs = recv_ptr.back();
819 std::vector<ptrdiff_t> send_agg(send_dofs);
820 std::vector<ptrdiff_t> send_dof(send_dofs);
821 std::vector<double> send_row(send_dofs * null_cols);
823 std::vector<ptrdiff_t> recv_agg(recv_dofs);
824 std::vector<ptrdiff_t> recv_dof(recv_dofs);
825 std::vector<double> recv_row(recv_dofs * null_cols);
828 std::vector<ptrdiff_t> send_rank_ptr(comm.size + 1);
829 send_rank_ptr[0] = 0;
830 std::partial_sum(scounts.begin(), scounts.end(), send_rank_ptr.begin() + 1);
831 for (ptrdiff_t i = 0; i < n; ++i) {
835 if (s == DistributedPMISAggregation::deleted)
840 auto head = send_rank_ptr[o]++;
843 send_dof[head] = i + fdom[comm.rank];
844 std::copy_n(&prm.nullspace.B[i * null_cols], null_cols, &send_row[head * null_cols]);
848 UniqueArray<MessagePassing::Request> send_req(3 * snbr);
849 UniqueArray<MessagePassing::Request> recv_req(3 * rnbr);
851 for (
int i = 0; i < rnbr; ++i) {
854 int w = recv_ptr[i + 1] - p;
856 MessagePassing::Request* req = &recv_req[3 * i];
858 req[0] = comm.doIReceive(&recv_agg[p], w, n, tag_exc_agg);
859 req[1] = comm.doIReceive(&recv_dof[p], w, n, tag_exc_dof);
860 req[2] = comm.doIReceive(&recv_row[null_cols * p], null_cols * w, n, tag_exc_row);
863 for (
int i = 0; i < snbr; ++i) {
866 int w = send_ptr[i + 1] - p;
868 MessagePassing::Request* req = &send_req[3 * i];
870 req[0] = comm.doISend(&send_agg[p], w, n, tag_exc_agg);
871 req[1] = comm.doISend(&send_dof[p], w, n, tag_exc_dof);
872 req[2] = comm.doISend(&send_row[null_cols * p], null_cols * w, n, tag_exc_row);
875 ARCCORE_ALINA_TIC(
"MPI Wait");
876 comm.waitAll(recv_req);
877 comm.waitAll(send_req);
878 ARCCORE_ALINA_TOC(
"MPI Wait");
883 std::vector<std::tuple<ptrdiff_t, ptrdiff_t, double*, value_type*>> order;
884 order.reserve(loc_dofs + recv_dofs);
885 for (ptrdiff_t i = 0; i < n; ++i) {
889 if (s == DistributedPMISAggregation::deleted)
894 order.emplace_back(s / prm.block_size, i + fdom[comm.rank],
895 &prm.nullspace.B[i * null_cols], &P_loc.val[P_loc.ptr[i]]);
897 for (ptrdiff_t i = 0; i < recv_dofs; ++i) {
898 order.emplace_back(recv_agg[i] / prm.block_size, recv_dof[i],
899 &recv_row[i * null_cols],
nullptr);
901 std::sort(order.begin(), order.end());
903 std::vector<ptrdiff_t> aggr_ptr(nba + 1, 0);
904 for (
size_t i = 0; i < order.size(); ++i)
905 ++aggr_ptr[std::get<0>(order[i]) + 1];
906 std::partial_sum(aggr_ptr.begin(), aggr_ptr.end(), aggr_ptr.begin());
910 std::vector<double> Bnew;
911 Bnew.resize(nba * null_cols * null_cols);
914 Alina::detail::QRFactorization<double> qr;
915 std::vector<double> Bpart;
917 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
918 auto aggr_beg = aggr_ptr[i];
919 auto aggr_end = aggr_ptr[i + 1];
920 auto d = aggr_end - aggr_beg;
922 Bpart.resize(d * null_cols);
924 for (ptrdiff_t j = aggr_beg, r = 0; j < aggr_end; ++j, ++r) {
925 auto src = std::get<2>(order[j]);
926 for (
int c = 0; c < null_cols; ++c)
927 Bpart[r + d * c] = src[c];
930 qr.factorize(d, null_cols, &Bpart[0], Alina::detail::col_major);
932 for (ptrdiff_t r = 0, k = i * null_cols * null_cols; r < null_cols; ++r)
933 for (
int c = 0; c < null_cols; ++c, ++k)
934 Bnew[k] = qr.R(r, c);
936 for (ptrdiff_t j = aggr_beg, r = 0; j < aggr_end; ++j, ++r) {
937 auto src = std::get<2>(order[j]);
938 auto dst = std::get<3>(order[j]);
943 for (
int c = 0; c < null_cols; ++c)
944 dst[c] = qr.Q(r, c) * math::identity<value_type>();
947 for (
int c = 0; c < null_cols; ++c)
956 for (
int i = 0; i < snbr; ++i) {
959 int w = send_ptr[i + 1] - p;
960 send_req[i] = comm.doIReceive(&send_row[null_cols * p], null_cols * w, n, tag_exc_row);
963 for (
int i = 0; i < rnbr; ++i) {
966 int w = recv_ptr[i + 1] - p;
967 recv_req[i] = comm.doISend(&recv_row[null_cols * p], null_cols * w, n, tag_exc_row);
972 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
973 ptrdiff_t s = state[i];
974 if (s == DistributedPMISAggregation::deleted)
978 if (d == comm.rank) {
979 auto col = &P_loc.col[P_loc.ptr[i]];
980 for (
int j = 0; j < null_cols; ++j) {
981 col[j] = null_cols * s / prm.block_size + j;
985 auto col = &P_rem.col[P_rem.ptr[i]];
986 for (
int j = 0; j < null_cols; ++j) {
987 col[j] = null_cols * (s + cdom[d]) / prm.block_size + j;
993 ARCCORE_ALINA_TIC(
"MPI Wait");
994 comm.waitAll(send_req);
995 comm.waitAll(recv_req);
996 ARCCORE_ALINA_TOC(
"MPI Wait");
999 for (ptrdiff_t k = 0; k < send_dofs; ++k) {
1000 auto i = send_dof[k] - fdom[comm.rank];
1001 auto src = &send_row[k * null_cols];
1002 auto dst = &P_rem.val[P_rem.ptr[i]];
1004 for (ptrdiff_t j = 0; j < null_cols; ++j) {
1005 dst[j] = src[j] * math::identity<value_type>();
1009 std::swap(prm.nullspace.B, Bnew);
1012 std::vector<ptrdiff_t> dom = comm.exclusive_sum(naggr);
1014 P_loc.set_size(n, naggr,
true);
1015 P_rem.set_size(n, 0,
true);
1018 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1019 if (state[i] == DistributedPMISAggregation::deleted)
1022 if (owner[i] == comm.rank) {
1031 P_loc.set_nonzeros(P_loc.scan_row_sizes());
1032 P_rem.set_nonzeros(P_rem.scan_row_sizes());
1035 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
1036 ptrdiff_t s = state[i];
1037 if (s == DistributedPMISAggregation::deleted)
1041 if (d == comm.rank) {
1042 P_loc.col[P_loc.ptr[i]] = s;
1043 P_loc.val[P_loc.ptr[i]] = math::identity<value_type>();
1046 P_rem.col[P_rem.ptr[i]] = s + dom[d];
1047 P_rem.val[P_rem.ptr[i]] = math::identity<value_type>();
1052 ARCCORE_ALINA_TOC(
"tentative prolongation");
1054 return std::make_shared<matrix>(comm, p_loc, p_rem);
1057 template <
class pw_matrix>
1058 std::shared_ptr<bool_matrix>
1059 expand_conn(
const build_matrix& A,
const pw_matrix& Ap,
const bool_matrix& Cp,
1060 unsigned block_size)
const
1062 ptrdiff_t np = Cp.nbRow();
1063 ptrdiff_t n = np * block_size;
1065 auto c = std::make_shared<bool_matrix>();
1066 bool_matrix& C = *c;
1068 C.set_size(n, n,
true);
1069 C.val.resize(A.nbNonZero());
1072 std::vector<ptrdiff_t> j(block_size);
1073 std::vector<ptrdiff_t> e(block_size);
1075 for (ptrdiff_t ip = begin; ip < (begin + size); ++ip) {
1076 ptrdiff_t ia = ip * block_size;
1078 for (
unsigned k = 0; k < block_size; ++k) {
1079 j[k] = A.ptr[ia + k];
1080 e[k] = A.ptr[ia + k + 1];
1083 for (ptrdiff_t jp = Ap.ptr[ip], ep = Ap.ptr[ip + 1]; jp < ep; ++jp) {
1084 ptrdiff_t cp = Ap.col[jp];
1085 bool sp = Cp.val[jp];
1087 ptrdiff_t col_end = (cp + 1) * block_size;
1089 for (
unsigned k = 0; k < block_size; ++k) {
1090 ptrdiff_t beg = j[k];
1091 ptrdiff_t end = e[k];
1093 while (beg < end && A.col[beg] < col_end) {
1097 ++C.ptr[ia + k + 1];
1106 C.setNbNonZero(C.scan_row_sizes());
1107 C.col.resize(C.nbNonZero());
1110 std::vector<ptrdiff_t> j(block_size);
1111 std::vector<ptrdiff_t> e(block_size);
1112 std::vector<ptrdiff_t> h(block_size);
1114 for (ptrdiff_t ip = begin; ip < (begin + size); ++ip) {
1115 ptrdiff_t ia = ip * block_size;
1117 for (
unsigned k = 0; k < block_size; ++k) {
1118 j[k] = A.ptr[ia + k];
1119 e[k] = A.ptr[ia + k + 1];
1120 h[k] = C.ptr[ia + k];
1123 for (ptrdiff_t jp = Ap.ptr[ip], ep = Ap.ptr[ip + 1]; jp < ep; ++jp) {
1124 ptrdiff_t cp = Ap.col[jp];
1125 bool sp = Cp.val[jp];
1127 ptrdiff_t col_end = (cp + 1) * block_size;
1129 for (
unsigned k = 0; k < block_size; ++k) {
1130 ptrdiff_t beg = j[k];
1131 ptrdiff_t end = e[k];
1132 ptrdiff_t hed = h[k];
1134 while (beg < end && A.col[beg] < col_end) {
1136 C.col[hed++] = A.col[beg];
1152 static const int undone = -2;
1153 static const int deleted = -1;
1155 static const int tag_exc_agg = 4011;
1156 static const int tag_exc_dof = 4012;
1157 static const int tag_exc_row = 4013;