// Base class for a direct solver that runs on a subset of MPI ranks
// ("masters"); the remaining ranks forward their data to a master.
// NOTE(review): the template parameter list is not visible in this excerpt;
// the code below refers to `Backend` and `Solver`, so both are presumably
// template parameters — confirm.
41class DistributedDirectSolverBase
// Value type of matrix entries, taken from the backend.
45 typedef typename Backend::value_type value_type;
// Scalar type underlying value_type, as reported by math::scalar_of.
46 typedef typename math::scalar_of<value_type>::type scalar_type;
// Right-hand-side / solution element type, as reported by math::rhs_of.
47 typedef typename math::rhs_of<value_type>::type rhs_type;
// Column-index and row-pointer types, taken from the backend.
// NOTE(review): these dependent names are written without `typename`, which
// is only accepted from C++20 on — confirm the project's language level.
49 using col_type = Backend::col_type;
50 using ptr_type = Backend::ptr_type;
// Default constructor. Leaves the solver unset-up, but puts masters_comm in a
// well-defined state: the destructor compares it against MPI_COMM_NULL before
// freeing it, so it must never hold an indeterminate value.
53 DistributedDirectSolverBase() : masters_comm(MPI_COMM_NULL) {}
// Set-up from a local matrix strip `Astrip`.
// NOTE(review): the function signature and several statements are not visible
// in this excerpt; the comments below describe only the visible lines.
//
// `domain` comes from comm.exclusive_sum(n). The code below reads
// domain[i + 1] - domain[i] as the size of rank i's part and domain.back()
// as the global size.
60 std::vector<int> domain = comm.exclusive_sum(n);
// Ranks whose part is non-empty.
61 std::vector<int> active;
62 active.reserve(comm.size);
66 for (
int i = 0; i < comm.size; ++i) {
67 if (domain[i + 1] - domain[i] > 0) {
// NOTE(review): the condition guarding this assignment and the statement that
// appends to `active` are not visible — presumably this records the position
// of the current rank inside `active`.
69 active_rank = active.size();
// Number of master ranks: no more than the number of active ranks and no more
// than what the concrete solver reports via comm_size() for the global size.
75 int nmasters = std::min<int>(active.size(), solver().comm_size(domain.back()));
// Group size, rounded up so that every active rank belongs to some group.
76 int slaves_per_master = (active.size() + nmasters - 1) / nmasters;
// Index (within `active`) of the first member of this rank's group.
77 int group_beg = (active_rank / slaves_per_master) * slaves_per_master;
// The first member of the group acts as its master.
79 group_master = active[group_beg];
// Trailing arguments of a call whose head is not visible (presumably
// MPI_Comm_split): group masters get color 0, all other ranks MPI_UNDEFINED;
// comm.rank is the ordering key; the result is stored in masters_comm.
83 comm.rank == group_master ? 0 : MPI_UNDEFINED,
84 comm.rank, &masters_comm);
// Number of entries in each local row of Astrip.
90 std::vector<ptr_type> widths(n);
91 for (ptrdiff_t i = 0; i < n; ++i)
92 widths[i] = Astrip.ptr[i + 1] - Astrip.ptr[i];
// Master branch: gather the rows of all group members into one matrix A.
94 if (comm.rank == group_master) {
// The last group may be shorter than slaves_per_master.
95 int group_end = std::min<int>(group_beg + slaves_per_master, active.size());
97 int group_size = group_end - group_beg;
103 solve_req.resize(group_size);
104 slaves.reserve(group_size);
105 counts.reserve(group_size);
110 for (
int j = group_beg; j < group_end; ++j) {
// NOTE(review): the definition of `i` used here is not visible — presumably
// the rank active[j]. `m` is the size of that rank's part.
113 int m = domain[i + 1] - domain[i];
// Consolidated matrix: nloc rows (definition of nloc not visible) and as many
// columns as the global size.
121 A.set_size(nloc, domain.back(),
false);
125 cons_x.resize(A.nbRow());
// The master's own row widths go first, starting at A.ptr[1].
128 std::copy(widths.begin(), widths.end(), &A.ptr[1]);
130 for (
int j = 0; j < group_size; ++j) {
// Non-blocking receive of counts[j] row widths from rank i into A.ptr at
// offset `shift` (the updates of `i` and `shift` are not visible).
133 cnt_req[j] = comm.doIReceive(&A.ptr[shift], counts[j], i, cnt_tag);
138 comm.waitAll(cnt_req);
// NOTE(review): presumably scan_row_sizes() turns the widths stored in A.ptr
// into row offsets and returns the total number of nonzeros — confirm against
// the matrix class.
140 A.set_nonzeros(A.scan_row_sizes());
// The master's own column indices and values occupy the front of A.
142 std::copy(Astrip.col.data(), Astrip.col.data() + Astrip.nbNonZero(), A.col.data());
143 std::copy(Astrip.val.data(), Astrip.val.data() + Astrip.nbNonZero(), A.val.data());
// Data received from the other group members is placed after the master's own.
145 shift = Astrip.nbNonZero();
146 for (
int j = 0, d0 = domain[comm.rank]; j < group_size; ++j) {
// Number of nonzeros owned by rank i, from the row offsets of its row range
// expressed relative to the master's first row d0.
149 int nnz = A.ptr[domain[i + 1] - d0] - A.ptr[domain[i] - d0];
// NOTE(review): these calls use `A.col + shift` / `A.val + shift`, whereas the
// copies above use `.data()`; confirm the container supports this arithmetic.
151 col_req[j] = comm.doIReceive(A.col + shift, nnz, i, col_tag);
152 val_req[j] = comm.doIReceive(A.val + shift, nnz, i, val_tag);
157 comm.waitAll(col_req);
158 comm.waitAll(val_req);
// Counterpart of the receives above: row widths, column indices and values of
// the local strip are sent to the group master. (The `else` introducing this
// branch is not visible in this excerpt.)
163 comm.doSend(widths.data(), n, group_master, cnt_tag);
164 comm.doSend(Astrip.col.data(), Astrip.nbNonZero(), group_master, col_tag);
165 comm.doSend(Astrip.val.data(), Astrip.nbNonZero(), group_master, val_tag);
// Set-up from a distributed matrix `A`: its local and remote parts are merged
// row by row into a single strip `a`.
// NOTE(review): the function signature, the declaration of `a`, the updates of
// `head` and the final hand-off of `a` are not visible in this excerpt.
174 const build_matrix& A_loc = *A.local();
175 const build_matrix& A_rem = *A.remote();
// The strip has the local row count and the global column count.
179 a.set_size(A.loc_rows(), A.glob_cols(),
false);
// Room for every entry of both parts.
180 a.set_nonzeros(A_loc.nbNonZero() + A_rem.nbNonZero());
// `head` is the write position in a.col / a.val.
183 for (
size_t i = 0, head = 0; i < A_loc.nbRow(); ++i) {
// Offset added to local column indices below.
// NOTE(review): this value does not depend on `i` and could be computed once
// before the loop.
184 ptrdiff_t shift = A.loc_col_shift();
// Entries of the local part: column indices are shifted by `shift`.
186 for (ptrdiff_t j = A_loc.ptr[i], e = A_loc.ptr[i + 1]; j < e; ++j) {
187 a.col[head] = A_loc.col[j] + shift;
188 a.val[head] = A_loc.val[j];
// Entries of the remote part: column indices are copied unchanged.
192 for (ptrdiff_t j = A_rem.ptr[i], e = A_rem.ptr[i + 1]; j < e; ++j) {
193 a.col[head] = A_rem.col[j];
194 a.val[head] = A_rem.val[j];
// Releases the masters communicator if one is held.
// Correctness depends on masters_comm holding either a valid communicator or
// MPI_COMM_NULL by the time the destructor runs.
204 virtual ~DistributedDirectSolverBase()
206 if (masters_comm != MPI_COMM_NULL)
207 MPI_Comm_free(&masters_comm);
// Returns this object viewed as the concrete `Solver` type (static downcast).
// NOTE(review): the signature of this accessor is not visible in this excerpt;
// it is presumably the non-const overload of solver().
212 return *
static_cast<Solver*
>(
this);
// Const overload: returns this object viewed as the concrete `Solver` type
// (static downcast), without allowing modification.
215 const Solver& solver()
const
217 return *
static_cast<const Solver*
>(
this);
// Solves the system for right-hand side `f` and stores the result in `x`.
//
// Each rank copies `f` into the host buffer host_v. A group master then
// assembles the pieces of its group into cons_f, calls the concrete solver and
// hands the matching pieces of cons_x back. Any other rank sends its piece to
// its master and receives its part of the solution. The method is const but
// writes to the mutable work buffers.
// NOTE(review): braces, the `else` between the two branches and some
// statements are not visible in this excerpt.
220 template <
class VecF,
class VecX>
221 void operator()(
const VecF& f, VecX& x)
const
226 backend::copy(f, host_v);
228 if (comm.rank == group_master) {
// The master's own n values occupy the front of cons_f.
229 std::copy(host_v.begin(), host_v.end(), cons_f.begin());
// Pieces from the other ranks follow, each counts[j] long.
231 int shift = n, j = 0;
232 for (
int i : slaves) {
233 solve_req[j] = comm.doIReceive(&cons_f[shift], counts[j], i, rhs_tag);
234 shift += counts[j++];
237 comm.waitAll(solve_req);
// Delegate the actual solve to the concrete solver.
239 solver().solve(cons_f, cons_x);
// The first n entries of the solution belong to the master itself.
241 std::copy(cons_x.begin(), cons_x.begin() + n, host_v.begin());
// NOTE(review): `shift` and `j` are presumably reset (to n and 0) on lines not
// visible here before this loop — confirm; otherwise the offsets would
// continue from the end of the receive loop.
245 for (
int i : slaves) {
246 solve_req[j] = comm.doISend(&cons_x[shift], counts[j], i, sol_tag);
247 shift += counts[j++];
250 comm.waitAll(solve_req);
// Non-master side: send the local right-hand side to the master, then receive
// the local part of the solution into the same buffer.
253 comm.doSend(host_v.data(), n, group_master, rhs_tag);
254 comm.doReceive(host_v.data(), n, group_master, sol_tag);
// Copy the local solution from the host buffer to the output vector.
257 backend::copy(host_v, x);
// Message tags used for the point-to-point exchanges in this class.
// Row widths (entries per row).
262 static const int cnt_tag = 5001;
// Column indices.
263 static const int col_tag = 5002;
// Matrix values.
264 static const int val_tag = 5003;
// Right-hand-side pieces sent to a group master.
265 static const int rhs_tag = 5004;
// Solution pieces sent back from a group master.
266 static const int sol_tag = 5005;
// Communicator written during set-up and freed in the destructor when it is
// not MPI_COMM_NULL.
271 MPI_Comm masters_comm;
// Ranks that operator() exchanges right-hand-side and solution pieces with
// when this rank is a group master.
272 std::vector<int> slaves;
// counts[j] is the number of elements exchanged with slaves[j].
273 std::vector<int> counts;
// Work buffers, mutable because the const operator() writes to them:
// cons_f / cons_x hold the consolidated right-hand side and solution on a
// master; host_v holds the local right-hand side and then the local solution.
274 mutable std::vector<rhs_type> cons_f, cons_x, host_v;