Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
MatrixPartitionUtils.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* MatrixPartitionUtils.h (C) 2000-2026 */
9/* */
10/* Utils for matrix repartitioning. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_MATRIXPARTITIONUTILS_H
13#define ARCCORE_ALINA_MATRIXPARTITIONUTILS_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include <vector>
27#include <algorithm>
28#include <numeric>
29
30#include <tuple>
31
32#include "arccore/alina/BackendInterface.h"
33#include "arccore/alina/DistributedMatrix.h"
34
35/*---------------------------------------------------------------------------*/
36/*---------------------------------------------------------------------------*/
37
38namespace Arcane::Alina
39{
40
41/*---------------------------------------------------------------------------*/
42/*---------------------------------------------------------------------------*/
43
/*!
 * \brief Builds, in CRS form (\a ptr, \a col), the symmetrized adjacency
 * graph of the distributed matrix \a A, i.e. the union of the sparsity
 * patterns of A and of its transpose.
 *
 * Diagonal entries of the local block are skipped. Local column indices are
 * shifted by A.loc_col_shift() into global numbering; remote columns are
 * emitted as stored — presumably already global indices (NOTE(review):
 * confirm against the DistributedMatrix column layout).
 *
 * NOTE(review): sort_row() reorders the rows of A's local and remote blocks
 * in place, so the storage behind the const reference \a A is mutated.
 */
template <class Backend, class Ptr, class Col> void
mpi_symm_graph(const DistributedMatrix<Backend>& A,
               std::vector<Ptr>& ptr, std::vector<Col>& col)
{
  using build_matrix = Backend::matrix;

  ARCCORE_ALINA_TIC("symm graph");

  build_matrix& A_loc = *A.local();
  build_matrix& A_rem = *A.remote();

  ptrdiff_t n = A_loc.nbRow();
  ptrdiff_t row_beg = A.loc_col_shift(); // global index of the first local column

  auto T = transpose(A);

  build_matrix& T_loc = *T->local();
  build_matrix& T_rem = *T->remote();

  // Build symmetric graph.
  // First pass: count the entries of each merged row. ptr[i + 1] receives the
  // width of row i; ptr[0] stays 0 so the partial sum below yields offsets.
  ptr.resize(n + 1, 0);

  arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
    for (ptrdiff_t i = begin; i < (begin + size); ++i) {
      using Alina::detail::sort_row;

      ptrdiff_t A_loc_beg = A_loc.ptr[i];
      ptrdiff_t A_loc_end = A_loc.ptr[i + 1];

      ptrdiff_t A_rem_beg = A_rem.ptr[i];
      ptrdiff_t A_rem_end = A_rem.ptr[i + 1];

      ptrdiff_t T_loc_beg = T_loc.ptr[i];
      ptrdiff_t T_loc_end = T_loc.ptr[i + 1];

      ptrdiff_t T_rem_beg = T_rem.ptr[i];
      ptrdiff_t T_rem_end = T_rem.ptr[i + 1];

      // The two-pointer unions below require each row to be sorted by column.
      sort_row(A_loc.col + A_loc_beg, A_loc.val + A_loc_beg, A_loc_end - A_loc_beg);
      sort_row(A_rem.col + A_rem_beg, A_rem.val + A_rem_beg, A_rem_end - A_rem_beg);

      sort_row(T_loc.col + T_loc_beg, T_loc.val + T_loc_beg, T_loc_end - T_loc_beg);
      sort_row(T_rem.col + T_rem_beg, T_rem.val + T_rem_beg, T_rem_end - T_rem_beg);

      Ptr row_width = 0;

      // Sorted union of the local rows of A and A^T; each iteration consumes
      // exactly one merged column c (advancing one cursor, or both on a tie).
      for (ptrdiff_t ja = A_loc_beg, jt = T_loc_beg; ja < A_loc_end || jt < T_loc_end;) {
        ptrdiff_t c;
        if (ja == A_loc_end) {
          c = T_loc.col[jt];
          ++jt;
        }
        else if (jt == T_loc_end) {
          c = A_loc.col[ja];
          ++ja;
        }
        else {
          ptrdiff_t ca = A_loc.col[ja];
          ptrdiff_t ct = T_loc.col[jt];
          if (ca < ct) {
            c = ca;
            ++ja;
          }
          else if (ca == ct) {
            c = ca;
            ++ja;
            ++jt;
          }
          else {
            c = ct;
            ++jt;
          }
        }

        // Exclude the diagonal from the graph.
        if (c != i)
          ++row_width;
      }

      // Sorted union of the remote rows of A and A^T; every merged entry is
      // counted (the diagonal cannot appear in the remote block).
      for (ptrdiff_t ja = A_rem_beg, jt = T_rem_beg; ja < A_rem_end || jt < T_rem_end;) {
        if (ja == A_rem_end) {
          ++jt;
        }
        else if (jt == T_rem_end) {
          ++ja;
        }
        else {
          ptrdiff_t ca = A_rem.col[ja];
          ptrdiff_t ct = T_rem.col[jt];
          if (ca < ct) {
            ++ja;
          }
          else if (ca == ct) {
            ++ja;
            ++jt;
          }
          else {
            ++jt;
          }
        }

        ++row_width;
      }

      ptr[i + 1] = row_width;
    }
  });

  // Turn per-row widths into CRS row offsets.
  std::partial_sum(ptr.begin(), ptr.end(), ptr.begin());

  col.resize(ptr.back());
  if (col.empty())
    col.reserve(1); // So that col.data() is not NULL

  // Second pass: replay the exact same two merges, this time writing the
  // merged column indices at the offsets computed above.
  arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
    for (ptrdiff_t i = begin; i < (begin + size); ++i) {
      ptrdiff_t A_loc_beg = A_loc.ptr[i];
      ptrdiff_t A_loc_end = A_loc.ptr[i + 1];

      ptrdiff_t A_rem_beg = A_rem.ptr[i];
      ptrdiff_t A_rem_end = A_rem.ptr[i + 1];

      ptrdiff_t T_loc_beg = T_loc.ptr[i];
      ptrdiff_t T_loc_end = T_loc.ptr[i + 1];

      ptrdiff_t T_rem_beg = T_rem.ptr[i];
      ptrdiff_t T_rem_end = T_rem.ptr[i + 1];

      Ptr head = ptr[i]; // write cursor for row i

      // Local union: shift columns to global numbering, skip the diagonal.
      for (ptrdiff_t ja = A_loc_beg, jt = T_loc_beg; ja < A_loc_end || jt < T_loc_end;) {
        ptrdiff_t c;
        if (ja == A_loc_end) {
          c = T_loc.col[jt];
          ++jt;
        }
        else if (jt == T_loc_end) {
          c = A_loc.col[ja];
          ++ja;
        }
        else {
          ptrdiff_t ca = A_loc.col[ja];
          ptrdiff_t ct = T_loc.col[jt];

          if (ca < ct) {
            c = ca;
            ++ja;
          }
          else if (ca == ct) {
            c = ca;
            ++ja;
            ++jt;
          }
          else {
            c = ct;
            ++jt;
          }
        }
        if (c != i)
          col[head++] = c + row_beg;
      }

      // Remote union: columns are written unshifted.
      for (ptrdiff_t ja = A_rem_beg, jt = T_rem_beg; ja < A_rem_end || jt < T_rem_end;) {
        if (ja == A_rem_end) {
          col[head] = T_rem.col[jt];
          ++jt;
        }
        else if (jt == T_rem_end) {
          col[head] = A_rem.col[ja];
          ++ja;
        }
        else {
          ptrdiff_t ca = A_rem.col[ja];
          ptrdiff_t ct = T_rem.col[jt];

          if (ca < ct) {
            col[head] = ca;
            ++ja;
          }
          else if (ca == ct) {
            col[head] = ca;
            ++ja;
            ++jt;
          }
          else {
            col[head] = ct;
            ++jt;
          }
        }
        ++head;
      }
    }
  });

  ARCCORE_ALINA_TOC("symm graph");
}
239
240/*---------------------------------------------------------------------------*/
241/*---------------------------------------------------------------------------*/
242
243template <class Idx> std::tuple<ptrdiff_t, ptrdiff_t>
244mpi_graph_perm_index(mpi_communicator comm, int npart, const std::vector<Idx>& part,
245 std::vector<ptrdiff_t>& perm)
246{
247 ARCCORE_ALINA_TIC("perm index");
248 ptrdiff_t n = part.size();
249 perm.resize(n);
250
251 std::vector<ptrdiff_t> loc_part_cnt(npart, 0);
252 std::vector<ptrdiff_t> loc_part_beg(npart, 0);
253 std::vector<ptrdiff_t> glo_part_cnt(npart);
254 std::vector<ptrdiff_t> glo_part_beg(npart + 1);
255
256 for (Idx p : part)
257 ++loc_part_cnt[p];
258
259 MPI_Exscan(&loc_part_cnt[0], &loc_part_beg[0], npart, mpi_datatype<ptrdiff_t>(), MPI_SUM, comm);
260 MPI_Allreduce(&loc_part_cnt[0], &glo_part_cnt[0], npart, mpi_datatype<ptrdiff_t>(), MPI_SUM, comm);
261
262 glo_part_beg[0] = 0;
263 std::partial_sum(glo_part_cnt.begin(), glo_part_cnt.end(), glo_part_beg.begin() + 1);
264
265 std::vector<ptrdiff_t> cnt(npart, 0);
266 for (ptrdiff_t i = 0; i < n; ++i) {
267 Idx p = part[i];
268 perm[i] = glo_part_beg[p] + loc_part_beg[p] + cnt[p]++;
269 }
270
271 ARCCORE_ALINA_TOC("perm index");
272 return std::make_tuple(
273 glo_part_beg[std::min(npart, comm.rank)],
274 glo_part_beg[std::min(npart, comm.rank + 1)]);
275}
276
277/*---------------------------------------------------------------------------*/
278/*---------------------------------------------------------------------------*/
279
280template <class Backend, class Idx>
281std::shared_ptr<DistributedMatrix<Backend>>
282mpi_graph_perm_matrix(mpi_communicator comm, ptrdiff_t col_beg, ptrdiff_t col_end,
283 const std::vector<Idx>& perm)
284{
285 typedef typename Backend::value_type value_type;
286 using build_matrix = Backend::matrix;
287
288 ARCCORE_ALINA_TIC("perm matrix");
289
290 ptrdiff_t n = perm.size();
291 ptrdiff_t ncols = col_end - col_beg;
292
293 auto i_loc = std::make_shared<build_matrix>();
294 auto i_rem = std::make_shared<build_matrix>();
295
296 build_matrix& I_loc = *i_loc;
297 build_matrix& I_rem = *i_rem;
298
299 I_loc.set_size(n, ncols, false);
300 I_rem.set_size(n, 0, false);
301
302 I_loc.ptr[0] = 0;
303 I_rem.ptr[0] = 0;
304
305 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
306 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
307 ptrdiff_t j = perm[i];
308
309 if (col_beg <= j && j < col_end) {
310 I_loc.ptr[i + 1] = 1;
311 I_rem.ptr[i + 1] = 0;
312 }
313 else {
314 I_loc.ptr[i + 1] = 0;
315 I_rem.ptr[i + 1] = 1;
316 }
317 }
318 });
319
320 I_loc.set_nonzeros(I_loc.scan_row_sizes());
321 I_rem.set_nonzeros(I_rem.scan_row_sizes());
322
323 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
324 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
325 ptrdiff_t j = perm[i];
326
327 if (col_beg <= j && j < col_end) {
328 ptrdiff_t k = I_loc.ptr[i];
329 I_loc.col[k] = j - col_beg;
330 I_loc.val[k] = math::identity<value_type>();
331 }
332 else {
333 ptrdiff_t k = I_rem.ptr[i];
334 I_rem.col[k] = j;
335 I_rem.val[k] = math::identity<value_type>();
336 }
337 }
338 });
339
340 ARCCORE_ALINA_TOC("perm matrix");
341 return std::make_shared<DistributedMatrix<Backend>>(comm, i_loc, i_rem);
342}
343
344/*---------------------------------------------------------------------------*/
345/*---------------------------------------------------------------------------*/
346
347} // namespace Arcane::Alina
348
349/*---------------------------------------------------------------------------*/
350/*---------------------------------------------------------------------------*/
351
352#endif
Distributed Matrix using message passing.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Convenience wrapper around MPI_Comm.