Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
CSRMatrixOperations.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* CSRMatrixOperations.h (C) 2000-2026 */
9/* */
10/* Operations on CSRMatrix. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_CSRMATRIXOPERATIONS_H
13#define ARCCORE_ALINA_CSRMATRIXOPERATIONS_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26#include "arccore/alina/NumaVector.h"
27#include "arccore/alina/ValueTypeInterface.h"
28#include "arccore/alina/CSRMatrix.h"
29#include "arccore/alina/BackendInterface.h"
30#include "arccore/alina/SparseMatrixMatrixProduct.h"
31
32#include "arccore/accelerator/Atomic.h"
33
34#include <random>
35
36/*---------------------------------------------------------------------------*/
37/*---------------------------------------------------------------------------*/
38
39namespace Arcane::Alina
40{
41
42/*---------------------------------------------------------------------------*/
43/*---------------------------------------------------------------------------*/
44
46template <typename V, typename C, typename P>
47void sort_rows(CSRMatrix<V, C, P>& A)
48{
49 const size_t n = A.nbRow();
50
51 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
52 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
53 P beg = A.ptr[i];
54 P end = A.ptr[i + 1];
55 detail::sort_row(A.col + beg, A.val + beg, end - beg);
56 }
57 });
58}
59
60/*---------------------------------------------------------------------------*/
61/*---------------------------------------------------------------------------*/
62
64template <typename V, typename C, typename P>
65std::shared_ptr<CSRMatrix<V, C, P>>
66transpose(const CSRMatrix<V, C, P>& A)
67{
68 const size_t n = A.nbRow();
69 const size_t m = A.ncols;
70 const size_t nnz = A.nbNonZero();
71
72 auto T = std::make_shared<CSRMatrix<V, C, P>>();
73 T->set_size(m, n, true);
74
75 for (size_t j = 0; j < nnz; ++j)
76 ++(T->ptr[A.col[j] + 1]);
77
78 T->scan_row_sizes();
79 T->set_nonzeros();
80
81 for (size_t i = 0; i < n; i++) {
82 for (P j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
83 P head = T->ptr[A.col[j]]++;
84
85 T->col[head] = static_cast<C>(i);
86 T->val[head] = math::adjoint(A.val[j]);
87 }
88 }
89
90 std::rotate(T->ptr.data(), T->ptr.data() + m, T->ptr.data() + m + 1);
91 T->ptr[0] = 0;
92
93 return T;
94}
95
96/*---------------------------------------------------------------------------*/
97/*---------------------------------------------------------------------------*/
98
100template <class Val, class Col, class Ptr>
101std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
102product(const CSRMatrix<Val, Col, Ptr>& A, const CSRMatrix<Val, Col, Ptr>& B, bool sort = false)
103{
104 auto C = std::make_shared<CSRMatrix<Val, Col, Ptr>>();
105
106 const int max_nb_threads = ConcurrencyBase::maxAllowedThread();
107
108 // TODO: find a way to configure this value for testing purpose.
109 if (max_nb_threads >= 16) {
110 spgemm_rmerge(A, B, *C);
111 }
112 else {
113 spgemm_saad(A, B, *C, sort);
114 }
115
116 return C;
117}
118
119/*---------------------------------------------------------------------------*/
120/*---------------------------------------------------------------------------*/
121
123template <class Val, class Col, class Ptr>
124std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
125sum(Val alpha, const CSRMatrix<Val, Col, Ptr>& A, Val beta,
126 const CSRMatrix<Val, Col, Ptr>& B, bool sort = false)
127{
128 typedef ptrdiff_t Idx;
129
130 auto C = std::make_shared<CSRMatrix<Val, Col, Ptr>>();
131 precondition(A.nbRow() == B.nbRow() && A.ncols == B.ncols, "matrices should have same shape!");
132 C->set_size(A.nbRow(), A.ncols);
133
134 C->ptr[0] = 0;
135 Int32 nb_row = static_cast<Idx>(C->nbRow());
136 arccoreParallelFor(0, nb_row, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
137 std::vector<ptrdiff_t> marker(C->ncols, -1);
138 for (Idx i = begin; i < (begin + size); ++i) {
139 Idx C_cols = 0;
140
141 for (Idx j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
142 Idx c = A.col[j];
143
144 if (marker[c] != i) {
145 marker[c] = i;
146 ++C_cols;
147 }
148 }
149
150 for (Idx j = B.ptr[i], e = B.ptr[i + 1]; j < e; ++j) {
151 Idx c = B.col[j];
152
153 if (marker[c] != i) {
154 marker[c] = i;
155 ++C_cols;
156 }
157 }
158
159 C->ptr[i + 1] = C_cols;
160 }
161 });
162
163 C->set_nonzeros(C->scan_row_sizes());
164
165 arccoreParallelFor(0, nb_row, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
166 std::vector<ptrdiff_t> marker(C->ncols, -1);
167 for (Idx i = begin; i < (begin + size); ++i) {
168 Idx row_beg = C->ptr[i];
169 Idx row_end = row_beg;
170
171 for (Idx j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
172 Idx c = A.col[j];
173 Val v = alpha * A.val[j];
174
175 if (marker[c] < row_beg) {
176 marker[c] = row_end;
177 C->col[row_end] = c;
178 C->val[row_end] = v;
179 ++row_end;
180 }
181 else {
182 C->val[marker[c]] += v;
183 }
184 }
185
186 for (Idx j = B.ptr[i], e = B.ptr[i + 1]; j < e; ++j) {
187 Idx c = B.col[j];
188 Val v = beta * B.val[j];
189
190 if (marker[c] < row_beg) {
191 marker[c] = row_end;
192 C->col[row_end] = c;
193 C->val[row_end] = v;
194 ++row_end;
195 }
196 else {
197 C->val[marker[c]] += v;
198 }
199 }
200
201 if (sort)
202 Alina::detail::sort_row(C->col + row_beg, C->val + row_beg, row_end - row_beg);
203 }
204 });
205
206 return C;
207}
208
209/*---------------------------------------------------------------------------*/
210/*---------------------------------------------------------------------------*/
211
213template <class Val, class Col, class Ptr, class T> void
214scale(CSRMatrix<Val, Col, Ptr>& A, T s)
215{
216 const ptrdiff_t nb_row = backend::nbRow(A);
217
218 arccoreParallelFor(0, nb_row, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
219 for (Int32 i = begin; i < (begin + size); ++i) {
220 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
221 A.val[j] *= s;
222 }
223 });
224}
225
226/*---------------------------------------------------------------------------*/
227/*---------------------------------------------------------------------------*/
228
// Reduce matrix to a pointwise one.
//
// Each block_size x block_size block of A is collapsed into one scalar
// entry: the maximum of math::norm() over the values present in that block.
// The result has A.nbRow()/block_size rows and A.ncols/block_size columns.
//
// Two parallel passes are used: the first counts the non-empty block
// columns of each block row, the second fills column indices and values.
// In both passes the block_size underlying scalar rows are swept
// simultaneously with per-row cursors (j[k]) always advancing to the
// left-most block column not yet consumed.
template <class value_type, class col_type, class ptr_type>
std::shared_ptr<CSRMatrix<typename math::scalar_of<value_type>::type, col_type, ptr_type>>
pointwise_matrix(const CSRMatrix<value_type, col_type, ptr_type>& A, unsigned block_size)
{
  typedef value_type V;
  typedef typename math::scalar_of<V>::type S;

  ARCCORE_ALINA_TIC("pointwise_matrix");
  const ptrdiff_t n = A.nbRow();
  const ptrdiff_t m = A.ncols;
  const ptrdiff_t np = n / block_size;
  const ptrdiff_t mp = m / block_size;

  precondition(np * block_size == n,
               "Matrix size should be divisible by block_size");

  auto ap = std::make_shared<CSRMatrix<S, col_type, ptr_type>>();
  auto& Ap = *ap;

  Ap.set_size(np, mp, true);

  // Pass 1: count the number of non-empty block columns per block row.
  arccoreParallelFor(0, np, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
    // j[k] / e[k]: cursor and end offset of the k-th scalar row of the
    // current block row.
    std::vector<ptr_type> j(block_size);
    std::vector<ptr_type> e(block_size);
    for (Int32 ip = begin; ip < (begin + size); ++ip) {
      ptrdiff_t ia = ip * block_size; // first scalar row of block row ip
      col_type cur_col = 0;
      bool done = true;

      // Initialize the cursors and find the smallest column index present
      // in any of the block's scalar rows. 'done' stays true only when the
      // whole block row is empty.
      for (unsigned k = 0; k < block_size; ++k) {
        ptr_type beg = j[k] = A.ptr[ia + k];
        ptr_type end = e[k] = A.ptr[ia + k + 1];

        if (beg == end)
          continue;

        col_type c = A.col[beg];

        if (done) {
          done = false;
          cur_col = c;
        }
        else {
          cur_col = std::min(cur_col, c);
        }
      }

      // Consume one block column per iteration, left to right.
      while (!done) {
        cur_col /= block_size; // scalar column -> block column index
        ++Ap.ptr[ip + 1]; // row size, shifted by one for the scan below

        done = true;
        col_type col_end = (cur_col + 1) * block_size;
        for (unsigned k = 0; k < block_size; ++k) {
          ptr_type beg = j[k];
          ptr_type end = e[k];

          // Skip this row's entries belonging to the current block column;
          // the first entry beyond it proposes the next block column.
          while (beg < end) {
            col_type c = A.col[beg++];

            if (c >= col_end) {
              if (done) {
                done = false;
                cur_col = c;
              }
              else {
                cur_col = std::min(cur_col, c);
              }

              break;
            }
          }

          j[k] = beg;
        }
      }
    }
  });

  Ap.set_nonzeros(Ap.scan_row_sizes());

  // Pass 2: same sweep again, now writing column indices and the reduced
  // (max-norm) value of each block.
  arccoreParallelFor(0, np, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
    std::vector<ptr_type> j(block_size);
    std::vector<ptr_type> e(block_size);
    for (Int32 ip = begin; ip < (begin + size); ++ip) {
      ptrdiff_t ia = ip * block_size;
      col_type cur_col = 0;
      ptr_type head = Ap.ptr[ip]; // insertion position for block row ip
      bool done = true;

      // Same cursor initialization as in pass 1.
      for (unsigned k = 0; k < block_size; ++k) {
        ptr_type beg = j[k] = A.ptr[ia + k];
        ptr_type end = e[k] = A.ptr[ia + k + 1];

        if (beg == end)
          continue;

        col_type c = A.col[beg];

        if (done) {
          done = false;
          cur_col = c;
        }
        else {
          cur_col = std::min(cur_col, c);
        }
      }

      while (!done) {
        cur_col /= block_size;

        Ap.col[head] = cur_col;

        done = true;
        bool first = true;
        S cur_val = math::zero<S>();

        col_type col_end = (cur_col + 1) * block_size;
        for (unsigned k = 0; k < block_size; ++k) {
          ptr_type beg = j[k];
          ptr_type end = e[k];

          while (beg < end) {
            col_type c = A.col[beg];
            S v = math::norm(A.val[beg]);
            ++beg;

            // Entry past the current block column: remember it as a
            // candidate for the next block column and stop this row.
            if (c >= col_end) {
              if (done) {
                done = false;
                cur_col = c;
              }
              else {
                cur_col = std::min(cur_col, c);
              }

              break;
            }

            // Reduce over the block: keep the maximum norm.
            if (first) {
              first = false;
              cur_val = v;
            }
            else {
              cur_val = std::max(cur_val, v);
            }
          }

          j[k] = beg;
        }

        Ap.val[head++] = cur_val;
      }
    }
  });

  ARCCORE_ALINA_TOC("pointwise_matrix");
  return ap;
}
389
390/*---------------------------------------------------------------------------*/
391/*---------------------------------------------------------------------------*/
392
394template <typename V, typename C, typename P> std::shared_ptr<numa_vector<V>>
395diagonal(const CSRMatrix<V, C, P>& A, bool invert = false)
396{
397 const size_t nb_row = A.nbRow();
398 auto dia = std::make_shared<numa_vector<V>>(nb_row, false);
399
400 arccoreParallelFor(0, nb_row, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
401 for (Int32 i = begin; i < (begin + size); ++i) {
402 for (auto a = A.row_begin(i); a; ++a) {
403 if (a.col() == i) {
404 V d = a.value();
405 if (invert) {
406 d = math::is_zero(d) ? math::identity<V>() : math::inverse(d);
407 }
408 (*dia)[i] = d;
409 break;
410 }
411 }
412 }
413 });
414
415 return dia;
416}
417
418/*---------------------------------------------------------------------------*/
419/*---------------------------------------------------------------------------*/
420
421// Estimate spectral radius of the matrix.
422// Use Gershgorin disk theorem when power_iters == 0,
423// Use Power method when power_iters > 0.
424// When scale = true, scale the matrix by its inverse diagonal.
425template <bool scale, class Matrix>
427spectral_radius(const Matrix& A, int power_iters = 0)
428{
429 ARCCORE_ALINA_TIC("spectral radius");
430 typedef typename backend::value_type<Matrix>::type value_type;
431 typedef typename math::rhs_of<value_type>::type rhs_type;
432 typedef typename math::scalar_of<value_type>::type scalar_type;
433
434 const ptrdiff_t n = backend::nbRow(A);
435 scalar_type radius;
436
437 // TODO: CONCURRENCY Use arccore concurrency for spectral_radius
438 if (power_iters <= 0) {
439 // Use Gershgorin disk theorem.
440 radius = 0;
441
442 // NOTE: It is possible to do non blocking (nowait using OpenMP)
443 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
444 scalar_type emax = 0;
445 value_type dia = math::identity<value_type>();
446
447 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
448 scalar_type s = 0;
449
450 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
451 ptrdiff_t c = A.col[j];
452 value_type v = A.val[j];
453
454 s += math::norm(v);
455
456 if (scale && c == i)
457 dia = v;
458 }
459
460 if (scale)
461 s *= math::norm(math::inverse(dia));
462
463 emax = std::max(emax, s);
464 }
465
467 });
468 }
469 else {
470 // Power method.
471 numa_vector<rhs_type> b0(n, false), b1(n, false);
472
473 // Fill the initial vector with random values.
474 // Also extract the inverted matrix diagonal values.
475 std::atomic<scalar_type> atomic_b0_norm = 0;
476 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
477 const int tid = TaskFactory::currentTaskThreadIndex();
478 std::mt19937 rng(tid);
479 std::uniform_real_distribution<scalar_type> rnd(-1, 1);
480
481 scalar_type loc_norm = 0;
482
483 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
484 rhs_type v = math::constant<rhs_type>(rnd(rng));
485
486 b0[i] = v;
487 loc_norm += math::norm(math::inner_product(v, v));
488 }
489
490 // GG: Not reproducible
491 atomic_b0_norm += loc_norm;
492 });
493
494 scalar_type b0_norm = atomic_b0_norm;
495
496 // Normalize b0
497 b0_norm = 1 / sqrt(b0_norm);
498 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
499 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
500 b0[i] = b0_norm * b0[i];
501 }
502 });
503
504 for (int iter = 0; iter < power_iters;) {
505 // b1 = scale ? (D^1 * A) * b0 : A * b0
506 // b1_norm = ||b1||
507 // radius = <b1,b0>
508 std::atomic<scalar_type> atomic_b1_norm = 0;
509 std::atomic<scalar_type> atomic_radius = 0;
510 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
511 scalar_type loc_norm = 0;
512 scalar_type loc_radi = 0;
513 value_type dia = math::identity<value_type>();
514
515 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
516 rhs_type s = math::zero<rhs_type>();
517
518 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
519 ptrdiff_t c = A.col[j];
520 value_type v = A.val[j];
521 if (scale && c == i)
522 dia = v;
523 s += v * b0[c];
524 }
525
526 if (scale)
527 s = math::inverse(dia) * s;
528
529 loc_norm += math::norm(math::inner_product(s, s));
530 loc_radi += math::norm(math::inner_product(s, b0[i]));
531
532 b1[i] = s;
533 }
534
535 {
536 // GG: Not reproducible
537 atomic_b1_norm += loc_norm;
538 atomic_radius += loc_radi;
539 }
540 });
541 scalar_type b1_norm = atomic_b1_norm;
542 radius = atomic_radius;
543
544 if (++iter < power_iters) {
545 // b0 = b1 / b1_norm
546 b1_norm = 1 / sqrt(b1_norm);
547 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
548 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
549 b0[i] = b1_norm * b1[i];
550 }
551 });
552 }
553 }
554 }
555 ARCCORE_ALINA_TOC("spectral radius");
556
557 return radius < 0 ? static_cast<scalar_type>(2) : radius;
558}
559
560/*---------------------------------------------------------------------------*/
561/*---------------------------------------------------------------------------*/
562
563} // namespace Arcane::Alina
564
565/*---------------------------------------------------------------------------*/
566/*---------------------------------------------------------------------------*/
567
568#endif
NUMA-aware vector container.
Definition NumaVector.h:42
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
__host__ __device__ DataType doAtomic(DataType *ptr, ValueType value)
Applique l'opération atomique Operation à la valeur à l'adresse ptr avec la valeur value.
__host__ __device__ double sqrt(double v)
Racine carrée de v.
Definition Math.h:135
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Sparse matrix stored in CSR (Compressed Sparse Row) format.
Definition CSRMatrix.h:98
Scalar type of a non-scalar type.