12#ifndef ARCCORE_ALINA_CSRMATRIXOPERATIONS_H
13#define ARCCORE_ALINA_CSRMATRIXOPERATIONS_H
26#include "arccore/alina/NumaVector.h"
27#include "arccore/alina/ValueTypeInterface.h"
28#include "arccore/alina/CSRMatrix.h"
29#include "arccore/alina/BackendInterface.h"
30#include "arccore/alina/SparseMatrixMatrixProduct.h"
32#include "arccore/accelerator/Atomic.h"
39namespace Arcane::Alina
46template <
typename V,
typename C,
typename P>
49 const size_t n = A.nbRow();
52 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
55 detail::sort_row(A.col + beg, A.val + beg, end - beg);
64template <
typename V,
typename C,
typename P>
65std::shared_ptr<CSRMatrix<V, C, P>>
68 const size_t n = A.nbRow();
69 const size_t m = A.ncols;
70 const size_t nnz = A.nbNonZero();
72 auto T = std::make_shared<CSRMatrix<V, C, P>>();
73 T->set_size(m, n,
true);
75 for (
size_t j = 0; j < nnz; ++j)
76 ++(T->ptr[A.col[j] + 1]);
81 for (
size_t i = 0; i < n; i++) {
82 for (P j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
83 P head = T->ptr[A.col[j]]++;
85 T->col[head] =
static_cast<C
>(i);
86 T->val[head] = math::adjoint(A.val[j]);
90 std::rotate(T->ptr.data(), T->ptr.data() + m, T->ptr.data() + m + 1);
100template <
class Val,
class Col,
class Ptr>
101std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
104 auto C = std::make_shared<CSRMatrix<Val, Col, Ptr>>();
109 if (max_nb_threads >= 16) {
110 spgemm_rmerge(A, B, *C);
113 spgemm_saad(A, B, *C, sort);
123template <
class Val,
class Col,
class Ptr>
124std::shared_ptr<CSRMatrix<Val, Col, Ptr>>
128 typedef ptrdiff_t Idx;
130 auto C = std::make_shared<CSRMatrix<Val, Col, Ptr>>();
131 precondition(A.nbRow() == B.nbRow() && A.ncols == B.ncols,
"matrices should have same shape!");
132 C->set_size(A.nbRow(), A.ncols);
135 Int32 nb_row =
static_cast<Idx
>(C->nbRow());
137 std::vector<ptrdiff_t> marker(C->ncols, -1);
138 for (Idx i = begin; i < (begin + size); ++i) {
141 for (Idx j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
144 if (marker[c] != i) {
150 for (Idx j = B.ptr[i], e = B.ptr[i + 1]; j < e; ++j) {
153 if (marker[c] != i) {
159 C->ptr[i + 1] = C_cols;
163 C->set_nonzeros(C->scan_row_sizes());
166 std::vector<ptrdiff_t> marker(C->ncols, -1);
167 for (Idx i = begin; i < (begin + size); ++i) {
168 Idx row_beg = C->ptr[i];
169 Idx row_end = row_beg;
171 for (Idx j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
173 Val v = alpha * A.val[j];
175 if (marker[c] < row_beg) {
182 C->val[marker[c]] += v;
186 for (Idx j = B.ptr[i], e = B.ptr[i + 1]; j < e; ++j) {
188 Val v = beta * B.val[j];
190 if (marker[c] < row_beg) {
197 C->val[marker[c]] += v;
202 Alina::detail::sort_row(C->col + row_beg, C->val + row_beg, row_end - row_beg);
213template <
class Val,
class Col,
class Ptr,
class T>
void
216 const ptrdiff_t nb_row = backend::nbRow(A);
219 for (
Int32 i = begin; i < (begin + size); ++i) {
220 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j)
230template <
class value_type,
class col_type,
class ptr_type>
231std::shared_ptr<CSRMatrix<typename math::scalar_of<value_type>::type, col_type, ptr_type>>
234 typedef value_type V;
235 typedef typename math::scalar_of<V>::type S;
237 ARCCORE_ALINA_TIC(
"pointwise_matrix");
238 const ptrdiff_t n = A.nbRow();
239 const ptrdiff_t m = A.ncols;
240 const ptrdiff_t np = n / block_size;
241 const ptrdiff_t mp = m / block_size;
243 precondition(np * block_size == n,
244 "Matrix size should be divisible by block_size");
246 auto ap = std::make_shared<CSRMatrix<S, col_type, ptr_type>>();
249 Ap.set_size(np, mp,
true);
252 std::vector<ptr_type> j(block_size);
253 std::vector<ptr_type> e(block_size);
254 for (
Int32 ip = begin; ip < (begin + size); ++ip) {
255 ptrdiff_t ia = ip * block_size;
256 col_type cur_col = 0;
259 for (
unsigned k = 0; k < block_size; ++k) {
260 ptr_type beg = j[k] = A.ptr[ia + k];
261 ptr_type end = e[k] = A.ptr[ia + k + 1];
266 col_type c = A.col[beg];
273 cur_col = std::min(cur_col, c);
278 cur_col /= block_size;
282 col_type col_end = (cur_col + 1) * block_size;
283 for (
unsigned k = 0; k < block_size; ++k) {
288 col_type c = A.col[beg++];
296 cur_col = std::min(cur_col, c);
309 Ap.set_nonzeros(Ap.scan_row_sizes());
312 std::vector<ptr_type> j(block_size);
313 std::vector<ptr_type> e(block_size);
314 for (
Int32 ip = begin; ip < (begin + size); ++ip) {
315 ptrdiff_t ia = ip * block_size;
316 col_type cur_col = 0;
317 ptr_type head = Ap.ptr[ip];
320 for (
unsigned k = 0; k < block_size; ++k) {
321 ptr_type beg = j[k] = A.ptr[ia + k];
322 ptr_type end = e[k] = A.ptr[ia + k + 1];
327 col_type c = A.col[beg];
334 cur_col = std::min(cur_col, c);
339 cur_col /= block_size;
341 Ap.col[head] = cur_col;
345 S cur_val = math::zero<S>();
347 col_type col_end = (cur_col + 1) * block_size;
348 for (
unsigned k = 0; k < block_size; ++k) {
353 col_type c = A.col[beg];
354 S v = math::norm(A.val[beg]);
363 cur_col = std::min(cur_col, c);
374 cur_val = std::max(cur_val, v);
381 Ap.val[head++] = cur_val;
386 ARCCORE_ALINA_TOC(
"pointwise_matrix");
394template <
typename V,
typename C,
typename P> std::shared_ptr<numa_vector<V>>
397 const size_t nb_row = A.nbRow();
398 auto dia = std::make_shared<numa_vector<V>>(nb_row,
false);
401 for (
Int32 i = begin; i < (begin + size); ++i) {
402 for (
auto a = A.row_begin(i); a; ++a) {
406 d = math::is_zero(d) ? math::identity<V>() : math::inverse(d);
425template <
bool scale,
class Matrix>
427spectral_radius(
const Matrix& A,
int power_iters = 0)
429 ARCCORE_ALINA_TIC(
"spectral radius");
430 typedef typename backend::value_type<Matrix>::type value_type;
431 typedef typename math::rhs_of<value_type>::type rhs_type;
432 typedef typename math::scalar_of<value_type>::type scalar_type;
434 const ptrdiff_t n = backend::nbRow(A);
438 if (power_iters <= 0) {
444 scalar_type emax = 0;
445 value_type dia = math::identity<value_type>();
447 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
450 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
451 ptrdiff_t c = A.col[j];
452 value_type v = A.val[j];
461 s *= math::norm(math::inverse(dia));
463 emax = std::max(emax, s);
475 std::atomic<scalar_type> atomic_b0_norm = 0;
478 std::mt19937 rng(tid);
479 std::uniform_real_distribution<scalar_type> rnd(-1, 1);
481 scalar_type loc_norm = 0;
483 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
484 rhs_type v = math::constant<rhs_type>(rnd(rng));
487 loc_norm += math::norm(math::inner_product(v, v));
491 atomic_b0_norm += loc_norm;
494 scalar_type b0_norm = atomic_b0_norm;
497 b0_norm = 1 /
sqrt(b0_norm);
499 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
500 b0[i] = b0_norm * b0[i];
504 for (
int iter = 0; iter < power_iters;) {
508 std::atomic<scalar_type> atomic_b1_norm = 0;
509 std::atomic<scalar_type> atomic_radius = 0;
511 scalar_type loc_norm = 0;
512 scalar_type loc_radi = 0;
513 value_type dia = math::identity<value_type>();
515 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
516 rhs_type s = math::zero<rhs_type>();
518 for (ptrdiff_t j = A.ptr[i], e = A.ptr[i + 1]; j < e; ++j) {
519 ptrdiff_t c = A.col[j];
520 value_type v = A.val[j];
527 s = math::inverse(dia) * s;
529 loc_norm += math::norm(math::inner_product(s, s));
530 loc_radi += math::norm(math::inner_product(s, b0[i]));
537 atomic_b1_norm += loc_norm;
538 atomic_radius += loc_radi;
541 scalar_type b1_norm = atomic_b1_norm;
542 radius = atomic_radius;
544 if (++iter < power_iters) {
546 b1_norm = 1 /
sqrt(b1_norm);
548 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
549 b0[i] = b1_norm * b1[i];
555 ARCCORE_ALINA_TOC(
"spectral radius");
557 return radius < 0 ? static_cast<scalar_type>(2) : radius;
NUMA-aware vector container.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
__host__ __device__ DataType doAtomic(DataType *ptr, ValueType value)
Applique l'opération atomique Operation à la valeur à l'adresse ptr avec la valeur value.
__host__ __device__ double sqrt(double v)
Racine carrée de v.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loop_ranges.
std::int32_t Int32
Type entier signé sur 32 bits.
Sparse matrix stored in CSR (Compressed Sparse Row) format.
Scalar type of a non-scalar type.