Arcane  4.1.11.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
BuiltinBackend.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* BuiltinBackend.h (C) 2000-2026 */
9/* */
10/* Builtin backend using CSR matrix. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_BUILTINBACKEND_H
13#define ARCCORE_ALINA_BUILTINBACKEND_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26//#pragma GCC diagnostic ignored "-Wconversion"
27
28/*---------------------------------------------------------------------------*/
29/*---------------------------------------------------------------------------*/
30
31#include "arccore/alina/CSRMatrixOperations.h"
32#include "arccore/alina/SkylineLUSolver.h"
33#include "arccore/alina/MatrixOperationsImpl.h"
34
35#include "arccore/base/ConcurrencyBase.h"
36#include "arccore/common/SmallArray.h"
37
38#include <numeric>
39#include <random>
40
41/*---------------------------------------------------------------------------*/
42/*---------------------------------------------------------------------------*/
43
44namespace Arcane::Alina
45{
46
47/*---------------------------------------------------------------------------*/
48/*---------------------------------------------------------------------------*/
56template <typename ValueType,
57 typename ColumnType = AlinaDefaultColumnType,
58 typename PointerType = AlinaDefaultRowIndexType>
60{
61 typedef ValueType value_type;
62 typedef ColumnType index_type;
63 typedef ColumnType col_type;
64 typedef PointerType ptr_type;
65
66 typedef typename math::rhs_of<value_type>::type rhs_type;
67
68 struct provides_row_iterator : std::true_type
69 {};
70
72 typedef numa_vector<rhs_type> vector;
73 typedef numa_vector<value_type> matrix_diagonal;
74 typedef solver::SkylineLUSolver<value_type> direct_solver;
75
78
79 static std::string name() { return "builtin"; }
80
81 // Copy matrix. This is a noop for builtin backend.
82 static std::shared_ptr<matrix>
83 copy_matrix(std::shared_ptr<matrix> A, const params&)
84 {
85 return A;
86 }
87
88 // Copy vector to builtin backend.
89 template <class T>
90 static std::shared_ptr<numa_vector<T>>
91 copy_vector(const std::vector<T>& x, const params&)
92 {
93 return std::make_shared<numa_vector<T>>(x);
94 }
95
96 // Copy vector to builtin backend. This is a noop for builtin backend.
97 template <class T>
98 static std::shared_ptr<numa_vector<T>>
99 copy_vector(std::shared_ptr<numa_vector<T>> x, const params&)
100 {
101 return x;
102 }
103
104 // Create vector of the specified size.
105 static std::shared_ptr<vector>
106 create_vector(size_t size, const params&)
107 {
108 return std::make_shared<vector>(size);
109 }
110
111 struct gather
112 {
113 std::vector<col_type> I;
114
115 gather(size_t /*size*/, const std::vector<col_type>& I, const params&)
116 : I(I)
117 {}
118
119 template <class InVec, class OutVec>
120 void operator()(const InVec& vec, OutVec& vals) const
121 {
122 for (size_t i = 0; i < I.size(); ++i)
123 vals[i] = vec[I[i]];
124 }
125 };
126
127 struct scatter
128 {
129 std::vector<col_type> I;
130
131 scatter(size_t /*size*/, const std::vector<col_type>& I, const params&)
132 : I(I)
133 {}
134
135 template <class InVec, class OutVec>
136 void operator()(const InVec& vals, OutVec& vec) const
137 {
138 for (size_t i = 0; i < I.size(); ++i)
139 vec[I[i]] = vals[i];
140 }
141 };
142
143 // Create direct solver for coarse level
144 static std::shared_ptr<direct_solver>
145 create_solver(std::shared_ptr<matrix> A, const params&)
146 {
147 return std::make_shared<direct_solver>(*A);
148 }
149};
150
151/*---------------------------------------------------------------------------*/
152/*---------------------------------------------------------------------------*/
153
154} // namespace Arcane::Alina
155
156namespace Arcane::Alina::backend
157{
158
159/*---------------------------------------------------------------------------*/
160/*---------------------------------------------------------------------------*/
161
162template <class T>
163struct is_builtin_vector : std::false_type
164{};
165
166template <class V>
167struct is_builtin_vector<std::vector<V>> : std::is_arithmetic<V>
168{};
169
170template <class V>
171struct is_builtin_vector<SmallSpan<V>> : std::true_type
172{};
173
174template <class V>
175struct is_builtin_vector<Span<V>> : std::true_type
176{};
177
178template <class V>
179struct is_builtin_vector<numa_vector<V>> : std::true_type
180{};
181
182//---------------------------------------------------------------------------
183// Specialization of backend interface
184//---------------------------------------------------------------------------
185template <typename T1, typename T2>
186struct backends_compatible<BuiltinBackend<T1>, BuiltinBackend<T2>> : std::true_type
187{};
188
189template <typename V, typename C, typename P>
190struct rows_impl<CSRMatrix<V, C, P>>
191{
192 static size_t get(const CSRMatrix<V, C, P>& A)
193 {
194 return A.nbRow();
195 }
196};
197
198template <typename V, typename C, typename P>
199struct cols_impl<CSRMatrix<V, C, P>>
200{
201 static size_t get(const CSRMatrix<V, C, P>& A)
202 {
203 return A.ncols;
204 }
205};
206
207template <class Vec>
208struct bytes_impl<Vec, typename std::enable_if<is_builtin_vector<Vec>::value>::type>
209{
210 static size_t get(const Vec& x)
211 {
212 typedef typename backend::value_type<Vec>::type V;
213 return x.size() * sizeof(V);
214 }
215};
216
217template <typename V, typename C, typename P>
218struct ptr_data_impl<CSRMatrix<V, C, P>>
219{
220 typedef const P* type;
221 static type get(const CSRMatrix<V, C, P>& A)
222 {
223 return &A.ptr[0];
224 }
225};
226
227template <typename V, typename C, typename P>
228struct col_data_impl<CSRMatrix<V, C, P>>
229{
230 typedef const C* type;
231 static type get(const CSRMatrix<V, C, P>& A)
232 {
233 return &A.col[0];
234 }
235};
236
237template <typename V, typename C, typename P>
238struct val_data_impl<CSRMatrix<V, C, P>>
239{
240 typedef const V* type;
241 static type get(const CSRMatrix<V, C, P>& A)
242 {
243 return &A.val[0];
244 }
245};
246
247template <typename V, typename C, typename P>
248struct nonzeros_impl<CSRMatrix<V, C, P>>
249{
250 static size_t get(const CSRMatrix<V, C, P>& A)
251 {
252 return A.nbRow() == 0 ? 0 : A.ptr[A.nbRow()];
253 }
254};
255
256template <typename V, typename C, typename P>
258{
259 static size_t get(const CSRMatrix<V, C, P>& A, size_t row)
260 {
261 return A.ptr[row + 1] - A.ptr[row];
262 }
263};
264
265template <class Vec>
266struct clear_impl<Vec, typename std::enable_if<is_builtin_vector<Vec>::value>::type>
267{
268 static void apply(Vec& x)
269 {
270 typedef typename backend::value_type<Vec>::type V;
271
272 const size_t n = x.size();
273 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
274 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
275 x[i] = math::zero<V>();
276 }
277 });
278 }
279};
280
281/*---------------------------------------------------------------------------*/
282/*---------------------------------------------------------------------------*/
283
284template <class Vec1, class Vec2>
285struct inner_product_impl<Vec1, Vec2,
286 typename std::enable_if<
287 is_builtin_vector<Vec1>::value &&
288 is_builtin_vector<Vec2>::value>::type>
289{
290 typedef typename value_type<Vec1>::type V;
291
292 typedef typename math::inner_product_impl<V>::return_type return_type;
293
294 static return_type get(const Vec1& x, const Vec2& y)
295 {
297 return parallel(x, y);
298 }
299 else {
300 return serial(x, y);
301 }
302 }
303
304 static return_type serial(const Vec1& x, const Vec2& y)
305 {
306 const size_t n = x.size();
307
308 return_type s = math::zero<return_type>();
309 return_type c = math::zero<return_type>();
310
311 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
312 return_type d = math::inner_product(x[i], y[i]) - c;
313 return_type t = s + d;
314 c = (t - s) - d;
315 s = t;
316 }
317
318 return s;
319 }
320
321 static return_type parallel(const Vec1& x, const Vec2& y)
322 {
323 const size_t n = x.size();
324 // TODO: Use padding to avoir sharing cache line between threads
327 const return_type zero = math::zero<return_type>();
328 sum_array.resize(nb_thread, zero);
329 SmallSpan<return_type> sum = sum_array.view();
330 for (Int32 i = 0; i < nb_thread; ++i)
331 sum[i] = zero;
332 // NOTE GG: NOT reproducible
333
334 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
335 const int tid = TaskFactory::currentTaskThreadIndex();
336 return_type s = zero;
337 return_type c = zero;
338 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
339 return_type d = math::inner_product(x[i], y[i]) - c;
340 return_type t = s + d;
341 c = (t - s) - d;
342 s = t;
343 }
344
345 sum[tid] += s;
346 });
347 return_type total = zero;
348 for (Int32 i = 0; i < nb_thread; ++i) {
349 total += sum[i];
350 }
351 return total;
352 }
353};
354
355/*---------------------------------------------------------------------------*/
356/*---------------------------------------------------------------------------*/
357
358template <class A, class Vec1, class B, class Vec2>
359struct axpby_impl<A, Vec1, B, Vec2, typename std::enable_if<is_builtin_vector<Vec1>::value && is_builtin_vector<Vec2>::value>::type>
360{
361 static void apply(A a, const Vec1& x, B b, Vec2& y)
362 {
363 const size_t n = x.size();
364 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
365 if (!math::is_zero(b)) {
366 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
367 y[i] = a * x[i] + b * y[i];
368 }
369 }
370 else {
371 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
372 y[i] = a * x[i];
373 }
374 }
375 });
376 }
377};
378
379/*---------------------------------------------------------------------------*/
380/*---------------------------------------------------------------------------*/
381
382template <class A, class Vec1, class B, class Vec2, class C, class Vec3>
383struct axpbypcz_impl<A, Vec1, B, Vec2, C, Vec3,
384 typename std::enable_if<
385 is_builtin_vector<Vec1>::value &&
386 is_builtin_vector<Vec2>::value &&
387 is_builtin_vector<Vec3>::value>::type>
388{
389 static void apply(A a, const Vec1& x, B b, const Vec2& y, C c, Vec3& z)
390 {
391 const size_t n = x.size();
392 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
393 if (!math::is_zero(c)) {
394 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
395 z[i] = a * x[i] + b * y[i] + c * z[i];
396 }
397 }
398 else {
399 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
400 z[i] = a * x[i] + b * y[i];
401 }
402 }
403 });
404 }
405};
406
407/*---------------------------------------------------------------------------*/
408/*---------------------------------------------------------------------------*/
409
410template <class Alpha, class Vec1, class Vec2, class Beta, class Vec3>
411struct vmul_impl<Alpha, Vec1, Vec2, Beta, Vec3,
412 typename std::enable_if<
413 is_builtin_vector<Vec1>::value &&
414 is_builtin_vector<Vec2>::value &&
415 is_builtin_vector<Vec3>::value &&
416 math::static_rows<typename value_type<Vec1>::type>::value == math::static_rows<typename value_type<Vec2>::type>::value &&
417 math::static_rows<typename value_type<Vec1>::type>::value == math::static_rows<typename value_type<Vec3>::type>::value>::type>
418{
419 static void apply(Alpha a, const Vec1& x, const Vec2& y, Beta b, Vec3& z)
420 {
421 const size_t n = x.size();
422 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
423 if (!math::is_zero(b)) {
424 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
425 z[i] = a * x[i] * y[i] + b * z[i];
426 }
427 }
428 else {
429 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
430 z[i] = a * x[i] * y[i];
431 }
432 }
433 });
434 }
435};
436
437/*---------------------------------------------------------------------------*/
438/*---------------------------------------------------------------------------*/
439
440// Support for mixed scalar/nonscalar types
441template <class Alpha, class Vec1, class Vec2, class Beta, class Vec3>
442struct vmul_impl<Alpha, Vec1, Vec2, Beta, Vec3,
443 typename std::enable_if<is_builtin_vector<Vec1>::value &&
444 is_builtin_vector<Vec2>::value &&
445 is_builtin_vector<Vec3>::value &&
446 (math::static_rows<typename value_type<Vec1>::type>::value != math::static_rows<typename value_type<Vec2>::type>::value ||
447 math::static_rows<typename value_type<Vec1>::type>::value != math::static_rows<typename value_type<Vec3>::type>::value)>::type>
448{
449 static void apply(Alpha a, const Vec1& x, const Vec2& y, Beta b, Vec3& z)
450 {
451 typedef typename value_type<Vec1>::type M_type;
452 auto Y = backend::reinterpret_as_rhs<M_type>(y);
453 auto Z = backend::reinterpret_as_rhs<M_type>(z);
454
455 const size_t n = x.size();
456
457 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
458 if (!math::is_zero(b)) {
459 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
460 Z[i] = a * x[i] * Y[i] + b * Z[i];
461 }
462 }
463 else {
464 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
465 Z[i] = a * x[i] * Y[i];
466 }
467 }
468 });
469 }
470};
471
472/*---------------------------------------------------------------------------*/
473/*---------------------------------------------------------------------------*/
474
475template <class Vec1, class Vec2>
476struct copy_impl<Vec1, Vec2,
477 typename std::enable_if<
478 is_builtin_vector<Vec1>::value &&
479 is_builtin_vector<Vec2>::value>::type>
480{
481 static void apply(const Vec1& x, Vec2& y)
482 {
483 const size_t n = x.size();
484 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
485 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
486 y[i] = x[i];
487 }
488 });
489 }
490};
491
492template <class MatrixValue, class Vector, bool IsConst>
493struct reinterpret_as_rhs_impl<MatrixValue, Vector, IsConst,
494 typename std::enable_if<is_builtin_vector<Vector>::value>::type>
495{
496 typedef typename backend::value_type<Vector>::type src_type;
497 typedef typename math::scalar_of<src_type>::type scalar_type;
498 typedef typename math::rhs_of<MatrixValue>::type rhs_type;
499 typedef typename math::replace_scalar<rhs_type, scalar_type>::type dst_type;
500 typedef typename std::conditional<IsConst, const dst_type*, dst_type*>::type ptr_type;
501 typedef typename std::conditional<IsConst, const dst_type, dst_type>::type return_value_type;
502 typedef Span<return_value_type> return_type;
503
504 template <class V>
505 static return_type get(V&& x)
506 {
507 auto ptr = reinterpret_cast<ptr_type>(&x[0]);
508 const size_t n = x.size() * sizeof(src_type) / sizeof(dst_type);
509 return Span<return_value_type>(ptr, n);
510 }
511};
512
513/*---------------------------------------------------------------------------*/
514/*---------------------------------------------------------------------------*/
515
516namespace detail
517{
518
519 template <typename V, typename C, typename P>
521 : std::true_type
522 {};
523
524} // namespace detail
525
526} // namespace Arcane::Alina::backend
527
528namespace Arcane::Alina::detail
529{
530
531// Backend with scalar value_type of highest precision.
532template <class V1, class V2>
534 typename std::enable_if<math::static_rows<V1>::value != 1 || math::static_rows<V2>::value != 1>::type>
535{
536 typedef typename math::scalar_of<V1>::type S1;
537 typedef typename math::scalar_of<V2>::type S2;
538
539 typedef typename std::conditional<(sizeof(S1) > sizeof(S2)), BuiltinBackend<S1>, BuiltinBackend<S2>>::type type;
540};
541
542} // namespace Arcane::Alina::detail
543
544#endif
Class to handle empty parameters list.
Definition AlinaUtils.h:90
NUMA-aware vector container.
Definition NumaVector.h:42
Direct solver that uses Skyline LU factorization.
void resize(Int64 s)
Change le nombre d'éléments du tableau à s.
ArrayView< T > view() const
Vue mutable sur ce tableau.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Tableau 1D de données avec buffer pré-alloué sur la pile.
Vue d'un tableau d'éléments de type T.
Definition Span.h:801
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:325
Vue d'un tableau d'éléments de type T.
Definition Span.h:633
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
Vector class, to be used by user.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Alina::detail::empty_params params
Sparse matrix stored in CSR (Compressed Sparse Row) format.
Definition CSRMatrix.h:98
Implementation for linear combination of two vectors.
Implementation for linear combination of three vectors.
Metafunction that checks if two backends are compatible.
Implementation for function returning number of bytes allocated for a matrix/vector.
Implementation for zeroing out a vector.
Implementation for function returning the number of columns in a matrix.
Implementation for vector copy.
Implementation for inner product.
Implementation for function returning the number of nonzeros in a matrix.
Reinterpret the vector to be compatible with the matrix value type.
Implementation for function returning the number of nonzeros in a matrix row.
Implementation for function returning the number of rows in a matrix.
Implementation for element-wize vector product.