Arcane  v4.1.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
BuiltinBackend.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* BuiltinBackend.h (C) 2000-2026 */
9/* */
10/* Builtin backend using CSR matrix. */
11/*---------------------------------------------------------------------------*/
12#ifndef ARCCORE_ALINA_BUILTINBACKEND_H
13#define ARCCORE_ALINA_BUILTINBACKEND_H
14/*---------------------------------------------------------------------------*/
15/*---------------------------------------------------------------------------*/
16/*
17 * This file is based on the work on AMGCL library (version march 2026)
18 * which can be found at https://github.com/ddemidov/amgcl.
19 *
20 * Copyright (c) 2012-2022 Denis Demidov <dennis.demidov@gmail.com>
21 * SPDX-License-Identifier: MIT
22 */
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26//#pragma GCC diagnostic ignored "-Wconversion"
27
28/*---------------------------------------------------------------------------*/
29/*---------------------------------------------------------------------------*/
30
31#include "arccore/alina/CSRMatrixOperations.h"
32#include "arccore/alina/SkylineLUSolver.h"
33#include "arccore/alina/MatrixOperationsImpl.h"
34
35#include "arccore/base/ConcurrencyBase.h"
36#include "arccore/common/SmallArray.h"
37
38#include <numeric>
39#include <random>
40
41/*---------------------------------------------------------------------------*/
42/*---------------------------------------------------------------------------*/
43
44namespace Arcane::Alina
45{
46
47/*---------------------------------------------------------------------------*/
48/*---------------------------------------------------------------------------*/
56template <typename ValueType, typename ColumnType = ptrdiff_t, typename PointerType = ColumnType>
58{
59 typedef ValueType value_type;
60 typedef ColumnType index_type;
61 typedef ColumnType col_type;
62 typedef PointerType ptr_type;
63
64 typedef typename math::rhs_of<value_type>::type rhs_type;
65
66 struct provides_row_iterator : std::true_type
67 {};
68
70 typedef numa_vector<rhs_type> vector;
71 typedef numa_vector<value_type> matrix_diagonal;
72 typedef solver::SkylineLUSolver<value_type> direct_solver;
73
76
77 static std::string name() { return "builtin"; }
78
79 // Copy matrix. This is a noop for builtin backend.
80 static std::shared_ptr<matrix>
81 copy_matrix(std::shared_ptr<matrix> A, const params&)
82 {
83 return A;
84 }
85
86 // Copy vector to builtin backend.
87 template <class T>
88 static std::shared_ptr<numa_vector<T>>
89 copy_vector(const std::vector<T>& x, const params&)
90 {
91 return std::make_shared<numa_vector<T>>(x);
92 }
93
94 // Copy vector to builtin backend. This is a noop for builtin backend.
95 template <class T>
96 static std::shared_ptr<numa_vector<T>>
97 copy_vector(std::shared_ptr<numa_vector<T>> x, const params&)
98 {
99 return x;
100 }
101
102 // Create vector of the specified size.
103 static std::shared_ptr<vector>
104 create_vector(size_t size, const params&)
105 {
106 return std::make_shared<vector>(size);
107 }
108
109 struct gather
110 {
111 std::vector<col_type> I;
112
113 gather(size_t /*size*/, const std::vector<col_type>& I, const params&)
114 : I(I)
115 {}
116
117 template <class InVec, class OutVec>
118 void operator()(const InVec& vec, OutVec& vals) const
119 {
120 for (size_t i = 0; i < I.size(); ++i)
121 vals[i] = vec[I[i]];
122 }
123 };
124
125 struct scatter
126 {
127 std::vector<col_type> I;
128
129 scatter(size_t /*size*/, const std::vector<col_type>& I, const params&)
130 : I(I)
131 {}
132
133 template <class InVec, class OutVec>
134 void operator()(const InVec& vals, OutVec& vec) const
135 {
136 for (size_t i = 0; i < I.size(); ++i)
137 vec[I[i]] = vals[i];
138 }
139 };
140
141 // Create direct solver for coarse level
142 static std::shared_ptr<direct_solver>
143 create_solver(std::shared_ptr<matrix> A, const params&)
144 {
145 return std::make_shared<direct_solver>(*A);
146 }
147};
148
149/*---------------------------------------------------------------------------*/
150/*---------------------------------------------------------------------------*/
151
152} // namespace Arcane::Alina
153
154namespace Arcane::Alina::backend
155{
156
157/*---------------------------------------------------------------------------*/
158/*---------------------------------------------------------------------------*/
159
160template <class T>
161struct is_builtin_vector : std::false_type
162{};
163
164template <class V>
165struct is_builtin_vector<std::vector<V>> : std::is_arithmetic<V>
166{};
167
168template <class V>
169struct is_builtin_vector<SmallSpan<V>> : std::true_type
170{};
171
172template <class V>
173struct is_builtin_vector<Span<V>> : std::true_type
174{};
175
176template <class V>
177struct is_builtin_vector<numa_vector<V>> : std::true_type
178{};
179
180//---------------------------------------------------------------------------
181// Specialization of backend interface
182//---------------------------------------------------------------------------
183template <typename T1, typename T2>
184struct backends_compatible<BuiltinBackend<T1>, BuiltinBackend<T2>> : std::true_type
185{};
186
187template <typename V, typename C, typename P>
188struct rows_impl<CSRMatrix<V, C, P>>
189{
190 static size_t get(const CSRMatrix<V, C, P>& A)
191 {
192 return A.nbRow();
193 }
194};
195
196template <typename V, typename C, typename P>
197struct cols_impl<CSRMatrix<V, C, P>>
198{
199 static size_t get(const CSRMatrix<V, C, P>& A)
200 {
201 return A.ncols;
202 }
203};
204
205template <class Vec>
206struct bytes_impl<Vec, typename std::enable_if<is_builtin_vector<Vec>::value>::type>
207{
208 static size_t get(const Vec& x)
209 {
210 typedef typename backend::value_type<Vec>::type V;
211 return x.size() * sizeof(V);
212 }
213};
214
215template <typename V, typename C, typename P>
216struct ptr_data_impl<CSRMatrix<V, C, P>>
217{
218 typedef const P* type;
219 static type get(const CSRMatrix<V, C, P>& A)
220 {
221 return &A.ptr[0];
222 }
223};
224
225template <typename V, typename C, typename P>
226struct col_data_impl<CSRMatrix<V, C, P>>
227{
228 typedef const C* type;
229 static type get(const CSRMatrix<V, C, P>& A)
230 {
231 return &A.col[0];
232 }
233};
234
235template <typename V, typename C, typename P>
236struct val_data_impl<CSRMatrix<V, C, P>>
237{
238 typedef const V* type;
239 static type get(const CSRMatrix<V, C, P>& A)
240 {
241 return &A.val[0];
242 }
243};
244
245template <typename V, typename C, typename P>
246struct nonzeros_impl<CSRMatrix<V, C, P>>
247{
248 static size_t get(const CSRMatrix<V, C, P>& A)
249 {
250 return A.nbRow() == 0 ? 0 : A.ptr[A.nbRow()];
251 }
252};
253
254template <typename V, typename C, typename P>
256{
257 static size_t get(const CSRMatrix<V, C, P>& A, size_t row)
258 {
259 return A.ptr[row + 1] - A.ptr[row];
260 }
261};
262
263template <class Vec>
264struct clear_impl<Vec, typename std::enable_if<is_builtin_vector<Vec>::value>::type>
265{
266 static void apply(Vec& x)
267 {
268 typedef typename backend::value_type<Vec>::type V;
269
270 const size_t n = x.size();
271 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
272 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
273 x[i] = math::zero<V>();
274 }
275 });
276 }
277};
278
279/*---------------------------------------------------------------------------*/
280/*---------------------------------------------------------------------------*/
281
282template <class Vec1, class Vec2>
283struct inner_product_impl<Vec1, Vec2,
284 typename std::enable_if<
285 is_builtin_vector<Vec1>::value &&
286 is_builtin_vector<Vec2>::value>::type>
287{
288 typedef typename value_type<Vec1>::type V;
289
290 typedef typename math::inner_product_impl<V>::return_type return_type;
291
292 static return_type get(const Vec1& x, const Vec2& y)
293 {
295 return parallel(x, y);
296 }
297 else {
298 return serial(x, y);
299 }
300 }
301
302 static return_type serial(const Vec1& x, const Vec2& y)
303 {
304 const size_t n = x.size();
305
306 return_type s = math::zero<return_type>();
307 return_type c = math::zero<return_type>();
308
309 for (ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
310 return_type d = math::inner_product(x[i], y[i]) - c;
311 return_type t = s + d;
312 c = (t - s) - d;
313 s = t;
314 }
315
316 return s;
317 }
318
319 static return_type parallel(const Vec1& x, const Vec2& y)
320 {
321 const size_t n = x.size();
322 // TODO: Use padding to avoir sharing cache line between threads
325 const return_type zero = math::zero<return_type>();
326 sum_array.resize(nb_thread, zero);
327 SmallSpan<return_type> sum = sum_array.view();
328 for (Int32 i = 0; i < nb_thread; ++i)
329 sum[i] = zero;
330 // NOTE GG: NOT reproducible
331
332 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
333 const int tid = TaskFactory::currentTaskThreadIndex();
334 return_type s = zero;
335 return_type c = zero;
336 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
337 return_type d = math::inner_product(x[i], y[i]) - c;
338 return_type t = s + d;
339 c = (t - s) - d;
340 s = t;
341 }
342
343 sum[tid] += s;
344 });
345 return_type total = zero;
346 for (Int32 i = 0; i < nb_thread; ++i) {
347 total += sum[i];
348 }
349 return total;
350 }
351};
352
353/*---------------------------------------------------------------------------*/
354/*---------------------------------------------------------------------------*/
355
356template <class A, class Vec1, class B, class Vec2>
357struct axpby_impl<A, Vec1, B, Vec2, typename std::enable_if<is_builtin_vector<Vec1>::value && is_builtin_vector<Vec2>::value>::type>
358{
359 static void apply(A a, const Vec1& x, B b, Vec2& y)
360 {
361 const size_t n = x.size();
362 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
363 if (!math::is_zero(b)) {
364 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
365 y[i] = a * x[i] + b * y[i];
366 }
367 }
368 else {
369 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
370 y[i] = a * x[i];
371 }
372 }
373 });
374 }
375};
376
377/*---------------------------------------------------------------------------*/
378/*---------------------------------------------------------------------------*/
379
380template <class A, class Vec1, class B, class Vec2, class C, class Vec3>
381struct axpbypcz_impl<A, Vec1, B, Vec2, C, Vec3,
382 typename std::enable_if<
383 is_builtin_vector<Vec1>::value &&
384 is_builtin_vector<Vec2>::value &&
385 is_builtin_vector<Vec3>::value>::type>
386{
387 static void apply(A a, const Vec1& x, B b, const Vec2& y, C c, Vec3& z)
388 {
389 const size_t n = x.size();
390 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
391 if (!math::is_zero(c)) {
392 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
393 z[i] = a * x[i] + b * y[i] + c * z[i];
394 }
395 }
396 else {
397 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
398 z[i] = a * x[i] + b * y[i];
399 }
400 }
401 });
402 }
403};
404
405/*---------------------------------------------------------------------------*/
406/*---------------------------------------------------------------------------*/
407
408template <class Alpha, class Vec1, class Vec2, class Beta, class Vec3>
409struct vmul_impl<Alpha, Vec1, Vec2, Beta, Vec3,
410 typename std::enable_if<
411 is_builtin_vector<Vec1>::value &&
412 is_builtin_vector<Vec2>::value &&
413 is_builtin_vector<Vec3>::value &&
414 math::static_rows<typename value_type<Vec1>::type>::value == math::static_rows<typename value_type<Vec2>::type>::value &&
415 math::static_rows<typename value_type<Vec1>::type>::value == math::static_rows<typename value_type<Vec3>::type>::value>::type>
416{
417 static void apply(Alpha a, const Vec1& x, const Vec2& y, Beta b, Vec3& z)
418 {
419 const size_t n = x.size();
420 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
421 if (!math::is_zero(b)) {
422 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
423 z[i] = a * x[i] * y[i] + b * z[i];
424 }
425 }
426 else {
427 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
428 z[i] = a * x[i] * y[i];
429 }
430 }
431 });
432 }
433};
434
435/*---------------------------------------------------------------------------*/
436/*---------------------------------------------------------------------------*/
437
438// Support for mixed scalar/nonscalar types
439template <class Alpha, class Vec1, class Vec2, class Beta, class Vec3>
440struct vmul_impl<Alpha, Vec1, Vec2, Beta, Vec3,
441 typename std::enable_if<is_builtin_vector<Vec1>::value &&
442 is_builtin_vector<Vec2>::value &&
443 is_builtin_vector<Vec3>::value &&
444 (math::static_rows<typename value_type<Vec1>::type>::value != math::static_rows<typename value_type<Vec2>::type>::value ||
445 math::static_rows<typename value_type<Vec1>::type>::value != math::static_rows<typename value_type<Vec3>::type>::value)>::type>
446{
447 static void apply(Alpha a, const Vec1& x, const Vec2& y, Beta b, Vec3& z)
448 {
449 typedef typename value_type<Vec1>::type M_type;
450 auto Y = backend::reinterpret_as_rhs<M_type>(y);
451 auto Z = backend::reinterpret_as_rhs<M_type>(z);
452
453 const size_t n = x.size();
454
455 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
456 if (!math::is_zero(b)) {
457 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
458 Z[i] = a * x[i] * Y[i] + b * Z[i];
459 }
460 }
461 else {
462 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
463 Z[i] = a * x[i] * Y[i];
464 }
465 }
466 });
467 }
468};
469
470/*---------------------------------------------------------------------------*/
471/*---------------------------------------------------------------------------*/
472
473template <class Vec1, class Vec2>
474struct copy_impl<Vec1, Vec2,
475 typename std::enable_if<
476 is_builtin_vector<Vec1>::value &&
477 is_builtin_vector<Vec2>::value>::type>
478{
479 static void apply(const Vec1& x, Vec2& y)
480 {
481 const size_t n = x.size();
482 arccoreParallelFor(0, n, ForLoopRunInfo{}, [&](Int32 begin, Int32 size) {
483 for (ptrdiff_t i = begin; i < (begin + size); ++i) {
484 y[i] = x[i];
485 }
486 });
487 }
488};
489
490template <class MatrixValue, class Vector, bool IsConst>
491struct reinterpret_as_rhs_impl<MatrixValue, Vector, IsConst,
492 typename std::enable_if<is_builtin_vector<Vector>::value>::type>
493{
494 typedef typename backend::value_type<Vector>::type src_type;
495 typedef typename math::scalar_of<src_type>::type scalar_type;
496 typedef typename math::rhs_of<MatrixValue>::type rhs_type;
497 typedef typename math::replace_scalar<rhs_type, scalar_type>::type dst_type;
498 typedef typename std::conditional<IsConst, const dst_type*, dst_type*>::type ptr_type;
499 typedef typename std::conditional<IsConst, const dst_type, dst_type>::type return_value_type;
500 typedef Span<return_value_type> return_type;
501
502 template <class V>
503 static return_type get(V&& x)
504 {
505 auto ptr = reinterpret_cast<ptr_type>(&x[0]);
506 const size_t n = x.size() * sizeof(src_type) / sizeof(dst_type);
507 return Span<return_value_type>(ptr, n);
508 }
509};
510
511/*---------------------------------------------------------------------------*/
512/*---------------------------------------------------------------------------*/
513
514namespace detail
515{
516
517 template <typename V, typename C, typename P>
519 : std::true_type
520 {};
521
522} // namespace detail
523
524} // namespace Arcane::Alina::backend
525
526namespace Arcane::Alina::detail
527{
528
529// Backend with scalar value_type of highest precision.
530template <class V1, class V2>
532 typename std::enable_if<math::static_rows<V1>::value != 1 || math::static_rows<V2>::value != 1>::type>
533{
534 typedef typename math::scalar_of<V1>::type S1;
535 typedef typename math::scalar_of<V2>::type S2;
536
537 typedef typename std::conditional<(sizeof(S1) > sizeof(S2)), BuiltinBackend<S1>, BuiltinBackend<S2>>::type type;
538};
539
540} // namespace Arcane::Alina::detail
541
542#endif
Class to handle empty parameters list.
Definition AlinaUtils.h:90
NUMA-aware vector container.
Definition NumaVector.h:42
Direct solver that uses Skyline LU factorization.
void resize(Int64 s)
Change le nombre d'éléments du tableau à s.
ArrayView< T > view() const
Vue mutable sur ce tableau.
static Int32 maxAllowedThread()
Nombre maximum de threads autorisés pour le multi-threading.
Informations d'exécution d'une boucle.
Tableau 1D de données avec buffer pré-alloué sur la pile.
Vue d'un tableau d'éléments de type T.
Definition Span.h:801
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:325
Vue d'un tableau d'éléments de type T.
Definition Span.h:633
static Int32 currentTaskThreadIndex()
Indice (entre 0 et nbAllowedThread()-1) du thread exécutant la tâche actuelle.
Vector class, to be used by user.
void arccoreParallelFor(const ComplexForLoopRanges< RankValue > &loop_ranges, const ForLoopRunInfo &run_info, const LambdaType &lambda_function, const ReducerArgs &... reducer_args)
Applique en concurrence la fonction lambda lambda_function sur l'intervalle d'itération donné par loo...
Definition ParallelFor.h:85
std::int32_t Int32
Type entier signé sur 32 bits.
Alina::detail::empty_params params
Sparse matrix stored in CSR (Compressed Sparse Row) format.
Definition CSRMatrix.h:98
Implementation for linear combination of two vectors.
Implementation for linear combination of three vectors.
Metafunction that checks if two backends are compatible.
Implementation for function returning number of bytes allocated for a matrix/vector.
Implementation for zeroing out a vector.
Implementation for function returning the number of columns in a matrix.
Implementation for vector copy.
Implementation for inner product.
Implementation for function returning the number of nonzeros in a matrix.
Reinterpret the vector to be compatible with the matrix value type.
Implementation for function returning the number of nonzeros in a matrix row.
Implementation for function returning the number of rows in a matrix.
Implementation for element-wize vector product.