Alien  1.3.0
Developer documentation
Loading...
Searching...
No Matches
SYCLBEllPackInternal.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7
8
9#pragma once
10
11#include <alien/kernels/sycl/SYCLPrecomp.h>
12
13#ifdef USE_SYCL2020
14#include <sycl/sycl.hpp>
15#else
16#include <CL/sycl.hpp>
17#endif
18
19#include <alien/kernels/sycl/data/SYCLSendRecvOp.h>
20#include <alien/kernels/sycl/data/SYCLLUSendRecvOp.h>
21
22#include <alien/kernels/sycl/data/BEllPackStructInfo.h>
23
24/*---------------------------------------------------------------------------*/
25
26namespace Alien::SYCLInternal
27{
28
29#ifndef USE_SYCL2020
30 using namespace cl ;
31#endif
32
33template <int EllPackSize, typename IndexT>
34struct ALIEN_EXPORT StructInfoInternal
35{
36 // clang-format off
37 static const int ellpack_size = EllPackSize ;
38 using index_type = IndexT;
39 using IndexType = IndexT;
40 using index_buffer_type = sycl::buffer<index_type, 1>;
41 using IndexBufferType = sycl::buffer<index_type, 1>;
42 using MaskBufferType = sycl::buffer<uint8_t, 1>;
43 // clang-format on
44
45 StructInfoInternal(std::size_t nrows,
46 std::size_t nnz,
47 std::size_t block_nrows,
48 std::size_t block_nnz,
49 int const* h_kcol,
50 int const* h_cols,
51 int const* h_block_row_offset,
52 int const* h_local_row_size);
53
54 IndexBufferType& getBlockRowOffset() const { return m_block_row_offset; }
55
56 IndexBufferType& getBlockCols() const { return m_block_cols; }
57
58 IndexBufferType& getKCol() const { return m_kcol; }
59
60 int const* kcol() const
61 {
62 return m_h_kcol.data();
63 }
64
65 int const* cols() const
66 {
67 return m_h_cols.data();
68 }
69
70 int const* dcol() const
71 {
72 getUpperDiagOffset();
73 return m_h_dcol.data();
74 }
75
76 void getUpperDiagOffset() const;
77 void computeLowerUpperMask() const;
78
79 MaskBufferType& getLowerMask() const;
80 MaskBufferType& getUpperMask() const;
81
82 // clang-format off
83 std::size_t m_nrows = 0 ;
84 std::size_t m_nnz = 0 ;
85 std::size_t m_block_nrows = 0 ;
86 std::size_t m_block_nnz = 0 ;
87
88 std::vector<index_type> m_h_kcol ;
89 std::vector<index_type> m_h_cols ;
90 std::vector<index_type> m_h_block_cols ;
91
92 mutable IndexBufferType m_block_row_offset ;
93 mutable IndexBufferType m_block_cols ;
94 mutable IndexBufferType m_kcol ;
95
96 mutable bool m_lower_upper_mask_ready = false ;
97 mutable std::vector<index_type> m_h_dcol ;
98 mutable std::unique_ptr<MaskBufferType> m_lower_mask ;
99 mutable std::unique_ptr<MaskBufferType> m_upper_mask ;
100 // clang-format on
101};
102
103/*---------------------------------------------------------------------------*/
104
105template <typename ValueT, int EllPackSize>
106class MatrixInternal
107{
108 public:
109 // clang-format off
110 using ThisType = MatrixInternal<ValueT,EllPackSize>;
111
112 static const int ellpack_size = EllPackSize ;
113
114 using ValueType = ValueT;
115 using value_type = ValueT;
116
117 using ProfileType = BEllPackStructInfo<EllPackSize,int>;
118 using InternalProfileType = typename ProfileType::InternalType;
119 using IndexType = typename InternalProfileType::IndexType;
120 using IndexBufferType = typename InternalProfileType::IndexBufferType;
121 using IndexBufferPtrType = std::unique_ptr<IndexBufferType>;
122
123 using value_buffer_type = sycl::buffer<value_type, 1>;
124 using ValueBufferType = sycl::buffer<value_type, 1>;
125 using ValueBufferPtrType = std::unique_ptr<ValueBufferType>;
126
127 using QueueType = sycl::queue;
128 // clang-format on
129
130 template<int N>
131 struct TileT
132 {
133 static constexpr int NxN = N*N ;
134 inline std::size_t ijk(std::size_t k, int i, int j) const
135 {
136 return (k*NxN + i*N + j)*ellpack_size;
137 }
138
139 inline std::size_t ij(std::size_t local_id,int i, int j) const
140 {
141 return local_id*NxN+ i*N + j;
142 }
143 };
144
145 struct Tile
146 {
147 static const int ellpack_size = EllPackSize ;
148 int m_N = 0 ;
149 int m_NxN = 0 ;
150
151 Tile(int N)
152 : m_N(N)
153 , m_NxN(N*N)
154 {}
155
156 inline std::size_t _ijk(std::size_t k, int i, int j) const
157 {
158 return (k*m_NxN + i*m_N + j)*ellpack_size;
159 }
160
161 inline std::size_t _ij(std::size_t local_id,int i, int j) const
162 {
163 return local_id*m_NxN+ i*m_N + j;
164 }
165
166 template<typename MatrixValueAccessorT,
167 typename MatrixColAccessorT,
168 typename VectorAccessorT>
169 ValueType mult(int ieq,
170 std::size_t local_id,
171 std::size_t k,
172 MatrixColAccessorT& cols,
173 MatrixValueAccessorT& matrix,
174 VectorAccessorT& x) const
175 {
176 ValueType value = 0. ;
177 auto x_offset = cols[k*ellpack_size+local_id]*m_N ;
178 if(x_offset>=0)
179 {
180 for(int j=0;j<m_N;++j)
181 {
182 auto mat_offset = _ijk(k,ieq,j)+local_id ;
183 value += matrix[mat_offset]*x[x_offset+j] ;
184 //printf("\n %d %d %d %d : %f += %f*%f ",ieq,j,int(k),int(mat_offset),value,matrix[mat_offset],x[x_offset+j]) ;
185 }
186 }
187 return value ;
188 }
189
190 template<typename MatrixValueAccessorT,
191 typename MatrixColAccessorT,
192 typename MaskAccessorT,
193 typename VectorAccessorT>
194 ValueType mult(int ieq,
195 std::size_t local_id,
196 std::size_t k,
197 MatrixColAccessorT& cols,
198 MaskAccessorT& mask,
199 MatrixValueAccessorT& matrix,
200 VectorAccessorT& x) const
201 {
202 ValueType value = 0. ;
203 auto x_offset = cols[k*ellpack_size+local_id]*m_N ;
204 auto ma = mask[k*ellpack_size+local_id] ;
205 if(x_offset>=0 && ma==1)
206 {
207 for(int j=0;j<m_N;++j)
208 {
209 auto mat_offset = _ijk(k,ieq,j)+local_id ;
210 value += matrix[mat_offset]*x[x_offset+j] ;
211 //printf("\n %d %d %d %d : %f += %f*%f ",ieq,j,int(k),int(mat_offset),value,matrix[mat_offset],x[x_offset+j]) ;
212 }
213 }
214 return value ;
215 }
216 };
217
218 template<typename MatrixAccT,
219 typename VectorAccT,
220 typename LUAccT>
221 struct LU
222 {
223 static const int ellpack_size = EllPackSize ;
224 int m_N = 0 ;
225 int m_NxN = 0 ;
226 MatrixAccT m_matrix;
227
228 LU(int N, MatrixAccT& matrix)
229 : m_N(N)
230 , m_NxN(N*N)
231 , m_matrix(matrix)
232 {}
233
234 inline std::size_t _ijk(std::size_t k, int i, int j) const
235 {
236 return (k*m_NxN + i*m_N + j)*ellpack_size;
237 }
238
239 inline std::size_t _ij(std::size_t local_id,int i, int j) const
240 {
241 return local_id*m_NxN+ i*m_N + j;
242 }
243
244 void factorize(std::size_t global_id,
245 std::size_t local_id,
246 std::size_t block_id,
247 std::size_t kcol,
248 LUAccT m_LU) const
249 {
250 // Copy Diag Matrix in A
251 for(int i=0;i<m_N;++i)
252 for(int j=0;j<m_N;++j)
253 m_LU[_ijk(block_id,i,j)+local_id] = m_matrix[_ijk(kcol,i,j)+local_id] ;
254
255 //Factorize A = LU
256 for (int k = 0; k < m_N; ++k)
257 {
258 //assert(m_LU[_ijk(block_id,k,k)+local_id] != 0);
259 m_LU[_ijk(block_id,k,k)+local_id] = 1 / m_LU[_ijk(block_id,k,k)+local_id];
260 for (int i = k + 1; i < m_N; ++i) {
261 m_LU[_ijk(block_id,i,k)+local_id] *= m_LU[_ijk(block_id,k,k)+local_id];
262 }
263 for (int i = k + 1; i < m_N; ++i) {
264 for (int j = k + 1; j < m_N; ++j) {
265 m_LU[_ijk(block_id,i,j)+local_id] -= m_LU[_ijk(block_id,i,k)+local_id] * m_LU[_ijk(block_id,k,j)+local_id];
266 }
267 }
268 }
269 }
270
271 void inverse(std::size_t global_id,
272 std::size_t local_id,
273 std::size_t block_id,
274 LUAccT m_LU,
275 VectorAccT m_y) const
276 {
277 // SET Y to Id
278 for(int i=0;i<m_N;++i)
279 for(int j=0;j<m_N;++j)
280 m_y[_ij(global_id,i,j)] = 0. ;
281 for(int i=0;i<m_N;++i)
282 m_y[_ij(global_id,i,i)] = 1. ;
283
284 // L solve
285 for (int i = 1; i < m_N; ++i)
286 {
287 for (int j = 0; j < i; ++j)
288 {
289 for(int k=0;k<m_N;++k)
290 m_y[_ij(global_id,i,k)] -= m_LU[_ijk(block_id,i,j)+local_id] * m_y[_ij(global_id,j,k)];
291 }
292 }
293
294 // U solve
295 for (int i = m_N - 1; i >= 0; --i)
296 {
297 for (int j = m_N - 1; j > i; --j)
298 {
299 for(int k=0;k<m_N;++k)
300 m_y[_ij(global_id,i,k)] -= m_LU[_ijk(block_id,i,j)+local_id] * m_y[_ij(global_id,j,k)];
301 }
302 for(int k=0;k<m_N;++k)
303 m_y[_ij(global_id,i,k)] *= m_LU[_ijk(block_id,i,i)+local_id];
304 }
305 }
306 };
307
308 public:
309 MatrixInternal(ProfileType const* profile, int blk_size=1);
310
311 ~MatrixInternal() {}
312
313 bool setMatrixValues(ValueType const* values, bool only_host);
314 bool setMatrixValuesFromHost();
315
316 bool setMatrixValues(ValueBufferType& values);
317 bool setMatrixValues(ValueBufferType& values,
318 ValueBufferType& ext_values);
319
320 bool copy(std::size_t nb_blocks,
321 Integer block_size,
322 ValueBufferType& rhs_values,
323 Integer rhs_block_size);
324
325 bool copy(std::size_t nb_blocks,
326 Integer block_size,
327 ValueBufferType& rhs_values,
328 ValueBufferType& rhs_ext_values,
329 Integer rhs_block_size);
330
331 bool needUpdate();
332 void notifyChanges();
333 void endUpdate();
334
335 template <int N>
336 void multN(ValueBufferType& x, ValueBufferType& y, QueueType& queue) const
337 {
338 auto device = queue.get_device();
339
340 auto num_groups = queue.get_device().get_info<sycl::info::device::max_compute_units>();
341 // getting the maximum work group size per thread
342 auto max_work_group_size = queue.get_device().get_info<sycl::info::device::max_work_group_size>();
343 // building the best number of global thread
344
345 // clang-format off
346 std::size_t pack_size = ellpack_size;
347 auto nrows = m_profile->getNRows();
348 auto nnz = m_profile->getNnz();
349
350 auto internal_profile = m_profile->internal();
351 auto& kcol = internal_profile->getKCol();
352 auto& block_row_offset = internal_profile->getBlockRowOffset();
353 auto& block_cols = internal_profile->getBlockCols();
354
355 auto blocks_needed = (nrows + ellpack_size - 1) / ellpack_size;
356 auto blocks_target = std::max(blocks_needed, num_groups * 4UL);
357 auto total_threads = blocks_target * pack_size;
358
359 queue.submit(
360 [&](sycl::handler& cgh)
361 {
362 auto access_block_row_offset = block_row_offset.template get_access<sycl::access::mode::read>(cgh);
363 auto access_cols = block_cols.template get_access<sycl::access::mode::read>(cgh);
364 auto access_values = m_values.template get_access<sycl::access::mode::read>(cgh);
365
366 auto access_x = x.template get_access<sycl::access::mode::read>(cgh);
367 auto access_y = y.template get_access<sycl::access::mode::discard_write>(cgh);
368
369 auto tile = TileT<N>() ;
370
371 sycl::local_accessor<ValueType, 1> lds_x{pack_size*N, cgh};
372 sycl::nd_range<1> r{sycl::range<1>{total_threads},sycl::range<1>{pack_size}};
373 cgh.parallel_for<class compute_mult>(r,
374 [=](sycl::nd_item<1> item_id)
375 {
376 auto local_id = item_id.get_local_id(0);
377 auto global_id = item_id.get_global_id(0);
378
379 for (auto i = global_id; i < nrows; i += item_id.get_global_range()[0])
380 {
381 auto block_id = i/pack_size ;
382
383 int begin = access_block_row_offset[block_id] ;
384 int end = access_block_row_offset[block_id+1] ;
385
386 #pragma unroll
387 for(int ieq=0;ieq<N;++ieq)
388 {
389 ValueType value = 0. ;
390 for(int k=begin;k<end;++k)
391 {
392 //auto k = block_row_offset+j*ellpack_size+local_id ;
393 const int col = access_cols[k * pack_size + local_id];
394 if(col>=0)
395 for(int ju=0;ju<N;++ju)
396 lds_x[N*local_id+ju] = access_x[col*N+ju];
397 item_id.barrier(sycl::access::fence_space::local_space);
398 if(col>=0)
399 {
400 #pragma unroll
401 for(int ju=0;ju<N;++ju)
402 value += access_values[tile.ijk(k,ieq,ju) + local_id] * lds_x[local_id*N+ju] ;
403 }
404 item_id.barrier(sycl::access::fence_space::local_space);
405 }
406 access_y[i*N+ieq] = value ;
407 }
408 }
409 });
410 });
411 }
412 void mult(ValueBufferType& x, ValueBufferType& y) const;
413 void mult(ValueBufferType& x, ValueBufferType& y, QueueType& queue) const;
414
415 void addExtMult(ValueBufferType& x, ValueBufferType& y) const;
416 void addExtMult(ValueBufferType& x, ValueBufferType& y, QueueType& queue) const;
417
418 template<int N>
419 void addLMultN(ValueType alpha, ValueBufferType& x, ValueBufferType& y, QueueType& queue) const
420 {
421 auto device = queue.get_device();
422
423 auto num_groups = queue.get_device().get_info<sycl::info::device::max_compute_units>();
424 // getting the maximum work group size per thread
425 auto max_work_group_size = queue.get_device().get_info<sycl::info::device::max_work_group_size>();
426 // building the best number of global thread
427
428 std::size_t pack_size = ellpack_size;
429 auto nrows = m_profile->getNRows();
430 auto nnz = m_profile->getNnz();
431
432 auto internal_profile = m_profile->internal();
433 auto& kcol = internal_profile->getKCol();
434 auto& block_row_offset = internal_profile->getBlockRowOffset();
435 auto& block_cols = internal_profile->getBlockCols();
436
437 auto& mask = internal_profile->getLowerMask();
438 // clang-format on
439 // clang-format off
440 queue.submit(
441 [&](sycl::handler& cgh)
442 {
443 auto access_block_row_offset = block_row_offset.template get_access<sycl::access::mode::read>(cgh);
444 auto access_cols = block_cols.template get_access<sycl::access::mode::read>(cgh);
445 auto access_mask = mask.template get_access<sycl::access::mode::read>(cgh);
446 auto access_values = m_values.template get_access<sycl::access::mode::read>(cgh);
447
448
449 auto access_x = x.template get_access<sycl::access::mode::read>(cgh);
450 auto access_y = y.template get_access<sycl::access::mode::read_write>(cgh);
451
452 auto blocks_needed = (nrows + ellpack_size - 1) / ellpack_size;
453 auto blocks_target = std::max(blocks_needed, num_groups * 4UL);
454 auto total_threads = blocks_target * ellpack_size;
455
456 auto tile = TileT<N>() ;
457 sycl::local_accessor<ValueType, 1> lds_x{pack_size*N, cgh};
458 sycl::nd_range<1> r{sycl::range<1>{total_threads},sycl::range<1>{pack_size}};
459 cgh.parallel_for<class compute_lmultn>(r,
460 [=](sycl::nd_item<1> item_id)
461 {
462 auto local_id = item_id.get_local_id(0);
463 auto global_id = item_id.get_global_id(0);
464
465 //for (auto i = id; i < nrows; i += item_id.get_range()[0])
466 for (auto i = global_id; i < nrows; i += item_id.get_global_range()[0])
467 {
468 auto block_id = i/pack_size ;
469
470 int begin = access_block_row_offset[block_id] ;
471 int end = access_block_row_offset[block_id+1] ;
472 #pragma unroll
473 for(int ieq=0;ieq<N;++ieq)
474 {
475 ValueType value = 0. ;
476 for(int k=begin;k<end;++k)
477 {
478 //auto k = block_row_offset+j*ellpack_size+local_id ;
479 const int col = access_cols[k * pack_size + local_id];
480 if(col>=0)
481 #pragma unroll
482 for(int ju=0;ju<N;++ju)
483 lds_x[N*local_id+ju] = access_x[N*col+ju];
484 item_id.barrier(sycl::access::fence_space::local_space);
485 if(access_mask[k * pack_size + local_id])
486 #pragma unroll
487 for(int ju=0;ju<N;++ju)
488 value += access_values[tile.ijk(k,ieq,ju) + local_id] * lds_x[N*local_id+ju] ;
489 item_id.barrier(sycl::access::fence_space::local_space);
490 }
491 access_y[i*N+ieq] += alpha*value ;
492 }
493 }
494 });
495 });
496 }
497
498 template<int N>
499 void addUMultN(ValueType alpha, ValueBufferType& x, ValueBufferType& y, QueueType& queue) const
500 {
501 auto device = queue.get_device();
502
503 auto num_groups = queue.get_device().get_info<sycl::info::device::max_compute_units>();
504 // getting the maximum work group size per thread
505 auto max_work_group_size = queue.get_device().get_info<sycl::info::device::max_work_group_size>();
506 // building the best number of global thread
507 //auto total_threads = num_groups * ellpack_size;
508
509 // clang-format off
510 std::size_t pack_size = ellpack_size;
511 auto nrows = m_profile->getNRows() ;
512 auto nnz = m_profile->getNnz() ;
513
514 auto blocks_needed = (nrows + ellpack_size - 1) / ellpack_size;
515 auto blocks_target = std::max(blocks_needed, num_groups * 4UL);
516 auto total_threads = blocks_target * ellpack_size;
517
518 auto internal_profile = m_profile->internal() ;
519 auto& kcol = internal_profile->getKCol() ;
520 auto& block_row_offset = internal_profile->getBlockRowOffset() ;
521 auto& block_cols = internal_profile->getBlockCols() ;
522 auto& mask = internal_profile->getUpperMask() ;
523 // COMPUTE VALUES
524 queue.submit(
525 [&](sycl::handler& cgh)
526 {
527 auto access_block_row_offset = block_row_offset.template get_access<sycl::access::mode::read>(cgh);
528 auto access_cols = block_cols.template get_access<sycl::access::mode::read>(cgh);
529 auto access_mask = mask.template get_access<sycl::access::mode::read>(cgh);
530 auto access_values = m_values.template get_access<sycl::access::mode::read>(cgh);
531
532
533 auto access_x = x.template get_access<sycl::access::mode::read>(cgh);
534 auto access_y = y.template get_access<sycl::access::mode::read_write>(cgh);
535
536 auto tile = TileT<N>() ;
537 sycl::local_accessor<ValueType, 1> lds_x{pack_size*N, cgh};
538 sycl::nd_range<1> r{sycl::range<1>{total_threads},sycl::range<1>{pack_size}};
539 cgh.parallel_for<class compute_umultn>(r,
540 [=](sycl::nd_item<1> item_id)
541 {
542 auto local_id = item_id.get_local_id(0);
543 auto global_id = item_id.get_global_id(0);
544 for (auto i = global_id; i < nrows; i += item_id.get_global_range()[0])
545 {
546 auto block_id = i/pack_size ;
547
548 auto begin = access_block_row_offset[block_id] ;
549 auto end = access_block_row_offset[block_id+1] ;
550 #pragma unroll
551 for(int ieq=0;ieq<N;++ieq)
552 {
553 ValueType value = 0. ;
554 for(int k=begin;k<end;++k)
555 {
556 const int col = access_cols[k * pack_size + local_id];
557 if(col>=0)
558 #pragma unroll
559 for(int ju=0;ju<N;++ju)
560 lds_x[local_id*N+ju] = access_x[col*N+ju];
561 item_id.barrier(sycl::access::fence_space::local_space);
562 if(access_mask[k * pack_size + local_id])
563 #pragma unroll
564 for(int ju=0;ju<N;++ju)
565 value += access_values[tile.ijk(k,ieq,ju) + local_id] * lds_x[local_id*N+ju] ;
566 item_id.barrier(sycl::access::fence_space::local_space);
567 }
568 access_y[i*N+ieq] += alpha*value ;
569 }
570 }
571 });
572 });
573 }
574
575 void addLMult(ValueType alpha, ValueBufferType& x, ValueBufferType& y) const;
576 void addUMult(ValueType alpha, ValueBufferType& x, ValueBufferType& y) const;
577
578 void addLMult(ValueType alpha, ValueBufferType& x, ValueBufferType& y, QueueType& queue) const;
579 void addUMult(ValueType alpha, ValueBufferType& x, ValueBufferType& y, QueueType& queue) const;
580
581 void multDiag(ValueBufferType& x, ValueBufferType& y) const;
582 void multDiag(ValueBufferType& x, ValueBufferType& y, QueueType& queue) const;
583
584 void multDiag(ValueBufferType& y) const;
585 void multDiag(ValueBufferType& y, QueueType& queue) const;
586
587 void computeDiag(ValueBufferType& y) const;
588 void computeDiag(ValueBufferType& y, QueueType& queue) const;
589
590 void computeBlockDiag(ValueBufferType& y) const;
591 void computeBlockDiag(ValueBufferType& y, QueueType& queue) const;
592
593 void multInvDiag(ValueBufferType& y) const;
594 void multInvDiag(ValueBufferType& y, QueueType& queue) const;
595
596 void computeInvDiag(ValueBufferType& y) const;
597 void computeInvDiag(ValueBufferType& y, QueueType& queue) const;
598
599 void computeInvBlockDiag(ValueBufferType& y) const;
600 void computeInvBlockDiag(ValueBufferType& y, QueueType& queue) const;
601
602 void scal(ValueBufferType& y);
603
604 void scal(ValueBufferType& y, QueueType& queue);
605
606 void copyDevicePointers(int local_offset,
607 std::size_t nrows,
608 std::size_t nnz,
609 int* rows,
610 int* ncols,
611 int* cols,
612 ValueT* values) const ;
613
614 ValueBufferType& getValues() { return m_values; }
615
616 ValueBufferType const getValues() const { return m_values; }
617
618 //ProfileType* getProfile() { return m_profile; }
619
620 ProfileType const* getProfile() const { return m_profile; }
621
622 ValueType const* getHCsrData() const
623 {
624 return m_h_csr_values.data();
625 }
626
627 ValueType* getHCsrData()
628 {
629 return m_h_csr_values.data();
630 }
631
632 IndexBufferType& getSendIds() const
633 {
634 return *m_send_ids;
635 }
636 IndexBufferType& getRecvIds() const
637 {
638 return *m_recv_ids;
639 }
640
641 // clang-format off
642 int m_N = 1;
643 int m_NxN = 1;
644 ProfileType const* m_profile = nullptr;
645 ProfileType const* m_ext_profile = nullptr;
646
647 std::vector<ValueType> m_h_csr_values ;
648 std::vector<ValueType> m_h_values ;
649 mutable ValueBufferType m_values ;
650
651 std::vector<ValueType> m_h_csr_ext_values ;
652 std::vector<ValueType> m_h_ext_values ;
653 mutable ValueBufferPtrType m_ext_values ;
654 bool m_values_is_update = false ;
655
656 int const* m_h_interface_row_ids = nullptr;
657 mutable IndexBufferPtrType m_interface_row_ids ;
658 mutable IndexBufferPtrType m_send_ids ;
659 mutable IndexBufferPtrType m_recv_ids ;
660 mutable IndexBufferPtrType m_recv_uids ;
661 // clang-format on
662};
663
664
665/*---------------------------------------------------------------------------*/
666
667} // namespace Alien::SYCLInternal
668
669/*---------------------------------------------------------------------------*/