Alien  1.3.0
User documentation
Loading...
Searching...
No Matches
SYCLLUSendRecvOp.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* SYCLLUSendRecvOp.h (C) 2000-2025 */
9/* */
10/*---------------------------------------------------------------------------*/
11/*---------------------------------------------------------------------------*/
12
13#pragma once
14
15#include <vector>
16#include <map>
17
18#include <alien/kernels/simple_csr/SimpleCSRPrecomp.h>
19#include <alien/utils/Precomp.h>
20#include <alien/utils/Trace.h>
21
22#include <arccore/message_passing/Messages.h>
23#include <arccore/message_passing/Request.h>
24
25#include <alien/handlers/scalar/CSRModifierViewT.h>
26#include <alien/kernels/simple_csr/SendRecvOp.h>
27
30
31namespace Alien
32{
34}
35
36namespace Alien::SYCLInternal
37{
38
39#ifndef USE_SYCL2020
40 using namespace cl ;
41#endif
42
43template <typename MatrixT>
44class SYCLLUSendRecvOp
45{
46 public:
47 // clang-format off
48 typedef MatrixT MatrixType;
49 typedef typename MatrixType::ValueType ValueType;
50
51 typedef sycl::buffer<ValueType, 1> ValueBufferType ;
52
53 typedef sycl::buffer<int> IndexBufferType ;
54 typedef std::unique_ptr<IndexBufferType> IndexBufferPtrType ;
55 // clang-format on
56
57
58 SYCLLUSendRecvOp(MatrixType& matrix,
59 MatrixDistribution& distribution,
60 std::vector<int>& work,
61 Arccore::ITraceMng* trace_mng = nullptr)
62 : m_matrix(matrix)
63 , m_distribution(distribution)
64 , m_work(work)
65 , m_send_info(matrix.getDistStructInfo().m_send_info)
66 , m_recv_info(matrix.getDistStructInfo().m_recv_info)
67 , m_parallel_mng(matrix.getParallelMng())
68 , m_trace(trace_mng)
69 {
70 initSendRecvConnectivity();
71 }
72
73 virtual ~SYCLLUSendRecvOp()
74 {
75#ifdef USE_SYCL_USM
76 auto& queue = SYCLEnv::instance()->internal()->queue();
77 for(auto& buf : m_recv_lu_buffer)
78 sycl::free(buf, queue);
79 for(auto& buf : m_recv_lu_ibuffer)
80 sycl::free(buf, queue);
81 for(auto& buf : m_send_lu_buffer)
82 sycl::free(buf, queue);
83 for(auto& buf : m_send_lu_ibuffer)
84 sycl::free(buf, queue);
85#endif
86 }
87
88
89 void initSendRecvConnectivity()
90 {
91
92 CSRConstViewT<MatrixT> view(m_matrix);
93 // clang-format off
94 auto nrows = view.nrows() ;
95 auto kcol = view.kcol() ;
96 //auto dcol = view.dcol() ;
97 auto cols = view.cols() ;
98 // clang-format on
99 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
100
101 //int my_rank = m_parallel_mng->commRank();
102
103 m_mpi_ext_inv_ids.resize(m_recv_info.m_first_upper_neighb);
104 for (int ineighb = 0; ineighb < m_recv_info.m_first_upper_neighb; ++ineighb) {
105 std::map<int, int>& inv_ids = m_mpi_ext_inv_ids[ineighb];
106 for (int i = m_recv_info.m_ids_offset[ineighb]; i < m_recv_info.m_ids_offset[ineighb]; ++i) {
107 inv_ids[m_recv_info.m_uids[i]] = i;
108 }
109 }
110 std::size_t recv_uids_size = m_recv_info.m_uids.size();
111 std::vector<int> conn_size(recv_uids_size);
112 std::fill(conn_size.begin(), conn_size.end(), 0);
113 for (int irow = 0; irow < nrows; ++irow) {
114 for (int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
115 ++conn_size[cols[k] - nrows];
116 }
117 }
118 m_recv_connectivity_ids_ptr.resize(recv_uids_size + 1);
119 m_recv_connectivity_ids_ptr[0] = 0;
120 for (std::size_t i = 0; i < recv_uids_size; ++i)
121 m_recv_connectivity_ids_ptr[i + 1] = m_recv_connectivity_ids_ptr[i] + conn_size[i];
122 std::size_t total_conn_size = m_recv_connectivity_ids_ptr[recv_uids_size];
123 m_recv_connectivity_ids.resize(total_conn_size);
124 m_recv_connectivity_krow.resize(total_conn_size);
125 std::fill(conn_size.begin(), conn_size.end(), 0);
126 for (int irow = 0; irow < nrows; ++irow) {
127 for (int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
128 int col = cols[k];
129 int id = col - nrows;
130 m_recv_connectivity_ids[m_recv_connectivity_ids_ptr[id] + conn_size[id]] = irow;
131 m_recv_connectivity_krow[m_recv_connectivity_ids_ptr[id] + conn_size[id]] = k;
132 ++conn_size[id];
133 }
134 }
135 }
136
137 void sendUpperNeighbLUData(ValueType* values)
138 {
139 CSRModifierViewT<MatrixType> modifier(m_matrix);
140 // clang-format off
141 auto nrows = modifier.nrows() ;
142 //auto nnz = modifier.nnz() ;
143 auto kcol = modifier.kcol() ;
144 auto dcol = modifier.dcol() ;
145 auto cols = modifier.cols() ;
146 //auto values = modifier.data() ;
147 // clang-format on
148
149 auto max_row_size = m_matrix.getProfile().getMaxRowSize();
150 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
151
152 m_send_lu_ibuffer.resize(m_send_info.m_num_neighbours - m_send_info.m_first_upper_neighb);
153 m_send_lu_buffer.resize(m_send_info.m_num_neighbours - m_send_info.m_first_upper_neighb);
154 for (int ineighb = m_send_info.m_first_upper_neighb; ineighb < m_send_info.m_num_neighbours; ++ineighb) {
155 int neighb = m_send_info.m_ranks[ineighb];
156 auto& ibuffer = m_send_lu_ibuffer[ineighb - m_send_info.m_first_upper_neighb];
157 auto& buffer = m_send_lu_buffer[ineighb - m_send_info.m_first_upper_neighb];
158 int nb_send_rows = m_send_info.m_ids_offset[ineighb + 1] - m_send_info.m_ids_offset[ineighb];
159#ifdef USE_SYCL_USM
160 int icount = 0 ;
161 int count = 0 ;
162 ibuffer = sycl::malloc_shared<int>(nb_send_rows * max_row_size, queue);
163 buffer = sycl::malloc_shared<ValueType>(nb_send_rows * max_row_size, queue);
164#else
165 buffer.clear();
166 buffer.reserve(nb_send_rows * max_row_size);
167 ibuffer.clear();
168 ibuffer.reserve(nb_send_rows * max_row_size);
169#endif
170 for (int i = m_send_info.m_ids_offset[ineighb]; i < m_send_info.m_ids_offset[ineighb + 1]; ++i) {
171 int irow = m_send_info.m_ids[i];
172 int lrow_size = local_row_size[irow];
173 int int_row_size = kcol[irow] + lrow_size - dcol[irow];
174 int ext_row_size = kcol[irow + 1] - kcol[irow] - lrow_size;
175#ifdef USE_SYCL_USM
176 ibuffer[icount++] = int_row_size;
177 ibuffer[icount++] = ext_row_size;
178#else
179 ibuffer.push_back(int_row_size);
180 ibuffer.push_back(ext_row_size);
181#endif
182 for (int k = dcol[irow]; k < kcol[irow] + lrow_size; ++k) {
183#ifdef USE_SYCL_USM
184 buffer[count++] = values[k];
185 ibuffer[icount++] = cols[k]);
186#else
187 buffer.push_back(values[k]);
188 ibuffer.push_back(cols[k]);
189#endif
190 }
191 for (int k = kcol[irow] + lrow_size; k < kcol[irow + 1]; ++k) {
192#ifdef USE_SYCL_USM
193 buffer[count++] = values[k];
194 ibuffer[icount++] = m_recv_info.m_uids[cols[k] - nrows];
195#else
196 buffer.push_back(values[k]);
197 ibuffer.push_back(m_recv_info.m_uids[cols[k] - nrows]);
198#endif
199 }
200 }
201 UniqueArray<int> counts(2);
202#ifdef USE_SYCL_USM
203 counts[0] = icount;
204 counts[1] = count;
205 Arccore::MessagePassing::mpSend(m_parallel_mng, counts, neighb);
206 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<int>(counts[0],ibuffer), neighb);
207 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer), neighb);
208#else
209 counts[0] = (int) ibuffer.size();
210 counts[1] = (int) buffer.size();
211 Arccore::MessagePassing::mpSend(m_parallel_mng, counts, neighb);
212 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<int>(counts[0],ibuffer.data()), neighb);
213 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer.data()), neighb);
214#endif
215 }
216 }
217
218 void recvLowerNeighbLUData(ValueType* values)
219 {
220 CSRModifierViewT<MatrixT> modifier(m_matrix);
221 // clang-format off
222 auto nrows = modifier.nrows() ;
223 auto nnz = modifier.nnz() ;
224 auto kcol = modifier.kcol() ;
225 //auto dcol = modifier.dcol() ;
226 auto cols = modifier.cols() ;
227 //auto values = modifier.data() ;
228 // clang-format on
229 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
230 auto const& distribution = m_distribution.rowDistribution();
231
232 int my_rank = m_parallel_mng->commRank();
233 int my_domain_offset = distribution.offset(my_rank);
234
235 m_recv_lu_ibuffer.resize(m_recv_info.m_first_upper_neighb);
236 m_recv_lu_buffer.resize(m_recv_info.m_first_upper_neighb);
237 for (int ineighb = 0; ineighb < m_recv_info.m_first_upper_neighb; ++ineighb) {
238 int neighb = m_recv_info.m_ranks[ineighb];
239 UniqueArray<int> counts(2);
240 Arccore::MessagePassing::mpReceive(m_parallel_mng, counts, neighb);
241 auto& ibuffer = m_recv_lu_ibuffer[ineighb];
242 auto& buffer = m_recv_lu_buffer[ineighb];
243#ifdef USE_SYCL_USM
244 ibuffer = sycl::malloc_shared<int>(counts[0], queue);
245 buffer = sycl::malloc_shared<int>(counts[1], queue);
246 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<int>(counts[0],ibuffer), neighb);
247 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer), neighb);
248#else
249 ibuffer.resize(counts[0]);
250 buffer.resize(counts[1]);
251 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<int>(counts[0],ibuffer.data()), neighb);
252 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer.data()), neighb);
253#endif
254 int icount = 0;
255 int icount2 = 0;
256 for (int i = m_recv_info.m_ids_offset[ineighb]; i < m_recv_info.m_ids_offset[ineighb + 1]; ++i) {
257 auto irow = i - nrows;
258 int int_row_size = ibuffer[icount++];
259 int ext_row_size = ibuffer[icount++];
260 for (int conn_k = m_recv_connectivity_ids_ptr[irow]; conn_k < m_recv_connectivity_ids_ptr[irow + 1]; ++conn_k) {
261 int conn_row = m_recv_connectivity_ids[conn_k];
262 int krow = m_recv_connectivity_krow[conn_k];
263 for (int k = krow + 1; k < kcol[conn_row + 1]; ++k) {
264 m_work[cols[k]] = k;
265 }
266 for (int k = kcol[conn_row]; k < kcol[conn_row] + local_row_size[conn_row]; ++k) {
267 m_work[cols[k]] = k;
268 }
269
270 std::map<int, int>& inv_ids = m_mpi_ext_inv_ids[ineighb];
271 ValueType aik = values[krow] / buffer[icount2]; // aik = aik/akk
272 //MatrixDataType aik = mpi_ext_values[krow] / buffer[icount2 ]; // aik = aik/akk
273 values[krow] = aik;
274 for (int k = 1; k < int_row_size; ++k) {
275 int uid = ibuffer[icount + k];
276 std::map<int, int>::iterator iter = inv_ids.find(uid);
277 if (iter != inv_ids.end()) {
278 int lid = iter->second;
279 int kj = m_work[lid];
280 if (kj != -1) {
281 values[kj] -= aik * buffer[icount2 + k]; // aij = aij - aik*akj
282 }
283 }
284 }
285 for (int k = 0; k < ext_row_size; ++k) {
286 int uid = ibuffer[icount + int_row_size + k];
287 int owner = distribution.owner(uid);
288 if (owner == my_rank) {
289 int lid = uid - my_domain_offset;
290 int kj = m_work[lid];
291 if (kj != -1) {
292 values[kj] -= aik * buffer[icount2 + int_row_size + k]; // aij = aij - aik*akj
293 }
294 }
295 else {
296 std::map<int, int>::iterator iter = inv_ids.find(uid);
297 if (iter != inv_ids.end()) {
298 int lid = iter->second;
299 int kj = m_work[lid];
300 if (kj != -1) {
301 values[kj] -= aik * buffer[icount2 + int_row_size + k]; // aij = aij - aik*akj
302 }
303 }
304 }
305 }
306
307 for (int k = krow + 1; k < kcol[conn_row + 1]; ++k) {
308 m_work[cols[k]] = -1;
309 }
310 for (int k = kcol[conn_row]; k < kcol[conn_row] + local_row_size[conn_row]; ++k) {
311 m_work[cols[k]] = -1;
312 }
313 }
314 icount += int_row_size + ext_row_size;
315 icount2 += int_row_size + ext_row_size;
316 }
317 }
318 }
319
320 private:
321 // clang-format off
322 MatrixType& m_matrix;
323 MatrixDistribution& m_distribution ;
324 std::vector< int >& m_work;
325 const Alien::SimpleCSRInternal::CommInfo& m_send_info;
326 const Alien::SimpleCSRInternal::CommInfo& m_recv_info;
327#ifdef USE_SYCL_USM
328 std::vector<ValueT*> m_send_lu_buffer;
329 std::vector<ValueT*> m_recv_lu_buffer;
330 std::vector<int*> m_send_lu_ibuffer;
331 std::vector<int*> m_recv_lu_ibuffer;
332#else
333 std::vector<std::vector<ValueType>> m_send_lu_buffer;
334 std::vector<std::vector<ValueType>> m_recv_lu_buffer;
335 std::vector<std::vector<int>> m_send_lu_ibuffer;
336 std::vector<std::vector<int>> m_recv_lu_ibuffer;
337#endif
338
339 UniqueArray< int > m_recv_connectivity_ids ;
340 UniqueArray< int > m_recv_connectivity_krow ;
341 UniqueArray< int > m_recv_connectivity_ids_ptr ;
342 UniqueArray< std::map<int, int> > m_mpi_ext_inv_ids ;
343
344 Arccore::MessagePassing::IMessagePassingMng* m_parallel_mng = nullptr;
345 Arccore::ITraceMng* m_trace = nullptr;
346 // clang-format on
347};
348
349} // namespace Alien::SimpleCSRInternal
MatrixDistribution.h.
VectorDistribution.h.
Computes a matrix distribution.
const VectorDistribution & rowDistribution() const
Get the row distribution.
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
Definition BackEnd.h:17