// Type aliases used throughout the class.
// MatrixType is the matrix template argument; ValueType is its scalar type.
48 typedef MatrixT MatrixType;
49 typedef typename MatrixType::ValueType ValueType;
// One-dimensional SYCL buffer holding matrix values.
51 typedef sycl::buffer<ValueType, 1> ValueBufferType ;
// SYCL buffer of integer indices, and an owning pointer to such a buffer.
53 typedef sycl::buffer<int> IndexBufferType ;
54 typedef std::unique_ptr<IndexBufferType> IndexBufferPtrType ;
// Constructor.
//   matrix    : matrix whose distributed-structure info (send/recv descriptors)
//               and parallel manager are captured by this object.
//   work      : caller-owned integer work array (presumably bound to m_work;
//               the corresponding initializer is not visible here).
//   trace_mng : optional trace manager, defaults to nullptr.
// NOTE(review): a `distribution` argument is used in the initializer list below
// but its declaration is not visible in this excerpt.
58 SYCLLUSendRecvOp(MatrixType& matrix,
60 std::vector<int>& work,
61 Arccore::ITraceMng* trace_mng =
nullptr)
63 , m_distribution(distribution)
// Send and receive descriptors are taken from the matrix's DistStructInfo.
65 , m_send_info(matrix.getDistStructInfo().m_send_info)
66 , m_recv_info(matrix.getDistStructInfo().m_recv_info)
67 , m_parallel_mng(matrix.getParallelMng())
// Build the received-id -> local-row connectivity tables at construction time.
70 initSendRecvConnectivity();
// Destructor: passes every entry of the four send/receive buffer lists to
// sycl::free, using the queue obtained from the SYCLEnv singleton.
// NOTE(review): sycl::free only applies when the lists hold raw pointers; the
// member section also declares std::vector-based variants of the same lists,
// so this code is presumably guarded by a condition not visible here - confirm.
73 virtual ~SYCLLUSendRecvOp()
76 auto& queue = SYCLEnv::instance()->internal()->queue();
// Value buffers received from neighbours.
77 for(
auto& buf : m_recv_lu_buffer)
78 sycl::free(buf, queue);
// Index buffers received from neighbours.
79 for(
auto& buf : m_recv_lu_ibuffer)
80 sycl::free(buf, queue);
// Value buffers sent to neighbours.
81 for(
auto& buf : m_send_lu_buffer)
82 sycl::free(buf, queue);
// Index buffers sent to neighbours.
83 for(
auto& buf : m_send_lu_ibuffer)
84 sycl::free(buf, queue);
// Builds two lookup structures used when processing received LU rows:
//  1. m_mpi_ext_inv_ids: for each neighbour below m_first_upper_neighb, a map
//     from a received uid to its index i.
//  2. m_recv_connectivity_{ids_ptr,ids,krow}: a compressed (offset + data)
//     table giving, for every received id, the local rows that reference it
//     and the position k of that reference in the matrix column array.
// NOTE(review): `view` is declared on a line not visible in this excerpt.
89 void initSendRecvConnectivity()
94 auto nrows = view.nrows() ;
95 auto kcol = view.kcol() ;
97 auto cols = view.cols() ;
// Per-row count of entries belonging to the local part of the row.
99 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
// One uid -> index map per lower neighbour.
103 m_mpi_ext_inv_ids.resize(m_recv_info.m_first_upper_neighb);
104 for (
int ineighb = 0; ineighb < m_recv_info.m_first_upper_neighb; ++ineighb) {
105 std::map<int, int>& inv_ids = m_mpi_ext_inv_ids[ineighb];
// NOTE(review): the start and end bounds below are both
// m_ids_offset[ineighb], so as written this loop body never executes and the
// map stays empty. Other loops over the same offsets use [ineighb + 1] as the
// upper bound - confirm the intended bound, and the indexing of m_uids.
106 for (
int i = m_recv_info.m_ids_offset[ineighb]; i < m_recv_info.m_ids_offset[ineighb]; ++i) {
107 inv_ids[m_recv_info.m_uids[i]] = i;
// First pass: count, per received id (column index minus nrows), how many
// non-local entries of the local rows reference it.
110 std::size_t recv_uids_size = m_recv_info.m_uids.size();
111 std::vector<int> conn_size(recv_uids_size);
112 std::fill(conn_size.begin(), conn_size.end(), 0);
113 for (
int irow = 0; irow < nrows; ++irow) {
// Entries past the first local_row_size[irow] ones of the row are visited.
114 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
115 ++conn_size[cols[k] - nrows];
// Running sum of the counts gives the start offset of each id's list.
118 m_recv_connectivity_ids_ptr.resize(recv_uids_size + 1);
119 m_recv_connectivity_ids_ptr[0] = 0;
120 for (std::size_t i = 0; i < recv_uids_size; ++i)
121 m_recv_connectivity_ids_ptr[i + 1] = m_recv_connectivity_ids_ptr[i] + conn_size[i];
122 std::size_t total_conn_size = m_recv_connectivity_ids_ptr[recv_uids_size];
123 m_recv_connectivity_ids.resize(total_conn_size);
124 m_recv_connectivity_krow.resize(total_conn_size);
// Counters are zeroed again and reused as per-id write cursors.
125 std::fill(conn_size.begin(), conn_size.end(), 0);
// Second pass: record the referencing row and the entry position k.
// NOTE(review): the definition of `col` and any increment of conn_size[id]
// are not visible in this excerpt - presumably on the omitted lines.
126 for (
int irow = 0; irow < nrows; ++irow) {
127 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
129 int id = col - nrows;
130 m_recv_connectivity_ids[m_recv_connectivity_ids_ptr[id] + conn_size[id]] = irow;
131 m_recv_connectivity_krow[m_recv_connectivity_ids_ptr[id] + conn_size[id]] = k;
// Packs and sends LU row data to every neighbour with index in
// [m_first_upper_neighb, m_num_neighbours).
//   values : array of matrix values, indexed with the same positions k as the
//            column array `cols`.
// For each such neighbour an integer buffer and a value buffer are filled from
// the rows listed in m_send_info.m_ids, then a two-entry count array and both
// buffers are sent with mpSend.
// NOTE(review): two alternative implementations appear interleaved below - one
// writing through raw pointers from sycl::malloc_shared with icount/count
// cursors, one using reserve/push_back on containers. They are presumably
// selected by conditions on lines not visible here. `modifier`, `queue`,
// `icount` and `count` are also declared on lines not shown.
137 void sendUpperNeighbLUData(ValueType* values)
141 auto nrows = modifier.nrows() ;
143 auto kcol = modifier.kcol() ;
144 auto dcol = modifier.dcol() ;
145 auto cols = modifier.cols() ;
149 auto max_row_size = m_matrix.getProfile().getMaxRowSize();
150 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
// One buffer pair per upper neighbour; slots are indexed relative to
// m_first_upper_neighb.
152 m_send_lu_ibuffer.resize(m_send_info.m_num_neighbours - m_send_info.m_first_upper_neighb);
153 m_send_lu_buffer.resize(m_send_info.m_num_neighbours - m_send_info.m_first_upper_neighb);
154 for (
int ineighb = m_send_info.m_first_upper_neighb; ineighb < m_send_info.m_num_neighbours; ++ineighb) {
155 int neighb = m_send_info.m_ranks[ineighb];
156 auto& ibuffer = m_send_lu_ibuffer[ineighb - m_send_info.m_first_upper_neighb];
157 auto& buffer = m_send_lu_buffer[ineighb - m_send_info.m_first_upper_neighb];
158 int nb_send_rows = m_send_info.m_ids_offset[ineighb + 1] - m_send_info.m_ids_offset[ineighb];
// Raw-pointer variant: allocate nb_send_rows * max_row_size elements each.
// NOTE(review): ibuffer also receives two header ints per row below; confirm
// this capacity is sufficient for the raw-pointer variant.
162 ibuffer = sycl::malloc_shared<int>(nb_send_rows * max_row_size, queue);
163 buffer = sycl::malloc_shared<ValueType>(nb_send_rows * max_row_size, queue);
// Container variant: reserve the same number of elements.
166 buffer.reserve(nb_send_rows * max_row_size);
168 ibuffer.reserve(nb_send_rows * max_row_size);
170 for (
int i = m_send_info.m_ids_offset[ineighb]; i < m_send_info.m_ids_offset[ineighb + 1]; ++i) {
171 int irow = m_send_info.m_ids[i];
172 int lrow_size = local_row_size[irow];
// int_row_size: entries from dcol[irow] up to the end of the local part.
// ext_row_size: remaining entries of the row after the local part.
173 int int_row_size = kcol[irow] + lrow_size - dcol[irow];
174 int ext_row_size = kcol[irow + 1] - kcol[irow] - lrow_size;
// Each row starts with a two-int header holding both sizes.
176 ibuffer[icount++] = int_row_size;
177 ibuffer[icount++] = ext_row_size;
179 ibuffer.push_back(int_row_size);
180 ibuffer.push_back(ext_row_size);
// Local part: values together with their column indices as stored.
182 for (
int k = dcol[irow]; k < kcol[irow] + lrow_size; ++k) {
184 buffer[count++] = values[k];
// NOTE(review): the next line ends with an unbalanced ')'.
185 ibuffer[icount++] = cols[k]);
187 buffer.push_back(values[k]);
188 ibuffer.push_back(cols[k]);
// Non-local part: column indices are translated through m_recv_info.m_uids.
191 for (
int k = kcol[irow] + lrow_size; k < kcol[irow + 1]; ++k) {
193 buffer[count++] = values[k];
194 ibuffer[icount++] = m_recv_info.m_uids[cols[k] - nrows];
196 buffer.push_back(values[k]);
197 ibuffer.push_back(m_recv_info.m_uids[cols[k] - nrows]);
// counts[0] = number of ints, counts[1] = number of values (see the
// container variant below). The assignments for the raw-pointer variant are
// not visible in this excerpt.
201 UniqueArray<int> counts(2);
// Raw-pointer variant: send the counts, then the index and value buffers.
205 Arccore::MessagePassing::mpSend(m_parallel_mng, counts, neighb);
206 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<int>(counts[0],ibuffer), neighb);
207 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer), neighb);
// Container variant: same three messages, sizes taken from the containers.
209 counts[0] = (int) ibuffer.size();
210 counts[1] = (int) buffer.size();
211 Arccore::MessagePassing::mpSend(m_parallel_mng, counts, neighb);
212 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<int>(counts[0],ibuffer.data()), neighb);
213 Arccore::MessagePassing::mpSend(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer.data()), neighb);
// Receives packed LU rows from every neighbour with index in
// [0, m_first_upper_neighb) and uses them to update `values` in place.
//   values : array of matrix values, updated through values[kj] -= ...
// For each neighbour: receive a two-entry count array, then the index and
// value buffers. For each received row, every local row connected to it (via
// the m_recv_connectivity_* tables) computes a factor aik and subtracts
// aik * (received value) from the local entries located through m_work.
// NOTE(review): several statements are not visible in this excerpt, including
// the declarations of `modifier`, `queue`, `distribution`, `icount` and
// `icount2`, and the bodies of some loops. As in the send routine, a
// raw-pointer and a container variant appear interleaved.
218 void recvLowerNeighbLUData(ValueType* values)
222 auto nrows = modifier.nrows() ;
223 auto nnz = modifier.nnz() ;
224 auto kcol = modifier.kcol() ;
226 auto cols = modifier.cols() ;
229 auto& local_row_size = m_matrix.getDistStructInfo().m_local_row_size;
232 int my_rank = m_parallel_mng->commRank();
// Offset subtracted from a uid owned by this rank to obtain a local id.
233 int my_domain_offset = distribution.offset(my_rank);
// One receive buffer pair per lower neighbour.
235 m_recv_lu_ibuffer.resize(m_recv_info.m_first_upper_neighb);
236 m_recv_lu_buffer.resize(m_recv_info.m_first_upper_neighb);
237 for (
int ineighb = 0; ineighb < m_recv_info.m_first_upper_neighb; ++ineighb) {
238 int neighb = m_recv_info.m_ranks[ineighb];
// counts[0] sizes the index buffer, counts[1] the value buffer.
239 UniqueArray<int> counts(2);
240 Arccore::MessagePassing::mpReceive(m_parallel_mng, counts, neighb);
241 auto& ibuffer = m_recv_lu_ibuffer[ineighb];
242 auto& buffer = m_recv_lu_buffer[ineighb];
// Raw-pointer variant.
// NOTE(review): `buffer` is allocated with malloc_shared<int> but is received
// into as ValueType two lines below - confirm the element type.
244 ibuffer = sycl::malloc_shared<int>(counts[0], queue);
245 buffer = sycl::malloc_shared<int>(counts[1], queue);
246 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<int>(counts[0],ibuffer), neighb);
247 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer), neighb);
// Container variant.
249 ibuffer.resize(counts[0]);
250 buffer.resize(counts[1]);
251 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<int>(counts[0],ibuffer.data()), neighb);
252 Arccore::MessagePassing::mpReceive(m_parallel_mng, ArrayView<ValueType>(counts[1],buffer.data()), neighb);
// Walk the received rows; each starts with a two-int header (int_row_size,
// ext_row_size) read from ibuffer.
256 for (
int i = m_recv_info.m_ids_offset[ineighb]; i < m_recv_info.m_ids_offset[ineighb + 1]; ++i) {
// Index of this received row in the connectivity tables.
257 auto irow = i - nrows;
258 int int_row_size = ibuffer[icount++];
259 int ext_row_size = ibuffer[icount++];
// Every local row that references this received row.
260 for (
int conn_k = m_recv_connectivity_ids_ptr[irow]; conn_k < m_recv_connectivity_ids_ptr[irow + 1]; ++conn_k) {
261 int conn_row = m_recv_connectivity_ids[conn_k];
262 int krow = m_recv_connectivity_krow[conn_k];
// NOTE(review): the bodies of the next two loops are not visible; presumably
// they record entry positions in m_work, since the matching loops at the end
// reset m_work[cols[k]] to -1 - confirm.
263 for (
int k = krow + 1; k < kcol[conn_row + 1]; ++k) {
266 for (
int k = kcol[conn_row]; k < kcol[conn_row] + local_row_size[conn_row]; ++k) {
270 std::map<int, int>& inv_ids = m_mpi_ext_inv_ids[ineighb];
// Factor: local entry at krow divided by the first received value of the row.
271 ValueType aik = values[krow] / buffer[icount2];
// Internal part of the received row (first entry skipped): uids are resolved
// through the per-neighbour inverse map.
274 for (
int k = 1; k < int_row_size; ++k) {
275 int uid = ibuffer[icount + k];
276 std::map<int, int>::iterator iter = inv_ids.find(uid);
277 if (iter != inv_ids.end()) {
278 int lid = iter->second;
279 int kj = m_work[lid];
281 values[kj] -= aik * buffer[icount2 + k];
// External part of the received row: a uid owned by this rank is converted
// with my_domain_offset; the inverse-map lookup further down handles the
// remaining case (the branch structure between them is not visible here).
285 for (
int k = 0; k < ext_row_size; ++k) {
286 int uid = ibuffer[icount + int_row_size + k];
287 int owner = distribution.
owner(uid);
288 if (owner == my_rank) {
289 int lid = uid - my_domain_offset;
290 int kj = m_work[lid];
292 values[kj] -= aik * buffer[icount2 + int_row_size + k];
296 std::map<int, int>::iterator iter = inv_ids.find(uid);
297 if (iter != inv_ids.end()) {
298 int lid = iter->second;
299 int kj = m_work[lid];
301 values[kj] -= aik * buffer[icount2 + int_row_size + k];
// Reset the work array entries touched for this local row to -1.
307 for (
int k = krow + 1; k < kcol[conn_row + 1]; ++k) {
308 m_work[cols[k]] = -1;
310 for (
int k = kcol[conn_row]; k < kcol[conn_row] + local_row_size[conn_row]; ++k) {
311 m_work[cols[k]] = -1;
// Advance both cursors past the payload of the row just processed.
314 icount += int_row_size + ext_row_size;
315 icount2 += int_row_size + ext_row_size;
// Data members.
// Reference to the matrix this operator works on.
322 MatrixType& m_matrix;
// Reference to an externally owned integer work array; the receive routine
// reads positions from it and resets entries to -1.
324 std::vector< int >& m_work;
// Per-neighbour send/receive buffers, raw-pointer variant (filled with
// sycl::malloc_shared and released with sycl::free elsewhere in the class).
// NOTE(review): these use `ValueT` while the rest of the class uses
// `ValueType` - confirm ValueT is declared in this scope.
328 std::vector<ValueT*> m_send_lu_buffer;
329 std::vector<ValueT*> m_recv_lu_buffer;
330 std::vector<int*> m_send_lu_ibuffer;
331 std::vector<int*> m_recv_lu_ibuffer;
// Same four members, container variant.
// NOTE(review): both variants use identical names, so they are presumably
// selected by a preprocessor condition not visible in this excerpt.
333 std::vector<std::vector<ValueType>> m_send_lu_buffer;
334 std::vector<std::vector<ValueType>> m_recv_lu_buffer;
335 std::vector<std::vector<int>> m_send_lu_ibuffer;
336 std::vector<std::vector<int>> m_recv_lu_ibuffer;
// Compressed connectivity from received ids to local rows:
// ids_ptr holds per-id offsets, ids the local row index, krow the position of
// the referencing entry in the matrix arrays.
339 UniqueArray< int > m_recv_connectivity_ids ;
340 UniqueArray< int > m_recv_connectivity_krow ;
341 UniqueArray< int > m_recv_connectivity_ids_ptr ;
// One uid -> index map per lower neighbour.
342 UniqueArray< std::map<int, int> > m_mpi_ext_inv_ids ;
// Message-passing manager used for mpSend/mpReceive and rank queries.
344 Arccore::MessagePassing::IMessagePassingMng* m_parallel_mng =
nullptr;
// Optional trace manager; defaults to nullptr.
345 Arccore::ITraceMng* m_trace =
nullptr;