Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
DataSynchronizeBuffer.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* DataSynchronizeBuffer.cc (C) 2000-2025 */
9/* */
10/* Implementation of a generic buffer for data synchronization. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/impl/internal/DataSynchronizeBuffer.h"
15
16#include "arcane/utils/FatalErrorException.h"
17#include "arcane/utils/internal/MemoryBuffer.h"
18
19#include "arcane/impl/DataSynchronizeInfo.h"
20#include "arcane/impl/internal/IBufferCopier.h"
21
22#include "arcane/accelerator/core/Runner.h"
23#include "arcane/utils/FixedArray.h"
24#include "arcane/utils/ITraceMng.h"
25
26#include <cstddef>
27
28/*---------------------------------------------------------------------------*/
29/*---------------------------------------------------------------------------*/
30
31namespace Arcane
32{
33
34/*---------------------------------------------------------------------------*/
35/*---------------------------------------------------------------------------*/
36
37namespace
38{
40 constexpr Int64 ALIGNEMENT_SIZE = 64;
41 Int64 _applyPadding(Int64 original_size)
42 {
43 Int64 modulo = original_size % ALIGNEMENT_SIZE;
44 Int64 new_size = original_size;
45 if (modulo != 0)
46 new_size += (ALIGNEMENT_SIZE - modulo);
47 if ((new_size % ALIGNEMENT_SIZE) != 0)
48 ARCANE_FATAL("Bad padding");
49 return new_size;
50 }
51 void _checkAlignment(const void* address)
52 {
53 auto a = reinterpret_cast<intptr_t>(address);
54 intptr_t max_align = alignof(std::max_align_t);
55 intptr_t modulo = a % max_align;
56 if (modulo != 0)
57 ARCANE_FATAL("Address '{0}' is not aligned (align={1}, modulo={2})", address, max_align, modulo);
58 }
59} // namespace
60
61/*---------------------------------------------------------------------------*/
62/*---------------------------------------------------------------------------*/
63
66{
67 Int32 nb_rank = nbRank();
68 for (Int32 i = 0; i < nb_rank; ++i)
70 barrier();
71}
72
73/*---------------------------------------------------------------------------*/
74/*---------------------------------------------------------------------------*/
75
78{
79 Int32 nb_rank = nbRank();
80 for (Int32 i = 0; i < nb_rank; ++i)
82 barrier();
83}
84
85/*---------------------------------------------------------------------------*/
86/*---------------------------------------------------------------------------*/
87
88/*---------------------------------------------------------------------------*/
89/*---------------------------------------------------------------------------*/
90
92barrier()
93{
94 m_queue.barrier();
95}
96
97/*---------------------------------------------------------------------------*/
98/*---------------------------------------------------------------------------*/
99
103{
104 if (v.datatypeSize() != 1)
105 ARCANE_FATAL("Global buffer has to use a datatype of size 1 (current={0})", v.datatypeSize());
106 m_memory_view = v;
107}
108
109/*---------------------------------------------------------------------------*/
110/*---------------------------------------------------------------------------*/
111
113displacement(Int32 rank_index) const
114{
115 return m_displacements[rank_index][0];
116}
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120
122localBufferSize(Int32 rank_index) const
123{
124 return m_local_buffer_size[rank_index];
125}
126
127/*---------------------------------------------------------------------------*/
128/*---------------------------------------------------------------------------*/
129
131localBuffer(Int32 rank_index) const
132{
133 std::byte* data = m_memory_view.data();
134 data += m_displacements[rank_index][0];
135 const Int64 nb_byte = m_local_buffer_size[rank_index];
136 return makeMutableMemoryView(data, 1, nb_byte);
137}
138
139/*---------------------------------------------------------------------------*/
140/*---------------------------------------------------------------------------*/
141
143dataLocalBuffer(Int32 rank_index, Int32 data_index) const
144{
145 std::byte* data = m_memory_view.data();
146 data += m_displacements[rank_index][data_index];
147 const Int32 nb_item = m_buffer_info->nbItem(rank_index);
148 return makeMutableMemoryView(data, m_datatype_sizes[data_index], nb_item);
149}
150
151/*---------------------------------------------------------------------------*/
152/*---------------------------------------------------------------------------*/
153
155localIds(Int32 index) const
156{
157 return m_buffer_info->localIds(index);
158}
159
160/*---------------------------------------------------------------------------*/
161/*---------------------------------------------------------------------------*/
162
172initialize(ConstArrayView<Int32> datatype_sizes, const DataSynchronizeBufferInfoList* buffer_info)
173{
174 ARCANE_CHECK_POINTER(buffer_info);
175 m_buffer_info = buffer_info;
176 m_datatype_sizes = datatype_sizes;
177 const Int32 nb_data = datatype_sizes.size();
178 const Int32 nb_rank = buffer_info->nbRank();
179 m_displacements.resize(nb_rank, nb_data);
180 m_local_buffer_size.resize(nb_rank);
181
182 // Calculates the offset for each data item from each rank
183 // ensuring that the offset is a multiple of ALIGNMENT_SIZE
184 Int64 data_offset = 0;
185 m_total_size = 0;
186 for (Int32 i = 0; i < nb_rank; ++i) {
187 const Int32 nb_item = buffer_info->nbItem(i);
188 Int64 local_buf_nb_byte = 0;
189 for (Int32 d = 0; d < nb_data; ++d) {
190 // Size needed for data \a d for rank \a i
191 // Padding is applied to this size to achieve
192 // a specific alignment.
193 const Int64 nb_byte = _applyPadding(nb_item * datatype_sizes[d]);
194 m_displacements[i][d] = data_offset;
195 local_buf_nb_byte += nb_byte;
196 data_offset += nb_byte;
197 }
198 m_local_buffer_size[i] = local_buf_nb_byte;
199 m_total_size += local_buf_nb_byte;
200 }
201}
202
203/*---------------------------------------------------------------------------*/
204/*---------------------------------------------------------------------------*/
205
206/*---------------------------------------------------------------------------*/
207/*---------------------------------------------------------------------------*/
208
209DataSynchronizeBufferBase::
210DataSynchronizeBufferBase(DataSynchronizeInfo* sync_info, Ref<IBufferCopier> copier)
211: m_sync_info(sync_info)
212, m_buffer_copier(copier)
213{
214}
215
216/*---------------------------------------------------------------------------*/
217/*---------------------------------------------------------------------------*/
218
220targetRank(Int32 index) const
221{
222 return m_sync_info->targetRank(index);
223}
224
225/*---------------------------------------------------------------------------*/
226/*---------------------------------------------------------------------------*/
227
229barrier()
230{
231 m_buffer_copier->barrier();
232}
233
234/*---------------------------------------------------------------------------*/
235/*---------------------------------------------------------------------------*/
236
242_compute(ConstArrayView<Int32> datatype_sizes)
243{
244 m_nb_rank = m_sync_info->size();
245
246 m_ghost_buffer_info.initialize(datatype_sizes, &m_sync_info->receiveInfo());
247 m_share_buffer_info.initialize(datatype_sizes, &m_sync_info->sendInfo());
248 m_compare_sync_buffer_info.initialize(datatype_sizes, &m_sync_info->receiveInfo());
249
251}
252
253/*---------------------------------------------------------------------------*/
254/*---------------------------------------------------------------------------*/
255
268{
269 const Int64 total_ghost_buffer = m_ghost_buffer_info.totalSize();
270 const Int64 total_share_buffer = m_share_buffer_info.totalSize();
271 Int64 total_compare_buffer = 0;
272 if (m_is_compare_sync_values)
273 total_compare_buffer = m_compare_sync_buffer_info.totalSize();
274
275 Int64 total_size = total_ghost_buffer + total_share_buffer + total_compare_buffer;
276 m_memory->resize(total_size);
277
278 Int64 share_offset = total_ghost_buffer;
279 Int64 check_sync_offset = share_offset + total_share_buffer;
280
281 Span<std::byte> buffer_span = m_memory->bytes();
282 auto s1 = buffer_span.subspan(0, share_offset);
283 m_ghost_buffer_info.setGlobalBuffer(makeMutableMemoryView(s1.data(), 1, total_ghost_buffer));
284 auto s2 = buffer_span.subspan(share_offset, total_share_buffer);
285 m_share_buffer_info.setGlobalBuffer(makeMutableMemoryView(s2.data(), 1, total_share_buffer));
286 if (m_is_compare_sync_values) {
287 auto s3 = buffer_span.subspan(check_sync_offset, total_ghost_buffer);
288 m_compare_sync_buffer_info.setGlobalBuffer(makeMutableMemoryView(s3.data(), 1, total_ghost_buffer));
289 }
290}
291
292/*---------------------------------------------------------------------------*/
293/*---------------------------------------------------------------------------*/
294
295/*---------------------------------------------------------------------------*/
296/*---------------------------------------------------------------------------*/
297
300{
301 m_ghost_buffer_info.checkValid();
302
303 MutableMemoryView var_values = dataView();
304 ConstArrayView<Int32> indexes = m_ghost_buffer_info.localIds(index);
305 ConstMemoryView local_buffer = m_ghost_buffer_info.dataLocalBuffer(index, 0);
306
307 m_buffer_copier->copyFromBufferAsync(indexes, local_buffer, var_values);
308}
309
310/*---------------------------------------------------------------------------*/
311/*---------------------------------------------------------------------------*/
312
314copySendAsync(Int32 index)
315{
316 m_share_buffer_info.checkValid();
317
318 ConstMemoryView var_values = dataView();
319 ConstArrayView<Int32> indexes = m_share_buffer_info.localIds(index);
320 MutableMemoryView local_buffer = m_share_buffer_info.dataLocalBuffer(index, 0);
321 m_buffer_copier->copyToBufferAsync(indexes, local_buffer, var_values);
322}
323
324/*---------------------------------------------------------------------------*/
325/*---------------------------------------------------------------------------*/
326
328prepareSynchronize(bool is_compare_sync)
329{
330 m_is_compare_sync_values = is_compare_sync;
331
333
334 if (is_compare_sync) {
335 // Recopy the current values of the ghost cells into the verification buffer.
336 MutableMemoryView var_values = dataView();
337 Int32 nb_rank = nbRank();
338 for (Int32 i = 0; i < nb_rank; ++i) {
340 MutableMemoryView local_buffer = m_compare_sync_buffer_info.dataLocalBuffer(i, 0);
341 m_buffer_copier->copyToBufferAsync(indexes, local_buffer, var_values);
342 }
343 // Normally no need to perform a barrier, because there will be the
344 // sends on the same queue and then a barrier.
345 }
346}
347
348/*---------------------------------------------------------------------------*/
349/*---------------------------------------------------------------------------*/
350
361{
362 if (!m_is_compare_sync_values)
363 return {};
364 ConstMemoryView reference_buffer = m_compare_sync_buffer_info.globalBuffer();
365 ConstMemoryView receive_buffer = m_ghost_buffer_info.globalBuffer();
366 Span<const std::byte> reference_bytes = reference_buffer.bytes();
367 Span<const std::byte> receive_bytes = receive_buffer.bytes();
368 Int64 reference_size = reference_bytes.size();
369 Int64 receive_size = receive_bytes.size();
370 if (reference_size != receive_size)
371 ARCANE_FATAL("Incoherent buffer size ref={0} receive={1}", reference_size, receive_size);
372 // TODO: handle the case where memory is on the device
373
375 bool is_same = std::memcmp(reference_bytes.data(), receive_bytes.data(), reference_size) == 0;
377 return result;
378}
379
380/*---------------------------------------------------------------------------*/
381/*---------------------------------------------------------------------------*/
382
383/*---------------------------------------------------------------------------*/
384/*---------------------------------------------------------------------------*/
385
390prepareSynchronize([[maybe_unused]] bool is_compare_sync)
391{
393}
394
395/*---------------------------------------------------------------------------*/
396/*---------------------------------------------------------------------------*/
397
399copyReceiveAsync(Int32 rank_index)
400{
401 IBufferCopier* copier = m_buffer_copier.get();
402 m_ghost_buffer_info.checkValid();
403
404 ConstArrayView<Int32> local_ids = m_ghost_buffer_info.localIds(rank_index);
405 Int32 data_index = 0;
406 for (MutableMemoryView var_values : m_data_views) {
407 ConstMemoryView local_buffer = m_ghost_buffer_info.dataLocalBuffer(rank_index, data_index);
408 _checkAlignment(local_buffer.data());
409 if (!local_buffer.bytes().empty())
410 copier->copyFromBufferAsync(local_ids, local_buffer, var_values);
411 ++data_index;
412 }
413}
414
415/*---------------------------------------------------------------------------*/
416/*---------------------------------------------------------------------------*/
417
419copySendAsync(Int32 rank_index)
420{
421 IBufferCopier* copier = m_buffer_copier.get();
422 m_ghost_buffer_info.checkValid();
423
424 ConstArrayView<Int32> local_ids = m_share_buffer_info.localIds(rank_index);
425 Int32 data_index = 0;
426 for (ConstMemoryView var_values : m_data_views) {
427 MutableMemoryView local_buffer = m_share_buffer_info.dataLocalBuffer(rank_index, data_index);
428 _checkAlignment(local_buffer.data());
429 if (!local_buffer.bytes().empty())
430 copier->copyToBufferAsync(local_ids, local_buffer, var_values);
431 ++data_index;
432 }
433}
434
435/*---------------------------------------------------------------------------*/
436/*---------------------------------------------------------------------------*/
437
438} // namespace Arcane
439
440/*---------------------------------------------------------------------------*/
441/*---------------------------------------------------------------------------*/
#define ARCANE_CHECK_POINTER(ptr)
Macro returning the pointer ptr if it is not null or throwing an exception if it is null.
#define ARCANE_FATAL(...)
Macro throwing a FatalErrorException.
Constant view of an array of type T.
constexpr Integer size() const noexcept
Number of elements in the array.
Constant view on a contiguous memory region containing fixed-size elements.
constexpr SpanType bytes() const
View in byte form.
constexpr const std::byte * data() const
Pointer to the memory region.
Int64 localBufferSize(Int32 rank_index) const
Size (in bytes) of the local buffer for rank rank_index.
Int64 displacement(Int32 rank_index) const
Displacement in globalBuffer() for the index-th rank.
ConstArrayView< Int32 > localIds(Int32 index) const
Local IDs of entities for rank index.
Int64 m_total_size
Total size (in bytes) of the buffer.
SmallArray< Int64 > m_local_buffer_size
Size (in bytes) of each local buffer.
MutableMemoryView m_memory_view
View onto the memory area of the buffer.
MutableMemoryView localBuffer(Int32 rank_index) const
Buffer for the index-th rank.
ConstArrayView< Int32 > m_datatype_sizes
Size (in bytes) of the type of each data item.
UniqueArray2< Int64 > m_displacements
Offset (in bytes) in globalBuffer() for each data item.
MutableMemoryView dataLocalBuffer(Int32 rank_index, Int32 data_index) const
Buffer for the index-th rank and the data_index-th data item.
void setGlobalBuffer(MutableMemoryView v)
Positions the global buffer.
void initialize(ConstArrayView< Int32 > datatype_sizes, const DataSynchronizeBufferInfoList *buffer_info)
Initializes the buffer information.
void barrier() final
Waits until the copies (copySendAsync() and copyReceiveAsync()) are finished.
Int32 targetRank(Int32 index) const final
Target rank of the index-th rank.
void _allocateBuffers()
Calculates and allocates the buffers necessary for sends and receives for 1D variable synchronization...
BufferInfo m_compare_sync_buffer_info
Buffer for testing if synchronization modified the values of ghost cells.
Int32 nbRank() const final
Number of ranks.
BufferInfo m_share_buffer_info
Buffer for all data of shared entities used for sending.
void _compute(ConstArrayView< Int32 > datatype_sizes)
Computes the information for the synchronization.
BufferInfo m_ghost_buffer_info
Buffer for all data of ghost entities used for reception.
Ref< MemoryBuffer > m_memory
Buffer containing the concatenated data for sending and receiving.
Information for sending (share) or receiving (ghost) messages.
Int32 nbItem(Int32 index) const
Number of entities for rank index.
Information necessary to synchronize entities across a group.
Information about the result of a synchronization.
void barrier() override
Blocks until the copies are finished.
Interface for copying elements between two regions with indexing.
virtual void barrier()=0
Waits until the copies (copySendAsync() and copyReceiveAsync()) are finished.
virtual void copyAllSend()
Copies all data into the send buffer.
virtual void copyAllReceive()
Copies all data from the receive buffer.
virtual void copyReceiveAsync(Int32 index)=0
Copies into the data from the receive buffer of the index-th rank.
virtual void copySendAsync(Int32 index)=0
Copies the data of the index-th rank into the send buffer.
virtual Int32 nbRank() const =0
Number of ranks.
SmallArray< Int32 > m_datatype_sizes
Array containing the sizes of the data types.
void prepareSynchronize(bool is_compare_sync) override
void copyReceiveAsync(Int32 rank_index) final
Copies into the data from the receive buffer of the index-th rank.
SmallArray< MutableMemoryView > m_data_views
View onto the data variables.
void copySendAsync(Int32 rank_index) final
Copies the data of the index-th rank into the send buffer.
Mutable view on a contiguous memory region containing fixed-size elements.
constexpr Int32 datatypeSize() const
Size of the associated data type (1 by default).
constexpr std::byte * data() const
Pointer to the memory region.
constexpr SpanType bytes() const
View in byte form.
Reference to an instance.
DataSynchronizeResult finalizeSynchronize()
Finalizes the synchronization.
void prepareSynchronize(bool is_compare_sync) override
Prepares the synchronization.
FixedArray< Int32, 1 > m_datatype_sizes
Array containing the sizes of the data types.
void copyReceiveAsync(Int32 index) final
Copies into the data from the receive buffer of the index-th rank.
void copySendAsync(Int32 index) final
Copies the data of the index-th rank into the send buffer.
MutableMemoryView dataView()
Memory area containing the values of the data to be synchronized.
constexpr __host__ __device__ pointer data() const noexcept
Pointer to the start of the view.
Definition Span.h:539
constexpr __host__ __device__ bool empty() const noexcept
Returns true if the array is empty (zero dimension).
Definition Span.h:492
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
Definition Span.h:327
View of an array of elements of type T.
Definition Span.h:635
constexpr __host__ __device__ Span< T, DynExtent > subspan(Int64 abegin, Int64 asize) const
Sub-view starting from element abegin and containing asize elements.
Definition Span.h:724
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
MutableMemoryView makeMutableMemoryView(void *ptr, Int32 datatype_size, Int64 nb_element)
Creates a mutable memory view.
Definition MemoryView.cc:26
std::int64_t Int64
Signed integer type of 64 bits.
@ Different
Different values before and after synchronization.
@ Same
Same values before and after synchronization.
std::int32_t Int32
Signed integer type of 32 bits.