// Make two names inherited from BaseClass usable without qualification in this class:
// the conversion helper _toTrueType and the member m_extent.
// NOTE(review): presumably needed because BaseClass is a dependent (template) base — confirm.
40 using BaseClass::_toTrueType;
44 using BaseClass::m_extent;
// Forwarding calls. The enclosing declarations are not part of this excerpt;
// presumably each statement is the body of a separate entry point — confirm.
//
// Single-buffer form: the queue and index list are passed through unchanged, and both
// args.m_source and args.m_destination go through _toTrueType() before the call.
50 _copyFrom(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
55 _copyTo(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
60 _fill(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
// Multi-memory form: the multi-view argument (m_multi_memory / m_const_multi_memory) is
// passed as-is; only the single buffer goes through _toTrueType().
// Note that _copyTo takes the const multi-view and the *destination* buffer, whereas
// _copyFrom and _fill take the mutable multi-view and the *source* buffer.
65 _copyFrom(args.m_queue, args.m_indexes, args.m_multi_memory, _toTrueType(args.m_source_buffer));
70 _copyTo(args.m_queue, args.m_indexes, args.m_const_multi_memory, _toTrueType(args.m_destination_buffer));
75 _fill(args.m_queue, args.m_indexes, args.m_multi_memory, _toTrueType(args.m_source_buffer));
// Number of consecutive elements handled per index, read from m_extent.v.
// NOTE(review): stored as Int64 here while the loop counter below is Int32 (other
// fragments of this file store it as Int32) — confirm the intended width.
90 const Int64 sub_size = m_extent.v;
// Copy sub_size consecutive elements: source is read starting at offset zci,
// destination is written starting at offset zindex (both offsets are computed
// outside this excerpt).
98 for (
Int32 z = 0; z < sub_size; ++z)
99 destination[
zindex + z] = source[
zci + z];
// Number of consecutive elements handled per index, read from m_extent.v.
119 const Int32 sub_size = m_extent.v;
// Iterate over the sub_size elements; the bound is copied into the loop-local `n`.
// The loop body is not part of this excerpt.
134 for (
Int32 z = 0, n = sub_size; z < n; ++z)
// Number of consecutive elements handled per index, read from m_extent.v.
154 const Int32 sub_size = m_extent.v;
// Compile-time upper bound that sub_size is checked against below.
// NOTE(review): presumably the capacity of a fixed-size local buffer — confirm.
155 constexpr Int32 max_size = 24;
// Guard for sub_size exceeding max_size. The guarded call is not visible here;
// only its last two arguments are: the byte sizes for sub_size and max_size elements.
160 if (sub_size > max_size)
162 sizeof(DataType) * sub_size,
sizeof(DataType) * max_size);
// First pass over the sub_size used slots [0, sub_size); body not visible.
164 for (
Int32 z = 0; z < sub_size; ++z)
// Second pass over the remaining slots [sub_size, max_size); body not visible.
// NOTE(review): looks like padding of the unused tail — confirm.
166 for (
Int32 z = sub_size; z < max_size; ++z)
// Two further loops over [0, sub_size); bodies not visible in this excerpt.
177 for (
Int32 z = 0; z < sub_size; ++z)
186 for (
Int32 z = 0; z < sub_size; ++z)
// Number of consecutive elements handled per index, read from m_extent.v.
208 const Int32 sub_size = m_extent.v;
// Compile-time upper bound that sub_size is checked against below.
// NOTE(review): presumably the capacity of a fixed-size local buffer — confirm.
209 constexpr Int32 max_size = 24;
// Guard for sub_size exceeding max_size. The guarded call is not visible here;
// only its last two arguments are: the byte sizes for sub_size and max_size elements.
214 if (sub_size > max_size)
216 sizeof(DataType) * sub_size,
sizeof(DataType) * max_size);
// Pass over the sub_size used slots [0, sub_size); body not visible.
218 for (
Int32 z = 0; z < sub_size; ++z)
// Pass over the remaining slots [sub_size, max_size); body not visible.
220 for (
Int32 z = sub_size; z < max_size; ++z)
// `indexes` is read as consecutive pairs: entry 2*i and entry 2*i + 1.
// index0 selects which view of multi_views is written to.
// NOTE(review): the use of index1 is outside this excerpt; presumably it is the
// position inside the selected view used to compute zci — confirm.
245 Int32 index0 = indexes[i * 2];
246 Int32 index1 = indexes[(i * 2) + 1];
// The selected view is stored as raw bytes; reinterpret it as an array of DataType.
247 Span<std::byte> orig_view_bytes = multi_views[index0];
248 auto* orig_view_data =
reinterpret_cast<DataType*
>(orig_view_bytes.data());
// Typed view over the same memory: element count = byte count / sizeof(DataType).
251 Span<DataType> orig_view = { orig_view_data, orig_view_bytes.size() / (
Int64)
sizeof(DataType) };
// Write sub_size values from local_source into the view starting at offset zci
// (local_source and zci are defined outside this excerpt).
253 for (
Int32 z = 0, n = sub_size; z < n; ++z)
254 orig_view[zci + z] = local_source[z];
// Scatter `source` into `destination` according to `indexes`.
// For entry i, the sub_size consecutive elements of `source` starting at i * sub_size
// are written to `destination` starting at indexes[i] * sub_size.
// The way `queue` is used and the loop over i are not part of this excerpt.
259 void _copyTo(RunQueue* queue, SmallSpan<const Int32> indexes, Span<const DataType> source,
260 Span<DataType> destination)
// Number of entries in the index list.
268 Int32 nb_index = indexes.size();
// Number of consecutive elements per index; Int64 so the offset products below are 64-bit.
269 const Int64 sub_size = m_extent.v;
// zindex: start of block i in `source`; zci: start of the target block in `destination`.
275 Int64 zindex = i * sub_size;
276 Int64 zci = indexes[i] * sub_size;
277 for (
Int32 z = 0; z < sub_size; ++z)
278 destination[zci + z] = source[zindex + z];
// Variant of _copyTo that reads from several read-only byte views (`multi_views`)
// and writes into a single typed `destination`.
// `indexes` holds pairs of values, so the number of entries is indexes.size() / 2.
// The way `queue` is used and the loop over i are not part of this excerpt.
282 void _copyTo(RunQueue* queue, SmallSpan<
const Span<const std::byte>> multi_views,
283 Span<DataType> destination)
// Two Int32 values per entry.
296 const Int32 nb_index = indexes.size() / 2;
// Number of consecutive elements per entry, read from m_extent.v.
299 const Int32 sub_size = m_extent.v;
// index0 selects the view to read from.
// NOTE(review): the use of index1 is outside this excerpt; presumably it is the
// position inside the selected view used to compute zci — confirm.
305 Int32 index0 = indexes[i * 2];
306 Int32 index1 = indexes[(i * 2) + 1];
// The selected view is stored as raw bytes; reinterpret it as a const array of DataType.
307 Span<const std::byte> orig_view_bytes = multi_views[index0];
308 auto* orig_view_data =
reinterpret_cast<const DataType*
>(orig_view_bytes.data());
// Typed read-only view: element count = byte count / sizeof(DataType).
311 Span<const DataType> orig_view = { orig_view_data, orig_view_bytes.size() / (
Int64)
sizeof(DataType) };
// Copy sub_size elements from the view (offset zci) into destination (offset z_index);
// both offsets are computed outside this excerpt.
314 for (
Int32 z = 0, n = sub_size; z < n; ++z)
315 destination[z_index + z] = orig_view[zci + z];