40 using BaseClass::_toTrueType;
44 using BaseClass::m_extent;
50 _copyFrom(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
55 _copyTo(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
60 _fill(args.m_queue, args.m_indexes, _toTrueType(args.m_source), _toTrueType(args.m_destination));
65 _copyFrom(args.m_queue, args.m_indexes, args.m_multi_memory, _toTrueType(args.m_source_buffer));
70 _copyTo(args.m_queue, args.m_indexes, args.m_const_multi_memory, _toTrueType(args.m_destination_buffer));
75 _fill(args.m_queue, args.m_indexes, args.m_multi_memory, _toTrueType(args.m_source_buffer));
90 const Int64 sub_size = m_extent.v;
98 for (
Int32 z = 0; z < sub_size; ++z)
99 destination[
zindex + z] = source[
zci + z];
119 const Int32 sub_size = m_extent.v;
134 for (
Int32 z = 0, n = sub_size; z < n; ++z)
154 const Int32 sub_size = m_extent.v;
155 constexpr Int32 max_size = 24;
160 if (sub_size > max_size)
162 sizeof(DataType) * sub_size,
sizeof(DataType) * max_size);
164 for (
Int32 z = 0; z < sub_size; ++z)
166 for (
Int32 z = sub_size; z < max_size; ++z)
177 for (
Int32 z = 0; z < sub_size; ++z)
186 for (
Int32 z = 0; z < sub_size; ++z)
208 const Int32 sub_size = m_extent.v;
209 constexpr Int32 max_size = 24;
214 if (sub_size > max_size)
216 sizeof(DataType) * sub_size,
sizeof(DataType) * max_size);
218 for (
Int32 z = 0; z < sub_size; ++z)
220 for (
Int32 z = sub_size; z < max_size; ++z)
246 Int32 index0 = indexes[i * 2];
247 Int32 index1 = indexes[(i * 2) + 1];
248 Span<std::byte> orig_view_bytes = multi_views[index0];
249 auto* orig_view_data =
reinterpret_cast<DataType*
>(orig_view_bytes.data());
252 Span<DataType> orig_view = { orig_view_data, orig_view_bytes.size() / (
Int64)
sizeof(DataType) };
254 for (
Int32 z = 0, n = sub_size; z < n; ++z)
255 orig_view[zci + z] = local_source[z];
260 void _copyTo(
const RunQueue* queue, SmallSpan<const Int32> indexes, Span<const DataType> source,
261 Span<DataType> destination)
269 Int32 nb_index = indexes.size();
270 const Int64 sub_size = m_extent.v;
276 Int64 zindex = i * sub_size;
277 Int64 zci = indexes[i] * sub_size;
278 for (
Int32 z = 0; z < sub_size; ++z)
279 destination[zci + z] = source[zindex + z];
283 void _copyTo(
const RunQueue* queue, SmallSpan<const Int32> indexes, SmallSpan<
const Span<const std::byte>> multi_views,
284 Span<DataType> destination)
297 const Int32 nb_index = indexes.size() / 2;
300 const Int32 sub_size = m_extent.v;
306 Int32 index0 = indexes[i * 2];
307 Int32 index1 = indexes[(i * 2) + 1];
308 Span<const std::byte> orig_view_bytes = multi_views[index0];
309 auto* orig_view_data =
reinterpret_cast<const DataType*
>(orig_view_bytes.data());
312 Span<const DataType> orig_view = { orig_view_data, orig_view_bytes.size() / (
Int64)
sizeof(DataType) };
315 for (
Int32 z = 0, n = sub_size; z < n; ++z)
316 destination[z_index + z] = orig_view[zci + z];