10#include "SYCLVector.h"
11#include "SYCLVectorInternal.h"
16using namespace Arccore;
22 template <
typename ValueT>
28 template <
typename ValueT>
34 template <
typename ValueT>
35 SYCLVector<ValueT>::~SYCLVector()
38 template <
typename ValueT>
39 void SYCLVector<ValueT>::allocate()
42 auto block_size = blockSize() ;
43 m_h_values.resize(m_local_size*block_size);
44 m_internal.reset(
new VectorInternal(m_h_values.data(), m_local_size*block_size));
47 template <
typename ValueT>
48 void SYCLVector<ValueT>::resize(Integer alloc_size)
const
51 m_h_values.resize(alloc_size);
52 m_internal.reset(
new VectorInternal(m_h_values.data(), alloc_size));
// NOTE(review): the signature line of this definition is not visible in this
// excerpt (presumably SYCLVector<ValueT>::clear()) — confirm against the file.
55 template <
typename ValueT>
// Swap with an empty temporary vector: m_h_values ends up empty and the
// previous storage is released when the temporary is destroyed.
61 std::vector<ValueType>().swap(m_h_values);
64 template <
typename ValueT>
65 void SYCLVector<ValueT>::setValuesFromHost()
68 m_internal.reset(
new VectorInternal(m_h_values.data(), m_local_size*blockSize()));
71 template <
typename ValueT>
72 void SYCLVector<ValueT>::setValues(std::size_t size, ValueType
const* ptr)
75 m_h_values.resize(m_local_size*blockSize());
76 std::copy(ptr, ptr + size, m_h_values.begin());
77 m_internal.reset(
new VectorInternal(m_h_values.data(), m_local_size*blockSize()));
80 template <
typename ValueT>
81 void SYCLVector<ValueT>::setValuesFromDevice(std::size_t size, ValueType
const* ptr)
83 assert(m_internal.get());
84 m_internal->setValuesFromDevice(size,ptr);
87 template <
typename ValueT>
88 void SYCLVector<ValueT>::setValuesFromHost(std::size_t size, ValueType
const* ptr)
90 assert(m_internal.get());
91 m_internal->setValuesFromHost(size,ptr);
95 template <
typename ValueT>
96 void SYCLVector<ValueT>::copyValuesTo(std::size_t size, ValueType* ptr)
const
99 m_internal->copyValuesToHost(size, ptr);
// Forwards `size` and `ptr` to the internal vector's copyValuesToDevice().
// NOTE(review): the return-type line of this definition is not visible in this
// excerpt; `ptr` presumably designates device memory — confirm.
102 template <
typename ValueT>
104 SYCLVector<ValueT>::copyValuesToDevice(std::size_t size, ValueT* ptr)
const
// Only forward when an internal vector exists; otherwise nothing is copied.
106 if (m_internal.get())
107 m_internal->copyValuesToDevice(size, ptr);
// Allocates two arrays with malloc_device on the environment queue, fills them
// from a kernel (values copied from the internal vector, rows set to the entry
// index) and stores the values array into *values.
112 template <
typename ValueT>
113 void SYCLVector<ValueT>::initDevicePointers(
int** rows, ValueType** values)
const
// NOTE(review): the statement controlled by this null check is not visible in
// this excerpt (presumably an early return) — confirm against the full file.
115 if(m_internal.get()==
nullptr)
118 auto env = SYCLEnv::instance() ;
119 auto& queue = env->internal()->queue() ;
// Number of work items launched below.
120 auto max_num_treads = env->maxNumThreads() ;
// One entry per local element and per block component.
122 auto alloc_size = m_local_size*blockSize() ;
123 auto values_ptr = malloc_device<ValueT>(alloc_size, queue);
124 auto rows_ptr = malloc_device<IndexType>(alloc_size, queue);
126 queue.submit( [&](sycl::handler& cgh)
// Read-only accessor on the internal vector's values.
128 auto access_x = m_internal->m_values.template get_access<sycl::access::mode::read>(cgh);
129 std::size_t y_length = alloc_size ;
130 cgh.parallel_for<
class init_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
// Each work item starts at its own id and advances by the launch range size
// until y_length entries have been visited.
132 auto id = itemId.get_id(0);
133 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
135 values_ptr[i] = access_x[i];
136 rows_ptr[i] = IndexType(i) ;
// NOTE(review): in the visible lines rows_ptr is never stored into *rows and no
// wait on the queue is shown before returning — confirm against the full file.
141 *values = values_ptr;
145 template <
typename ValueT>
146 void SYCLVector<ValueT>::freeDevicePointers(
int* rows, ValueType* values)
148 auto env = SYCLEnv::instance() ;
149 auto& queue = env->internal()->queue() ;
150 sycl::free(values,queue) ;
151 sycl::free(rows,queue) ;
154 template <
typename ValueT>
155 void SYCLVector<ValueT>::freeDevicePointers(ValueType* values)
157 auto env = SYCLEnv::instance() ;
158 auto& queue = env->internal()->queue() ;
159 sycl::free(values,queue) ;
// Allocates a values array and a rows array of alloc_size entries with
// malloc_device, sets every rows entry to its own index from a kernel, and
// stores the values array into *values.
// NOTE(review): the rest of the parameter list is not visible in this excerpt.
162 template <
typename ValueT>
163 void SYCLVector<ValueT>::allocateDevicePointers(std::size_t alloc_size,
168 auto env = SYCLEnv::instance() ;
169 auto& queue = env->internal()->queue() ;
// Number of work items launched below.
170 auto max_num_treads = env->maxNumThreads() ;
172 auto values_ptr = malloc_device<ValueT>(alloc_size, queue);
173 auto rows_ptr = malloc_device<IndexType>(alloc_size, queue);
174 queue.submit( [&](sycl::handler& cgh)
176 std::size_t y_length = alloc_size ;
177 cgh.parallel_for<
class init2_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
// Each work item starts at its own id and advances by the launch range size.
179 auto id = itemId.get_id(0);
180 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
// Only rows_ptr is written in the visible lines; values_ptr is not initialised here.
183 rows_ptr[i] = IndexType(i) ;
// NOTE(review): no store of rows_ptr into an output parameter is visible in
// this excerpt — confirm against the full file.
189 *values = values_ptr;
// Allocates a values array of alloc_size entries with malloc_device, submits a
// kernel over it, and stores the array into *values.
// NOTE(review): the rest of the parameter list is not visible in this excerpt.
194 template <
typename ValueT>
195 void SYCLVector<ValueT>::allocateDevicePointers(std::size_t alloc_size,
199 auto env = SYCLEnv::instance() ;
200 auto& queue = env->internal()->queue() ;
// Number of work items launched below.
201 auto max_num_treads = env->maxNumThreads() ;
203 auto values_ptr = malloc_device<ValueT>(alloc_size, queue);
204 queue.submit( [&](sycl::handler& cgh)
206 std::size_t y_length = alloc_size ;
// NOTE(review): the kernel name init2_vector_ptr is also used by the other
// allocateDevicePointers overload — confirm this is intended.
207 cgh.parallel_for<
class init2_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
209 auto id = itemId.get_id(0);
// NOTE(review): no loop body is visible in this excerpt, so what the kernel
// writes (if anything) cannot be told from here.
210 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
218 *values = values_ptr;
// Allocates a values array and a rows array of alloc_size entries with
// malloc_device, fills them from a kernel (values copied from host_values
// through a SYCL buffer, rows set to the entry index) and stores the values
// array into *values.
// NOTE(review): the rest of the parameter list is not visible in this excerpt.
222 template <
typename ValueT>
223 void SYCLVector<ValueT>::initDevicePointers(std::size_t alloc_size,
224 ValueType
const* host_values,
229 auto env = SYCLEnv::instance() ;
230 auto& queue = env->internal()->queue() ;
// Number of work items launched below.
231 auto max_num_treads = env->maxNumThreads() ;
233 auto values_ptr = malloc_device<ValueT>(alloc_size, queue);
234 auto rows_ptr = malloc_device<IndexType>(alloc_size, queue);
// Buffer over the caller's host array, read by the kernel below.
235 sycl::buffer<ValueT, 1> values_buf(host_values, sycl::range<1>(alloc_size)) ;
236 queue.submit( [&](sycl::handler& cgh)
238 auto access_x = values_buf.template get_access<sycl::access::mode::read>(cgh);
239 std::size_t y_length = alloc_size ;
240 cgh.parallel_for<
class init3_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
// Each work item starts at its own id and advances by the launch range size.
242 auto id = itemId.get_id(0);
243 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
245 values_ptr[i] = access_x[i];
246 rows_ptr[i] = IndexType(i) ;
// NOTE(review): no store of rows_ptr into an output parameter is visible in
// this excerpt — confirm against the full file.
252 *values = values_ptr;
// Copies alloc_size values from device_values into host_values by running a
// kernel that writes into a SYCL buffer created over host_values.
257 template <
typename ValueT>
258 void SYCLVector<ValueT>::copyDeviceToHost(std::size_t alloc_size,
259 ValueType
const* device_values,
260 ValueType* host_values)
263 auto env = SYCLEnv::instance() ;
264 auto& queue = env->internal()->queue() ;
// Number of work items launched below.
265 auto max_num_treads = env->maxNumThreads() ;
// Buffer over the caller's host array; presumably its content reaches
// host_values when the buffer goes out of scope — confirm against the SYCL
// buffer semantics.
267 sycl::buffer<ValueT, 1> values_buf(host_values, sycl::range<1>(alloc_size)) ;
268 queue.submit( [&](sycl::handler& cgh)
// Write-only access with no_init: the previous buffer content is not needed.
270 auto access_x = sycl::accessor { values_buf, cgh, sycl::write_only, sycl::property::no_init{}};
272 std::size_t y_length = alloc_size ;
273 cgh.parallel_for<
class copy_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
// Each work item starts at its own id and advances by the launch range size.
275 auto id = itemId.get_id(0);
276 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
278 access_x[i] = device_values[i];
// NOTE(review): the signature and the branch structure of this definition are
// not visible in this excerpt. The visible lines call either pointWiseMult or
// blockMult on the internal vector with the values of operands y and z.
286 template <
typename ValueT>
289 auto block_size = blockSize() ;
290 auto block_size_y = y.blockSize() ;
291 auto block_size_z = z.blockSize() ;
// Both operands must use the same block size.
292 assert(block_size_y==block_size_z) ;
294 m_internal->pointWiseMult(y.m_internal->m_values,
295 z.m_internal->m_values) ;
// NOTE(review): the check uses block_size_z twice; this matches
// block_size_y*block_size_z only when the equality assert above applies on
// this path — confirm.
298 assert(block_size==block_size_z*block_size_z) ;
299 m_internal->blockMult(m_local_size,
301 y.m_internal->m_values,
302 z.m_internal->m_values) ;
IVectorImpl(const MultiVectorImpl *multi_impl, BackEndId backend="")
Constructor.
SYCLVector()
Constructor without an associated MultiImpl.
void clear()
Wipe out internal data.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-