// Type aliases used throughout the class:
//  - ValueType          : the element type (the ValueT template argument)
//  - ThisType           : this class instantiated for ValueType
//  - ValueBufferType    : one-dimensional SYCL buffer of ValueType
//  - ValueBufferPtrType : owning (unique_ptr) handle to such a buffer
36 typedef ValueT ValueType;
37 typedef VectorInternal<ValueType> ThisType;
38 typedef sycl::buffer<ValueType, 1> ValueBufferType;
39 typedef std::unique_ptr<ValueBufferType> ValueBufferPtrType;
// Constructs the vector storage from host data.
// ptr  : first of `size` read-only elements used to build the value buffer.
// size : number of elements in the one-dimensional buffer range.
43 VectorInternal(ValueType
const* ptr, std::size_t size)
44 : m_values(ptr, sycl::range<1>(size))
// A null final-data pointer is set on the buffer; per the SYCL buffer API this
// requests that nothing be written back to host memory on buffer destruction.
46 m_values.set_final_data(
nullptr);
// Virtual destructor with an empty body; members are released by their own destructors.
49 virtual ~VectorInternal() {}
// Non-const accessor returning a reference to a value buffer.
// NOTE(review): the body is not visible in this excerpt — presumably returns m_values; confirm.
51 ValueBufferType& values()
// Const overload that still yields a non-const buffer reference (m_values is
// declared mutable in this class).
// NOTE(review): the body is not visible in this excerpt — presumably returns m_values; confirm.
56 ValueBufferType& values()
const
// Returns the ghost-value buffer, creating it on demand.
// ghost_size : number of elements the caller needs.
// Returns a reference to the buffer owned by m_ghost_values.
// Although the method is const, it updates the mutable members m_ghost_size
// and m_ghost_values.
61 ValueBufferType& ghostValues(Integer ghost_size)
const
// (Re)allocate when no buffer exists yet or the request exceeds the recorded
// size; a smaller request reuses the existing, larger buffer.
63 if (m_ghost_values.get() ==
nullptr || ghost_size > m_ghost_size) {
64 m_ghost_size = ghost_size;
// A brand-new buffer replaces the old one; no visible line copies the
// previous contents over.
65 m_ghost_values.reset(
new ValueBufferType(m_ghost_size));
67 return *m_ghost_values;
// Reads the value buffer from the host side.
// size : number of elements iterated over (indices 0 .. size-1).
// ptr  : host destination pointer.
// NOTE(review): the loop body is not visible in this excerpt — presumably it
// assigns h_values[i] to ptr[i]; confirm.
70 void copyValuesToHost(std::size_t size, ValueT* ptr)
// Host access to m_values, obtained through the buffer's get_host_access().
72 auto h_values = m_values.get_host_access();
73 for (std::size_t i = 0; i < size; ++i)
// Submits a kernel that reads m_values (read-only accessor) over indices
// 0 .. size-1.
// size : number of elements processed.
// ptr  : non-const pointer, presumably the destination written inside the
//        kernel — the loop body is not visible in this excerpt; confirm.
// NOTE(review): no wait on the submitted work is visible in these lines —
// confirm how callers synchronize.
77 void copyValuesToDevice(std::size_t size, ValueT* ptr)
const
79 auto env = SYCLEnv::instance() ;
80 auto& queue = env->internal()->queue() ;
// Number of work-items launched, as reported by the environment.
81 auto max_num_treads = env->maxNumThreads() ;
83 queue.submit( [&](sycl::handler& cgh)
85 auto access_x = m_values.template get_access<sycl::access::mode::read>(cgh);
// Local copy so the kernel lambda captures the length by value.
86 std::size_t y_length = size ;
// NOTE(review): the kernel name init_vector_ptr is also used by other kernels
// in this file — confirm the SYCL implementation in use accepts that.
87 cgh.parallel_for<
class init_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
89 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range, so
// all indices below y_length are covered even when y_length exceeds the range.
90 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
// Overwrites the first `size` entries of m_values with ptr[0 .. size-1] via a
// kernel submitted to the environment's queue.
// size : number of elements written.
// ptr  : source pointer; it is dereferenced inside the kernel, so it must be
//        readable from the device (presumably a device/USM pointer — confirm).
// NOTE(review): no visible check that size <= m_values.size(); confirm callers
// guarantee it.
97 void setValuesFromDevice(std::size_t size, ValueT
const* ptr)
const
99 auto env = SYCLEnv::instance() ;
100 auto& queue = env->internal()->queue() ;
// Number of work-items launched, as reported by the environment.
101 auto max_num_treads = env->maxNumThreads() ;
103 queue.submit( [&](sycl::handler& cgh)
// discard_write: the kernel only writes m_values.
105 auto access_x = m_values.template get_access<sycl::access::mode::discard_write>(cgh);
// Local copy so the kernel lambda captures the length by value.
106 std::size_t y_length = size ;
// NOTE(review): the kernel name init_vector_ptr is also used by other kernels
// in this file — confirm the SYCL implementation in use accepts that.
107 cgh.parallel_for<
class init_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
109 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range.
110 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
111 access_x[i] = ptr[i];
// Overwrites the first `size` entries of m_values with host data, going
// through a temporary SYCL buffer that wraps the host pointer.
// size : number of elements copied.
// ptr  : host source pointer holding at least `size` elements.
// NOTE(review): no visible check that size <= m_values.size(); confirm callers
// guarantee it.
125 void setValuesFromHost(std::size_t size, ValueT
const* ptr)
const
127 auto env = SYCLEnv::instance() ;
128 auto& queue = env->internal()->queue() ;
// Number of work-items launched, as reported by the environment.
129 auto max_num_treads = env->maxNumThreads() ;
// Temporary buffer over the caller's host memory, local to this function.
131 auto rhs = ValueBufferType(ptr,sycl::range<1>(size)) ;
133 queue.submit( [&](sycl::handler& cgh)
// The destination is write-only (discard_write); the source is read-only.
135 auto access_x = m_values.template get_access<sycl::access::mode::discard_write>(cgh);
136 auto access_rhs = rhs.template get_access<sycl::access::mode::read>(cgh);
// Local copy so the kernel lambda captures the length by value.
137 std::size_t y_length = size ;
// NOTE(review): the kernel name init_vector_ptr is also used by other kernels
// in this file — confirm the SYCL implementation in use accepts that.
138 cgh.parallel_for<
class init_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
140 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range.
141 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
142 access_x[i] = access_rhs[i];
// Copies the contents of `src` into m_values using the handler's copy operation.
// src : source buffer, accessed read-only.
156 void copy(ValueBufferType& src)
158 auto env = SYCLEnv::instance() ;
159 env->internal()->queue().submit([&](sycl::handler& cgh)
// m_values is the destination (requested read_write); src is the source.
161 auto access_x = m_values.template get_access<sycl::access::mode::read_write>(cgh);
162 auto access_src = src.template get_access<sycl::access::mode::read>(cgh);
// handler::copy takes (source, destination).
163 cgh.copy(access_src,access_x) ;
// Element-wise product: z[i] = m_values[i] * y[i] for every i below m_values.size().
// y : second factor, accessed read-only.
// z : result buffer, accessed write-only (discard_write).
// NOTE(review): the iteration length comes from m_values only; no visible
// check that y and z hold at least that many elements (blockMult asserts its
// sizes) — confirm callers guarantee it.
167 void pointWiseMult(ValueBufferType& y, ValueBufferType& z)
169 auto env = SYCLEnv::instance() ;
170 auto& queue = env->internal()->queue() ;
// Number of work-items launched, as reported by the environment.
171 auto max_num_treads = env->maxNumThreads() ;
173 queue.submit( [&](sycl::handler& cgh)
175 auto access_x = m_values.template get_access<sycl::access::mode::read>(cgh);
176 auto access_y = y.template get_access<sycl::access::mode::read>(cgh);
177 auto access_z = z.template get_access<sycl::access::mode::discard_write>(cgh);
// Local copy so the kernel lambda captures the length by value.
178 std::size_t x_length = m_values.size() ;
// NOTE(review): the kernel name init_vector_ptr is also used by other kernels
// in this file — confirm the SYCL implementation in use accepts that.
179 cgh.parallel_for<
class init_vector_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
181 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range.
182 for (
auto i =
id; i < x_length; i += itemId.get_range()[0])
183 access_z[i] = access_x[i]*access_y[i];
// Per-row block product: for each row irow and each ieq in [0, N), writes
//   z[irow*N + ieq] = sum over j of m_values[irow*NxN + ieq*N + j] * y[irow*N + j].
// nrows : number of rows (blocks) processed.
// NOTE(review): the remaining parameters, the definitions of N and NxN, and
// the inner loop over j are not visible in this excerpt. The indexing suggests
// y and z are buffers, NxN == N*N, and each block is stored row by row —
// confirm against the full source.
188 void blockMult(std::size_t nrows,
193 auto env = SYCLEnv::instance() ;
194 auto& queue = env->internal()->queue() ;
// Number of work-items launched, as reported by the environment.
195 auto max_num_treads = env->maxNumThreads() ;
// Debug-build checks that all three buffers are large enough for nrows blocks.
198 assert(m_values.size()>=nrows*NxN) ;
199 assert(y.size()>=nrows*N) ;
200 assert(z.size()>=nrows*N) ;
201 queue.submit( [&](sycl::handler& cgh)
// m_values and y are inputs (read); z is the output (discard_write).
203 auto access_x = m_values.template get_access<sycl::access::mode::read>(cgh);
204 auto access_y = y.template get_access<sycl::access::mode::read>(cgh);
205 auto access_z = z.template get_access<sycl::access::mode::discard_write>(cgh);
206 cgh.parallel_for<
class vector_block_mult>(
207 sycl::range<1>{max_num_treads},
208 [=] (sycl::item<1> itemId)
210 auto id = itemId.get_id(0);
// Each work-item starts at row `id` and advances by the launch range.
211 for (
auto irow =
id; irow < nrows; irow += itemId.get_range()[0])
213 for(
int ieq=0;ieq<N;++ieq)
// Accumulator for one output entry of the current row.
215 ValueType value = 0. ;
218 value += access_x[irow*NxN+ieq*N+j]*access_y[irow*N+j] ;
220 access_z[irow*N+ieq] = value ;
// Optional diagnostics: with PRINT_DEBUG_INFO defined, host accessors on
// m_values and z are used to print, per row, the block entries (labelled
// "INV DIAG") and the entries of z (labelled "Y").
225#ifdef PRINT_DEBUG_INFO
227 sycl::host_accessor<ValueT, 1, sycl::access::mode::read> diag_acc(m_values);
228 sycl::host_accessor<ValueT, 1, sycl::access::mode::read> z_acc(z);
230 for (std::size_t irow = 0; irow < nrows; ++irow)
232 std::cout<<
"INV DIAG["<<irow<<
"]:\n";
236 std::cout<<diag_acc[irow*NxN+i*N+j]<<
" ";
237 std::cout<<std::endl;
239 std::cout<<
"Y["<<irow<<
"]=\n";
241 std::cout<<z_acc[irow*N+i]<<std::endl;
// Main value buffer. Declared mutable: const member functions in this class
// request write accessors on it.
251 mutable ValueBufferType m_values;
// Element count used for the most recent ghost-buffer allocation (0 until one is made).
253 mutable Integer m_ghost_size = 0 ;
// Ghost-value buffer, allocated on demand by ghostValues(); null until first use.
254 mutable ValueBufferPtrType m_ghost_values;