9#include <alien/kernels/sycl/SYCLPrecomp.h>
12#include <alien/kernels/sycl/data/HCSRMatrix.h>
13#include <alien/kernels/sycl/data/HCSRMatrixInternal.h>
// NOTE(review): only part of this member definition is visible here — its name,
// its leading parameters and its braces are not shown. `nrows` and `nnz` are
// used below without a visible declaration (presumably parameters — confirm).
// Visible behaviour: obtain four arrays from malloc_device and hand the values
// array back through the `values` out-parameter.
17template <
typename ValueT>
24 ValueT** values)
const
// Profile looked up for this local offset; not referenced again in the visible lines.
26 auto& hypre_profile = m_internal->getHypreProfile(m_local_offset) ;
27 auto env = SYCLEnv::instance() ;
// Not referenced again in the visible lines of this definition.
28 auto max_num_treads = env->maxNumThreads() ;
29 auto& queue = env->internal()->queue() ;
// Two IndexType arrays of `nrows` entries, then one IndexType array and one
// ValueT array of `nnz` entries. `malloc_device` is presumably
// sycl::malloc_device (brought in by a using-directive) — confirm.
30 auto ncols_ptr = malloc_device<IndexType>(nrows, queue);
31 auto rows_ptr = malloc_device<IndexType>(nrows, queue);
32 auto cols_ptr = malloc_device<IndexType>(nnz, queue);
33 auto values_ptr = malloc_device<ValueT>(nnz, queue);
// Only the `values` out-parameter assignment is visible; how the other three
// pointers reach the caller is not shown here.
35 *values = values_ptr ;
// NOTE(review): the signature and braces of this member definition are not
// visible here. Visible behaviour: obtain four arrays from malloc_device, fill
// them from the matrix values/columns and from the profile's ncols/rows with
// two submitted kernels, then publish the values array through `*values`.
41template <
typename ValueT>
45 auto& hypre_profile = m_internal->getHypreProfile(m_local_offset) ;
46 auto env = SYCLEnv::instance() ;
// Used as the 1-D range of both parallel_for launches below.
47 auto max_num_treads = env->maxNumThreads() ;
48 auto nnz = m_profile->getNnz() ;
49 auto& queue = env->internal()->queue() ;
// Two IndexType arrays sized by m_local_size, then one IndexType array and one
// ValueT array sized by nnz. `malloc_device` is presumably sycl::malloc_device — confirm.
50 auto ncols_ptr = malloc_device<IndexType>(m_local_size, queue);
51 auto rows_ptr = malloc_device<IndexType>(m_local_size, queue);
52 auto cols_ptr = malloc_device<IndexType>(nnz, queue);
53 auto values_ptr = malloc_device<ValueT>(nnz, queue);
// First command group: copy m_values and m_cols into values_ptr / cols_ptr.
55 queue.submit( [&](sycl::handler& cgh)
// Read-mode accessors on the internal values and column containers.
57 auto access_x = m_internal->m_values.template get_access<sycl::access::mode::read>(cgh);
58 auto access_cols = m_internal->m_cols.template get_access<sycl::access::mode::read>(cgh);
// The kernel lambda captures by value ([=]), so the raw pointers and accessors are copied into it.
60 cgh.parallel_for<
class init_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
62 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range size
// until y_length is reached. NOTE(review): the declaration of y_length for this
// kernel is not visible here (presumably nnz) — confirm.
63 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
65 values_ptr[i] = access_x[i];
66 cols_ptr[i] = access_cols[i];
// Second command group: copy the profile's m_ncols and m_rows into ncols_ptr / rows_ptr.
73 queue.submit( [&](sycl::handler& cgh)
75 auto access_ncols = hypre_profile.m_ncols.template get_access<sycl::access::mode::read>(cgh);
76 auto access_rows = hypre_profile.m_rows.template get_access<sycl::access::mode::read>(cgh);
// Loop bound for this kernel is the local size.
77 auto y_length = m_local_size ;
78 cgh.parallel_for<
class init_ptr2>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
80 auto id = itemId.get_id(0);
// Same traversal as the first kernel: start at id, step by the launch range size.
81 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
83 ncols_ptr[i] = access_ncols[i];
84 rows_ptr[i] = access_rows[i];
// Only the `values` out-parameter assignment is visible. NOTE(review): no wait
// on the submitted commands is visible before this point — confirm whether
// synchronisation happens on lines not shown here.
90 *values = values_ptr ;
// NOTE(review): only the tail of the signature (`ValueT* values) const`) is
// visible; the function name, the other parameters and the braces are not
// shown. `nnz`, `nrows`, `cols`, `ncols` and `rows` are used below without a
// visible declaration (presumably parameters — confirm).
// Visible behaviour: fill already-provided arrays from the matrix values/columns
// and from the profile's ncols/rows with two submitted kernels; nothing is
// allocated in the visible lines.
96template <
typename ValueT>
103 ValueT* values)
const
105 auto& hypre_profile = m_internal->getHypreProfile(m_local_offset) ;
106 auto env = SYCLEnv::instance() ;
// Used as the 1-D range of both parallel_for launches below.
107 auto max_num_treads = env->maxNumThreads() ;
108 auto& queue = env->internal()->queue() ;
// First command group: copy m_values into `values` and m_cols into `cols`.
111 queue.submit( [&](sycl::handler& cgh)
// Read-mode accessors on the internal values and column containers.
113 auto access_x = m_internal->m_values.template get_access<sycl::access::mode::read>(cgh);
114 auto access_cols = m_internal->m_cols.template get_access<sycl::access::mode::read>(cgh);
115 auto y_length = nnz ;
// The kernel lambda captures by value ([=]), so the destination pointers and accessors are copied into it.
116 cgh.parallel_for<
class init_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
118 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range size until y_length is reached.
119 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
121 values[i] = access_x[i];
122 cols[i] = access_cols[i];
// Second command group: copy the profile's m_ncols into `ncols` and m_rows into `rows`.
129 queue.submit( [&](sycl::handler& cgh)
131 auto access_ncols = hypre_profile.m_ncols.template get_access<sycl::access::mode::read>(cgh);
132 auto access_rows = hypre_profile.m_rows.template get_access<sycl::access::mode::read>(cgh);
133 auto y_length = nrows ;
134 cgh.parallel_for<
class init_ptr2>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
136 auto id = itemId.get_id(0);
// Same traversal as the first kernel, bounded by nrows.
137 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
139 ncols[i] = access_ncols[i];
140 rows[i] = access_rows[i];
150template <
typename ValueT>
151HCSRMatrix<ValueT>::HCSRView
152HCSRMatrix<ValueT>::hcsrView(BackEnd::Memory::eType memory,
int nrows,
int nnz)
const
154 return HCSRView(
this,memory, nrows, nnz) ;
// NOTE(review): the signature and braces of this member definition are not
// visible here; the name initCOODevicePointers is taken from its own log
// messages. Visible behaviour: fetch a COO profile, obtain four arrays from
// malloc_device, copy values/rows/cols into three of them with one submitted
// kernel, and publish the values and dof-uid arrays through out-parameters.
159template <
typename ValueT>
// Stays null unless the assignment below runs. NOTE(review): the condition
// guarding that assignment is not visible here.
164 const int* h_ghost_uids_ptr = nullptr ;
166 h_ghost_uids_ptr = m_matrix_dist_info.m_recv_info.m_uids.data() ;
// Trace output: local offset, ghost size and the ghost-uid pointer value.
169 cout() <<
"initCOODevicePointers "<<m_local_offset<<
" "<<m_ghost_size<<
" "<< h_ghost_uids_ptr;
171 auto& coo_profile = m_internal->getCOOProfile(m_local_offset,m_ghost_size,h_ghost_uids_ptr) ;
173 cout() <<
"getCOOprofile OK " ;
175 auto env = SYCLEnv::instance() ;
// Used as the 1-D range of the parallel_for launch below.
176 auto max_num_treads = env->maxNumThreads() ;
177 auto nnz = m_profile->getNnz() ;
178 auto& queue = env->internal()->queue() ;
// One IndexType array sized by m_local_size + m_ghost_size, then two IndexType
// arrays and one ValueT array sized by nnz. `malloc_device` is presumably
// sycl::malloc_device — confirm.
179 auto dof_uids_ptr = malloc_device<IndexType>(m_local_size+m_ghost_size, queue);
180 auto rows_ptr = malloc_device<IndexType>(nnz, queue);
181 auto cols_ptr = malloc_device<IndexType>(nnz, queue);
182 auto values_ptr = malloc_device<ValueT>(nnz, queue);
// Command group: copy m_values, the COO profile's m_rows and m_cols into the new arrays.
184 queue.submit( [&](sycl::handler& cgh)
186 auto access_x = m_internal->m_values.template get_access<sycl::access::mode::read>(cgh);
187 auto access_rows = coo_profile.m_rows.template get_access<sycl::access::mode::read>(cgh);
188 auto access_cols = m_internal->m_cols.template get_access<sycl::access::mode::read>(cgh);
189 auto y_length = nnz ;
// The kernel lambda captures by value ([=]), so the raw pointers and accessors are copied into it.
190 cgh.parallel_for<
class init_ptr>(sycl::range<1>{max_num_treads}, [=] (sycl::item<1> itemId)
192 auto id = itemId.get_id(0);
// Each work-item starts at its own id and advances by the launch range size until nnz entries are covered.
193 for (
auto i =
id; i < y_length; i += itemId.get_range()[0])
195 values_ptr[i] = access_x[i];
196 rows_ptr[i] = access_rows[i];
197 cols_ptr[i] = access_cols[i];
203 cout() <<
"queue Submit OK " ;
// NOTE(review): the statements between the trace above and the assignments
// below are not visible here; how dof_uids_ptr is filled and how rows_ptr /
// cols_ptr reach the caller cannot be told from this view.
229 *values = values_ptr ;
232 *dof_uids = dof_uids_ptr ;
234 cout() <<
"initCOODevicePointers OK " ;
239template <
typename ValueT>
240void HCSRMatrix<ValueT>::freeDevicePointers(
int* ncols,
int* rows,
int* cols, ValueT* values)
const
242 auto env = SYCLEnv::instance() ;
243 auto& queue = env->internal()->queue() ;
244 sycl::free(values,queue) ;
245 sycl::free(ncols,queue) ;
246 sycl::free(rows,queue) ;
247 sycl::free(cols,queue) ;
250template <
typename ValueT>
251void HCSRMatrix<ValueT>::freeCOODevicePointers(
int* dof_uids,
int* rows,
int* cols, ValueT* values)
const
253 auto env = SYCLEnv::instance() ;
254 auto& queue = env->internal()->queue() ;
255 sycl::free(values,queue) ;
256 sycl::free(dof_uids,queue) ;
257 sycl::free(rows,queue) ;
258 sycl::free(cols,queue) ;
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-