15#include <alien/kernels/sycl/data/HCSRMatrix.h>
16#include <alien/kernels/sycl/data/HCSRMatrixInternal.h>
18#include <alien/kernels/sycl/data/SYCLParallelEngine.h>
19#include <alien/kernels/sycl/data/SYCLParallelEngineImplT.h>
21#include <alien/handlers/scalar/sycl/CombineProfiledMatrixBuilderT.h>
// Private implementation of CombineProfiledMatrixBuilderT: holds the two
// buffers used by the combine step (per-contributor values and contributor
// indexes).
// NOTE(review): this listing carries fused line numbers and has dropped lines
// (braces, access specifiers, and the declaration of `ptr` used below) —
// confirm against the original header before changing any code here.
39 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
40 class CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::Impl
// Buffer types are reused from the HCSR matrix internal type.
43 typedef typename HCSRMatrix<ValueT>::InternalType MatrixInternalType ;
44 typedef typename MatrixInternalType::ValueBufferType ValueBufferType ;
45 typedef typename MatrixInternalType::IndexBufferType IndexBufferType ;
// Constructor: both buffers are created with `size` elements. The index buffer
// is additionally constructed from `ptr`, whose parameter declaration is not
// visible in this listing — TODO confirm its type.
47 Impl(std::size_t size,
49 : m_combine_values_buffer(sycl::range(size))
50 , m_contributor_indexes_buffer(ptr,sycl::range(size))
// Presumably disables copy-back of the buffer content to `ptr` when the buffer
// is destroyed (SYCL buffer semantics) — assumes IndexBufferType is a
// sycl::buffer, TODO confirm.
52 m_contributor_indexes_buffer.set_final_data(
nullptr) ;
// Submit a command group that fills every element of the combine-values buffer
// with CombineOpT::init_value() converted to ValueT.
54 auto env = SYCLEnv::instance() ;
55 env->internal()->queue().submit([&](sycl::handler& cgh)
57 auto init_value = CombineOpT::init_value() ;
58 auto access_x = m_combine_values_buffer.template get_access<sycl::access::mode::read_write>(cgh);
59 cgh.fill(access_x,ValueT(init_value)) ;
// One value slot per (matrix entry, contributor) pair — sized by the caller.
63 ValueBufferType m_combine_values_buffer ;
// Contributor index stored for each slot of the combine-values buffer.
64 IndexBufferType m_contributor_indexes_buffer ;
// Constructor: forwards `matrix` and `reset_values` unchanged to BaseType.
// NOTE(review): the constructor body is not visible in this listing.
67 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
68 CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::CombineProfiledMatrixBuilderT(
IMatrix& matrix, ProfiledMatrixOptions::ResetFlag reset_values)
69 : BaseType(matrix,reset_values)
// Destructor of the combine builder.
// NOTE(review): the destructor body is not visible in this listing, so any
// cleanup it performs cannot be described from here.
72 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
73 CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::~CombineProfiledMatrixBuilderT()
// Prepares the contributor table used for parallel assembly and (re)creates
// the Impl object that owns the matching buffers.
//
// max_nb_contributors : number of contributor slots reserved per matrix entry.
// stencil_offsets     : per-row offsets into stencil_indexes; row `irow` covers
//                       [stencil_offsets[irow], stencil_offsets[irow+1]).
// stencil_indexes     : column index read for each stencil position.
//
// NOTE(review): several lines are missing from this listing (braces, the
// statements following the two slot tests in `f`, and the calls to `f` inside
// the row loop) — confirm against the original file.
78 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
79 void CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::
80 setParallelAssembleStencil(std::size_t max_nb_contributors,
81 Arccore::ConstArrayView<IndexT> stencil_offsets,
82 Arccore::ConstArrayView<IndexT> stencil_indexes)
84 m_max_nb_contributors = max_nb_contributors ;
// Entry count is read from the last row-start offset.
85 m_nnz = this->m_row_starts[this->m_local_size] ;
// One slot per (entry, contributor) pair.
86 m_combine_size = m_max_nb_contributors*m_nnz ;
// Every slot starts at -1, which the lambda below treats as "free".
87 m_contributor_indexes.resize(m_combine_size) ;
88 m_contributor_indexes.assign(m_combine_size,-1) ;
// f registers contributor `contrib_index` for entry (row_index, col_index):
// it scans that entry's slots in order, tests whether the contributor is
// already recorded, and otherwise writes it into the first slot holding -1.
90 auto f = [&](
int contrib_index,
int row_index,
int col_index)
92 auto eij = this->entryIndex(row_index,col_index) ;
// First slot belonging to this entry.
93 auto offset = eij*m_max_nb_contributors ;
94 for(std::size_t c=0;c<m_max_nb_contributors;++c)
// Contributor already recorded for this entry (the action taken is not visible here).
96 if(m_contributor_indexes[offset+c]==contrib_index)
// Free slot: record the contributor.
98 if(m_contributor_indexes[offset+c]==-1)
100 m_contributor_indexes[offset+c] = contrib_index ;
// Walk every local row and each stencil position of that row.
105 for(
int irow=0;irow<this->m_local_size;++irow)
107 for(
auto k=stencil_offsets[irow];k<stencil_offsets[irow+1];++k)
109 auto col = stencil_indexes[k] ;
// Rebuild the implementation from the combine size and the table's data pointer.
114 m_impl.reset(
new Impl{m_combine_size,m_contributor_indexes.data()}) ;
// Accessor-based view extending ProfiledMatrixBuilderT::View with access to the
// combine-values buffer and the contributor index table.
// NOTE(review): braces, access specifiers, the BaseType alias and the
// not-found return of combineEntryIndex are not visible in this listing.
120 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
121 class CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::View
122 :
public ProfiledMatrixBuilderT<ValueT,IndexT>::View
// The three null pointers below appear only inside decltype expressions, where
// they serve to name the accessor types returned by get_access.
124 sycl::handler* m_h = nullptr ;
125 sycl::buffer<ValueT,1>* m_vb = nullptr ;
126 sycl::buffer<IndexT,1>* m_ib = nullptr ;
// read_write accessor type on a ValueT buffer.
127 using ValueAccessorType =
decltype(m_vb->template get_access<sycl::access::mode::read_write>(*m_h));
// read accessor type on an IndexT buffer.
128 using IndexAccessorType =
decltype(m_ib->template get_access<sycl::access::mode::read>(*m_h));
// The first three accessors are forwarded to the base view; the remaining
// arguments are stored in this class.
132 explicit View(ValueAccessorType values_accessor,
133 IndexAccessorType cols_accessor,
134 IndexAccessorType kcol_accessor,
135 ValueAccessorType combine_values_accessor,
136 IndexAccessorType prow_cols_accessor,
137 std::size_t nb_contributor)
138 : BaseType(values_accessor,cols_accessor,kcol_accessor)
139 , m_combine_values_accessor(combine_values_accessor)
140 , m_prow_cols_accessor(prow_cols_accessor)
141 , m_nb_contributor(nb_contributor)
// Returns the combine slot index for contributor `prow` on entry (row, col):
// finds position k of `col` within [kcol[row], kcol[row+1]), then scans the
// m_nb_contributor slots of k for `prow` and returns m_nb_contributor*k+j.
// The value returned when nothing matches is not visible here — TODO confirm.
146 IndexT combineEntryIndex(IndexT prow, IndexT row, IndexT col)
const {
147 for(
auto k=this->m_kcol_accessor[row];k<this->m_kcol_accessor[row+1];++k)
148 if(this->m_cols_accessor[k]==col)
150 for(std::size_t j=0;j<m_nb_contributor;++j)
152 if(m_prow_cols_accessor[m_nb_contributor*k+j]==prow)
// The cast applies to m_nb_contributor only, before the multiplication by k.
153 return (IndexT)m_nb_contributor*k+j ;
// Stores `value` in combine slot `index`, overwriting the previous content.
160 void combine(IndexT index,ValueT value)
const {
161 m_combine_values_accessor[index] = value ;
// Accessor on the per-contributor value slots.
165 ValueAccessorType m_combine_values_accessor ;
// Accessor on the contributor index recorded for each slot.
166 IndexAccessorType m_prow_cols_accessor ;
// Number of slots per matrix entry.
167 std::size_t m_nb_contributor = 0 ;
// Builds a View from accessors requested on `cgh.m_internal`.
// NOTE(review): the function signature line is missing from this listing; only
// the template header and the return statement are visible.
171 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
// Matrix values and combine values are requested read_write; column indexes,
// row offsets and contributor indexes are requested read-only.
174 return View(BaseType::m_impl->m_values_buffer.template get_access<sycl::access::mode::read_write>(cgh.m_internal),
175 BaseType::m_impl->m_cols_buffer.template get_access<sycl::access::mode::read>(cgh.m_internal),
176 BaseType::m_impl->m_kcol_buffer.template get_access<sycl::access::mode::read>(cgh.m_internal),
177 m_impl->m_combine_values_buffer.template get_access<sycl::access::mode::read_write>(cgh.m_internal),
178 m_impl->m_contributor_indexes_buffer.template get_access<sycl::access::mode::read>(cgh.m_internal),
179 m_max_nb_contributors) ;
// Submits a kernel that folds the per-contributor values of each matrix entry
// into the matrix values buffer using CombineOpT::apply.
// NOTE(review): the function signature and the definition of `nnz` are missing
// from this listing — presumably nnz is a local copy of m_nnz, TODO confirm.
184 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
188 auto env = SYCLEnv::instance() ;
// Local copy so the kernel lambda captures a value instead of a member.
190 auto nb_contributors = m_max_nb_contributors ;
// The kernel is launched over maxNumThreads() work items.
191 auto total_threads = env->maxNumThreads() ;
192 env->internal()->queue().submit([&](sycl::handler& cgh)
// Matrix values are updated in place; combine values are only read.
194 auto values_acc = BaseType::m_impl->m_values_buffer.template get_access<sycl::access::mode::read_write>(cgh);
195 auto combine_values_acc = m_impl->m_combine_values_buffer.template get_access<sycl::access::mode::read>(cgh);
196 cgh.parallel_for<
class class_combine>( sycl::range<1>{total_threads},
197 [=] (sycl::item<1> itemId)
// Each work item starts at its own id and advances by the range size, so
// entries beyond total_threads are still visited.
199 auto id = itemId.get_id(0);
200 for (
auto k =
id; k < nnz; k += itemId.get_range()[0])
// Start from the current matrix value and apply the operator over every
// slot of entry k, whether or not that slot was written.
202 auto value = values_acc[k] ;
203 for(std::size_t c=0;c<nb_contributors;++c)
205 value = CombineOpT::apply(value,combine_values_acc[nb_contributors*k+c]) ;
207 values_acc[k] = value ;
// Host-accessor counterpart of the combine view: extends
// ProfiledMatrixBuilderT::HostView with the combine values and the contributor
// index table.
// NOTE(review): braces, access specifiers, the BaseType alias and the
// not-found return of combineEntryIndex are not visible in this listing.
215 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
216 class CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::HostView
217 :
public ProfiledMatrixBuilderT<ValueT,IndexT>::HostView
// Null pointer used only inside decltype to name the host accessor type of a ValueT buffer.
220 sycl::buffer<ValueT,1>* m_b = nullptr ;
221 using ValueAccessorType =
decltype(m_b->get_host_access());
// Same technique for the host accessor type of an IndexT buffer.
223 sycl::buffer<IndexT,1>* m_ib = nullptr ;
224 using IndexAccessorType =
decltype(m_ib->get_host_access());
// The first three accessors are forwarded to the base view; the remaining
// arguments are stored in this class.
229 HostView(ValueAccessorType values,
230 IndexAccessorType cols,
231 IndexAccessorType kcol,
232 ValueAccessorType combine_values,
233 IndexAccessorType prow_cols,
234 std::size_t nb_contributor)
235 : BaseType(values,cols,kcol)
236 , m_combine_values(combine_values)
237 , m_prow_cols(prow_cols)
238 , m_nb_contributor(nb_contributor)
// Returns the combine slot index for contributor `prow` on entry (row, col):
// finds position k of `col` within [m_kcol[row], m_kcol[row+1]), then scans the
// m_nb_contributor slots of k for `prow` and returns m_nb_contributor*k+j.
// The value returned when nothing matches is not visible here — TODO confirm.
242 IndexT combineEntryIndex(IndexT prow, IndexT row, IndexT col)
const {
243 for(
auto k=this->m_kcol[row];k<this->m_kcol[row+1];++k)
244 if(this->m_cols[k]==col)
246 for(std::size_t j=0;j<m_nb_contributor;++j)
248 if(m_prow_cols[m_nb_contributor*k+j]==prow)
// The std::size_t product is converted to IndexT by the return statement.
249 return m_nb_contributor*k+j ;
// Host accessor on the per-contributor value slots.
256 ValueAccessorType m_combine_values ;
// Host accessor on the contributor index recorded for each slot.
257 IndexAccessorType m_prow_cols;
// Number of slots per matrix entry.
258 std::size_t m_nb_contributor = 0 ;
// Builds a HostView by requesting a host accessor on each of the five buffers
// (matrix values, columns, row offsets, combine values, contributor indexes)
// and passing the current number of contributor slots per entry.
// NOTE(review): the return-type line of the signature is missing from this
// listing.
262 template <
typename ValueT,
typename IndexT,
typename CombineOpT>
264 CombineProfiledMatrixBuilderT<ValueT,IndexT,CombineOpT>::hostView()
266 return HostView(BaseType::m_impl->m_values_buffer.get_host_access(),
267 BaseType::m_impl->m_cols_buffer.get_host_access(),
268 BaseType::m_impl->m_kcol_buffer.get_host_access(),
269 m_impl->m_combine_values_buffer.get_host_access(),
270 m_impl->m_contributor_indexes_buffer.get_host_access(),
271 m_max_nb_contributors) ;
Interface for all matrices.
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-