13class FLUFactorisationAlgo
14:
public LUFactorisationAlgo<MatrixT, VectorT>
18 typedef LUFactorisationAlgo<MatrixT,VectorT> BaseType ;
19 typedef MatrixT MatrixType;
20 typedef VectorT VectorType;
21 typedef typename MatrixType::ProfileType ProfileType ;
22 typedef typename MatrixType::ValueType ValueType;
23 typedef typename MatrixType::TagType TagType ;
26 FLUFactorisationAlgo()
29 virtual ~FLUFactorisationAlgo()
32 void setParameter(std::string param,
int value)
34 if (param.compare(
"nb-factor-iter") == 0)
36 else if (param.compare(
"nb-solver-iter") == 0)
37 m_nb_solver_iter = value;
40 void setParameter(std::string param,
double value)
42 if (param.compare(
"tol") == 0)
46 template <
typename AlgebraT>
47 void init(AlgebraT& algebra, MatrixT
const& matrix)
49 BaseType::baseInit(algebra, matrix);
50 algebra.allocate(AlgebraT::resource(matrix), m_xk);
53 if(this->m_block_size==1)
54 BaseType::factorize(*this->m_lu_matrix,
true);
57 BaseType::blockFactorize(*this->m_lu_matrix);
61 factorizeMultiIter(*this->m_lu_matrix);
65 if(this->m_block_size==1)
67 algebra.allocate(AlgebraT::resource(matrix), m_inv_diag);
68 algebra.assign(m_inv_diag, 1.);
69 algebra.computeInvDiag(*this->m_lu_matrix, m_inv_diag);
73 m_inv_diag.setBlockSize(this->m_block_size*this->m_block_size) ;
74 algebra.allocate(AlgebraT::resource(matrix), m_inv_diag);
75 algebra.computeInvDiag(*this->m_lu_matrix, m_inv_diag);
79 if (MatrixType::on_host_only) {
80 if (this->m_is_parallel) {
82 this->m_x.resize((Integer)this->m_alloc_size);
83 m_xk.resize((Integer)this->m_alloc_size);
92 void factorizeIter(std::size_t nrows,
112 for (std::size_t irow = 1; irow < nrows; ++irow)
114 _factorizeRow(irow, kcol, dcol, cols, values, values0);
118 void factorizeMultiIter(MatrixT
const& matrix)
123 auto nrows = modifier.nrows() ;
124 auto nnz = modifier.nnz() ;
125 auto kcol = modifier.kcol() ;
126 auto dcol = modifier.dcol() ;
127 auto cols = modifier.cols() ;
128 auto values = modifier.data() ;
131 if (this->m_lu_matrix->isParallel()) {
135 std::vector<ValueType> guest_values(nnz);
136 std::copy(values, values + nnz, guest_values.data());
137 std::copy(matrix_values, matrix_values + nnz, values);
138 op.recvLowerNeighbLUData(values);
139 op.sendUpperNeighbLUData(guest_values.data());
140 factorizeIter(nrows, kcol, dcol, cols, values, guest_values.data());
146 std::vector<ValueType> guest_values(nnz);
147 std::copy(values, values + nnz, guest_values.data());
148 std::copy(matrix_values, matrix_values + nnz, values);
149 factorizeIter(nrows, kcol, dcol, cols, values, guest_values.data());
158 template <
typename AlgebraT>
159 void solveL(AlgebraT& algebra,
162 VectorType& xk)
const
164#ifdef ALIEN_USE_PERF_TIMER
165 typename MatrixType::SentryType sentry(this->m_lu_matrix->timer(),
"SolveL");
167 if constexpr (MatrixType::on_host_only)
171 auto nrows = view.nrows();
172 auto kcol = view.kcol();
173 auto dcol = view.dcol();
174 auto cols = view.cols();
175 auto values = view.data();
177 auto y_ptr = y.data();
178 auto x_ptr = x.data();
179 auto xk_ptr = xk.data();
182 std::copy(x_ptr, x_ptr + nrows, xk_ptr);
183 if (this->m_is_parallel)
186 SendRecvOpType op(xk_ptr,
187 this->m_lu_matrix->getDistStructInfo().m_send_info,
188 this->m_lu_matrix->getSendPolicy(),
190 this->m_lu_matrix->getDistStructInfo().m_recv_info,
191 this->m_lu_matrix->getRecvPolicy(),
192 this->m_lu_matrix->getParallelMng(),
195 auto& local_row_size = this->m_lu_matrix->getDistStructInfo().m_local_row_size;
196 int first_upper_ghost_index = this->m_lu_matrix->getDistStructInfo().m_first_upper_ghost_index;
199 for (std::size_t irow = 0; irow < nrows; ++irow) {
200 ValueType val = y_ptr[irow];
201 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
202 val -= values[k] * xk_ptr[cols[k]];
203 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
204 if (cols[k] < first_upper_ghost_index)
205 val -= values[k] * xk_ptr[cols[k]];
212 for (std::size_t irow = 0; irow < nrows; ++irow) {
213 ValueType val = y_ptr[irow];
214 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
215 val -= values[k] * xk_ptr[cols[k]];
224 algebra.addLMult(-1, *this->m_lu_matrix, xk, x);
228 template <
typename AlgebraT>
229 void blockSolveL(AlgebraT& algebra,
232 VectorType& xk)
const
234#ifdef ALIEN_USE_PERF_TIMER
235 typename MatrixType::SentryType sentry(this->m_lu_matrix->timer(),
"SolveL");
237 if constexpr (MatrixType::on_host_only)
239#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
240 using namespace Eigen;
241 using Block2D = Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> ;
242 using Block2DView = Eigen::Map<Block2D> ;
243 using Block1D = Eigen::Matrix<ValueType,Dynamic,1> ;
244 using Block1DView = Eigen::Map<Block1D> ;
246 int N = this->m_block_size ;
252 auto nrows = view.nrows();
253 auto kcol = view.kcol();
254 auto dcol = view.dcol();
255 auto cols = view.cols();
256 auto values = view.data();
258 auto y_ptr = y.data();
259 auto x_ptr = x.data();
260 auto xk_ptr = xk.data();
263 std::copy(x_ptr, x_ptr + nrows*N, xk_ptr);
264 if (this->m_is_parallel)
267 SendRecvOpType op(xk_ptr,
268 this->m_lu_matrix->getDistStructInfo().m_send_info,
269 this->m_lu_matrix->getSendPolicy(),
271 this->m_lu_matrix->getDistStructInfo().m_recv_info,
272 this->m_lu_matrix->getRecvPolicy(),
273 this->m_lu_matrix->getParallelMng(),
276 auto& local_row_size = this->m_lu_matrix->getDistStructInfo().m_local_row_size;
277 int first_upper_ghost_index = this->m_lu_matrix->getDistStructInfo().m_first_upper_ghost_index;
280 for (std::size_t irow = 0; irow < nrows; ++irow)
283 val = Block1DView(
const_cast<ValueType*
>(y_ptr+irow*N),N) ;
284 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
287 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
289 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k)
291 if (cols[k] < first_upper_ghost_index)
294 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
298 Block1DView(x_ptr+irow*N,N) = val ;
303#ifdef PRINT_DEBUG_INFO
304 for (std::size_t irow = 0; irow < nrows; ++irow)
306 std::cout<<
"XK["<<irow<<
"]=\n";
308 std::cout<<xk_ptr[irow*N+i]<<std::endl;
311 for (std::size_t irow = 0; irow < nrows; ++irow)
314 val = Block1DView(
const_cast<ValueType*
>(y_ptr+irow*N),N) ;
315#ifdef PRINT_DEBUG_INFO
316 std::cout<<
"LSOLVE MAT["<<irow<<
"]:"<<std::endl;
318 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
321 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
322#ifdef PRINT_DEBUG_INFO
323 std::cout<<Block2DView(const_cast<ValueType*>(values+k*NxN),N,N)<<std::endl;
327 Block1DView(x_ptr+irow*N,N) = val ;
328#ifdef PRINT_DEBUG_INFO
329 std::cout<<
"Y =\n"<<val<<std::endl ;
339 algebra.addLMult(-1, *this->m_lu_matrix, xk, x);
343 template <
typename AlgebraT>
344 void solveU(AlgebraT& algebra, VectorType
const& y, VectorType& x, VectorType& xk)
const
346#ifdef ALIEN_USE_PERF_TIMER
347 typename MatrixType::SentryType sentry(this->m_lu_matrix->timer(),
"SolveU");
349 if constexpr (MatrixType::on_host_only)
353 auto nrows = view.nrows();
354 auto kcol = view.kcol();
355 auto dcol = view.dcol();
356 auto cols = view.cols();
357 auto values = view.data();
359 auto y_ptr = y.data();
360 auto x_ptr = x.data();
361 auto xk_ptr = xk.data();
364 std::copy(x_ptr, x_ptr + nrows, xk_ptr);
365 if (this->m_is_parallel) {
366 auto& local_row_size = this->m_lu_matrix->getDistStructInfo().m_local_row_size;
367 int first_upper_ghost_index = this->m_lu_matrix->getDistStructInfo().m_first_upper_ghost_index;
370 SendRecvOpType op(xk_ptr,
371 this->m_lu_matrix->getDistStructInfo().m_send_info,
372 this->m_lu_matrix->getSendPolicy(),
374 this->m_lu_matrix->getDistStructInfo().m_recv_info,
375 this->m_lu_matrix->getRecvPolicy(),
376 this->m_lu_matrix->getParallelMng(),
380 for (std::size_t irow = 0; irow < nrows; ++irow) {
382 ValueType val = y_ptr[irow];
383 for (
int k = dk + 1; k < kcol[irow] + local_row_size[irow]; ++k) {
384 val -= values[k] * xk_ptr[cols[k]];
386 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
387 if (cols[k] >= first_upper_ghost_index)
388 val -= values[k] * xk_ptr[cols[k]];
390 val = val / values[dk];
395 for (std::size_t irow = 0; irow < nrows; ++irow) {
397 ValueType val = y_ptr[irow];
398 for (
int k = dk + 1; k < kcol[irow + 1]; ++k) {
399 val -= values[k] * xk_ptr[cols[k]];
401 val = val / values[dk];
410 algebra.addUMult(-1., *this->m_lu_matrix, xk, x);
411 algebra.pointwiseMult(m_inv_diag, x, x);
416 template <
typename AlgebraT>
417 void blockSolveU(AlgebraT& algebra, VectorType
const& y, VectorType& x, VectorType& xk)
const
419#ifdef ALIEN_USE_PERF_TIMER
420 typename MatrixType::SentryType sentry(this->m_lu_matrix->timer(),
"SolveU");
422 if constexpr (MatrixType::on_host_only)
424#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
425 using namespace Eigen;
426 using Block2D = Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> ;
427 using Block2DView = Eigen::Map<Block2D> ;
428 using Block1D = Eigen::Matrix<ValueType,Dynamic,1> ;
429 using Block1DView = Eigen::Map<Block1D> ;
431 int N = this->m_block_size ;
438 auto nrows = view.nrows();
439 auto kcol = view.kcol();
440 auto dcol = view.dcol();
441 auto cols = view.cols();
442 auto values = view.data();
444 auto y_ptr = y.data();
445 auto x_ptr = x.data();
446 auto xk_ptr = xk.data();
449 std::copy(x_ptr, x_ptr + nrows*N, xk_ptr);
450 if (this->m_is_parallel) {
451 auto& local_row_size = this->m_lu_matrix->getDistStructInfo().m_local_row_size;
452 int first_upper_ghost_index = this->m_lu_matrix->getDistStructInfo().m_first_upper_ghost_index;
455 SendRecvOpType op(xk_ptr,
456 this->m_lu_matrix->getDistStructInfo().m_send_info,
457 this->m_lu_matrix->getSendPolicy(),
459 this->m_lu_matrix->getDistStructInfo().m_recv_info,
460 this->m_lu_matrix->getRecvPolicy(),
461 this->m_lu_matrix->getParallelMng(),
465 for (std::size_t irow = 0; irow < nrows; ++irow) {
468 val = Block1DView(
const_cast<ValueType*
>(y_ptr+irow*N),N) ;
469 for (
int k = dk + 1; k < kcol[irow] + local_row_size[irow]; ++k) {
471 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
473 for (
int k = kcol[irow] + local_row_size[irow]; k < kcol[irow + 1]; ++k) {
474 if (cols[k] >= first_upper_ghost_index)
477 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
482 Block1DView(x_ptr+irow*N,N) = BaseType::inv(Block2DView(
const_cast<ValueType*
>(values+dk*NxN),N,N)) * val;
488#ifdef PRINT_DEBUG_INFO
489 for (std::size_t irow = 0; irow < nrows; ++irow)
491 std::cout<<
"XK["<<irow<<
"]=\n";
493 std::cout<<xk_ptr[irow*N+i]<<std::endl;
496 for (std::size_t irow = 0; irow < nrows; ++irow)
498#ifdef PRINT_DEBUG_INFO
499 std::cout<<
"U SOLVE MAT["<<irow<<
"]:"<<std::endl ;
503 val = Block1DView(
const_cast<ValueType*
>(y_ptr+irow*N),N) ;
504 for (
int k = dk + 1; k < kcol[irow + 1]; ++k) {
506 val -= Block2DView(
const_cast<ValueType*
>(values+k*NxN),N,N) * Block1DView(xk_ptr+cols[k]*N,N) ;
507#ifdef PRINT_DEBUG_INFO
508 std::cout<<Block2DView(const_cast<ValueType*>(values+k*NxN),N,N)<<std::endl;
513 Block1DView(x_ptr+irow*N,N) = BaseType::inv(Block2DView(
const_cast<ValueType*
>(values+dk*NxN),N,N)) * val;
514#ifdef PRINT_DEBUG_INFO
515 std::cout<<
"Y["<<irow<<
"]=\n"<<val<<std::endl;
519#ifdef PRINT_DEBUG_INFO
520 for (std::size_t irow = 0; irow < nrows; ++irow)
523 std::cout<<
"INV DIAG["<<irow<<
"]\n"<<BaseType::inv(Block2DView(
const_cast<ValueType*
>(values+dk*NxN),N,N))<<std::endl ;
524 std::cout<<
"Y["<<irow<<
"]=\n";
526 std::cout<<x_ptr[irow*N+i]<<std::endl;
536 algebra.addUMult(-1., *this->m_lu_matrix, xk, x);
538 algebra.multDiag(m_inv_diag, xk, x);
543 template <
typename AlgebraT>
544 void solve(AlgebraT& algebra, VectorType
const& x, VectorType& y)
const
551 algebra.copy(x, this->m_x);
552 if(this->m_block_size==1)
554 for (
int iter = 0; iter < m_nb_solver_iter; ++iter) {
555 solveL(algebra, x, this->m_x, m_xk);
560 for (
int iter = 0; iter < m_nb_solver_iter; ++iter) {
561 blockSolveL(algebra, x, this->m_x, m_xk);
569 algebra.copy(this->m_x, y);
570 if(this->m_block_size==1)
572 for (
int iter = 0; iter < m_nb_solver_iter; ++iter) {
573 solveU(algebra, this->m_x, y, m_xk);
578 for (
int iter = 0; iter < m_nb_solver_iter; ++iter) {
579 blockSolveU(algebra, this->m_x, y, m_xk);
585 void _factorizeRow(std::size_t irow,
603 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
606 typename BaseType::ValueType aik = values[k] / values0[dcol[krow]];
608 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
609 this->m_work[cols[l]] = l;
610 for (
int j = k + 1; j < kcol[irow + 1]; ++j)
613 int kj = this->m_work[jcol];
615 values[j] -= aik * values0[kj];
618 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
619 this->m_work[cols[l]] = -1;
624 mutable VectorType m_xk ;
625 mutable VectorType m_inv_diag ;
632 int m_nb_solver_iter = 0 ;
633 ValueType m_tol = 0 ;