214class LUFactorisationAlgo
218 typedef MatrixT MatrixType;
219 typedef VectorT VectorType;
220 typedef typename MatrixType::ProfileType ProfileType ;
221 typedef typename MatrixType::ValueType ValueType;
222 typedef typename MatrixType::TagType TagType ;
225 LUFactorisationAlgo()
228 virtual ~LUFactorisationAlgo()
231 template <
typename AlgebraT>
232 void baseInit(AlgebraT& algebra, MatrixT
const& matrix)
234 m_is_parallel = matrix.isParallel();
235 m_alloc_size = matrix.getAllocSize();
236 if constexpr (
requires{matrix.blockSize();})
237 m_block_size = matrix.blockSize();
240 m_distribution = matrix.distribution();
241 m_lu_matrix.reset(matrix.cloneTo(
nullptr));
242 m_profile = &m_lu_matrix->getProfile();
243 m_work.resize(m_alloc_size);
244 m_work.assign(m_work.size(), -1);
245 algebra.allocate(AlgebraT::resource(matrix), m_x);
248 template <
typename AlgebraT>
249 void init(AlgebraT& algebra, MatrixT
const& matrix)
251 baseInit(algebra, matrix);
253 factorize(*m_lu_matrix);
255 blockFactorize(*m_lu_matrix);
260 void factorize(MatrixT& matrix,
bool bjacobi =
true)
278 auto nrows = modifier.nrows() ;
279 auto kcol = modifier.kcol() ;
280 auto dcol = modifier.dcol() ;
281 auto cols = modifier.cols() ;
282 auto values = modifier.data() ;
285 auto& local_row_size = matrix.getDistStructInfo().m_local_row_size;
287 for (std::size_t irow = 1; irow < nrows; ++irow)
289 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
292 ValueType aik = values[k] / values[dcol[krow]];
294 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
296 for (
int j = k + 1; j < kcol[krow] + local_row_size[irow]; ++j)
299 int kj = m_work[jcol];
301 values[j] -= aik * values[kj];
304 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
305 m_work[cols[l]] = -1;
311 op.recvLowerNeighbLUData(values);
312 int first_upper_ghost_index = matrix.getDistStructInfo().m_first_upper_ghost_index;
313 for (std::size_t irow = 1; irow < nrows; ++irow)
315 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
318 ValueType aik = values[k] / values[dcol[krow]];
320 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
322 for (
int j = k + 1; j < kcol[irow] + local_row_size[irow]; ++j)
325 int kj = m_work[jcol];
327 values[j] -= aik * values[kj];
330 for (
int j = kcol[irow] + local_row_size[irow]; j < kcol[irow + 1]; ++j)
333 int kj = m_work[jcol];
334 if ((kj != -1) && (jcol >= first_upper_ghost_index)) {
335 values[j] -= aik * values[kj];
338 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
339 m_work[cols[l]] = -1;
342 op.sendUpperNeighbLUData(values);
346 for (std::size_t irow = 1; irow < nrows; ++irow)
348 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
351 ValueType aik = values[k] / values[dcol[krow]];
353 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
355 for (
int j = k + 1; j < kcol[irow + 1]; ++j)
358 int kj = m_work[jcol];
360 values[j] -= aik * values[kj];
363 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
364 m_work[cols[l]] = -1;
370 void solveL(ValueType
const* y, ValueType* x)
const
374 auto nrows = view.nrows() ;
375 auto kcol = view.kcol() ;
376 auto dcol = view.dcol() ;
377 auto cols = view.cols() ;
378 auto values = view.data() ;
381 for (std::size_t irow = 0; irow < nrows; ++irow) {
382 ValueType val = y[irow];
383 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
384 val -= values[k] * x[cols[k]];
389 void solveU(ValueType
const* y, ValueType* x)
const
393 auto nrows = view.nrows() ;
394 auto kcol = view.kcol() ;
395 auto dcol = view.dcol() ;
396 auto cols = view.cols() ;
397 auto values = view.data() ;
400 auto& local_row_size = m_lu_matrix->getDistStructInfo().m_local_row_size;
401 for (
int irow = (
int)nrows - 1; irow > -1; --irow) {
403 ValueType val = y[irow];
404 for (
int k = dk + 1; k < kcol[irow] + local_row_size[irow]; ++k) {
405 val -= values[k] * x[cols[k]];
407 x[irow] = val / values[dk];
411 for (
int irow = (
int)nrows - 1; irow > -1; --irow) {
413 ValueType val = y[irow];
414 for (
int k = dk + 1; k < kcol[irow + 1]; ++k) {
415 val -= values[k] * x[cols[k]];
417 x[irow] = val / values[dk];
422#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
423 inline auto inv(Eigen::Map<Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor>>
const & block)
const
425 assert(block.determinant()!=0) ;
426 return block.inverse() ;
430 void blockFactorize(MatrixT& matrix,
bool bjacobi =
true)
446#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
447 using namespace Eigen;
448 using Block2D = Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> ;
449 using Block2DView = Eigen::Map<Block2D> ;
451 int N = m_block_size;
458 auto nrows = modifier.nrows() ;
459 auto kcol = modifier.kcol() ;
460 auto dcol = modifier.dcol() ;
461 auto cols = modifier.cols() ;
462 auto values = modifier.data() ;
471 auto& local_row_size = matrix.getDistStructInfo().m_local_row_size;
474 for (std::size_t irow = 1; irow < nrows; ++irow)
479 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
488 aik = Block2DView(values+k*N2,N,N) * inv(Block2DView(values+dcol[krow]*N2,N,N)) ;
489 Block2DView(values+k*N2,N,N) = aik ;
496 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
498 for (
int j = k + 1; j < kcol[irow] + local_row_size[irow]; ++j)
501 int kj = m_work[jcol];
504 Block2DView(values+j*N2,N,N) -= aik * Block2DView(values+kj*N2,N,N) ;
513 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
514 m_work[cols[l]] = -1;
520 op.recvLowerNeighbLUData(values);
521 int first_upper_ghost_index = matrix.getDistStructInfo().m_first_upper_ghost_index;
522 for (std::size_t irow = 1; irow < nrows; ++irow)
524 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
529 aik = Block2DView(values+k*N2,N,N) * inv(Block2DView(values+dcol[krow]*N2,N,N)) ;
530 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
532 for (
int j = k + 1; j < kcol[irow] + local_row_size[irow]; ++j)
535 int kj = m_work[jcol];
538 Block2DView(values+j*N2,N,N) -= aik * Block2DView(values+kj*N2,N,N) ;
541 for (
int j = kcol[irow] + local_row_size[irow]; j < kcol[irow + 1]; ++j)
544 int kj = m_work[jcol];
545 if ((kj != -1) && (jcol >= first_upper_ghost_index)) {
547 Block2DView(values+j*N2,N,N) -= aik * Block2DView(values+kj*N2,N,N) ;
550 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
551 m_work[cols[l]] = -1;
554 op.sendUpperNeighbLUData(values);
558 for (std::size_t irow = 1; irow < nrows; ++irow)
563 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
572 aik = Block2DView(values+k*N2,N,N) * inv(Block2DView(values+dcol[krow]*N2,N,N)) ;
573 Block2DView(values+k*N2,N,N) = aik ;
581 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
583 for (
int j = k + 1; j < kcol[irow + 1]; ++j)
586 int kj = m_work[jcol];
589 Block2DView(values+j*N2,N,N) -= aik * Block2DView(values+kj*N2,N,N) ;
598 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
599 m_work[cols[l]] = -1;
604 throw Arccore::FatalErrorException(
605 A_FUNCINFO,
"Eigen is required for BlockILU factorization");
608#if defined(EIGEN3_DISABLED)
613 int N = m_block_size;
620 auto nrows = modifier.nrows() ;
621 auto kcol = modifier.kcol() ;
622 auto dcol = modifier.dcol() ;
623 auto cols = modifier.cols() ;
624 auto values = modifier.data() ;
631 auto& local_row_size = matrix.getDistStructInfo().m_local_row_size;
633 for (std::size_t irow = 1; irow < nrows; ++irow)
635 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
640 aik = Block2DType{values+k*NxN,N} * lu.inv(Block2DType{values+dcol[krow]*NxN,N});
641 Block2DType{values+k*NxN,N} = aik ;
642 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
644 for (
int j = k + 1; j < kcol[krow] + local_row_size[irow]; ++j)
647 int kj = m_work[jcol];
650 Block2DType{values+j*NxN,N} -= (aik * Block2DType{values+kj*NxN,N}) ;
653 for (
int l = kcol[krow]; l < kcol[krow] + local_row_size[krow]; ++l)
654 m_work[cols[l]] = -1;
660 op.recvLowerNeighbLUData(values);
661 int first_upper_ghost_index = matrix.getDistStructInfo().m_first_upper_ghost_index;
662 for (std::size_t irow = 1; irow < nrows; ++irow)
664 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
669 aik = Block2DType{values+k*NxN,N} * lu.inv(Block2DType{values+dcol[krow]*NxN,N}) ;
670 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
672 for (
int j = k + 1; j < kcol[irow] + local_row_size[irow]; ++j)
675 int kj = m_work[jcol];
678 Block2DType{values+j*NxN,N} -= aik * Block2DType{values+kj*NxN,N} ;
681 for (
int j = kcol[irow] + local_row_size[irow]; j < kcol[irow + 1]; ++j)
684 int kj = m_work[jcol];
685 if ((kj != -1) && (jcol >= first_upper_ghost_index)) {
687 Block2DType{values+j*NxN,N} -= aik * Block2DType{values+kj*NxN,N} ;
690 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
691 m_work[cols[l]] = -1;
694 op.sendUpperNeighbLUData(values);
699 for (std::size_t irow = 1; irow < nrows; ++irow)
701 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
706 aik = Block2DType{values+k*NxN,N} * lu.inv(Block2DType{values+dcol[krow]*NxN,N}) ;
707 Block2DType{values+k*NxN,N} = aik ;
708 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
710 for (
int j = k + 1; j < kcol[irow + 1]; ++j)
713 int kj = m_work[jcol];
716 Block2DType{values+j*NxN,N} -= aik * Block2DType{values+kj*NxN,N} ;
719 for (
int l = kcol[krow]; l < kcol[krow + 1]; ++l)
720 m_work[cols[l]] = -1;
728 void blockSolveL(ValueType
const* y, ValueType* x)
const
730 if constexpr (MatrixType::on_host_only)
732#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
733 using namespace Eigen;
734 using Block2D = Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> ;
735 using Block2DView = Eigen::Map<Block2D> ;
736 using Block1D = Eigen::Matrix<ValueType,Dynamic,1> ;
737 using Block1DView = Eigen::Map<Block1D> ;
739 int N = m_block_size ;
747 auto nrows = view.nrows() ;
748 auto kcol = view.kcol() ;
749 auto dcol = view.dcol() ;
750 auto cols = view.cols() ;
751 auto values = view.data() ;
754 for (std::size_t irow = 0; irow < nrows; ++irow) {
756 val = Block1DView(
const_cast<ValueType*
>(y+irow*N),N) ;
757 for (
int k = kcol[irow]; k < dcol[irow]; ++k)
760 val -= Block2DView(
const_cast<ValueType*
>(values+k*N2),N,N) * Block1DView(x+cols[k]*N,N) ;
763 Block1DView(x+irow*N,N) = val ;
769 void blockSolveU(ValueType
const* y, ValueType* x)
const
771 if constexpr (MatrixType::on_host_only)
773#if defined (ALIEN_USE_EIGEN3) && !defined(EIGEN3_DISABLED)
774 using namespace Eigen;
775 using Block2D = Eigen::Matrix<ValueType,Eigen::Dynamic,Eigen::Dynamic,Eigen::RowMajor> ;
776 using Block2DView = Eigen::Map<Block2D> ;
777 using Block1D = Eigen::Matrix<ValueType,Dynamic,1> ;
778 using Block1DView = Eigen::Map<Block1D> ;
780 int N = m_block_size;
787 auto nrows = view.nrows() ;
788 auto kcol = view.kcol() ;
789 auto dcol = view.dcol() ;
790 auto cols = view.cols() ;
791 auto values = view.data() ;
794 auto& local_row_size = m_lu_matrix->getDistStructInfo().m_local_row_size;
795 for (
int irow = (
int)nrows - 1; irow > -1; --irow) {
798 val = Block1DView(
const_cast<ValueType*
>(y+irow*N),N) ;
799 for (
int k = dk + 1; k < kcol[irow] + local_row_size[irow]; ++k) {
801 val -= Block2DView(
const_cast<ValueType*
>(values+k*N2),N,N) * Block1DView(x+cols[k]*N,N) ;
804 Block1DView(x+irow*N,N) = inv(Block2DView(
const_cast<ValueType*
>(values+dk*N2),N,N)) * val;
808 for (
int irow = (
int)nrows - 1; irow > -1; --irow) {
811 val = Block1DView(
const_cast<ValueType*
>(y+irow*N),N) ;
812 for (
int k = dk + 1; k < kcol[irow + 1]; ++k) {
814 val -= Block2DView(
const_cast<ValueType*
>(values+k*N2),N,N) * Block1DView(x+cols[k]*N,N) ;
817 Block1DView(x+irow*N,N) = inv(Block2DView(
const_cast<ValueType*
>(values+dk*N2),N,N)) * val;
825 template <
typename AlgebraT>
826 void solve([[maybe_unused]] AlgebraT& algebra, VectorType
const& y, VectorType& x)
const
834 solveL(y.data(), m_x.data());
840 solveU(m_x.data(), x.data());
848 blockSolveL(y.data(), m_x.data());
854 blockSolveU(m_x.data(), x.data());
858 const MatrixType& getLUMatrix()
const
865 std::unique_ptr<MatrixType> m_lu_matrix ;
866 int m_block_size = 1;
867 ProfileType
const* m_profile =
nullptr;
868 mutable VectorType m_x ;
871 std::vector<int> m_work ;
872 std::size_t m_alloc_size = 0 ;
873 bool m_is_parallel = false ;
874 bool m_bjacobi = false ;
876 std::vector<int> m_send_lu_ibuffer ;
877 std::vector<std::vector<ValueType>> m_send_lu_buffer ;