9#include "BaseDirectMatrixBuilder.h"
15#include <alien/utils/Precomp.h>
19#include <alien/kernels/simple_csr/CSRStructInfo.h>
20#include <alien/kernels/simple_csr/SimpleCSRMatrix.h>
29using namespace Arccore;
37#define KEY_OF(i) (i).key()
38#define VALUE_OF(i) (i).value()
42#define KEY_OF(i) (i)->first
43#define VALUE_OF(i) (i)->second
55 DirectMatrixBuilder::DirectMatrixBuilder(IMatrix& matrix,
56 const DirectMatrixOptions::ResetFlag reset_flag,
57 const DirectMatrixOptions::SymmetricFlag symmetric_flag)
59 , m_matrix_impl(nullptr)
64 , m_reset_flag(reset_flag)
67 , m_symmetric_profile(symmetric_flag == DirectMatrixOptions::eSymmetric)
69 , m_parallel_mng(nullptr)
72 m_matrix.impl()->lock();
73 m_matrix_impl = &m_matrix.impl()->get<BackEnd::tag::simplecsr>(
true);
75 const MatrixDistribution& dist = m_matrix_impl->distribution();
77 m_parallel_mng = dist.parallelMng();
79 if (!m_parallel_mng) {
83 m_nproc = m_parallel_mng->commSize();
86 m_local_size = dist.localRowSize();
87 m_global_size = dist.globalRowSize();
88 m_local_offset = dist.rowOffset();
90 m_col_global_size = m_matrix_impl->colSpace().size();
92 const bool never_allocated = (m_matrix_impl->getCSRProfile().getNRow() == 0);
93 if (m_reset_flag == DirectMatrixOptions::eResetAllocation or never_allocated) {
94 m_reset_flag = DirectMatrixOptions::eResetAllocation;
95 m_row_sizes.resize(m_local_size, 0);
98 m_row_sizes.resize(m_local_size);
99 SimpleCSRInternal::CSRStructInfo& profile =
100 m_matrix_impl->internal()->getCSRProfile();
101 ConstArrayView<Integer> row_starts = profile.getRowOffset();
102 for (Integer i = 0; i < m_local_size; ++i) {
103 const Integer row_capacity = row_starts[i + 1] - row_starts[i];
104 m_row_sizes[i] = row_capacity;
111 DirectMatrixBuilder::~DirectMatrixBuilder()
120 void DirectMatrixBuilder::reserve(
121 Integer n,
const DirectMatrixOptions::ReserveFlag flag)
123 ALIEN_ASSERT((!m_allocated), (
"Cannot reserve already allocated matrix"));
124 m_reset_flag = DirectMatrixOptions::eResetAllocation;
126 if (flag == DirectMatrixOptions::eResetReservation) {
130 ALIEN_ASSERT((flag == DirectMatrixOptions::eExtendReservation),
131 (
"Unexpected reservation flag"));
132 for (Integer i = 0, is = m_row_sizes.size(); i < is; ++i)
139 void DirectMatrixBuilder::reserve(
140 const ConstArrayView<Integer> indices, Integer n,
const ReserveFlag flag)
142 ALIEN_ASSERT((!m_allocated), (
"Cannot reserve already allocated matrix"));
143 m_reset_flag = DirectMatrixOptions::eResetAllocation;
145 if (flag == DirectMatrixOptions::eResetReservation) {
146 for (Integer i = 0; i < indices.size(); ++i)
147 m_row_sizes[indices[i] - m_local_offset] = n;
150 ALIEN_ASSERT((flag == DirectMatrixOptions::eExtendReservation),
151 (
"Unexpected reservation flag"));
152 for (Integer i = 0; i < indices.size(); ++i)
153 m_row_sizes[indices[i] - m_local_offset] += n;
159 void DirectMatrixBuilder::allocate()
162 ALIEN_ASSERT((!m_allocated), (
"Cannot allocate already allocated matrix"));
164 if (m_reset_flag == DirectMatrixOptions::eResetAllocation) {
165 computeProfile(m_row_sizes);
167 SimpleCSRInternal::CSRStructInfo& profile = m_matrix_impl->internal()->getCSRProfile();
169 profile.setSymmetric(m_symmetric_profile);
171 m_row_starts = profile.getRowOffset();
172 m_cols = profile.getCols();
173 m_values = m_matrix_impl->internal()->getValues();
175 if (m_reset_flag == DirectMatrixOptions::eResetAllocation or m_reset_flag == DirectMatrixOptions::eResetProfile or profile.getColOrdering() != SimpleCSRInternal::CSRStructInfo::eFull) {
176 profile.getColOrdering() = SimpleCSRInternal::CSRStructInfo::eUndef;
180 if (m_matrix_impl->isParallel()) {
182 if (m_reset_flag == DirectMatrixOptions::eResetValues) {
183 for (Integer i = 0; i < m_local_size; ++i) {
184 const Integer row_capacity = m_row_starts[i + 1] - m_row_starts[i];
185 m_row_sizes[i] = row_capacity;
186 auto view = ArrayView<Integer>(row_capacity, m_cols.data() + m_row_starts[i]);
187 std::sort(view.begin(), view.end());
191 std::set<std::pair<Integer, Real>> entries;
192 for (Integer i = 0; i < m_local_size; ++i) {
193 const Integer row_capacity = m_row_starts[i + 1] - m_row_starts[i];
194 m_row_sizes[i] = row_capacity;
196 for (Integer k = m_row_starts[i]; k < m_row_starts[i + 1]; ++k) {
197 entries.insert(std::make_pair(m_cols[k], m_values[k]));
200 for (
auto e = entries.begin(); e != entries.end(); ++e, ++k) {
201 m_cols[k] = e->first;
202 m_values[k] = e->second;
208 for (Integer i = 0; i < m_local_size; ++i) {
209 const Integer row_capacity = m_row_starts[i + 1] - m_row_starts[i];
210 m_row_sizes[i] = row_capacity;
215 if (m_reset_flag != DirectMatrixOptions::eNoReset)
224 void DirectMatrixBuilder::addData(
225 const Integer iIndex,
const Integer jIndex,
const Real value)
228 ALIEN_ASSERT((m_allocated), (
"Not allocated matrix"));
231 if (iIndex == -1 or jIndex == -1)
233 const Integer local_row = iIndex - m_local_offset;
234#ifdef CHECKPROFILE_ON_FILLING
235 if (local_row < 0 or local_row >= m_local_size)
236 throw FatalErrorException(
"Cannot add data on undefined row");
238 if (jIndex < -1 or jIndex >= m_col_global_size)
239 throw FatalErrorException(
"column index undefined");
240 const Integer row_start = m_row_starts[local_row];
241 Integer& row_size = m_row_sizes[local_row];
242 Integer row_capacity = m_row_starts[local_row + 1] - row_start;
244 Real* found_value = intrusive_vmap_insert(jIndex, hint_pos, row_size, row_capacity,
245 m_cols.unguardedBasePointer() + row_start,
246 m_values.unguardedBasePointer() + row_start);
248 *found_value += value;
250 m_extras[local_row][jIndex] += value;
256 void DirectMatrixBuilder::addData(
const Integer iIndex,
const Real factor,
257 ConstArrayView<Integer> jIndexes, ConstArrayView<Real> jValues)
260 ALIEN_ASSERT((m_allocated), (
"Not allocated matrix"));
261 ALIEN_ASSERT((jIndexes.size() == jValues.size()),
262 (
"Inconsistent sizes: %d vs %d", jIndexes.size(), jValues.size()));
266 const Integer local_row = iIndex - m_local_offset;
267#ifdef CHECKPROFILE_ON_FILLING
268 if (local_row < 0 or local_row >= m_local_size)
269 throw FatalErrorException(
"Cannot add data on undefined row");
271 const Integer row_start = m_row_starts[local_row];
272 Integer& row_size = m_row_sizes[local_row];
273 Integer row_capacity = m_row_starts[local_row + 1] - row_start;
274 ColValueData* local_extras =
nullptr;
276 for (Integer i = 0, n = jIndexes.size(); i < n; ++i) {
277 const Integer jIndex = jIndexes[i];
280 if (jIndex < -1 or jIndex >= m_col_global_size)
281 throw FatalErrorException(
"column index undefined");
283 Real* found_value = intrusive_vmap_insert(jIndex, hint_pos, row_size, row_capacity,
284 m_cols.unguardedBasePointer() + row_start,
285 m_values.unguardedBasePointer() + row_start);
287 *found_value += factor * jValues[i];
290 if (local_extras ==
nullptr)
291 local_extras = &m_extras[local_row];
292 (*local_extras)[jIndex] += factor * jValues[i];
300 void DirectMatrixBuilder::setData(
301 const Integer iIndex,
const Integer jIndex,
const Real value)
304 ALIEN_ASSERT((m_allocated), (
"Not allocated matrix"));
307 if (iIndex == -1 or jIndex == -1)
309 const Integer local_row = iIndex - m_local_offset;
310#ifdef CHECKPROFILE_ON_FILLING
311 if (local_row < 0 or local_row >= m_local_size)
312 throw FatalErrorException(
"Cannot add data on undefined row");
314 if (jIndex < -1 or jIndex >= m_col_global_size)
315 throw FatalErrorException(
"column index undefined");
316 const Integer row_start = m_row_starts[local_row];
317 Integer& row_size = m_row_sizes[local_row];
318 Integer row_capacity = m_row_starts[local_row + 1] - row_start;
320 Real* found_value = intrusive_vmap_insert(jIndex, hint_pos, row_size, row_capacity,
321 m_cols.unguardedBasePointer() + row_start,
322 m_values.unguardedBasePointer() + row_start);
324 *found_value = value;
326 m_extras[local_row][jIndex] = value;
332 void DirectMatrixBuilder::setData(
const Integer iIndex,
const Real factor,
333 ConstArrayView<Integer> jIndexes, ConstArrayView<Real> jValues)
336 ALIEN_ASSERT((m_allocated), (
"Not allocated matrix"));
337 ALIEN_ASSERT((jIndexes.size() == jValues.size()),
338 (
"Inconsistent sizes: %d vs %d", jIndexes.size(), jValues.size()));
342 const Integer local_row = iIndex - m_local_offset;
343#ifdef CHECKPROFILE_ON_FILLING
344 if (local_row < 0 or local_row >= m_local_size)
345 throw FatalErrorException(
"Cannot add data on undefined row");
347 const Integer row_start = m_row_starts[local_row];
348 Integer& row_size = m_row_sizes[local_row];
349 Integer row_capacity = m_row_starts[local_row + 1] - row_start;
350 ColValueData* local_extras =
nullptr;
352 for (Integer i = 0, n = jIndexes.size(); i < n; ++i) {
353 const Integer jIndex = jIndexes[i];
356 if (jIndex < -1 or jIndex >= m_col_global_size)
357 throw FatalErrorException(
"column index undefined");
359 Real* found_value = intrusive_vmap_insert(jIndex, hint_pos, row_size, row_capacity,
360 m_cols.unguardedBasePointer() + row_start,
361 m_values.unguardedBasePointer() + row_start);
363 *found_value = factor * jValues[i];
366 if (local_extras ==
nullptr)
367 local_extras = &m_extras[local_row];
368 (*local_extras)[jIndex] = factor * jValues[i];
376 void DirectMatrixBuilder::finalize()
381 m_matrix.impl()->unlock();
393 Finder(ConstArrayView<Integer> indexes,
const Integer offset)
395 for (Integer i = 0, is = indexes.size(); i < is; ++i)
396 m_index_set.insert(indexes[i] - offset);
398 bool operator()(
const Integer index)
const
400 return m_index_set.find(index) != m_index_set.end();
404 std::set<Integer> m_index_set;
408 IndexEnumerator(ConstArrayView<Integer> indexes,
const Integer offset)
413 [[nodiscard]]
bool end()
const {
return m_i >= m_indexes.size(); }
414 void operator++() { ++m_i; }
415 Integer operator*()
const {
return m_indexes[m_i] - m_offset; }
416 [[nodiscard]] Integer size()
const {
return m_indexes.size(); }
417 [[nodiscard]] Finder finder()
const {
return Finder(m_indexes, m_offset); }
421 const Integer m_offset;
422 const ConstArrayView<Integer> m_indexes;
423 std::set<Integer> m_index_set;
434 explicit Finder(
const Integer size)
437 bool operator()(
const Integer index)
const {
return index >= 0 and index < m_size; }
440 const Integer m_size;
444 explicit FullEnumerator(
const Integer size)
448 [[nodiscard]]
bool end()
const {
return m_i >= m_size; }
449 void operator++() { ++m_i; }
450 Integer operator*()
const {
return m_i; }
451 [[nodiscard]] Integer size()
const {
return m_size; }
452 [[nodiscard]] Finder finder()
const {
return Finder(m_size); }
456 const Integer m_size;
461 String DirectMatrixBuilder::stats()
const
463 std::ostringstream oss;
464 _stats(oss, FullEnumerator(m_local_size));
465 return String(oss.str());
470 String DirectMatrixBuilder::stats(ConstArrayView<Integer> ids)
const
472 std::ostringstream oss;
473 _stats(oss, IndexEnumerator(ids, m_local_offset));
474 return String(oss.str());
479 void DirectMatrixBuilder::squeeze()
481 bool need_squeeze =
false;
483 Integer total_size = 0, total_capacity = 0;
484 for (Integer i = 0; i < m_local_size; ++i) {
485 const Integer row_size = m_row_sizes[i];
486 const Integer row_capacity = m_row_starts[i + 1] - m_row_starts[i];
487 total_size += row_size;
488 total_capacity += row_capacity;
489 need_squeeze |= (row_size != row_capacity);
491 need_squeeze |= (!m_extras.empty());
492 for (
auto i = m_extras.begin(); i != m_extras.end(); ++i)
493 total_size += i->second.size();
497 need_squeeze = Arccore::MessagePassing::mpAllReduce(
498 m_parallel_mng, Arccore::MessagePassing::ReduceMax, need_squeeze);
504 UniqueArray<Integer> row_starts(m_local_size + 1);
505 UniqueArray<Integer> cols(total_size);
506 UniqueArray<Real> values(total_size);
509 for (Integer i = 0; i < m_local_size; ++i) {
510 auto ifinder = m_extras.find(i);
511 if (ifinder == m_extras.end()) {
512 const Integer row_start_orig = m_row_starts[i];
513 const Integer row_start = row_starts[i] = offset;
514 const Integer row_size = m_row_sizes[i];
515 for (Integer j = 0; j < row_size; ++j) {
516 cols[row_start + j] = m_cols[row_start_orig + j];
517 values[row_start + j] = m_values[row_start_orig + j];
522 const ColValueData& extra_col_value_data = ifinder->second;
523 const Integer row_start_orig = m_row_starts[i];
524 const Integer row_start = row_starts[i] = offset;
525 const Integer row_size = m_row_sizes[i];
527 Integer pos = row_start, j = 0;
528 ColValueData::const_iterator extra_j_iter = extra_col_value_data.begin();
532 while (extra_j_iter != extra_col_value_data.end()) {
533 cols[pos] = KEY_OF(extra_j_iter);
534 values[pos] = VALUE_OF(extra_j_iter);
540 if (extra_j_iter == extra_col_value_data.end()) {
541 while (j < row_size) {
542 cols[pos] = m_cols[row_start_orig + j];
543 values[pos] = m_values[row_start_orig + j];
551 if (m_cols[row_start_orig + j] < KEY_OF(extra_j_iter)) {
552 cols[pos] = m_cols[row_start_orig + j];
553 values[pos] = m_values[row_start_orig + j];
558 cols[pos] = KEY_OF(extra_j_iter);
559 values[pos] = VALUE_OF(extra_j_iter);
564 offset += row_size + extra_col_value_data.size();
567 row_starts[m_local_size] = offset;
568 ALIEN_ASSERT((offset == total_size), (
"Inconsistent total size"));
570 updateProfile(row_starts, cols, values);
575 void DirectMatrixBuilder::computeProfile(
const ConstArrayView<Integer> sizes)
577 UniqueArray<Integer> m_offset;
578 m_offset.resize(m_nproc + 1);
579 if (m_parallel_mng) {
580 Arccore::MessagePassing::mpAllGather(m_parallel_mng,
581 ConstArrayView<Integer>(1, &m_local_offset), m_offset.subView(0, m_nproc));
583 m_offset[m_nproc] = m_global_size;
585 SimpleCSRInternal::CSRStructInfo& profile = m_matrix_impl->internal()->getCSRProfile();
586 profile.init(m_local_size);
588 m_row_starts = profile.getRowOffset();
591 for (Integer i = 0; i < m_local_size; ++i) {
592 m_row_starts[i] = offset;
595 m_row_starts[m_local_size] = offset;
598 profile.setTimestamp(m_matrix_impl->timestamp() + 1);
600 m_cols = profile.getCols();
601 profile.getColOrdering() = SimpleCSRInternal::CSRStructInfo::eUndef;
603 m_matrix_impl->allocate();
604 m_values = m_matrix_impl->internal()->getValues();
614 void DirectMatrixBuilder::updateProfile(UniqueArray<Integer>& row_starts,
615 UniqueArray<Integer>& cols, UniqueArray<Real>& values)
617 SimpleCSRInternal::CSRStructInfo& profile = m_matrix_impl->internal()->getCSRProfile();
618 profile.getRowOffset().copy(row_starts);
619 m_row_starts = profile.getRowOffset();
620 profile.getCols() = cols;
621 m_cols = profile.getCols();
622 profile.getColOrdering() = SimpleCSRInternal::CSRStructInfo::eFull;
624 UniqueArray<Integer> offset;
625 offset.resize(m_nproc + 1);
626 if (m_parallel_mng) {
627 Arccore::MessagePassing::mpAllGather(m_parallel_mng,
628 ConstArrayView<Integer>(1, &m_local_offset), offset.subView(0, m_nproc));
630 offset[m_nproc] = m_global_size;
632 m_matrix_impl->allocate();
633 m_matrix_impl->internal()->getValues() = values;
636 m_matrix_impl->parallelStart(offset, m_parallel_mng,
true);
639 m_matrix_impl->sequentialStart();
640 m_values = m_matrix_impl->internal()->getValues();
645 template <
typename Enumerator>
646 void DirectMatrixBuilder::_stats(std::ostream& o,
const Enumerator& e)
const
648 bool need_squeeze =
false;
649 Integer total_used_data = 0;
650 Integer min_used_data = (m_local_size > 0) ? std::numeric_limits<Integer>::max() : 0,
652 Integer total_reserved_data = 0;
653 Integer min_reserved_data =
654 (m_local_size > 0) ? std::numeric_limits<Integer>::max() : 0,
655 max_reserved_data = 0;
656 for (Enumerator ie = e; !ie.end(); ++ie) {
657 const Integer i = *ie;
658 const Integer data_capacity = m_row_starts[i + 1] - m_row_starts[i];
659 max_reserved_data = std::max(max_reserved_data, data_capacity);
660 min_reserved_data = std::min(min_reserved_data, data_capacity);
661 total_reserved_data += data_capacity;
662 const Integer data_size = m_row_sizes[i];
663 min_used_data = std::min(min_used_data, data_size);
664 max_used_data = std::max(max_used_data, data_size);
665 total_used_data += data_size;
666 need_squeeze |= (data_capacity != data_size);
668 min_used_data = std::min(min_used_data, max_used_data);
669 min_reserved_data = std::min(min_reserved_data, max_reserved_data);
671 Integer total_extra_data = 0;
672 Integer min_extra_data =
673 (!m_extras.empty()) ? std::numeric_limits<Integer>::max() : 0,
675 Integer extra_count = 0;
676 typename Enumerator::Finder finder = e.finder();
677 for (ExtraRows::const_iterator j = m_extras.begin(); j != m_extras.end(); ++j) {
678 if (finder(j->first)) {
680 const Integer data_size = j->second.size();
681 min_extra_data = std::min(min_extra_data, data_size);
682 max_extra_data = std::max(max_extra_data, data_size);
683 total_extra_data += data_size;
686 min_extra_data = std::min(min_extra_data, max_extra_data);
687 need_squeeze |= (extra_count > 0);
689 need_squeeze = Arccore::MessagePassing::mpAllReduce(
690 m_parallel_mng, Arccore::MessagePassing::ReduceMax, need_squeeze);
692 const Integer size = e.size();
693 o <<
"Total used data = " << total_used_data <<
" on " << size
695 <<
"Min / Mean / Max used data = " << min_used_data << std::setprecision(2)
696 <<
" / " << ((size) ? (double(total_used_data) / size) : 0) <<
" / "
697 << max_used_data <<
"\n"
698 <<
"Total extra data = " << total_extra_data <<
" on " << extra_count
700 <<
"Min / Mean / Max extra data = " << min_extra_data << std::setprecision(2)
701 <<
" / " << ((extra_count) ? (double(total_extra_data) / extra_count) : 0) <<
" / "
702 << max_extra_data <<
"\n"
703 <<
"Total reserved data = " << total_reserved_data
704 << std::setprecision(3) <<
" ("
705 << ((total_used_data) ? (100 * double(total_reserved_data) / total_used_data) : 0)
706 <<
"% of used data)\n"
707 <<
"Min / Mean / Max reserved data = " << min_reserved_data << std::setprecision(2)
708 <<
" / " << ((size) ? (double(total_reserved_data) / size) : 0) <<
" / "
709 << max_reserved_data <<
"\n"
710 <<
"Need squeeze optimization = " << std::boolalpha << need_squeeze <<
"\n";
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --