12#ifndef ARCANE_CORE_PARALLEL_BITONICSORT_IMPL_H
13#define ARCANE_CORE_PARALLEL_BITONICSORT_IMPL_H
17#include "arcane/utils/TraceAccessor.h"
18#include "arcane/utils/FatalErrorException.h"
20#include "arcane/core/IParallelMng.h"
21#include "arcane/core/parallel/BitonicSort.h"
35, m_parallel_mng(parallel_mng)
39template <
typename KeyType,
typename KeyTypeTraits> BitonicSort<KeyType, KeyTypeTraits>::
40BitonicSort(IParallelMng* parallel_mng,
const KeyTypeTraits& traits)
41: TraceAccessor(parallel_mng->traceMng())
42, m_parallel_mng(parallel_mng)
50template <
typename KeyType,
typename KeyTypeTraits>
void BitonicSort<KeyType, KeyTypeTraits>::
51_init(ConstArrayView<KeyType> keys)
53 m_init_size = keys.size();
57 m_size = m_parallel_mng->reduce(Parallel::ReduceMax, m_init_size);
59 m_keys.resize(m_size);
60 m_key_ranks.resize(m_size);
61 m_key_indexes.resize(m_size);
63 Int32 rank = m_parallel_mng->commRank();
67 for (Integer i = 0; i < m_init_size; ++i) {
70 m_key_ranks[i] = rank;
75 KeyType max_value = m_traits.maxValue();
76 for (Int64 i = m_init_size; i < m_size; i++) {
77 m_keys[i] = max_value;
78 m_key_indexes[i] = -1;
79 m_key_ranks[i] = rank;
96template <
typename KeyType,
typename KeyTypeTraits>
void
97BitonicSort<KeyType, KeyTypeTraits>::
104 info() <<
"BITONIC_SORT (64bit) want_rank?=" << m_want_index_and_rank
105 <<
" size=" << m_size
106 <<
" memsize=" <<
sizeof(KeyType) * m_size
107 <<
" structsize=" <<
sizeof(KeyType);
115 Int32 nb_rank = m_parallel_mng->commSize();
116 while (nb_rank > test) {
123 for (
Int32 ilevel = 0; ilevel < nb_level; ++ilevel) {
127 for (
Int32 jproc = 0; jproc < nb_rank; jproc += size) {
128 _mergeLevels(jproc, size);
136 for (
Integer i = 0; i < m_size; ++i) {
137 if (!m_traits.isValid(m_keys[i])) {
143 info() <<
"END_BITONIC_SORT nb_merge=" << m_nb_merge;
145 m_keys.resize(m_size);
146 m_key_ranks.resize(m_size);
147 m_key_indexes.resize(m_size);
162template <
typename KeyType,
typename KeyTypeTraits>
void
167 for (
Int32 i = 0; i < size / 2; ++i)
168 _mergeProcessors(begin + i, begin + size - 1 - i);
174 Int32 div_size = size;
175 while (div_size > 1) {
177 for (
Int32 iproc = 0; iproc < size; iproc += div_size)
178 _separator(begin + iproc, div_size);
187template <
typename KeyType,
typename KeyTypeTraits>
void BitonicSort<KeyType, KeyTypeTraits>::
188_separator(Int32 begin, Int32 size)
190 for (Int32 i = 0; i < size / 2; ++i)
191 _mergeProcessors(begin + i, begin + i + size / 2);
210template <
typename KeyType,
typename KeyTypeTraits>
void BitonicSort<KeyType, KeyTypeTraits>::
211_mergeProcessors(Int32 iproc1, Int32 iproc2)
213 if (iproc1 >= iproc2)
214 ARCANE_FATAL(
"Invalid merge iproc1={0} iproc2={1}", iproc1, iproc2);
216 Int32 my_rank = m_parallel_mng->commRank();
218 if (iproc2 >= m_parallel_mng->commSize())
221 bool is_proc2 = (iproc2 == my_rank);
222 bool is_proc1 = (iproc1 == my_rank);
224 if (!is_proc1 && !is_proc2)
227 info() <<
"SORT iproc1=" << iproc1 <<
" iproc2=" << iproc2;
230 const Int64 buf_size = m_size;
231 const bool is_want_index_rank = m_want_index_and_rank;
238 UniqueArray<KeyType> send_keys(m_keys);
239 UniqueArray<Request> requests;
240 requests.add(m_traits.send(m_parallel_mng, iproc1, send_keys));
241 if (is_want_index_rank) {
242 requests.add(m_parallel_mng->send(m_key_indexes, iproc1,
false));
243 requests.add(m_parallel_mng->send(m_key_ranks, iproc1,
false));
246 requests.add(m_traits.recv(m_parallel_mng, iproc1, m_keys));
247 if (is_want_index_rank) {
248 requests.add(m_parallel_mng->recv(m_key_indexes, iproc1,
false));
249 requests.add(m_parallel_mng->recv(m_key_ranks, iproc1,
false));
251 m_parallel_mng->waitAllRequests(requests);
260 UniqueArray<KeyType> buf2(buf_size);
266 if (is_want_index_rank) {
267 buf2_proc.resize(buf_size);
268 buf2_local_id.resize(buf_size);
271 Request recv_request = m_traits.recv(m_parallel_mng, iproc2, buf2);
272 m_parallel_mng->waitAllRequests(ArrayView<Request>(1, &recv_request));
273 if (is_want_index_rank) {
274 m_parallel_mng->recv(buf2_local_id, iproc2);
275 m_parallel_mng->recv(buf2_proc, iproc2);
279 buf2.add(m_traits.maxValue());
282 Int64 total_size = m_size + buf_size;
283 UniqueArray<KeyType> buf_merge(total_size);
284 UniqueArray<Int32> buf_proc_merge;
285 UniqueArray<Int32> buf_local_id_merge;
286 if (is_want_index_rank) {
287 buf_proc_merge.resize(total_size);
288 buf_local_id_merge.resize(total_size);
292 info() <<
"SIZE=" << buf_size <<
" total=" << total_size <<
" m_size=" << m_size
293 <<
" proc2=" << iproc2 <<
" init-size=" << m_init_size;
300 for (Int64 i = 0; i < total_size; ++i) {
301 if (cursor1 >= m_size || (m_traits.compareLess(buf2[cursor2], m_keys[cursor1]) && cursor2 < buf_size)) {
302 buf_merge[i] = buf2[cursor2];
303 if (is_want_index_rank) {
304 buf_local_id_merge[i] = buf2_local_id[cursor2];
305 buf_proc_merge[i] = buf2_proc[cursor2];
310 buf_merge[i] = m_keys[cursor1];
311 if (is_want_index_rank) {
312 buf_local_id_merge[i] = m_key_indexes[cursor1];
313 buf_proc_merge[i] = m_key_ranks[cursor1];
318 buf2.resize(buf_size);
321 for (Integer ii = 0; ii < buf_size; ii++) {
322 buf2[ii] = buf_merge[m_size + ii];
323 if (is_want_index_rank) {
324 buf2_local_id[ii] = buf_local_id_merge[m_size + ii];
325 buf2_proc[ii] = buf_proc_merge[m_size + ii];
328 Request send_request = m_traits.send(m_parallel_mng, iproc2, buf2);
329 m_parallel_mng->waitAllRequests(ArrayView<Request>(1, &send_request));
330 if (is_want_index_rank) {
331 m_parallel_mng->send(buf2_local_id, iproc2);
332 m_parallel_mng->send(buf2_proc, iproc2);
336 for (Integer i = 0; i < buf_size; i++) {
337 m_keys[i] = buf_merge[i];
338 if (is_want_index_rank) {
339 m_key_indexes[i] = buf_local_id_merge[i];
340 m_key_ranks[i] = buf_proc_merge[i];
354template <
typename KeyType,
typename KeyTypeTraits>
355void BitonicSort<KeyType, KeyTypeTraits>::
363 Int64 l = m_size / 2;
364 Int64 ir = m_size - 1;
368 Integer tmp_local_id = NULL_ITEM_LOCAL_ID;
369 Integer tmp_proc = A_NULL_RANK;
375 tmp_local_id = m_key_indexes[l];
376 tmp_proc = m_key_ranks[l];
380 tmp_local_id = m_key_indexes[ir];
381 tmp_proc = m_key_ranks[ir];
383 m_keys[ir] = m_keys[0];
384 m_key_indexes[ir] = m_key_indexes[0];
385 m_key_ranks[ir] = m_key_ranks[0];
389 m_key_indexes[0] = tmp_local_id;
390 m_key_ranks[0] = tmp_proc;
397 if (j < ir && m_traits.compareLess(m_keys[j], m_keys[j + 1]))
399 if (m_traits.compareLess(rra, m_keys[j])) {
400 m_keys[i] = m_keys[j];
401 m_key_indexes[i] = m_key_indexes[j];
402 m_key_ranks[i] = m_key_ranks[j];
411 m_key_indexes[i] = tmp_local_id;
412 m_key_ranks[i] = tmp_proc;
#define ARCANE_FATAL(...)
Macro envoyant une exception FatalErrorException.
void resize(Int64 s)
Change le nombre d'éléments du tableau à s.
Vue constante d'un tableau de type T.
Interface du gestionnaire de parallélisme pour un sous-domaine.
Algorithme de tri bi-tonique parallèle.
ConstArrayView< KeyType > keys() const override
Après un tri, retourne la liste des éléments de ce rang.
Classe d'accès aux traces.
TraceMessage info() const
Flot pour un message d'information.
Implémentation de la concurrence.
Int32 Integer
Type représentant un entier.
UniqueArray< Int32 > Int32UniqueArray
Tableau dynamique à une dimension d'entiers 32 bits.
std::int32_t Int32
Type entier signé sur 32 bits.