Arcane  v4.1.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
ReduceMemoryImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* ReduceMemoryImpl.cc (C) 2000-2025 */
9/* */
10/* Gestion de la mémoire pour les réductions. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arcane/accelerator/core/internal/ReduceMemoryImpl.h"
15
16#include "arccore/base/CheckedConvert.h"
17#include "arccore/base/PlatformUtils.h"
18
19#include "arccore/common/MemoryUtils.h"
20
21#include "arcane/accelerator/core/Runner.h"
22#include "arcane/accelerator/core/Memory.h"
23#include "arcane/accelerator/core/internal/IRunQueueStream.h"
24#include "arcane/accelerator/core/internal/RunCommandImpl.h"
25#include "arcane/accelerator/core/internal/RunnerImpl.h"
26
27/*---------------------------------------------------------------------------*/
28/*---------------------------------------------------------------------------*/
29
30namespace Arcane::Accelerator::impl
31{
32namespace
33{
34 IMemoryAllocator* _getAllocator(eMemoryRessource r)
35 {
37 }
38} // namespace
39
40/*---------------------------------------------------------------------------*/
41/*---------------------------------------------------------------------------*/
42
43ReduceMemoryImpl::
44ReduceMemoryImpl(RunCommandImpl* p)
45: m_command(p)
46, m_device_memory_bytes(_getAllocator(eMemoryRessource::Device))
47, m_host_memory_bytes(_getAllocator(eMemoryRessource::HostPinned))
48, m_grid_buffer(_getAllocator(eMemoryRessource::Device))
49, m_grid_device_count(_getAllocator(eMemoryRessource::Device))
50{
51 _allocateMemoryForReduceData(128);
52 _allocateMemoryForGridDeviceCount();
53 m_grid_memory_info.m_warp_size = p->runner()->deviceInfo().warpSize();
54}
55
56/*---------------------------------------------------------------------------*/
57/*---------------------------------------------------------------------------*/
58
59void ReduceMemoryImpl::
60release()
61{
62 m_command->releaseReduceMemoryImpl(this);
63}
64
65/*---------------------------------------------------------------------------*/
66/*---------------------------------------------------------------------------*/
67
68void* ReduceMemoryImpl::
69allocateReduceDataMemory(ConstMemoryView identity_view)
70{
71 auto identity_span = identity_view.bytes();
72 Int32 data_type_size = static_cast<Int32>(identity_span.size());
73 m_data_type_size = data_type_size;
74 if (data_type_size > m_size)
75 _allocateMemoryForReduceData(data_type_size);
76
77 // Recopie \a identity_view dans un buffer car on utilise l'asynchronisme
78 // et la zone pointée par \a identity_view n'est pas forcément conservée
79 m_identity_buffer.copy(identity_view.bytes());
80 MemoryCopyArgs copy_args(m_device_memory, m_identity_buffer.span().data(), data_type_size);
81 m_command->internalStream()->copyMemory(copy_args.addAsync());
82
83 return m_device_memory;
84}
85
86/*---------------------------------------------------------------------------*/
87/*---------------------------------------------------------------------------*/
88
89void ReduceMemoryImpl::
90_allocateGridDataMemory()
91{
92 // TODO: pouvoir utiliser un padding pour éviter que les lignes de cache
93 // entre les blocs se chevauchent
94 Int32 total_size = CheckedConvert::toInt32(m_data_type_size * m_grid_size);
95 if (total_size <= m_grid_memory_info.m_grid_memory_values.bytes().size())
96 return;
97
98 m_grid_buffer.resize(total_size);
99
100 auto mem_view = makeMutableMemoryView(m_grid_buffer.span());
101 m_grid_memory_info.m_grid_memory_values = mem_view;
102}
103
104/*---------------------------------------------------------------------------*/
105/*---------------------------------------------------------------------------*/
106
107void ReduceMemoryImpl::
108_allocateMemoryForGridDeviceCount()
109{
110 // Alloue sur le device la mémoire contenant le nombre de blocs restant à traiter
111 // Il s'agit d'un seul entier non signé.
112 Int64 size = sizeof(unsigned int);
113 const unsigned int zero = 0;
114 m_grid_device_count.resize(1);
115 auto* ptr = m_grid_device_count.data();
116
117 m_grid_memory_info.m_grid_device_count = ptr;
118
119 // Initialise cette zone mémoire avec 0.
120 MemoryCopyArgs copy_args(ptr, &zero, size);
121 m_command->internalStream()->copyMemory(copy_args);
122}
123
124/*---------------------------------------------------------------------------*/
125/*---------------------------------------------------------------------------*/
126
127void ReduceMemoryImpl::
128copyReduceValueFromDevice()
129{
130 void* destination = m_grid_memory_info.m_host_memory_for_reduced_value;
131 void* source = m_device_memory;
132 MemoryCopyArgs copy_args(destination, source, m_data_type_size);
133 m_command->internalStream()->copyMemory(copy_args);
134}
135
136/*---------------------------------------------------------------------------*/
137/*---------------------------------------------------------------------------*/
138
139extern "C++" IReduceMemoryImpl*
140internalGetOrCreateReduceMemoryImpl(RunCommand* command)
141{
142 return command->m_p->getOrCreateReduceMemoryImpl();
143}
144
145/*---------------------------------------------------------------------------*/
146/*---------------------------------------------------------------------------*/
147
148} // namespace Arcane::Accelerator::impl
149
150/*---------------------------------------------------------------------------*/
151/*---------------------------------------------------------------------------*/
Arguments pour la copie mémoire.
Definition Memory.h:61
Gestion d'une commande sur accélérateur.
Interface de la gestion mémoire pour les réductions.
std::byte * m_device_memory
Pointeur vers la mémoire unifiée contenant la donnée réduite.
Int64 m_data_type_size
Taille de la donnée actuelle.
UniqueArray< std::byte > m_identity_buffer
Buffer pour conserver la valeur de l'identité
Int64 m_size
Taille allouée pour m_device_memory.
Implémentation d'une commande pour accélérateur.
Vue constante sur une zone mémoire contigue contenant des éléments de taille fixe.
constexpr SpanType bytes() const
Vue sous forme d'octets.
ARCCORE_COMMON_EXPORT IMemoryAllocator * getAllocator(eMemoryResource mem_resource)
Allocateur par défaut pour la ressource mem_resource.
std::int64_t Int64
Type entier signé sur 64 bits.
MutableMemoryView makeMutableMemoryView(void *ptr, Int32 datatype_size, Int64 nb_element)
Créé une vue mémoire modifiable.
Definition MemoryView.cc:26
Arcane::eMemoryResource eMemoryRessource
Typedef pour la version Arcane historique (avec 2's')
@ HostPinned
Alloue sur l'hôte.
std::int32_t Int32
Type entier signé sur 32 bits.