Arcane  v3.16.9.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
ReduceMemoryImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* ReduceMemoryImpl.cc (C) 2000-2025 */
9/* */
10/* Gestion de la mémoire pour les réductions. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
#include "arcane/accelerator/core/internal/ReduceMemoryImpl.h"

#include "arcane/utils/CheckedConvert.h"
#include "arcane/utils/PlatformUtils.h"
#include "arcane/utils/MemoryUtils.h"

#include "arcane/accelerator/core/Runner.h"
#include "arcane/accelerator/core/Memory.h"
#include "arcane/accelerator/core/internal/IRunQueueStream.h"
#include "arcane/accelerator/core/internal/RunCommandImpl.h"
#include "arcane/accelerator/core/internal/RunnerImpl.h"
25
26/*---------------------------------------------------------------------------*/
27/*---------------------------------------------------------------------------*/
28
29namespace Arcane::Accelerator::impl
30{
31namespace
32{
33 IMemoryAllocator* _getAllocator(eMemoryRessource r)
34 {
36 }
37} // namespace
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
42ReduceMemoryImpl::
43ReduceMemoryImpl(RunCommandImpl* p)
44: m_command(p)
45, m_device_memory_bytes(_getAllocator(eMemoryRessource::Device))
46, m_host_memory_bytes(_getAllocator(eMemoryRessource::HostPinned))
47, m_grid_buffer(_getAllocator(eMemoryRessource::Device))
48, m_grid_device_count(_getAllocator(eMemoryRessource::Device))
49{
50 _allocateMemoryForReduceData(128);
51 _allocateMemoryForGridDeviceCount();
52 m_grid_memory_info.m_warp_size = p->runner()->deviceInfo().warpSize();
53}
54
55/*---------------------------------------------------------------------------*/
56/*---------------------------------------------------------------------------*/
57
58void ReduceMemoryImpl::
59release()
60{
61 m_command->releaseReduceMemoryImpl(this);
62}
63
64/*---------------------------------------------------------------------------*/
65/*---------------------------------------------------------------------------*/
66
67void* ReduceMemoryImpl::
68allocateReduceDataMemory(ConstMemoryView identity_view)
69{
70 auto identity_span = identity_view.bytes();
71 Int32 data_type_size = static_cast<Int32>(identity_span.size());
72 m_data_type_size = data_type_size;
73 if (data_type_size > m_size)
74 _allocateMemoryForReduceData(data_type_size);
75
76 // Recopie \a identity_view dans un buffer car on utilise l'asynchronisme
77 // et la zone pointée par \a identity_view n'est pas forcément conservée
78 m_identity_buffer.copy(identity_view.bytes());
79 MemoryCopyArgs copy_args(m_device_memory, m_identity_buffer.span().data(), data_type_size);
80 m_command->internalStream()->copyMemory(copy_args.addAsync());
81
82 return m_device_memory;
83}
84
85/*---------------------------------------------------------------------------*/
86/*---------------------------------------------------------------------------*/
87
88void ReduceMemoryImpl::
89_allocateGridDataMemory()
90{
91 // TODO: pouvoir utiliser un padding pour éviter que les lignes de cache
92 // entre les blocs se chevauchent
93 Int32 total_size = CheckedConvert::toInt32(m_data_type_size * m_grid_size);
94 if (total_size <= m_grid_memory_info.m_grid_memory_values.bytes().size())
95 return;
96
97 m_grid_buffer.resize(total_size);
98
99 auto mem_view = makeMutableMemoryView(m_grid_buffer.span());
100 m_grid_memory_info.m_grid_memory_values = mem_view;
101}
102
103/*---------------------------------------------------------------------------*/
104/*---------------------------------------------------------------------------*/
105
106void ReduceMemoryImpl::
107_allocateMemoryForGridDeviceCount()
108{
109 // Alloue sur le device la mémoire contenant le nombre de blocs restant à traiter
110 // Il s'agit d'un seul entier non signé.
111 Int64 size = sizeof(unsigned int);
112 const unsigned int zero = 0;
113 m_grid_device_count.resize(1);
114 auto* ptr = m_grid_device_count.data();
115
116 m_grid_memory_info.m_grid_device_count = ptr;
117
118 // Initialise cette zone mémoire avec 0.
119 MemoryCopyArgs copy_args(ptr, &zero, size);
120 m_command->internalStream()->copyMemory(copy_args);
121}
122
123/*---------------------------------------------------------------------------*/
124/*---------------------------------------------------------------------------*/
125
126void ReduceMemoryImpl::
127copyReduceValueFromDevice()
128{
129 void* destination = m_grid_memory_info.m_host_memory_for_reduced_value;
130 void* source = m_device_memory;
131 MemoryCopyArgs copy_args(destination, source, m_data_type_size);
132 m_command->internalStream()->copyMemory(copy_args);
133}
134
135/*---------------------------------------------------------------------------*/
136/*---------------------------------------------------------------------------*/
137
138extern "C++" IReduceMemoryImpl*
139internalGetOrCreateReduceMemoryImpl(RunCommand* command)
140{
141 return command->m_p->getOrCreateReduceMemoryImpl();
142}
143
144/*---------------------------------------------------------------------------*/
145/*---------------------------------------------------------------------------*/
146
147} // namespace Arcane::Accelerator::impl
148
149/*---------------------------------------------------------------------------*/
150/*---------------------------------------------------------------------------*/
Fonctions de gestion mémoire et des allocateurs.
Arguments pour la copie mémoire.
Definition Memory.h:63
Gestion d'une commande sur accélérateur.
Interface de la gestion mémoire pour les réductions.
std::byte * m_device_memory
Pointeur vers la mémoire unifiée contenant la donnée réduite.
Int64 m_data_type_size
Taille de la donnée actuelle.
UniqueArray< std::byte > m_identity_buffer
Buffer pour conserver la valeur de l'identité
Int64 m_size
Taille allouée pour m_device_memory.
Implémentation d'une commande pour accélérateur.
Vue constante sur une zone mémoire contiguë contenant des éléments de taille fixe.
Definition MemoryView.h:38
constexpr SpanType bytes() const
Vue sous forme d'octets.
Definition MemoryView.h:107
Int32 toInt32(Int64 v)
Convertit un Int64 en un Int32.
IMemoryAllocator * getAllocator(eMemoryResource mem_resource)
Allocateur par défaut pour la ressource mem_resource.
MutableMemoryView makeMutableMemoryView(void *ptr, Int32 datatype_size, Int64 nb_element)
Crée une vue mémoire modifiable.
std::int64_t Int64
Type entier signé sur 64 bits.
Arcane::eMemoryResource eMemoryRessource
Typedef pour la version Arcane historique (avec 2's')
@ HostPinned
Alloue sur l'hôte.
std::int32_t Int32
Type entier signé sur 32 bits.