d4/dc1/SyclAcceleratorRuntime_8cc_source.html

// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-

//-----------------------------------------------------------------------------

// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)

// See the top-level COPYRIGHT file for details.

// SPDX-License-Identifier: Apache-2.0

//-----------------------------------------------------------------------------

/*---------------------------------------------------------------------------*/

/* SyclAcceleratorRuntime.cc                                   (C) 2000-2026 */

/*                                                                           */

/* Runtime for 'SYCL'.                                                       */

/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


#include "arccore/accelerator_native/SyclAccelerator.h"


#include "arccore/base/FatalErrorException.h"

#include "arccore/base/NotImplementedException.h"

#include "arccore/base/NotSupportedException.h"


#include "arccore/common/AlignedMemoryAllocator.h"

#include "arccore/common/AllocatedMemoryInfo.h"

#include "arccore/common/internal/MemoryUtilsInternal.h"

#include "arccore/common/internal/IMemoryResourceMngInternal.h"


#include "arccore/common/accelerator/RunQueueBuildInfo.h"

#include "arccore/common/accelerator/Memory.h"

#include "arccore/common/accelerator/DeviceInfoList.h"

#include "arccore/common/accelerator/KernelLaunchArgs.h"

#include "arccore/common/accelerator/RunQueue.h"

#include "arccore/common/accelerator/DeviceMemoryInfo.h"

#include "arccore/common/accelerator/NativeStream.h"

#include "arccore/common/accelerator/internal/IRunnerRuntime.h"

#include "arccore/common/accelerator/internal/RegisterRuntimeInfo.h"

#include "arccore/common/accelerator/internal/RunCommandImpl.h"

#include "arccore/common/accelerator/internal/IRunQueueStream.h"

#include "arccore/common/accelerator/internal/IRunQueueEventImpl.h"


namespace Arcane::Accelerator::Sycl

{

using Arcane::Accelerator::Impl::KernelLaunchArgs;


//! Writes a warning on std::cout naming the current function (via A_FUNCINFO).
//! Used by the SYCL runtime entry points that are not handled yet.
//! The continuation line must directly follow the backslash, otherwise the
//! macro body is empty.
#define ARCCORE_SYCL_FUNC_NOT_HANDLED \
  std::cout << "WARNING: SYCL: function not handled " << A_FUNCINFO << "\n"


class SyclRunnerRuntime;


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


// This queue is used for memory allocations and deallocations.

// It must therefore always exist because we do not know when

// the last deallocation will occur.

sycl::queue global_default_queue;

namespace

{

  //! Returns a reference to the global queue used by the allocators of this file.
  sycl::queue& _defaultQueue()

  {

    return global_default_queue;

  }

} // namespace


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Base class of the SYCL memory allocators of this file.
 *
 * The real allocation and deallocation are delegated to the derived class
 * through _allocate() and _deallocate(). Both always receive the global
 * default queue. Returned addresses are checked to be aligned on 128 bytes,
 * which is the alignment given to AlignedMemoryAllocator.
 */
class SyclMemoryAllocatorBase
: public AlignedMemoryAllocator
{
 public:

  SyclMemoryAllocatorBase()
  : AlignedMemoryAllocator(128)
  {}

  //! This allocator provides reallocate().
  bool hasRealloc(MemoryAllocationArgs) const override { return true; }

  /*!
   * \brief Allocates \a new_size bytes.
   *
   * Throws a fatal error if the derived class returns a null pointer or an
   * address which is not aligned on 128 bytes.
   */
  AllocatedMemoryInfo allocate(MemoryAllocationArgs args, Int64 new_size) override
  {
    sycl::queue& q = _defaultQueue();
    void* out = nullptr;
    _allocate(&out, new_size, args, q);
    if (!out)
      ARCCORE_FATAL("Can not allocate memory size={0}", new_size);
    Int64 a = reinterpret_cast<Int64>(out);
    if ((a % 128) != 0)
      ARCCORE_FATAL("Bad alignment for SYCL allocator: offset={0}", (a % 128));
    return { out, new_size };
  }

  /*!
   * \brief Allocates a new block of \a new_size bytes, copies the content of
   * \a current_ptr into it and releases \a current_ptr.
   *
   * Only min(current size, new_size) bytes are copied so that shrinking a
   * block never writes past the end of the new allocation. Nothing is copied
   * when \a current_ptr has a null base address. A fatal error is thrown
   * (before any allocation) if the size of the current block is unknown.
   */
  AllocatedMemoryInfo reallocate(MemoryAllocationArgs args, AllocatedMemoryInfo current_ptr, Int64 new_size) override
  {
    sycl::queue& q = _defaultQueue();
    void* current_address = current_ptr.baseAddress();
    const Int64 current_size = current_ptr.size();
    // Checked before allocating to avoid leaking the new block.
    if (current_address && current_size < 0)
      ARCCORE_FATAL("Can not reallocate memory: size of current block is unknown (new_size={0})", new_size);

    AllocatedMemoryInfo a = allocate(args, new_size);

    // Never copy more bytes than the new block can hold.
    const Int64 nb_byte_to_copy = (current_size < new_size) ? current_size : new_size;
    if (current_address && nb_byte_to_copy > 0) {
      q.submit([&](sycl::handler& cgh) {
        cgh.memcpy(a.baseAddress(), current_address, static_cast<size_t>(nb_byte_to_copy));
      });
      // The copy must be finished before the old block is released.
      q.wait();
    }

    deallocate(args, current_ptr);
    return a;
  }

  //! Releases the block whose base address is given by \a ptr.
  void deallocate(MemoryAllocationArgs args, AllocatedMemoryInfo ptr) override
  {
    sycl::queue& q = _defaultQueue();
    _deallocate(ptr.baseAddress(), args, q);
  }

 protected:

  //! Allocates \a new_size bytes with queue \a q and stores the address in \a ptr.
  virtual void _allocate(void** ptr, size_t new_size, MemoryAllocationArgs, sycl::queue& q) = 0;
  //! Releases the memory at address \a ptr with queue \a q.
  virtual void _deallocate(void* ptr, MemoryAllocationArgs, sycl::queue& q) = 0;
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Allocator using 'sycl::malloc_shared'.
 *
 * Associated with the memory resource eMemoryResource::UnifiedMemory.
 */
class UnifiedMemorySyclMemoryAllocator
: public SyclMemoryAllocatorBase
{
 protected:

  //! Stores in \a out_address the result of 'sycl::malloc_shared' for \a nb_byte bytes.
  void _allocate(void** out_address, size_t nb_byte, MemoryAllocationArgs, sycl::queue& queue) override
  {
    void* shared_block = sycl::malloc_shared(nb_byte, queue);
    *out_address = shared_block;
  }

  //! Releases \a address with 'sycl::free'.
  void _deallocate(void* address, MemoryAllocationArgs, sycl::queue& queue) override
  {
    sycl::free(address, queue);
  }

  //! Memory resource handled by this allocator.
  eMemoryResource memoryResource() const override
  {
    return eMemoryResource::UnifiedMemory;
  }
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Allocator using 'sycl::malloc_host'.
 *
 * Associated with the memory resource eMemoryResource::HostPinned.
 */
class HostPinnedSyclMemoryAllocator
: public SyclMemoryAllocatorBase
{
 protected:

  //! Stores in \a out_address the result of 'sycl::malloc_host' for \a nb_byte bytes.
  void _allocate(void** out_address, size_t nb_byte, MemoryAllocationArgs, sycl::queue& queue) override
  {
    // TODO: Make host-pinned
    void* host_block = sycl::malloc_host(nb_byte, queue);
    *out_address = host_block;
  }

  //! Releases \a address with 'sycl::free'.
  void _deallocate(void* address, MemoryAllocationArgs, sycl::queue& queue) override
  {
    sycl::free(address, queue);
  }

  //! Memory resource handled by this allocator.
  eMemoryResource memoryResource() const override
  {
    return eMemoryResource::HostPinned;
  }
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Allocator using 'sycl::malloc_device'.
 *
 * Associated with the memory resource eMemoryResource::Device.
 */
class DeviceSyclMemoryAllocator
: public SyclMemoryAllocatorBase
{
 protected:

  //! Stores in \a out_address the result of 'sycl::malloc_device' for \a nb_byte bytes.
  void _allocate(void** out_address, size_t nb_byte, MemoryAllocationArgs, sycl::queue& queue) override
  {
    void* device_block = sycl::malloc_device(nb_byte, queue);
    *out_address = device_block;
  }

  //! Releases \a address with 'sycl::free'.
  void _deallocate(void* address, MemoryAllocationArgs, sycl::queue& queue) override
  {
    sycl::free(address, queue);
  }

  //! Memory resource handled by this allocator.
  eMemoryResource memoryResource() const override
  {
    return eMemoryResource::Device;
  }
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


// File-local allocator instances, one per allocator class defined in this file.
// Their addresses are given to the memory resource manager by
// arcaneRegisterAcceleratorRuntimesycl().
namespace

{

  UnifiedMemorySyclMemoryAllocator unified_memory_sycl_memory_allocator;

  HostPinnedSyclMemoryAllocator host_pinned_sycl_memory_allocator;

  DeviceSyclMemoryAllocator device_sycl_memory_allocator;

} // namespace


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


class SyclRunQueueStream

: public Impl::IRunQueueStream

{

 public:


  SyclRunQueueStream(SyclRunnerRuntime* runtime, const RunQueueBuildInfo& bi);

  ~SyclRunQueueStream() override

  {

  }


 public:


  void notifyBeginLaunchKernel([[maybe_unused]] Impl::RunCommandImpl& c) override

  {

    return m_runtime->notifyBeginLaunchKernel();

  }


  void notifyEndLaunchKernel(Impl::RunCommandImpl&) override

  {

    return m_runtime->notifyEndLaunchKernel();

  }


  void barrier() override

  {

    m_sycl_stream->wait_and_throw();

  }


  bool _barrierNoException() override

  {

    m_sycl_stream->wait();

    return false;

  }


  void copyMemory(const MemoryCopyArgs& args) override

  {

    auto source_bytes = args.source().bytes();

    m_sycl_stream->memcpy(args.destination().data(), source_bytes.data(),

                          source_bytes.size());

    if (!args.isAsync())

      this->barrier();

  }


  void prefetchMemory([[maybe_unused]] const MemoryPrefetchArgs& args) override

  {

    auto source_bytes = args.source().bytes();

    Int64 nb_byte = source_bytes.size();

    if (nb_byte == 0)

      return;

    m_sycl_stream->prefetch(source_bytes.data(), nb_byte);

    if (!args.isAsync())

      this->barrier();

  }


  Impl::NativeStream nativeStream() override

  {

    return Impl::NativeStream(m_sycl_stream.get());

  }


  void _setSyclLastCommandEvent([[maybe_unused]] void* sycl_event_ptr) override

  {

    sycl::event last_event;

    if (sycl_event_ptr)

      last_event = *(reinterpret_cast<sycl::event*>(sycl_event_ptr));

    m_last_command_event = last_event;

  }


 public:


  static sycl::async_handler _getAsyncHandler()

  {

    auto err_handler = [](const sycl::exception_list& exceptions) {

      std::ostringstream ostr;

      ostr << "Error in SYCL runtime\n";

      for (const std::exception_ptr& e : exceptions) {

        try {

          std::rethrow_exception(e);

        }

        catch (const sycl::exception& e) {

          ostr << "SYCL exception: " << e.what() << "\n";

        }

      }

      ARCCORE_FATAL(ostr.str());

    };

    return err_handler;

  }


  sycl::event lastCommandEvent() { return m_last_command_event; }


 public:


  sycl::queue& trueStream() const

  {

    return *m_sycl_stream;

  }


 private:


  Impl::IRunnerRuntime* m_runtime;

  std::unique_ptr<sycl::queue> m_sycl_stream;

  sycl::event m_last_command_event;

};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Implementation of Impl::IRunQueueEventImpl based on 'sycl::event'.
 *
 * The instance keeps a copy of the last command event of the stream given
 * to recordQueue().
 */
class SyclRunQueueEvent
: public Impl::IRunQueueEventImpl
{
 public:

  //! \a has_timer is currently not used.
  explicit SyclRunQueueEvent([[maybe_unused]] bool has_timer)
  {
  }
  ~SyclRunQueueEvent() override
  {
  }

 public:

  // Record the event within a RunQueue
  void recordQueue([[maybe_unused]] Impl::IRunQueueStream* stream) final
  {
    ARCCORE_CHECK_POINTER(stream);
    auto* rq = static_cast<SyclRunQueueStream*>(stream);
    m_sycl_event = rq->lastCommandEvent();
#if defined(__ADAPTIVECPP__)
    m_recorded_stream = stream;
    // TODO: Check if anything needs to be done
#elif defined(__INTEL_LLVM_COMPILER)
    //m_sycl_event = rq->trueStream().ext_oneapi_submit_barrier();
#else
    ARCCORE_THROW(NotSupportedException, "Only supported for AdaptiveCpp and Intel DPC++ implementation");
#endif
  }

  //! Blocks until the recorded event is finished.
  void wait() final
  {
    //ARCCORE_SYCL_FUNC_NOT_HANDLED;
    // TODO: Check exactly what this means
    m_sycl_event.wait();
  }

  /*!
   * \brief Makes \a stream wait for this event.
   *
   * \a stream must not be null. It is checked like in recordQueue() because
   * it is dereferenced by the supported implementations.
   */
  void waitForEvent([[maybe_unused]] Impl::IRunQueueStream* stream) final
  {
    ARCCORE_CHECK_POINTER(stream);
#if defined(__ADAPTIVECPP__)
    auto* rq = static_cast<SyclRunQueueStream*>(stream);
    // NOTE(review): this waits on the wait list of the queue; confirm this
    // is the intended semantic with AdaptiveCpp.
    m_sycl_event.wait(rq->trueStream().get_wait_list());
#elif defined(__INTEL_LLVM_COMPILER)
    std::vector<sycl::event> events;
    events.push_back(m_sycl_event);
    auto* rq = static_cast<SyclRunQueueStream*>(stream);
    rq->trueStream().ext_oneapi_submit_barrier(events);
#else
    ARCCORE_THROW(NotSupportedException, "Only supported for AdaptiveCpp and Intel DPC++ implementation");
#endif
  }

  /*!
   * \brief Duration between the start and the end of the command associated
   * with \a start_event, as given by the SYCL profiling information.
   *
   * Returns 0 if \a start_event has no associated event or if its command
   * is not complete.
   */
  Int64 elapsedTime([[maybe_unused]] IRunQueueEventImpl* start_event) final
  {
    ARCCORE_CHECK_POINTER(start_event);
    // We must take the start event because we are certain it contains
    // the correct 'sycl::event' value.
    sycl::event event = (static_cast<SyclRunQueueEvent*>(start_event))->m_sycl_event;
    // If there is no associated event, we do nothing to avoid an exception
    if (event == sycl::event())
      return 0;

    // Profiling information is only queried for a completed command.
    bool is_complete = event.get_info<sycl::info::event::command_execution_status>() == sycl::info::event_command_status::complete;
    if (!is_complete)
      return 0;
    Int64 start = event.get_profiling_info<sycl::info::event_profiling::command_start>();
    Int64 end = event.get_profiling_info<sycl::info::event_profiling::command_end>();
    return (end - start);
  }

  //! Not implemented: always throws NotImplementedException.
  bool hasPendingWork() final
  {
    ARCCORE_THROW(NotImplementedException, "hasPendingWork()");
  }

 private:

  sycl::event m_sycl_event;
  //! Stream given to recordQueue() (only set with AdaptiveCpp).
  Impl::IRunQueueStream* m_recorded_stream = nullptr;
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Implementation of Impl::IRunnerRuntime for SYCL.
 *
 * The default device, queue and context are set by
 * fillDevicesAndSetDefaultQueue(), which must be called before any method
 * using them.
 */
class SyclRunnerRuntime
: public Impl::IRunnerRuntime
{
  friend class SyclRunQueueStream;

 public:

  void notifyBeginLaunchKernel() override
  {
  }
  void notifyEndLaunchKernel() override
  {
  }
  void barrier() override
  {
    // TODO Waiting on the default queue is not strictly equivalent
    // to the CUDA barrier which synchronizes the entire device.
    m_default_queue->wait();
  }
  eExecutionPolicy executionPolicy() const override
  {
    return eExecutionPolicy::SYCL;
  }
  //! Creates a new stream. The caller gets a raw pointer on a 'new' allocated instance.
  Impl::IRunQueueStream* createStream(const RunQueueBuildInfo& bi) override
  {
    return new SyclRunQueueStream(this, bi);
  }
  //! Creates a new event. The caller gets a raw pointer on a 'new' allocated instance.
  Impl::IRunQueueEventImpl* createEventImpl() override
  {
    return new SyclRunQueueEvent(false);
  }
  //! Same as createEventImpl() but the event is created with the timer flag.
  Impl::IRunQueueEventImpl* createEventImplWithTimer() override
  {
    return new SyclRunQueueEvent(true);
  }
  //! Does nothing with this runtime.
  void setMemoryAdvice([[maybe_unused]] ConstMemoryView buffer, [[maybe_unused]] eMemoryAdvice advice,
                       [[maybe_unused]] DeviceId device_id) override
  {
  }
  //! Does nothing with this runtime.
  void unsetMemoryAdvice([[maybe_unused]] ConstMemoryView buffer,
                         [[maybe_unused]] eMemoryAdvice advice, [[maybe_unused]] DeviceId device_id) override
  {
  }

  //! Not handled: only prints a warning.
  void setCurrentDevice([[maybe_unused]] DeviceId device_id) final
  {
    ARCCORE_SYCL_FUNC_NOT_HANDLED;
  }
  const IDeviceInfoList* deviceInfoList() override { return &m_device_info_list; }

  /*!
   * \brief Fills \a attribute with the information about address \a ptr.
   *
   * The USM kind of \a ptr is queried in the default context. An address
   * which is neither host, device nor shared is reported as
   * ePointerMemoryType::Unregistered with null host and device pointers.
   */
  void getPointerAttribute(PointerAttribute& attribute, const void* ptr) override
  {
    sycl::usm::alloc sycl_mem_type = sycl::get_pointer_type(ptr, *m_default_context);
    ePointerMemoryType mem_type = ePointerMemoryType::Unregistered;
    const void* host_ptr = nullptr;
    const void* device_ptr = nullptr;
    if (sycl_mem_type == sycl::usm::alloc::host) {
      // Host USM allocation (used for HostPinned): the same address is
      // reported for the host and for the device.
      mem_type = ePointerMemoryType::Host;
      host_ptr = ptr;
      // TODO: Look into how to retrieve the value
      device_ptr = ptr;
    }
    else if (sycl_mem_type == sycl::usm::alloc::device) {
      mem_type = ePointerMemoryType::Device;
      device_ptr = ptr;
    }
    else if (sycl_mem_type == sycl::usm::alloc::shared) {
      mem_type = ePointerMemoryType::Managed;
      // TODO: for now we fill it with the pointer because we don't
      // know how to retrieve the info.
      host_ptr = ptr;
      device_ptr = ptr;
    }
    // TODO: to be corrected
    Int32 device_id = 0;
    _fillPointerAttribute(attribute, mem_type, device_id, ptr, device_ptr, host_ptr);
  }

  //! Not implemented: returns a default constructed value.
  DeviceMemoryInfo getDeviceMemoryInfo([[maybe_unused]] DeviceId device_id) override
  {
    return {};
  }

  /*!
   * \brief Computes the launch arguments to use from \a orig_args.
   *
   * The arguments are only changed in cooperative mode, where the number
   * of blocks is limited to 4 blocks per compute unit of the default device.
   * No limit is applied if the number of compute units is not known
   * (i.e. not strictly positive), to avoid returning a grid with no block.
   */
  KernelLaunchArgs computeKernalLaunchArgs(const KernelLaunchArgs& orig_args,
                                           [[maybe_unused]] const void* kernel_ptr,
                                           [[maybe_unused]] Int64 total_loop_size) override
  {
    if (orig_args.isCooperative()) {
      // In cooperative mode, ensures that we do not launch more blocks
      // than the maximum that can reside on the GPU.
      Int32 nb_block = orig_args.nbBlockPerGrid();
      // With Sycl, there is no way to retrieve the maximum number
      // of active blocks for a given function and number of threads.
      // We assume we can take a maximum of 4 blocks per SM.
      int nb_block_per_sm = 4;
      int max_block = nb_block_per_sm * m_multi_processor_count;
      if (max_block > 0 && nb_block > max_block) {
        KernelLaunchArgs modified_args(orig_args);
        modified_args.setNbBlockPerGrid(max_block);
        return modified_args;
      }
    }
    return orig_args;
  }

  void fillDevicesAndSetDefaultQueue(bool is_verbose);
  sycl::queue& defaultQueue() const { return *m_default_queue; }
  sycl::device& defaultDevice() const { return *m_default_device; }

  void finalize(ITraceMng*) override
  {
    // Removes the global queue used for allocations.
    global_default_queue = sycl::queue{};
  }

 private:

  Impl::DeviceInfoList m_device_info_list;
  std::unique_ptr<sycl::device> m_default_device;
  std::unique_ptr<sycl::context> m_default_context;
  std::unique_ptr<sycl::queue> m_default_queue;
  int m_multi_processor_count = 0;

 private:

  //! Sets the default device, queue and context from \a device.
  void _init(sycl::device& device)
  {
    m_default_device = std::make_unique<sycl::device>(device);
    m_default_queue = std::make_unique<sycl::queue>(device);
    // Use the context of the default queue: USM pointers allocated with
    // this queue can only be queried (see getPointerAttribute()) in the
    // context they belong to. A context built directly from the device
    // would be a new, unrelated one.
    m_default_context = std::make_unique<sycl::context>(m_default_queue->get_context());
  }
};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Builds the stream with a new queue on the default device of \a runtime.
 *
 * The queue is created in order, with profiling enabled and with the error
 * handler given by _getAsyncHandler(). A warning is printed if \a bi does
 * not only contain default values, because these values are not handled.
 */
SyclRunQueueStream::
SyclRunQueueStream(SyclRunnerRuntime* runtime, const RunQueueBuildInfo& bi)
: m_runtime(runtime)
{
  sycl::device& device = runtime->defaultDevice();

  // 'in_order': the launched commands are implicitly executed one after
  // the other. 'enable_profiling': needed for profiling.
  sycl::property_list properties(sycl::property::queue::in_order(),
                                 sycl::property::queue::enable_profiling());

  // Error handler.
  sycl::async_handler error_handler = _getAsyncHandler();

  // The queue is created the same way in both cases: the non default
  // values of 'bi' are only reported.
  if (!bi.isDefault()) {
    ARCCORE_SYCL_FUNC_NOT_HANDLED;
  }
  m_sycl_stream = std::make_unique<sycl::queue>(device, error_handler, properties);
}


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


void SyclRunnerRuntime::

fillDevicesAndSetDefaultQueue(bool is_verbose)

{

  if (is_verbose) {

    for (auto platform : sycl::platform::get_platforms()) {

      std::cout << "Platform: "

                << platform.get_info<sycl::info::platform::name>()

                << std::endl;

    }

  }


  sycl::device device{ sycl::gpu_selector_v };

  if (is_verbose)

    std::cout << "\nDevice: " << device.get_info<sycl::info::device::name>()

              << "\nVersion=" << device.get_info<sycl::info::device::version>()

              << "\nDriverVersion=" << device.get_info<sycl::info::device::driver_version>()

              << "\nMaxComputeUnits=" << device.get_info<sycl::info::device::max_compute_units>()

              << "\nMaxWorkGroupSize=" << device.get_info<sycl::info::device::max_work_group_size>()

              << "\nLocalMemSize=" << device.get_info<sycl::info::device::local_mem_size>()

              << "\nGlobalMemSize=" << device.get_info<sycl::info::device::global_mem_size>()

              << "\nMaxMemAllocSize=" << device.get_info<sycl::info::device::max_mem_alloc_size>()

              << "\n";

  m_multi_processor_count = device.get_info<sycl::info::device::max_compute_units>();

  // For now, we take the first found queue as the default and only

  // consider one accessible device.

  _init(device);


  DeviceInfo device_info;

  device_info.setDescription("No description info");

  device_info.setDeviceId(DeviceId(0));

  device_info.setName(device.get_info<sycl::info::device::name>());

  m_device_info_list.addDevice(device_info);

}


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Implementation of IMemoryCopier for the SYCL runtime.
 *
 * The copy() method is defined later in this file.
 */
class SyclMemoryCopier

: public IMemoryCopier

{

  //! Copies the region \a from to the region \a to, with \a queue if it is not null.
  void copy(ConstMemoryView from, eMemoryResource from_mem,

            MutableMemoryView to, eMemoryResource to_mem,

            const RunQueue* queue) override;

};


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


} // namespace Arcane::Accelerator::Sycl


// File-local instances of the runtime and of the memory copier.
// Their addresses are registered by arcaneRegisterAcceleratorRuntimesycl().
namespace

{

Arcane::Accelerator::Sycl::SyclRunnerRuntime global_sycl_runtime;

Arcane::Accelerator::Sycl::SyclMemoryCopier global_sycl_memory_copier;

} // namespace


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


namespace Arcane::Accelerator::Sycl

{


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Copies the bytes of \a from to \a to.
 *
 * If \a queue is not null, the copy is delegated to it, with the same
 * asynchronous mode as the queue. Otherwise the copy is done with the
 * default queue of the runtime and is finished when the method returns.
 * \a from_mem and \a to_mem are not used.
 */
void SyclMemoryCopier::
copy(ConstMemoryView from, [[maybe_unused]] eMemoryResource from_mem,
     MutableMemoryView to, [[maybe_unused]] eMemoryResource to_mem,
     const RunQueue* queue)
{
  if (!queue) {
    // No queue: blocking copy with the default queue of the runtime.
    sycl::queue& default_queue = global_sycl_runtime.defaultQueue();
    sycl::event copy_event = default_queue.memcpy(to.data(), from.data(), from.bytes().size());
    copy_event.wait();
    return;
  }

  const bool is_async = queue->isAsync();
  queue->copyMemory(MemoryCopyArgs(to.bytes(), from.bytes()).addAsync(is_async));
}


} // namespace Arcane::Accelerator::Sycl


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/


/*!
 * \brief Entry point used during the dynamic loading of this library.
 *
 * Registers the SYCL runtime, the SYCL allocators and the SYCL memory
 * copier, then initializes the default device and sets the global queue
 * used for allocations from the default queue of the runtime.
 */
extern "C" ARCCORE_EXPORT void
arcaneRegisterAcceleratorRuntimesycl(Arcane::Accelerator::RegisterRuntimeInfo& init_info)
{
  using namespace Arcane;
  using namespace Arcane::Accelerator::Sycl;
  namespace AccImpl = Arcane::Accelerator::Impl;

  // Runtime registration
  AccImpl::setUsingSYCLRuntime(true);
  AccImpl::setSYCLRunQueueRuntime(&global_sycl_runtime);

  // Unified memory is used for the host allocator and as default data resource
  MemoryUtils::setAcceleratorHostMemoryAllocator(&unified_memory_sycl_memory_allocator);
  MemoryUtils::setDefaultDataMemoryResource(eMemoryResource::UnifiedMemory);

  // One allocator per memory resource, and the copier
  IMemoryResourceMngInternal* resource_mng = MemoryUtils::getDataMemoryResourceMng()->_internal();
  resource_mng->setIsAccelerator(true);
  resource_mng->setAllocator(eMemoryResource::UnifiedMemory, &unified_memory_sycl_memory_allocator);
  resource_mng->setAllocator(eMemoryResource::HostPinned, &host_pinned_sycl_memory_allocator);
  resource_mng->setAllocator(eMemoryResource::Device, &device_sycl_memory_allocator);
  resource_mng->setCopier(&global_sycl_memory_copier);

  // Device selection. The queue used by the allocators is set last because
  // the default queue only exists after this call.
  const bool is_verbose = init_info.isVerbose();
  global_sycl_runtime.fillDevicesAndSetDefaultQueue(is_verbose);
  global_default_queue = global_sycl_runtime.defaultQueue();
}


/*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*/

ARCCORE_FATAL
#define ARCCORE_FATAL(...)
Macro throwing a FatalErrorException.
Definition ArccoreGlobal.h:552

ARCCORE_THROW
#define ARCCORE_THROW(exception_class,...)
Macro to throw an exception with formatting.
Definition ArccoreGlobal.h:527

ARCCORE_CHECK_POINTER
#define ARCCORE_CHECK_POINTER(ptr)
Macro that returns the pointer ptr if it is not null or throws an exception if it is null.
Definition ArccoreGlobal.h:800

Arcane::Accelerator::DeviceId
Identifier of a system component.
Definition arccore/src/common/arccore/common/accelerator/DeviceId.h:34

Arcane::Accelerator::DeviceMemoryInfo
Accelerator memory information.
Definition arccore/src/common/arccore/common/accelerator/DeviceMemoryInfo.h:32

Arcane::Accelerator::IDeviceInfoList
Interface of a list of devices.
Definition arccore/src/common/arccore/common/accelerator/IDeviceInfoList.h:32

Arcane::Accelerator::Impl::DeviceInfoList
Interface for a list of devices.
Definition arccore/src/common/arccore/common/accelerator/DeviceInfoList.h:36

Arcane::Accelerator::Impl::IRunQueueEventImpl
Interface for event implementation.
Definition IRunQueueEventImpl.h:33

Arcane::Accelerator::Impl::IRunQueueStream
Interface of an execution stream for a RunQueue.
Definition IRunQueueStream.h:33

Arcane::Accelerator::Impl::IRunnerRuntime
Interface of the runtime associated with an accelerator.
Definition IRunnerRuntime.h:36

Arcane::Accelerator::Impl::KernelLaunchArgs
Arguments for launching a kernel.
Definition arccore/src/common/arccore/common/accelerator/KernelLaunchArgs.h:33

Arcane::Accelerator::Impl::KernelLaunchArgs::isCooperative
bool isCooperative() const
Indicates if running in cooperative mode (i.e. cudaLaunchCooperativeKernel).
Definition arccore/src/common/arccore/common/accelerator/KernelLaunchArgs.h:63

Arcane::Accelerator::Impl::KernelLaunchArgs::nbBlockPerGrid
Int32 nbBlockPerGrid() const
Number of grid blocks.
Definition arccore/src/common/arccore/common/accelerator/KernelLaunchArgs.h:48

Arcane::Accelerator::Impl::KernelLaunchArgs::setNbBlockPerGrid
void setNbBlockPerGrid(Int32 v)
Number of grid blocks.
Definition arccore/src/common/arccore/common/accelerator/KernelLaunchArgs.h:50

Arcane::Accelerator::Impl::KernelLaunchArgs::sharedMemorySize
Int32 sharedMemorySize() const
Shared memory to allocate for the kernel.
Definition arccore/src/common/arccore/common/accelerator/KernelLaunchArgs.h:58

Arcane::Accelerator::Impl::NativeStream
Opaque type to encapsulate a native 'stream'.
Definition arccore/src/common/arccore/common/accelerator/NativeStream.h:55

Arcane::Accelerator::Impl::RunCommandImpl
Implementation of a command for accelerator.
Definition arccore/src/common/arccore/common/accelerator/internal/RunCommandImpl.h:41

Arcane::Accelerator::MemoryCopyArgs
Memory copy arguments.
Definition arccore/src/common/arccore/common/accelerator/Memory.h:63

Arcane::Accelerator::MemoryPrefetchArgs
Memory prefetching arguments.
Definition arccore/src/common/arccore/common/accelerator/Memory.h:126

Arcane::Accelerator::PointerAttribute
Information about a memory address.
Definition arccore/src/common/arccore/common/accelerator/PointerAttribute.h:37

Arcane::Accelerator::RegisterRuntimeInfo
Information to initialize the accelerator runtime.
Definition RegisterRuntimeInfo.h:32

Arcane::Accelerator::RunQueueBuildInfo
Information to create a RunQueue.
Definition arccore/src/common/arccore/common/accelerator/RunQueueBuildInfo.h:32

Arcane::Accelerator::RunQueueBuildInfo::isDefault
bool isDefault() const
Indicates if the instance only has default values.
Definition arccore/src/common/arccore/common/accelerator/RunQueueBuildInfo.h:53

Arcane::Accelerator::RunQueue
Execution queue for an accelerator.
Definition arccore/src/common/arccore/common/accelerator/RunQueue.h:53

Arcane::Accelerator::RunQueue::isAsync
bool isAsync() const
Indicates if the execution queue is asynchronous.
Definition RunQueue.cc:320

Arcane::Accelerator::RunQueue::copyMemory
void copyMemory(const MemoryCopyArgs &args) const
Copies information between two memory regions.
Definition RunQueue.cc:237

Arcane::Accelerator::Sycl::DeviceSyclMemoryAllocator
Definition SyclAcceleratorRuntime.cc:158

Arcane::Accelerator::Sycl::DeviceSyclMemoryAllocator::memoryResource
eMemoryResource memoryResource() const override
Memory resource provided by the allocator.
Definition SyclAcceleratorRuntime.cc:169

Arcane::Accelerator::Sycl::HostPinnedSyclMemoryAllocator
Definition SyclAcceleratorRuntime.cc:138

Arcane::Accelerator::Sycl::HostPinnedSyclMemoryAllocator::memoryResource
eMemoryResource memoryResource() const override
Memory resource provided by the allocator.
Definition SyclAcceleratorRuntime.cc:150

Arcane::Accelerator::Sycl::SyclMemoryAllocatorBase::hasRealloc
bool hasRealloc(MemoryAllocationArgs) const override
Indicates whether the allocator supports realloc semantics.
Definition SyclAcceleratorRuntime.cc:77

Arcane::Accelerator::Sycl::SyclMemoryAllocatorBase::reallocate
AllocatedMemoryInfo reallocate(MemoryAllocationArgs args, AllocatedMemoryInfo current_ptr, Int64 new_size) override
Reallocates memory for new_size bytes and returns the pointer.
Definition SyclAcceleratorRuntime.cc:90

Arcane::Accelerator::Sycl::SyclMemoryAllocatorBase::allocate
AllocatedMemoryInfo allocate(MemoryAllocationArgs args, Int64 new_size) override
Definition SyclAcceleratorRuntime.cc:78

Arcane::Accelerator::Sycl::SyclMemoryAllocatorBase::deallocate
void deallocate(MemoryAllocationArgs args, AllocatedMemoryInfo ptr) override
Frees the memory whose base address is ptr.
Definition SyclAcceleratorRuntime.cc:102

Arcane::Accelerator::Sycl::SyclMemoryCopier
Definition SyclAcceleratorRuntime.cc:574

Arcane::Accelerator::Sycl::SyclMemoryCopier::copy
void copy(ConstMemoryView from, eMemoryResource from_mem, MutableMemoryView to, eMemoryResource to_mem, const RunQueue *queue) override
Copies the data from from to to with the queue queue.
Definition SyclAcceleratorRuntime.cc:601

Arcane::Accelerator::Sycl::SyclRunQueueEvent
Definition SyclAcceleratorRuntime.cc:287

Arcane::Accelerator::Sycl::SyclRunQueueStream
Definition SyclAcceleratorRuntime.cc:187

Arcane::Accelerator::Sycl::SyclRunQueueStream::notifyBeginLaunchKernel
void notifyBeginLaunchKernel(Impl::RunCommandImpl &c) override
Notification before command launch.
Definition SyclAcceleratorRuntime.cc:197

Arcane::Accelerator::Sycl::SyclRunQueueStream::barrier
void barrier() override
Blocks until all actions associated with this queue are finished.
Definition SyclAcceleratorRuntime.cc:205

Arcane::Accelerator::Sycl::SyclRunQueueStream::lastCommandEvent
sycl::event lastCommandEvent()
Event corresponding to the last command.
Definition SyclAcceleratorRuntime.cc:266

Arcane::Accelerator::Sycl::SyclRunQueueStream::prefetchMemory
void prefetchMemory(const MemoryPrefetchArgs &args) override
Performs a prefetch of a memory region.
Definition SyclAcceleratorRuntime.cc:222

Arcane::Accelerator::Sycl::SyclRunQueueStream::nativeStream
Impl::NativeStream nativeStream() override
Pointer to the internal structure dependent on the implementation.
Definition SyclAcceleratorRuntime.cc:232

Arcane::Accelerator::Sycl::SyclRunQueueStream::copyMemory
void copyMemory(const MemoryCopyArgs &args) override
Performs a copy between two memory regions.
Definition SyclAcceleratorRuntime.cc:214

Arcane::Accelerator::Sycl::SyclRunQueueStream::_barrierNoException
bool _barrierNoException() override
Barrier without exception. Returns true in case of error.
Definition SyclAcceleratorRuntime.cc:209

Arcane::Accelerator::Sycl::SyclRunQueueStream::_setSyclLastCommandEvent
void _setSyclLastCommandEvent(void *sycl_event_ptr) override
For SYCL, positions the event associated with the last executed command.
Definition SyclAcceleratorRuntime.cc:237

Arcane::Accelerator::Sycl::SyclRunQueueStream::notifyEndLaunchKernel
void notifyEndLaunchKernel(Impl::RunCommandImpl &) override
Notification of command launch completion.
Definition SyclAcceleratorRuntime.cc:201

Arcane::Accelerator::Sycl::SyclRunnerRuntime
Definition SyclAcceleratorRuntime.cc:371

Arcane::Accelerator::Sycl::UnifiedMemorySyclMemoryAllocator
Definition SyclAcceleratorRuntime.cc:119

Arcane::Accelerator::Sycl::UnifiedMemorySyclMemoryAllocator::memoryResource
eMemoryResource memoryResource() const override
Memory resource provided by the allocator.
Definition SyclAcceleratorRuntime.cc:130

Arcane::AllocatedMemoryInfo
Information about an allocated memory region.
Definition AllocatedMemoryInfo.h:32

Arcane::AllocatedMemoryInfo::baseAddress
void * baseAddress() const
Address of the start of the allocated region.
Definition AllocatedMemoryInfo.h:51

Arcane::AllocatedMemoryInfo::size
Int64 size() const
Size in bytes of the used memory region. (-1) if unknown.
Definition AllocatedMemoryInfo.h:53

Arcane::ConstMemoryView
Constant view on a contiguous memory region containing fixed-size elements.
Definition arccore/src/base/arccore/base/MemoryView.h:39

Arcane::ConstMemoryView::bytes
constexpr SpanType bytes() const
View in byte form.
Definition arccore/src/base/arccore/base/MemoryView.h:108

Arcane::ConstMemoryView::data
constexpr const std::byte * data() const
Pointer to the memory region.
Definition arccore/src/base/arccore/base/MemoryView.h:111

Arcane::IMemoryCopier
Interface for memory copies with accelerator support.
Definition IMemoryCopier.h:33

Arcane::IMemoryResourceMngInternal
Internal part of Arcane's 'IMemoryResourceMng'.
Definition IMemoryResourceMngInternal.h:32

Arcane::IMemoryResourceMngInternal::setAllocator
virtual void setAllocator(eMemoryResource r, IMemoryAllocator *allocator)=0
Sets the allocator for resource r.

Arcane::IMemoryResourceMngInternal::setIsAccelerator
virtual void setIsAccelerator(bool v)=0
Indicates if an accelerator is available.

Arcane::IMemoryResourceMngInternal::setCopier
virtual void setCopier(IMemoryCopier *copier)=0
Sets the copying instance.

Arcane::IMemoryResourceMng::_internal
virtual IMemoryResourceMngInternal * _internal()=0
Internal interface.

Arcane::ITraceMng
Trace manager interface.
Definition arccore/src/trace/arccore/trace/ITraceMng.h:175

Arcane::MemoryAllocationArgs
Class containing information to specialize allocations.
Definition common/arccore/common/MemoryAllocationArgs.h:32

Arcane::MutableMemoryView
Mutable view on a contiguous memory region containing fixed-size elements.
Definition arccore/src/base/arccore/base/MemoryView.h:158

Arcane::MutableMemoryView::data
constexpr std::byte * data() const
Pointer to the memory region.
Definition arccore/src/base/arccore/base/MemoryView.h:220

Arcane::MutableMemoryView::bytes
constexpr SpanType bytes() const
View in byte form.
Definition arccore/src/base/arccore/base/MemoryView.h:217

Arcane::NotImplementedException
Exception when a function is not implemented.
Definition arccore/src/base/arccore/base/NotImplementedException.h:34

Arcane::NotSupportedException
Exception when an operation is not supported.
Definition arccore/src/base/arccore/base/NotSupportedException.h:34

Arcane::SpanImpl::size
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
Definition Span.h:325

Arcane::Accelerator::eMemoryAdvice
eMemoryAdvice
Memory management advice.
Definition arccore/src/common/arccore/common/accelerator/Memory.h:35

Arcane::Accelerator::ePointerMemoryType
ePointerMemoryType
Memory type for a pointer.
Definition CommonAcceleratorGlobal.h:153

Arcane::Accelerator::eExecutionPolicy
eExecutionPolicy
Execution policy for a Runner.
Definition CommonAcceleratorGlobal.h:90

Arcane::Accelerator::eExecutionPolicy::SYCL
@ SYCL
Execution policy using the SYCL environment.
Definition CommonAcceleratorGlobal.h:102

Arcane::MemoryUtils::getDataMemoryResourceMng
IMemoryRessourceMng * getDataMemoryResourceMng()
Memory resource manager for data.
Definition arccore/src/common/arccore/common/MemoryUtils.cc:131

Arcane::MemoryUtils::setAcceleratorHostMemoryAllocator
IMemoryAllocator * setAcceleratorHostMemoryAllocator(IMemoryAllocator *a)
Sets the specific allocator for accelerators.
Definition arccore/src/common/arccore/common/MemoryUtils.cc:152

Arcane::MemoryUtils::setDefaultDataMemoryResource
void setDefaultDataMemoryResource(eMemoryResource mem_resource)
Sets the memory resource used for the data memory allocator.
Definition arccore/src/common/arccore/common/MemoryUtils.cc:110

Arcane::platform
Namespace for platform-dependent functions.
Definition arcane/src/arcane/utils/PlatformUtils.cc:100

Arcane
Top-level namespace of the Arcane framework.
Definition arcane/src/arcane/accelerator/AcceleratorGlobal.h:37

Arcane::Int64
std::int64_t Int64
Signed integer type of 64 bits.
Definition ArccoreGlobal.h:235

Arcane::eMemoryResource
eMemoryResource
List of available memory resources.
Definition CommonGlobal.h:179

Arcane::eMemoryResource::HostPinned
@ HostPinned
Allocates on the host.
Definition CommonGlobal.h:185

Arcane::eMemoryResource::UnifiedMemory
@ UnifiedMemory
Allocates using unified memory.
Definition CommonGlobal.h:189

Arcane::eMemoryResource::Device
@ Device
Allocates on the device.
Definition CommonGlobal.h:187

Arcane::Int32
std::int32_t Int32
Signed integer type of 32 bits.
Definition ArccoreGlobal.h:233