Alien  1.3.0
Developer documentation
Loading...
Searching...
No Matches
SYCLPrecomp.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7
8#pragma once
9
10#include <alien/utils/Precomp.h>
11
// SYCL backend / feature selection macros.
//
// USE_SYCL_USM: the define inside this guard is commented out, so the guard
// currently has no effect; the macro is only set if the build defines it.
// (Presumably selects SYCL unified shared memory — confirm against users of the macro.)
12#ifndef USE_SYCL_USM
13//#define USE_SYCL_USM
14#endif
// Default backend: when neither USE_HIPSYCL nor USE_ACPPSYCL is defined,
// USE_ONEAPI is defined.
15#ifndef USE_HIPSYCL
16#ifndef USE_ACPPSYCL
17#define USE_ONEAPI
18#endif
19#endif
// USE_ACPPSYCL implies USE_SYCL2020 (defined here unless it is already defined).
20#ifdef USE_ACPPSYCL
21#ifndef USE_SYCL2020
22#define USE_SYCL2020
23#endif
24#endif
25
26// Compile-time selection of the kernel tuning constants depending on the target.
// The AMD branch is taken when __HIP_PLATFORM_AMD__ or __AMDGCN__ is defined;
// every other target uses the #else branch.
27#if defined(__HIP_PLATFORM_AMD__) || defined(__AMDGCN__)
28 // Constants tuned for MI300 (gfx942)
29 // - CU count : 228 CUs
30 // - Wavefront : 64 threads (RDNA/CDNA)
31 // - LDS/CU : 64 KB → 1024 threads × 8 B (double) = 8 KB/workgroup, safe
32
 // NOTE(review): the original remark says the MI300 wavefront size should be 64,
 // while PKSIZE is set to 1024 — confirm which value is intended.
33 static constexpr int PKSIZE = 1024; // wavefront MI300 should be 64
34 //static constexpr int WG_SIZE = 256; // 4 WF/block
35 static constexpr int TARGET_WAVES = 4;
36
 // NOTE(review): with the 64-thread wavefront stated above, 256 work-items is
 // 4 wavefronts per work-group, not 16 — confirm the comment or the value.
37 static constexpr int WG_SIZE = 256; // 16 wavefronts/WG
38 static constexpr int ITEMS_PER_WI = 8; // unroll: each work-item processes 8 elements
39
40
41#else
 // Non-AMD targets; the remarks below refer to CUDA/H100.
 // NOTE(review): WARP_SIZE is defined only in this branch; the AMD branch above
 // has no counterpart — confirm no AMD code path relies on it.
42 static constexpr int PKSIZE = 1024; // warp H100 should be 32
43 //static constexpr int WG_SIZE = 256; // 8 warps/block
44 static constexpr int WG_SIZE = 512; // 16 warps/block
45 static constexpr int ITEMS_PER_WI = 8; // unroll for ILP
46 static constexpr int WARP_SIZE = 32; // native CUDA/H100
47 static constexpr int TARGET_WAVES = 4; // waves/SM to hide latency
48#endif
49
50// Dynamic grid (replaces the fixed m_total_threads)