Alien
1.3.0
Developer documentation
Loading...
Searching...
No Matches
SYCLPrecomp.h
1
// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2
//-----------------------------------------------------------------------------
3
// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4
// See the top-level COPYRIGHT file for details.
5
// SPDX-License-Identifier: Apache-2.0
6
//-----------------------------------------------------------------------------
7
8
#pragma once
9
10
#include <alien/utils/Precomp.h>
11
12
// USE_SYCL_USM is never defined by this header: the define below is left
// commented out, so the macro is set only when supplied from outside.
#if !defined(USE_SYCL_USM)
// #define USE_SYCL_USM
#endif

// Fall back to USE_ONEAPI when neither USE_HIPSYCL nor USE_ACPPSYCL is defined.
#if !defined(USE_HIPSYCL) && !defined(USE_ACPPSYCL)
#define USE_ONEAPI
#endif

// USE_ACPPSYCL implies USE_SYCL2020 (defined here unless it is already set).
#if defined(USE_ACPPSYCL) && !defined(USE_SYCL2020)
#define USE_SYCL2020
#endif
25
26
// Compile-time selection of the kernel tuning constants for the target device.
#if defined(__HIP_PLATFORM_AMD__) || defined(__AMDGCN__)
// Constants tuned for AMD MI300 (gfx942):
//   - CU count  : 228 CUs
//   - Wavefront : 64 threads (CDNA)
//   - LDS/CU    : 64 KB -> 1024 threads x 8 B (double) = 8 KB/work-group, safe
//
// NOTE(review): unlike the #else branch, this branch defines no WARP_SIZE;
// confirm that nothing built for AMD targets refers to it.

// NOTE(review): the original remark on this line reads "wavefront MI300
// should be 64"; its relation to PKSIZE is unclear - confirm the intent.
static constexpr int PKSIZE = 1024;
static constexpr int TARGET_WAVES = 4;

static constexpr int WG_SIZE = 256;     // 256 / 64 = 4 wavefronts per work-group
static constexpr int ITEMS_PER_WI = 8;  // unroll: each work-item handles 8 elements

#else
// NOTE(review): the original remark on this line reads "warp H100 should be
// 32"; its relation to PKSIZE is unclear - confirm the intent.
static constexpr int PKSIZE = 1024;
static constexpr int WG_SIZE = 512;     // 512 / 32 = 16 warps per block
static constexpr int ITEMS_PER_WI = 8;  // unroll factor for ILP
static constexpr int WARP_SIZE = 32;    // native CUDA/H100 warp width
static constexpr int TARGET_WAVES = 4;  // waves per SM, to hide latency
#endif
49
50
// Dynamic grid sizing (replaces the fixed m_total_threads)
core
alien
kernels
sycl
SYCLPrecomp.h
Generated on
for Alien by
1.16.1