17#include <cuda_runtime.h>
19#include "arcane/utils/PlatformUtils.h"
20#include "arcane/utils/NotSupportedException.h"
21#include "arcane/utils/Real3.h"
22#include "arcane/utils/NumArray.h"
25#include "arcane/core/Item.h"
28#include "arcane/accelerator/core/Runner.h"
29#include "arcane/accelerator/core/RunQueue.h"
31#include "arcane/accelerator/cuda/CudaAccelerator.h"
34#include <cooperative_groups.h>
35namespace cg = cooperative_groups;
41__global__
void MyVecAdd3(
double* a,
double* b,
double* out,
int nb_value)
43 int i = blockDim.x * blockIdx.x + threadIdx.x;
44 cg::grid_group this_grid_group = cg::this_grid();
47 this_grid_group.sync();
50 printf(
"A=%d %lf %lf %lf grid_size=%llu \n", i, a[i], b[i], out[i], this_grid_group.size());
54extern "C" void arcaneTestCooperativeLaunch()
56 std::cout <<
"Test Cooperative Launch\n";
57 constexpr int vsize = 2000;
58 std::vector<double> a(vsize);
59 std::vector<double> b(vsize);
60 std::vector<double> out(vsize);
61 for (
size_t i = 0; i < vsize; ++i) {
62 a[i] = (double)(i + 1);
63 b[i] = (double)(i * i + 1);
66 size_t mem_size = vsize *
sizeof(double);
67 double* d_a =
nullptr;
68 cudaMalloc(&d_a, mem_size);
69 double* d_b =
nullptr;
70 cudaMalloc(&d_b, mem_size);
71 double* d_out =
nullptr;
72 cudaMalloc(&d_out, mem_size);
74 cudaMemcpy(d_a, a.data(), mem_size, cudaMemcpyHostToDevice);
75 cudaMemcpy(d_b, b.data(), mem_size, cudaMemcpyHostToDevice);
76 int threadsPerBlock = 256;
77 int blocksPerGrid = (vsize + threadsPerBlock - 1) / threadsPerBlock;
78 std::cout <<
"CALLING kernel tpb=" << threadsPerBlock <<
" bpg=" << blocksPerGrid <<
"\n";
80 void* args[] = { &d_a, &d_b, &d_out, &nb_value };
81 const void* func_ptr =
reinterpret_cast<const void*
>(&MyVecAdd3);
82 ARCANE_CHECK_CUDA(cudaLaunchCooperativeKernel(func_ptr, dim3(blocksPerGrid), dim3(threadsPerBlock), args, 0, 0));
83 ARCANE_CHECK_CUDA(cudaDeviceSynchronize());
84 ARCANE_CHECK_CUDA(cudaMemcpy(out.data(), d_out, mem_size, cudaMemcpyDeviceToHost));
85 for (
size_t i = 0; i < 10; ++i)
86 std::cout <<
"V=" << out[i] <<
"\n";
Fonctions mathématiques diverses.
Fonctions de gestion mémoire et des allocateurs.
Types et macros pour gérer les boucles sur les accélérateurs.
Espace de nom pour l'utilisation des accélérateurs.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Espace de nom de Arccore.