Arcane  v3.16.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
Test2.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* Test2.cpp (C) 2000-2025 */
9/* */
10/* Fichier contenant les tests pour l'implémentation CUDA. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include <vector>
15#include <iostream>
16
17#include <cuda_runtime.h>
18
19#include "arcane/utils/PlatformUtils.h"
20#include "arcane/utils/NotSupportedException.h"
21#include "arcane/utils/Real3.h"
22#include "arcane/utils/NumArray.h"
24
25#include "arcane/core/Item.h"
27
28#include "arcane/accelerator/core/Runner.h"
29#include "arcane/accelerator/core/RunQueue.h"
30
31#include "arcane/accelerator/cuda/CudaAccelerator.h"
33
34#include <cooperative_groups.h>
35namespace cg = cooperative_groups;
36
37using namespace Arccore;
38using namespace Arcane;
39using namespace Arcane::Accelerator;
40
41__global__ void MyVecAdd3(double* a, double* b, double* out, int nb_value)
42{
43 int i = blockDim.x * blockIdx.x + threadIdx.x;
44 cg::grid_group this_grid_group = cg::this_grid();
45 if (i >= nb_value)
46 return;
47 this_grid_group.sync();
48 out[i] = a[i] + b[i];
49 if (i < 10) {
50 printf("A=%d %lf %lf %lf grid_size=%llu \n", i, a[i], b[i], out[i], this_grid_group.size());
51 }
52}
53
54extern "C" void arcaneTestCooperativeLaunch()
55{
56 std::cout << "Test Cooperative Launch\n";
57 constexpr int vsize = 2000;
58 std::vector<double> a(vsize);
59 std::vector<double> b(vsize);
60 std::vector<double> out(vsize);
61 for (size_t i = 0; i < vsize; ++i) {
62 a[i] = (double)(i + 1);
63 b[i] = (double)(i * i + 1);
64 out[i] = 0.0; //a[i] + b[i];
65 }
66 size_t mem_size = vsize * sizeof(double);
67 double* d_a = nullptr;
68 cudaMalloc(&d_a, mem_size);
69 double* d_b = nullptr;
70 cudaMalloc(&d_b, mem_size);
71 double* d_out = nullptr;
72 cudaMalloc(&d_out, mem_size);
73
74 cudaMemcpy(d_a, a.data(), mem_size, cudaMemcpyHostToDevice);
75 cudaMemcpy(d_b, b.data(), mem_size, cudaMemcpyHostToDevice);
76 int threadsPerBlock = 256;
77 int blocksPerGrid = (vsize + threadsPerBlock - 1) / threadsPerBlock;
78 std::cout << "CALLING kernel tpb=" << threadsPerBlock << " bpg=" << blocksPerGrid << "\n";
79 int nb_value = vsize;
80 void* args[] = { &d_a, &d_b, &d_out, &nb_value };
81 const void* func_ptr = reinterpret_cast<const void*>(&MyVecAdd3);
82 ARCANE_CHECK_CUDA(cudaLaunchCooperativeKernel(func_ptr, dim3(blocksPerGrid), dim3(threadsPerBlock), args, 0, 0));
83 ARCANE_CHECK_CUDA(cudaDeviceSynchronize());
84 ARCANE_CHECK_CUDA(cudaMemcpy(out.data(), d_out, mem_size, cudaMemcpyDeviceToHost));
85 for (size_t i = 0; i < 10; ++i)
86 std::cout << "V=" << out[i] << "\n";
87}
Fonctions mathématiques diverses.
Fonctions de gestion mémoire et des allocateurs.
Types et macros pour gérer les boucles sur les accélérateurs.
Espace de nom pour l'utilisation des accélérateurs.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
Espace de nom de Arccore.