12#ifndef ARCCORE_BASE_FLOATCONVERSION_H
13#define ARCCORE_BASE_FLOATCONVERSION_H
17#include "arccore/base/ArccoreGlobal.h"
27namespace Arccore::impl
// Converts the binary32 value `v` into a 16-bit pattern returned as uint16_t.
// NOTE(review): this listing omits some statements (the declaration of `f`,
// braces, `else` branches and the final return); the comments below describe
// only what the visible lines do.
61inline constexpr uint16_t
62convertFloat16ToUint16Impl(
float v)
// 255 << 23 sets all eight binary32 exponent bits with a zero mantissa,
// i.e. the bit pattern of +infinity.
67 constexpr detail::float32_bits f32infty = { 255 << 23 };
// Biased exponent 127 + 16: bit patterns at or above this threshold take the
// Inf/NaN branch below.
68 constexpr detail::float32_bits f16max = { (127 + 16) << 23 };
// (127 - 15) + (23 - 10) + 1 == 126. This constant is added as a float and
// then subtracted as an integer in the small-magnitude branch below.
69 constexpr detail::float32_bits denorm_magic = { ((127 - 15) + (23 - 10) + 1) << 23 };
// Top bit of the 32-bit pattern (the sign bit).
70 constexpr unsigned int sign_mask = 0x80000000u;
71 uint16_t val =
static_cast<uint16_t
>(0x0u);
// Isolate the sign bit; it is moved into bit 15 of the result at the end.
73 unsigned int sign = f.u & sign_mask;
// NOTE(review): the unsigned comparisons below presumably rely on the sign bit
// having been cleared from f.u by a line not shown here - confirm.
// Too large for the 16-bit format: 0x7c00 (exponent bits all ones, mantissa
// zero) when f.u does not exceed the infinity pattern, 0x7e00 (top mantissa
// bit also set) when it does.
81 if (f.u >= f16max.u) {
82 val = (f.u > f32infty.u) ? 0x7e00 : 0x7c00;
// Biased exponent below 113 (== 127 - 14, i.e. magnitude below 2^-14).
85 if (f.u < (113 << 23)) {
// Float addition of the magic constant, followed by an integer subtraction of
// its bit pattern; the low 16 bits of the difference become the result.
89 f.f += denorm_magic.f;
92 val =
static_cast<uint16_t
>(f.u - denorm_magic.u);
// Bit 13 is the lowest bit that survives the `>> 13` below.
// Presumably feeds a round-to-nearest-even adjustment in lines not shown.
95 unsigned int mant_odd = (f.u >> 13) & 1;
// Discard the 13 low bits and keep the low 16 bits of what remains.
104 val =
static_cast<uint16_t
>(f.u >> 13);
// Move the saved sign bit from bit 31 down to bit 15 and merge it in.
108 val |=
static_cast<uint16_t
>(sign >> 16);
// Expands the 16-bit pattern `val` into a 32-bit pattern built in `o.u`.
// NOTE(review): the return-type line, braces, the code that uses `magic` and
// the final return are missing from this listing; presumably the function
// returns `o.f` - confirm against the full file.
116convertToFloat16Impl(uint16_t val)
// Biased exponent 113 (== 127 - 14); not referenced by any visible line.
118 constexpr detail::float32_bits magic = { 113 << 23 };
// 0x7c00 selects the five exponent bits of the 16-bit pattern; shifted by 13
// to match the position they occupy in `o.u` below.
119 constexpr unsigned int shifted_exp = 0x7c00 << 13;
120 detail::float32_bits o{};
// Drop the sign bit (bit 15) and move the exponent/mantissa bits up by 13.
122 o.u = (val & 0x7fff) << 13;
// Keep only the exponent bits for the test below.
123 unsigned int exp = shifted_exp & o.u;
// Re-bias the exponent: add 127 - 15 to the exponent field.
124 o.u += (127 - 15) << 23;
// All five source exponent bits set: add another 128 - 16 so the 8-bit
// exponent field becomes 31 + 112 + 112 == 255 (all ones).
127 if (exp == shifted_exp) {
128 o.u += (128 - 16) << 23;
// Move the source sign bit from bit 15 up to bit 31.
136 o.u |= (val & 0x8000U) << 16U;
// Reduces the 32-bit pattern of `v` to its upper 16 bits, with rounding.
// NOTE(review): the return-type line, the declaration of `input` and the final
// return are missing from this listing; presumably `output` is returned.
// NOTE(review): no NaN special case appears in the visible lines - confirm
// whether adding the rounding bias to a NaN pattern is acceptable.
146convertBFloat16ToUint16Impl(
float v)
// Copy sizeof(uint32_t) bytes of `v` into `input` to access its bit pattern.
149 std::memcpy(&input, &v,
sizeof(uint32_t));
// Bit 16 is the lowest bit that survives the `>> 16` below.
151 uint32_t lsb = (input >> 16) & 1;
// Adding 0x7fff + lsb before truncating rounds the discarded low 16 bits to
// nearest, with ties going to the value whose kept low bit is 0.
152 uint32_t rounding_bias = 0x7fff + lsb;
153 input += rounding_bias;
// Keep the upper 16 bits.
154 uint16_t output =
static_cast<uint16_t
>(input >> 16);
// Writes the 16-bit pattern `val` into one half of `result` and zeroes the
// other half, choosing the half according to the platform byte order.
// NOTE(review): the return-type line, the declaration of `result`, the
// `#else`/`#endif` directives and the final return are missing from this
// listing; presumably `result` is a 4-byte float that is returned - confirm.
162convertToBFloat16Impl(uint16_t val)
// `first` addresses the first byte of `result`.
165 char*
const first =
reinterpret_cast<char*
>(&result);
// `second` addresses the byte sizeof(uint16_t) bytes after `first`.
166 char*
const second = first +
sizeof(uint16_t);
// Big-endian: `val` goes into the leading bytes, the following bytes are zeroed.
170#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
171 std::memcpy(first, &val,
sizeof(uint16_t));
172 std::memset(second, 0,
sizeof(uint16_t));
// Presumably the `#else` branch (directive not shown): the leading bytes are
// zeroed and `val` goes into the following bytes.
174 std::memset(first, 0,
sizeof(uint16_t));
175 std::memcpy(second, &val,
sizeof(uint16_t));
__host__ __device__ double exp(double v)
Exponentielle de v.