Arcane  v3.16.8.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
diyfp.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2// Tencent is pleased to support the open source community by making RapidJSON available.
3//
4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
5//
6// Licensed under the MIT License (the "License"); you may not use this file except
7// in compliance with the License. You may obtain a copy of the License at
8//
9// http://opensource.org/licenses/MIT
10//
11// Unless required by applicable law or agreed to in writing, software distributed
12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
13// CONDITIONS OF ANY KIND, either express or implied. See the License for the
14// specific language governing permissions and limitations under the License.
15
16// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
17// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
18// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
19
20#ifndef RAPIDJSON_DIYFP_H_
21#define RAPIDJSON_DIYFP_H_
22
23#include "../rapidjson.h"
24#include "clzll.h"
25#include <limits>
26
27#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
28#include <intrin.h>
29#if !defined(_ARM64EC_)
30#pragma intrinsic(_umul128)
31#else
32#pragma comment(lib,"softintrin")
33#endif
34#endif
35
37namespace internal {
38
39#ifdef __GNUC__
40RAPIDJSON_DIAG_PUSH
41RAPIDJSON_DIAG_OFF(effc++)
42#endif
43
44#ifdef __clang__
45RAPIDJSON_DIAG_PUSH
46RAPIDJSON_DIAG_OFF(padded)
47#endif
48
49struct DiyFp {
50 DiyFp() : f(), e() {}
51
52 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
53
54 explicit DiyFp(double d) {
55 union {
56 double d;
57 uint64_t u64;
58 } u = { d };
59
60 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
61 uint64_t significand = (u.u64 & kDpSignificandMask);
62 if (biased_e != 0) {
63 f = significand + kDpHiddenBit;
64 e = biased_e - kDpExponentBias;
65 }
66 else {
67 f = significand;
68 e = kDpMinExponent + 1;
69 }
70 }
71
72 DiyFp operator-(const DiyFp& rhs) const {
73 return DiyFp(f - rhs.f, e);
74 }
75
76 DiyFp operator*(const DiyFp& rhs) const {
77#if defined(_MSC_VER) && defined(_M_AMD64)
78 uint64_t h;
79 uint64_t l = _umul128(f, rhs.f, &h);
80 if (l & (uint64_t(1) << 63)) // rounding
81 h++;
82 return DiyFp(h, e + rhs.e + 64);
83#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
84 __extension__ typedef unsigned __int128 uint128;
85 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
86 uint64_t h = static_cast<uint64_t>(p >> 64);
87 uint64_t l = static_cast<uint64_t>(p);
88 if (l & (uint64_t(1) << 63)) // rounding
89 h++;
90 return DiyFp(h, e + rhs.e + 64);
91#else
92 const uint64_t M32 = 0xFFFFFFFF;
93 const uint64_t a = f >> 32;
94 const uint64_t b = f & M32;
95 const uint64_t c = rhs.f >> 32;
96 const uint64_t d = rhs.f & M32;
97 const uint64_t ac = a * c;
98 const uint64_t bc = b * c;
99 const uint64_t ad = a * d;
100 const uint64_t bd = b * d;
101 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
102 tmp += 1U << 31;
103 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
104#endif
105 }
106
107 DiyFp Normalize() const {
108 int s = static_cast<int>(clzll(f));
109 return DiyFp(f << s, e - s);
110 }
111
112 DiyFp NormalizeBoundary() const {
113 DiyFp res = *this;
114 while (!(res.f & (kDpHiddenBit << 1))) {
115 res.f <<= 1;
116 res.e--;
117 }
118 res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
119 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
120 return res;
121 }
122
123 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
124 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
125 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
126 mi.f <<= mi.e - pl.e;
127 mi.e = pl.e;
128 *plus = pl;
129 *minus = mi;
130 }
131
132 double ToDouble() const {
133 union {
134 double d;
135 uint64_t u64;
136 }u;
137 RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
138 if (e < kDpDenormalExponent) {
139 // Underflow.
140 return 0.0;
141 }
142 if (e >= kDpMaxExponent) {
143 // Overflow.
144 return std::numeric_limits<double>::infinity();
145 }
146 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
147 static_cast<uint64_t>(e + kDpExponentBias);
148 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
149 return u.d;
150 }
151
152 static const int kDiySignificandSize = 64;
153 static const int kDpSignificandSize = 52;
154 static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
155 static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
156 static const int kDpMinExponent = -kDpExponentBias;
157 static const int kDpDenormalExponent = -kDpExponentBias + 1;
158 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
159 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
160 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
161
162 uint64_t f;
163 int e;
164};
165
166inline DiyFp GetCachedPowerByIndex(size_t index) {
167 // 10^-348, 10^-340, ..., 10^340
168 static const uint64_t kCachedPowers_F[] = {
169 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
170 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
171 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
172 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
173 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
174 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
175 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
176 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
177 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
178 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
179 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
180 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
181 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
182 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
183 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
184 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
185 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
186 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
187 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
188 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
189 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
190 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
191 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
192 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
193 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
194 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
195 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
196 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
197 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
198 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
199 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
200 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
201 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
202 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
203 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
204 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
205 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
206 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
207 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
208 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
209 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
210 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
211 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
212 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
213 };
214 static const int16_t kCachedPowers_E[] = {
215 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
216 -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
217 -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
218 -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
219 -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
220 109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
221 375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
222 641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
223 907, 933, 960, 986, 1013, 1039, 1066
224 };
225 RAPIDJSON_ASSERT(index < 87);
226 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
227}
228
229inline DiyFp GetCachedPower(int e, int* K) {
230
231 //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
232 double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
233 int k = static_cast<int>(dk);
234 if (dk - k > 0.0)
235 k++;
236
237 unsigned index = static_cast<unsigned>((k >> 3) + 1);
238 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
239
240 return GetCachedPowerByIndex(index);
241}
242
243inline DiyFp GetCachedPower10(int exp, int *outExp) {
244 RAPIDJSON_ASSERT(exp >= -348);
245 unsigned index = static_cast<unsigned>(exp + 348) / 8u;
246 *outExp = -348 + static_cast<int>(index) * 8;
247 return GetCachedPowerByIndex(index);
248}
249
250#ifdef __GNUC__
251RAPIDJSON_DIAG_POP
252#endif
253
254#ifdef __clang__
255RAPIDJSON_DIAG_POP
256RAPIDJSON_DIAG_OFF(padded)
257#endif
258
259} // namespace internal
261
262#endif // RAPIDJSON_DIYFP_H_
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:438
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:122
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:125
__host__ __device__ double exp(double v)
Exponentielle de v.
Definition Math.h:116
common definitions and configuration
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition rapidjson.h:321
DiyFp operator*(const DiyFp &rhs) const
Definition diyfp.h:76