Arcane  v3.15.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
diyfp.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2// Tencent is pleased to support the open source community by making RapidJSON available.
3//
4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
5//
6// Licensed under the MIT License (the "License"); you may not use this file except
7// in compliance with the License. You may obtain a copy of the License at
8//
9// http://opensource.org/licenses/MIT
10//
11// Unless required by applicable law or agreed to in writing, software distributed
12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
13// CONDITIONS OF ANY KIND, either express or implied. See the License for the
14// specific language governing permissions and limitations under the License.
15
16// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
17// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
18// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
19
20#ifndef RAPIDJSON_DIYFP_H_
21#define RAPIDJSON_DIYFP_H_
22
23#include "../rapidjson.h"
24#include "clzll.h"
25#include <limits>
26
27#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
28#include <intrin.h>
29#pragma intrinsic(_umul128)
30#endif
31
33namespace internal {
34
35#ifdef __GNUC__
36RAPIDJSON_DIAG_PUSH
37RAPIDJSON_DIAG_OFF(effc++)
38#endif
39
40#ifdef __clang__
41RAPIDJSON_DIAG_PUSH
42RAPIDJSON_DIAG_OFF(padded)
43#endif
44
45struct DiyFp {
46 DiyFp() : f(), e() {}
47
48 DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
49
50 explicit DiyFp(double d) {
51 union {
52 double d;
53 uint64_t u64;
54 } u = { d };
55
56 int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
57 uint64_t significand = (u.u64 & kDpSignificandMask);
58 if (biased_e != 0) {
59 f = significand + kDpHiddenBit;
60 e = biased_e - kDpExponentBias;
61 }
62 else {
63 f = significand;
64 e = kDpMinExponent + 1;
65 }
66 }
67
68 DiyFp operator-(const DiyFp& rhs) const {
69 return DiyFp(f - rhs.f, e);
70 }
71
72 DiyFp operator*(const DiyFp& rhs) const {
73#if defined(_MSC_VER) && defined(_M_AMD64)
74 uint64_t h;
75 uint64_t l = _umul128(f, rhs.f, &h);
76 if (l & (uint64_t(1) << 63)) // rounding
77 h++;
78 return DiyFp(h, e + rhs.e + 64);
79#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) && !defined(__NVCOMPILER)
80 __extension__ typedef unsigned __int128 uint128;
81 uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
82 uint64_t h = static_cast<uint64_t>(p >> 64);
83 uint64_t l = static_cast<uint64_t>(p);
84 if (l & (uint64_t(1) << 63)) // rounding
85 h++;
86 return DiyFp(h, e + rhs.e + 64);
87#else
88 const uint64_t M32 = 0xFFFFFFFF;
89 const uint64_t a = f >> 32;
90 const uint64_t b = f & M32;
91 const uint64_t c = rhs.f >> 32;
92 const uint64_t d = rhs.f & M32;
93 const uint64_t ac = a * c;
94 const uint64_t bc = b * c;
95 const uint64_t ad = a * d;
96 const uint64_t bd = b * d;
97 uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
98 tmp += 1U << 31;
99 return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
100#endif
101 }
102
103 DiyFp Normalize() const {
104 int s = static_cast<int>(RAPIDJSON_CLZLL(f));
105 return DiyFp(f << s, e - s);
106 }
107
108 DiyFp NormalizeBoundary() const {
109 DiyFp res = *this;
110 while (!(res.f & (kDpHiddenBit << 1))) {
111 res.f <<= 1;
112 res.e--;
113 }
114 res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
115 res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
116 return res;
117 }
118
119 void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
120 DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
121 DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
122 mi.f <<= mi.e - pl.e;
123 mi.e = pl.e;
124 *plus = pl;
125 *minus = mi;
126 }
127
128 double ToDouble() const {
129 union {
130 double d;
131 uint64_t u64;
132 }u;
133 RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
134 if (e < kDpDenormalExponent) {
135 // Underflow.
136 return 0.0;
137 }
138 if (e >= kDpMaxExponent) {
139 // Overflow.
140 return std::numeric_limits<double>::infinity();
141 }
142 const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
143 static_cast<uint64_t>(e + kDpExponentBias);
144 u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
145 return u.d;
146 }
147
148 static const int kDiySignificandSize = 64;
149 static const int kDpSignificandSize = 52;
150 static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
151 static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
152 static const int kDpMinExponent = -kDpExponentBias;
153 static const int kDpDenormalExponent = -kDpExponentBias + 1;
154 static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
155 static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
156 static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
157
158 uint64_t f;
159 int e;
160};
161
162inline DiyFp GetCachedPowerByIndex(size_t index) {
163 // 10^-348, 10^-340, ..., 10^340
164 static const uint64_t kCachedPowers_F[] = {
165 RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
166 RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
167 RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
168 RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
169 RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
170 RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
171 RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
172 RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
173 RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
174 RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
175 RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
176 RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
177 RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
178 RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
179 RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
180 RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
181 RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
182 RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
183 RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
184 RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
185 RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
186 RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
187 RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
188 RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
189 RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
190 RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
191 RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
192 RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
193 RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
194 RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
195 RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
196 RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
197 RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
198 RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
199 RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
200 RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
201 RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
202 RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
203 RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
204 RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
205 RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
206 RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
207 RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
208 RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
209 };
210 static const int16_t kCachedPowers_E[] = {
211 -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
212 -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
213 -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
214 -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
215 -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
216 109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
217 375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
218 641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
219 907, 933, 960, 986, 1013, 1039, 1066
220 };
221 RAPIDJSON_ASSERT(index < 87);
222 return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
223}
224
225inline DiyFp GetCachedPower(int e, int* K) {
226
227 //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
228 double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
229 int k = static_cast<int>(dk);
230 if (dk - k > 0.0)
231 k++;
232
233 unsigned index = static_cast<unsigned>((k >> 3) + 1);
234 *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
235
236 return GetCachedPowerByIndex(index);
237}
238
239inline DiyFp GetCachedPower10(int exp, int *outExp) {
240 RAPIDJSON_ASSERT(exp >= -348);
241 unsigned index = static_cast<unsigned>(exp + 348) / 8u;
242 *outExp = -348 + static_cast<int>(index) * 8;
243 return GetCachedPowerByIndex(index);
244}
245
246#ifdef __GNUC__
247RAPIDJSON_DIAG_POP
248#endif
249
250#ifdef __clang__
251RAPIDJSON_DIAG_POP
252RAPIDJSON_DIAG_OFF(padded)
253#endif
254
255} // namespace internal
257
258#endif // RAPIDJSON_DIYFP_H_
Lecteur des fichiers de maillage via la bibliothèque LIMA.
Definition Lima.cc:149
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:407
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:122
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:125
ARCCORE_HOST_DEVICE double exp(double v)
Exponentielle de v.
Definition Math.h:116
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition rapidjson.h:290
DiyFp operator*(const DiyFp &rhs) const
Definition diyfp.h:72