Arcane  v3.16.8.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
reader.h
Aller à la documentation de ce fichier.
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2// Tencent is pleased to support the open source community by making RapidJSON available.
3//
4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
5//
6// Licensed under the MIT License (the "License"); you may not use this file except
7// in compliance with the License. You may obtain a copy of the License at
8//
9// http://opensource.org/licenses/MIT
10//
11// Unless required by applicable law or agreed to in writing, software distributed
12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
13// CONDITIONS OF ANY KIND, either express or implied. See the License for the
14// specific language governing permissions and limitations under the License.
15
16#ifndef RAPIDJSON_READER_H_
17#define RAPIDJSON_READER_H_
18
20
21#include "allocators.h"
22#include "stream.h"
23#include "encodedstream.h"
24#include "internal/clzll.h"
25#include "internal/meta.h"
26#include "internal/stack.h"
27#include "internal/strtod.h"
28#include <limits>
29
30#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
31#include <intrin.h>
32#pragma intrinsic(_BitScanForward)
33#endif
34#ifdef RAPIDJSON_SSE42
35#include <nmmintrin.h>
36#elif defined(RAPIDJSON_SSE2)
37#include <emmintrin.h>
38#elif defined(RAPIDJSON_NEON)
39#include <arm_neon.h>
40#endif
41
42#ifdef __clang__
43RAPIDJSON_DIAG_PUSH
44RAPIDJSON_DIAG_OFF(old-style-cast)
45RAPIDJSON_DIAG_OFF(padded)
46RAPIDJSON_DIAG_OFF(switch-enum)
47#elif defined(_MSC_VER)
48RAPIDJSON_DIAG_PUSH
49RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
50RAPIDJSON_DIAG_OFF(4702) // unreachable code
51#endif
52
53#ifdef __GNUC__
54RAPIDJSON_DIAG_PUSH
55RAPIDJSON_DIAG_OFF(effc++)
56#endif
57
59#define RAPIDJSON_NOTHING /* deliberately empty */
60#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
61#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
62 RAPIDJSON_MULTILINEMACRO_BEGIN \
63 if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
64 RAPIDJSON_MULTILINEMACRO_END
65#endif
66#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
67 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
69
100#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
101#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
102 RAPIDJSON_MULTILINEMACRO_BEGIN \
103 RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
104 SetParseError(parseErrorCode, offset); \
105 RAPIDJSON_MULTILINEMACRO_END
106#endif
107
119#ifndef RAPIDJSON_PARSE_ERROR
120#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
121 RAPIDJSON_MULTILINEMACRO_BEGIN \
122 RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
123 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
124 RAPIDJSON_MULTILINEMACRO_END
125#endif
126
127#include "error/error.h" // ParseErrorCode, ParseResult
128
130
132// ParseFlag
133
140#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
141#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
142#endif
143
145
161
163// Handler
164
181 bool RawNumber(const Ch* str, SizeType length, bool copy);
182 bool String(const Ch* str, SizeType length, bool copy);
183 bool StartObject();
184 bool Key(const Ch* str, SizeType length, bool copy);
185 bool EndObject(SizeType memberCount);
186 bool StartArray();
187 bool EndArray(SizeType elementCount);
188};
189\endcode
190*/
192// BaseReaderHandler
193
195
198template<typename Encoding = UTF8<>, typename Derived = void>
200 typedef typename Encoding::Ch Ch;
201
202 typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
203
204 bool Default() { return true; }
205 bool Null() { return static_cast<Override&>(*this).Default(); }
206 bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
207 bool Int(int) { return static_cast<Override&>(*this).Default(); }
208 bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
209 bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
210 bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
211 bool Double(double) { return static_cast<Override&>(*this).Default(); }
213 bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
214 bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
215 bool StartObject() { return static_cast<Override&>(*this).Default(); }
216 bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
217 bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
218 bool StartArray() { return static_cast<Override&>(*this).Default(); }
219 bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
220};
221
223// StreamLocalCopy
224
225namespace internal {
226
227template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
229
231template<typename Stream>
232class StreamLocalCopy<Stream, 1> {
233public:
234 StreamLocalCopy(Stream& original) : s(original), original_(original) {}
235 ~StreamLocalCopy() { original_ = s; }
236
237 Stream s;
238
239private:
240 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
241
242 Stream& original_;
243};
244
246template<typename Stream>
247class StreamLocalCopy<Stream, 0> {
248public:
249 StreamLocalCopy(Stream& original) : s(original) {}
250
251 Stream& s;
252
253private:
254 StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
255};
256
257} // namespace internal
258
260// SkipWhitespace
261
263
266template<typename InputStream>
267void SkipWhitespace(InputStream& is) {
269 InputStream& s(copy.s);
270
271 typename InputStream::Ch c;
272 while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
273 s.Take();
274}
275
//! Skip JSON whitespace in the half-open character range [p, end).
/*!
    \param p   First character to examine.
    \param end One past the last character that may be examined.
    \return Pointer to the first character that is not ' ', '\n', '\r' or '\t',
            or \c end when the whole range is whitespace.
*/
inline const char* SkipWhitespace(const char* p, const char* end) {
    for (; p != end; ++p) {
        const char ch = *p;
        const bool isWhitespace = (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t');
        if (!isWhitespace)
            break;
    }
    return p;
}
281
282#ifdef RAPIDJSON_SSE42
284inline const char *SkipWhitespace_SIMD(const char* p) {
285 // Fast return for single non-whitespace
286 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
287 ++p;
288 else
289 return p;
290
291 // 16-byte align to the next boundary
292 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
293 while (p != nextAligned)
294 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
295 ++p;
296 else
297 return p;
298
299 // The rest of string using SIMD
300 static const char whitespace[16] = " \n\r\t";
301 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
302
303 for (;; p += 16) {
304 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
305 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
306 if (r != 16) // some of characters is non-whitespace
307 return p + r;
308 }
309}
310
311inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
312 // Fast return for single non-whitespace
313 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
314 ++p;
315 else
316 return p;
317
318 // The middle of string using SIMD
319 static const char whitespace[16] = " \n\r\t";
320 const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
321
322 for (; p <= end - 16; p += 16) {
323 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
324 const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
325 if (r != 16) // some of characters is non-whitespace
326 return p + r;
327 }
328
329 return SkipWhitespace(p, end);
330}
331
332#elif defined(RAPIDJSON_SSE2)
333
335inline const char *SkipWhitespace_SIMD(const char* p) {
336 // Fast return for single non-whitespace
337 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
338 ++p;
339 else
340 return p;
341
342 // 16-byte align to the next boundary
343 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
344 while (p != nextAligned)
345 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
346 ++p;
347 else
348 return p;
349
350 // The rest of string
351 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
352 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
353 #undef C16
354
355 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
356 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
357 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
358 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
359
360 for (;; p += 16) {
361 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
362 __m128i x = _mm_cmpeq_epi8(s, w0);
363 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
364 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
365 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
366 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
367 if (r != 0) { // some of characters may be non-whitespace
368#ifdef _MSC_VER // Find the index of first non-whitespace
369 unsigned long offset;
370 _BitScanForward(&offset, r);
371 return p + offset;
372#else
373 return p + __builtin_ffs(r) - 1;
374#endif
375 }
376 }
377}
378
379inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
380 // Fast return for single non-whitespace
381 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
382 ++p;
383 else
384 return p;
385
386 // The rest of string
387 #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
388 static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
389 #undef C16
390
391 const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
392 const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
393 const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
394 const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
395
396 for (; p <= end - 16; p += 16) {
397 const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
398 __m128i x = _mm_cmpeq_epi8(s, w0);
399 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
400 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
401 x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
402 unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
403 if (r != 0) { // some of characters may be non-whitespace
404#ifdef _MSC_VER // Find the index of first non-whitespace
405 unsigned long offset;
406 _BitScanForward(&offset, r);
407 return p + offset;
408#else
409 return p + __builtin_ffs(r) - 1;
410#endif
411 }
412 }
413
414 return SkipWhitespace(p, end);
415}
416
417#elif defined(RAPIDJSON_NEON)
418
420inline const char *SkipWhitespace_SIMD(const char* p) {
421 // Fast return for single non-whitespace
422 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
423 ++p;
424 else
425 return p;
426
427 // 16-byte align to the next boundary
428 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
429 while (p != nextAligned)
430 if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
431 ++p;
432 else
433 return p;
434
435 const uint8x16_t w0 = vmovq_n_u8(' ');
436 const uint8x16_t w1 = vmovq_n_u8('\n');
437 const uint8x16_t w2 = vmovq_n_u8('\r');
438 const uint8x16_t w3 = vmovq_n_u8('\t');
439
440 for (;; p += 16) {
441 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
442 uint8x16_t x = vceqq_u8(s, w0);
443 x = vorrq_u8(x, vceqq_u8(s, w1));
444 x = vorrq_u8(x, vceqq_u8(s, w2));
445 x = vorrq_u8(x, vceqq_u8(s, w3));
446
447 x = vmvnq_u8(x); // Negate
448 x = vrev64q_u8(x); // Rev in 64
449 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
450 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
451
452 if (low == 0) {
453 if (high != 0) {
454 uint32_t lz = internal::clzll(high);
455 return p + 8 + (lz >> 3);
456 }
457 } else {
458 uint32_t lz = internal::clzll(low);
459 return p + (lz >> 3);
460 }
461 }
462}
463
464inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
465 // Fast return for single non-whitespace
466 if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
467 ++p;
468 else
469 return p;
470
471 const uint8x16_t w0 = vmovq_n_u8(' ');
472 const uint8x16_t w1 = vmovq_n_u8('\n');
473 const uint8x16_t w2 = vmovq_n_u8('\r');
474 const uint8x16_t w3 = vmovq_n_u8('\t');
475
476 for (; p <= end - 16; p += 16) {
477 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
478 uint8x16_t x = vceqq_u8(s, w0);
479 x = vorrq_u8(x, vceqq_u8(s, w1));
480 x = vorrq_u8(x, vceqq_u8(s, w2));
481 x = vorrq_u8(x, vceqq_u8(s, w3));
482
483 x = vmvnq_u8(x); // Negate
484 x = vrev64q_u8(x); // Rev in 64
485 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
486 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
487
488 if (low == 0) {
489 if (high != 0) {
490 uint32_t lz = internal::clzll(high);
491 return p + 8 + (lz >> 3);
492 }
493 } else {
494 uint32_t lz = internal::clzll(low);
495 return p + (lz >> 3);
496 }
497 }
498
499 return SkipWhitespace(p, end);
500}
501
502#endif // RAPIDJSON_NEON
503
504#ifdef RAPIDJSON_SIMD
506template<> inline void SkipWhitespace(InsituStringStream& is) {
507 is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
508}
509
511template<> inline void SkipWhitespace(StringStream& is) {
512 is.src_ = SkipWhitespace_SIMD(is.src_);
513}
514
515template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
516 is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
517}
518#endif // RAPIDJSON_SIMD
519
521// GenericReader
522
524
539template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
541public:
542 typedef typename SourceEncoding::Ch Ch;
543
545
548 GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
549 stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
550
552
559 template <unsigned parseFlags, typename InputStream, typename Handler>
560 ParseResult Parse(InputStream& is, Handler& handler) {
561 if (parseFlags & kParseIterativeFlag)
562 return IterativeParse<parseFlags>(is, handler);
563
564 parseResult_.Clear();
565
566 ClearStackOnExit scope(*this);
567
568 SkipWhitespaceAndComments<parseFlags>(is);
569 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
570
571 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
573 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
574 }
575 else {
576 ParseValue<parseFlags>(is, handler);
577 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
578
579 if (!(parseFlags & kParseStopWhenDoneFlag)) {
580 SkipWhitespaceAndComments<parseFlags>(is);
581 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
582
583 if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
585 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
586 }
587 }
588 }
589
590 return parseResult_;
591 }
592
594
600 template <typename InputStream, typename Handler>
601 ParseResult Parse(InputStream& is, Handler& handler) {
602 return Parse<kParseDefaultFlags>(is, handler);
603 }
604
606
609 parseResult_.Clear();
610 state_ = IterativeParsingStartState;
611 }
612
614
620 template <unsigned parseFlags, typename InputStream, typename Handler>
621 bool IterativeParseNext(InputStream& is, Handler& handler) {
622 while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
623 SkipWhitespaceAndComments<parseFlags>(is);
624
625 Token t = Tokenize(is.Peek());
626 IterativeParsingState n = Predict(state_, t);
627 IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
628
629 // If we've finished or hit an error...
630 if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
631 // Report errors.
632 if (d == IterativeParsingErrorState) {
633 HandleError(state_, is);
634 return false;
635 }
636
637 // Transition to the finish state.
638 RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
639 state_ = d;
640
641 // If StopWhenDone is not set...
642 if (!(parseFlags & kParseStopWhenDoneFlag)) {
643 // ... and extra non-whitespace data is found...
644 SkipWhitespaceAndComments<parseFlags>(is);
645 if (is.Peek() != '\0') {
646 // ... this is considered an error.
647 HandleError(state_, is);
648 return false;
649 }
650 }
651
652 // Success! We are done!
653 return true;
654 }
655
656 // Transition to the new state.
657 state_ = d;
658
659 // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
660 if (!IsIterativeParsingDelimiterState(n))
661 return true;
662 }
663
664 // We reached the end of file.
665 stack_.Clear();
666
667 if (state_ != IterativeParsingFinishState) {
668 HandleError(state_, is);
669 return false;
670 }
671
672 return true;
673 }
674
676
678 RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
679 return IsIterativeParsingCompleteState(state_);
680 }
681
683 bool HasParseError() const { return parseResult_.IsError(); }
684
686 ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
687
689 size_t GetErrorOffset() const { return parseResult_.Offset(); }
690
691protected:
692 void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
693
694private:
695 // Prohibit copy constructor & assignment operator.
697 GenericReader& operator=(const GenericReader&);
698
699 void ClearStack() { stack_.Clear(); }
700
701 // clear stack on any exit from ParseStream, e.g. due to exception
702 struct ClearStackOnExit {
703 explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
704 ~ClearStackOnExit() { r_.ClearStack(); }
705 private:
706 GenericReader& r_;
707 ClearStackOnExit(const ClearStackOnExit&);
708 ClearStackOnExit& operator=(const ClearStackOnExit&);
709 };
710
711 template<unsigned parseFlags, typename InputStream>
712 void SkipWhitespaceAndComments(InputStream& is) {
713 SkipWhitespace(is);
714
715 if (parseFlags & kParseCommentsFlag) {
716 while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
717 if (Consume(is, '*')) {
718 while (true) {
719 if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
721 else if (Consume(is, '*')) {
722 if (Consume(is, '/'))
723 break;
724 }
725 else
726 is.Take();
727 }
728 }
729 else if (RAPIDJSON_LIKELY(Consume(is, '/')))
730 while (is.Peek() != '\0' && is.Take() != '\n') {}
731 else
733
734 SkipWhitespace(is);
735 }
736 }
737 }
738
739 // Parse object: { string : value, ... }
740 template<unsigned parseFlags, typename InputStream, typename Handler>
741 void ParseObject(InputStream& is, Handler& handler) {
742 RAPIDJSON_ASSERT(is.Peek() == '{');
743 is.Take(); // Skip '{'
744
745 if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
747
748 SkipWhitespaceAndComments<parseFlags>(is);
749 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
750
751 if (Consume(is, '}')) {
752 if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object
754 return;
755 }
756
757 for (SizeType memberCount = 0;;) {
758 if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
760
761 ParseString<parseFlags>(is, handler, true);
762 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
763
764 SkipWhitespaceAndComments<parseFlags>(is);
765 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
766
767 if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
769
770 SkipWhitespaceAndComments<parseFlags>(is);
771 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
772
773 ParseValue<parseFlags>(is, handler);
774 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
775
776 SkipWhitespaceAndComments<parseFlags>(is);
777 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
778
779 ++memberCount;
780
781 switch (is.Peek()) {
782 case ',':
783 is.Take();
784 SkipWhitespaceAndComments<parseFlags>(is);
785 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
786 break;
787 case '}':
788 is.Take();
789 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
791 return;
792 default:
793 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
794 }
795
796 if (parseFlags & kParseTrailingCommasFlag) {
797 if (is.Peek() == '}') {
798 if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
800 is.Take();
801 return;
802 }
803 }
804 }
805 }
806
807 // Parse array: [ value, ... ]
808 template<unsigned parseFlags, typename InputStream, typename Handler>
809 void ParseArray(InputStream& is, Handler& handler) {
810 RAPIDJSON_ASSERT(is.Peek() == '[');
811 is.Take(); // Skip '['
812
813 if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
815
816 SkipWhitespaceAndComments<parseFlags>(is);
817 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
818
819 if (Consume(is, ']')) {
820 if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
822 return;
823 }
824
825 for (SizeType elementCount = 0;;) {
826 ParseValue<parseFlags>(is, handler);
827 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
828
829 ++elementCount;
830 SkipWhitespaceAndComments<parseFlags>(is);
831 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
832
833 if (Consume(is, ',')) {
834 SkipWhitespaceAndComments<parseFlags>(is);
835 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
836 }
837 else if (Consume(is, ']')) {
838 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
840 return;
841 }
842 else
844
845 if (parseFlags & kParseTrailingCommasFlag) {
846 if (is.Peek() == ']') {
847 if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
849 is.Take();
850 return;
851 }
852 }
853 }
854 }
855
856 template<unsigned parseFlags, typename InputStream, typename Handler>
857 void ParseNull(InputStream& is, Handler& handler) {
858 RAPIDJSON_ASSERT(is.Peek() == 'n');
859 is.Take();
860
861 if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
862 if (RAPIDJSON_UNLIKELY(!handler.Null()))
864 }
865 else
867 }
868
869 template<unsigned parseFlags, typename InputStream, typename Handler>
870 void ParseTrue(InputStream& is, Handler& handler) {
871 RAPIDJSON_ASSERT(is.Peek() == 't');
872 is.Take();
873
874 if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
875 if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
877 }
878 else
880 }
881
882 template<unsigned parseFlags, typename InputStream, typename Handler>
883 void ParseFalse(InputStream& is, Handler& handler) {
884 RAPIDJSON_ASSERT(is.Peek() == 'f');
885 is.Take();
886
887 if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
888 if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
890 }
891 else
893 }
894
895 template<typename InputStream>
896 RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
897 if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
898 is.Take();
899 return true;
900 }
901 else
902 return false;
903 }
904
905 // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
906 template<typename InputStream>
907 unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
908 unsigned codepoint = 0;
909 for (int i = 0; i < 4; i++) {
910 Ch c = is.Peek();
911 codepoint <<= 4;
912 codepoint += static_cast<unsigned>(c);
913 if (c >= '0' && c <= '9')
914 codepoint -= '0';
915 else if (c >= 'A' && c <= 'F')
916 codepoint -= 'A' - 10;
917 else if (c >= 'a' && c <= 'f')
918 codepoint -= 'a' - 10;
919 else {
921 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
922 }
923 is.Take();
924 }
925 return codepoint;
926 }
927
928 template <typename CharType>
929 class StackStream {
930 public:
931 typedef CharType Ch;
932
933 StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
934 RAPIDJSON_FORCEINLINE void Put(Ch c) {
935 *stack_.template Push<Ch>() = c;
936 ++length_;
937 }
938
939 RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
940 length_ += count;
941 return stack_.template Push<Ch>(count);
942 }
943
944 size_t Length() const { return length_; }
945
946 Ch* Pop() {
947 return stack_.template Pop<Ch>(length_);
948 }
949
950 private:
951 StackStream(const StackStream&);
952 StackStream& operator=(const StackStream&);
953
955 SizeType length_;
956 };
957
958 // Parse string and generate String event. Different code paths for kParseInsituFlag.
959 template<unsigned parseFlags, typename InputStream, typename Handler>
960 void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
962 InputStream& s(copy.s);
963
964 RAPIDJSON_ASSERT(s.Peek() == '\"');
965 s.Take(); // Skip '\"'
966
967 bool success = false;
968 if (parseFlags & kParseInsituFlag) {
969 typename InputStream::Ch *head = s.PutBegin();
970 ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
971 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
972 size_t length = s.PutEnd(head) - 1;
973 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
974 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
975 success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
976 }
977 else {
978 StackStream<typename TargetEncoding::Ch> stackStream(stack_);
979 ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
980 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
981 SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
982 const typename TargetEncoding::Ch* const str = stackStream.Pop();
983 success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
984 }
985 if (RAPIDJSON_UNLIKELY(!success))
987 }
988
989 // Parse string to an output is
990 // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
991 template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
992 RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
994#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
995 static const char escape[256] = {
996 Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '/',
997 Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
998 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
999 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1000 Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
1001 };
1002#undef Z16
1004
1005 for (;;) {
1006 // Scan and copy string before "\\\"" or < 0x20. This is an optional optimization.
1007 if (!(parseFlags & kParseValidateEncodingFlag))
1008 ScanCopyUnescapedString(is, os);
1009
1010 Ch c = is.Peek();
1011 if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape
1012 size_t escapeOffset = is.Tell(); // For invalid escaping, report the initial '\\' as error offset
1013 is.Take();
1014 Ch e = is.Peek();
1015 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
1016 is.Take();
1017 os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
1018 }
1019 else if ((parseFlags & kParseEscapedApostropheFlag) && RAPIDJSON_LIKELY(e == '\'')) { // Allow escaped apostrophe
1020 is.Take();
1021 os.Put('\'');
1022 }
1023 else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode
1024 is.Take();
1025 unsigned codepoint = ParseHex4(is, escapeOffset);
1026 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1027 if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
1028 // high surrogate, check if followed by valid low surrogate
1029 if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
1030 // Handle UTF-16 surrogate pair
1031 if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1033 unsigned codepoint2 = ParseHex4(is, escapeOffset);
1034 RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1035 if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1037 codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1038 }
1039 // single low surrogate
1040 else
1041 {
1043 }
1044 }
1045 TEncoding::Encode(os, codepoint);
1046 }
1047 else
1049 }
1050 else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote
1051 is.Take();
1052 os.Put('\0'); // null-terminate the string
1053 return;
1054 }
1055 else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
1056 if (c == '\0')
1058 else
1060 }
1061 else {
1062 size_t offset = is.Tell();
1067 }
1068 }
1069 }
1070
1071 template<typename InputStream, typename OutputStream>
1072 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
1073 // Do nothing for generic version
1074 }
1075
1076#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
1077 // StringStream -> StackStream<char>
1078 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1079 const char* p = is.src_;
1080
1081 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1082 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1083 while (p != nextAligned)
1084 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1085 is.src_ = p;
1086 return;
1087 }
1088 else
1089 os.Put(*p++);
1090
1091 // The rest of string using SIMD
1092 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1093 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1094 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1095 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1096 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1097 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1098
1099 for (;; p += 16) {
1100 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1101 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1102 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1103 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1104 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1105 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1106 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1107 SizeType length;
1108 #ifdef _MSC_VER // Find the index of first escaped
1109 unsigned long offset;
1110 _BitScanForward(&offset, r);
1111 length = offset;
1112 #else
1113 length = static_cast<SizeType>(__builtin_ffs(r) - 1);
1114 #endif
1115 if (length != 0) {
1116 char* q = reinterpret_cast<char*>(os.Push(length));
1117 for (size_t i = 0; i < length; i++)
1118 q[i] = p[i];
1119
1120 p += length;
1121 }
1122 break;
1123 }
1124 _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
1125 }
1126
1127 is.src_ = p;
1128 }
1129
1130 // InsituStringStream -> InsituStringStream
// In-situ variant: moves the leading run of plain characters from the read
// position (src_) to the write position (dst_) of the same stream.
// Stops in front of the first '"', '\\' or byte below 0x20 and stores the
// advanced read/write pointers back into the stream.
// NOTE(review): presumably dst_ never runs ahead of src_, so the forward copy
// is safe for the overlapping buffer - confirm against InsituStringStream.
1131 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
// Both arguments must be the same stream object; os is otherwise unused.
1132 RAPIDJSON_ASSERT(&is == &os);
1133 (void)os;
1134
// Read and write positions coincide: nothing has to be moved, so delegate
// to SkipUnescapedString().
1135 if (is.src_ == is.dst_) {
1136 SkipUnescapedString(is);
1137 return;
1138 }
1139
// p is the read cursor, q the write cursor.
1140 char* p = is.src_;
1141 char *q = is.dst_;
1142
1143 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1144 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1145 while (p != nextAligned)
1146 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
// Terminating character found before the boundary: publish both cursors and stop.
1147 is.src_ = p;
1148 is.dst_ = q;
1149 return;
1150 }
1151 else
1152 *q++ = *p++;
1153
1154 // The rest of string using SIMD
// Three 16-byte constants: all '"', all '\\', and all 0x1F (used for the "< 0x20" test).
1155 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1156 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1157 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1158 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1159 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1160 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1161
// Each iteration loads 16 bytes at p (aligned) and, if they are all plain,
// stores them at q (unaligned) before both cursors advance by 16.
1162 for (;; p += 16, q += 16) {
1163 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1164 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1165 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1166 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1167 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
// Bit i of r is set when byte i is '"', '\\' or below 0x20.
1168 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1169 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1170 size_t length;
1171#ifdef _MSC_VER // Find the index of first escaped
1172 unsigned long offset;
1173 _BitScanForward(&offset, r);
1174 length = offset;
1175#else
1176 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1177#endif
// Move only the plain bytes that precede the terminating character,
// one at a time, then leave the loop.
1178 for (const char* pend = p + length; p != pend; )
1179 *q++ = *p++;
1180 break;
1181 }
// No terminating character in this chunk: write all 16 bytes at once.
1182 _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
1183 }
1184
// Publish the final read and write positions.
1185 is.src_ = p;
1186 is.dst_ = q;
1187 }
1188
1189 // When read/write pointers are the same for insitu stream, just skip unescaped characters
1190 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1191 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1192 char* p = is.src_;
1193
1194 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1195 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1196 for (; p != nextAligned; p++)
1197 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1198 is.src_ = is.dst_ = p;
1199 return;
1200 }
1201
1202 // The rest of string using SIMD
1203 static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
1204 static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1205 static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
1206 const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
1207 const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
1208 const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
1209
1210 for (;; p += 16) {
1211 const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
1212 const __m128i t1 = _mm_cmpeq_epi8(s, dq);
1213 const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1214 const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
1215 const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
1216 unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
1217 if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
1218 size_t length;
1219#ifdef _MSC_VER // Find the index of first escaped
1220 unsigned long offset;
1221 _BitScanForward(&offset, r);
1222 length = offset;
1223#else
1224 length = static_cast<size_t>(__builtin_ffs(r) - 1);
1225#endif
1226 p += length;
1227 break;
1228 }
1229 }
1230
1231 is.src_ = is.dst_ = p;
1232 }
1233#elif defined(RAPIDJSON_NEON)
1234 // StringStream -> StackStream<char>
1235 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
1236 const char* p = is.src_;
1237
1238 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1239 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1240 while (p != nextAligned)
1241 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1242 is.src_ = p;
1243 return;
1244 }
1245 else
1246 os.Put(*p++);
1247
1248 // The rest of string using SIMD
1249 const uint8x16_t s0 = vmovq_n_u8('"');
1250 const uint8x16_t s1 = vmovq_n_u8('\\');
1251 const uint8x16_t s2 = vmovq_n_u8('\b');
1252 const uint8x16_t s3 = vmovq_n_u8(32);
1253
1254 for (;; p += 16) {
1255 const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
1256 uint8x16_t x = vceqq_u8(s, s0);
1257 x = vorrq_u8(x, vceqq_u8(s, s1));
1258 x = vorrq_u8(x, vceqq_u8(s, s2));
1259 x = vorrq_u8(x, vcltq_u8(s, s3));
1260
1261 x = vrev64q_u8(x); // Rev in 64
1262 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1263 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1264
1265 SizeType length = 0;
1266 bool escaped = false;
1267 if (low == 0) {
1268 if (high != 0) {
1269 uint32_t lz = internal::clzll(high);
1270 length = 8 + (lz >> 3);
1271 escaped = true;
1272 }
1273 } else {
1274 uint32_t lz = internal::clzll(low);
1275 length = lz >> 3;
1276 escaped = true;
1277 }
1278 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1279 if (length != 0) {
1280 char* q = reinterpret_cast<char*>(os.Push(length));
1281 for (size_t i = 0; i < length; i++)
1282 q[i] = p[i];
1283
1284 p += length;
1285 }
1286 break;
1287 }
1288 vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
1289 }
1290
1291 is.src_ = p;
1292 }
1293
1294 // InsituStringStream -> InsituStringStream
1295 static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
1296 RAPIDJSON_ASSERT(&is == &os);
1297 (void)os;
1298
1299 if (is.src_ == is.dst_) {
1300 SkipUnescapedString(is);
1301 return;
1302 }
1303
1304 char* p = is.src_;
1305 char *q = is.dst_;
1306
1307 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1308 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1309 while (p != nextAligned)
1310 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1311 is.src_ = p;
1312 is.dst_ = q;
1313 return;
1314 }
1315 else
1316 *q++ = *p++;
1317
1318 // The rest of string using SIMD
1319 const uint8x16_t s0 = vmovq_n_u8('"');
1320 const uint8x16_t s1 = vmovq_n_u8('\\');
1321 const uint8x16_t s2 = vmovq_n_u8('\b');
1322 const uint8x16_t s3 = vmovq_n_u8(32);
1323
1324 for (;; p += 16, q += 16) {
1325 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1326 uint8x16_t x = vceqq_u8(s, s0);
1327 x = vorrq_u8(x, vceqq_u8(s, s1));
1328 x = vorrq_u8(x, vceqq_u8(s, s2));
1329 x = vorrq_u8(x, vcltq_u8(s, s3));
1330
1331 x = vrev64q_u8(x); // Rev in 64
1332 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1333 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1334
1335 SizeType length = 0;
1336 bool escaped = false;
1337 if (low == 0) {
1338 if (high != 0) {
1339 uint32_t lz = internal::clzll(high);
1340 length = 8 + (lz >> 3);
1341 escaped = true;
1342 }
1343 } else {
1344 uint32_t lz = internal::clzll(low);
1345 length = lz >> 3;
1346 escaped = true;
1347 }
1348 if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped
1349 for (const char* pend = p + length; p != pend; ) {
1350 *q++ = *p++;
1351 }
1352 break;
1353 }
1354 vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
1355 }
1356
1357 is.src_ = p;
1358 is.dst_ = q;
1359 }
1360
1361 // When read/write pointers are the same for insitu stream, just skip unescaped characters
1362 static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
1363 RAPIDJSON_ASSERT(is.src_ == is.dst_);
1364 char* p = is.src_;
1365
1366 // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
1367 const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
1368 for (; p != nextAligned; p++)
1369 if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
1370 is.src_ = is.dst_ = p;
1371 return;
1372 }
1373
1374 // The rest of string using SIMD
1375 const uint8x16_t s0 = vmovq_n_u8('"');
1376 const uint8x16_t s1 = vmovq_n_u8('\\');
1377 const uint8x16_t s2 = vmovq_n_u8('\b');
1378 const uint8x16_t s3 = vmovq_n_u8(32);
1379
1380 for (;; p += 16) {
1381 const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
1382 uint8x16_t x = vceqq_u8(s, s0);
1383 x = vorrq_u8(x, vceqq_u8(s, s1));
1384 x = vorrq_u8(x, vceqq_u8(s, s2));
1385 x = vorrq_u8(x, vcltq_u8(s, s3));
1386
1387 x = vrev64q_u8(x); // Rev in 64
1388 uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract
1389 uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract
1390
1391 if (low == 0) {
1392 if (high != 0) {
1393 uint32_t lz = internal::clzll(high);
1394 p += 8 + (lz >> 3);
1395 break;
1396 }
1397 } else {
1398 uint32_t lz = internal::clzll(low);
1399 p += lz >> 3;
1400 break;
1401 }
1402 }
1403
1404 is.src_ = is.dst_ = p;
1405 }
1406#endif // RAPIDJSON_NEON
1407
// Primary template of the number-scanning stream wrapper. Only the partial
// specialisations that follow are defined; `backup` and `pushOnTake` are the
// two flags those specialisations fix.
template<typename InputStream, typename StackCharacter, bool backup, bool pushOnTake>
class NumberStream;
1410
1411 template<typename InputStream, typename StackCharacter>
1412 class NumberStream<InputStream, StackCharacter, false, false> {
1413 public:
1414 typedef typename InputStream::Ch Ch;
1415
1416 NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; }
1417
1418 RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
1419 RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
1420 RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
1421 RAPIDJSON_FORCEINLINE void Push(char) {}
1422
1423 size_t Tell() { return is.Tell(); }
1424 size_t Length() { return 0; }
1425 const StackCharacter* Pop() { return 0; }
1426
1427 protected:
1428 NumberStream& operator=(const NumberStream&);
1429
1430 InputStream& is;
1431 };
1432
1433 template<typename InputStream, typename StackCharacter>
1434 class NumberStream<InputStream, StackCharacter, true, false> : public NumberStream<InputStream, StackCharacter, false, false> {
1435 typedef NumberStream<InputStream, StackCharacter, false, false> Base;
1436 public:
1437 NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s), stackStream(reader.stack_) {}
1438
1439 RAPIDJSON_FORCEINLINE Ch TakePush() {
1440 stackStream.Put(static_cast<StackCharacter>(Base::is.Peek()));
1441 return Base::is.Take();
1442 }
1443
1444 RAPIDJSON_FORCEINLINE void Push(StackCharacter c) {
1445 stackStream.Put(c);
1446 }
1447
1448 size_t Length() { return stackStream.Length(); }
1449
1450 const StackCharacter* Pop() {
1451 stackStream.Put('\0');
1452 return stackStream.Pop();
1453 }
1454
1455 private:
1456 StackStream<StackCharacter> stackStream;
1457 };
1458
1459 template<typename InputStream, typename StackCharacter>
1460 class NumberStream<InputStream, StackCharacter, true, true> : public NumberStream<InputStream, StackCharacter, true, false> {
1461 typedef NumberStream<InputStream, StackCharacter, true, false> Base;
1462 public:
1463 NumberStream(GenericReader& reader, InputStream& s) : Base(reader, s) {}
1464
1465 RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
1466 };
1467
// Parses a JSON number from `is` and reports it to `handler`.
//
// The literal is scanned in stages: optional minus, integer part (32-bit, then
// 64-bit, then double once the integer no longer fits), optional fraction and
// optional exponent. The result is delivered through exactly one of
// handler.RawNumber / Double / Int64 / Uint64 / Int / Uint.
//
// NOTE(review): several statements appear to be missing from this listing
// (the numbering in the left column has gaps). `copy` and `dstStream` are used
// without a visible declaration and a number of if/else branches have no
// visible body. Each such place is marked below; confirm against the original
// header before relying on this text.
1468 template<unsigned parseFlags, typename InputStream, typename Handler>
1469 void ParseNumber(InputStream& is, Handler& handler) {
// Character type of the backup buffer: TargetEncoding::Ch when numbers are
// reported as strings, plain char otherwise.
1470 typedef typename internal::SelectIf<internal::BoolType<(parseFlags & kParseNumbersAsStringsFlag) != 0>, typename TargetEncoding::Ch, char>::Type NumberCharacter;
1471
// NOTE(review): `copy` (used as `copy.s` below) has no visible declaration in this listing.
// Template flags of the stream: backup is on for numbers-as-strings without
// in-situ parsing, or otherwise for full-precision parsing; push-on-take is on
// only for numbers-as-strings without in-situ parsing.
1473 NumberStream<InputStream, NumberCharacter,
1474 ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
1475 ((parseFlags & kParseInsituFlag) == 0) :
1476 ((parseFlags & kParseFullPrecisionFlag) != 0),
1477 (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
1478 (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
1479
1480 size_t startOffset = s.Tell();
1481 double d = 0.0;
1482 bool useNanOrInf = false;
1483
1484 // Parse minus
1485 bool minus = Consume(s, '-');
1486
1487 // Parse int: zero / ( digit1-9 *DIGIT )
1488 unsigned i = 0;
1489 uint64_t i64 = 0;
1490 bool use64bit = false;
1491 int significandDigit = 0;
1492 if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
1493 i = 0;
1494 s.TakePush();
1495 }
1496 else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
1497 i = static_cast<unsigned>(s.TakePush() - '0');
1498
// Accumulate into the 32-bit `i` until appending the next digit would exceed
// the limit named in the comment; then hand over to the 64-bit accumulator.
1499 if (minus)
1500 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1501 if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
1502 if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
1503 i64 = i;
1504 use64bit = true;
1505 break;
1506 }
1507 }
1508 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1509 significandDigit++;
1510 }
1511 else
1512 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1513 if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
1514 if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
1515 i64 = i;
1516 use64bit = true;
1517 break;
1518 }
1519 }
1520 i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
1521 significandDigit++;
1522 }
1523 }
1524 // Parse NaN or Infinity here
1525 else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
1526 if (Consume(s, 'N')) {
1527 if (Consume(s, 'a') && Consume(s, 'N')) {
1528 d = std::numeric_limits<double>::quiet_NaN();
1529 useNanOrInf = true;
1530 }
1531 }
1532 else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
1533 if (Consume(s, 'n') && Consume(s, 'f')) {
1534 d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
1535 useNanOrInf = true;
1536
// "Inf" may be followed by "inity"; the condition is true when an 'i' follows
// but the remaining letters do not match.
// NOTE(review): the body of this branch is empty in this listing — a statement appears to be missing.
1537 if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
1538 && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
1540 }
1541 }
1542 }
1543
// Reached when neither "NaN" nor "Inf" was fully matched.
// NOTE(review): the body of this branch is empty in this listing — a statement appears to be missing.
1544 if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
1546 }
1547 }
// NOTE(review): this `else` has no visible statement of its own; as listed it
// would bind to the declaration that follows. A statement appears to be missing.
1548 else
1550
1551 // Parse 64bit int
1552 bool useDouble = false;
1553 if (use64bit) {
// Same scheme as above with 64-bit limits; on overflow the value continues as a double.
1554 if (minus)
1555 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1556 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
1557 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
1558 d = static_cast<double>(i64);
1559 useDouble = true;
1560 break;
1561 }
1562 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1563 significandDigit++;
1564 }
1565 else
1566 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1567 if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
1568 if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
1569 d = static_cast<double>(i64);
1570 useDouble = true;
1571 break;
1572 }
1573 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1574 significandDigit++;
1575 }
1576 }
1577
1578 // Force double for big integer
1579 if (useDouble) {
1580 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1581 d = d * 10 + (s.TakePush() - '0');
1582 }
1583 }
1584
1585 // Parse frac = decimal-point 1*DIGIT
// expFrac counts consumed fraction digits as a negative decimal exponent.
1586 int expFrac = 0;
1587 size_t decimalPosition;
1588 if (!useNanOrInf && Consume(s, '.')) {
1589 decimalPosition = s.Length();
1590
// True when the '.' is not followed by a digit.
// NOTE(review): this `if` has no visible statement of its own; as listed it
// would bind to the `if (!useDouble)` that follows. A statement appears to be missing.
1591 if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
1593
1594 if (!useDouble) {
1595#if RAPIDJSON_64BIT
1596 // Use i64 to store significand in 64-bit architecture
1597 if (!use64bit)
1598 i64 = i;
1599
1600 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1601 if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
1602 break;
1603 else {
1604 i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
1605 --expFrac;
1606 if (i64 != 0)
1607 significandDigit++;
1608 }
1609 }
1610
1611 d = static_cast<double>(i64);
1612#else
1613 // Use double to store significand in 32-bit architecture
1614 d = static_cast<double>(use64bit ? i64 : i);
1615#endif
1616 useDouble = true;
1617 }
1618
// Remaining fraction digits: folded into `d` while fewer than 17 significant
// digits have been counted; later digits are consumed without changing `d`.
1619 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1620 if (significandDigit < 17) {
1621 d = d * 10.0 + (s.TakePush() - '0');
1622 --expFrac;
1623 if (RAPIDJSON_LIKELY(d > 0.0))
1624 significandDigit++;
1625 }
1626 else
1627 s.TakePush();
1628 }
1629 }
1630 else
1631 decimalPosition = s.Length(); // decimal position at the end of integer.
1632
1633 // Parse exp = e [ minus / plus ] 1*DIGIT
1634 int exp = 0;
1635 if (!useNanOrInf && (Consume(s, 'e') || Consume(s, 'E'))) {
1636 if (!useDouble) {
1637 d = static_cast<double>(use64bit ? i64 : i);
1638 useDouble = true;
1639 }
1640
1641 bool expMinus = false;
1642 if (Consume(s, '+'))
1643 ;
1644 else if (Consume(s, '-'))
1645 expMinus = true;
1646
1647 if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1648 exp = static_cast<int>(s.Take() - '0');
1649 if (expMinus) {
1650 // (exp + expFrac) must not underflow int => we're detecting when -exp gets
1651 // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
1652 // underflow territory):
1653 //
1654 // -(exp * 10 + 9) + expFrac >= INT_MIN
1655 // <=> exp <= (expFrac - INT_MIN - 9) / 10
1656 RAPIDJSON_ASSERT(expFrac <= 0);
1657 int maxExp = (expFrac + 2147483639) / 10;
1658
1659 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1660 exp = exp * 10 + static_cast<int>(s.Take() - '0');
1661 if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
1662 while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent
1663 s.Take();
1664 }
1665 }
1666 }
1667 else { // positive exp
1668 int maxExp = 308 - expFrac;
1669 while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
1670 exp = exp * 10 + static_cast<int>(s.Take() - '0');
// NOTE(review): this `if` has no visible statement in this listing — a statement appears to be missing.
1671 if (RAPIDJSON_UNLIKELY(exp > maxExp))
1673 }
1674 }
1675 }
// Taken when the exponent marker is not followed by a digit.
// NOTE(review): this `else` has no visible statement of its own; as listed it
// would bind to the `if (expMinus)` that follows. A statement appears to be missing.
1676 else
1678
1679 if (expMinus)
1680 exp = -exp;
1681 }
1682
1683 // Finish parsing, call event according to the type of number.
1684 bool cont = true;
1685
1686 if (parseFlags & kParseNumbersAsStringsFlag) {
1687 if (parseFlags & kParseInsituFlag) {
1688 s.Pop(); // Pop stack no matter if it will be used or not.
1689 typename InputStream::Ch* head = is.PutBegin();
1690 const size_t length = s.Tell() - startOffset;
1691 RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
1692 // unable to insert the \0 character here, it will erase the comma after this number
1693 const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
1694 cont = handler.RawNumber(str, SizeType(length), false);
1695 }
1696 else {
1697 SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
1698 GenericStringStream<UTF8<NumberCharacter> > srcStream(s.Pop());
// NOTE(review): `dstStream` (used below) has no visible declaration in this listing.
1700 while (numCharsToCopy--) {
1701 Transcoder<UTF8<typename TargetEncoding::Ch>, TargetEncoding>::Transcode(srcStream, dstStream);
1702 }
1703 dstStream.Put('\0');
1704 const typename TargetEncoding::Ch* str = dstStream.Pop();
// The terminating '\0' written above is excluded from the reported length.
1705 const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
1706 cont = handler.RawNumber(str, SizeType(length), true);
1707 }
1708 }
1709 else {
1710 size_t length = s.Length();
1711 const NumberCharacter* decimal = s.Pop(); // Pop stack no matter if it will be used or not.
1712
1713 if (useDouble) {
// Combined decimal exponent: explicit exponent plus the shift from fraction digits.
1714 int p = exp + expFrac;
1715 if (parseFlags & kParseFullPrecisionFlag)
1716 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
1717 else
1718 d = internal::StrtodNormalPrecision(d, p);
1719
1720 // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
1721 if (d > (std::numeric_limits<double>::max)()) {
1722 // Overflow
1723 // TODO: internal::StrtodX should report overflow (or underflow)
// NOTE(review): no statement is visible in this overflow branch — one appears to be missing.
1725 }
1726
1727 cont = handler.Double(minus ? -d : d);
1728 }
1729 else if (useNanOrInf) {
1730 cont = handler.Double(d);
1731 }
1732 else {
1733 if (use64bit) {
// `~x + 1` negates the unsigned magnitude before the cast to the signed type.
1734 if (minus)
1735 cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
1736 else
1737 cont = handler.Uint64(i64);
1738 }
1739 else {
1740 if (minus)
1741 cont = handler.Int(static_cast<int32_t>(~i + 1));
1742 else
1743 cont = handler.Uint(i);
1744 }
1745 }
1746 }
// `cont` is the handler's return value.
// NOTE(review): this `if` has no visible statement in this listing — a statement appears to be missing.
1747 if (RAPIDJSON_UNLIKELY(!cont))
1749 }
1750
1751 // Parse any JSON value
1752 template<unsigned parseFlags, typename InputStream, typename Handler>
1753 void ParseValue(InputStream& is, Handler& handler) {
1754 switch (is.Peek()) {
1755 case 'n': ParseNull <parseFlags>(is, handler); break;
1756 case 't': ParseTrue <parseFlags>(is, handler); break;
1757 case 'f': ParseFalse <parseFlags>(is, handler); break;
1758 case '"': ParseString<parseFlags>(is, handler); break;
1759 case '{': ParseObject<parseFlags>(is, handler); break;
1760 case '[': ParseArray <parseFlags>(is, handler); break;
1761 default :
1762 ParseNumber<parseFlags>(is, handler);
1763 break;
1764
1765 }
1766 }
1767
1768 // Iterative Parsing
1769
1770 // States
// States of the iterative parser. The numeric value of each state is used as
// the row index of the transition table in Predict(), so the numbering below
// must stay in step with the order of that table's rows.
enum IterativeParsingState {
    // Sink states at top
    IterativeParsingFinishState            = 0,
    IterativeParsingErrorState             = 1,
    IterativeParsingStartState             = 2,

    // Object states
    IterativeParsingObjectInitialState     = 3,
    IterativeParsingMemberKeyState         = 4,
    IterativeParsingMemberValueState       = 5,
    IterativeParsingObjectFinishState      = 6,

    // Array states
    IterativeParsingArrayInitialState      = 7,
    IterativeParsingElementState           = 8,
    IterativeParsingArrayFinishState       = 9,

    // Single value state
    IterativeParsingValueState             = 10,

    // Delimiter states (at bottom)
    IterativeParsingElementDelimiterState  = 11,
    IterativeParsingMemberDelimiterState   = 12,
    IterativeParsingKeyValueDelimiterState = 13,

    // Number of states; not a state itself.
    cIterativeParsingStateCount            = 14
};
1797
1798 // Tokens
// Lookahead token classes. The numeric value of each token is used as the
// column index of the transition table in Predict(), so the numbering below
// must stay in step with the order of that table's columns.
enum Token {
    LeftBracketToken       = 0,
    RightBracketToken      = 1,

    LeftCurlyBracketToken  = 2,
    RightCurlyBracketToken = 3,

    CommaToken             = 4,
    ColonToken             = 5,

    StringToken            = 6,
    FalseToken             = 7,
    TrueToken              = 8,
    NullToken              = 9,
    NumberToken            = 10,

    // Number of token classes; not a token itself.
    kTokenCount            = 11
};
1817
1818 RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
1819
1821#define N NumberToken
1822#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1823 // Maps from ASCII to Token
1824 static const unsigned char tokenMap[256] = {
1825 N16, // 00~0F
1826 N16, // 10~1F
1827 N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1828 N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1829 N16, // 40~4F
1830 N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1831 N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1832 N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1833 N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1834 };
1835#undef N
1836#undef N16
1838
1839 if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
1840 return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
1841 else
1842 return NumberToken;
1843 }
1844
1845 RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
1846 // current state x one lookahead token -> new state
1847 static const char G[cIterativeParsingStateCount][kTokenCount] = {
1848 // Finish(sink state)
1849 {
1850 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1851 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1852 IterativeParsingErrorState
1853 },
1854 // Error(sink state)
1855 {
1856 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1857 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1858 IterativeParsingErrorState
1859 },
1860 // Start
1861 {
1862 IterativeParsingArrayInitialState, // Left bracket
1863 IterativeParsingErrorState, // Right bracket
1864 IterativeParsingObjectInitialState, // Left curly bracket
1865 IterativeParsingErrorState, // Right curly bracket
1866 IterativeParsingErrorState, // Comma
1867 IterativeParsingErrorState, // Colon
1868 IterativeParsingValueState, // String
1869 IterativeParsingValueState, // False
1870 IterativeParsingValueState, // True
1871 IterativeParsingValueState, // Null
1872 IterativeParsingValueState // Number
1873 },
1874 // ObjectInitial
1875 {
1876 IterativeParsingErrorState, // Left bracket
1877 IterativeParsingErrorState, // Right bracket
1878 IterativeParsingErrorState, // Left curly bracket
1879 IterativeParsingObjectFinishState, // Right curly bracket
1880 IterativeParsingErrorState, // Comma
1881 IterativeParsingErrorState, // Colon
1882 IterativeParsingMemberKeyState, // String
1883 IterativeParsingErrorState, // False
1884 IterativeParsingErrorState, // True
1885 IterativeParsingErrorState, // Null
1886 IterativeParsingErrorState // Number
1887 },
1888 // MemberKey
1889 {
1890 IterativeParsingErrorState, // Left bracket
1891 IterativeParsingErrorState, // Right bracket
1892 IterativeParsingErrorState, // Left curly bracket
1893 IterativeParsingErrorState, // Right curly bracket
1894 IterativeParsingErrorState, // Comma
1895 IterativeParsingKeyValueDelimiterState, // Colon
1896 IterativeParsingErrorState, // String
1897 IterativeParsingErrorState, // False
1898 IterativeParsingErrorState, // True
1899 IterativeParsingErrorState, // Null
1900 IterativeParsingErrorState // Number
1901 },
1902 // MemberValue
1903 {
1904 IterativeParsingErrorState, // Left bracket
1905 IterativeParsingErrorState, // Right bracket
1906 IterativeParsingErrorState, // Left curly bracket
1907 IterativeParsingObjectFinishState, // Right curly bracket
1908 IterativeParsingMemberDelimiterState, // Comma
1909 IterativeParsingErrorState, // Colon
1910 IterativeParsingErrorState, // String
1911 IterativeParsingErrorState, // False
1912 IterativeParsingErrorState, // True
1913 IterativeParsingErrorState, // Null
1914 IterativeParsingErrorState // Number
1915 },
1916 // ObjectFinish(sink state)
1917 {
1918 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1919 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1920 IterativeParsingErrorState
1921 },
1922 // ArrayInitial
1923 {
1924 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1925 IterativeParsingArrayFinishState, // Right bracket
1926 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1927 IterativeParsingErrorState, // Right curly bracket
1928 IterativeParsingErrorState, // Comma
1929 IterativeParsingErrorState, // Colon
1930 IterativeParsingElementState, // String
1931 IterativeParsingElementState, // False
1932 IterativeParsingElementState, // True
1933 IterativeParsingElementState, // Null
1934 IterativeParsingElementState // Number
1935 },
1936 // Element
1937 {
1938 IterativeParsingErrorState, // Left bracket
1939 IterativeParsingArrayFinishState, // Right bracket
1940 IterativeParsingErrorState, // Left curly bracket
1941 IterativeParsingErrorState, // Right curly bracket
1942 IterativeParsingElementDelimiterState, // Comma
1943 IterativeParsingErrorState, // Colon
1944 IterativeParsingErrorState, // String
1945 IterativeParsingErrorState, // False
1946 IterativeParsingErrorState, // True
1947 IterativeParsingErrorState, // Null
1948 IterativeParsingErrorState // Number
1949 },
1950 // ArrayFinish(sink state)
1951 {
1952 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1953 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1954 IterativeParsingErrorState
1955 },
1956 // Single Value (sink state)
1957 {
1958 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1959 IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
1960 IterativeParsingErrorState
1961 },
1962 // ElementDelimiter
1963 {
1964 IterativeParsingArrayInitialState, // Left bracket(push Element state)
1965 IterativeParsingArrayFinishState, // Right bracket
1966 IterativeParsingObjectInitialState, // Left curly bracket(push Element state)
1967 IterativeParsingErrorState, // Right curly bracket
1968 IterativeParsingErrorState, // Comma
1969 IterativeParsingErrorState, // Colon
1970 IterativeParsingElementState, // String
1971 IterativeParsingElementState, // False
1972 IterativeParsingElementState, // True
1973 IterativeParsingElementState, // Null
1974 IterativeParsingElementState // Number
1975 },
1976 // MemberDelimiter
1977 {
1978 IterativeParsingErrorState, // Left bracket
1979 IterativeParsingErrorState, // Right bracket
1980 IterativeParsingErrorState, // Left curly bracket
1981 IterativeParsingObjectFinishState, // Right curly bracket
1982 IterativeParsingErrorState, // Comma
1983 IterativeParsingErrorState, // Colon
1984 IterativeParsingMemberKeyState, // String
1985 IterativeParsingErrorState, // False
1986 IterativeParsingErrorState, // True
1987 IterativeParsingErrorState, // Null
1988 IterativeParsingErrorState // Number
1989 },
1990 // KeyValueDelimiter
1991 {
1992 IterativeParsingArrayInitialState, // Left bracket(push MemberValue state)
1993 IterativeParsingErrorState, // Right bracket
1994 IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state)
1995 IterativeParsingErrorState, // Right curly bracket
1996 IterativeParsingErrorState, // Comma
1997 IterativeParsingErrorState, // Colon
1998 IterativeParsingMemberValueState, // String
1999 IterativeParsingMemberValueState, // False
2000 IterativeParsingMemberValueState, // True
2001 IterativeParsingMemberValueState, // Null
2002 IterativeParsingMemberValueState // Number
2003 },
2004 }; // End of G
2005
2006 return static_cast<IterativeParsingState>(G[state][token]);
2007 }
2008
2009 // Make an advance in the token stream and state based on the candidate destination state which was returned by Predict().
2010 // May return a new state on state pop.
2011 template <unsigned parseFlags, typename InputStream, typename Handler>
2012 RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
2013 (void)token;
2014
2015 switch (dst) {
2016 case IterativeParsingErrorState:
2017 return dst;
2018
2019 case IterativeParsingObjectInitialState:
2020 case IterativeParsingArrayInitialState:
2021 {
2022 // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
2023 // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
2024 IterativeParsingState n = src;
2025 if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
2026 n = IterativeParsingElementState;
2027 else if (src == IterativeParsingKeyValueDelimiterState)
2028 n = IterativeParsingMemberValueState;
2029 // Push current state.
2030 *stack_.template Push<SizeType>(1) = n;
2031 // Initialize and push the member/element count.
2032 *stack_.template Push<SizeType>(1) = 0;
2033 // Call handler
2034 bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
2035 // On handler short circuits the parsing.
2036 if (!hr) {
2038 return IterativeParsingErrorState;
2039 }
2040 else {
2041 is.Take();
2042 return dst;
2043 }
2044 }
2045
2046 case IterativeParsingMemberKeyState:
2047 ParseString<parseFlags>(is, handler, true);
2048 if (HasParseError())
2049 return IterativeParsingErrorState;
2050 else
2051 return dst;
2052
2053 case IterativeParsingKeyValueDelimiterState:
2054 RAPIDJSON_ASSERT(token == ColonToken);
2055 is.Take();
2056 return dst;
2057
2058 case IterativeParsingMemberValueState:
2059 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2060 ParseValue<parseFlags>(is, handler);
2061 if (HasParseError()) {
2062 return IterativeParsingErrorState;
2063 }
2064 return dst;
2065
2066 case IterativeParsingElementState:
2067 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2068 ParseValue<parseFlags>(is, handler);
2069 if (HasParseError()) {
2070 return IterativeParsingErrorState;
2071 }
2072 return dst;
2073
2074 case IterativeParsingMemberDelimiterState:
2075 case IterativeParsingElementDelimiterState:
2076 is.Take();
2077 // Update member/element count.
2078 *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
2079 return dst;
2080
2081 case IterativeParsingObjectFinishState:
2082 {
2083 // Transit from delimiter is only allowed when trailing commas are enabled
2084 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
2086 return IterativeParsingErrorState;
2087 }
2088 // Get member count.
2089 SizeType c = *stack_.template Pop<SizeType>(1);
2090 // If the object is not empty, count the last member.
2091 if (src == IterativeParsingMemberValueState)
2092 ++c;
2093 // Restore the state.
2094 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2095 // Transit to Finish state if this is the topmost scope.
2096 if (n == IterativeParsingStartState)
2097 n = IterativeParsingFinishState;
2098 // Call handler
2099 bool hr = handler.EndObject(c);
2100 // On handler short circuits the parsing.
2101 if (!hr) {
2103 return IterativeParsingErrorState;
2104 }
2105 else {
2106 is.Take();
2107 return n;
2108 }
2109 }
2110
2111 case IterativeParsingArrayFinishState:
2112 {
2113 // Transit from delimiter is only allowed when trailing commas are enabled
2114 if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
2116 return IterativeParsingErrorState;
2117 }
2118 // Get element count.
2119 SizeType c = *stack_.template Pop<SizeType>(1);
2120 // If the array is not empty, count the last element.
2121 if (src == IterativeParsingElementState)
2122 ++c;
2123 // Restore the state.
2124 IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
2125 // Transit to Finish state if this is the topmost scope.
2126 if (n == IterativeParsingStartState)
2127 n = IterativeParsingFinishState;
2128 // Call handler
2129 bool hr = handler.EndArray(c);
2130 // On handler short circuits the parsing.
2131 if (!hr) {
2133 return IterativeParsingErrorState;
2134 }
2135 else {
2136 is.Take();
2137 return n;
2138 }
2139 }
2140
2141 default:
2142 // This branch is for IterativeParsingValueState actually.
2143 // Use `default:` rather than
2144 // `case IterativeParsingValueState:` is for code coverage.
2145
2146 // The IterativeParsingStartState is not enumerated in this switch-case.
2147 // It is impossible for that case. And it can be caught by following assertion.
2148
2149 // The IterativeParsingFinishState is not enumerated in this switch-case either.
2150 // It is a "derivative" state which cannot triggered from Predict() directly.
2151 // Therefore it cannot happen here. And it can be caught by following assertion.
2152 RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
2153
2154 // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
2155 ParseValue<parseFlags>(is, handler);
2156 if (HasParseError()) {
2157 return IterativeParsingErrorState;
2158 }
2159 return IterativeParsingFinishState;
2160 }
2161 }
2162
2163 template <typename InputStream>
2164 void HandleError(IterativeParsingState src, InputStream& is) {
2165 if (HasParseError()) {
2166 // Error flag has been set.
2167 return;
2168 }
2169
2170 switch (src) {
2171 case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
2172 case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
2173 case IterativeParsingObjectInitialState:
2174 case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
2175 case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
2176 case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
2177 case IterativeParsingKeyValueDelimiterState:
2178 case IterativeParsingArrayInitialState:
2179 case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
2180 default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
2181 }
2182 }
2183
2184 RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
2185 return s >= IterativeParsingElementDelimiterState;
2186 }
2187
2188 RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
2189 return s <= IterativeParsingErrorState;
2190 }
2191
2192 template <unsigned parseFlags, typename InputStream, typename Handler>
2193 ParseResult IterativeParse(InputStream& is, Handler& handler) {
2194 parseResult_.Clear();
2195 ClearStackOnExit scope(*this);
2196 IterativeParsingState state = IterativeParsingStartState;
2197
2198 SkipWhitespaceAndComments<parseFlags>(is);
2199 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2200 while (is.Peek() != '\0') {
2201 Token t = Tokenize(is.Peek());
2202 IterativeParsingState n = Predict(state, t);
2203 IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
2204
2205 if (d == IterativeParsingErrorState) {
2206 HandleError(state, is);
2207 break;
2208 }
2209
2210 state = d;
2211
2212 // Do not further consume streams if a root JSON has been parsed.
2213 if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
2214 break;
2215
2216 SkipWhitespaceAndComments<parseFlags>(is);
2217 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
2218 }
2219
2220 // Handle the end of file.
2221 if (state != IterativeParsingFinishState)
2222 HandleError(state, is);
2223
2224 return parseResult_;
2225 }
2226
2227 static const size_t kDefaultStackCapacity = 256;
2229 ParseResult parseResult_;
2230 IterativeParsingState state_;
2231}; // class GenericReader
2232
2235
2237
2238#if defined(__clang__) || defined(_MSC_VER)
2239RAPIDJSON_DIAG_POP
2240#endif
2241
2242
2243#ifdef __GNUC__
2244RAPIDJSON_DIAG_POP
2245#endif
2246
2247#endif // RAPIDJSON_READER_H_
Input byte stream wrapper with a statically bound encoding.
SAX-style JSON parser. Use Reader for UTF8 encoding and default allocator.
Definition reader.h:540
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text.
Definition reader.h:560
bool IterativeParseNext(InputStream &is, Handler &handler)
Parse one token from JSON text.
Definition reader.h:621
ParseResult Parse(InputStream &is, Handler &handler)
Parse JSON text (with kParseDefaultFlags)
Definition reader.h:601
void IterativeParseInit()
Initialize JSON text token-by-token parsing.
Definition reader.h:608
ParseErrorCode GetParseErrorCode() const
Get the ParseErrorCode of last parsing.
Definition reader.h:686
RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const
Check if token-by-token parsing JSON text is complete.
Definition reader.h:678
GenericReader(StackAllocator *stackAllocator=0, size_t stackCapacity=kDefaultStackCapacity)
Constructor.
Definition reader.h:548
bool HasParseError() const
Whether a parse error has occurred in the last parsing.
Definition reader.h:683
size_t GetErrorOffset() const
Get the position of last parsing error in input, 0 otherwise.
Definition reader.h:689
A type-unsafe stack for storing different types of data.
Definition stack.h:38
Concept for receiving events from GenericReader upon parsing. The functions return true if no error o...
Concept for reading and writing characters.
#define RAPIDJSON_LIKELY(x)
Compiler branching hint for expression with high probability to be true.
Definition rapidjson.h:495
#define RAPIDJSON_UNLIKELY(x)
Compiler branching hint for expression with low probability to be true.
Definition rapidjson.h:508
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:438
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:122
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:125
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset)
Macro to indicate a parse error.
Definition reader.h:101
ParseErrorCode
Error code of parsing.
Definition error.h:65
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset)
(Internal) macro to indicate and handle a parse error.
Definition reader.h:120
@ kParseErrorDocumentEmpty
The document is empty.
Definition error.h:68
@ kParseErrorNumberMissFraction
Missing fraction part in number.
Definition error.h:86
@ kParseErrorStringInvalidEncoding
Invalid encoding in string.
Definition error.h:83
@ kParseErrorValueInvalid
Invalid value.
Definition error.h:71
@ kParseErrorDocumentRootNotSingular
The document root must not be followed by other values.
Definition error.h:69
@ kParseErrorUnspecificSyntaxError
Unspecific syntax error.
Definition error.h:90
@ kParseErrorObjectMissCommaOrCurlyBracket
Missing a comma or '}' after an object member.
Definition error.h:75
@ kParseErrorObjectMissColon
Missing a colon after a name of object member.
Definition error.h:74
@ kParseErrorStringMissQuotationMark
Missing a closing quotation mark in string.
Definition error.h:82
@ kParseErrorTermination
Parsing was terminated.
Definition error.h:89
@ kParseErrorNumberMissExponent
Missing exponent in number.
Definition error.h:87
@ kParseErrorStringEscapeInvalid
Invalid escape character in string.
Definition error.h:81
@ kParseErrorArrayMissCommaOrSquareBracket
Missing a comma or ']' after an array element.
Definition error.h:77
@ kParseErrorStringUnicodeSurrogateInvalid
The surrogate pair in string is invalid.
Definition error.h:80
@ kParseErrorObjectMissName
Missing a name for object member.
Definition error.h:73
@ kParseErrorNumberTooBig
Number too big to be stored in double.
Definition error.h:85
@ kParseErrorStringUnicodeEscapeInvalidHex
Incorrect hex digit after \u escape in string.
Definition error.h:79
__host__ __device__ double exp(double v)
Exponentielle de v.
Definition Math.h:116
Type
Type of JSON value.
Definition rapidjson.h:730
RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition rapidjson.h:416
#define RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition rapidjson.h:321
void SkipWhitespace(InputStream &is)
Skip the JSON white spaces in a stream.
Definition reader.h:267
ParseFlag
Combination of parseFlags.
Definition reader.h:147
@ kParseFullPrecisionFlag
Parse number in full precision (but slower).
Definition reader.h:153
@ kParseInsituFlag
In-situ(destructive) parsing.
Definition reader.h:149
@ kParseNoFlags
No flags are set.
Definition reader.h:148
@ kParseCommentsFlag
Allow one-line (//) and multi-line (/**/) comments.
Definition reader.h:154
@ kParseEscapedApostropheFlag
Allow escaped apostrophe in strings.
Definition reader.h:158
@ kParseDefaultFlags
Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS.
Definition reader.h:159
@ kParseTrailingCommasFlag
Allow trailing commas at the end of objects and arrays.
Definition reader.h:156
@ kParseNanAndInfFlag
Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
Definition reader.h:157
@ kParseValidateEncodingFlag
Validate encoding of JSON strings.
Definition reader.h:150
@ kParseNumbersAsStringsFlag
Parse all numbers (ints/doubles) as strings.
Definition reader.h:155
@ kParseIterativeFlag
Iterative(constant complexity in terms of function call stack size) parsing.
Definition reader.h:151
@ kParseStopWhenDoneFlag
After parsing a complete JSON root from stream, stop further processing the rest of stream....
Definition reader.h:152
Default implementation of Handler.
Definition reader.h:199
bool RawNumber(const Ch *str, SizeType len, bool copy)
enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
Definition reader.h:213
const Ch * src_
Current read position.
Definition stream.h:169
Represents an in-memory input byte stream.
Result of parsing (wraps ParseErrorCode)
Definition error.h:107
void Set(ParseErrorCode code, size_t offset=0)
Update error code and offset.
Definition error.h:137
static RAPIDJSON_FORCEINLINE bool Transcode(InputStream &is, OutputStream &os)
Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the outp...
Definition encodings.h:662
static RAPIDJSON_FORCEINLINE bool Validate(InputStream &is, OutputStream &os)
Validate one Unicode codepoint from an encoded stream.
Definition encodings.h:681
UTF-8 encoding.
Definition encodings.h:97