Arcane  v3.14.10.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
encodedstream.h
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2// Tencent is pleased to support the open source community by making RapidJSON available.
3//
4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
5//
6// Licensed under the MIT License (the "License"); you may not use this file except
7// in compliance with the License. You may obtain a copy of the License at
8//
9// http://opensource.org/licenses/MIT
10//
11// Unless required by applicable law or agreed to in writing, software distributed
12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
13// CONDITIONS OF ANY KIND, either express or implied. See the License for the
14// specific language governing permissions and limitations under the License.
15
16#ifndef RAPIDJSON_ENCODEDSTREAM_H_
17#define RAPIDJSON_ENCODEDSTREAM_H_
18
19#include "stream.h"
20#include "memorystream.h"
21
22#ifdef __GNUC__
23RAPIDJSON_DIAG_PUSH
24RAPIDJSON_DIAG_OFF(effc++)
25#endif
26
27#ifdef __clang__
28RAPIDJSON_DIAG_PUSH
29RAPIDJSON_DIAG_OFF(padded)
30#endif
31
33
35
39template <typename Encoding, typename InputByteStream>
41 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
42public:
43 typedef typename Encoding::Ch Ch;
44
45 EncodedInputStream(InputByteStream& is) : is_(is) {
46 current_ = Encoding::TakeBOM(is_);
47 }
48
49 Ch Peek() const { return current_; }
50 Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
51 size_t Tell() const { return is_.Tell(); }
52
53 // Not implemented
54 void Put(Ch) { RAPIDJSON_ASSERT(false); }
55 void Flush() { RAPIDJSON_ASSERT(false); }
56 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
57 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
58
59private:
61 EncodedInputStream& operator=(const EncodedInputStream&);
62
63 InputByteStream& is_;
64 Ch current_;
65};
66
68template <>
70public:
71 typedef UTF8<>::Ch Ch;
72
73 EncodedInputStream(MemoryStream& is) : is_(is) {
74 if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
75 if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
76 if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
77 }
78 Ch Peek() const { return is_.Peek(); }
79 Ch Take() { return is_.Take(); }
80 size_t Tell() const { return is_.Tell(); }
81
82 // Not implemented
83 void Put(Ch) {}
84 void Flush() {}
85 Ch* PutBegin() { return 0; }
86 size_t PutEnd(Ch*) { return 0; }
87
88 MemoryStream& is_;
89
90private:
92 EncodedInputStream& operator=(const EncodedInputStream&);
93};
94
96
100template <typename Encoding, typename OutputByteStream>
102 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
103public:
104 typedef typename Encoding::Ch Ch;
105
106 EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {
107 if (putBOM)
108 Encoding::PutBOM(os_);
109 }
110
111 void Put(Ch c) { Encoding::Put(os_, c); }
112 void Flush() { os_.Flush(); }
113
114 // Not implemented
115 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
116 Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
117 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
118 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
119 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
120
121private:
123 EncodedOutputStream& operator=(const EncodedOutputStream&);
124
125 OutputByteStream& os_;
126};
127
// Expands to the list of static member `x` of each supported UTF encoding,
// instantiated for the enclosing class's `Ch`. The expansions are used as
// initializers of function tables indexed by a UTFType value, so the order
// here has to match the order of the UTFType enumerators (declared elsewhere).
#define RAPIDJSON_ENCODINGS_FUNC(x) \
    UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
129
131
135template <typename CharType, typename InputByteStream>
137 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
138public:
139 typedef CharType Ch;
140
142
146 AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
147 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
148 DetectType();
149 static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
150 takeFunc_ = f[type_];
151 current_ = takeFunc_(*is_);
152 }
153
154 UTFType GetType() const { return type_; }
155 bool HasBOM() const { return hasBOM_; }
156
157 Ch Peek() const { return current_; }
158 Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
159 size_t Tell() const { return is_->Tell(); }
160
161 // Not implemented
162 void Put(Ch) { RAPIDJSON_ASSERT(false); }
163 void Flush() { RAPIDJSON_ASSERT(false); }
164 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
165 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
166
167private:
169 AutoUTFInputStream& operator=(const AutoUTFInputStream&);
170
171 // Detect encoding type with BOM or RFC 4627
172 void DetectType() {
173 // BOM (Byte Order Mark):
174 // 00 00 FE FF UTF-32BE
175 // FF FE 00 00 UTF-32LE
176 // FE FF UTF-16BE
177 // FF FE UTF-16LE
178 // EF BB BF UTF-8
179
180 const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
181 if (!c)
182 return;
183
184 unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
185 hasBOM_ = false;
186 if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
187 else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
188 else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
189 else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
190 else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
191
192 // RFC 4627: Section 3
193 // "Since the first two characters of a JSON text will always be ASCII
194 // characters [RFC0020], it is possible to determine whether an octet
195 // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
196 // at the pattern of nulls in the first four octets."
197 // 00 00 00 xx UTF-32BE
198 // 00 xx 00 xx UTF-16BE
199 // xx 00 00 00 UTF-32LE
200 // xx 00 xx 00 UTF-16LE
201 // xx xx xx xx UTF-8
202
203 if (!hasBOM_) {
204 int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
205 switch (pattern) {
206 case 0x08: type_ = kUTF32BE; break;
207 case 0x0A: type_ = kUTF16BE; break;
208 case 0x01: type_ = kUTF32LE; break;
209 case 0x05: type_ = kUTF16LE; break;
210 case 0x0F: type_ = kUTF8; break;
211 default: break; // Use type defined by user.
212 }
213 }
214
215 // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
216 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
217 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
218 }
219
220 typedef Ch (*TakeFunc)(InputByteStream& is);
221 InputByteStream* is_;
222 UTFType type_;
223 Ch current_;
224 TakeFunc takeFunc_;
225 bool hasBOM_;
226};
227
229
233template <typename CharType, typename OutputByteStream>
235 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
236public:
237 typedef CharType Ch;
238
240
245 AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
246 RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
247
248 // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
249 if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
250 if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
251
252 static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
253 putFunc_ = f[type_];
254
255 if (putBOM)
256 PutBOM();
257 }
258
259 UTFType GetType() const { return type_; }
260
261 void Put(Ch c) { putFunc_(*os_, c); }
262 void Flush() { os_->Flush(); }
263
264 // Not implemented
265 Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
266 Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
267 size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
268 Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
269 size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
270
271private:
273 AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
274
275 void PutBOM() {
276 typedef void (*PutBOMFunc)(OutputByteStream&);
277 static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
278 f[type_](*os_);
279 }
280
281 typedef void (*PutFunc)(OutputByteStream&, Ch);
282
283 OutputByteStream* os_;
284 UTFType type_;
285 PutFunc putFunc_;
286};
287
288#undef RAPIDJSON_ENCODINGS_FUNC
289
291
292#ifdef __clang__
293RAPIDJSON_DIAG_POP
294#endif
295
296#ifdef __GNUC__
297RAPIDJSON_DIAG_POP
298#endif
299
300#endif // RAPIDJSON_ENCODEDSTREAM_H_
Input stream wrapper with dynamically bound encoding and automatic encoding detection.
AutoUTFInputStream(InputByteStream &is, UTFType type=kUTF8)
Constructor.
Output stream wrapper with dynamically bound encoding and automatic encoding detection.
AutoUTFOutputStream(OutputByteStream &os, UTFType type, bool putBOM)
Constructor.
Input byte stream wrapper with a statically bound encoding.
Output byte stream wrapper with statically bound encoding.
#define RAPIDJSON_ASSERT(x)
Assertion.
Definition rapidjson.h:407
#define RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition rapidjson.h:122
#define RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition rapidjson.h:125
#define RAPIDJSON_STATIC_ASSERT(x)
(Internal) macro to check for conditions at compile-time
Definition rapidjson.h:446
Represents an in-memory input byte stream.
UTF-8 encoding.
Definition encodings.h:97