Arcane  4.1.12.0
Developer documentation
Loading...
Searching...
No Matches
StringImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2026 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* StringImpl.cc (C) 2000-2025 */
9/* */
10/* Implementation of a UTF-8 or UTF-16 character string. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/internal/StringImpl.h"
15#include "arccore/base/BasicTranscoder.h"
16#include "arccore/base/CStringUtils.h"
17#include "arccore/base/StringView.h"
18
19#include <cstring>
20
21//#define ARCCORE_DEBUG_UNISTRING
22
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26namespace Arcane
27{
28
29/*---------------------------------------------------------------------------*/
30/*---------------------------------------------------------------------------*/
31
// Flag with external linkage, initialised to false. It is not read anywhere
// in this file (presumably a debugging switch consulted elsewhere).
bool global_arccore_debug_string = false;

namespace
{
  // Empty, null-terminated C string used as a fallback by StringImpl::bytes().
  constexpr const char* global_empty_string = "";
} // namespace
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
42class StringException
43: public std::exception
44{
45 public:
46
47 StringException(const char* where)
48 : m_where(where)
49 {}
50 ~StringException() ARCCORE_NOEXCEPT {}
51 virtual const char* what() const ARCCORE_NOEXCEPT
52 {
53 return m_where;
54 }
55
56 private:
57
58 const char* m_where;
59};
60
61/*---------------------------------------------------------------------------*/
62/*---------------------------------------------------------------------------*/
63
#ifdef ARCCORE_DEBUG_UNISTRING
// Debug-only helper: reports on the standard error stream that \a ptr is
// being used while its reference count is not strictly positive.
static void
_badStringImplReference(StringImpl* ptr)
{
  // 'std::' is required: this file has no using-directive for namespace std.
  std::cerr << "** FATAL: Trying to use deleted StringImpl " << ptr << '\n';
}
#endif
71
72inline void StringImpl::
73_checkReference()
74{
75#ifdef ARCCORE_DEBUG_UNISTRING
76 if (m_nb_ref.value() <= 0) {
77 _badStringImplReference(this);
78 }
79#endif
80}
81
82/*---------------------------------------------------------------------------*/
83/*---------------------------------------------------------------------------*/
84
85inline void StringImpl::
86_finalizeUtf8Creation()
87{
88 m_flags |= eValidUtf8;
89 // \a m_utf8_array must always have a null terminator.
90 if (m_utf8_array.empty())
91 m_utf8_array.add('\0');
92 else if (m_utf8_array.back() != '\0')
93 m_utf8_array.add('\0');
94}
95
96/*---------------------------------------------------------------------------*/
97/*---------------------------------------------------------------------------*/
98
99inline void StringImpl::
100_initFromSpan(Span<const Byte> bytes)
101{
102 m_utf8_array = bytes;
103 _finalizeUtf8Creation();
104}
105
106/*---------------------------------------------------------------------------*/
107/*---------------------------------------------------------------------------*/
108
109StringImpl::
110StringImpl(std::string_view str)
111: m_nb_ref(0)
112, m_flags(0)
113{
114 auto b = reinterpret_cast<const Byte*>(str.data());
115 _initFromSpan(Span<const Byte>(b, str.size()));
116}
117
118/*---------------------------------------------------------------------------*/
119/*---------------------------------------------------------------------------*/
120
121StringImpl::
122StringImpl(Span<const UChar> uchars)
123: m_nb_ref(0)
124, m_flags(0)
125{
126 _setUtf16(uchars);
127 m_flags = eValidUtf16;
128}
129
130/*---------------------------------------------------------------------------*/
131/*---------------------------------------------------------------------------*/
132
133StringImpl::
134StringImpl(const StringImpl& str)
135: m_nb_ref(0)
136, m_flags(str.m_flags)
137, m_utf16_array(str.m_utf16_array)
138, m_utf8_array(str.m_utf8_array)
139{
140}
141
142/*---------------------------------------------------------------------------*/
143/*---------------------------------------------------------------------------*/
144
145StringImpl::
146StringImpl(Span<const Byte> bytes)
147: m_nb_ref(0)
148, m_flags(0)
149{
150 _initFromSpan(bytes);
151}
152
153/*---------------------------------------------------------------------------*/
154/*---------------------------------------------------------------------------*/
155
156StringImpl::
157StringImpl()
158: m_nb_ref(0)
159, m_flags(0)
160{
161}
162
163/*---------------------------------------------------------------------------*/
164/*---------------------------------------------------------------------------*/
165
167bytes()
168{
170 Int64 size = x.size();
171 if (size > 0)
172 return { x.data(), size - 1 };
173 // Should not normally happen, but if it does, we return
174 // a view of the empty string because this method guarantees a
175 // null terminator at the end.
176 // NOTE: No exception is thrown because this method is used in
177 // outputs via operator<< and can be used notably in
178 // destructors of objects.
179 std::cerr << "INTERNAL ERROR: Null size in StringImpl::bytes()";
180 return { reinterpret_cast<const Byte*>(global_empty_string), 0 };
181}
182
183/*---------------------------------------------------------------------------*/
184/*---------------------------------------------------------------------------*/
185
186std::string_view StringImpl::
187toStdStringView()
188{
190 return std::string_view(reinterpret_cast<const char*>(x.data()), x.size());
191}
192
193/*---------------------------------------------------------------------------*/
194/*---------------------------------------------------------------------------*/
195
196StringView StringImpl::
197view()
198{
199 return StringView(bytes());
200}
201
202/*---------------------------------------------------------------------------*/
203/*---------------------------------------------------------------------------*/
204
205void StringImpl::
206addReference()
207{
208 ++m_nb_ref;
209 _checkReference();
210}
211
212/*---------------------------------------------------------------------------*/
213/*---------------------------------------------------------------------------*/
214
215void StringImpl::
216removeReference()
217{
218 _checkReference();
219 Int32 r = --m_nb_ref;
220#ifndef ARCCORE_DEBUG_UNISTRING
221 if (r == 0)
222 delete this;
223#endif
224}
225
226/*---------------------------------------------------------------------------*/
227/*---------------------------------------------------------------------------*/
228
229ConstArrayView<UChar> StringImpl::
230utf16()
231{
232 _checkReference();
233 _createUtf16();
234 return m_utf16_array.view();
235}
236
237/*---------------------------------------------------------------------------*/
238/*---------------------------------------------------------------------------*/
239
241largeUtf8()
242{
243 _checkReference();
244 _createUtf8();
245 return m_utf8_array.view();
246}
247
248/*---------------------------------------------------------------------------*/
249/*---------------------------------------------------------------------------*/
250
251bool StringImpl::
252isEqual(StringImpl* str)
253{
254 _checkReference();
255 _createUtf8();
256 Span<const Byte> ref_array = str->largeUtf8();
257 bool v = CStringUtils::isEqual((const char*)ref_array.data(), (const char*)m_utf8_array.data());
258 return v;
259}
260
261/*---------------------------------------------------------------------------*/
262/*---------------------------------------------------------------------------*/
263
264bool StringImpl::
265isLessThan(StringImpl* str)
266{
267 _checkReference();
268 _createUtf8();
269 if (m_flags & eValidUtf8) {
270 Span<const Byte> ref_array = str->largeUtf8();
271 bool v = CStringUtils::isLess((const char*)m_utf8_array.data(), (const char*)ref_array.data());
272 return v;
273 }
274 ARCCORE_ASSERT((0), ("InternalError in StringImpl::isEqual()"));
275 return true;
276}
277
278/*---------------------------------------------------------------------------*/
279/*---------------------------------------------------------------------------*/
280
281bool StringImpl::
282isEqual(StringView str)
283{
284 _checkReference();
285 _createUtf8();
286 return str.toStdStringView() == toStdStringView();
287}
288
289/*---------------------------------------------------------------------------*/
290/*---------------------------------------------------------------------------*/
291
292bool StringImpl::
293isLessThan(StringView str)
294{
295 _checkReference();
296 _createUtf8();
297 return toStdStringView() < str.toStdStringView();
298}
299
300/*---------------------------------------------------------------------------*/
301/*---------------------------------------------------------------------------*/
302
303StringImpl* StringImpl::
304clone()
305{
306 _checkReference();
307 _createUtf8();
308 StringImpl* n = new StringImpl(*this);
309 return n;
310}
311
312/*---------------------------------------------------------------------------*/
313/*---------------------------------------------------------------------------*/
314
315bool StringImpl::
316empty()
317{
318 _checkReference();
319 if (m_flags & eValidUtf8) {
320 ARCCORE_ASSERT((!m_utf8_array.empty()), ("Not 0 terminated utf8 encoding"));
321 return m_utf8_array.size() <= 1; // Counts the terminal 0
322 }
323 if (m_flags & eValidUtf16) {
324 ARCCORE_ASSERT((!m_utf16_array.empty()), ("Not 0 terminated utf16 encoding"));
325 return m_utf16_array.size() <= 1; // Counts the terminal 0
326 }
327 ARCCORE_ASSERT((0), ("InternalError in StringImpl::empty()"));
328 return false;
329}
330
331/*---------------------------------------------------------------------------*/
332/*---------------------------------------------------------------------------*/
333
334StringImpl* StringImpl::
335append(StringImpl* str)
336{
337 _checkReference();
338 _createUtf8();
339 Span<const Byte> ref_str = str->largeUtf8();
340 _appendUtf8(ref_str);
341 return this;
342}
343
344/*---------------------------------------------------------------------------*/
345/*---------------------------------------------------------------------------*/
346
347StringImpl* StringImpl::
348append(StringView str)
349{
350 Span<const Byte> str_bytes = str.bytes();
351 if (!str_bytes.data())
352 return this;
353
354 _checkReference();
355 _createUtf8();
356
357 _appendUtf8(Span<const Byte>(str_bytes.data(), str_bytes.size() + 1));
358 ;
359 return this;
360}
361
362/*---------------------------------------------------------------------------*/
363/*---------------------------------------------------------------------------*/
364
365void StringImpl::
366_appendUtf8(Span<const Byte> ref_str)
367{
368 Int64 ref_size = ref_str.size();
369 Int64 utf8_size = m_utf8_array.size();
370 Int64 current_size = utf8_size - 1;
371
372 ARCCORE_ASSERT((ref_size > 0), ("Bad ref_size"));
373 ARCCORE_ASSERT((utf8_size > 0), ("Bad utf8_size"));
374 ARCCORE_ASSERT((ref_str[ref_size - 1] == 0), ("Bad ref null terminal"));
375 ARCCORE_ASSERT((m_utf8_array[utf8_size - 1] == 0), ("Bad ref null terminal"));
376
377 m_utf8_array.resize(current_size + ref_size);
378 std::memcpy(&m_utf8_array[current_size], ref_str.data(), ref_size);
379
380 m_flags |= eValidUtf8;
381 _invalidateUtf16();
382}
383
384/*---------------------------------------------------------------------------*/
385/*---------------------------------------------------------------------------*/
386
387StringImpl* StringImpl::
388replaceWhiteSpace()
389{
390 _createUtf8();
391 _invalidateUtf16();
392 Impl::BasicTranscoder::replaceWS(m_utf8_array);
393 return this;
394}
395
396/*---------------------------------------------------------------------------*/
397/*---------------------------------------------------------------------------*/
398
399StringImpl* StringImpl::
400collapseWhiteSpace()
401{
402 _createUtf8();
403 _invalidateUtf16();
404 Impl::BasicTranscoder::collapseWS(m_utf8_array);
405 return this;
406}
407
408/*---------------------------------------------------------------------------*/
409/*---------------------------------------------------------------------------*/
410
411StringImpl* StringImpl::
412toUpper()
413{
414 _createUtf8();
415 _invalidateUtf16();
416 Impl::BasicTranscoder::upperCase(m_utf8_array);
417 return this;
418}
419
420/*---------------------------------------------------------------------------*/
421/*---------------------------------------------------------------------------*/
422
423StringImpl* StringImpl::
424toLower()
425{
426 _createUtf8();
427 _invalidateUtf16();
428 Impl::BasicTranscoder::lowerCase(m_utf8_array);
429 return this;
430}
431
432/*---------------------------------------------------------------------------*/
433/*---------------------------------------------------------------------------*/
434
435StringImpl* StringImpl::
436substring(StringImpl* str, Int64 pos, Int64 len)
437{
438 StringImpl* s = new StringImpl();
439 Impl::BasicTranscoder::substring(s->m_utf8_array, str->largeUtf8(), pos, len);
440 s->m_flags |= eValidUtf8;
441 return s;
442}
443
444/*---------------------------------------------------------------------------*/
445/*---------------------------------------------------------------------------*/
446
447void StringImpl::
448_createUtf16()
449{
450 if (m_flags & eValidUtf16)
451 return;
452
453 if (m_flags & eValidUtf8) {
454 ARCCORE_ASSERT(m_utf16_array.empty(), ("Not empty utf16_array"));
455 Impl::BasicTranscoder::transcodeFromUtf8ToUtf16(m_utf8_array, m_utf16_array);
456 m_flags |= eValidUtf16;
457 return;
458 }
459
460 ARCCORE_ASSERT((0), ("InternalError in StringImpl::_createUtf16()"));
461}
462
463/*---------------------------------------------------------------------------*/
464/*---------------------------------------------------------------------------*/
465
466void StringImpl::
467_createUtf8()
468{
469 if (m_flags & eValidUtf8)
470 return;
471
472 if (m_flags & eValidUtf16) {
473 ARCCORE_ASSERT(m_utf8_array.empty(), ("Not empty utf8_array"));
474 Impl::BasicTranscoder::transcodeFromUtf16ToUtf8(m_utf16_array, m_utf8_array);
475 _finalizeUtf8Creation();
476 return;
477 }
478
479 ARCCORE_ASSERT((0), ("InternalError in StringImpl::_createUtf16()"));
480}
481
482/*---------------------------------------------------------------------------*/
483/*---------------------------------------------------------------------------*/
484
485void StringImpl::
486_setUtf16(Span<const UChar> src)
487{
488 m_utf16_array = src;
489 if (m_utf16_array.empty())
490 m_utf16_array.add(0);
491 else if (m_utf16_array.back() != '\0')
492 m_utf16_array.add(0);
493}
494
495/*---------------------------------------------------------------------------*/
496/*---------------------------------------------------------------------------*/
497
498void StringImpl::
499_invalidateUtf16()
500{
501 m_flags &= ~eValidUtf16;
502 m_utf16_array.clear();
503}
504
505/*---------------------------------------------------------------------------*/
506/*---------------------------------------------------------------------------*/
507
508void StringImpl::
509_invalidateUtf8()
510{
511 m_flags &= ~eValidUtf8;
512 m_utf8_array.clear();
513}
514
515/*---------------------------------------------------------------------------*/
516/*---------------------------------------------------------------------------*/
517
518void StringImpl::
519_printStrUtf16(std::ostream& o, Span<const UChar> str)
520{
521 Int64 buf_size = str.size();
522 o << "(bufsize=" << buf_size
523 << " begin=" << str.data() << " - ";
524 for (Int64 i = 0; i < buf_size; ++i)
525 o << (int)str[i] << ' ';
526 o << ")";
527}
528
529/*---------------------------------------------------------------------------*/
530/*---------------------------------------------------------------------------*/
531
532void StringImpl::
533_printStrUtf8(std::ostream& o, Span<const Byte> str)
534{
535 Int64 buf_size = str.size();
536 o << "(bufsize=" << buf_size << " - ";
537 for (Int64 i = 0; i < buf_size; ++i)
538 o << (int)str[i] << ' ';
539 o << ")";
540}
541
542/*---------------------------------------------------------------------------*/
543/*---------------------------------------------------------------------------*/
544
545void StringImpl::
546internalDump(std::ostream& ostr)
547{
548 ostr << "(utf8=valid=" << ((m_flags & eValidUtf8) != 0)
549 << ",len=" << m_utf8_array.size() << ",val=";
550 _printStrUtf8(ostr, m_utf8_array);
551 ostr << ")";
552
553 ostr << "(utf16=valid=" << ((m_flags & eValidUtf16) != 0)
554 << ",len=" << m_utf16_array.size() << ",val=";
555 _printStrUtf16(ostr, m_utf16_array);
556 ostr << ")";
557}
558
559/*---------------------------------------------------------------------------*/
560/*---------------------------------------------------------------------------*/
561
562} // namespace Arcane
563
564/*---------------------------------------------------------------------------*/
565/*---------------------------------------------------------------------------*/
Constant view of an array of type T.
static void transcodeFromUtf16ToUtf8(Span< const UChar > utf16, CoreArray< Byte > &utf8)
Translates from UTF16 to UTF8.
constexpr __host__ __device__ pointer data() const noexcept
Pointer to the start of the view.
Definition Span.h:539
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
Definition Span.h:327
View of an array of elements of type T.
Definition Span.h:635
Span< const Byte > bytes()
same as largeUtf8() but WITHOUT the null terminator
Span< const Byte > largeUtf8()
View of the UTF-8 encoding WITH null terminator.
View of a UTF-8 character string.
Definition StringView.h:44
bool isLess(const char *s1, const char *s2)
Returns true if s1 is less than (alphabetical order) s2 , false otherwise.
bool isEqual(const char *s1, const char *s2)
Returns true if s1 and s2 are identical, false otherwise.
-- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature --
std::int64_t Int64
Signed integer type of 64 bits.
unsigned char Byte
Type of a byte.
Definition BaseTypes.h:43
std::int32_t Int32
Signed integer type of 32 bits.