Arcane  v4.1.0.0
Documentation développeur
Chargement...
Recherche...
Aucune correspondance
StringImpl.cc
1// -*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
2//-----------------------------------------------------------------------------
3// Copyright 2000-2025 CEA (www.cea.fr) IFPEN (www.ifpenergiesnouvelles.com)
4// See the top-level COPYRIGHT file for details.
5// SPDX-License-Identifier: Apache-2.0
6//-----------------------------------------------------------------------------
7/*---------------------------------------------------------------------------*/
8/* StringImpl.cc (C) 2000-2025 */
9/* */
10/* Implémentation d'une chaîne de caractères UTF-8 ou UTF-16. */
11/*---------------------------------------------------------------------------*/
12/*---------------------------------------------------------------------------*/
13
14#include "arccore/base/internal/StringImpl.h"
15#include "arccore/base/BasicTranscoder.h"
16#include "arccore/base/CStringUtils.h"
17#include "arccore/base/StringView.h"
18
19#include <cstring>
20
21//#define ARCCORE_DEBUG_UNISTRING
22
23/*---------------------------------------------------------------------------*/
24/*---------------------------------------------------------------------------*/
25
26namespace Arcane
27{
28
29/*---------------------------------------------------------------------------*/
30/*---------------------------------------------------------------------------*/
31
//! Global debug switch for strings, off by default.
//! NOTE(review): not read anywhere in this file; presumably consumed elsewhere.
bool global_arccore_debug_string = false;

namespace
{
//! Null-terminated empty C string, used as a safe fallback buffer.
constexpr const char* global_empty_string = "";
} // namespace
38
39/*---------------------------------------------------------------------------*/
40/*---------------------------------------------------------------------------*/
41
/*!
 * \brief Minimal exception type carrying a static location/message string.
 *
 * The pointer given at construction is stored as-is (no copy), so it must
 * outlive the exception; string literals are the intended use.
 */
class StringException
: public std::exception
{
 public:
  //! Builds the exception. A null \a where is replaced by an empty string so
  //! that what() never returns a null pointer.
  StringException(const char* where) : m_where(where ? where : "") {}
  ~StringException() noexcept override = default;
  //! Returns the string given at construction (never null).
  const char* what() const noexcept override
  {
    return m_where;
  }
 private:
  const char* m_where;
};
55
56/*---------------------------------------------------------------------------*/
57/*---------------------------------------------------------------------------*/
58
#ifdef ARCCORE_DEBUG_UNISTRING
/*!
 * \brief Debug helper: reports the use of a StringImpl whose reference
 * count is no longer positive.
 *
 * Only compiled when ARCCORE_DEBUG_UNISTRING is defined. It only prints a
 * message on the standard error stream; it does not abort.
 */
static void
_badStringImplReference(StringImpl* ptr)
{
  // 'cerr' must be qualified: no using-directive for std is visible in this file.
  std::cerr << "** FATAL: Trying to use deleted StringImpl " << ptr << '\n';
}
#endif
66
67inline void StringImpl::
68_checkReference()
69{
70#ifdef ARCCORE_DEBUG_UNISTRING
71 if (m_nb_ref.value()<=0){
72 _badStringImplReference(this);
73 }
74#endif
75}
76
77/*---------------------------------------------------------------------------*/
78/*---------------------------------------------------------------------------*/
79
80inline void StringImpl::
81_finalizeUtf8Creation()
82{
83 m_flags |= eValidUtf8;
84 // \a m_utf8_array doit toujours avoir un zéro terminal.
85 if (m_utf8_array.empty())
86 m_utf8_array.add('\0');
87 else if (m_utf8_array.back()!='\0')
88 m_utf8_array.add('\0');
89}
90
91/*---------------------------------------------------------------------------*/
92/*---------------------------------------------------------------------------*/
93
94inline void StringImpl::
95_initFromSpan(Span<const Byte> bytes)
96{
97 m_utf8_array = bytes;
98 _finalizeUtf8Creation();
99}
100
101/*---------------------------------------------------------------------------*/
102/*---------------------------------------------------------------------------*/
103
104StringImpl::
105StringImpl(std::string_view str)
106: m_nb_ref(0)
107, m_flags(0)
108{
109 auto b = reinterpret_cast<const Byte*>(str.data());
110 _initFromSpan(Span<const Byte>(b,str.size()));
111}
112
113/*---------------------------------------------------------------------------*/
114/*---------------------------------------------------------------------------*/
115
116StringImpl::
117StringImpl(Span<const UChar> uchars)
118: m_nb_ref(0)
119, m_flags(0)
120{
121 _setUtf16(uchars);
122 m_flags = eValidUtf16;
123}
124
125/*---------------------------------------------------------------------------*/
126/*---------------------------------------------------------------------------*/
127
128StringImpl::
129StringImpl(const StringImpl& str)
130: m_nb_ref(0)
131, m_flags(str.m_flags)
132, m_utf16_array(str.m_utf16_array)
133, m_utf8_array(str.m_utf8_array)
134{
135}
136
137/*---------------------------------------------------------------------------*/
138/*---------------------------------------------------------------------------*/
139
140StringImpl::
141StringImpl(Span<const Byte> bytes)
142: m_nb_ref(0)
143, m_flags(0)
144{
145 _initFromSpan(bytes);
146}
147
148/*---------------------------------------------------------------------------*/
149/*---------------------------------------------------------------------------*/
150
151StringImpl::
152StringImpl()
153: m_nb_ref(0)
154, m_flags(0)
155{
156}
157
158/*---------------------------------------------------------------------------*/
159/*---------------------------------------------------------------------------*/
160
162bytes()
163{
165 Int64 size = x.size();
166 if (size>0)
167 return { x.data(), size-1 };
168 // Ne devrait normalement pas arriver mais si c'est le cas on retourne
169 // une vue sur la chaîne vide car cette méthode garantit qu'il y a un
170 // zéro terminal à la fin.
171 // NOTE: On ne lève pas d'exception car cette méthode est utilisée dans les
172 // sorties via operator<< et cela peut être utilisé notamment dans
173 // les destructeurs des objets.
174 std::cerr << "INTERNAL ERROR: Null size in StringImpl::bytes()";
175 return { reinterpret_cast<const Byte*>(global_empty_string), 0 };
176}
177
178/*---------------------------------------------------------------------------*/
179/*---------------------------------------------------------------------------*/
180
181std::string_view StringImpl::
182toStdStringView()
183{
185 return std::string_view(reinterpret_cast<const char*>(x.data()),x.size());
186}
187
188/*---------------------------------------------------------------------------*/
189/*---------------------------------------------------------------------------*/
190
191StringView StringImpl::
192view()
193{
194 return StringView(bytes());
195}
196
197/*---------------------------------------------------------------------------*/
198/*---------------------------------------------------------------------------*/
199
200void StringImpl::
201addReference()
202{
203 ++m_nb_ref;
204 _checkReference();
205}
206
207/*---------------------------------------------------------------------------*/
208/*---------------------------------------------------------------------------*/
209
210void StringImpl::
211removeReference()
212{
213 _checkReference();
214 Int32 r = --m_nb_ref;
215#ifndef ARCCORE_DEBUG_UNISTRING
216 if (r==0)
217 delete this;
218#endif
219}
220
221/*---------------------------------------------------------------------------*/
222/*---------------------------------------------------------------------------*/
223
224ConstArrayView<UChar> StringImpl::
225utf16()
226{
227 _checkReference();
228 _createUtf16();
229 return m_utf16_array.view().smallView();
230}
231
232/*---------------------------------------------------------------------------*/
233/*---------------------------------------------------------------------------*/
234
236largeUtf8()
237{
238 _checkReference();
239 _createUtf8();
240 return m_utf8_array.view();
241}
242
243/*---------------------------------------------------------------------------*/
244/*---------------------------------------------------------------------------*/
245
246bool StringImpl::
247isEqual(StringImpl* str)
248{
249 _checkReference();
250 _createUtf8();
251 Span<const Byte> ref_array = str->largeUtf8();
252 bool v = CStringUtils::isEqual((const char*)ref_array.data(),(const char*)m_utf8_array.data());
253 return v;
254}
255
256/*---------------------------------------------------------------------------*/
257/*---------------------------------------------------------------------------*/
258
259bool StringImpl::
260isLessThan(StringImpl* str)
261{
262 _checkReference();
263 _createUtf8();
264 if (m_flags & eValidUtf8){
265 Span<const Byte> ref_array = str->largeUtf8();
266 bool v = CStringUtils::isLess((const char*)m_utf8_array.data(),(const char*)ref_array.data());
267 return v;
268 }
269 ARCCORE_ASSERT((0),("InternalError in StringImpl::isEqual()"));
270 return true;
271}
272
273/*---------------------------------------------------------------------------*/
274/*---------------------------------------------------------------------------*/
275
276bool StringImpl::
277isEqual(StringView str)
278{
279 _checkReference();
280 _createUtf8();
281 return str.toStdStringView() == toStdStringView();
282}
283
284/*---------------------------------------------------------------------------*/
285/*---------------------------------------------------------------------------*/
286
287bool StringImpl::
288isLessThan(StringView str)
289{
290 _checkReference();
291 _createUtf8();
292 return toStdStringView() < str.toStdStringView();
293}
294
295/*---------------------------------------------------------------------------*/
296/*---------------------------------------------------------------------------*/
297
298StringImpl* StringImpl::
299clone()
300{
301 _checkReference();
302 _createUtf8();
303 StringImpl* n = new StringImpl(*this);
304 return n;
305}
306
307/*---------------------------------------------------------------------------*/
308/*---------------------------------------------------------------------------*/
309
310bool StringImpl::
311empty()
312{
313 _checkReference();
314 if (m_flags & eValidUtf8) {
315 ARCCORE_ASSERT((!m_utf8_array.empty()),("Not 0 terminated utf8 encoding"));
316 return m_utf8_array.size()<=1; // Décompte le 0 terminal
317 }
318 if (m_flags & eValidUtf16) {
319 ARCCORE_ASSERT((!m_utf16_array.empty()),("Not 0 terminated utf16 encoding"));
320 return m_utf16_array.size()<=1; // Décompte le 0 terminal
321 }
322 ARCCORE_ASSERT((0),("InternalError in StringImpl::empty()"));
323 return false;
324}
325
326/*---------------------------------------------------------------------------*/
327/*---------------------------------------------------------------------------*/
328
329StringImpl* StringImpl::
330append(StringImpl* str)
331{
332 _checkReference();
333 _createUtf8();
334 Span<const Byte> ref_str = str->largeUtf8();
335 _appendUtf8(ref_str);
336 return this;
337}
338
339/*---------------------------------------------------------------------------*/
340/*---------------------------------------------------------------------------*/
341
342StringImpl* StringImpl::
343append(StringView str)
344{
345 Span<const Byte> str_bytes = str.bytes();
346 if (!str_bytes.data())
347 return this;
348
349 _checkReference();
350 _createUtf8();
351
352 _appendUtf8(Span<const Byte>(str_bytes.data(),str_bytes.size() + 1));;
353 return this;
354}
355
356/*---------------------------------------------------------------------------*/
357/*---------------------------------------------------------------------------*/
358
359void StringImpl::
360_appendUtf8(Span<const Byte> ref_str)
361{
362 Int64 ref_size = ref_str.size();
363 Int64 utf8_size = m_utf8_array.size();
364 Int64 current_size = utf8_size - 1;
365
366 ARCCORE_ASSERT((ref_size>0),("Bad ref_size"));
367 ARCCORE_ASSERT((utf8_size>0),("Bad utf8_size"));
368 ARCCORE_ASSERT((ref_str[ref_size-1]==0),("Bad ref null terminal"));
369 ARCCORE_ASSERT((m_utf8_array[utf8_size-1]==0),("Bad ref null terminal"));
370
371 m_utf8_array.resize(current_size + ref_size);
372 std::memcpy(&m_utf8_array[current_size],ref_str.data(),ref_size);
373
374 m_flags |= eValidUtf8;
375 _invalidateUtf16();
376}
377
378/*---------------------------------------------------------------------------*/
379/*---------------------------------------------------------------------------*/
380
381StringImpl* StringImpl::
382replaceWhiteSpace()
383{
384 _createUtf8();
385 _invalidateUtf16();
386 BasicTranscoder::replaceWS(m_utf8_array);
387 return this;
388}
389
390/*---------------------------------------------------------------------------*/
391/*---------------------------------------------------------------------------*/
392
393StringImpl* StringImpl::
394collapseWhiteSpace()
395{
396 _createUtf8();
397 _invalidateUtf16();
398 BasicTranscoder::collapseWS(m_utf8_array);
399 return this;
400}
401
402/*---------------------------------------------------------------------------*/
403/*---------------------------------------------------------------------------*/
404
405StringImpl* StringImpl::
406toUpper()
407{
408 _createUtf8();
409 _invalidateUtf16();
410 BasicTranscoder::upperCase(m_utf8_array);
411 return this;
412}
413
414/*---------------------------------------------------------------------------*/
415/*---------------------------------------------------------------------------*/
416
417StringImpl* StringImpl::
418toLower()
419{
420 _createUtf8();
421 _invalidateUtf16();
422 BasicTranscoder::lowerCase(m_utf8_array);
423 return this;
424}
425
426/*---------------------------------------------------------------------------*/
427/*---------------------------------------------------------------------------*/
428
429StringImpl* StringImpl::
430substring(StringImpl* str,Int64 pos,Int64 len)
431{
432 StringImpl* s = new StringImpl();
433 BasicTranscoder::substring(s->m_utf8_array,str->largeUtf8(),pos,len);
434 s->m_flags |= eValidUtf8;
435 return s;
436}
437
438/*---------------------------------------------------------------------------*/
439/*---------------------------------------------------------------------------*/
440
441void StringImpl::
442_createUtf16()
443{
444 if (m_flags & eValidUtf16)
445 return;
446
447 if (m_flags & eValidUtf8){
448 ARCCORE_ASSERT(m_utf16_array.empty(),("Not empty utf16_array"));
449 BasicTranscoder::transcodeFromUtf8ToUtf16(m_utf8_array,m_utf16_array);
450 m_flags |= eValidUtf16;
451 return;
452 }
453
454 ARCCORE_ASSERT((0),("InternalError in StringImpl::_createUtf16()"));
455}
456
457/*---------------------------------------------------------------------------*/
458/*---------------------------------------------------------------------------*/
459
460void StringImpl::
461_createUtf8()
462{
463 if (m_flags & eValidUtf8)
464 return;
465
466 if (m_flags & eValidUtf16){
467 ARCCORE_ASSERT(m_utf8_array.empty(),("Not empty utf8_array"));
468 BasicTranscoder::transcodeFromUtf16ToUtf8(m_utf16_array,m_utf8_array);
469 _finalizeUtf8Creation();
470 return;
471 }
472
473 ARCCORE_ASSERT((0),("InternalError in StringImpl::_createUtf16()"));
474}
475
476/*---------------------------------------------------------------------------*/
477/*---------------------------------------------------------------------------*/
478
479void StringImpl::
480_setUtf16(Span<const UChar> src)
481{
482 m_utf16_array = src;
483 if (m_utf16_array.empty())
484 m_utf16_array.add(0);
485 else if (m_utf16_array.back()!='\0')
486 m_utf16_array.add(0);
487}
488
489/*---------------------------------------------------------------------------*/
490/*---------------------------------------------------------------------------*/
491
492void StringImpl::
493_invalidateUtf16()
494{
495 m_flags &= ~eValidUtf16;
496 m_utf16_array.clear();
497}
498
499/*---------------------------------------------------------------------------*/
500/*---------------------------------------------------------------------------*/
501
502void StringImpl::
503_invalidateUtf8()
504{
505 m_flags &= ~eValidUtf8;
506 m_utf8_array.clear();
507}
508
509/*---------------------------------------------------------------------------*/
510/*---------------------------------------------------------------------------*/
511
512void StringImpl::
513_printStrUtf16(std::ostream& o,Span<const UChar> str)
514{
515 Int64 buf_size = str.size();
516 o << "(bufsize=" << buf_size
517 << " begin=" << str.data() << " - ";
518 for( Int64 i=0; i<buf_size; ++i )
519 o << (int)str[i] << ' ';
520 o << ")";
521}
522
523/*---------------------------------------------------------------------------*/
524/*---------------------------------------------------------------------------*/
525
526void StringImpl::
527_printStrUtf8(std::ostream& o,Span<const Byte> str)
528{
529 Int64 buf_size = str.size();
530 o << "(bufsize=" << buf_size << " - ";
531 for( Int64 i=0; i<buf_size; ++i )
532 o << (int)str[i] << ' ';
533 o << ")";
534}
535
536/*---------------------------------------------------------------------------*/
537/*---------------------------------------------------------------------------*/
538
539void StringImpl::
540internalDump(std::ostream& ostr)
541{
542 ostr << "(utf8=valid=" << ((m_flags & eValidUtf8)!=0)
543 << ",len=" << m_utf8_array.size() << ",val=";
544 _printStrUtf8(ostr,m_utf8_array);
545 ostr << ")";
546
547 ostr << "(utf16=valid=" << ((m_flags & eValidUtf16)!=0)
548 << ",len=" << m_utf16_array.size() << ",val=";
549 _printStrUtf16(ostr,m_utf16_array);
550 ostr << ")";
551}
552
553/*---------------------------------------------------------------------------*/
554/*---------------------------------------------------------------------------*/
555
556} // End namespace Arcane
557
558/*---------------------------------------------------------------------------*/
559/*---------------------------------------------------------------------------*/
static void transcodeFromUtf16ToUtf8(Span< const UChar > utf16, CoreArray< Byte > &utf8)
Traduit depuis UTF16 vers UTF8.
Vue constante d'un tableau de type T.
constexpr __host__ __device__ SizeType size() const noexcept
Retourne la taille du tableau.
Definition Span.h:305
constexpr __host__ __device__ pointer data() const noexcept
Pointeur sur le début de la vue.
Definition Span.h:517
Vue d'un tableau d'éléments de type T.
Definition Span.h:613
Span< const Byte > bytes()
idem largeUtf8() mais SANS le zéro terminal
Span< const Byte > largeUtf8()
Vue sur l'encodage UTF-8 AVEC zéro terminal.
Vue sur une chaîne de caractères UTF-8.
Definition StringView.h:47
bool isLess(const char *s1, const char *s2)
Retourne true si s1 est inférieur (ordre alphabétique) à s2 , false sinon.
bool isEqual(const char *s1, const char *s2)
Retourne true si s1 et s2 sont identiques, false sinon.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int64_t Int64
Type entier signé sur 64 bits.
unsigned char Byte
Type d'un octet.
Definition BaseTypes.h:43
std::int32_t Int32
Type entier signé sur 32 bits.