14#include "arccore/base/internal/StringImpl.h"
15#include "arccore/base/BasicTranscoder.h"
16#include "arccore/base/CStringUtils.h"
17#include "arccore/base/StringView.h"
32bool global_arccore_debug_string =
false;
36 const char*
const global_empty_string =
"";
43:
public std::exception
47 StringException(
const char* where)
50 ~StringException() ARCCORE_NOEXCEPT {}
51 virtual const char* what()
const ARCCORE_NOEXCEPT
64#ifdef ARCCORE_DEBUG_UNISTRING
68 cerr <<
"** FATAL: Trying to use deleted StringImpl " << ptr <<
'\n';
72inline void StringImpl::
75#ifdef ARCCORE_DEBUG_UNISTRING
76 if (m_nb_ref.value() <= 0) {
77 _badStringImplReference(
this);
85inline void StringImpl::
86_finalizeUtf8Creation()
88 m_flags |= eValidUtf8;
90 if (m_utf8_array.empty())
91 m_utf8_array.add(
'\0');
92 else if (m_utf8_array.back() !=
'\0')
93 m_utf8_array.add(
'\0');
99inline void StringImpl::
102 m_utf8_array =
bytes;
103 _finalizeUtf8Creation();
110StringImpl(std::string_view str)
114 auto b =
reinterpret_cast<const Byte*
>(str.data());
115 _initFromSpan(Span<const Byte>(b, str.size()));
127 m_flags = eValidUtf16;
136, m_flags(str.m_flags)
137, m_utf16_array(str.m_utf16_array)
138, m_utf8_array(str.m_utf8_array)
150 _initFromSpan(bytes);
172 return { x.
data(), size - 1 };
179 std::cerr <<
"INTERNAL ERROR: Null size in StringImpl::bytes()";
180 return {
reinterpret_cast<const Byte*
>(global_empty_string), 0 };
186std::string_view StringImpl::
190 return std::string_view(
reinterpret_cast<const char*
>(x.
data()), x.
size());
196StringView StringImpl::
199 return StringView(
bytes());
219 Int32 r = --m_nb_ref;
220#ifndef ARCCORE_DEBUG_UNISTRING
234 return m_utf16_array.view();
245 return m_utf8_array.view();
265isLessThan(StringImpl* str)
269 if (m_flags & eValidUtf8) {
270 Span<const Byte> ref_array = str->largeUtf8();
274 ARCCORE_ASSERT((0), (
"InternalError in StringImpl::isEqual()"));
286 return str.toStdStringView() == toStdStringView();
297 return toStdStringView() < str.toStdStringView();
308 StringImpl* n =
new StringImpl(*
this);
319 if (m_flags & eValidUtf8) {
320 ARCCORE_ASSERT((!m_utf8_array.empty()), (
"Not 0 terminated utf8 encoding"));
321 return m_utf8_array.size() <= 1;
323 if (m_flags & eValidUtf16) {
324 ARCCORE_ASSERT((!m_utf16_array.empty()), (
"Not 0 terminated utf16 encoding"));
325 return m_utf16_array.size() <= 1;
327 ARCCORE_ASSERT((0), (
"InternalError in StringImpl::empty()"));
339 Span<const Byte> ref_str = str->largeUtf8();
340 _appendUtf8(ref_str);
350 Span<const Byte> str_bytes = str.
bytes();
351 if (!str_bytes.data())
357 _appendUtf8(Span<const Byte>(str_bytes.data(), str_bytes.size() + 1));
368 Int64 ref_size = ref_str.size();
369 Int64 utf8_size = m_utf8_array.size();
370 Int64 current_size = utf8_size - 1;
372 ARCCORE_ASSERT((ref_size > 0), (
"Bad ref_size"));
373 ARCCORE_ASSERT((utf8_size > 0), (
"Bad utf8_size"));
374 ARCCORE_ASSERT((ref_str[ref_size - 1] == 0), (
"Bad ref null terminal"));
375 ARCCORE_ASSERT((m_utf8_array[utf8_size - 1] == 0), (
"Bad ref null terminal"));
377 m_utf8_array.resize(current_size + ref_size);
378 std::memcpy(&m_utf8_array[current_size], ref_str.data(), ref_size);
380 m_flags |= eValidUtf8;
392 Impl::BasicTranscoder::replaceWS(m_utf8_array);
404 Impl::BasicTranscoder::collapseWS(m_utf8_array);
416 Impl::BasicTranscoder::upperCase(m_utf8_array);
428 Impl::BasicTranscoder::lowerCase(m_utf8_array);
438 StringImpl* s =
new StringImpl();
439 Impl::BasicTranscoder::substring(s->m_utf8_array, str->largeUtf8(), pos, len);
440 s->m_flags |= eValidUtf8;
450 if (m_flags & eValidUtf16)
453 if (m_flags & eValidUtf8) {
454 ARCCORE_ASSERT(m_utf16_array.empty(), (
"Not empty utf16_array"));
455 Impl::BasicTranscoder::transcodeFromUtf8ToUtf16(m_utf8_array, m_utf16_array);
456 m_flags |= eValidUtf16;
460 ARCCORE_ASSERT((0), (
"InternalError in StringImpl::_createUtf16()"));
469 if (m_flags & eValidUtf8)
472 if (m_flags & eValidUtf16) {
473 ARCCORE_ASSERT(m_utf8_array.empty(), (
"Not empty utf8_array"));
475 _finalizeUtf8Creation();
479 ARCCORE_ASSERT((0), (
"InternalError in StringImpl::_createUtf16()"));
489 if (m_utf16_array.empty())
490 m_utf16_array.add(0);
491 else if (m_utf16_array.back() !=
'\0')
492 m_utf16_array.add(0);
501 m_flags &= ~eValidUtf16;
502 m_utf16_array.clear();
511 m_flags &= ~eValidUtf8;
512 m_utf8_array.clear();
521 Int64 buf_size = str.size();
522 o <<
"(bufsize=" << buf_size
523 <<
" begin=" << str.data() <<
" - ";
524 for (
Int64 i = 0; i < buf_size; ++i)
525 o << (
int)str[i] <<
' ';
535 Int64 buf_size = str.size();
536 o <<
"(bufsize=" << buf_size <<
" - ";
537 for (
Int64 i = 0; i < buf_size; ++i)
538 o << (
int)str[i] <<
' ';
546internalDump(std::ostream& ostr)
548 ostr <<
"(utf8=valid=" << ((m_flags & eValidUtf8) != 0)
549 <<
",len=" << m_utf8_array.size() <<
",val=";
550 _printStrUtf8(ostr, m_utf8_array);
553 ostr <<
"(utf16=valid=" << ((m_flags & eValidUtf16) != 0)
554 <<
",len=" << m_utf16_array.size() <<
",val=";
555 _printStrUtf16(ostr, m_utf16_array);
Constant view of an array of type T.
static void transcodeFromUtf16ToUtf8(Span< const UChar > utf16, CoreArray< Byte > &utf8)
Translates from UTF16 to UTF8.
constexpr __host__ __device__ pointer data() const noexcept
Pointer to the start of the view.
constexpr __host__ __device__ SizeType size() const noexcept
Returns the size of the array.
View of an array of elements of type T.
Implementation of the String class.
Span< const Byte > bytes()
same as largeUtf8() but WITHOUT the null terminator
Span< const Byte > largeUtf8()
View of the UTF-8 encoding WITH null terminator.
View of a UTF-8 character string.
bool isLess(const char *s1, const char *s2)
Returns true if s1 is less than s2 (in alphabetical order), false otherwise.
bool isEqual(const char *s1, const char *s2)
Returns true if s1 and s2 are identical, false otherwise.
-*- tab-width: 2; indent-tabs-mode: nil; coding: utf-8-with-signature -*-
std::int64_t Int64
Signed integer type of 64 bits.
unsigned char Byte
Type of a byte.
std::int32_t Int32
Signed integer type of 32 bits.