14#include "arccore/base/CoreArray.h"
15#include "arccore/base/BasicTranscoder.h"
17#ifdef ARCCORE_HAS_GLIB
32bool _isSpace(
Int32 wc)
34#ifdef ARCCORE_HAS_GLIB
35 return g_unichar_isspace(wc);
37 return std::iswspace(wc);
42#ifdef ARCCORE_HAS_GLIB
43 return g_unichar_toupper(wc);
45 return std::towupper(wc);
50#ifdef ARCCORE_HAS_GLIB
51 return g_unichar_tolower(wc);
53 return std::towlower(wc);
59 std::cout <<
"WARNING: Invalid sequence '" << wc <<
"' in conversion input (position=" << pos <<
")\n";
64int _notEnoughChar(
Int32& wc)
66 std::cout <<
"WARNING: Invalid sequence '" << wc <<
"' in conversion input (unexpected eof)\n";
92 else if (wc < 0x10000)
94 else if (wc < 0x200000)
96 else if (wc < 0x4000000)
102 r[5] = 0x80 | (wc & 0x3f);
107 r[4] = 0x80 | (wc & 0x3f);
112 r[3] = 0x80 | (wc & 0x3f);
117 r[2] = 0x80 | (wc & 0x3f);
122 r[1] = 0x80 | (wc & 0x3f);
129 for (
int i = 0; i < count; ++i)
130 utf8.add((
Byte)r[i]);
148 const Byte* s = uchar.
data() + index;
149 unsigned char c = s[0];
157 return _invalidChar(1, wc);
161 return _notEnoughChar(wc);
162 if (!((s[1] ^ 0x80) < 0x40))
163 return _invalidChar(2, wc);
164 wc = ((
Int32)(c & 0x1f) << 6) | (
Int32)(s[1] ^ 0x80);
170 return _notEnoughChar(wc);
171 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (c >= 0xe1 || s[1] >= 0xa0)))
172 return _invalidChar(4, wc);
173 wc = ((
Int32)(c & 0x0f) << 12) | ((
Int32)(s[1] ^ 0x80) << 6) | (
Int32)(s[2] ^ 0x80);
179 return _notEnoughChar(wc);
180 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (c >= 0xf1 || s[1] >= 0x90)))
181 return _invalidChar(5, wc);
182 wc = ((
Int32)(c & 0x07) << 18) | ((
Int32)(s[1] ^ 0x80) << 12) | ((
Int32)(s[2] ^ 0x80) << 6) | (
Int32)(s[3] ^ 0x80);
192 return _notEnoughChar(wc);
193 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (c >= 0xf9 || s[1] >= 0x88)))
194 return _invalidChar(7, wc);
195 wc = ((
Int32)(c & 0x03) << 24) | ((
Int32)(s[1] ^ 0x80) << 18) | ((
Int32)(s[2] ^ 0x80) << 12) | ((
Int32)(s[3] ^ 0x80) << 6) | (
Int32)(s[4] ^ 0x80);
200 return _notEnoughChar(wc);
201 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40 && (s[5] ^ 0x80) < 0x40 && (c >= 0xfd || s[1] >= 0x84)))
202 return _invalidChar(8, wc);
203 wc = ((
Int32)(c & 0x01) << 30) | ((
Int32)(s[1] ^ 0x80) << 24) | ((
Int32)(s[2] ^ 0x80) << 18) | ((
Int32)(s[3] ^ 0x80) << 12) | ((
Int32)(s[4] ^ 0x80) << 6) | (
Int32)(s[5] ^ 0x80);
206 return _invalidChar(9, wc);
226 if (wc >= 0xd800 && wc < 0xdc00) {
227 if ((index + 1) == uchar.
size()) {
228 std::cout <<
"WARNING: utf16_to_ucs4(): Invalid sequence in conversion input (unexpected eof)\n";
232 Int32 wc2 = uchar[index + 1];
233 if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) {
234 std::cout <<
"WARNING: utf16_to_ucs4(): Invalid sequence (1) '" << wc2 <<
"' in conversion input\n";
238 wc = (0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00));
241 else if (wc >= 0xdc00 && wc < 0xe0000) {
242 std::cout <<
"WARNING: utf16_to_ucs4(): Invalid sequence (2) '" << wc <<
"' in conversion input\n";
271 uchar.add((
UChar)wc);
275 std::cout <<
"WARNING: ucs4_to_utf16(): Invalid sequence in conversion input\n";
280 uchar.add((
UChar)wc);
284 uchar.add( (
UChar) ((wc - 0x10000) / 0x400 + 0xd800) );
285 uchar.add( (
UChar) ((wc - 0x10000) % 0x400 + 0xdc00) );
288 std::cerr <<
"WARNING: ucs4_to_utf16(): Invalid sequence in conversion input\n";
298Int64 BasicTranscoder::
299stringLen(
const UChar* ustr)
301 if (!ustr || ustr[0] == 0)
303 const UChar* u = ustr + 1;
318 i += utf16_to_ucs4(utf16,i,wc);
319 ucs4_to_utf8(wc,utf8);
326void BasicTranscoder::
331 i += utf8_to_ucs4(utf8,i,wc);
332 ucs4_to_utf16(wc,utf16);
339void BasicTranscoder::
345 for (
Int64 i = 0, n = utf8.
size(); i < n;) {
347 i += utf8_to_ucs4(utf8, i, wc);
351 ucs4_to_utf8(wc, out_utf8);
358void BasicTranscoder::
371 bool old_is_space =
true;
372 bool has_spaces_only =
true;
377 i += utf8_to_ucs4(utf8, i, wc);
384 old_is_space =
false;
385 ucs4_to_utf8(wc, out_utf8);
386 has_spaces_only =
false;
389 if (old_is_space && (!has_spaces_only)) {
390 if (out_utf8.
size() > 0)
403void BasicTranscoder::
409 for (
Int64 i = 0, n = utf8.
size(); i < n;) {
411 i += utf8_to_ucs4(utf8, i, wc);
412 Int32 upper_wc = _toUpper(wc);
413 ucs4_to_utf8(upper_wc, out_utf8);
420void BasicTranscoder::
426 for (
Int64 i = 0, n = utf8.
size(); i < n;) {
428 i += utf8_to_ucs4(utf8, i, wc);
429 Int32 upper_wc = _toLower(wc);
430 ucs4_to_utf8(upper_wc, out_utf8);
437void BasicTranscoder::
441 Int64 current_pos = 0;
444 i += utf8_to_ucs4(utf8,i,wc);
445 if (current_pos>=pos && current_pos<(pos+len)){
448 ucs4_to_utf8(wc,out_utf8);
453 ucs4_to_utf8(0,out_utf8);
Lecteur des fichiers de maillage via la bibliothèque LIMA.
static void transcodeFromUtf16ToUtf8(Span< const UChar > utf16, CoreArray< Byte > &utf8)
Traduit depuis UTF16 vers UTF8.
Tableau interne pour Arccore.
Int64 size() const
Retourne la taille du tableau.
constexpr ARCCORE_HOST_DEVICE pointer data() const noexcept
Pointeur sur le début de la vue.
constexpr ARCCORE_HOST_DEVICE SizeType size() const noexcept
Retourne la taille du tableau.
Vue d'un tableau d'éléments de type T.
Espace de nom de Arccore.
unsigned short UChar
Type d'un caractère unicode.
std::int64_t Int64
Type entier signé sur 64 bits.
constexpr ARCCORE_HOST_DEVICE Int64 arccoreCheckLargeArraySize(size_t size)
Vérifie que size peut être converti dans un 'Int64' pour servir de taille à un tableau.
std::int32_t Int32
Type entier signé sur 32 bits.
unsigned char Byte
Type d'un octet.