38 template <
typename In>
39 In Utf<8>::decode(In begin, In end, Uint32& output, Uint32 replacement)
42 static const int trailing[256] =
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
53 static const Uint32 offsets[6] =
55 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
59 int trailingBytes = trailing[
static_cast<Uint8
>(*begin)];
60 if (begin + trailingBytes < end)
63 switch (trailingBytes)
65 case 5 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
66 case 4 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
67 case 3 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
68 case 2 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
69 case 1 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
70 case 0 : output +=
static_cast<Uint8
>(*begin++);
72 output -= offsets[trailingBytes];
86 template <
typename Out>
87 Out Utf<8>::encode(Uint32 input, Out output, Uint8 replacement)
90 static const Uint8 firstBytes[7] =
92 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
96 if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
100 *output++ = replacement;
107 int bytestoWrite = 1;
108 if (input < 0x80) bytestoWrite = 1;
109 else if (input < 0x800) bytestoWrite = 2;
110 else if (input < 0x10000) bytestoWrite = 3;
111 else if (input <= 0x0010FFFF) bytestoWrite = 4;
115 switch (bytestoWrite)
117 case 4 : bytes[3] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
118 case 3 : bytes[2] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
119 case 2 : bytes[1] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
120 case 1 : bytes[0] =
static_cast<Uint8
> (input | firstBytes[bytestoWrite]);
124 const Uint8* currentByte = bytes;
125 switch (bytestoWrite)
127 case 4 : *output++ = *currentByte++;
128 case 3 : *output++ = *currentByte++;
129 case 2 : *output++ = *currentByte++;
130 case 1 : *output++ = *currentByte++;
139 template <
typename In>
140 In Utf<8>::next(In begin, In end)
143 return decode(begin, end, codepoint);
148 template <
typename In>
149 std::size_t Utf<8>::count(In begin, In end)
151 std::size_t length = 0;
154 begin = next(begin, end);
163 template <
typename In,
typename Out>
164 Out Utf<8>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
168 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
169 output = encode(codepoint, output);
177 template <
typename In,
typename Out>
178 Out Utf<8>::fromWide(In begin, In end, Out output)
182 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
183 output = encode(codepoint, output);
191 template <
typename In,
typename Out>
192 Out Utf<8>::fromLatin1(In begin, In end, Out output)
197 output = encode(*begin++, output);
204 template <
typename In,
typename Out>
205 Out Utf<8>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
210 begin = decode(begin, end, codepoint);
211 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
219 template <
typename In,
typename Out>
220 Out Utf<8>::toWide(In begin, In end, Out output,
wchar_t replacement)
225 begin = decode(begin, end, codepoint);
226 output = Utf<32>::encodeWide(codepoint, output, replacement);
234 template <
typename In,
typename Out>
235 Out Utf<8>::toLatin1(In begin, In end, Out output,
char replacement)
242 begin = decode(begin, end, codepoint);
243 *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
251 template <
typename In,
typename Out>
252 Out Utf<8>::toUtf8(In begin, In end, Out output)
255 *output++ = *begin++;
262 template <
typename In,
typename Out>
263 Out Utf<8>::toUtf16(In begin, In end, Out output)
268 begin = decode(begin, end, codepoint);
269 output = Utf<16>::encode(codepoint, output);
277 template <
typename In,
typename Out>
278 Out Utf<8>::toUtf32(In begin, In end, Out output)
283 begin = decode(begin, end, codepoint);
284 *output++ = codepoint;
292 template <
typename In>
293 In Utf<16>::decode(In begin, In end, Uint32& output, Uint32 replacement)
295 Uint16 first = *begin++;
298 if ((first >= 0xD800) && (first <= 0xDBFF))
302 Uint32 second = *begin++;
303 if ((second >= 0xDC00) && (second <= 0xDFFF))
306 output =
static_cast<Uint32
>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
311 output = replacement;
318 output = replacement;
332 template <
typename Out>
333 Out Utf<16>::encode(Uint32 input, Out output, Uint16 replacement)
338 if ((input >= 0xD800) && (input <= 0xDFFF))
342 *output++ = replacement;
347 *output++ =
static_cast<Uint16
>(input);
350 else if (input > 0x0010FFFF)
354 *output++ = replacement;
360 *output++ =
static_cast<Uint16
>((input >> 10) + 0xD800);
361 *output++ =
static_cast<Uint16
>((input & 0x3FFUL) + 0xDC00);
369 template <
typename In>
370 In Utf<16>::next(In begin, In end)
373 return decode(begin, end, codepoint);
378 template <
typename In>
379 std::size_t Utf<16>::count(In begin, In end)
381 std::size_t length = 0;
384 begin = next(begin, end);
393 template <
typename In,
typename Out>
394 Out Utf<16>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
398 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
399 output = encode(codepoint, output);
407 template <
typename In,
typename Out>
408 Out Utf<16>::fromWide(In begin, In end, Out output)
412 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
413 output = encode(codepoint, output);
421 template <
typename In,
typename Out>
422 Out Utf<16>::fromLatin1(In begin, In end, Out output)
427 *output++ = *begin++;
434 template <
typename In,
typename Out>
435 Out Utf<16>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
440 begin = decode(begin, end, codepoint);
441 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
449 template <
typename In,
typename Out>
450 Out Utf<16>::toWide(In begin, In end, Out output,
wchar_t replacement)
455 begin = decode(begin, end, codepoint);
456 output = Utf<32>::encodeWide(codepoint, output, replacement);
464 template <
typename In,
typename Out>
465 Out Utf<16>::toLatin1(In begin, In end, Out output,
char replacement)
471 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
480 template <
typename In,
typename Out>
481 Out Utf<16>::toUtf8(In begin, In end, Out output)
486 begin = decode(begin, end, codepoint);
487 output = Utf<8>::encode(codepoint, output);
495 template <
typename In,
typename Out>
496 Out Utf<16>::toUtf16(In begin, In end, Out output)
499 *output++ = *begin++;
506 template <
typename In,
typename Out>
507 Out Utf<16>::toUtf32(In begin, In end, Out output)
512 begin = decode(begin, end, codepoint);
513 *output++ = codepoint;
521 template <
typename In>
522 In Utf<32>::decode(In begin, In , Uint32& output, Uint32 )
530 template <
typename Out>
531 Out Utf<32>::encode(Uint32 input, Out output, Uint32 )
539 template <
typename In>
540 In Utf<32>::next(In begin, In )
547 template <
typename In>
548 std::size_t Utf<32>::count(In begin, In end)
555 template <
typename In,
typename Out>
556 Out Utf<32>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
559 *output++ = decodeAnsi(*begin++, locale);
566 template <
typename In,
typename Out>
567 Out Utf<32>::fromWide(In begin, In end, Out output)
570 *output++ = decodeWide(*begin++);
577 template <
typename In,
typename Out>
578 Out Utf<32>::fromLatin1(In begin, In end, Out output)
583 *output++ = *begin++;
590 template <
typename In,
typename Out>
591 Out Utf<32>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
594 output = encodeAnsi(*begin++, output, replacement, locale);
601 template <
typename In,
typename Out>
602 Out Utf<32>::toWide(In begin, In end, Out output,
wchar_t replacement)
605 output = encodeWide(*begin++, output, replacement);
612 template <
typename In,
typename Out>
613 Out Utf<32>::toLatin1(In begin, In end, Out output,
char replacement)
619 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
628 template <
typename In,
typename Out>
629 Out Utf<32>::toUtf8(In begin, In end, Out output)
632 output = Utf<8>::encode(*begin++, output);
638 template <
typename In,
typename Out>
639 Out Utf<32>::toUtf16(In begin, In end, Out output)
642 output = Utf<16>::encode(*begin++, output);
649 template <
typename In,
typename Out>
650 Out Utf<32>::toUtf32(In begin, In end, Out output)
653 *output++ = *begin++;
660 template <
typename In>
661 Uint32 Utf<32>::decodeAnsi(In input,
const std::locale& locale)
668 #if defined(SFML_SYSTEM_WINDOWS) && \
669 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
670 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
674 wchar_t character = 0;
675 mbtowc(&character, &input, 1);
676 return static_cast<Uint32
>(character);
681 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
684 return static_cast<Uint32
>(facet.widen(input));
691 template <
typename In>
692 Uint32 Utf<32>::decodeWide(In input)
705 template <
typename Out>
706 Out Utf<32>::encodeAnsi(Uint32 codepoint, Out output,
char replacement,
const std::locale& locale)
713 #if defined(SFML_SYSTEM_WINDOWS) && \
714 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
715 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
720 if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
721 *output++ = character;
722 else if (replacement)
723 *output++ = replacement;
730 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
733 *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
742 template <
typename Out>
743 Out Utf<32>::encodeWide(Uint32 codepoint, Out output,
wchar_t replacement)
751 switch (
sizeof(
wchar_t))
755 *output++ =
static_cast<wchar_t>(codepoint);
761 if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
763 *output++ =
static_cast<wchar_t>(codepoint);
765 else if (replacement)
767 *output++ = replacement;