12#if __DCL_HAVE_ALLOC_DEBUG
13#undef __DCL_ALLOC_LEVEL
14#define __DCL_ALLOC_LEVEL __DCL_ALLOC_INTERNAL
26CharsetConvertException::CharsetConvertException(
int _errorCode)
29 __errorCode = _errorCode;
40 str = L
"Illegal UCS value.. can't convert to multi-bytes";
43 str = L
"Source multi bytes few";
46 str = L
"Illegal bytes sequence";
49 str = L
"Can not convert UCS4 to UCS2";
53 str = L
"Unknown error";
58#define ILLEGAL_UCS4 -1
59#define BUFFER_SMALL -2
67#if __SIZEOF_WCHAR_T__ == 4
70#if __SIZEOF_WCHAR_T__ == 2
74#if __BYTE_ORDER == __BIG_ENDIAN
83int CharsetEncoder::encode(
93 byte_t* dstend = dst + _outCount;
96 const wchar_t* src = _in;
97 const wchar_t* srcend = src + _inCount;
101 while(src < srcend && (dstlen = dstend - dst) > 0) {
110 _inCount = src - _in;
111 _outCount = dst - _out;
129size_t CharsetEncoder::getEncodedLength(
const wchar_t* _wcs,
size_t _wcslen)
132 const wchar_t* _in = _wcs;
133 size_t inTotal = _wcslen;
138 size_t _inCount = inTotal;
140 int r = encode(_in, _inCount,
buf, _outCount);
141 outTotal += _outCount;
156ByteString CharsetEncoder::encode(
const wchar_t* _wcs,
size_t _wcslen)
160 if (_wcslen == (
size_t)-1)
161 _wcslen = String::length(_wcs);
165 const wchar_t* _in = _wcs;
166 size_t _inCount = _wcslen;
168 size_t _outCount = getEncodedLength(_wcs, _wcslen);
170 size_t _outCount = _inCount * 6;
173 ByteBuffer*
buf = ByteBuffer::create(_outCount);
174 int rn = encode(_in, _inCount, (
byte_t*)
buf->data(), _outCount);
180 buf->__dataLength = _outCount;
181 ByteBuffer::shrink(
buf);
189ByteString CharsetEncoder::encode(
const String& _str)
192 return encode(_str, _str.length());
197UTF8Encoder::UTF8Encoder(
205void UTF8Encoder::reset()
210int UTF8Encoder::encode(
219 if (__addBOM && !__addedBOM) {
220 if (_outCount >= 3) {
230 int r = CharsetEncoder::encode(_in, _inCount, _out, _outCount);
240 return CharsetEncoder::encode(_in, _inCount, _out, _outCount);
248 else if (_uc < 0x800)
250 else if (_uc < 0x10000)
252 else if (_uc < 0x200000)
254 else if (_uc < 0x4000000)
256 else if (_uc <= 0x7fffffff)
261 if (_mbslen < (
size_t)count)
265 case 6: _mbs[5] = 0x80 | (_uc & 0x3f); _uc = _uc >> 6; _uc |= 0x4000000;
266 case 5: _mbs[4] = 0x80 | (_uc & 0x3f); _uc = _uc >> 6; _uc |= 0x200000;
267 case 4: _mbs[3] = 0x80 | (_uc & 0x3f); _uc = _uc >> 6; _uc |= 0x10000;
268 case 3: _mbs[2] = 0x80 | (_uc & 0x3f); _uc = _uc >> 6; _uc |= 0x800;
269 case 2: _mbs[1] = 0x80 | (_uc & 0x3f); _uc = _uc >> 6; _uc |= 0xc0;
270 case 1: _mbs[0] = _uc;
277UTF16Encoder::UTF16Encoder(
288void UTF16Encoder::reset()
293int UTF16Encoder::encode(
302 if (__addBOM && !__addedBOM) {
303 if (_outCount >= 2) {
318 return CharsetEncoder::encode(_in, _inCount, _out, _outCount);
324 return CharsetEncoder::encode(_in, _inCount, _out, _outCount);
329 if (_uc != 0xFFFE && !(_uc >= 0xD800 && _uc < 0xE000)) {
334 _mbs[0] = (
unsigned char) (_uc >> 8);
335 _mbs[1] = (
unsigned char) _uc;
338 _mbs[1] = (
unsigned char) (_uc >> 8);
339 _mbs[0] = (
unsigned char) _uc;
346 else if (_uc < 0x110000) {
348 ucs4_t uc1 = 0xd800 + ((_uc - 0x10000) >> 10);
349 ucs4_t uc2 = 0xdc00 + ((_uc - 0x10000) & 0x3FF);
351 _mbs[0] = (
unsigned char) (uc1 >> 8);
352 _mbs[1] = (
unsigned char) uc1;
353 _mbs[2] = (
unsigned char) (uc2 >> 8);
354 _mbs[3] = (
unsigned char) uc2;
357 _mbs[3] = (
unsigned char) (uc1 >> 8);
358 _mbs[2] = (
unsigned char) uc1;
359 _mbs[1] = (
unsigned char) (uc2 >> 8);
360 _mbs[0] = (
unsigned char) uc2;
373UTF32Encoder::UTF32Encoder(
384void UTF32Encoder::reset()
389int UTF32Encoder::encode(
398 if (__addBOM && !__addedBOM) {
399 if (_outCount >= 4) {
418 return CharsetEncoder::encode(_in, _inCount, _out, _outCount);
424 return CharsetEncoder::encode(_in, _inCount, _out, _outCount);
429 if (_uc < 0x110000 && !(_uc >= 0xd800 && _uc < 0xe000)) {
431 if (_uc < 0x110000) {
435 _mbs[1] = (
unsigned char) (_uc >> 16);
436 _mbs[2] = (
unsigned char) (_uc >> 8);
437 _mbs[3] = (
unsigned char) _uc;
441 _mbs[2] = (
unsigned char) (_uc >> 16);
442 _mbs[1] = (
unsigned char) (_uc >> 8);
443 _mbs[0] = (
unsigned char) _uc;
456AsciiEncoder::AsciiEncoder()
471Latin1Encoder::Latin1Encoder()
493 memset(&__mbstate, 0,
sizeof(__mbstate));
507 byte_t* dstend = dst + _outCount;
510 const wchar_t* src = _in;
511 const wchar_t* srcend = src + _inCount;
515 while (src < srcend && (dstlen = dstend - dst) > 0) {
516 n = wcrtomb(aBuf, *src, &__mbstate);
517 if (
n == (
size_t) -1 ||
n > dstlen)
521 strncpy((
char*)dst, aBuf,
n);
527 _inCount = src - _in;
528 _outCount = dst - _out;
530 if (
n == (
size_t) -1)
__DCL_BEGIN_NAMESPACE typedef uint32_t ucs4_t
#define __countof(array, type)
#define __DCL_ASSERT(expr)
#define IMPLEMENT_CLASSINFO(class_name, base_class_name)
void CharsetConvertException *size_t n
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)=0
virtual String toString() const
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)
virtual int encode(const wchar_t *_in, size_t &_inCount, byte_t *_out, size_t &_outCount)
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)
virtual int toMultiByte(ucs4_t _uc, byte_t *_mbs, size_t _mbslen)