DCL 4.0
Loading...
Searching...
No Matches
UTF8Decoder Class Reference

#include <Charset.h>

Inheritance diagram for UTF8Decoder:
CharsetDecoder Object

Protected Member Functions

virtual int toWideChar (const byte_t *_mbs, size_t _mbslen, ucs4_t *_uc)
Protected Member Functions inherited from CharsetDecoder
 CharsetDecoder ()
Protected Member Functions inherited from Object
virtual ~Object ()
 Object ()

Additional Inherited Members

Public Member Functions inherited from CharsetDecoder
virtual int decode (const byte_t *_in, size_t &_inCount, wchar_t *_out, size_t &_outCount)
size_t getDecodedLength (const char *_mbs, size_t _mbslen) __DCL_THROWS1(CharsetConvertException *)
String decode (const char *_mbs, size_t _mbslen=(size_t) -1) __DCL_THROWS1(CharsetConvertException *)
String decode (const ByteString &_str) __DCL_THROWS1(CharsetConvertException *)
Public Member Functions inherited from Object
virtual String toString () const
virtual void destroy ()
String className () const
bool isInstanceOf (const std::type_info &typeinfo) const
virtual const std::type_info & typeInfo () const

Detailed Description

Definition at line 174 of file Charset.h.

Member Function Documentation

◆ toWideChar()

int UTF8Decoder::toWideChar ( const byte_t * _mbs,
size_t _mbslen,
ucs4_t * _uc )
protected virtual

Implements CharsetDecoder.

Definition at line 180 of file CharsetDecoder.cpp.

181{
182 int count = 0;
183 while (_mbslen > 0) {
184 byte_t c = _mbs[0];
185
186 if (c < 0x80) {
187 *_uc = c;
188 return count + 1;
189 }
190 else if (c < 0xC2) {
191 return ILLEGAL_SEQUENCE;
192 }
193 else if (c < 0xE0) {
194 if (_mbslen < 2)
195 return SOURCE_FEW_N(0);
196
197 if (!((_mbs[1] ^ 0x80) < 0x40))
198 return ILLEGAL_SEQUENCE;
199
200 *_uc = ((ucs4_t)(c & 0x1F) << 6)
201 | (ucs4_t)(_mbs[1] ^ 0x80);
202
203 return count + 2;
204 }
205 else if (c < 0xF0) {
206 if (_mbslen < 3)
207 return SOURCE_FEW_N(0);
208
209 if (!((_mbs[1] ^ 0x80) < 0x40 && (_mbs[2] ^ 0x80) < 0x40
210 && (c >= 0xE1 || _mbs[1] >= 0xA0)))
211 return ILLEGAL_SEQUENCE;
212
213 ucs4_t ucTemp = ((ucs4_t) (c & 0x0F) << 12)
214 | ((ucs4_t) (_mbs[1] ^ 0x80) << 6)
215 | (ucs4_t) (_mbs[2] ^ 0x80);
216 if (ucTemp == 0xFEFF) {
217 // BOM
218 __hasBOM = true;
219 _mbslen -= 3;
220 _mbs += 3;
221 }
222 else {
223 *_uc = ucTemp;
224 return count + 3;
225 }
226 }
227 else if (c < 0xF8) {
228 if (_mbslen < 4)
229 return SOURCE_FEW_N(0);
230
231 if (!((_mbs[1] ^ 0x80) < 0x40 && (_mbs[2] ^ 0x80) < 0x40
232 && (_mbs[3] ^ 0x80) < 0x40 && (c >= 0xF1 || _mbs[1] >= 0x90)))
233 return ILLEGAL_SEQUENCE;
234
235 *_uc = ((ucs4_t) (c & 0x07) << 18)
236 | ((ucs4_t) (_mbs[1] ^ 0x80) << 12)
237 | ((ucs4_t) (_mbs[2] ^ 0x80) << 6)
238 | (ucs4_t) (_mbs[3] ^ 0x80);
239
240 return count + 4;
241 }
242 else if (c < 0xFC) {
243 if (_mbslen < 5)
244 return SOURCE_FEW_N(0);
245
246 if (!((_mbs[1] ^ 0x80) < 0x40 && (_mbs[2] ^ 0x80) < 0x40
247 && (_mbs[3] ^ 0x80) < 0x40 && (_mbs[4] ^ 0x80) < 0x40
248 && (c >= 0xF9 || _mbs[1] >= 0x88)))
249 return ILLEGAL_SEQUENCE;
250
251 *_uc = ((ucs4_t) (c & 0x03) << 24)
252 | ((ucs4_t) (_mbs[1] ^ 0x80) << 18)
253 | ((ucs4_t) (_mbs[2] ^ 0x80) << 12)
254 | ((ucs4_t) (_mbs[3] ^ 0x80) << 6)
255 | (ucs4_t) (_mbs[4] ^ 0x80);
256
257 return count + 5;
258 }
259 else if (c < 0xFE) {
260 if (_mbslen < 6)
261 return SOURCE_FEW_N(0);
262
263 if (!((_mbs[1] ^ 0x80) < 0x40 && (_mbs[2] ^ 0x80) < 0x40
264 && (_mbs[3] ^ 0x80) < 0x40 && (_mbs[4] ^ 0x80) < 0x40
265 && (_mbs[5] ^ 0x80) < 0x40
266 && (c >= 0xFD || _mbs[1] >= 0x84)))
267 return ILLEGAL_SEQUENCE;
268
269 *_uc = ((ucs4_t) (c & 0x01) << 30)
270 | ((ucs4_t) (_mbs[1] ^ 0x80) << 24)
271 | ((ucs4_t) (_mbs[2] ^ 0x80) << 18)
272 | ((ucs4_t) (_mbs[3] ^ 0x80) << 12)
273 | ((ucs4_t) (_mbs[4] ^ 0x80) << 6)
274 | (ucs4_t) (_mbs[5] ^ 0x80);
275
276 return count + 6;
277 }
278 else
279 return ILLEGAL_SEQUENCE;
280 }
281 return SOURCE_FEW_N(0);
282}
__DCL_BEGIN_NAMESPACE typedef uint32_t ucs4_t
Definition Charset.h:29
#define ILLEGAL_SEQUENCE
#define SOURCE_FEW_N(_mbslen)
unsigned char byte_t
Definition Config.h:274

The documentation for this class was generated from the following files: