#ifndef __DCL_CHARSET_H__
#define __DCL_CHARSET_H__	20071008

#ifndef __DCL_CONFIG_H__
#include <dcl/Config.h>
#endif

#if __DCL_WINDOWS
	#ifndef _MSC_VER
		// MinGW
		#include <wchar.h>
	#endif
#else
	#include <bits/types/mbstate_t.h>
#endif

#ifndef __DCL_OBJECT_H__
#include <dcl/Object.h>
#endif
#ifndef __DCL_EXCEPTION_H__
#include <dcl/Exception.h>
#endif
#ifndef __DCL_STRING_H__
#include <dcl/String.h>
#endif

__DCL_BEGIN_NAMESPACE

// Fixed-width character/code-unit types used by the converters declared in this header.
typedef uint32_t	ucs4_t;		// 32-bit value; the per-character type of toMultiByte()/toWideChar()
typedef uint16_t	utf16_t;	// 16-bit unit; UTF16Encoder::maxOutCount() is expressed in these
typedef uint32_t	utf32_t;	// 32-bit unit; UTF32Encoder::maxOutCount() is expressed in these

// Byte Order Mark (BOM) tests.
//
// `bom` is any expression that can be indexed with [] and yields bytes
// (e.g. a byte_t*, char* or array). The argument is parenthesized, so
// pointer expressions such as `buf + 1` are accepted, and every element is
// converted to unsigned char before the comparison, so buffers of plain
// (possibly signed) char are matched correctly.
//
// The caller must guarantee that enough bytes are readable:
// 3 for IS_UTF8, 2 for IS_UTF16*, 4 for IS_UTF32*.
// The argument is evaluated more than once; do not pass expressions with
// side effects.
//
// Note: the UTF-32LE mark (FF FE 00 00) starts with the UTF-16LE mark (FF FE),
// so test IS_UTF32* before IS_UTF16* when both encodings are possible.
#define IS_UTF8(bom)	(((unsigned char)((bom)[0]) == 0xEF) \
								&& ((unsigned char)((bom)[1]) == 0xBB) \
								&& ((unsigned char)((bom)[2]) == 0xBF))
#define IS_UTF16BE(bom)	(((unsigned char)((bom)[0]) == 0xFE) \
								&& ((unsigned char)((bom)[1]) == 0xFF))
#define IS_UTF16LE(bom)	(((unsigned char)((bom)[0]) == 0xFF) \
								&& ((unsigned char)((bom)[1]) == 0xFE))
#define IS_UTF16(bom)	(IS_UTF16BE(bom) || IS_UTF16LE(bom))
#define IS_UTF32BE(bom)	(((unsigned char)((bom)[0]) == 0x00) \
								&& ((unsigned char)((bom)[1]) == 0x00) \
								&& ((unsigned char)((bom)[2]) == 0xFE) \
								&& ((unsigned char)((bom)[3]) == 0xFF))
#define IS_UTF32LE(bom)	(((unsigned char)((bom)[0]) == 0xFF) \
								&& ((unsigned char)((bom)[1]) == 0xFE) \
								&& ((unsigned char)((bom)[2]) == 0x00) \
								&& ((unsigned char)((bom)[3]) == 0x00))
#define IS_UTF32(bom)	(IS_UTF32BE(bom) || IS_UTF32LE(bom))

// Character sets named by this library. Values are consecutive, starting at 0.
enum Charset
{
	CS_LOCALE	= 0,	// presumably the charset of the current locale (cf. LocaleEncoder/LocaleDecoder)
	CS_ASCII	= 1,	// 7bit, US_ASCII
	CS_LATIN1	= 2,	// 8bit, ISO-8859-1
	CS_UTF8		= 3,	// http://www.faqs.org/rfcs/rfc3629.html
	CS_UTF16	= 4,
	CS_UTF32	= 5
};

// Byte order selector for the UTF16 and UTF32 converters.
// The values are taken from the __BYTE_ORDER family of macros, which must be
// defined before this point.
enum UnicodeByteOrder
{
	CS_LITTLE_ENDIAN	= __LITTLE_ENDIAN,
	CS_BIG_ENDIAN		= __BIG_ENDIAN,
	CS_DEFAULT_ENDIAN	= __BYTE_ORDER		// platform dependent default
};

// Conversion status codes.
enum
{
	CS_NOERROR				= 0,

	// encode()
	CS_ILLEGAL_UCS			= 1,	// can't UCS4 ==> MB

	// decode()
	CS_SOURCE_FEW			= 2,	// source bytes few
	CS_ILLEGAL_SEQUENCE		= 3,	// illegal bytes sequence
	CS_ILLEGAL_UCS2			= 4		// can't UCS4 ==> UCS2 , on sizeof(wchar_t) == 2
};

// Exception type named in the __DCL_THROWS1(CharsetConvertException*)
// specifications of the string-returning encode()/decode() helpers in this
// header (note: the specification names a pointer to this class).
class DCLCAPI CharsetConvertException : public Exception
{
	DECLARE_CLASSINFO(CharsetConvertException)
public:
	// _errorCode: presumably one of the CS_* status codes declared in this
	// header -- TODO confirm against the implementation.
	CharsetConvertException(int _errorCode);
	// Textual form of the exception; the exact format is defined by the implementation.
	virtual String toString() const;
protected:
	int __errorCode;	// presumably the code given to the constructor -- TODO confirm
};

// Abstract base of the wide-character ==> byte-sequence converters.
// Concrete encoders supply toMultiByte(); the constructor is protected, so
// only derived classes can be instantiated.
class DCLCAPI CharsetEncoder : public Object
{
	DECLARE_CLASSINFO(CharsetEncoder)
public:
	// Buffer-to-buffer conversion; returns an int status code.
	// NOTE(review): the original comment listed CS_BUFFER_SMALL and
	// CS_ILLEGAL_UCS4 as results, but neither name is declared in this header;
	// the encode-side code declared here is CS_ILLEGAL_UCS -- confirm against
	// the implementation.
	virtual int encode(				// UCS ==> MB, UTF
		const wchar_t*	_in,		// in: input wide characters
		size_t&			_inCount,	// in: count of input wchars, out: count of processed wchars
		byte_t*			_out,		// out: output buffer
		size_t&			_outCount	// in: buffer size (countof(_out[], byte_t)), out: count of converted bytes
		);

	// Judging by the name, the number of bytes that encoding _wcs would
	// produce -- TODO confirm against the implementation.
	size_t getEncodedLength(const wchar_t* _wcs, size_t _wcslen)
		__DCL_THROWS1(CharsetConvertException*);

	// Converts _wcs and returns the bytes as a ByteString.
	// _wcslen defaults to (size_t)-1; presumably that means "up to the
	// terminating NUL" -- TODO confirm.
	ByteString encode(const wchar_t* _wcs, size_t _wcslen = (size_t)-1)
		__DCL_THROWS1(CharsetConvertException*);

	// Same conversion for a String argument.
	ByteString encode(const String& _str)
		__DCL_THROWS1(CharsetConvertException*);

protected:
	CharsetEncoder();
	// Per-character hook implemented by each concrete encoder:
	// _uc is the character, _mbs/_mbslen the destination buffer and its size.
	// The meaning of the int result is defined by the implementations.
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen) = 0;
};

// Abstract base of the byte-sequence ==> wide-character converters.
// Concrete decoders supply toWideChar(); the constructor is protected, so
// only derived classes can be instantiated.
class DCLCAPI CharsetDecoder : public Object
{
	// No trailing ';' here: the macro is used without one by the other
	// classes in this header, so the extra token was only an empty declaration.
	DECLARE_CLASSINFO(CharsetDecoder)
public:
	// Buffer-to-buffer conversion; returns an int status code.
	// return: CS_SOURCE_FEW, CS_ILLEGAL_SEQUENCE, CS_ILLEGAL_UCS2
	virtual int decode(				// MB, UTF ==> UCS
		const byte_t*	_in,		// in: input bytes
		size_t&			_inCount,	// in: count of input bytes, out: count of processed bytes
		wchar_t*		_out,		// out: output buffer
		size_t&			_outCount	// in: buffer size (countof(_out[], wchar_t)), out: count of converted wchars
		);

	// Judging by the name, the number of wide characters that decoding _mbs
	// would produce -- TODO confirm against the implementation.
	size_t getDecodedLength(const char* _mbs, size_t _mbslen)
		__DCL_THROWS1(CharsetConvertException*);

	// Converts _mbs and returns the result as a String.
	// _mbslen defaults to (size_t)-1; presumably that means "up to the
	// terminating NUL" -- TODO confirm.
	String decode(const char* _mbs, size_t _mbslen = (size_t)-1)
		__DCL_THROWS1(CharsetConvertException*);

	// Same conversion for a ByteString argument.
	String decode(const ByteString& _str)
		__DCL_THROWS1(CharsetConvertException*);

protected:
	CharsetDecoder();
	// Per-character hook implemented by each concrete decoder:
	// _mbs/_mbslen are the source bytes, *_uc receives the character.
	// The meaning of the int result is defined by the implementations.
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc) = 0;
};

// UTF-8 encoder. A byte order mark is requested through the constructor
// flag, which is off by default.
class DCLCAPI UTF8Encoder : public CharsetEncoder
{
	DECLARE_CLASSINFO(UTF8Encoder)
public:
	UTF8Encoder(
		bool _addBOM = false			// Byte Order Mark
		);
	void reset();

	// Buffer-to-buffer conversion (redeclares CharsetEncoder::encode).
	virtual int encode(				// UCS ==> MB, UTF
		const wchar_t*	_in,		// in: input wide characters
		size_t&			_inCount,	// in: count of input wchars, out: count of processed wchars
		byte_t*			_out,		// out: output buffer
		size_t&			_outCount	// in: buffer size (countof(_out[], byte_t)), out: count of converted bytes
		);

	// One-shot conversion with a temporary encoder that adds no BOM.
	static ByteString encode(const wchar_t* _wcs, size_t _wcslen)
		__DCL_THROWS1(CharsetConvertException*)
	{
		// The encode() overloads declared in this class hide the base-class
		// ones, so the ByteString-returning overload is reached via the base.
		UTF8Encoder oneShot(false);
		CharsetEncoder& base = oneShot;
		return base.encode(_wcs, _wcslen);
	}

	// One-shot conversion of a whole String; forwards to the overload above.
	static ByteString encode(const String& _str)
		__DCL_THROWS1(CharsetConvertException*)
	{
		return encode(_str, _str.length());
	}

	// return countof(byte_t)
	// Unicode 5.0 : countOfWchars * 4 + 3
	// NOTE(review): the original comment also says "current implementation
	// countOfWchars * 6 + 3", yet the value returned uses the factor 4 --
	// confirm which bound the implementation of toMultiByte() really needs.
	static size_t maxOutCount(size_t countOfWchars)
	{
		return countOfWchars * 4 + 3;
	}

protected:
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);

private:
	bool	__addBOM;
	bool	__addedBOM;
};

// UTF-8 decoder; records whether a byte order mark was decoded.
class DCLCAPI UTF8Decoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(UTF8Decoder)
public:
	UTF8Decoder();
	void reset();

	// BOM decoded?
	bool hasBOM() const
	{
		return __hasBOM;
	}

	// One-shot conversion with a temporary decoder.
	static String decode(const char* _mbs, size_t _mbslen)
		__DCL_THROWS1(CharsetConvertException*)
	{
		// The decode() overloads declared in this class hide the base-class
		// ones, so the String-returning overload is reached via the base.
		UTF8Decoder oneShot;
		CharsetDecoder& base = oneShot;
		return base.decode(_mbs, _mbslen);
	}

	// One-shot conversion of a whole ByteString; forwards to the overload above.
	static String decode(const ByteString& _str)
		__DCL_THROWS1(CharsetConvertException*)
	{
		return decode(_str, _str.length());
	}

	// return countof(wchar_t): one output wchar_t per input byte
	static size_t maxOutCount(size_t _countOfBytes)
	{
		return _countOfBytes;
	}

protected:
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

private:
	bool	__hasBOM;					// BOM decoded?
};

// UTF-16 encoder. By default a byte order mark is requested and the
// platform's default byte order is used.
class DCLCAPI UTF16Encoder : public CharsetEncoder
{
	DECLARE_CLASSINFO(UTF16Encoder)
public:
	UTF16Encoder(
		bool _addBOM = true,			// Byte Order Mark
		int	_byteOrder = CS_DEFAULT_ENDIAN	// a UnicodeByteOrder value
		);
	void reset();

	// return countof(utf16_t)
	// Two units per input wchar_t plus one extra unit (presumably the BOM).
	static size_t maxOutCount(size_t countOfWchars) { return countOfWchars * 2 + 1; }

	// Buffer-to-buffer conversion (redeclares CharsetEncoder::encode).
	virtual int encode(				// UCS ==> MB, UTF
		const wchar_t*	_in,		// in: input wide characters
		size_t&			_inCount,	// in: count of input wchars, out: count of processed wchars
		byte_t*			_out,		// out: output buffer
		size_t&			_outCount	// in: buffer size (countof(_out[], byte_t)), out: count of converted bytes
		);

protected:
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);

private:
	bool	__addBOM;
	bool	__addedBOM;
	bool	__bigEndian;
};

// UTF-16 decoder. Tracks whether a byte order mark was decoded and which
// byte order is in effect compared with the configured default.
class DCLCAPI UTF16Decoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(UTF16Decoder)
public:
	UTF16Decoder(
		int	nDefaultByteOrder = CS_DEFAULT_ENDIAN
		);
	void reset();

	// BOM decoded?
	bool hasBOM() const
	{
		return __hasBOM;
	}

	// Byte order currently in effect, as a UnicodeByteOrder value.
	int byteOrder() const
	{
		if (__bigEndian)
			return CS_BIG_ENDIAN;
		return CS_LITTLE_ENDIAN;
	}

	// True when the byte order in effect differs from the default one.
	bool byteOrderChanged() const
	{
		return __defaultBigEndian != __bigEndian;
	}

protected:
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

private:
	bool	__hasBOM;					// BOM decoded?
	bool	__bigEndian;
	bool	__defaultBigEndian;
};

// UTF-32 encoder. By default a byte order mark is requested and the
// platform's default byte order is used.
class DCLCAPI UTF32Encoder : public CharsetEncoder
{
	DECLARE_CLASSINFO(UTF32Encoder)
public:
	UTF32Encoder(
		bool _addBOM = true,			// Byte Order Mark
		int	_byteOrder = CS_DEFAULT_ENDIAN	// a UnicodeByteOrder value
		);
	void reset();

	// Buffer-to-buffer conversion (redeclares CharsetEncoder::encode).
	virtual int encode(				// UCS ==> MB, UTF
		const wchar_t*	_in,		// in: input wide characters
		size_t&			_inCount,	// in: count of input wchars, out: count of processed wchars
		byte_t*			_out,		// out: output buffer
		size_t&			_outCount	// in: buffer size (countof(_out[], byte_t)), out: count of converted bytes
		);

	// return countof(utf32_t)
	// One unit per input wchar_t plus one extra unit (presumably the BOM).
	static size_t maxOutCount(size_t countOfWchars) { return countOfWchars + 1; }

protected:
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);

private:
	bool	__addBOM;
	bool	__addedBOM;
	bool	__bigEndian;
};

// UTF-32 decoder. Tracks whether a byte order mark was decoded and which
// byte order is in effect compared with the configured default.
class DCLCAPI UTF32Decoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(UTF32Decoder)
public:
	UTF32Decoder(
		int	nDefaultByteOrder = CS_DEFAULT_ENDIAN
		);
	void reset();

	// BOM decoded?
	bool hasBOM() const
	{
		return __hasBOM;
	}

	// Byte order currently in effect, as a UnicodeByteOrder value.
	int byteOrder() const
	{
		if (__bigEndian)
			return CS_BIG_ENDIAN;
		return CS_LITTLE_ENDIAN;
	}

	// True when the byte order in effect differs from the default one.
	bool byteOrderChanged() const
	{
		return __defaultBigEndian != __bigEndian;
	}

protected:
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

private:
	bool	__hasBOM;					// BOM decoded?
	bool	__bigEndian;
	bool	__defaultBigEndian;
};
	
// Encoder for CS_ASCII (7bit, US_ASCII).
class DCLCAPI AsciiEncoder : public CharsetEncoder
{
	DECLARE_CLASSINFO(AsciiEncoder)
public:
	AsciiEncoder();

	// One-shot conversion with a temporary encoder.
	static ByteString encode(const wchar_t* _wcs, size_t _wcslen)
		__DCL_THROWS1(CharsetConvertException*)
	{
		// The static encode() overloads of this class hide the base-class
		// ones, so the ByteString-returning overload is reached via the base.
		AsciiEncoder oneShot;
		CharsetEncoder& base = oneShot;
		return base.encode(_wcs, _wcslen);
	}

	// One-shot conversion of a whole String; forwards to the overload above.
	static ByteString encode(const String& _str)
		__DCL_THROWS1(CharsetConvertException*)
	{
		return encode(_str, _str.length());
	}

protected:
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);
};

// Decoder for CS_ASCII (7bit, US_ASCII).
class DCLCAPI AsciiDecoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(AsciiDecoder)
public:
	AsciiDecoder();

protected:
	// Per-character hook required by CharsetDecoder.
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

public:
	// One-shot static conversion, defined out of line.
	// _mbslen defaults to (size_t)-1; presumably that means "up to the
	// terminating NUL" -- TODO confirm.
	// NOTE(review): unlike the other static helpers in this header, this
	// declaration carries no __DCL_THROWS1 specification -- confirm intent.
	static String decode(const char* _mbs, size_t _mbslen = (size_t)-1);
};

// Encoder for CS_LATIN1 (8bit, ISO-8859-1).
// Conversion goes through the encode() functions inherited from CharsetEncoder.
class DCLCAPI Latin1Encoder : public CharsetEncoder
{
	DECLARE_CLASSINFO(Latin1Encoder)
public:
	Latin1Encoder();

protected:
	// Per-character hook required by CharsetEncoder.
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);
};

// Decoder for CS_LATIN1 (8bit, ISO-8859-1).
class DCLCAPI Latin1Decoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(Latin1Decoder)
public:
	Latin1Decoder();

protected:
	// Per-character hook required by CharsetDecoder.
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

public:
	// One-shot static conversion, defined out of line.
	// _nmbs defaults to (size_t)-1; presumably that means "up to the
	// terminating NUL" -- TODO confirm.
	// NOTE(review): unlike the other static helpers in this header, this
	// declaration carries no __DCL_THROWS1 specification -- confirm intent.
	static String decode(const char* _mbs, size_t _nmbs = (size_t)-1);
};

// note: setlocale(LC_CTYPE, "");
// locale-dependent encoder
// Encoder for the locale-dependent charset; keeps an mbstate_t conversion
// state between calls.
class DCLCAPI LocaleEncoder : public CharsetEncoder
{
	// No trailing ';' here: the macro is used without one by the other
	// classes in this header, so the extra token was only an empty declaration.
	DECLARE_CLASSINFO(LocaleEncoder)
public:
	LocaleEncoder();
	virtual void reset();

	// Buffer-to-buffer conversion; returns an int status code.
	// NOTE(review): the original comment listed "CS_BUFFER_SMALL" and
	// "CS_ELLEGAL_UCS4" as results, but neither name is declared in this
	// header; the encode-side code declared here is CS_ILLEGAL_UCS -- confirm
	// against the implementation.
	virtual int encode(					// UCS ==> MB, UTF
		const wchar_t*	_in,			// in: input wide characters
		size_t&			_inCount,		// in: count of input wchars, out: count of processed wchars
		byte_t*			_out,			// out: output buffer
		size_t&			_outCount		// in: buffer size (countof(_out[], byte_t)), out: count of converted bytes
		);

	// One-shot conversion with a temporary encoder.
	// The cast reaches the ByteString-returning overload of the base class,
	// which the encode() overloads declared in this class hide.
	static ByteString encode(const wchar_t* _wcs, size_t _wcslen)
		__DCL_THROWS1(CharsetConvertException*)
	{
		LocaleEncoder encoder;
		return ((CharsetEncoder*)&encoder)->encode(_wcs, _wcslen);
	}

	// One-shot conversion of a whole String; forwards to the overload above.
	static ByteString encode(const String& _str)
		__DCL_THROWS1(CharsetConvertException*)
	{
		return LocaleEncoder::encode(_str, _str.length());
	}

protected:
	virtual int toMultiByte(ucs4_t _uc, byte_t* _mbs, size_t _mbslen);

private:
	mbstate_t	__mbstate;
};

// Decoder for the locale-dependent charset; keeps an mbstate_t conversion
// state between calls.
class DCLCAPI LocaleDecoder : public CharsetDecoder
{
	DECLARE_CLASSINFO(LocaleDecoder)
public:
	LocaleDecoder();
	virtual void reset();

	// Buffer-to-buffer conversion; returns an int status code.
	// return: CS_SOURCE_FEW, CS_ILLEGAL_SEQUENCE, CS_ILLEGAL_UCS2
	virtual int decode(					// MB, UTF ==> UCS
		const byte_t*	_in,			// in: input bytes
		size_t&			_inCount,		// in: count of input bytes, out: count of processed bytes
		wchar_t*		_out,			// out: output buffer
		size_t&			_outCount		// in: buffer size (countof(_out[], wchar_t)), out: count of converted wchars
		);

	// One-shot conversion with a temporary decoder.
	static String decode(const char* _mbs, size_t _nmbs = (size_t)-1)
		__DCL_THROWS1(CharsetConvertException*)
	{
		// The decode() overloads declared in this class hide the base-class
		// ones, so the String-returning overload is reached via the base.
		LocaleDecoder oneShot;
		CharsetDecoder& base = oneShot;
		return base.decode(_mbs, _nmbs);
	}

	// One-shot conversion of a whole ByteString; forwards to the overload above.
	static String decode(const ByteString& _str)
		__DCL_THROWS1(CharsetConvertException*)
	{
		return decode(_str, _str.length());
	}

protected:
	virtual int toWideChar(const byte_t* _mbs, size_t _mbslen, ucs4_t* _uc);

private:
	mbstate_t	__mbstate;
};

__DCL_END_NAMESPACE

#endif	// __DCL_CHARSET_H__
