DCL 4.0
Loading...
Searching...
No Matches
Html.cpp
Go to the documentation of this file.
1#include <dcl/Object.h>
2
3#if __DCL_HAVE_ALLOC_DEBUG
4#undef __DCL_ALLOC_LEVEL
5#define __DCL_ALLOC_LEVEL __DCL_ALLOC_INTERNAL
6#endif
7
8#include <dcl/String.h>
9#include <dcl/Array.h>
10#include <dcl/Regex.h>
11#include <dcl/Html.h>
12
13#if __DCL_DEBUG
14#undef __THIS_FILE__
15static const char_t* __THIS_FILE__ = __T("dcl/Html.cpp");
16#endif
17
18__DCL_BEGIN_NAMESPACE
19
20// http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
21// http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
22// http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
23// http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
24//
25// notes!
26// User-Agent가 DTD를 읽어들이지 못하면 다음의 니모닉들이 제대로 표시하지 못할 수 있다.
27// apos의 경우 IE에서 &apos;를 문자열 그대로 표시한다.
28
{
	// One row of the character-to-replacement table used by the escaping code below.

	// Character that is replaced when it is found in the input.
	wchar_t ch;
	// Replacement text appended to the output in place of ch.
	const wchar_t* psz;
	// Length handed to append() together with psz; the table stores the
	// number of characters in psz (terminator not counted).
	size_t n;
};
35
36static EntitySpecial __spacialChars[] =
37{
38 { L'<', L"&lt;", 4 },
39 { L'>', L"&gt;", 4 },
40 { L'\"', L"&quot;", 6 },
41 { L'\'', L"&#39;", 5 },
42 { L'\0', NULL, 0 }
43};
44
45static const EntitySpecial* __special(wchar_t c)
46{
47 const EntitySpecial* pEntity = __spacialChars;
48 for( ; pEntity->ch != L'\0'; pEntity++)
49 {
50 if (c == pEntity->ch)
51 return pEntity;
52 }
53 return NULL;
54}
55
// Tells whether character c is a member of the NUL-terminated set psz.
//
// A NULL set means "no restriction": every character is reported as
// present. The terminating L'\0' of the set is not considered a member.
static bool __exists(const wchar_t* psz, wchar_t c)
{
	if (psz == NULL)
		return true;

	while (*psz != L'\0')
	{
		if (*psz++ == c)
			return true;
	}
	return false;
}
68
70 const String& _str,
71 const wchar_t* _chars // " <>"'&
72 )
73{
74 StringBuilder r;
75
76 const EntitySpecial* pEntity = NULL;
77
78 const wchar_t* pStart = _str.data();
79 const wchar_t* pCurrent = pStart;
80
81 while(*pCurrent)
82 {
83 if (__exists(_chars, *pCurrent)
84 && (pEntity = __special(*pCurrent)))
85 {
86 if (pStart < pCurrent)
87 r.append(pStart, pCurrent - pStart);
88 r.append(pEntity->psz, pEntity->n);
89 pStart = ++pCurrent;
90 }
91 else
92 ++pCurrent;
93 }
94
95 if (pStart < pCurrent)
96 r.append(pStart, pCurrent - pStart);
97
98/*
99 __DCL_TRACE2(
100 "HtmlEntity::escape length src: %d, res: %d\n",
101 nSourceLength,
102 r.length()
103 );
104*/
105
106 return r;
107
108}
109
110static String __GetSpace(int nCount)
111{
112 StringBuilder r;
113 for(int i = 0; i < nCount; i++)
114 {
115 if (i % 2)
116 r.append(L"&nbsp;", 6);
117 else
118 r.append(' ', 1);
119 }
120 return r;
121}
122
124 const String& _str,
125 int _tab2Space,
126 const String& _beginOfLine,
127 const String& _endOfLine
128 )
129{
130 StringBuilder r;
131 r = _beginOfLine;
132
133 const EntitySpecial* pEntity = NULL;
134 const wchar_t* pStart = _str.data();
135 const wchar_t* pCurrent = pStart;
136
137 while(*pCurrent) {
138 switch(*pCurrent) {
139 case L'\t' :
140 if (pStart < pCurrent)
141 r.append(pStart, pCurrent - pStart);
142
143 if (_tab2Space > 0) {
144 // TAB을 ' '으로 변환한다.
145 int nSpace = _tab2Space;
146 while(*(++pCurrent)) {
147 if (*pCurrent == L' ')
148 nSpace++;
149 else if (*pCurrent == L'\t')
150 nSpace += _tab2Space;
151 else
152 break;
153 }
154 r += __GetSpace(nSpace);
155 pStart = pCurrent;
156 }
157 else {
158 // TAB을 변환하지않는다.
159 r.append(L'\t', 1);
160 pStart = ++pCurrent;
161 }
162 break;
163 case ' ' :
164 if (pStart < pCurrent)
165 r.append(pStart, pCurrent - pStart);
166
167 if (_tab2Space > 0) {
168 int nSpace = 1;
169 while(*(++pCurrent)) {
170 if (*pCurrent == L' ')
171 nSpace++;
172 else if (*pCurrent == L'\t')
173 nSpace += _tab2Space;
174 else
175 break;
176 }
177 r += __GetSpace(nSpace);
178 pStart = pCurrent;
179 }
180 else {
181 int nSpace = 1;
182 while(*(++pCurrent) == L' ')
183 nSpace++;
184 r += __GetSpace(nSpace);
185 pStart = pCurrent;
186 }
187 break;
188 case '\r' :
189 if (pStart < pCurrent)
190 r.append(pStart, pCurrent - pStart);
191
192 pStart = ++pCurrent;
193 break;
194 case '\n' :
195 if (pStart < pCurrent)
196 r.append(pStart, pCurrent - pStart);
197 r += _endOfLine;
198 r.append(L"\r\n", 2);
199 r += _beginOfLine;
200 pStart = ++pCurrent;
201 break;
202 default :
203 if (__exists(L"<>&", *pCurrent)
204 && (pEntity = __special(*pCurrent))) {
205 // <, >, & 문자를 escape 한다.
206 if (pStart < pCurrent)
207 r.append(pStart, pCurrent - pStart);
208 r.append(pEntity->psz, pEntity->n);
209 pStart = ++pCurrent;
210 }
211 else
212 ++pCurrent;
213 }
214 }
215
216 if (pStart < pCurrent)
217 r.append(pStart, pCurrent - pStart); // 나머지
218
219 r += _endOfLine;
220
221/*
222 __DCL_TRACE2(
223 "HtmlEntity::format length src: %d, res: %d\n",
224 nSourceLength,
225 r.length()
226 );
227*/
228 return r;
229}
230
232 const String& _str,
233 const wchar_t* _elementNames // ',' delimiter ex: "html,head,meta,!"
234 )
235{
236 String pattern;
237 StringBuilder r;
238
239 if (_elementNames) {
240 StringBuilder sb;
241 StringArray a;
242 String::split(
243 _elementNames, _elementNames + String::length(_elementNames),
244 L',', a);
245
246 for( StringArray::Iterator it = a.begin();
247 it != a.end(); it++) {
248 (*it).trim();
249 if (!(*it).isEmpty())
250 {
251 if (!pattern.isEmpty())
252 sb += L"|";
253 sb += L"(<+[ \t\r\n]*/*[ \t\r\n]*";
254 sb += *it;
255 sb += L"[ \t\r\n]*/*[^>]*>+)";
256 }
257 }
258 pattern = sb.toString();
259 }
260 else {
261 // NULL 이면 모든 html
262 pattern = L"(<+[^>]*>+)|(&[a-z]+;)";
263 }
264
265 Regex re(pattern, Regex::ICASE);
267 const wchar_t* begin = _str.data();
268 const wchar_t* end = begin + _str.length();
269 while(begin < end && re.search(begin, end, match)) {
270 __DCL_ASSERT(match.size() > 0);
271 r.append(begin, match[0].first);
272 begin = match[0].second;
273 }
274
275 if (begin < end) {
276 r.append(begin, end);
277 }
278
279 return r;
280}
281
282__DCL_END_NAMESPACE
#define __THIS_FILE__
Definition _trace.h:14
#define NULL
Definition Config.h:340
wchar_t char_t
Definition Config.h:275
#define __DCL_ASSERT(expr)
Definition Object.h:371
#define __T(str)
Definition Object.h:44
ByteString r
static String strip(const String &_str, const wchar_t *_elementNames)
Definition Html.cpp:231
static String format(const String &_str, int _tab2Space, const String &_beginOfLine, const String &_endOfLine)
Definition Html.cpp:123
static String escape(const String &_str, const wchar_t *_chars)
Definition Html.cpp:69
size_t size() const
Definition Regex.h:57
Definition Regex.h:32
@ ICASE
Definition Regex.h:35
bool search(const wchar_t *_begin, const wchar_t *_end, unsigned int _flags=0) __DCL_THROWS1(RegexException *)
Definition Regex.cpp:122
const wchar_t * psz
Definition Html.cpp:32
size_t n
Definition Html.cpp:33
wchar_t ch
Definition Html.cpp:31