DCL 3.7.4
Loading...
Searching...
No Matches
Html.cpp
Go to the documentation of this file.
1#include <dcl/Object.h>
2
3#if __DCL_HAVE_ALLOC_DEBUG
4#undef __DCL_ALLOC_LEVEL
5#define __DCL_ALLOC_LEVEL __DCL_ALLOC_INTERNAL
6#endif
7
8#include <dcl/String.h>
9#include <dcl/Array.h>
10#include <dcl/Regex.h>
11#include <dcl/Html.h>
12
13#if __DCL_HAVE_THIS_FILE__
14#undef __THIS_FILE__
15static const char_t* __THIS_FILE__ = __T("dcl/Html.cpp");
16#endif
17
18__DCL_BEGIN_NAMESPACE
19
20// http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters
21// http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
22// http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
23// http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
24//
25// notes!
26// User-Agent가 DTD를 읽어들이지 못하면 다음의 니모닉들이 제대로 표시하지 못할 수 있다.
27// apos의 경우 IE에서 &apos;를 문자열 그대로 표시한다.
28
30{
// One row of the character-to-entity table that __special() searches.
31 wchar_t ch; // character to be replaced; L'\0' marks the terminating row of a table
32 const wchar_t* psz; // replacement text for ch (NULL in the terminating row)
33 size_t n; // number of characters in psz, not counting the terminator
34};
35
36static EntitySpecial __spacialChars[] =
37{
38 { L'<', L"&lt;", 4 },
39 { L'>', L"&gt;", 4 },
40 { L'\"', L"&quot;", 6 },
41 { L'\'', L"&#39;", 5 },
42 { L'\0', NULL, 0 }
43};
44
45static const EntitySpecial* __special(wchar_t c)
46{
47 const EntitySpecial* pEntity = __spacialChars;
48 for( ; pEntity->ch != L'\0'; pEntity++)
49 {
50 if (c == pEntity->ch)
51 return pEntity;
52 }
53 return NULL;
54}
55
// Tells whether c belongs to the NUL-terminated character set psz.
// A NULL set means "no restriction", so every character is reported as present.
// The terminator itself is never considered a member of the set.
static bool __exists(const wchar_t* psz, wchar_t c)
{
    if (!psz)
        return true;

    while (*psz != L'\0')
    {
        if (*psz++ == c)
            return true;
    }
    return false;
}
68
70 const String& _str,
71 const wchar_t* _chars // " <>"'&
72)
73{
74 StringBuilder r;
75
76 const EntitySpecial* pEntity = NULL;
77
78 const wchar_t* pStart = _str.data();
79 const wchar_t* pCurrent = pStart;
80
81 while(*pCurrent)
82 {
83 if (__exists(_chars, *pCurrent)
84 && (pEntity = __special(*pCurrent)))
85 {
86 if (pStart < pCurrent)
87 r.append(pStart, pCurrent - pStart);
88 r.append(pEntity->psz, pEntity->n);
89 pStart = ++pCurrent;
90 }
91 else
92 ++pCurrent;
93 }
94
95 if (pStart < pCurrent)
96 r.append(pStart, pCurrent - pStart);
97
98/*
99 __DCL_TRACE2(
100 "HtmlEntity::escape length src: %d, res: %d\n",
101 nSourceLength,
102 r.length()
103 );
104*/
105
106 return r;
107
108}
109
110static String __GetSpace(int nCount)
111{
112 StringBuilder r;
113 for(int i = 0; i < nCount; i++) {
114 if (i % 2)
115 r.append(L"&nbsp;", 6);
116 else
117 r.append(' ', 1);
118 }
119 return r;
120}
121
123 const String& _str, // plain text to convert
124 int _tab2Space, // blanks one TAB stands for; <= 0 leaves TABs as they are
125 const String& _beginOfLine, // text emitted at the start of every output line
126 const String& _endOfLine // text emitted at the end of every output line
127)
128{
// Converts plain text into HTML line markup.
// The result starts with _beginOfLine and ends with _endOfLine. Each '\n'
// in _str closes the current line (_endOfLine followed by "\r\n") and opens
// a new one (_beginOfLine); '\r' is dropped. Blank runs are replaced by the
// output of __GetSpace(). All other characters are copied, except those in
// "<>&" for which __special() has a table entry.
129 StringBuilder r;
130 r = _beginOfLine;
131
132 const EntitySpecial* pEntity = NULL;
133 const wchar_t* pStart = _str.data(); // start of the plain run not yet copied to r
134 const wchar_t* pCurrent = pStart;
135
136 while(*pCurrent) {
137 switch(*pCurrent) {
138 case L'\t' :
139 if (pStart < pCurrent)
140 r.append(pStart, pCurrent - pStart);
141
142 if (_tab2Space > 0) {
143 // Convert the TAB into blanks; following ' ' and TAB characters are merged into the same run.
144 int nSpace = _tab2Space;
145 while(*(++pCurrent)) {
146 if (*pCurrent == L' ')
147 nSpace++;
148 else if (*pCurrent == L'\t')
149 nSpace += _tab2Space;
150 else
151 break;
152 }
153 r += __GetSpace(nSpace);
154 pStart = pCurrent;
155 }
156 else {
157 // Leave the TAB unconverted.
158 r.append(L'\t', 1);
159 pStart = ++pCurrent;
160 }
161 break;
162 case ' ' :
163 if (pStart < pCurrent)
164 r.append(pStart, pCurrent - pStart);
165
166 if (_tab2Space > 0) {
// A run of blanks and TABs; each TAB counts as _tab2Space blanks.
167 int nSpace = 1;
168 while(*(++pCurrent)) {
169 if (*pCurrent == L' ')
170 nSpace++;
171 else if (*pCurrent == L'\t')
172 nSpace += _tab2Space;
173 else
174 break;
175 }
176 r += __GetSpace(nSpace);
177 pStart = pCurrent;
178 }
179 else {
// TABs are not converted here, so only consecutive ' ' characters form the run.
180 int nSpace = 1;
181 while(*(++pCurrent) == L' ')
182 nSpace++;
183 r += __GetSpace(nSpace);
184 pStart = pCurrent;
185 }
186 break;
187 case '\r' :
// Carriage returns are dropped; only '\n' produces a line break.
188 if (pStart < pCurrent)
189 r.append(pStart, pCurrent - pStart);
190
191 pStart = ++pCurrent;
192 break;
193 case '\n' :
// Close the current output line and open the next one.
194 if (pStart < pCurrent)
195 r.append(pStart, pCurrent - pStart);
196 r += _endOfLine;
197 r.append(L"\r\n", 2);
198 r += _beginOfLine;
199 pStart = ++pCurrent;
200 break;
201 default :
202 if (__exists(L"<>&", *pCurrent)
203 && (pEntity = __special(*pCurrent))) {
204 // Escape the <, > and & characters.
// NOTE(review): a character is only replaced when __special() finds a table entry for it; confirm the table contains '&'.
205 if (pStart < pCurrent)
206 r.append(pStart, pCurrent - pStart);
207 r.append(pEntity->psz, pEntity->n);
208 pStart = ++pCurrent;
209 }
210 else
211 ++pCurrent;
212 }
213 }
214
215 if (pStart < pCurrent)
216 r.append(pStart, pCurrent - pStart); // remainder
217
218 r += _endOfLine;
219
220/*
221 __DCL_TRACE2(
222 "HtmlEntity::format length src: %d, res: %d\n",
223 nSourceLength,
224 r.length()
225 );
226*/
227 return r;
228}
229
231 const String& _str,
232 const wchar_t* _elementNames // ',' delimiter ex: "html,head,meta,!"
233)
234{
235 String pattern;
236 StringBuilder r;
237
238 if (_elementNames) {
239 StringBuilder sb;
240 StringArray a;
241 String::split(
242 _elementNames, _elementNames + String::length(_elementNames),
243 L',', a);
244
245 for( StringArray::Iterator it = a.begin();
246 it != a.end(); it++) {
247 (*it).trim();
248 if (!(*it).isEmpty())
249 {
250 if (!pattern.isEmpty())
251 sb += L"|";
252 sb += L"(<+[ \t\r\n]*/*[ \t\r\n]*";
253 sb += *it;
254 sb += L"[ \t\r\n]*/*[^>]*>+)";
255 }
256 }
257 pattern = sb.toString();
258 }
259 else {
260 // NULL 이면 모든 html
261 pattern = L"(<+[^>]*>+)|(&[a-z]+;)";
262 }
263
264 Regex re(pattern, Regex::ICASE);
266 const wchar_t* begin = _str.data();
267 const wchar_t* end = begin + _str.length();
268 while(begin < end && re.search(begin, end, match)) {
269 __DCL_ASSERT(match.size() > 0);
270 r.append(begin, match[0].first);
271 begin = match[0].second;
272 }
273
274 if (begin < end) {
275 r.append(begin, end);
276 }
277
278 return r;
279}
280
281__DCL_END_NAMESPACE
#define __THIS_FILE__
Definition _trace.h:14
#define NULL
Definition Config.h:312
wchar_t char_t
Definition Config.h:247
IOException *size_t r
Definition MediaInfo.cpp:82
#define __DCL_ASSERT(expr)
Definition Object.h:394
#define __T(str)
Definition Object.h:60
static String strip(const String &_str, const wchar_t *_elementNames)
Definition Html.cpp:230
static String format(const String &_str, int _tab2Space, const String &_beginOfLine, const String &_endOfLine)
Definition Html.cpp:122
static String escape(const String &_str, const wchar_t *_chars)
Definition Html.cpp:69
size_t size() const
Definition Regex.h:57
Definition Regex.h:32
@ ICASE
Definition Regex.h:35
bool search(const wchar_t *_begin, const wchar_t *_end, unsigned int _flags=0) __DCL_THROWS1(RegexException *)
Definition Regex.cpp:120
const wchar_t * psz
Definition Html.cpp:32
size_t n
Definition Html.cpp:33
wchar_t ch
Definition Html.cpp:31