DCL 4.0
Loading...
Searching...
No Matches
ID3v2.cpp
Go to the documentation of this file.
1#include <dcl/Config.h>
2
3#include <ctype.h>
4#include <stdlib.h>
5#include <string.h>
6
7#if __DCL_WINDOWS
8#include <windows.h>
9#endif
10
11#include <dcl/size_t.h>
12#include <dcl/String.h>
13#include <dcl/Charset.h>
14#include <dcl/File.h>
15
16#include "ID3v2.h"
17
18#define __TRACE_THIS 0
19#if __TRACE_THIS
20#define __DCL_TRACE1_N __DCL_TRACE1
21#define __DCL_TRACE2_N __DCL_TRACE2
22#define __DCL_TRACE3_N __DCL_TRACE3
23#define __DCL_TRACE4_N __DCL_TRACE4
24#else
25#define __DCL_TRACE1_N(fmt, arg)
26#define __DCL_TRACE2_N(fmt, arg1, arg2)
27#define __DCL_TRACE3_N(fmt, arg1, arg2, arg3)
28#define __DCL_TRACE4_N(fmt, arg1, arg2, arg3, arg4)
29#endif
30
31__DCL_BEGIN_NAMESPACE
32
33#if __DCL_DEBUG
34#undef __THIS_FILE__
35static const char_t __THIS_FILE__[] = __T("media/ID3v2.cpp");
36#endif
37
39
40ID3v2::ID3v2()
41 : __header{'\0', }, __data(NULL), __fsbits(0)
42{
43}
44
45ID3v2::~ID3v2()
46{
47 if (__data) {
48 free(__data);
49 __data = NULL;
50 }
51
52 for (size_t i = 0; i < __frames.size(); i++) {
53 delete (ID3v2Frame*)__frames[i];
54 }
55}
56
57uint32_t ID3v2::word7(const char* _bytes, size_t _n)
58{
59 uint32_t r = 0;
60 const char* end = _bytes + _n;
61 while (_bytes < end) {
62 r = r << 7 | (0x7f & *_bytes++);
63 }
64 return r;
65}
66
67uint32_t ID3v2::word8(const char* _bytes, size_t _n)
68{
69 uint32_t r = 0;
70 const char* end = _bytes + _n;
71 while (_bytes < end) {
72 // r = r << 7 | (0x7f & *_bytes++);
73 // spec에는 최상위 비트를 사용하지 않는다고 되어 있는데
74 // 사용하는 경우가 있다. 버그를 허용한다.
75 r = r << 8 | ((byte_t)*_bytes++);
76 }
77 return r;
78}
79
// Returns true when every byte in [_begin, _end) is an upper-case letter or a
// decimal digit (as classified by isupper()/isdigit()); an empty range yields
// true.
//
// Each byte is converted to unsigned char before classification: passing a
// negative plain char to the <ctype.h> functions is undefined behaviour, and
// tag data is arbitrary binary input.
bool __IS_UPPER(const char* _begin, const char* _end)
{
    for (const char* p = _begin; p < _end; ++p) {
        const unsigned char c = static_cast<unsigned char>(*p);
        if (!isupper(c) && !isdigit(c)) {
            return false;
        }
    }
    return true;
}
90
91int __find_frame_size_bits(char _version,
92 const char* _begin, const char* _end, bool _debug)
93{
94 size_t __3_or_4 = 3;
95 size_t __6_or_10 = 6;
96 if (_version > 2) {
97 __3_or_4 = 4;
98 __6_or_10 = 10;
99 }
100
101 // 8bits를 검사한다.
102 const char* p = _begin;
103 bool valid = true;
104 while (p + __6_or_10 < _end && *p != '\0') {
105 if (__IS_UPPER(p, p + __3_or_4)) {
106 String name = AsciiDecoder::decode(p, __3_or_4);
107 uint32_t size = ID3v2::word8(p + __3_or_4, __3_or_4);
108 if (_debug) {
109 __DCL_TRACE2(L"[%ls][%d]\n", name.data(), size);
110 }
111 p += size + __6_or_10;
112 }
113 else {
114 if (_debug) {
115 __DCL_TRACE0(L"false\n");
116 }
117 valid = false;
118 break;
119 }
120 }
121
122 if (p > _end || (p < _end && *p != '\0')) {
123 // 마지막 프레임의 size로 인해 p가 _end 지나쳤거나
124 // padding 위치가 아니면
125 if (_debug) {
126 __DCL_TRACE3(L"false [%p][%p][%x]\n", p, _end, *p);
127 }
128 valid = false;
129 }
130
131 if (valid) {
132 return 8;
133 }
134
135 // 7bits를 검사한다.
136 p = _begin;
137 valid = true;
138 while (p + __6_or_10 < _end && *p != '\0') {
139 if (__IS_UPPER(p, p + __3_or_4)) {
140 String name = AsciiDecoder::decode(p, __3_or_4);
141 uint32_t size = ID3v2::word7(p + __3_or_4, __3_or_4);
142 if (_debug) {
143 __DCL_TRACE2(L"[%ls][%d]\n", name.data(), size);
144 }
145 p += size + __6_or_10;
146 }
147 else {
148 if (_debug) {
149 __DCL_TRACE0(L"false\n");
150 }
151 valid = false;
152 break;
153 }
154 }
155
156 if (p > _end || (p < _end && *p != '\0')) {
157 // 마지막 프레임의 size로 인해 p가 _end 지나쳤거나
158 // padding 위치가 아니면
159 if (_debug) {
160 __DCL_TRACE3(L"false [%p][%p][%x]\n", p, _end, *p);
161 }
162 valid = false;
163 }
164
165 if (valid) {
166 return 7;
167 }
168
169 return 0;
170}
171
172bool ID3v2::read(File& _file, const char _header[10])
173{
174 char buf[10];
175 if (_header == NULL) {
176 // 파일의 처음으로 이동한다.
177 _file.seek(0, File::BEGIN);
178 size_t n = _file.read(buf, 10);
179 if (n < 10) {
180 return false;
181 }
182 _header = buf;
183 }
184 else {
185 _file.seek(10, File::BEGIN);
186 }
187
188 __DCL_TRACE1_N(L"[%ls]\n", String::tryString(_header, 10).data());
189 if (!(_header[0] == 'I' && _header[1] == 'D' && _header[2] == '3'
190 && _header[3] <= 4 // version 4
191 // _header[4] revision
192 // _header[5] flags
193 && _header[6] < 0x80 && _header[7] < 0x80 // size
194 && _header[8] < 0x80 && _header[9] < 0x80)) {
195 return false;
196 }
197
198 __ID3 = AsciiDecoder::decode(&_header[0], 3);
199 __size = word7(&_header[6], 4);
200
201 __data = (char*) malloc(__size);
202 size_t n = _file.read(__data, __size);
203 if (n < __size) {
204 return false;
205 }
206
207 memcpy(__header, _header, 10);
208 __DCL_TRACE1_N(L"%ls\n", toString().data());
209 __DCL_TRACE1_N(L"data [%ls]\n", String::tryString(__data, n, 200).data());
210
211 const char* _begin = __data;
212 const char* _end = _begin + n;
213
214 // Extended header 있으면 이것을 SKIP 한다.
215 if (Extended_header(flags())) {
216 _begin += skipExtendedHeader(_begin, _end);
217 }
218
219 // 프레임을 검사한다.
220 __fsbits = __find_frame_size_bits(version(), _begin, _end, false);
221 if (__fsbits == 0) {
222 __DCL_TRACE1(L"ID3v2 Invalid Frames [%ls]\n", _file.path().data());
223 __fsbits = __find_frame_size_bits(version(), _begin, _end, true);
224 return false;
225 }
226
227 // 프레임을 읽는다.
228 // 최소 프레임 헤더는 6 또는 10 bytes 이어야 한다.
229 // Padding은 $00으로 체워져 있다.
230 size_t __3_or_4 = 3;
231 size_t __6_or_10 = 6;
232 if (version() > 2) {
233 __3_or_4 = 4;
234 __6_or_10 = 10;
235 }
236 while ((_begin + __6_or_10 < _end) && *_begin != '\0') {
237 if ('A' <= *_begin && *_begin <= 'Z') {
238 ID3v2Frame* frame = new ID3v2Frame(*this);
239 _begin += frame->read(_begin, _end);
240#ifdef __DCL_DEBUG
241 try {
242 __DCL_TRACE1_N(L"[%ls]\n", frame->toString().data());
243 }
244 catch (Exception* e) {
245 __DCL_TRACE1_N(L"%ls\n", e->toStringAll().data());
246 __DCL_TRACE1_N(L"[%ls]\n", String::tryString(_begin, _end - _begin, 80).data());
247 e->destroy();
248 }
249#endif
250 __frames.add(frame);
251 }
252 else {
253 __DCL_TRACE1(L"ID3v2 Frame Read Failed [%ls]\n", _file.path().data());
254 // throw new GenerialException(L"ID3v2 Frame Read Failed ["
255 // + _file.path() + L"]");
256 break;
257 }
258 }
259
260 return true;
261}
262
263String ID3v2::toString() const
264{
265 return String::format(L""
266 "%ls version[%d] revision[%d] flags[%x] size[%d] fsbits[%d]",
267 ID3().data(), version(), revision(), flags(), size(), fsbits()
268 );
269}
270
272 const char* _begin,
273 const char* _end
274 ) const
275{
276 // 최소 6 bytes 되어야 한다.
277 __DCL_ASSERT(_begin + 6 <= _end);
278 uint32_t r = word7(&_begin[0], 4);
279 __DCL_TRACE1(L"Number of flag bytes [%d]\n", (int)_begin[4]);
280 return r;
281}
282
284
285ID3v2Frame::ID3v2Frame(const ID3v2& _tag)
286 : __tag(_tag)
287{
288 __size = 0;
289 __flags = 0;
290 __encoding = 0;
291 __type = 0;
292}
293
294String ID3v2Frame::decode(
295 char _encoding,
296 const char* _mbs, size_t _nmbs
297 ) const
298{
299 String r;
300 try {
301 switch (_encoding) {
302 default:
303 case 0: {
304 Latin1Decoder decoder;
305 r = decoder.decode(_mbs, _nmbs);
306 break;
307 }
308 case 1: {
309 if (_nmbs == 2 && (byte_t)*_mbs == 0xFF) {
310 /*
311 UTF-16에서 BOM이 포함되면, 길이가 0인 문자열이
312 "$FF $FE $00 $00"의 바이트로 표현된다.
313 UTF16Decoder::decodede에 BOM "$FF $FE" "$FE $FF"만 주어지면
314 CS_SOURCE_FEW 예외가 발생한다.
315 UTF16Decoder::decodede에 "$FF $FE $00 $00"을 적용하면
316 L"\0" 길이가 1인 String을 반환한다.
317 _nmbs는 $00 직전까지를 이므로, 이 경우는 제외한다.
318 */
319 break;
320 }
321 UTF16Decoder decoder(CS_LITTLE_ENDIAN);
322 r = decoder.decode(_mbs, _nmbs);
323 break;
324 }
325 case 2: {
326 if (_nmbs == 2 && (byte_t)*_mbs == 0xFE) {
327 // "$FE $FF"를 제외한다.
328 break;
329 }
330 UTF16Decoder decoder(CS_BIG_ENDIAN);
331 r = decoder.decode(_mbs, _nmbs);
332 break;
333 }
334 case 3: {
335 UTF8Decoder decoder;
336 r = decoder.decode(_mbs, _nmbs);
337 break;
338 }
339 }
340 }
341 catch (Exception* _e) {
342 __DCL_TRACE3_N(L"%ls[%zd][%ls]\n", _e->toStringAll().data(),
343 _nmbs, String::tryString(_mbs, _nmbs).data());
344 _e->destroy();
345 }
346 return r;
347}
348
349size_t __decode_length__(char _encoding, const char* _psz, size_t _max)
350{
351 __DCL_ASSERT_PARAM(_psz != NULL);
352 size_t r = 0;
353 if (_encoding == 1 || _encoding == 2) {
354 // UTF-16 LE (1), UTF-16 BE (2)
355 while ((*_psz++ | *_psz++) && r < _max) {
356 r++; r++;
357 }
358 return r < _max ? r: _max;
359 }
360 else {
361 // ISO-8859-1 (0), UTF-8 (3)
362 // cf. ByteString::length(const char* _psz, size_t _max)
363 while (*_psz++ && r < _max) {
364 r++;
365 }
366 }
367 return r;
368}
369
// Returns how many bytes to advance past a string of _n bytes including its
// terminator: two bytes ($00 $00) for the UTF-16 encodings 1 and 2, one byte
// ($00) for every other encoding (ASCII, ISO-8859-1, UTF-8).
size_t __skip_length__(char _encoding, size_t _n)
{
    const bool utf16 = (_encoding == 1 || _encoding == 2);
    const size_t terminatorBytes = utf16 ? 2 : 1;
    return _n + terminatorBytes;
}
375
// Returns true when the three bytes at _lang look like a language code:
// either the literal "XXX" or three lower-case letters (as classified by
// islower()).
// https://www.loc.gov/standards/iso639-2/
//
// Bytes are converted to unsigned char before classification; passing a
// negative plain char to islower() is undefined behaviour.
bool __valid_language_code(const char _lang[3])
{
    if (_lang[0] == 'X' && _lang[1] == 'X' && _lang[2] == 'X') {
        return true;
    }
    for (int i = 0; i < 3; ++i) {
        if (!islower(static_cast<unsigned char>(_lang[i]))) {
            return false;
        }
    }
    return true;
}
382
// Parses a single frame that starts at _begin.
//
// The frame header is read first: for tag version 2 a 3-byte ID and a 3-byte
// size, otherwise a 4-byte ID, a 4-byte size and two flag bytes. _begin is
// then moved past the header and _end is narrowed to _begin + __size, so the
// field macros below only look at this frame's payload. The first character
// of the frame ID selects which fields are extracted.
//
// Returns __size plus the header length (6 or 10), i.e. the distance from the
// start of this frame to the next one.
//
// NOTE(review): several statements of this function are not present in this
// listing (the embedded line numbers have gaps), so some branches look empty
// here; consult the real source before changing the logic.
383size_t ID3v2Frame::read(const char* _begin, const char* _end)
384{
385 size_t __6_OR_10__ = __tag.version() == 2 ? 6 : 10;
386 // Read the frame header.
387 // More than __6_OR_10__ bytes must be available.
388 __DCL_ASSERT(_begin + __6_OR_10__ < _end);
389 char FIRST = *_begin;
390 if (__tag.version() == 2) {
391 __id = AsciiDecoder::decode(_begin, 3); // Frame ID 3 bytes
392 __size = word(_begin + 3, 3); // size 3 bytes
393 }
394 else {
395 // 2.3, 2.4
396 __id = AsciiDecoder::decode(_begin, 4); // Frame ID 4 bytes
397 __size = word(_begin + 4, 4); // size 4 bytes
398 __flags = *(_begin + 8); // flags 2 bytes
399 __flags = __flags << 8 | *(_begin + 9);
400 }
401
 // Field readers. Each one checks that payload bytes remain, consumes one
 // field at _begin and (except __IF_BINARY__) advances _begin past it.
402#define __IF_ENCODING__ \
403 if (_begin < _end) { \
404 __encoding = *_begin++; \
405 }
406#define __IF_LANGUAGE__ \
407 if (_begin + 3 < _end && __valid_language_code(_begin)) { \
408 __url = AsciiDecoder::decode(_begin, 3).trim(); \
409 _begin += 3; \
410 }
411#define __IF_DESCRIPTION__ \
412 if (_begin < _end) { \
413 size_t n = __decode_length__(__encoding, _begin, _end - _begin); \
414 __description = decode(__encoding, _begin, n).trim(); \
415 _begin += __skip_length__(__encoding, n); \
416 }
417#define __IF_TEXT__ \
418 if (_begin < _end) { \
419 size_t n = __decode_length__(__encoding, _begin, _end - _begin); \
420 __text = decode(__encoding, _begin, n).trim(); \
421 _begin += __skip_length__(__encoding, n); \
422 }
423#define __IF_URL__ \
424 if (_begin < _end) { \
425 size_t n = ByteString::length(_begin, _end - _begin); \
426 __url = decode(0, _begin, n).trim(); \
427 _begin += n + 1; \
428 }
429#define __IF_TYPE__ \
430 if (_begin < _end) { \
431 __type = *_begin++; \
432 }
433#define __IF_BINARY__ \
434 if (_begin < _end) { \
435 __binary.assign(_begin, _end - _begin); \
436 }
437
 // Step over the header and restrict the range to this frame's payload.
438 _begin += __6_OR_10__;
439 _end = _begin + __size;
440 switch (FIRST) {
441 default: {
442 // "ENCR" "CRM" Owner identifier, Moehod symbol, Encryption data (b)
443 // "EQU2" "EQU" Interpolation method, Identification
444 // "ETCO" "ETC" Time stamp format
445 // "LINK" "LNK" Frame identifier, URL, ID
446 // "MCDI" "MCI" CD TOC
447 // "MLLT" "MLL" MPEG frames between reference, ...
448 // "OWNE" Text encoding, Price paid, ... Seller
449 // "RBUF" "BUF" Buffer size, Embedded info flag, Offset to next tag
450 // "RVA2" "RVA" Identification
451 // "RVRB" "REV" Reverb left (ms), ...
452 // "SEEK" Minium offset to next tag
453 // "SIGN" Group symbol, Signature (b)
454 // "SYLT" "SLT" Text encoding, Language, Time stamp format, ...
455 // "SYTC" "STC" Time stamp format, Temp data (b)
457 __DCL_TRACE3_N(L"[%ls][%zd][%ls]\n",
458 __id.data(), __size, String::toHexString(__binary, 40).data());
459 __DCL_TRACE1_N(L"[%ls]\n", String::tryString(__binary, 40).data());
460 break;
461 }
462 case 'A': {
463 if (__id == L"AENC") { // Audio encryption
464 __IF_URL__ // Owner identifier
465 __IF_BINARY__ // Preview start, Preview length, Encryption info
466 }
467 else if (__id == L"APIC") {
 // When flag bit 0x01 is set, a 4-byte length precedes the payload
 // and is used as the picture data length below.
468 uint32_t dataLength = 0;
469 if (__flags & 0x01) {
470 dataLength = word(_begin, 4);
471 _begin += 4;
472 }
473 __DCL_TRACE4_N(L"[%ls][%d][%d][%ls]\n", __id.data(), __size,
474 dataLength, String::tryString(_begin, __size, 40).data());
476 __IF_URL__ // MIME type or URL
477 __IF_TYPE__ // Picture type
479 if (_begin < _end) {// Picture data
480 __binary.assign(_begin,
481 dataLength > 0 ? dataLength : _end - _begin);
482 }
483 }
484 else {
485 // ASPI
487 }
488 break;
489 }
490 case 'C': {
491 if (__id == L"COMM" || __id == L"COM") {
496 }
497 else if (__id == L"CRA") { // Audio encryption
498 __IF_URL__ // Owner identifier
499 __IF_BINARY__ // Preview start, Preview length, Encryption info
500 }
501 else {
502 // COMR
503 __IF_BINARY__ // Text encoding, Price string, ... Seller logo
504 }
505 break;
506 }
507 case 'G': {
508 if (__id == L"GEOB" || __id == L"GEO") {
510 __IF_URL__ // MIME type or URL
511 __IF_TEXT__ // Filename
513 __IF_BINARY__ // Encapsulated object
514 }
515 else {
516 // GRID
517 __IF_BINARY__ // Owner identifier, Group symbol, Group dependent data
518 }
519 break;
520 }
521 case 'P': {
522 if (__id == L"PIC") {
 // Three bytes are taken as-is into __url before the picture type.
524 if (_begin < _end) {
525 __url = AsciiDecoder::decode(_begin, 3);
526 _begin += 3;
527 }
528 __IF_TYPE__ // Picture type
530 __IF_BINARY__ // Picture data
531 }
532 else if (__id == L"PRIV") {
533 __IF_URL__ // Owner identifier
534 __IF_BINARY__ // The private data (b)
535 }
536 else {
537 // "PCNT" "CNT" Counter
538 // "POPM" "POP" Email to user, Rating, Counter
539 // "POSS" Time stamp format, Position
541 }
542 break;
543 }
544 case 'I': {
545 if (__id.compare(L"IPL", 3) != 0) { // Involved people list
547 break;
548 }
549 // IPLS(3), IPL(2), TIPL(4)
 // No break here: IDs starting with "IPL" fall through into case 'T'.
550 }
551 case 'T': {
553 if (__id.compare(L"TXX", 3) == 0) {
555 }
557 break;
558 }
559 case 'U': {
560 // UFID USER USLT
561 if (__id.compare(L"UFI", 3) == 0) {
563 if (_begin < _end) {
564 // up to 64 bytes binary data
565 size_t n = _end - _begin;
566 __binary.assign(_begin, __MIN(64, n));
567 if (n <= 64) {
568 __text = String::tryString(__binary);
569 }
570 }
571 break;
572 }
575
576 if (__id == L"USLT" || __id == L"ULT") {
578 }
580 break;
581 }
582 case 'W': {
 // NOTE(review): unlike the "TXX" and "UFI" checks above, this compare
 // has no `== 0`; if compare() returns 0 on a match, the branch is
 // taken for IDs that do NOT start with "WXX" - confirm the intent.
583 if (__id.compare(L"WXX", 3)) {
586 }
588 break;
589 }
590 }
591
592 // Frame data size + frame header (6 or 10 bytes)
593 return __size + __6_OR_10__;
594}
595
597{
598 StringBuilder sb(__id);
599 sb.format(L" size[%d] flags[%x] encoding[%d] type[%d]",
601 if (!__url.isEmpty()) {
602 sb.format(L" url[%ls]", __url.data());
603 }
604 if (!__description.isEmpty()) {
605 sb.format(L" description[%ls]", __description.data());
606 }
607 if (!__text.isEmpty()) {
608 sb.format(L" text[%ls]", __text.data());
609 }
610 if (!__binary.isEmpty()) {
611 sb.format(L" binary[%zd][%ls]", __binary.length(),
612 String::tryString(__binary, 40).data());
613 }
614 return sb;
615}
616
617__DCL_END_NAMESPACE
#define __THIS_FILE__
Definition _trace.h:14
@ CS_BIG_ENDIAN
Definition Charset.h:57
@ CS_LITTLE_ENDIAN
Definition Charset.h:56
#define NULL
Definition Config.h:340
wchar_t char_t
Definition Config.h:275
unsigned char byte_t
Definition Config.h:274
#define __DCL_TRACE1_N(fmt, arg)
#define __DCL_TRACE3_N(fmt, arg1, arg2, arg3)
#define __IF_TYPE__
bool __valid_language_code(const char _lang[3])
Definition ID3v2.cpp:376
#define __IF_TEXT__
#define __IF_URL__
bool __IS_UPPER(const char *_begin, const char *_end)
Definition ID3v2.cpp:80
size_t __skip_length__(char _encoding, size_t _n)
Definition ID3v2.cpp:370
#define __IF_ENCODING__
#define __IF_DESCRIPTION__
size_t __decode_length__(char _encoding, const char *_psz, size_t _max)
Definition ID3v2.cpp:349
int __find_frame_size_bits(char _version, const char *_begin, const char *_end, bool _debug)
Definition ID3v2.cpp:91
#define __IF_LANGUAGE__
#define __IF_BINARY__
#define Extended_header(flags)
Definition ID3v2.h:91
#define __DCL_TRACE4_N(fmt, arg1, arg2, arg3, arg4)
Definition IFXField.cpp:41
#define __DCL_TRACE0(psz)
Definition Object.h:375
#define __DCL_TRACE1(fmt, arg1)
Definition Object.h:376
#define __DCL_ASSERT_PARAM(expr)
Definition Object.h:384
#define __DCL_TRACE3(fmt, arg1, arg2, arg3)
Definition Object.h:378
#define __DCL_ASSERT(expr)
Definition Object.h:371
#define IMPLEMENT_CLASSINFO(class_name, base_class_name)
Definition Object.h:228
#define __T(str)
Definition Object.h:44
#define __DCL_TRACE2(fmt, arg1, arg2)
Definition Object.h:377
ByteString r
ByteBuffer * buf
void CharsetConvertException *size_t n
Definition SQLField.cpp:253
static String decode(const char *_mbs, size_t _mbslen=(size_t) -1)
virtual void destroy()
Definition Exception.cpp:74
String toStringAll() const
Definition Exception.cpp:45
Definition File.h:38
@ BEGIN
Definition File.h:205
off_t seek(off_t _offset, int _whence) __DCL_THROWS1(IOException *)
Definition File.cpp:590
virtual size_t read(void *_buf, size_t _n) __DCL_THROWS1(IOException *)
Definition File.cpp:476
const String & path() const
Definition File.h:247
String __url
Definition ID3v2.h:196
ID3v2Frame(const ID3v2 &_tag)
uint32_t word(const char *_bytes, size_t _n)
Definition ID3v2.h:128
short __flags
Definition ID3v2.h:188
const ID3v2 & __tag
Definition ID3v2.h:180
ByteString __binary
Definition ID3v2.h:199
char __encoding
Definition ID3v2.h:191
String __id
Definition ID3v2.h:183
String __text
Definition ID3v2.h:198
uint32_t __size
Definition ID3v2.h:186
String __description
Definition ID3v2.h:197
virtual String toString() const
Definition ID3v2.cpp:596
size_t read(const char *_begin, const char *_end)
Definition ID3v2.cpp:383
char __type
Definition ID3v2.h:193
Definition ID3v2.h:16
unsigned __fsbits
Definition ID3v2.h:97
char __header[10]
Definition ID3v2.h:95
size_t skipExtendedHeader(const char *_begin, const char *_end) const
Definition ID3v2.cpp:271
char * __data
Definition ID3v2.h:100
String __ID3
Definition ID3v2.h:105
uint32_t __size
Definition ID3v2.h:110
PointerArray __frames
Definition ID3v2.h:112
static String decode(const char *_mbs, size_t _nmbs=(size_t) -1)
virtual String toString() const
Definition Object.cpp:187
size_t __MIN(size_t x, size_t y)
Definition size_t.h:27