1 | /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd |
2 | See the file COPYING for copying permission. |
3 | */ |
4 | |
5 | /* This file is included! */ |
6 | #ifdef XML_TOK_IMPL_C |
7 | |
/* Fallback used when the including file has not supplied its own
   IS_INVALID_CHAR: no multi-byte sequence is treated as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
11 | |
/* Expands to the `case BT_LEADn:` arm of a byte-type switch for an n-byte
   character.  Note that `enc` and `end` are NOT macro parameters: they are
   picked up from the enclosing function.
     - fewer than n bytes left        -> return XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR says invalid   -> *nextTokPtr = ptr, XML_TOK_INVALID
     - otherwise                      -> step ptr over the n bytes */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      ptr += n; \
      break;

/* Switch arms shared by the scanners: the 2-, 3- and 4-byte lead cases
   above, plus the byte types BT_NONXML, BT_MALFORM and BT_TRAIL, which are
   always reported as XML_TOK_INVALID at the current position. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_NONXML: \
  case BT_MALFORM: \
  case BT_TRAIL: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
32 | |
/* Expands to the `case BT_LEADn:` arm for an n-byte character inside a name.
     - fewer than n bytes left -> return XML_TOK_PARTIAL_CHAR
     - the sequence is rejected by IS_INVALID_CHAR, or is not a name
       character -> *nextTokPtr = ptr, return XML_TOK_INVALID
     - otherwise -> step ptr over the n bytes
   The IS_INVALID_CHAR test comes first so that a malformed multi-byte
   sequence is never handed to IS_NAME_CHAR and accepted as part of a name. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NAME_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Switch arms that consume one name character of any width.  A BT_NONASCII
   unit must pass IS_NAME_CHAR_MINBPC and then shares the single-unit advance
   with the plain name byte types. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
60 | |
/* Expands to the `case BT_LEADn:` arm for an n-byte character that must be
   able to start a name.
     - fewer than n bytes left -> return XML_TOK_PARTIAL_CHAR
     - the sequence is rejected by IS_INVALID_CHAR, or is not a name-start
       character -> *nextTokPtr = ptr, return XML_TOK_INVALID
     - otherwise -> step ptr over the n bytes
   The IS_INVALID_CHAR test comes first so that a malformed multi-byte
   sequence is never handed to IS_NMSTRT_CHAR and accepted as a name start. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
   case BT_LEAD ## n: \
     if (end - ptr < n) \
       return XML_TOK_PARTIAL_CHAR; \
     if (IS_INVALID_CHAR(enc, ptr, n) || !IS_NMSTRT_CHAR(enc, ptr, n)) { \
       *nextTokPtr = ptr; \
       return XML_TOK_INVALID; \
     } \
     ptr += n; \
     break;

/* Switch arms that consume one name-start character of any width.  A
   BT_NONASCII unit must pass IS_NMSTRT_CHAR_MINBPC and then shares the
   single-unit advance with BT_NMSTRT and BT_HEX. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *nextTokPtr = ptr; \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    ptr += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85 | |
/* Function names in this file are written as PREFIX(name).  Unless the
   including file has already defined PREFIX, the name is used unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
89 | |
90 | |
/* True when at least `count` characters of MINBPC(enc) bytes each lie
   between ptr and end.  Every argument is parenthesized so that expression
   arguments (e.g. a `count` of `a + b`) expand with the intended
   precedence. */
#define HAS_CHARS(enc, ptr, end, count) \
    ((end) - (ptr) >= (count) * MINBPC(enc))

/* True when at least one whole character lies between ptr and end. */
#define HAS_CHAR(enc, ptr, end) \
    HAS_CHARS(enc, ptr, end, 1)

/* Makes the ENCLOSING function return XML_TOK_PARTIAL unless `count`
   characters are available.  The expansion is a braced block, so it works
   with or without a trailing semicolon at the call site. */
#define REQUIRE_CHARS(enc, ptr, end, count) \
    { \
      if (! HAS_CHARS(enc, ptr, end, count)) { \
        return XML_TOK_PARTIAL; \
      } \
    }

/* Single-character form of REQUIRE_CHARS. */
#define REQUIRE_CHAR(enc, ptr, end) \
    REQUIRE_CHARS(enc, ptr, end, 1)
106 | |
107 | |
108 | /* ptr points to character following "<!-" */ |
109 | |
110 | static int PTRCALL |
111 | PREFIX(scanComment)(const ENCODING *enc, const char *ptr, |
112 | const char *end, const char **nextTokPtr) |
113 | { |
114 | if (HAS_CHAR(enc, ptr, end)) { |
115 | if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
116 | *nextTokPtr = ptr; |
117 | return XML_TOK_INVALID; |
118 | } |
119 | ptr += MINBPC(enc); |
120 | while (HAS_CHAR(enc, ptr, end)) { |
121 | switch (BYTE_TYPE(enc, ptr)) { |
122 | INVALID_CASES(ptr, nextTokPtr) |
123 | case BT_MINUS: |
124 | ptr += MINBPC(enc); |
125 | REQUIRE_CHAR(enc, ptr, end); |
126 | if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) { |
127 | ptr += MINBPC(enc); |
128 | REQUIRE_CHAR(enc, ptr, end); |
129 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
130 | *nextTokPtr = ptr; |
131 | return XML_TOK_INVALID; |
132 | } |
133 | *nextTokPtr = ptr + MINBPC(enc); |
134 | return XML_TOK_COMMENT; |
135 | } |
136 | break; |
137 | default: |
138 | ptr += MINBPC(enc); |
139 | break; |
140 | } |
141 | } |
142 | } |
143 | return XML_TOK_PARTIAL; |
144 | } |
145 | |
146 | /* ptr points to character following "<!" */ |
147 | |
148 | static int PTRCALL |
149 | PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, |
150 | const char *end, const char **nextTokPtr) |
151 | { |
152 | REQUIRE_CHAR(enc, ptr, end); |
153 | switch (BYTE_TYPE(enc, ptr)) { |
154 | case BT_MINUS: |
155 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
156 | case BT_LSQB: |
157 | *nextTokPtr = ptr + MINBPC(enc); |
158 | return XML_TOK_COND_SECT_OPEN; |
159 | case BT_NMSTRT: |
160 | case BT_HEX: |
161 | ptr += MINBPC(enc); |
162 | break; |
163 | default: |
164 | *nextTokPtr = ptr; |
165 | return XML_TOK_INVALID; |
166 | } |
167 | while (HAS_CHAR(enc, ptr, end)) { |
168 | switch (BYTE_TYPE(enc, ptr)) { |
169 | case BT_PERCNT: |
170 | REQUIRE_CHARS(enc, ptr, end, 2); |
171 | /* don't allow <!ENTITY% foo "whatever"> */ |
172 | switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
173 | case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
174 | *nextTokPtr = ptr; |
175 | return XML_TOK_INVALID; |
176 | } |
177 | /* fall through */ |
178 | case BT_S: case BT_CR: case BT_LF: |
179 | *nextTokPtr = ptr; |
180 | return XML_TOK_DECL_OPEN; |
181 | case BT_NMSTRT: |
182 | case BT_HEX: |
183 | ptr += MINBPC(enc); |
184 | break; |
185 | default: |
186 | *nextTokPtr = ptr; |
187 | return XML_TOK_INVALID; |
188 | } |
189 | } |
190 | return XML_TOK_PARTIAL; |
191 | } |
192 | |
193 | static int PTRCALL |
194 | PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, |
195 | const char *end, int *tokPtr) |
196 | { |
197 | int upper = 0; |
198 | *tokPtr = XML_TOK_PI; |
199 | if (end - ptr != MINBPC(enc)*3) |
200 | return 1; |
201 | switch (BYTE_TO_ASCII(enc, ptr)) { |
202 | case ASCII_x: |
203 | break; |
204 | case ASCII_X: |
205 | upper = 1; |
206 | break; |
207 | default: |
208 | return 1; |
209 | } |
210 | ptr += MINBPC(enc); |
211 | switch (BYTE_TO_ASCII(enc, ptr)) { |
212 | case ASCII_m: |
213 | break; |
214 | case ASCII_M: |
215 | upper = 1; |
216 | break; |
217 | default: |
218 | return 1; |
219 | } |
220 | ptr += MINBPC(enc); |
221 | switch (BYTE_TO_ASCII(enc, ptr)) { |
222 | case ASCII_l: |
223 | break; |
224 | case ASCII_L: |
225 | upper = 1; |
226 | break; |
227 | default: |
228 | return 1; |
229 | } |
230 | if (upper) |
231 | return 0; |
232 | *tokPtr = XML_TOK_XML_DECL; |
233 | return 1; |
234 | } |
235 | |
236 | /* ptr points to character following "<?" */ |
237 | |
238 | static int PTRCALL |
239 | PREFIX(scanPi)(const ENCODING *enc, const char *ptr, |
240 | const char *end, const char **nextTokPtr) |
241 | { |
242 | int tok; |
243 | const char *target = ptr; |
244 | REQUIRE_CHAR(enc, ptr, end); |
245 | switch (BYTE_TYPE(enc, ptr)) { |
246 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
247 | default: |
248 | *nextTokPtr = ptr; |
249 | return XML_TOK_INVALID; |
250 | } |
251 | while (HAS_CHAR(enc, ptr, end)) { |
252 | switch (BYTE_TYPE(enc, ptr)) { |
253 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
254 | case BT_S: case BT_CR: case BT_LF: |
255 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
256 | *nextTokPtr = ptr; |
257 | return XML_TOK_INVALID; |
258 | } |
259 | ptr += MINBPC(enc); |
260 | while (HAS_CHAR(enc, ptr, end)) { |
261 | switch (BYTE_TYPE(enc, ptr)) { |
262 | INVALID_CASES(ptr, nextTokPtr) |
263 | case BT_QUEST: |
264 | ptr += MINBPC(enc); |
265 | REQUIRE_CHAR(enc, ptr, end); |
266 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
267 | *nextTokPtr = ptr + MINBPC(enc); |
268 | return tok; |
269 | } |
270 | break; |
271 | default: |
272 | ptr += MINBPC(enc); |
273 | break; |
274 | } |
275 | } |
276 | return XML_TOK_PARTIAL; |
277 | case BT_QUEST: |
278 | if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
279 | *nextTokPtr = ptr; |
280 | return XML_TOK_INVALID; |
281 | } |
282 | ptr += MINBPC(enc); |
283 | REQUIRE_CHAR(enc, ptr, end); |
284 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
285 | *nextTokPtr = ptr + MINBPC(enc); |
286 | return tok; |
287 | } |
288 | /* fall through */ |
289 | default: |
290 | *nextTokPtr = ptr; |
291 | return XML_TOK_INVALID; |
292 | } |
293 | } |
294 | return XML_TOK_PARTIAL; |
295 | } |
296 | |
297 | static int PTRCALL |
298 | PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, |
299 | const char *end, const char **nextTokPtr) |
300 | { |
301 | static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, |
302 | ASCII_T, ASCII_A, ASCII_LSQB }; |
303 | int i; |
304 | /* CDATA[ */ |
305 | REQUIRE_CHARS(enc, ptr, end, 6); |
306 | for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
307 | if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { |
308 | *nextTokPtr = ptr; |
309 | return XML_TOK_INVALID; |
310 | } |
311 | } |
312 | *nextTokPtr = ptr; |
313 | return XML_TOK_CDATA_SECT_OPEN; |
314 | } |
315 | |
316 | static int PTRCALL |
317 | PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, |
318 | const char *end, const char **nextTokPtr) |
319 | { |
320 | if (ptr >= end) |
321 | return XML_TOK_NONE; |
322 | if (MINBPC(enc) > 1) { |
323 | size_t n = end - ptr; |
324 | if (n & (MINBPC(enc) - 1)) { |
325 | n &= ~(MINBPC(enc) - 1); |
326 | if (n == 0) |
327 | return XML_TOK_PARTIAL; |
328 | end = ptr + n; |
329 | } |
330 | } |
331 | switch (BYTE_TYPE(enc, ptr)) { |
332 | case BT_RSQB: |
333 | ptr += MINBPC(enc); |
334 | REQUIRE_CHAR(enc, ptr, end); |
335 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
336 | break; |
337 | ptr += MINBPC(enc); |
338 | REQUIRE_CHAR(enc, ptr, end); |
339 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
340 | ptr -= MINBPC(enc); |
341 | break; |
342 | } |
343 | *nextTokPtr = ptr + MINBPC(enc); |
344 | return XML_TOK_CDATA_SECT_CLOSE; |
345 | case BT_CR: |
346 | ptr += MINBPC(enc); |
347 | REQUIRE_CHAR(enc, ptr, end); |
348 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
349 | ptr += MINBPC(enc); |
350 | *nextTokPtr = ptr; |
351 | return XML_TOK_DATA_NEWLINE; |
352 | case BT_LF: |
353 | *nextTokPtr = ptr + MINBPC(enc); |
354 | return XML_TOK_DATA_NEWLINE; |
355 | INVALID_CASES(ptr, nextTokPtr) |
356 | default: |
357 | ptr += MINBPC(enc); |
358 | break; |
359 | } |
360 | while (HAS_CHAR(enc, ptr, end)) { |
361 | switch (BYTE_TYPE(enc, ptr)) { |
362 | #define LEAD_CASE(n) \ |
363 | case BT_LEAD ## n: \ |
364 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
365 | *nextTokPtr = ptr; \ |
366 | return XML_TOK_DATA_CHARS; \ |
367 | } \ |
368 | ptr += n; \ |
369 | break; |
370 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
371 | #undef LEAD_CASE |
372 | case BT_NONXML: |
373 | case BT_MALFORM: |
374 | case BT_TRAIL: |
375 | case BT_CR: |
376 | case BT_LF: |
377 | case BT_RSQB: |
378 | *nextTokPtr = ptr; |
379 | return XML_TOK_DATA_CHARS; |
380 | default: |
381 | ptr += MINBPC(enc); |
382 | break; |
383 | } |
384 | } |
385 | *nextTokPtr = ptr; |
386 | return XML_TOK_DATA_CHARS; |
387 | } |
388 | |
389 | /* ptr points to character following "</" */ |
390 | |
391 | static int PTRCALL |
392 | PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, |
393 | const char *end, const char **nextTokPtr) |
394 | { |
395 | REQUIRE_CHAR(enc, ptr, end); |
396 | switch (BYTE_TYPE(enc, ptr)) { |
397 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
398 | default: |
399 | *nextTokPtr = ptr; |
400 | return XML_TOK_INVALID; |
401 | } |
402 | while (HAS_CHAR(enc, ptr, end)) { |
403 | switch (BYTE_TYPE(enc, ptr)) { |
404 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
405 | case BT_S: case BT_CR: case BT_LF: |
406 | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
407 | switch (BYTE_TYPE(enc, ptr)) { |
408 | case BT_S: case BT_CR: case BT_LF: |
409 | break; |
410 | case BT_GT: |
411 | *nextTokPtr = ptr + MINBPC(enc); |
412 | return XML_TOK_END_TAG; |
413 | default: |
414 | *nextTokPtr = ptr; |
415 | return XML_TOK_INVALID; |
416 | } |
417 | } |
418 | return XML_TOK_PARTIAL; |
419 | #ifdef XML_NS |
420 | case BT_COLON: |
421 | /* no need to check qname syntax here, |
422 | since end-tag must match exactly */ |
423 | ptr += MINBPC(enc); |
424 | break; |
425 | #endif |
426 | case BT_GT: |
427 | *nextTokPtr = ptr + MINBPC(enc); |
428 | return XML_TOK_END_TAG; |
429 | default: |
430 | *nextTokPtr = ptr; |
431 | return XML_TOK_INVALID; |
432 | } |
433 | } |
434 | return XML_TOK_PARTIAL; |
435 | } |
436 | |
/* ptr points to character following "&#x" */
438 | |
439 | static int PTRCALL |
440 | PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, |
441 | const char *end, const char **nextTokPtr) |
442 | { |
443 | if (HAS_CHAR(enc, ptr, end)) { |
444 | switch (BYTE_TYPE(enc, ptr)) { |
445 | case BT_DIGIT: |
446 | case BT_HEX: |
447 | break; |
448 | default: |
449 | *nextTokPtr = ptr; |
450 | return XML_TOK_INVALID; |
451 | } |
452 | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
453 | switch (BYTE_TYPE(enc, ptr)) { |
454 | case BT_DIGIT: |
455 | case BT_HEX: |
456 | break; |
457 | case BT_SEMI: |
458 | *nextTokPtr = ptr + MINBPC(enc); |
459 | return XML_TOK_CHAR_REF; |
460 | default: |
461 | *nextTokPtr = ptr; |
462 | return XML_TOK_INVALID; |
463 | } |
464 | } |
465 | } |
466 | return XML_TOK_PARTIAL; |
467 | } |
468 | |
469 | /* ptr points to character following "&#" */ |
470 | |
471 | static int PTRCALL |
472 | PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, |
473 | const char *end, const char **nextTokPtr) |
474 | { |
475 | if (HAS_CHAR(enc, ptr, end)) { |
476 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) |
477 | return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
478 | switch (BYTE_TYPE(enc, ptr)) { |
479 | case BT_DIGIT: |
480 | break; |
481 | default: |
482 | *nextTokPtr = ptr; |
483 | return XML_TOK_INVALID; |
484 | } |
485 | for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
486 | switch (BYTE_TYPE(enc, ptr)) { |
487 | case BT_DIGIT: |
488 | break; |
489 | case BT_SEMI: |
490 | *nextTokPtr = ptr + MINBPC(enc); |
491 | return XML_TOK_CHAR_REF; |
492 | default: |
493 | *nextTokPtr = ptr; |
494 | return XML_TOK_INVALID; |
495 | } |
496 | } |
497 | } |
498 | return XML_TOK_PARTIAL; |
499 | } |
500 | |
501 | /* ptr points to character following "&" */ |
502 | |
503 | static int PTRCALL |
504 | PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
505 | const char **nextTokPtr) |
506 | { |
507 | REQUIRE_CHAR(enc, ptr, end); |
508 | switch (BYTE_TYPE(enc, ptr)) { |
509 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
510 | case BT_NUM: |
511 | return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
512 | default: |
513 | *nextTokPtr = ptr; |
514 | return XML_TOK_INVALID; |
515 | } |
516 | while (HAS_CHAR(enc, ptr, end)) { |
517 | switch (BYTE_TYPE(enc, ptr)) { |
518 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
519 | case BT_SEMI: |
520 | *nextTokPtr = ptr + MINBPC(enc); |
521 | return XML_TOK_ENTITY_REF; |
522 | default: |
523 | *nextTokPtr = ptr; |
524 | return XML_TOK_INVALID; |
525 | } |
526 | } |
527 | return XML_TOK_PARTIAL; |
528 | } |
529 | |
530 | /* ptr points to character following first character of attribute name */ |
531 | |
532 | static int PTRCALL |
533 | PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
534 | const char **nextTokPtr) |
535 | { |
536 | #ifdef XML_NS |
537 | int hadColon = 0; |
538 | #endif |
539 | while (HAS_CHAR(enc, ptr, end)) { |
540 | switch (BYTE_TYPE(enc, ptr)) { |
541 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
542 | #ifdef XML_NS |
543 | case BT_COLON: |
544 | if (hadColon) { |
545 | *nextTokPtr = ptr; |
546 | return XML_TOK_INVALID; |
547 | } |
548 | hadColon = 1; |
549 | ptr += MINBPC(enc); |
550 | REQUIRE_CHAR(enc, ptr, end); |
551 | switch (BYTE_TYPE(enc, ptr)) { |
552 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
553 | default: |
554 | *nextTokPtr = ptr; |
555 | return XML_TOK_INVALID; |
556 | } |
557 | break; |
558 | #endif |
559 | case BT_S: case BT_CR: case BT_LF: |
560 | for (;;) { |
561 | int t; |
562 | |
563 | ptr += MINBPC(enc); |
564 | REQUIRE_CHAR(enc, ptr, end); |
565 | t = BYTE_TYPE(enc, ptr); |
566 | if (t == BT_EQUALS) |
567 | break; |
568 | switch (t) { |
569 | case BT_S: |
570 | case BT_LF: |
571 | case BT_CR: |
572 | break; |
573 | default: |
574 | *nextTokPtr = ptr; |
575 | return XML_TOK_INVALID; |
576 | } |
577 | } |
578 | /* fall through */ |
579 | case BT_EQUALS: |
580 | { |
581 | int open; |
582 | #ifdef XML_NS |
583 | hadColon = 0; |
584 | #endif |
585 | for (;;) { |
586 | ptr += MINBPC(enc); |
587 | REQUIRE_CHAR(enc, ptr, end); |
588 | open = BYTE_TYPE(enc, ptr); |
589 | if (open == BT_QUOT || open == BT_APOS) |
590 | break; |
591 | switch (open) { |
592 | case BT_S: |
593 | case BT_LF: |
594 | case BT_CR: |
595 | break; |
596 | default: |
597 | *nextTokPtr = ptr; |
598 | return XML_TOK_INVALID; |
599 | } |
600 | } |
601 | ptr += MINBPC(enc); |
602 | /* in attribute value */ |
603 | for (;;) { |
604 | int t; |
605 | REQUIRE_CHAR(enc, ptr, end); |
606 | t = BYTE_TYPE(enc, ptr); |
607 | if (t == open) |
608 | break; |
609 | switch (t) { |
610 | INVALID_CASES(ptr, nextTokPtr) |
611 | case BT_AMP: |
612 | { |
613 | int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
614 | if (tok <= 0) { |
615 | if (tok == XML_TOK_INVALID) |
616 | *nextTokPtr = ptr; |
617 | return tok; |
618 | } |
619 | break; |
620 | } |
621 | case BT_LT: |
622 | *nextTokPtr = ptr; |
623 | return XML_TOK_INVALID; |
624 | default: |
625 | ptr += MINBPC(enc); |
626 | break; |
627 | } |
628 | } |
629 | ptr += MINBPC(enc); |
630 | REQUIRE_CHAR(enc, ptr, end); |
631 | switch (BYTE_TYPE(enc, ptr)) { |
632 | case BT_S: |
633 | case BT_CR: |
634 | case BT_LF: |
635 | break; |
636 | case BT_SOL: |
637 | goto sol; |
638 | case BT_GT: |
639 | goto gt; |
640 | default: |
641 | *nextTokPtr = ptr; |
642 | return XML_TOK_INVALID; |
643 | } |
644 | /* ptr points to closing quote */ |
645 | for (;;) { |
646 | ptr += MINBPC(enc); |
647 | REQUIRE_CHAR(enc, ptr, end); |
648 | switch (BYTE_TYPE(enc, ptr)) { |
649 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
650 | case BT_S: case BT_CR: case BT_LF: |
651 | continue; |
652 | case BT_GT: |
653 | gt: |
654 | *nextTokPtr = ptr + MINBPC(enc); |
655 | return XML_TOK_START_TAG_WITH_ATTS; |
656 | case BT_SOL: |
657 | sol: |
658 | ptr += MINBPC(enc); |
659 | REQUIRE_CHAR(enc, ptr, end); |
660 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
661 | *nextTokPtr = ptr; |
662 | return XML_TOK_INVALID; |
663 | } |
664 | *nextTokPtr = ptr + MINBPC(enc); |
665 | return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
666 | default: |
667 | *nextTokPtr = ptr; |
668 | return XML_TOK_INVALID; |
669 | } |
670 | break; |
671 | } |
672 | break; |
673 | } |
674 | default: |
675 | *nextTokPtr = ptr; |
676 | return XML_TOK_INVALID; |
677 | } |
678 | } |
679 | return XML_TOK_PARTIAL; |
680 | } |
681 | |
682 | /* ptr points to character following "<" */ |
683 | |
684 | static int PTRCALL |
685 | PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
686 | const char **nextTokPtr) |
687 | { |
688 | #ifdef XML_NS |
689 | int hadColon; |
690 | #endif |
691 | REQUIRE_CHAR(enc, ptr, end); |
692 | switch (BYTE_TYPE(enc, ptr)) { |
693 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
694 | case BT_EXCL: |
695 | ptr += MINBPC(enc); |
696 | REQUIRE_CHAR(enc, ptr, end); |
697 | switch (BYTE_TYPE(enc, ptr)) { |
698 | case BT_MINUS: |
699 | return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
700 | case BT_LSQB: |
701 | return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), |
702 | end, nextTokPtr); |
703 | } |
704 | *nextTokPtr = ptr; |
705 | return XML_TOK_INVALID; |
706 | case BT_QUEST: |
707 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
708 | case BT_SOL: |
709 | return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
710 | default: |
711 | *nextTokPtr = ptr; |
712 | return XML_TOK_INVALID; |
713 | } |
714 | #ifdef XML_NS |
715 | hadColon = 0; |
716 | #endif |
717 | /* we have a start-tag */ |
718 | while (HAS_CHAR(enc, ptr, end)) { |
719 | switch (BYTE_TYPE(enc, ptr)) { |
720 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
721 | #ifdef XML_NS |
722 | case BT_COLON: |
723 | if (hadColon) { |
724 | *nextTokPtr = ptr; |
725 | return XML_TOK_INVALID; |
726 | } |
727 | hadColon = 1; |
728 | ptr += MINBPC(enc); |
729 | REQUIRE_CHAR(enc, ptr, end); |
730 | switch (BYTE_TYPE(enc, ptr)) { |
731 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
732 | default: |
733 | *nextTokPtr = ptr; |
734 | return XML_TOK_INVALID; |
735 | } |
736 | break; |
737 | #endif |
738 | case BT_S: case BT_CR: case BT_LF: |
739 | { |
740 | ptr += MINBPC(enc); |
741 | while (HAS_CHAR(enc, ptr, end)) { |
742 | switch (BYTE_TYPE(enc, ptr)) { |
743 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
744 | case BT_GT: |
745 | goto gt; |
746 | case BT_SOL: |
747 | goto sol; |
748 | case BT_S: case BT_CR: case BT_LF: |
749 | ptr += MINBPC(enc); |
750 | continue; |
751 | default: |
752 | *nextTokPtr = ptr; |
753 | return XML_TOK_INVALID; |
754 | } |
755 | return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
756 | } |
757 | return XML_TOK_PARTIAL; |
758 | } |
759 | case BT_GT: |
760 | gt: |
761 | *nextTokPtr = ptr + MINBPC(enc); |
762 | return XML_TOK_START_TAG_NO_ATTS; |
763 | case BT_SOL: |
764 | sol: |
765 | ptr += MINBPC(enc); |
766 | REQUIRE_CHAR(enc, ptr, end); |
767 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
768 | *nextTokPtr = ptr; |
769 | return XML_TOK_INVALID; |
770 | } |
771 | *nextTokPtr = ptr + MINBPC(enc); |
772 | return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
773 | default: |
774 | *nextTokPtr = ptr; |
775 | return XML_TOK_INVALID; |
776 | } |
777 | } |
778 | return XML_TOK_PARTIAL; |
779 | } |
780 | |
781 | static int PTRCALL |
782 | PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
783 | const char **nextTokPtr) |
784 | { |
785 | if (ptr >= end) |
786 | return XML_TOK_NONE; |
787 | if (MINBPC(enc) > 1) { |
788 | size_t n = end - ptr; |
789 | if (n & (MINBPC(enc) - 1)) { |
790 | n &= ~(MINBPC(enc) - 1); |
791 | if (n == 0) |
792 | return XML_TOK_PARTIAL; |
793 | end = ptr + n; |
794 | } |
795 | } |
796 | switch (BYTE_TYPE(enc, ptr)) { |
797 | case BT_LT: |
798 | return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
799 | case BT_AMP: |
800 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
801 | case BT_CR: |
802 | ptr += MINBPC(enc); |
803 | if (! HAS_CHAR(enc, ptr, end)) |
804 | return XML_TOK_TRAILING_CR; |
805 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
806 | ptr += MINBPC(enc); |
807 | *nextTokPtr = ptr; |
808 | return XML_TOK_DATA_NEWLINE; |
809 | case BT_LF: |
810 | *nextTokPtr = ptr + MINBPC(enc); |
811 | return XML_TOK_DATA_NEWLINE; |
812 | case BT_RSQB: |
813 | ptr += MINBPC(enc); |
814 | if (! HAS_CHAR(enc, ptr, end)) |
815 | return XML_TOK_TRAILING_RSQB; |
816 | if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) |
817 | break; |
818 | ptr += MINBPC(enc); |
819 | if (! HAS_CHAR(enc, ptr, end)) |
820 | return XML_TOK_TRAILING_RSQB; |
821 | if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
822 | ptr -= MINBPC(enc); |
823 | break; |
824 | } |
825 | *nextTokPtr = ptr; |
826 | return XML_TOK_INVALID; |
827 | INVALID_CASES(ptr, nextTokPtr) |
828 | default: |
829 | ptr += MINBPC(enc); |
830 | break; |
831 | } |
832 | while (HAS_CHAR(enc, ptr, end)) { |
833 | switch (BYTE_TYPE(enc, ptr)) { |
834 | #define LEAD_CASE(n) \ |
835 | case BT_LEAD ## n: \ |
836 | if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
837 | *nextTokPtr = ptr; \ |
838 | return XML_TOK_DATA_CHARS; \ |
839 | } \ |
840 | ptr += n; \ |
841 | break; |
842 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
843 | #undef LEAD_CASE |
844 | case BT_RSQB: |
845 | if (HAS_CHARS(enc, ptr, end, 2)) { |
846 | if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { |
847 | ptr += MINBPC(enc); |
848 | break; |
849 | } |
850 | if (HAS_CHARS(enc, ptr, end, 3)) { |
851 | if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { |
852 | ptr += MINBPC(enc); |
853 | break; |
854 | } |
855 | *nextTokPtr = ptr + 2*MINBPC(enc); |
856 | return XML_TOK_INVALID; |
857 | } |
858 | } |
859 | /* fall through */ |
860 | case BT_AMP: |
861 | case BT_LT: |
862 | case BT_NONXML: |
863 | case BT_MALFORM: |
864 | case BT_TRAIL: |
865 | case BT_CR: |
866 | case BT_LF: |
867 | *nextTokPtr = ptr; |
868 | return XML_TOK_DATA_CHARS; |
869 | default: |
870 | ptr += MINBPC(enc); |
871 | break; |
872 | } |
873 | } |
874 | *nextTokPtr = ptr; |
875 | return XML_TOK_DATA_CHARS; |
876 | } |
877 | |
878 | /* ptr points to character following "%" */ |
879 | |
880 | static int PTRCALL |
881 | PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
882 | const char **nextTokPtr) |
883 | { |
884 | REQUIRE_CHAR(enc, ptr, end); |
885 | switch (BYTE_TYPE(enc, ptr)) { |
886 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
887 | case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
888 | *nextTokPtr = ptr; |
889 | return XML_TOK_PERCENT; |
890 | default: |
891 | *nextTokPtr = ptr; |
892 | return XML_TOK_INVALID; |
893 | } |
894 | while (HAS_CHAR(enc, ptr, end)) { |
895 | switch (BYTE_TYPE(enc, ptr)) { |
896 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
897 | case BT_SEMI: |
898 | *nextTokPtr = ptr + MINBPC(enc); |
899 | return XML_TOK_PARAM_ENTITY_REF; |
900 | default: |
901 | *nextTokPtr = ptr; |
902 | return XML_TOK_INVALID; |
903 | } |
904 | } |
905 | return XML_TOK_PARTIAL; |
906 | } |
907 | |
908 | static int PTRCALL |
909 | PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
910 | const char **nextTokPtr) |
911 | { |
912 | REQUIRE_CHAR(enc, ptr, end); |
913 | switch (BYTE_TYPE(enc, ptr)) { |
914 | CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
915 | default: |
916 | *nextTokPtr = ptr; |
917 | return XML_TOK_INVALID; |
918 | } |
919 | while (HAS_CHAR(enc, ptr, end)) { |
920 | switch (BYTE_TYPE(enc, ptr)) { |
921 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
922 | case BT_CR: case BT_LF: case BT_S: |
923 | case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
924 | *nextTokPtr = ptr; |
925 | return XML_TOK_POUND_NAME; |
926 | default: |
927 | *nextTokPtr = ptr; |
928 | return XML_TOK_INVALID; |
929 | } |
930 | } |
931 | return -XML_TOK_POUND_NAME; |
932 | } |
933 | |
934 | static int PTRCALL |
935 | PREFIX(scanLit)(int open, const ENCODING *enc, |
936 | const char *ptr, const char *end, |
937 | const char **nextTokPtr) |
938 | { |
939 | while (HAS_CHAR(enc, ptr, end)) { |
940 | int t = BYTE_TYPE(enc, ptr); |
941 | switch (t) { |
942 | INVALID_CASES(ptr, nextTokPtr) |
943 | case BT_QUOT: |
944 | case BT_APOS: |
945 | ptr += MINBPC(enc); |
946 | if (t != open) |
947 | break; |
948 | if (! HAS_CHAR(enc, ptr, end)) |
949 | return -XML_TOK_LITERAL; |
950 | *nextTokPtr = ptr; |
951 | switch (BYTE_TYPE(enc, ptr)) { |
952 | case BT_S: case BT_CR: case BT_LF: |
953 | case BT_GT: case BT_PERCNT: case BT_LSQB: |
954 | return XML_TOK_LITERAL; |
955 | default: |
956 | return XML_TOK_INVALID; |
957 | } |
958 | default: |
959 | ptr += MINBPC(enc); |
960 | break; |
961 | } |
962 | } |
963 | return XML_TOK_PARTIAL; |
964 | } |
965 | |
966 | static int PTRCALL |
967 | PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
968 | const char **nextTokPtr) |
969 | { |
970 | int tok; |
971 | if (ptr >= end) |
972 | return XML_TOK_NONE; |
973 | if (MINBPC(enc) > 1) { |
974 | size_t n = end - ptr; |
975 | if (n & (MINBPC(enc) - 1)) { |
976 | n &= ~(MINBPC(enc) - 1); |
977 | if (n == 0) |
978 | return XML_TOK_PARTIAL; |
979 | end = ptr + n; |
980 | } |
981 | } |
982 | switch (BYTE_TYPE(enc, ptr)) { |
983 | case BT_QUOT: |
984 | return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
985 | case BT_APOS: |
986 | return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
987 | case BT_LT: |
988 | { |
989 | ptr += MINBPC(enc); |
990 | REQUIRE_CHAR(enc, ptr, end); |
991 | switch (BYTE_TYPE(enc, ptr)) { |
992 | case BT_EXCL: |
993 | return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
994 | case BT_QUEST: |
995 | return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
996 | case BT_NMSTRT: |
997 | case BT_HEX: |
998 | case BT_NONASCII: |
999 | case BT_LEAD2: |
1000 | case BT_LEAD3: |
1001 | case BT_LEAD4: |
1002 | *nextTokPtr = ptr - MINBPC(enc); |
1003 | return XML_TOK_INSTANCE_START; |
1004 | } |
1005 | *nextTokPtr = ptr; |
1006 | return XML_TOK_INVALID; |
1007 | } |
1008 | case BT_CR: |
1009 | if (ptr + MINBPC(enc) == end) { |
1010 | *nextTokPtr = end; |
1011 | /* indicate that this might be part of a CR/LF pair */ |
1012 | return -XML_TOK_PROLOG_S; |
1013 | } |
1014 | /* fall through */ |
1015 | case BT_S: case BT_LF: |
1016 | for (;;) { |
1017 | ptr += MINBPC(enc); |
1018 | if (! HAS_CHAR(enc, ptr, end)) |
1019 | break; |
1020 | switch (BYTE_TYPE(enc, ptr)) { |
1021 | case BT_S: case BT_LF: |
1022 | break; |
1023 | case BT_CR: |
1024 | /* don't split CR/LF pair */ |
1025 | if (ptr + MINBPC(enc) != end) |
1026 | break; |
1027 | /* fall through */ |
1028 | default: |
1029 | *nextTokPtr = ptr; |
1030 | return XML_TOK_PROLOG_S; |
1031 | } |
1032 | } |
1033 | *nextTokPtr = ptr; |
1034 | return XML_TOK_PROLOG_S; |
1035 | case BT_PERCNT: |
1036 | return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1037 | case BT_COMMA: |
1038 | *nextTokPtr = ptr + MINBPC(enc); |
1039 | return XML_TOK_COMMA; |
1040 | case BT_LSQB: |
1041 | *nextTokPtr = ptr + MINBPC(enc); |
1042 | return XML_TOK_OPEN_BRACKET; |
1043 | case BT_RSQB: |
1044 | ptr += MINBPC(enc); |
1045 | if (! HAS_CHAR(enc, ptr, end)) |
1046 | return -XML_TOK_CLOSE_BRACKET; |
1047 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1048 | REQUIRE_CHARS(enc, ptr, end, 2); |
1049 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { |
1050 | *nextTokPtr = ptr + 2*MINBPC(enc); |
1051 | return XML_TOK_COND_SECT_CLOSE; |
1052 | } |
1053 | } |
1054 | *nextTokPtr = ptr; |
1055 | return XML_TOK_CLOSE_BRACKET; |
1056 | case BT_LPAR: |
1057 | *nextTokPtr = ptr + MINBPC(enc); |
1058 | return XML_TOK_OPEN_PAREN; |
1059 | case BT_RPAR: |
1060 | ptr += MINBPC(enc); |
1061 | if (! HAS_CHAR(enc, ptr, end)) |
1062 | return -XML_TOK_CLOSE_PAREN; |
1063 | switch (BYTE_TYPE(enc, ptr)) { |
1064 | case BT_AST: |
1065 | *nextTokPtr = ptr + MINBPC(enc); |
1066 | return XML_TOK_CLOSE_PAREN_ASTERISK; |
1067 | case BT_QUEST: |
1068 | *nextTokPtr = ptr + MINBPC(enc); |
1069 | return XML_TOK_CLOSE_PAREN_QUESTION; |
1070 | case BT_PLUS: |
1071 | *nextTokPtr = ptr + MINBPC(enc); |
1072 | return XML_TOK_CLOSE_PAREN_PLUS; |
1073 | case BT_CR: case BT_LF: case BT_S: |
1074 | case BT_GT: case BT_COMMA: case BT_VERBAR: |
1075 | case BT_RPAR: |
1076 | *nextTokPtr = ptr; |
1077 | return XML_TOK_CLOSE_PAREN; |
1078 | } |
1079 | *nextTokPtr = ptr; |
1080 | return XML_TOK_INVALID; |
1081 | case BT_VERBAR: |
1082 | *nextTokPtr = ptr + MINBPC(enc); |
1083 | return XML_TOK_OR; |
1084 | case BT_GT: |
1085 | *nextTokPtr = ptr + MINBPC(enc); |
1086 | return XML_TOK_DECL_CLOSE; |
1087 | case BT_NUM: |
1088 | return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1089 | #define LEAD_CASE(n) \ |
1090 | case BT_LEAD ## n: \ |
1091 | if (end - ptr < n) \ |
1092 | return XML_TOK_PARTIAL_CHAR; \ |
1093 | if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
1094 | ptr += n; \ |
1095 | tok = XML_TOK_NAME; \ |
1096 | break; \ |
1097 | } \ |
1098 | if (IS_NAME_CHAR(enc, ptr, n)) { \ |
1099 | ptr += n; \ |
1100 | tok = XML_TOK_NMTOKEN; \ |
1101 | break; \ |
1102 | } \ |
1103 | *nextTokPtr = ptr; \ |
1104 | return XML_TOK_INVALID; |
1105 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1106 | #undef LEAD_CASE |
1107 | case BT_NMSTRT: |
1108 | case BT_HEX: |
1109 | tok = XML_TOK_NAME; |
1110 | ptr += MINBPC(enc); |
1111 | break; |
1112 | case BT_DIGIT: |
1113 | case BT_NAME: |
1114 | case BT_MINUS: |
1115 | #ifdef XML_NS |
1116 | case BT_COLON: |
1117 | #endif |
1118 | tok = XML_TOK_NMTOKEN; |
1119 | ptr += MINBPC(enc); |
1120 | break; |
1121 | case BT_NONASCII: |
1122 | if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
1123 | ptr += MINBPC(enc); |
1124 | tok = XML_TOK_NAME; |
1125 | break; |
1126 | } |
1127 | if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
1128 | ptr += MINBPC(enc); |
1129 | tok = XML_TOK_NMTOKEN; |
1130 | break; |
1131 | } |
1132 | /* fall through */ |
1133 | default: |
1134 | *nextTokPtr = ptr; |
1135 | return XML_TOK_INVALID; |
1136 | } |
1137 | while (HAS_CHAR(enc, ptr, end)) { |
1138 | switch (BYTE_TYPE(enc, ptr)) { |
1139 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1140 | case BT_GT: case BT_RPAR: case BT_COMMA: |
1141 | case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
1142 | case BT_S: case BT_CR: case BT_LF: |
1143 | *nextTokPtr = ptr; |
1144 | return tok; |
1145 | #ifdef XML_NS |
1146 | case BT_COLON: |
1147 | ptr += MINBPC(enc); |
1148 | switch (tok) { |
1149 | case XML_TOK_NAME: |
1150 | REQUIRE_CHAR(enc, ptr, end); |
1151 | tok = XML_TOK_PREFIXED_NAME; |
1152 | switch (BYTE_TYPE(enc, ptr)) { |
1153 | CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
1154 | default: |
1155 | tok = XML_TOK_NMTOKEN; |
1156 | break; |
1157 | } |
1158 | break; |
1159 | case XML_TOK_PREFIXED_NAME: |
1160 | tok = XML_TOK_NMTOKEN; |
1161 | break; |
1162 | } |
1163 | break; |
1164 | #endif |
1165 | case BT_PLUS: |
1166 | if (tok == XML_TOK_NMTOKEN) { |
1167 | *nextTokPtr = ptr; |
1168 | return XML_TOK_INVALID; |
1169 | } |
1170 | *nextTokPtr = ptr + MINBPC(enc); |
1171 | return XML_TOK_NAME_PLUS; |
1172 | case BT_AST: |
1173 | if (tok == XML_TOK_NMTOKEN) { |
1174 | *nextTokPtr = ptr; |
1175 | return XML_TOK_INVALID; |
1176 | } |
1177 | *nextTokPtr = ptr + MINBPC(enc); |
1178 | return XML_TOK_NAME_ASTERISK; |
1179 | case BT_QUEST: |
1180 | if (tok == XML_TOK_NMTOKEN) { |
1181 | *nextTokPtr = ptr; |
1182 | return XML_TOK_INVALID; |
1183 | } |
1184 | *nextTokPtr = ptr + MINBPC(enc); |
1185 | return XML_TOK_NAME_QUESTION; |
1186 | default: |
1187 | *nextTokPtr = ptr; |
1188 | return XML_TOK_INVALID; |
1189 | } |
1190 | } |
1191 | return -tok; |
1192 | } |
1193 | |
1194 | static int PTRCALL |
1195 | PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, |
1196 | const char *end, const char **nextTokPtr) |
1197 | { |
1198 | const char *start; |
1199 | if (ptr >= end) |
1200 | return XML_TOK_NONE; |
1201 | else if (! HAS_CHAR(enc, ptr, end)) { |
1202 | /* This line cannot be executed. The incoming data has already |
1203 | * been tokenized once, so incomplete characters like this have |
1204 | * already been eliminated from the input. Retaining the paranoia |
1205 | * check is still valuable, however. |
1206 | */ |
1207 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
1208 | } |
1209 | start = ptr; |
1210 | while (HAS_CHAR(enc, ptr, end)) { |
1211 | switch (BYTE_TYPE(enc, ptr)) { |
1212 | #define LEAD_CASE(n) \ |
1213 | case BT_LEAD ## n: ptr += n; break; |
1214 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1215 | #undef LEAD_CASE |
1216 | case BT_AMP: |
1217 | if (ptr == start) |
1218 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1219 | *nextTokPtr = ptr; |
1220 | return XML_TOK_DATA_CHARS; |
1221 | case BT_LT: |
1222 | /* this is for inside entity references */ |
1223 | *nextTokPtr = ptr; |
1224 | return XML_TOK_INVALID; |
1225 | case BT_LF: |
1226 | if (ptr == start) { |
1227 | *nextTokPtr = ptr + MINBPC(enc); |
1228 | return XML_TOK_DATA_NEWLINE; |
1229 | } |
1230 | *nextTokPtr = ptr; |
1231 | return XML_TOK_DATA_CHARS; |
1232 | case BT_CR: |
1233 | if (ptr == start) { |
1234 | ptr += MINBPC(enc); |
1235 | if (! HAS_CHAR(enc, ptr, end)) |
1236 | return XML_TOK_TRAILING_CR; |
1237 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
1238 | ptr += MINBPC(enc); |
1239 | *nextTokPtr = ptr; |
1240 | return XML_TOK_DATA_NEWLINE; |
1241 | } |
1242 | *nextTokPtr = ptr; |
1243 | return XML_TOK_DATA_CHARS; |
1244 | case BT_S: |
1245 | if (ptr == start) { |
1246 | *nextTokPtr = ptr + MINBPC(enc); |
1247 | return XML_TOK_ATTRIBUTE_VALUE_S; |
1248 | } |
1249 | *nextTokPtr = ptr; |
1250 | return XML_TOK_DATA_CHARS; |
1251 | default: |
1252 | ptr += MINBPC(enc); |
1253 | break; |
1254 | } |
1255 | } |
1256 | *nextTokPtr = ptr; |
1257 | return XML_TOK_DATA_CHARS; |
1258 | } |
1259 | |
1260 | static int PTRCALL |
1261 | PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, |
1262 | const char *end, const char **nextTokPtr) |
1263 | { |
1264 | const char *start; |
1265 | if (ptr >= end) |
1266 | return XML_TOK_NONE; |
1267 | else if (! HAS_CHAR(enc, ptr, end)) { |
1268 | /* This line cannot be executed. The incoming data has already |
1269 | * been tokenized once, so incomplete characters like this have |
1270 | * already been eliminated from the input. Retaining the paranoia |
1271 | * check is still valuable, however. |
1272 | */ |
1273 | return XML_TOK_PARTIAL; /* LCOV_EXCL_LINE */ |
1274 | } |
1275 | start = ptr; |
1276 | while (HAS_CHAR(enc, ptr, end)) { |
1277 | switch (BYTE_TYPE(enc, ptr)) { |
1278 | #define LEAD_CASE(n) \ |
1279 | case BT_LEAD ## n: ptr += n; break; |
1280 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1281 | #undef LEAD_CASE |
1282 | case BT_AMP: |
1283 | if (ptr == start) |
1284 | return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
1285 | *nextTokPtr = ptr; |
1286 | return XML_TOK_DATA_CHARS; |
1287 | case BT_PERCNT: |
1288 | if (ptr == start) { |
1289 | int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc), |
1290 | end, nextTokPtr); |
1291 | return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok; |
1292 | } |
1293 | *nextTokPtr = ptr; |
1294 | return XML_TOK_DATA_CHARS; |
1295 | case BT_LF: |
1296 | if (ptr == start) { |
1297 | *nextTokPtr = ptr + MINBPC(enc); |
1298 | return XML_TOK_DATA_NEWLINE; |
1299 | } |
1300 | *nextTokPtr = ptr; |
1301 | return XML_TOK_DATA_CHARS; |
1302 | case BT_CR: |
1303 | if (ptr == start) { |
1304 | ptr += MINBPC(enc); |
1305 | if (! HAS_CHAR(enc, ptr, end)) |
1306 | return XML_TOK_TRAILING_CR; |
1307 | if (BYTE_TYPE(enc, ptr) == BT_LF) |
1308 | ptr += MINBPC(enc); |
1309 | *nextTokPtr = ptr; |
1310 | return XML_TOK_DATA_NEWLINE; |
1311 | } |
1312 | *nextTokPtr = ptr; |
1313 | return XML_TOK_DATA_CHARS; |
1314 | default: |
1315 | ptr += MINBPC(enc); |
1316 | break; |
1317 | } |
1318 | } |
1319 | *nextTokPtr = ptr; |
1320 | return XML_TOK_DATA_CHARS; |
1321 | } |
1322 | |
1323 | #ifdef XML_DTD |
1324 | |
1325 | static int PTRCALL |
1326 | PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, |
1327 | const char *end, const char **nextTokPtr) |
1328 | { |
1329 | int level = 0; |
1330 | if (MINBPC(enc) > 1) { |
1331 | size_t n = end - ptr; |
1332 | if (n & (MINBPC(enc) - 1)) { |
1333 | n &= ~(MINBPC(enc) - 1); |
1334 | end = ptr + n; |
1335 | } |
1336 | } |
1337 | while (HAS_CHAR(enc, ptr, end)) { |
1338 | switch (BYTE_TYPE(enc, ptr)) { |
1339 | INVALID_CASES(ptr, nextTokPtr) |
1340 | case BT_LT: |
1341 | ptr += MINBPC(enc); |
1342 | REQUIRE_CHAR(enc, ptr, end); |
1343 | if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { |
1344 | ptr += MINBPC(enc); |
1345 | REQUIRE_CHAR(enc, ptr, end); |
1346 | if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { |
1347 | ++level; |
1348 | ptr += MINBPC(enc); |
1349 | } |
1350 | } |
1351 | break; |
1352 | case BT_RSQB: |
1353 | ptr += MINBPC(enc); |
1354 | REQUIRE_CHAR(enc, ptr, end); |
1355 | if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { |
1356 | ptr += MINBPC(enc); |
1357 | REQUIRE_CHAR(enc, ptr, end); |
1358 | if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { |
1359 | ptr += MINBPC(enc); |
1360 | if (level == 0) { |
1361 | *nextTokPtr = ptr; |
1362 | return XML_TOK_IGNORE_SECT; |
1363 | } |
1364 | --level; |
1365 | } |
1366 | } |
1367 | break; |
1368 | default: |
1369 | ptr += MINBPC(enc); |
1370 | break; |
1371 | } |
1372 | } |
1373 | return XML_TOK_PARTIAL; |
1374 | } |
1375 | |
1376 | #endif /* XML_DTD */ |
1377 | |
1378 | static int PTRCALL |
1379 | PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
1380 | const char **badPtr) |
1381 | { |
1382 | ptr += MINBPC(enc); |
1383 | end -= MINBPC(enc); |
1384 | for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { |
1385 | switch (BYTE_TYPE(enc, ptr)) { |
1386 | case BT_DIGIT: |
1387 | case BT_HEX: |
1388 | case BT_MINUS: |
1389 | case BT_APOS: |
1390 | case BT_LPAR: |
1391 | case BT_RPAR: |
1392 | case BT_PLUS: |
1393 | case BT_COMMA: |
1394 | case BT_SOL: |
1395 | case BT_EQUALS: |
1396 | case BT_QUEST: |
1397 | case BT_CR: |
1398 | case BT_LF: |
1399 | case BT_SEMI: |
1400 | case BT_EXCL: |
1401 | case BT_AST: |
1402 | case BT_PERCNT: |
1403 | case BT_NUM: |
1404 | #ifdef XML_NS |
1405 | case BT_COLON: |
1406 | #endif |
1407 | break; |
1408 | case BT_S: |
1409 | if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) { |
1410 | *badPtr = ptr; |
1411 | return 0; |
1412 | } |
1413 | break; |
1414 | case BT_NAME: |
1415 | case BT_NMSTRT: |
1416 | if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
1417 | break; |
1418 | default: |
1419 | switch (BYTE_TO_ASCII(enc, ptr)) { |
1420 | case 0x24: /* $ */ |
1421 | case 0x40: /* @ */ |
1422 | break; |
1423 | default: |
1424 | *badPtr = ptr; |
1425 | return 0; |
1426 | } |
1427 | break; |
1428 | } |
1429 | } |
1430 | return 1; |
1431 | } |
1432 | |
1433 | /* This must only be called for a well-formed start-tag or empty |
1434 | element tag. Returns the number of attributes. Pointers to the |
1435 | first attsMax attributes are stored in atts. |
1436 | */ |
1437 | |
1438 | static int PTRCALL |
1439 | PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
1440 | int attsMax, ATTRIBUTE *atts) |
1441 | { |
1442 | enum { other, inName, inValue } state = inName; |
1443 | int nAtts = 0; |
1444 | int open = 0; /* defined when state == inValue; |
1445 | initialization just to shut up compilers */ |
1446 | |
1447 | for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
1448 | switch (BYTE_TYPE(enc, ptr)) { |
1449 | #define START_NAME \ |
1450 | if (state == other) { \ |
1451 | if (nAtts < attsMax) { \ |
1452 | atts[nAtts].name = ptr; \ |
1453 | atts[nAtts].normalized = 1; \ |
1454 | } \ |
1455 | state = inName; \ |
1456 | } |
1457 | #define LEAD_CASE(n) \ |
1458 | case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
1459 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1460 | #undef LEAD_CASE |
1461 | case BT_NONASCII: |
1462 | case BT_NMSTRT: |
1463 | case BT_HEX: |
1464 | START_NAME |
1465 | break; |
1466 | #undef START_NAME |
1467 | case BT_QUOT: |
1468 | if (state != inValue) { |
1469 | if (nAtts < attsMax) |
1470 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
1471 | state = inValue; |
1472 | open = BT_QUOT; |
1473 | } |
1474 | else if (open == BT_QUOT) { |
1475 | state = other; |
1476 | if (nAtts < attsMax) |
1477 | atts[nAtts].valueEnd = ptr; |
1478 | nAtts++; |
1479 | } |
1480 | break; |
1481 | case BT_APOS: |
1482 | if (state != inValue) { |
1483 | if (nAtts < attsMax) |
1484 | atts[nAtts].valuePtr = ptr + MINBPC(enc); |
1485 | state = inValue; |
1486 | open = BT_APOS; |
1487 | } |
1488 | else if (open == BT_APOS) { |
1489 | state = other; |
1490 | if (nAtts < attsMax) |
1491 | atts[nAtts].valueEnd = ptr; |
1492 | nAtts++; |
1493 | } |
1494 | break; |
1495 | case BT_AMP: |
1496 | if (nAtts < attsMax) |
1497 | atts[nAtts].normalized = 0; |
1498 | break; |
1499 | case BT_S: |
1500 | if (state == inName) |
1501 | state = other; |
1502 | else if (state == inValue |
1503 | && nAtts < attsMax |
1504 | && atts[nAtts].normalized |
1505 | && (ptr == atts[nAtts].valuePtr |
1506 | || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE |
1507 | || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE |
1508 | || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
1509 | atts[nAtts].normalized = 0; |
1510 | break; |
1511 | case BT_CR: case BT_LF: |
1512 | /* This case ensures that the first attribute name is counted |
1513 | Apart from that we could just change state on the quote. */ |
1514 | if (state == inName) |
1515 | state = other; |
1516 | else if (state == inValue && nAtts < attsMax) |
1517 | atts[nAtts].normalized = 0; |
1518 | break; |
1519 | case BT_GT: |
1520 | case BT_SOL: |
1521 | if (state != inValue) |
1522 | return nAtts; |
1523 | break; |
1524 | default: |
1525 | break; |
1526 | } |
1527 | } |
1528 | /* not reached */ |
1529 | } |
1530 | |
1531 | static int PTRFASTCALL |
1532 | PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) |
1533 | { |
1534 | int result = 0; |
1535 | /* skip &# */ |
1536 | ptr += 2*MINBPC(enc); |
1537 | if (CHAR_MATCHES(enc, ptr, ASCII_x)) { |
1538 | for (ptr += MINBPC(enc); |
1539 | !CHAR_MATCHES(enc, ptr, ASCII_SEMI); |
1540 | ptr += MINBPC(enc)) { |
1541 | int c = BYTE_TO_ASCII(enc, ptr); |
1542 | switch (c) { |
1543 | case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4: |
1544 | case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9: |
1545 | result <<= 4; |
1546 | result |= (c - ASCII_0); |
1547 | break; |
1548 | case ASCII_A: case ASCII_B: case ASCII_C: |
1549 | case ASCII_D: case ASCII_E: case ASCII_F: |
1550 | result <<= 4; |
1551 | result += 10 + (c - ASCII_A); |
1552 | break; |
1553 | case ASCII_a: case ASCII_b: case ASCII_c: |
1554 | case ASCII_d: case ASCII_e: case ASCII_f: |
1555 | result <<= 4; |
1556 | result += 10 + (c - ASCII_a); |
1557 | break; |
1558 | } |
1559 | if (result >= 0x110000) |
1560 | return -1; |
1561 | } |
1562 | } |
1563 | else { |
1564 | for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) { |
1565 | int c = BYTE_TO_ASCII(enc, ptr); |
1566 | result *= 10; |
1567 | result += (c - ASCII_0); |
1568 | if (result >= 0x110000) |
1569 | return -1; |
1570 | } |
1571 | } |
1572 | return checkCharRefNumber(result); |
1573 | } |
1574 | |
1575 | static int PTRCALL |
1576 | PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, |
1577 | const char *end) |
1578 | { |
1579 | switch ((end - ptr)/MINBPC(enc)) { |
1580 | case 2: |
1581 | if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) { |
1582 | switch (BYTE_TO_ASCII(enc, ptr)) { |
1583 | case ASCII_l: |
1584 | return ASCII_LT; |
1585 | case ASCII_g: |
1586 | return ASCII_GT; |
1587 | } |
1588 | } |
1589 | break; |
1590 | case 3: |
1591 | if (CHAR_MATCHES(enc, ptr, ASCII_a)) { |
1592 | ptr += MINBPC(enc); |
1593 | if (CHAR_MATCHES(enc, ptr, ASCII_m)) { |
1594 | ptr += MINBPC(enc); |
1595 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) |
1596 | return ASCII_AMP; |
1597 | } |
1598 | } |
1599 | break; |
1600 | case 4: |
1601 | switch (BYTE_TO_ASCII(enc, ptr)) { |
1602 | case ASCII_q: |
1603 | ptr += MINBPC(enc); |
1604 | if (CHAR_MATCHES(enc, ptr, ASCII_u)) { |
1605 | ptr += MINBPC(enc); |
1606 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1607 | ptr += MINBPC(enc); |
1608 | if (CHAR_MATCHES(enc, ptr, ASCII_t)) |
1609 | return ASCII_QUOT; |
1610 | } |
1611 | } |
1612 | break; |
1613 | case ASCII_a: |
1614 | ptr += MINBPC(enc); |
1615 | if (CHAR_MATCHES(enc, ptr, ASCII_p)) { |
1616 | ptr += MINBPC(enc); |
1617 | if (CHAR_MATCHES(enc, ptr, ASCII_o)) { |
1618 | ptr += MINBPC(enc); |
1619 | if (CHAR_MATCHES(enc, ptr, ASCII_s)) |
1620 | return ASCII_APOS; |
1621 | } |
1622 | } |
1623 | break; |
1624 | } |
1625 | } |
1626 | return 0; |
1627 | } |
1628 | |
1629 | /* This function does not appear to be called from anywhere within the |
1630 | * library code. It is used via the macro XmlSameName(), which is |
1631 | * defined but never used. Since it appears in the encoding function |
1632 | * table, removing it is not a thing to be undertaken lightly. For |
1633 | * the moment, we simply exclude it from coverage tests. |
1634 | * |
1635 | * LCOV_EXCL_START |
1636 | */ |
1637 | static int PTRCALL |
1638 | PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
1639 | { |
1640 | for (;;) { |
1641 | switch (BYTE_TYPE(enc, ptr1)) { |
1642 | #define LEAD_CASE(n) \ |
1643 | case BT_LEAD ## n: \ |
1644 | if (*ptr1++ != *ptr2++) \ |
1645 | return 0; |
1646 | LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
1647 | #undef LEAD_CASE |
1648 | /* fall through */ |
1649 | if (*ptr1++ != *ptr2++) |
1650 | return 0; |
1651 | break; |
1652 | case BT_NONASCII: |
1653 | case BT_NMSTRT: |
1654 | #ifdef XML_NS |
1655 | case BT_COLON: |
1656 | #endif |
1657 | case BT_HEX: |
1658 | case BT_DIGIT: |
1659 | case BT_NAME: |
1660 | case BT_MINUS: |
1661 | if (*ptr2++ != *ptr1++) |
1662 | return 0; |
1663 | if (MINBPC(enc) > 1) { |
1664 | if (*ptr2++ != *ptr1++) |
1665 | return 0; |
1666 | if (MINBPC(enc) > 2) { |
1667 | if (*ptr2++ != *ptr1++) |
1668 | return 0; |
1669 | if (MINBPC(enc) > 3) { |
1670 | if (*ptr2++ != *ptr1++) |
1671 | return 0; |
1672 | } |
1673 | } |
1674 | } |
1675 | break; |
1676 | default: |
1677 | if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
1678 | return 1; |
1679 | switch (BYTE_TYPE(enc, ptr2)) { |
1680 | case BT_LEAD2: |
1681 | case BT_LEAD3: |
1682 | case BT_LEAD4: |
1683 | case BT_NONASCII: |
1684 | case BT_NMSTRT: |
1685 | #ifdef XML_NS |
1686 | case BT_COLON: |
1687 | #endif |
1688 | case BT_HEX: |
1689 | case BT_DIGIT: |
1690 | case BT_NAME: |
1691 | case BT_MINUS: |
1692 | return 0; |
1693 | default: |
1694 | return 1; |
1695 | } |
1696 | } |
1697 | } |
1698 | /* not reached */ |
1699 | } |
1700 | /* LCOV_EXCL_STOP */ |
1701 | |
1702 | static int PTRCALL |
1703 | PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, |
1704 | const char *end1, const char *ptr2) |
1705 | { |
1706 | for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
1707 | if (end1 - ptr1 < MINBPC(enc)) { |
1708 | /* This line cannot be executed. THe incoming data has already |
1709 | * been tokenized once, so imcomplete characters like this have |
1710 | * already been eliminated from the input. Retaining the |
1711 | * paranoia check is still valuable, however. |
1712 | */ |
1713 | return 0; /* LCOV_EXCL_LINE */ |
1714 | } |
1715 | if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
1716 | return 0; |
1717 | } |
1718 | return ptr1 == end1; |
1719 | } |
1720 | |
1721 | static int PTRFASTCALL |
1722 | PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
1723 | { |
1724 | const char *start = ptr; |
1725 | for (;;) { |
1726 | switch (BYTE_TYPE(enc, ptr)) { |
1727 | #define LEAD_CASE(n) \ |
1728 | case BT_LEAD ## n: ptr += n; break; |
1729 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1730 | #undef LEAD_CASE |
1731 | case BT_NONASCII: |
1732 | case BT_NMSTRT: |
1733 | #ifdef XML_NS |
1734 | case BT_COLON: |
1735 | #endif |
1736 | case BT_HEX: |
1737 | case BT_DIGIT: |
1738 | case BT_NAME: |
1739 | case BT_MINUS: |
1740 | ptr += MINBPC(enc); |
1741 | break; |
1742 | default: |
1743 | return (int)(ptr - start); |
1744 | } |
1745 | } |
1746 | } |
1747 | |
1748 | static const char * PTRFASTCALL |
1749 | PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
1750 | { |
1751 | for (;;) { |
1752 | switch (BYTE_TYPE(enc, ptr)) { |
1753 | case BT_LF: |
1754 | case BT_CR: |
1755 | case BT_S: |
1756 | ptr += MINBPC(enc); |
1757 | break; |
1758 | default: |
1759 | return ptr; |
1760 | } |
1761 | } |
1762 | } |
1763 | |
1764 | static void PTRCALL |
1765 | PREFIX(updatePosition)(const ENCODING *enc, |
1766 | const char *ptr, |
1767 | const char *end, |
1768 | POSITION *pos) |
1769 | { |
1770 | while (HAS_CHAR(enc, ptr, end)) { |
1771 | switch (BYTE_TYPE(enc, ptr)) { |
1772 | #define LEAD_CASE(n) \ |
1773 | case BT_LEAD ## n: \ |
1774 | ptr += n; \ |
1775 | break; |
1776 | LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
1777 | #undef LEAD_CASE |
1778 | case BT_LF: |
1779 | pos->columnNumber = (XML_Size)-1; |
1780 | pos->lineNumber++; |
1781 | ptr += MINBPC(enc); |
1782 | break; |
1783 | case BT_CR: |
1784 | pos->lineNumber++; |
1785 | ptr += MINBPC(enc); |
1786 | if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) |
1787 | ptr += MINBPC(enc); |
1788 | pos->columnNumber = (XML_Size)-1; |
1789 | break; |
1790 | default: |
1791 | ptr += MINBPC(enc); |
1792 | break; |
1793 | } |
1794 | pos->columnNumber++; |
1795 | } |
1796 | } |
1797 | |
1798 | #undef DO_LEAD_CASE |
1799 | #undef MULTIBYTE_CASES |
1800 | #undef INVALID_CASES |
1801 | #undef CHECK_NAME_CASE |
1802 | #undef CHECK_NAME_CASES |
1803 | #undef CHECK_NMSTRT_CASE |
1804 | #undef CHECK_NMSTRT_CASES |
1805 | |
1806 | #endif /* XML_TOK_IMPL_C */ |
1807 | |