1 | // |
2 | // CppToken.cpp |
3 | // |
4 | // Library: CppParser |
5 | // Package: CppParser |
6 | // Module: CppToken |
7 | // |
8 | // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. |
9 | // and Contributors. |
10 | // |
11 | // SPDX-License-Identifier: BSL-1.0 |
12 | // |
13 | |
14 | |
15 | #include "Poco/CppParser/CppToken.h" |
16 | #include "Poco/Exception.h" |
17 | #include "Poco/NumberParser.h" |
18 | #include <cctype> |
19 | #include <cstdlib> |
20 | |
21 | |
22 | using Poco::Token; |
23 | using Poco::SyntaxException; |
24 | |
25 | |
26 | namespace Poco { |
27 | namespace CppParser { |
28 | |
29 | |
30 | CppToken::CppToken() |
31 | { |
32 | } |
33 | |
34 | |
35 | CppToken::~CppToken() |
36 | { |
37 | } |
38 | |
39 | |
40 | void CppToken::syntaxError(const std::string& expected, const std::string& actual) |
41 | { |
42 | std::string msg("expected: " ); |
43 | msg.append(expected); |
44 | msg.append(", got: " ); |
45 | msg.append(actual); |
46 | throw SyntaxException(msg); |
47 | } |
48 | |
49 | |
50 | OperatorToken::OperatorToken() |
51 | { |
52 | int i = 1; |
53 | _opMap["[" ] = i++; |
54 | _opMap["]" ] = i++; |
55 | _opMap["(" ] = i++; |
56 | _opMap[")" ] = i++; |
57 | _opMap["{" ] = i++; |
58 | _opMap["}" ] = i++; |
59 | _opMap["<" ] = i++; |
60 | _opMap["<=" ] = i++; |
61 | _opMap["<<" ] = i++; |
62 | _opMap["<<=" ] = i++; |
63 | _opMap[">" ] = i++; |
64 | _opMap[">=" ] = i++; |
65 | _opMap[">>" ] = i++; |
66 | _opMap[">>=" ] = i++; |
67 | _opMap["=" ] = i++; |
68 | _opMap["==" ] = i++; |
69 | _opMap["!" ] = i++; |
70 | _opMap["!=" ] = i++; |
71 | _opMap["&" ] = i++; |
72 | _opMap["&=" ] = i++; |
73 | _opMap["&&" ] = i++; |
74 | _opMap["|" ] = i++; |
75 | _opMap["|=" ] = i++; |
76 | _opMap["||" ] = i++; |
77 | _opMap["^" ] = i++; |
78 | _opMap["^=" ] = i++; |
79 | _opMap["~" ] = i++; |
80 | _opMap["*" ] = i++; |
81 | _opMap["*=" ] = i++; |
82 | _opMap["/" ] = i++; |
83 | _opMap["/=" ] = i++; |
84 | _opMap["+" ] = i++; |
85 | _opMap["+=" ] = i++; |
86 | _opMap["++" ] = i++; |
87 | _opMap["-" ] = i++; |
88 | _opMap["-=" ] = i++; |
89 | _opMap["--" ] = i++; |
90 | _opMap["->" ] = i++; |
91 | _opMap["%" ] = i++; |
92 | _opMap["%=" ] = i++; |
93 | _opMap["," ] = i++; |
94 | _opMap["." ] = i++; |
95 | _opMap["..." ] = i++; |
96 | _opMap[":" ] = i++; |
97 | _opMap["::" ] = i++; |
98 | _opMap[";" ] = i++; |
99 | _opMap["?" ] = i++; |
100 | } |
101 | |
102 | |
103 | OperatorToken::~OperatorToken() |
104 | { |
105 | } |
106 | |
107 | |
108 | Token::Class OperatorToken::tokenClass() const |
109 | { |
110 | return Token::OPERATOR_TOKEN; |
111 | } |
112 | |
113 | |
114 | bool OperatorToken::start(char c, std::istream& istr) |
115 | { |
116 | _value = c; |
117 | char next = (char) istr.peek(); |
118 | switch (_value[0]) |
119 | { |
120 | case '[': |
121 | case ']': |
122 | case '(': |
123 | case ')': |
124 | case '{': |
125 | case '}': |
126 | case '<': |
127 | case '>': |
128 | case '=': |
129 | case '!': |
130 | case '&': |
131 | case '|': |
132 | case '*': |
133 | case '+': |
134 | case '-': |
135 | case '^': |
136 | case '~': |
137 | case ',': |
138 | case ':': |
139 | case ';': |
140 | case '%': |
141 | case '?': |
142 | return true; |
143 | case '.': |
144 | return !(next >= '0' && next <= '9'); |
145 | case '/': |
146 | return !(next == '/' || next == '*'); |
147 | default: |
148 | return false; |
149 | } |
150 | } |
151 | |
152 | |
153 | void OperatorToken::finish(std::istream& istr) |
154 | { |
155 | int next = (char) istr.peek(); |
156 | switch (_value[0]) |
157 | { |
158 | case '(': |
159 | case ')': |
160 | case '{': |
161 | case '}': |
162 | case '[': |
163 | case ']': |
164 | case ';': |
165 | case '?': |
166 | case '~': |
167 | case ',': |
168 | break; |
169 | case '.': |
170 | if (next == '.') |
171 | { |
172 | _value += (char) istr.get(); |
173 | if (istr.peek() != '.') syntaxError("." , std::string(1, (char) istr.peek())); |
174 | _value += (char) istr.get(); |
175 | } |
176 | break; |
177 | case ':': |
178 | if (next == ':') _value += (char) istr.get(); |
179 | break; |
180 | case '<': |
181 | if (next == '<') |
182 | { |
183 | _value += (char) istr.get(); |
184 | next = (char) istr.peek(); |
185 | } |
186 | if (next == '=') _value += (char) istr.get(); |
187 | break; |
188 | case '>': |
189 | if (next == '>') |
190 | { |
191 | _value += (char) istr.get(); |
192 | next = (char) istr.peek(); |
193 | } |
194 | if (next == '=') _value += (char) istr.get(); |
195 | break; |
196 | case '&': |
197 | if (next == '&' || next == '=') _value += (char) istr.get(); |
198 | break; |
199 | case '|': |
200 | if (next == '|' || next == '=') _value += (char) istr.get(); |
201 | break; |
202 | case '+': |
203 | if (next == '+' || next == '=') _value += (char) istr.get(); |
204 | break; |
205 | case '-': |
206 | if (next == '-' || next == '=' || next == '>') _value += (char) istr.get(); |
207 | break; |
208 | case '=': |
209 | case '!': |
210 | case '*': |
211 | case '/': |
212 | case '^': |
213 | case '%': |
214 | if (next == '=') _value += (char) istr.get(); |
215 | break; |
216 | default: |
217 | poco_bugcheck(); |
218 | } |
219 | } |
220 | |
221 | |
222 | int OperatorToken::asInteger() const |
223 | { |
224 | OpMap::const_iterator it = _opMap.find(_value); |
225 | if (it != _opMap.end()) |
226 | return it->second; |
227 | else |
228 | return 0; |
229 | } |
230 | |
231 | |
232 | IdentifierToken::IdentifierToken() |
233 | { |
234 | int i = 1; |
235 | _kwMap["alignas" ] = i++; |
236 | _kwMap["alignof" ] = i++; |
237 | _kwMap["and" ] = i++; |
238 | _kwMap["and_eq" ] = i++; |
239 | _kwMap["asm" ] = i++; |
240 | _kwMap["auto" ] = i++; |
241 | _kwMap["bitand" ] = i++; |
242 | _kwMap["bitor" ] = i++; |
243 | _kwMap["bool" ] = i++; |
244 | _kwMap["break" ] = i++; |
245 | _kwMap["case" ] = i++; |
246 | _kwMap["catch" ] = i++; |
247 | _kwMap["char" ] = i++; |
248 | _kwMap["char16_t" ] = i++; |
249 | _kwMap["char32_t" ] = i++; |
250 | _kwMap["class" ] = i++; |
251 | _kwMap["compl" ] = i++; |
252 | _kwMap["const" ] = i++; |
253 | _kwMap["constexpr" ] = i++; |
254 | _kwMap["const_cast" ] = i++; |
255 | _kwMap["continue" ] = i++; |
256 | _kwMap["decltype" ] = i++; |
257 | _kwMap["default" ] = i++; |
258 | _kwMap["delete" ] = i++; |
259 | _kwMap["do" ] = i++; |
260 | _kwMap["double" ] = i++; |
261 | _kwMap["dynamic_cast" ] = i++; |
262 | _kwMap["else" ] = i++; |
263 | _kwMap["enum" ] = i++; |
264 | _kwMap["explicit" ] = i++; |
265 | _kwMap["export" ] = i++; |
266 | _kwMap["extern" ] = i++; |
267 | _kwMap["false" ] = i++; |
268 | _kwMap["float" ] = i++; |
269 | _kwMap["for" ] = i++; |
270 | _kwMap["friend" ] = i++; |
271 | _kwMap["goto" ] = i++; |
272 | _kwMap["if" ] = i++; |
273 | _kwMap["inline" ] = i++; |
274 | _kwMap["int" ] = i++; |
275 | _kwMap["long" ] = i++; |
276 | _kwMap["mutable" ] = i++; |
277 | _kwMap["namespace" ] = i++; |
278 | _kwMap["new" ] = i++; |
279 | _kwMap["noexcept" ] = i++; |
280 | _kwMap["not" ] = i++; |
281 | _kwMap["not_eq" ] = i++; |
282 | _kwMap["nullptr" ] = i++; |
283 | _kwMap["operator" ] = i++; |
284 | _kwMap["or" ] = i++; |
285 | _kwMap["or_eq" ] = i++; |
286 | _kwMap["private" ] = i++; |
287 | _kwMap["protected" ] = i++; |
288 | _kwMap["public" ] = i++; |
289 | _kwMap["register" ] = i++; |
290 | _kwMap["reinterpret_cast" ] = i++; |
291 | _kwMap["return" ] = i++; |
292 | _kwMap["short" ] = i++; |
293 | _kwMap["signed" ] = i++; |
294 | _kwMap["sizeof" ] = i++; |
295 | _kwMap["static" ] = i++; |
296 | _kwMap["static_assert" ] = i++; |
297 | _kwMap["static_cast" ] = i++; |
298 | _kwMap["struct" ] = i++; |
299 | _kwMap["switch" ] = i++; |
300 | _kwMap["template" ] = i++; |
301 | _kwMap["this" ] = i++; |
302 | _kwMap["thread_local" ] = i++; |
303 | _kwMap["throw" ] = i++; |
304 | _kwMap["true" ] = i++; |
305 | _kwMap["try" ] = i++; |
306 | _kwMap["typedef" ] = i++; |
307 | _kwMap["typeid" ] = i++; |
308 | _kwMap["typename" ] = i++; |
309 | _kwMap["union" ] = i++; |
310 | _kwMap["unsigned" ] = i++; |
311 | _kwMap["using" ] = i++; |
312 | _kwMap["virtual" ] = i++; |
313 | _kwMap["void" ] = i++; |
314 | _kwMap["volatile" ] = i++; |
315 | _kwMap["wchar_t" ] = i++; |
316 | _kwMap["while" ] = i++; |
317 | _kwMap["xor" ] = i++; |
318 | _kwMap["xor_eq" ] = i++; |
319 | } |
320 | |
321 | |
322 | IdentifierToken::~IdentifierToken() |
323 | { |
324 | } |
325 | |
326 | |
327 | Token::Class IdentifierToken::tokenClass() const |
328 | { |
329 | return asInteger() ? Token::KEYWORD_TOKEN : Token::IDENTIFIER_TOKEN; |
330 | } |
331 | |
332 | |
333 | bool IdentifierToken::start(char c, std::istream& /*istr*/) |
334 | { |
335 | _value = c; |
336 | return (c >= 'A' && c <= 'Z') || |
337 | (c >= 'a' && c <= 'z') || |
338 | (c == '_' || c == '$'); |
339 | } |
340 | |
341 | |
342 | void IdentifierToken::finish(std::istream& istr) |
343 | { |
344 | int next = (char) istr.peek(); |
345 | while ((next >= 'A' && next <= 'Z') || |
346 | (next >= 'a' && next <= 'z') || |
347 | (next >= '0' && next <= '9') || |
348 | (next == '_' || next == '$')) |
349 | { |
350 | _value += (char) istr.get(); |
351 | next = istr.peek(); |
352 | } |
353 | } |
354 | |
355 | |
356 | int IdentifierToken::asInteger() const |
357 | { |
358 | KWMap::const_iterator it = _kwMap.find(_value); |
359 | if (it != _kwMap.end()) |
360 | return it->second; |
361 | else |
362 | return 0; |
363 | } |
364 | |
365 | |
366 | StringLiteralToken::StringLiteralToken() |
367 | { |
368 | } |
369 | |
370 | |
371 | StringLiteralToken::~StringLiteralToken() |
372 | { |
373 | } |
374 | |
375 | |
376 | Token::Class StringLiteralToken::tokenClass() const |
377 | { |
378 | return Token::STRING_LITERAL_TOKEN; |
379 | } |
380 | |
381 | |
382 | bool StringLiteralToken::start(char c, std::istream& /*istr*/) |
383 | { |
384 | _value = c; |
385 | return c == '"'; |
386 | } |
387 | |
388 | |
389 | void StringLiteralToken::finish(std::istream& istr) |
390 | { |
391 | int next = istr.peek(); |
392 | while (next != -1 && next != '"' && next != '\n' && next != '\r') |
393 | { |
394 | if (next == '\\') _value += (char) istr.get(); |
395 | _value += (char) istr.get(); |
396 | next = istr.peek(); |
397 | } |
398 | if (next == '"') |
399 | { |
400 | next = istr.get(); |
401 | _value += (char) next; |
402 | } |
403 | else throw SyntaxException("Unterminated string literal" ); |
404 | } |
405 | |
406 | |
407 | std::string StringLiteralToken::asString() const |
408 | { |
409 | std::string result; |
410 | std::string::const_iterator it = _value.begin(); |
411 | std::string::const_iterator end = _value.end(); |
412 | if (it != end) |
413 | { |
414 | if (*it == '"') ++it; |
415 | while (it != end && *it != '"') |
416 | { |
417 | if (*it == '\\') ++it; |
418 | if (it != end) result += *it++; |
419 | } |
420 | } |
421 | return result; |
422 | } |
423 | |
424 | |
425 | CharLiteralToken::CharLiteralToken() |
426 | { |
427 | } |
428 | |
429 | |
430 | CharLiteralToken::~CharLiteralToken() |
431 | { |
432 | } |
433 | |
434 | |
435 | Token::Class CharLiteralToken::tokenClass() const |
436 | { |
437 | return Token::CHAR_LITERAL_TOKEN; |
438 | } |
439 | |
440 | |
441 | bool CharLiteralToken::start(char c, std::istream& /*istr*/) |
442 | { |
443 | _value = c; |
444 | return c == '\''; |
445 | } |
446 | |
447 | |
448 | void CharLiteralToken::finish(std::istream& istr) |
449 | { |
450 | int next = istr.peek(); |
451 | while (next != -1 && next != '\'' && next != '\n' && next != '\r') |
452 | { |
453 | if (next == '\\') _value += (char) istr.get(); |
454 | _value += (char) istr.get(); |
455 | next = istr.peek(); |
456 | } |
457 | if (next == '\'') |
458 | { |
459 | next = istr.get(); |
460 | _value += (char) next; |
461 | } |
462 | else throw SyntaxException("Unterminated character literal" ); |
463 | } |
464 | |
465 | |
466 | char CharLiteralToken::asChar() const |
467 | { |
468 | char result('\0'); |
469 | std::string::const_iterator it = _value.begin(); |
470 | std::string::const_iterator end = _value.end(); |
471 | if (it != end) |
472 | { |
473 | if (*it == '\'') ++it; |
474 | while (it != end && *it != '\'') |
475 | { |
476 | if (*it == '\\') ++it; |
477 | if (it != end) result = *it++; |
478 | } |
479 | } |
480 | return result; |
481 | } |
482 | |
483 | |
484 | NumberLiteralToken::NumberLiteralToken(): |
485 | _isFloat(false) |
486 | { |
487 | } |
488 | |
489 | |
490 | NumberLiteralToken::~NumberLiteralToken() |
491 | { |
492 | } |
493 | |
494 | |
495 | Token::Class NumberLiteralToken::tokenClass() const |
496 | { |
497 | return _isFloat ? Token::FLOAT_LITERAL_TOKEN : Token::INTEGER_LITERAL_TOKEN; |
498 | } |
499 | |
500 | |
501 | bool NumberLiteralToken::start(char c, std::istream& istr) |
502 | { |
503 | _value = c; |
504 | int next = istr.peek(); |
505 | return (c >= '0' && c <= '9') || |
506 | (c == '.' && next >= '0' && next <= '9'); |
507 | } |
508 | |
509 | |
510 | void NumberLiteralToken::finish(std::istream& istr) |
511 | { |
512 | int next = istr.peek(); |
513 | _isFloat = false; |
514 | if (_value[0] != '.') // starts with digit |
515 | { |
516 | if (_value[0] == '0') |
517 | { |
518 | if (next == 'x' || next == 'X') |
519 | { |
520 | return finishHex(istr, next); |
521 | } |
522 | else if (next == 'b' || next == 'B') |
523 | { |
524 | return finishBin(istr, next); |
525 | } |
526 | } |
527 | while ((next >= '0' && next <= '9') || next == '\'') |
528 | { |
529 | _value += (char) istr.get(); |
530 | next = istr.peek(); |
531 | } |
532 | if (next == '.') |
533 | { |
534 | next = istr.get(); |
535 | next = istr.peek(); |
536 | if (next != '.') |
537 | { |
538 | _isFloat = true; |
539 | _value += '.'; |
540 | } |
541 | else // double period |
542 | { |
543 | istr.unget(); |
544 | } |
545 | } |
546 | } |
547 | else |
548 | { |
549 | _isFloat = true; |
550 | _value += istr.get(); |
551 | next = istr.peek(); |
552 | } |
553 | while (next >= '0' && next <= '9') |
554 | { |
555 | _value += (char) istr.get(); |
556 | next = istr.peek(); |
557 | } |
558 | if (next == 'e' || next == 'E') |
559 | { |
560 | _isFloat = true; |
561 | finishExp(istr, next); |
562 | } |
563 | finishSuffix(istr, next); |
564 | } |
565 | |
566 | |
567 | void NumberLiteralToken::finishHex(std::istream& istr, int next) |
568 | { |
569 | _value += (char) istr.get(); |
570 | next = istr.peek(); |
571 | while (std::isxdigit(next) || next == '\'') |
572 | { |
573 | _value += (char) istr.get(); |
574 | next = istr.peek(); |
575 | } |
576 | if (next == '.') |
577 | { |
578 | _isFloat = true; |
579 | _value += (char) istr.get(); |
580 | next = istr.peek(); |
581 | while (std::isxdigit(next) || next == '\'') |
582 | { |
583 | _value += (char) istr.get(); |
584 | next = istr.peek(); |
585 | } |
586 | } |
587 | if (next == 'p' || next == 'P') |
588 | { |
589 | finishExp(istr, next); |
590 | } |
591 | finishSuffix(istr, next); |
592 | } |
593 | |
594 | |
595 | void NumberLiteralToken::finishBin(std::istream& istr, int next) |
596 | { |
597 | _value += (char) istr.get(); |
598 | next = istr.peek(); |
599 | while (next == '0' || next == '1' || next == '\'') |
600 | { |
601 | _value += (char) istr.get(); |
602 | next = istr.peek(); |
603 | } |
604 | finishSuffix(istr, next); |
605 | } |
606 | |
607 | |
608 | void NumberLiteralToken::finishExp(std::istream& istr, int next) |
609 | { |
610 | _isFloat = true; |
611 | _value += (char) istr.get(); |
612 | next = istr.peek(); |
613 | if (next == '+' || next == '-') |
614 | { |
615 | _value += (char) istr.get(); |
616 | next = istr.peek(); |
617 | } |
618 | if (next >= '0' && next <= '9') |
619 | { |
620 | while (next >= '0' && next <= '9') |
621 | { |
622 | _value += (char) istr.get(); |
623 | next = istr.peek(); |
624 | } |
625 | } |
626 | else |
627 | { |
628 | std::string s(1, (char) next); |
629 | syntaxError("digit" , s); |
630 | } |
631 | } |
632 | |
633 | |
634 | void NumberLiteralToken::finishSuffix(std::istream& istr, int next) |
635 | { |
636 | if (_isFloat) |
637 | { |
638 | if (next == 'L' || next == 'l' || next == 'F' || next == 'f') |
639 | _value += (char) istr.get(); |
640 | } |
641 | else |
642 | { |
643 | while (next == 'L' || next == 'l' || next == 'U' || next == 'u') |
644 | { |
645 | _value += (char) istr.get(); |
646 | next = istr.peek(); |
647 | } |
648 | } |
649 | } |
650 | |
651 | |
652 | int NumberLiteralToken::asInteger() const |
653 | { |
654 | return static_cast<int>(std::strtol(_value.c_str(), 0, 0)); |
655 | } |
656 | |
657 | |
658 | double NumberLiteralToken::asFloat() const |
659 | { |
660 | return std::strtod(_value.c_str(), 0); |
661 | } |
662 | |
663 | |
664 | CommentToken::() |
665 | { |
666 | } |
667 | |
668 | |
669 | CommentToken::() |
670 | { |
671 | } |
672 | |
673 | |
674 | Token::Class CommentToken::() const |
675 | { |
676 | return (_value.length() > 2 && _value[2] == '/') ? Token::SPECIAL_COMMENT_TOKEN : Token::COMMENT_TOKEN; |
677 | } |
678 | |
679 | |
680 | bool CommentToken::(char c, std::istream& istr) |
681 | { |
682 | _value = c; |
683 | int next = istr.peek(); |
684 | return c == '/' && (next == '*' || next == '/'); |
685 | } |
686 | |
687 | |
688 | void CommentToken::(std::istream& istr) |
689 | { |
690 | int next = istr.peek(); |
691 | if (next == '/') |
692 | { |
693 | while (next != -1 && next != '\r' && next != '\n') |
694 | { |
695 | _value += (char) istr.get(); |
696 | next = istr.peek(); |
697 | } |
698 | } |
699 | else |
700 | { |
701 | _value += (char) istr.get(); // * |
702 | next = istr.peek(); |
703 | while (next != -1) |
704 | { |
705 | next = istr.get(); |
706 | _value += (char) next; |
707 | if (next == '*' && istr.peek() == '/') |
708 | { |
709 | _value += (char) istr.get(); |
710 | break; |
711 | } |
712 | } |
713 | } |
714 | } |
715 | |
716 | |
717 | std::string CommentToken::() const |
718 | { |
719 | if (_value.length() > 2 && _value[2] == '/') |
720 | return _value.substr(3); |
721 | else |
722 | return _value.substr(2); |
723 | } |
724 | |
725 | |
726 | PreprocessorToken::PreprocessorToken() |
727 | { |
728 | } |
729 | |
730 | |
731 | PreprocessorToken::~PreprocessorToken() |
732 | { |
733 | } |
734 | |
735 | |
736 | Token::Class PreprocessorToken::tokenClass() const |
737 | { |
738 | return Token::PREPROCESSOR_TOKEN; |
739 | } |
740 | |
741 | |
742 | bool PreprocessorToken::start(char c, std::istream& /*istr*/) |
743 | { |
744 | _value = c; |
745 | return c == '#'; |
746 | } |
747 | |
748 | |
749 | void PreprocessorToken::finish(std::istream& istr) |
750 | { |
751 | int pb = -1; |
752 | int next = istr.peek(); |
753 | while (next != -1 && next != '\r' && next != '\n') |
754 | { |
755 | if (next == '\\') |
756 | { |
757 | istr.get(); |
758 | int p = istr.peek(); |
759 | if (p == '\r') |
760 | { |
761 | istr.get(); |
762 | if (istr.peek() == '\n') |
763 | p = istr.get(); |
764 | next = p; |
765 | } |
766 | else if (p == '\n') |
767 | { |
768 | next = p; |
769 | } |
770 | else |
771 | { |
772 | pb = next; |
773 | } |
774 | } |
775 | if (next != -1) |
776 | { |
777 | _value += (char) (pb != -1 ? pb : istr.get()); |
778 | next = istr.peek(); |
779 | pb = -1; |
780 | } |
781 | } |
782 | } |
783 | |
784 | |
785 | } } // namespace Poco::CppParser |
786 | |
787 | |