1//
2// CppToken.cpp
3//
4// Library: CppParser
5// Package: CppParser
6// Module: CppToken
7//
8// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
9// and Contributors.
10//
11// SPDX-License-Identifier: BSL-1.0
12//
13
14
15#include "Poco/CppParser/CppToken.h"
16#include "Poco/Exception.h"
17#include "Poco/NumberParser.h"
18#include <cctype>
19#include <cstdlib>
20
21
22using Poco::Token;
23using Poco::SyntaxException;
24
25
26namespace Poco {
27namespace CppParser {
28
29
// Creates the token base object; there is no state to initialise here.
CppToken::CppToken()
{
}
33
34
// Destroys the token base object; there is nothing to release here.
CppToken::~CppToken()
{
}
38
39
40void CppToken::syntaxError(const std::string& expected, const std::string& actual)
41{
42 std::string msg("expected: ");
43 msg.append(expected);
44 msg.append(", got: ");
45 msg.append(actual);
46 throw SyntaxException(msg);
47}
48
49
50OperatorToken::OperatorToken()
51{
52 int i = 1;
53 _opMap["["] = i++;
54 _opMap["]"] = i++;
55 _opMap["("] = i++;
56 _opMap[")"] = i++;
57 _opMap["{"] = i++;
58 _opMap["}"] = i++;
59 _opMap["<"] = i++;
60 _opMap["<="] = i++;
61 _opMap["<<"] = i++;
62 _opMap["<<="] = i++;
63 _opMap[">"] = i++;
64 _opMap[">="] = i++;
65 _opMap[">>"] = i++;
66 _opMap[">>="] = i++;
67 _opMap["="] = i++;
68 _opMap["=="] = i++;
69 _opMap["!"] = i++;
70 _opMap["!="] = i++;
71 _opMap["&"] = i++;
72 _opMap["&="] = i++;
73 _opMap["&&"] = i++;
74 _opMap["|"] = i++;
75 _opMap["|="] = i++;
76 _opMap["||"] = i++;
77 _opMap["^"] = i++;
78 _opMap["^="] = i++;
79 _opMap["~"] = i++;
80 _opMap["*"] = i++;
81 _opMap["*="] = i++;
82 _opMap["/"] = i++;
83 _opMap["/="] = i++;
84 _opMap["+"] = i++;
85 _opMap["+="] = i++;
86 _opMap["++"] = i++;
87 _opMap["-"] = i++;
88 _opMap["-="] = i++;
89 _opMap["--"] = i++;
90 _opMap["->"] = i++;
91 _opMap["%"] = i++;
92 _opMap["%="] = i++;
93 _opMap[","] = i++;
94 _opMap["."] = i++;
95 _opMap["..."] = i++;
96 _opMap[":"] = i++;
97 _opMap["::"] = i++;
98 _opMap[";"] = i++;
99 _opMap["?"] = i++;
100}
101
102
// Destroys the operator token; no explicit cleanup is performed here.
OperatorToken::~OperatorToken()
{
}
106
107
// Always reports Token::OPERATOR_TOKEN.
Token::Class OperatorToken::tokenClass() const
{
	return Token::OPERATOR_TOKEN;
}
112
113
114bool OperatorToken::start(char c, std::istream& istr)
115{
116 _value = c;
117 char next = (char) istr.peek();
118 switch (_value[0])
119 {
120 case '[':
121 case ']':
122 case '(':
123 case ')':
124 case '{':
125 case '}':
126 case '<':
127 case '>':
128 case '=':
129 case '!':
130 case '&':
131 case '|':
132 case '*':
133 case '+':
134 case '-':
135 case '^':
136 case '~':
137 case ',':
138 case ':':
139 case ';':
140 case '%':
141 case '?':
142 return true;
143 case '.':
144 return !(next >= '0' && next <= '9');
145 case '/':
146 return !(next == '/' || next == '*');
147 default:
148 return false;
149 }
150}
151
152
153void OperatorToken::finish(std::istream& istr)
154{
155 int next = (char) istr.peek();
156 switch (_value[0])
157 {
158 case '(':
159 case ')':
160 case '{':
161 case '}':
162 case '[':
163 case ']':
164 case ';':
165 case '?':
166 case '~':
167 case ',':
168 break;
169 case '.':
170 if (next == '.')
171 {
172 _value += (char) istr.get();
173 if (istr.peek() != '.') syntaxError(".", std::string(1, (char) istr.peek()));
174 _value += (char) istr.get();
175 }
176 break;
177 case ':':
178 if (next == ':') _value += (char) istr.get();
179 break;
180 case '<':
181 if (next == '<')
182 {
183 _value += (char) istr.get();
184 next = (char) istr.peek();
185 }
186 if (next == '=') _value += (char) istr.get();
187 break;
188 case '>':
189 if (next == '>')
190 {
191 _value += (char) istr.get();
192 next = (char) istr.peek();
193 }
194 if (next == '=') _value += (char) istr.get();
195 break;
196 case '&':
197 if (next == '&' || next == '=') _value += (char) istr.get();
198 break;
199 case '|':
200 if (next == '|' || next == '=') _value += (char) istr.get();
201 break;
202 case '+':
203 if (next == '+' || next == '=') _value += (char) istr.get();
204 break;
205 case '-':
206 if (next == '-' || next == '=' || next == '>') _value += (char) istr.get();
207 break;
208 case '=':
209 case '!':
210 case '*':
211 case '/':
212 case '^':
213 case '%':
214 if (next == '=') _value += (char) istr.get();
215 break;
216 default:
217 poco_bugcheck();
218 }
219}
220
221
222int OperatorToken::asInteger() const
223{
224 OpMap::const_iterator it = _opMap.find(_value);
225 if (it != _opMap.end())
226 return it->second;
227 else
228 return 0;
229}
230
231
232IdentifierToken::IdentifierToken()
233{
234 int i = 1;
235 _kwMap["alignas"] = i++;
236 _kwMap["alignof"] = i++;
237 _kwMap["and"] = i++;
238 _kwMap["and_eq"] = i++;
239 _kwMap["asm"] = i++;
240 _kwMap["auto"] = i++;
241 _kwMap["bitand"] = i++;
242 _kwMap["bitor"] = i++;
243 _kwMap["bool"] = i++;
244 _kwMap["break"] = i++;
245 _kwMap["case"] = i++;
246 _kwMap["catch"] = i++;
247 _kwMap["char"] = i++;
248 _kwMap["char16_t"] = i++;
249 _kwMap["char32_t"] = i++;
250 _kwMap["class"] = i++;
251 _kwMap["compl"] = i++;
252 _kwMap["const"] = i++;
253 _kwMap["constexpr"] = i++;
254 _kwMap["const_cast"] = i++;
255 _kwMap["continue"] = i++;
256 _kwMap["decltype"] = i++;
257 _kwMap["default"] = i++;
258 _kwMap["delete"] = i++;
259 _kwMap["do"] = i++;
260 _kwMap["double"] = i++;
261 _kwMap["dynamic_cast"] = i++;
262 _kwMap["else"] = i++;
263 _kwMap["enum"] = i++;
264 _kwMap["explicit"] = i++;
265 _kwMap["export"] = i++;
266 _kwMap["extern"] = i++;
267 _kwMap["false"] = i++;
268 _kwMap["float"] = i++;
269 _kwMap["for"] = i++;
270 _kwMap["friend"] = i++;
271 _kwMap["goto"] = i++;
272 _kwMap["if"] = i++;
273 _kwMap["inline"] = i++;
274 _kwMap["int"] = i++;
275 _kwMap["long"] = i++;
276 _kwMap["mutable"] = i++;
277 _kwMap["namespace"] = i++;
278 _kwMap["new"] = i++;
279 _kwMap["noexcept"] = i++;
280 _kwMap["not"] = i++;
281 _kwMap["not_eq"] = i++;
282 _kwMap["nullptr"] = i++;
283 _kwMap["operator"] = i++;
284 _kwMap["or"] = i++;
285 _kwMap["or_eq"] = i++;
286 _kwMap["private"] = i++;
287 _kwMap["protected"] = i++;
288 _kwMap["public"] = i++;
289 _kwMap["register"] = i++;
290 _kwMap["reinterpret_cast"] = i++;
291 _kwMap["return"] = i++;
292 _kwMap["short"] = i++;
293 _kwMap["signed"] = i++;
294 _kwMap["sizeof"] = i++;
295 _kwMap["static"] = i++;
296 _kwMap["static_assert"] = i++;
297 _kwMap["static_cast"] = i++;
298 _kwMap["struct"] = i++;
299 _kwMap["switch"] = i++;
300 _kwMap["template"] = i++;
301 _kwMap["this"] = i++;
302 _kwMap["thread_local"] = i++;
303 _kwMap["throw"] = i++;
304 _kwMap["true"] = i++;
305 _kwMap["try"] = i++;
306 _kwMap["typedef"] = i++;
307 _kwMap["typeid"] = i++;
308 _kwMap["typename"] = i++;
309 _kwMap["union"] = i++;
310 _kwMap["unsigned"] = i++;
311 _kwMap["using"] = i++;
312 _kwMap["virtual"] = i++;
313 _kwMap["void"] = i++;
314 _kwMap["volatile"] = i++;
315 _kwMap["wchar_t"] = i++;
316 _kwMap["while"] = i++;
317 _kwMap["xor"] = i++;
318 _kwMap["xor_eq"] = i++;
319}
320
321
// Destroys the identifier token; no explicit cleanup is performed here.
IdentifierToken::~IdentifierToken()
{
}
325
326
327Token::Class IdentifierToken::tokenClass() const
328{
329 return asInteger() ? Token::KEYWORD_TOKEN : Token::IDENTIFIER_TOKEN;
330}
331
332
333bool IdentifierToken::start(char c, std::istream& /*istr*/)
334{
335 _value = c;
336 return (c >= 'A' && c <= 'Z') ||
337 (c >= 'a' && c <= 'z') ||
338 (c == '_' || c == '$');
339}
340
341
342void IdentifierToken::finish(std::istream& istr)
343{
344 int next = (char) istr.peek();
345 while ((next >= 'A' && next <= 'Z') ||
346 (next >= 'a' && next <= 'z') ||
347 (next >= '0' && next <= '9') ||
348 (next == '_' || next == '$'))
349 {
350 _value += (char) istr.get();
351 next = istr.peek();
352 }
353}
354
355
356int IdentifierToken::asInteger() const
357{
358 KWMap::const_iterator it = _kwMap.find(_value);
359 if (it != _kwMap.end())
360 return it->second;
361 else
362 return 0;
363}
364
365
// Creates the string literal token; there is no state to initialise here.
StringLiteralToken::StringLiteralToken()
{
}
369
370
// Destroys the string literal token; no explicit cleanup is performed here.
StringLiteralToken::~StringLiteralToken()
{
}
374
375
// Always reports Token::STRING_LITERAL_TOKEN.
Token::Class StringLiteralToken::tokenClass() const
{
	return Token::STRING_LITERAL_TOKEN;
}
380
381
382bool StringLiteralToken::start(char c, std::istream& /*istr*/)
383{
384 _value = c;
385 return c == '"';
386}
387
388
389void StringLiteralToken::finish(std::istream& istr)
390{
391 int next = istr.peek();
392 while (next != -1 && next != '"' && next != '\n' && next != '\r')
393 {
394 if (next == '\\') _value += (char) istr.get();
395 _value += (char) istr.get();
396 next = istr.peek();
397 }
398 if (next == '"')
399 {
400 next = istr.get();
401 _value += (char) next;
402 }
403 else throw SyntaxException("Unterminated string literal");
404}
405
406
407std::string StringLiteralToken::asString() const
408{
409 std::string result;
410 std::string::const_iterator it = _value.begin();
411 std::string::const_iterator end = _value.end();
412 if (it != end)
413 {
414 if (*it == '"') ++it;
415 while (it != end && *it != '"')
416 {
417 if (*it == '\\') ++it;
418 if (it != end) result += *it++;
419 }
420 }
421 return result;
422}
423
424
// Creates the character literal token; there is no state to initialise here.
CharLiteralToken::CharLiteralToken()
{
}
428
429
// Destroys the character literal token; no explicit cleanup is performed here.
CharLiteralToken::~CharLiteralToken()
{
}
433
434
// Always reports Token::CHAR_LITERAL_TOKEN.
Token::Class CharLiteralToken::tokenClass() const
{
	return Token::CHAR_LITERAL_TOKEN;
}
439
440
441bool CharLiteralToken::start(char c, std::istream& /*istr*/)
442{
443 _value = c;
444 return c == '\'';
445}
446
447
448void CharLiteralToken::finish(std::istream& istr)
449{
450 int next = istr.peek();
451 while (next != -1 && next != '\'' && next != '\n' && next != '\r')
452 {
453 if (next == '\\') _value += (char) istr.get();
454 _value += (char) istr.get();
455 next = istr.peek();
456 }
457 if (next == '\'')
458 {
459 next = istr.get();
460 _value += (char) next;
461 }
462 else throw SyntaxException("Unterminated character literal");
463}
464
465
466char CharLiteralToken::asChar() const
467{
468 char result('\0');
469 std::string::const_iterator it = _value.begin();
470 std::string::const_iterator end = _value.end();
471 if (it != end)
472 {
473 if (*it == '\'') ++it;
474 while (it != end && *it != '\'')
475 {
476 if (*it == '\\') ++it;
477 if (it != end) result = *it++;
478 }
479 }
480 return result;
481}
482
483
// Creates a number literal token with _isFloat cleared, i.e. tokenClass()
// reports an integer literal until finish() decides otherwise.
NumberLiteralToken::NumberLiteralToken():
	_isFloat(false)
{
}
488
489
// Destroys the number literal token; no explicit cleanup is performed here.
NumberLiteralToken::~NumberLiteralToken()
{
}
493
494
495Token::Class NumberLiteralToken::tokenClass() const
496{
497 return _isFloat ? Token::FLOAT_LITERAL_TOKEN : Token::INTEGER_LITERAL_TOKEN;
498}
499
500
501bool NumberLiteralToken::start(char c, std::istream& istr)
502{
503 _value = c;
504 int next = istr.peek();
505 return (c >= '0' && c <= '9') ||
506 (c == '.' && next >= '0' && next <= '9');
507}
508
509
510void NumberLiteralToken::finish(std::istream& istr)
511{
512 int next = istr.peek();
513 _isFloat = false;
514 if (_value[0] != '.') // starts with digit
515 {
516 if (_value[0] == '0')
517 {
518 if (next == 'x' || next == 'X')
519 {
520 return finishHex(istr, next);
521 }
522 else if (next == 'b' || next == 'B')
523 {
524 return finishBin(istr, next);
525 }
526 }
527 while ((next >= '0' && next <= '9') || next == '\'')
528 {
529 _value += (char) istr.get();
530 next = istr.peek();
531 }
532 if (next == '.')
533 {
534 next = istr.get();
535 next = istr.peek();
536 if (next != '.')
537 {
538 _isFloat = true;
539 _value += '.';
540 }
541 else // double period
542 {
543 istr.unget();
544 }
545 }
546 }
547 else
548 {
549 _isFloat = true;
550 _value += istr.get();
551 next = istr.peek();
552 }
553 while (next >= '0' && next <= '9')
554 {
555 _value += (char) istr.get();
556 next = istr.peek();
557 }
558 if (next == 'e' || next == 'E')
559 {
560 _isFloat = true;
561 finishExp(istr, next);
562 }
563 finishSuffix(istr, next);
564}
565
566
567void NumberLiteralToken::finishHex(std::istream& istr, int next)
568{
569 _value += (char) istr.get();
570 next = istr.peek();
571 while (std::isxdigit(next) || next == '\'')
572 {
573 _value += (char) istr.get();
574 next = istr.peek();
575 }
576 if (next == '.')
577 {
578 _isFloat = true;
579 _value += (char) istr.get();
580 next = istr.peek();
581 while (std::isxdigit(next) || next == '\'')
582 {
583 _value += (char) istr.get();
584 next = istr.peek();
585 }
586 }
587 if (next == 'p' || next == 'P')
588 {
589 finishExp(istr, next);
590 }
591 finishSuffix(istr, next);
592}
593
594
595void NumberLiteralToken::finishBin(std::istream& istr, int next)
596{
597 _value += (char) istr.get();
598 next = istr.peek();
599 while (next == '0' || next == '1' || next == '\'')
600 {
601 _value += (char) istr.get();
602 next = istr.peek();
603 }
604 finishSuffix(istr, next);
605}
606
607
608void NumberLiteralToken::finishExp(std::istream& istr, int next)
609{
610 _isFloat = true;
611 _value += (char) istr.get();
612 next = istr.peek();
613 if (next == '+' || next == '-')
614 {
615 _value += (char) istr.get();
616 next = istr.peek();
617 }
618 if (next >= '0' && next <= '9')
619 {
620 while (next >= '0' && next <= '9')
621 {
622 _value += (char) istr.get();
623 next = istr.peek();
624 }
625 }
626 else
627 {
628 std::string s(1, (char) next);
629 syntaxError("digit", s);
630 }
631 }
632
633
634void NumberLiteralToken::finishSuffix(std::istream& istr, int next)
635{
636 if (_isFloat)
637 {
638 if (next == 'L' || next == 'l' || next == 'F' || next == 'f')
639 _value += (char) istr.get();
640 }
641 else
642 {
643 while (next == 'L' || next == 'l' || next == 'U' || next == 'u')
644 {
645 _value += (char) istr.get();
646 next = istr.peek();
647 }
648 }
649}
650
651
652int NumberLiteralToken::asInteger() const
653{
654 return static_cast<int>(std::strtol(_value.c_str(), 0, 0));
655}
656
657
658double NumberLiteralToken::asFloat() const
659{
660 return std::strtod(_value.c_str(), 0);
661}
662
663
// Creates the comment token; there is no state to initialise here.
CommentToken::CommentToken()
{
}
667
668
// Destroys the comment token; no explicit cleanup is performed here.
CommentToken::~CommentToken()
{
}
672
673
674Token::Class CommentToken::tokenClass() const
675{
676 return (_value.length() > 2 && _value[2] == '/') ? Token::SPECIAL_COMMENT_TOKEN : Token::COMMENT_TOKEN;
677}
678
679
680bool CommentToken::start(char c, std::istream& istr)
681{
682 _value = c;
683 int next = istr.peek();
684 return c == '/' && (next == '*' || next == '/');
685}
686
687
688void CommentToken::finish(std::istream& istr)
689{
690 int next = istr.peek();
691 if (next == '/')
692 {
693 while (next != -1 && next != '\r' && next != '\n')
694 {
695 _value += (char) istr.get();
696 next = istr.peek();
697 }
698 }
699 else
700 {
701 _value += (char) istr.get(); // *
702 next = istr.peek();
703 while (next != -1)
704 {
705 next = istr.get();
706 _value += (char) next;
707 if (next == '*' && istr.peek() == '/')
708 {
709 _value += (char) istr.get();
710 break;
711 }
712 }
713 }
714}
715
716
717std::string CommentToken::asString() const
718{
719 if (_value.length() > 2 && _value[2] == '/')
720 return _value.substr(3);
721 else
722 return _value.substr(2);
723}
724
725
// Creates the preprocessor token; there is no state to initialise here.
PreprocessorToken::PreprocessorToken()
{
}
729
730
// Destroys the preprocessor token; no explicit cleanup is performed here.
PreprocessorToken::~PreprocessorToken()
{
}
734
735
// Always reports Token::PREPROCESSOR_TOKEN.
Token::Class PreprocessorToken::tokenClass() const
{
	return Token::PREPROCESSOR_TOKEN;
}
740
741
742bool PreprocessorToken::start(char c, std::istream& /*istr*/)
743{
744 _value = c;
745 return c == '#';
746}
747
748
// Reads the remainder of a preprocessor directive into _value.
// Characters are appended until a CR, LF or end of stream is peeked.
// A backslash is handled specially depending on the character after it,
// so that a directive can continue past an escaped line break.
void PreprocessorToken::finish(std::istream& istr)
{
	int pb = -1; // character to append instead of reading from the stream; -1 means none
	int next = istr.peek();
	while (next != -1 && next != '\r' && next != '\n')
	{
		if (next == '\\')
		{
			istr.get(); // consume the backslash
			int p = istr.peek();
			if (p == '\r')
			{
				// backslash + CR: consume the CR and, if present, the LF after it
				istr.get();
				if (istr.peek() == '\n')
					p = istr.get();
				next = p;
				// NOTE(review): the line break has already been consumed here, so
				// the append below reads the character that follows it (possibly
				// end of stream); for backslash + LF the LF itself is appended.
				// Confirm whether this asymmetry is intended.
			}
			else if (p == '\n')
			{
				// backslash + LF: the LF is still in the stream and is appended below
				next = p;
			}
			else
			{
				// not followed by a line break: keep the backslash for the append below
				pb = next;
			}
		}
		if (next != -1)
		{
			// append the kept backslash if there is one, otherwise the next stream character
			_value += (char) (pb != -1 ? pb : istr.get());
			next = istr.peek();
			pb = -1;
		}
	}
}
783
784
785} } // namespace Poco::CppParser
786
787