lexer.cpp source code [SuperTux/external/sexp-cpp/src/lexer.cpp]

1	// SExp - A S-Expression Parser for C++
2	// Copyright (C) 2006 Matthias Braun <matze@braunis.de>
3	// 2015 Ingo Ruhnke <grumbel@gmail.com>
4	//
5	// This program is free software: you can redistribute it and/or modify
6	// it under the terms of the GNU General Public License as published by
7	// the Free Software Foundation, either version 3 of the License, or
8	// (at your option) any later version.
9	//
10	// This program is distributed in the hope that it will be useful,
11	// but WITHOUT ANY WARRANTY; without even the implied warranty of
12	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	// GNU General Public License for more details.
14	//
15	// You should have received a copy of the GNU General Public License
16	// along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18	#include "sexp/lexer.hpp"
19
20	#include <assert.h>
21	#include <string.h>
22	#include <sstream>
23	#include <stdexcept>
24	#include <stdio.h>
25
26	namespace sexp {
27
28	Lexer::Lexer(std::istream& newstream, bool use_arrays) :
29	m_stream(newstream),
30	m_use_arrays(use_arrays),
31	m_eof(false),
32	m_linenumber(`0`),
33	m_bufend(),
34	m_bufpos(),
35	m_c(),
36	m_token_string ()
37	{
38	// trigger a refill of the buffer
39	m_bufpos = nullptr;
40	m_bufend = nullptr;
41	next_char();
42	}
43
44	Lexer::~Lexer()
45	{
46	}
47
48	void
49	Lexer::next_char()
50	{
51	if (m_bufpos >= m_bufend \|\| (m_bufpos == nullptr && m_bufend == nullptr) / Initial refill trigger /) {
52	if (m_eof) {
53	m_c = EOF;
54	return;
55	}
56	m_stream.read(m_buffer, BUFFER_SIZE);
57	std::streamsize bytes_read = m_stream.gcount();
58
59	m_bufpos = m_buffer;
60	m_bufend = m_buffer + bytes_read;
61
62	// the following is a hack that appends an additional ' ' at the end of
63	// the file to avoid problems when parsing symbols/elements and a sudden
64	// EOF. This is faster than relying on unget and IMO also nicer.
65	if (bytes_read == `0` \|\| m_stream.eof()) {
66	m_eof = true;
67	*m_bufend = `' '`;
68	++m_bufend;
69	}
70	}
71
72	if (m_bufpos == nullptr) {
73	return;
74	}
75
76	m_c = *m_bufpos++;
77	if (m_c == `'\n'`) {
78	++m_linenumber;
79	}
80	}
81
82	void
83	Lexer::add_char()
84	{
85	m_token_string += static_cast<char>(m_c);
86	next_char();
87	}
88
89	Lexer::TokenType
90	Lexer::get_next_token()
91	{
92	static const char* delims = "\"();";
93
94	while(isspace(m_c)) {
95	next_char();
96	}
97
98	m_token_string.clear();
99
100	switch(m_c)
101	{
102	case `';'`: // comment
103	while(m_c != `'\n'`) {
104	next_char();
105	}
106	return get_next_token(); // and again
107
108	case `'('`:
109	next_char();
110	if (m_use_arrays)
111	{
112	return TOKEN_ARRAY_START;
113	}
114	else
115	{
116	return TOKEN_OPEN_PAREN;
117	}
118	case `')'`:
119	next_char();
120	return TOKEN_CLOSE_PAREN;
121
122	case `'"'`: { // string
123	int startline = m_linenumber;
124	while(`1`) {
125	next_char();
126	switch(m_c) {
127	case `'"'`:
128	next_char();
129	goto string_finished;
130	case `'\r'`:
131	continue;
132	case `'\n'`:
133	break;
134	case `'\\'`:
135	next_char();
136	switch(m_c) {
137	case `'n'`:
138	m_c = `'\n'`;
139	break;
140	case `'t'`:
141	m_c = `'\t'`;
142	break;
143	}
144	break;
145	case EOF: {
146	std::stringstream msg;
147	msg << "Parse error in line " << startline << ": "
148	<< "EOF while parsing string.";
149	throw std::runtime_error (msg.str());
150	}
151	default:
152	break;
153	}
154	m_token_string += static_cast<char>(m_c);
155	}
156	string_finished:
157	return TOKEN_STRING;
158	}
159	case `'#'`: // constant
160	next_char();
161
162	if (m_c == `'('`)
163	{
164	next_char();
165	return TOKEN_ARRAY_START;
166	}
167	else
168	{
169	while(isalnum(m_c) \|\| m_c == `'_'`) {
170	add_char();
171	}
172
173	if (m_token_string == "t")
174	{
175	return TOKEN_TRUE;
176	}
177	else if (m_token_string == "f")
178	{
179	return TOKEN_FALSE;
180	}
181	else
182	{
183	// we only handle #t and #f constants at the moment...
184	std::stringstream msg;
185	msg << "Parse Error in line " << m_linenumber << ": "
186	<< "Unknown constant '" << m_token_string << "'.";
187	throw std::runtime_error (msg.str());
188	}
189	}
190
191	case EOF:
192	return TOKEN_EOF;
193
194	default:
195	{
196	enum {
197	STATE_INIT,
198	STATE_SYMBOL,
199	STATE_MAYBE_DOT,
200	STATE_MAYBE_INTEGER_SIGN,
201	STATE_MAYBE_INTEGER_PART,
202	STATE_MAYBE_FRACTIONAL_START,
203	STATE_MAYBE_FRACTIONAL_PART,
204	STATE_MAYBE_EXPONENT_SIGN,
205	STATE_MAYBE_EXPONENT_START,
206	STATE_MAYBE_EXPONENT_PART,
207	} state = STATE_INIT;
208
209	bool has_integer_part = false;
210	bool has_fractional_part = false;
211	do
212	{
213	switch(state)
214	{
215	case STATE_INIT:
216	if (isdigit(m_c)) {
217	has_integer_part = true;
218	state = STATE_MAYBE_INTEGER_PART;
219	} else if (m_c == `'-'` \|\| m_c == `'+'`) {
220	state = STATE_MAYBE_INTEGER_SIGN;
221	} else if (m_c == `'.'`) {
222	state = STATE_MAYBE_DOT;
223	} else {
224	state = STATE_SYMBOL;
225	}
226	break;
227
228	case STATE_SYMBOL:
229	break;
230
231	case STATE_MAYBE_DOT:
232	if (isdigit(m_c)) {
233	state = STATE_MAYBE_FRACTIONAL_START;
234	} else {
235	state = STATE_SYMBOL;
236	}
237	break;
238
239	case STATE_MAYBE_INTEGER_SIGN:
240	if (isdigit(m_c)) {
241	has_integer_part = true;
242	state = STATE_MAYBE_INTEGER_PART;
243	} else if (m_c == `'.'`) {
244	state = STATE_MAYBE_FRACTIONAL_START;
245	}
246	break;
247
248	case STATE_MAYBE_INTEGER_PART:
249	if (isdigit(m_c)) {
250	// loop
251	} else if (m_c == `'.'`) {
252	state = STATE_MAYBE_FRACTIONAL_START;
253	} else if (m_c == `'e'` \|\| m_c == `'E'`) {
254	state = STATE_MAYBE_EXPONENT_SIGN;
255	} else {
256	state = STATE_SYMBOL;
257	}
258	break;
259
260	case STATE_MAYBE_FRACTIONAL_START:
261	if (isdigit(m_c)) {
262	has_fractional_part = true;
263	state = STATE_MAYBE_FRACTIONAL_PART;
264	} else {
265	state = STATE_SYMBOL;
266	}
267	break;
268
269	case STATE_MAYBE_FRACTIONAL_PART:
270	if (isdigit(m_c)) {
271	// loop
272	} else if ((has_integer_part \|\| has_fractional_part) &&
273	(m_c == `'e'` \|\| m_c == `'E'`)) {
274	state = STATE_MAYBE_EXPONENT_SIGN;
275	} else {
276	state = STATE_SYMBOL;
277	}
278	break;
279
280	case STATE_MAYBE_EXPONENT_SIGN:
281	if (m_c == `'-'` \|\| m_c == `'+'`) {
282	state = STATE_MAYBE_EXPONENT_START;
283	} else if (isdigit(m_c)) {
284	state = STATE_MAYBE_EXPONENT_PART;
285	} else {
286	state = STATE_SYMBOL;
287	}
288	break;
289
290	case STATE_MAYBE_EXPONENT_START:
291	if (isdigit(m_c)) {
292	state = STATE_MAYBE_EXPONENT_PART;
293	} else {
294	state = STATE_SYMBOL;
295	}
296	break;
297
298	case STATE_MAYBE_EXPONENT_PART:
299	if (isdigit(m_c)) {
300	// loop
301	} else {
302	state = STATE_SYMBOL;
303	}
304	break;
305	}
306
307	add_char();
308	}
309	while(!isspace(m_c) && !strchr(delims, m_c));
310
311	switch(state)
312	{
313	case STATE_INIT:
314	assert(false && "never reached");
315	return TOKEN_EOF;
316
317	case STATE_SYMBOL:
318	return TOKEN_SYMBOL;
319
320	case STATE_MAYBE_DOT:
321	return TOKEN_DOT;
322
323	case STATE_MAYBE_INTEGER_SIGN:
324	return TOKEN_SYMBOL;
325
326	case STATE_MAYBE_INTEGER_PART:
327	return TOKEN_INTEGER;
328
329	case STATE_MAYBE_FRACTIONAL_START:
330	if (has_integer_part) {
331	return TOKEN_REAL;
332	} else {
333	return TOKEN_SYMBOL;
334	}
335
336	case STATE_MAYBE_FRACTIONAL_PART:
337	return TOKEN_REAL;
338
339	case STATE_MAYBE_EXPONENT_SIGN:
340	case STATE_MAYBE_EXPONENT_START:
341	return TOKEN_SYMBOL;
342
343	case STATE_MAYBE_EXPONENT_PART:
344	return TOKEN_REAL;
345	}
346	}
347	assert(false && "never reached");
348	return TOKEN_EOF;
349	}
350	}
351
352	} // namespace sexp
353
/* EOF */
355

Browse the source code of SuperTux/external/sexp-cpp/src/lexer.cpp