1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 *
6 * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V.
7 */
8
9/* (c): M. L. Kersten
10*/
11
12#include "monetdb_config.h"
13#include "mal_parser.h"
14#include "mal_resolve.h"
15#include "mal_linker.h"
16#include "mal_atom.h" /* for malAtomDefinition(), malAtomProperty() */
17#include "mal_interpreter.h" /* for showErrors() */
18#include "mal_instruction.h" /* for pushEndInstruction(), findVariableLength() */
19#include "mal_namespace.h"
20#include "mal_utils.h"
21#include "mal_builder.h"
22#include "mal_type.h"
23#include "mal_private.h"
24
25#define FATALINPUT MAXERRORS+1
26#define NL(X) ((X)=='\n' || (X)=='\r')
27
28static str idCopy(Client cntxt, int len);
29static str strCopy(Client cntxt, int len);
30
31/*
32 * For error reporting we may have to find the start of the previous line,
 * which, of course, is easy given the client buffer.
34 * The remaining functions are self-explanatory.
35*/
36static str
37lastline(Client cntxt)
38{
39 str s = CURRENT(cntxt);
40 if (NL(*s))
41 s++;
42 while (s > cntxt->fdin->buf && !NL(*s))
43 s--;
44 if (NL(*s))
45 s++;
46 return s;
47}
48
49static ssize_t
50position(Client cntxt)
51{
52 str s = lastline(cntxt);
53 return (ssize_t) (CURRENT(cntxt) - s);
54}
55
56/*
57 * Upon encountering an error we skip to the nearest semicolon,
58 * or comment terminated by a new line
59 */
60static inline void
61skipToEnd(Client cntxt)
62{
63 char c;
64 while ((c = *CURRENT(cntxt)) != ';' && c && c != '\n')
65 nextChar(cntxt);
66 if (c && c != '\n')
67 nextChar(cntxt);
68}
69
70/*
71 * Keep on syntax error for reflection and correction.
72 */
73static void
74parseError(Client cntxt, str msg)
75{
76 MalBlkPtr mb = cntxt->curprg->def;
77 char *old, *new;
78 char buf[1028]={0};
79 char *s = buf, *t, *line="", *marker="";
80 char *l = lastline(cntxt);
81 ssize_t i;
82
83 s= buf;
84 for (t = l; *t && *t != '\n' && s < buf+sizeof(buf)-4; t++) {
85 *s++ = *t;
86 }
87 *s++ = '\n';
88 *s = 0;
89 line = createException( SYNTAX, "parseError", "%s", buf);
90
91 /* produce the position marker*/
92 s= buf;
93 i = position(cntxt);
94 for (; i > 0 && s < buf+sizeof(buf)-4; i--) {
95 *s++ = ((l && *(l + 1) && *l++ != '\t')) ? ' ' : '\t';
96 }
97 *s++ = '^';
98 *s = 0;
99 marker = createException( SYNTAX, "parseError", "%s%s", buf,msg);
100
101 old = mb->errors;
102 new = GDKzalloc((old? strlen(old):0) + strlen(line) + strlen(marker) + 64);
103 if (new == NULL){
104 freeException(line);
105 freeException(marker);
106 skipToEnd(cntxt);
107 return ; // just stick to old error message
108 }
109 if (old){
110 strcpy(new, old);
111 GDKfree(old);
112 }
113 strcat(new,line);
114 strcat(new,marker);
115
116 mb->errors = new;
117 freeException(line);
118 freeException(marker);
119 skipToEnd(cntxt);
120}
121/* Before a line is parsed we check for a request to echo it.
122 * This command should be executed at the beginning of a parse
123 * request and each time we encounter EOL.
124*/
125static void
126echoInput(Client cntxt)
127{
128 char *c = CURRENT(cntxt);
129 if (cntxt->listing == 1 && *c && !NL(*c)) {
130 mnstr_printf(cntxt->fdout,"#");
131 while (*c && !NL(*c)) {
132 mnstr_printf(cntxt->fdout, "%c", *c++);
133 }
134 mnstr_printf(cntxt->fdout, "\n");
135 }
136}
137
138static inline void
139skipSpace(Client cntxt)
140{
141 char *s= &currChar(cntxt);
142 for (;;) {
143 switch (*s++) {
144 case ' ':
145 case '\t':
146 case '\n':
147 case '\r':
148 nextChar(cntxt);
149 break;
150 default:
151 return;
152 }
153 }
154}
155
156static inline void
157advance(Client cntxt, size_t length)
158{
159 cntxt->yycur += length;
160 skipSpace(cntxt);
161}
162
163/*
164 * The most recurring situation is to recognize identifiers.
165 * This process is split into a few steps to simplify subsequent
166 * construction and comparison.
167 * IdLength searches the end of an identifier without changing
168 * the cursor into the input pool.
169 * IdCopy subsequently prepares a GDK string for inclusion in the
170 * instruction datastructures.
171*/
172
173short opCharacter[256];
174short idCharacter[256];
175short idCharacter2[256];
176
177void
178initParser(void)
179{
180 int i;
181
182 for (i = 0; i < 256; i++) {
183 idCharacter2[i] = isalnum(i);
184 idCharacter[i] = isalpha(i);
185 }
186 for (i = 0; i < 256; i++)
187 switch (i) {
188 case '-': case '!': case '\\': case '$': case '%':
189 case '^': case '*': case '~': case '+': case '&':
190 case '|': case '<': case '>': case '=': case '/':
191 case ':':
192 opCharacter[i] = 1;
193 }
194
195 idCharacter[TMPMARKER] = 1;
196 idCharacter2[TMPMARKER] = 1;
197 idCharacter2['@'] = 1;
198}
199
200static int
201idLength(Client cntxt)
202{
203 str s,t;
204 int len = 0;
205
206 skipSpace(cntxt);
207 s = CURRENT(cntxt);
208 t = s;
209
210 if (!idCharacter[(unsigned char) (*s)])
211 return 0;
212 /* avoid a clash with old temporaries */
213 if (s[0] == TMPMARKER)
214 s[0] = REFMARKER;
215 /* prepare escape of temporary names */
216 s++;
217 while (len < IDLENGTH && idCharacter2[(unsigned char) (*s)]){
218 s++;
219 len++;
220 }
221 if( len == IDLENGTH)
222 // skip remainder
223 while (idCharacter2[(unsigned char) (*s)])
224 s++;
225 return (int) (s-t);;
226}
227
228/* Simple type identifiers can not be marked with a type variable. */
229static size_t
230typeidLength(Client cntxt)
231{
232 size_t l;
233 char id[IDLENGTH], *t= id;
234 str s;
235 skipSpace(cntxt);
236 s = CURRENT(cntxt);
237
238 if (!idCharacter[(unsigned char) (*s)])
239 return 0;
240 l = 1;
241 *t++ = *s++;
242 while (l < IDLENGTH && (idCharacter[(unsigned char) (*s)] || isdigit((unsigned char) *s)) ) {
243 *t++ = *s++;
244 l++;
245 }
246 /* recognize the special type variables {any, any_<nr>} */
247 if( strncmp(id, "any",3) == 0)
248 return 3;
249 if( strncmp(id, "any_",4) == 0)
250 return 4;
251 return l;
252}
253
254static str
255idCopy(Client cntxt, int length)
256{
257 str s = GDKmalloc(length + 1);
258 if (s == NULL)
259 return NULL;
260 memcpy(s, CURRENT(cntxt), (size_t) length);
261 s[length] = 0;
262 /* avoid a clash with old temporaries */
263 advance(cntxt, length);
264 return s;
265}
266
267static int
268MALlookahead(Client cntxt, str kw, int length)
269{
270 int i;
271
272 /* avoid double test or use lowercase only. */
273 if (currChar(cntxt) == *kw &&
274 strncmp(CURRENT(cntxt), kw, length) == 0 &&
275 !idCharacter[(unsigned char) (CURRENT(cntxt)[length])] &&
276 !isdigit((unsigned char) (CURRENT(cntxt)[length]))) {
277 return 1;
278 }
279 /* check for captialized versions */
280 for (i = 0; i < length; i++)
281 if (tolower(CURRENT(cntxt)[i]) != kw[i])
282 return 0;
283 if (!idCharacter[(unsigned char) (CURRENT(cntxt)[length])] &&
284 !isdigit((unsigned char) (CURRENT(cntxt)[length]))) {
285 return 1;
286 }
287 return 0;
288}
289
290static inline int
291MALkeyword(Client cntxt, str kw, int length)
292{
293 skipSpace(cntxt);
294 if (MALlookahead(cntxt, kw, length)) {
295 advance(cntxt, length);
296 return 1;
297 }
298 return 0;
299}
300
301/*
302 * Keyphrase testing is limited to a few characters only
 * (check manually). To speed this up we use small inline functions.
304*/
305
306static inline int
307keyphrase1(Client cntxt, str kw)
308{
309 skipSpace(cntxt);
310 if (currChar(cntxt) == *kw) {
311 advance(cntxt, 1);
312 return 1;
313 }
314 return 0;
315}
316
317static inline int
318keyphrase2(Client cntxt, str kw)
319{
320 skipSpace(cntxt);
321 if (CURRENT(cntxt)[0] == kw[0] && CURRENT(cntxt)[1] == kw[1]) {
322 advance(cntxt, 2);
323 return 1;
324 }
325 return 0;
326}
327
328/*
329 * A similar approach is used for string literals.
330 * Beware, string lengths returned include the
331 * brackets and escapes. They are eaten away in strCopy.
332 * We should provide the C-method to split strings and
333 * concatenate them upon retrieval[todo]
334*/
335static int
336stringLength(Client cntxt)
337{
338 int l = 0;
339 int quote = 0;
340 str s;
341 skipSpace(cntxt);
342 s = CURRENT(cntxt);
343
344 if (*s != '"')
345 return 0;
346 for (s++;*s; l++, s++) {
347 if (quote) {
348 quote = 0;
349 } else {
350 if (*s == '"')
351 break;
352 quote = *s == '\\';
353 }
354 }
355 return l + 2;
356}
357
358/*Beware, the idcmp routine uses a short cast to compare multiple bytes
359 * at once. This may cause problems when the net string length is zero.
360*/
361
362str
363strCopy(Client cntxt, int length)
364{
365 str s;
366 int i;
367
368 i = length < 4 ? 4 : length;
369 s = GDKmalloc(i);
370 if (s == 0)
371 return NULL;
372 memcpy(s, CURRENT(cntxt) + 1, (size_t) (length - 2));
373 s[length-2] = 0;
374 mal_unquote(s);
375 return s;
376}
377
378/*
379 * And a similar approach is used for operator names.
380 * A lookup table is considered, because it generally is
 * faster than a non-dense switch.
382*/
383static int
384operatorLength(Client cntxt)
385{
386 int l = 0;
387 str s;
388
389 skipSpace(cntxt);
390 for (s = CURRENT(cntxt); *s; s++) {
391 if (opCharacter[(unsigned char) (*s)])
392 l++;
393 else
394 return l;
395 }
396 return l;
397}
398
399/*
400 * The lexical analyser for constants is a little more complex.
401 * Aside from getting its length, we need an indication of its type.
402 * The constant structure is initialized for later use.
403 */
404static int
405cstToken(Client cntxt, ValPtr cst)
406{
407 int i = 0;
408 int hex = 0;
409 str s = CURRENT(cntxt);
410
411 cst->vtype = TYPE_int;
412 cst->val.lval = 0;
413 switch (*s) {
414 case '{': case '[':
415 /* JSON Literal */
416 break;
417 case '"':
418 cst->vtype = TYPE_str;
419 i = stringLength(cntxt);
420 cst->val.sval = strCopy(cntxt, i);
421 if (cst->val.sval)
422 cst->len = strlen(cst->val.sval);
423 else
424 cst->len = 0;
425 return i;
426 case '-':
427 i++;
428 s++;
429 /* fall through */
430 case '0':
431 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
432 /* deal with hex */
433 hex = TRUE;
434 i += 2;
435 s += 2;
436 }
437 /* fall through */
438 case '1': case '2': case '3': case '4': case '5':
439 case '6': case '7': case '8': case '9':
440 if (hex) {
441 while (isxdigit((unsigned char) *s)) {
442 i++;
443 s++;
444 }
445 goto handleInts;
446 } else
447 while (isdigit((unsigned char) *s)) {
448 i++;
449 s++;
450 }
451
452 /* fall through */
453 case '.':
454 if (*s == '.' && isdigit((unsigned char) *(s + 1))) {
455 i++;
456 s++;
457 while (isdigit((unsigned char) *s)) {
458 i++;
459 s++;
460 }
461 cst->vtype = TYPE_dbl;
462 }
463 if (*s == 'e' || *s == 'E') {
464 i++;
465 s++;
466 if (*s == '-' || *s == '+') {
467 i++;
468 s++;
469 }
470 cst->vtype = TYPE_dbl;
471 while (isdigit((unsigned char) *s)) {
472 i++;
473 s++;
474 }
475 }
476 if (cst->vtype == TYPE_flt) {
477 size_t len = sizeof(flt);
478 float *pval = &cst->val.fval;
479 if (fltFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
480 parseError(cntxt, GDKerrbuf);
481 return i;
482 }
483 }
484 if (cst->vtype == TYPE_dbl) {
485 size_t len = sizeof(dbl);
486 double *pval = &cst->val.dval;
487 if (dblFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
488 parseError(cntxt, GDKerrbuf);
489 return i;
490 }
491 }
492 if (*s == '@') {
493 size_t len = sizeof(lng);
494 lng l, *pval = &l;
495 if (lngFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
496 parseError(cntxt, GDKerrbuf);
497 return i;
498 }
499 if (is_lng_nil(l) || l < 0
500#if SIZEOF_OID < SIZEOF_LNG
501 || l > GDK_oid_max
502#endif
503 )
504 cst->val.oval = oid_nil;
505 else
506 cst->val.oval = (oid) l;
507 cst->vtype = TYPE_oid;
508 i++;
509 s++;
510 while (isdigit((unsigned char) *s)) {
511 i++;
512 s++;
513 }
514 return i;
515 }
516 if (*s == 'L') {
517 if (cst->vtype == TYPE_int)
518 cst->vtype = TYPE_lng;
519 if (cst->vtype == TYPE_flt)
520 cst->vtype = TYPE_dbl;
521 i++;
522 s++;
523 if (*s == 'L') {
524 i++;
525 s++;
526 }
527 if (cst->vtype == TYPE_dbl) {
528 size_t len = sizeof(dbl);
529 dbl *pval = &cst->val.dval;
530 if (dblFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
531 parseError(cntxt, GDKerrbuf);
532 return i;
533 }
534 } else {
535 size_t len = sizeof(lng);
536 lng *pval = &cst->val.lval;
537 if (lngFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
538 parseError(cntxt, GDKerrbuf);
539 return i;
540 }
541 }
542 return i;
543 }
544#ifdef HAVE_HGE
545 if (*s == 'H' && cst->vtype == TYPE_int) {
546 size_t len = sizeof(hge);
547 hge *pval = &cst->val.hval;
548 cst->vtype = TYPE_hge;
549 i++;
550 s++;
551 if (*s == 'H') {
552 i++;
553 s++;
554 }
555 if (hgeFromStr(CURRENT(cntxt), &len, &pval, true) < 0) {
556 parseError(cntxt, GDKerrbuf);
557 return i;
558 }
559 return i;
560 }
561#endif
562handleInts:
563 assert(cst->vtype != TYPE_lng);
564#ifdef HAVE_HGE
565 assert(cst->vtype != TYPE_hge);
566#endif
567 if (cst->vtype == TYPE_int) {
568#ifdef HAVE_HGE
569 size_t len = sizeof(hge);
570 hge l, *pval = &l;
571 if (hgeFromStr(CURRENT(cntxt), &len, &pval, true) < 0)
572 l = hge_nil;
573
574 if ((hge) GDK_int_min <= l && l <= (hge) GDK_int_max) {
575 cst->vtype = TYPE_int;
576 cst->val.ival = (int) l;
577 } else
578 if ((hge) GDK_lng_min <= l && l <= (hge) GDK_lng_max) {
579 cst->vtype = TYPE_lng;
580 cst->val.lval = (lng) l;
581 } else {
582 cst->vtype = TYPE_hge;
583 cst->val.hval = l;
584 if (is_hge_nil(l))
585 parseError(cntxt, "convertConstant: integer parse error\n");
586 }
587#else
588 size_t len = sizeof(lng);
589 lng l, *pval = &l;
590 if (lngFromStr(CURRENT(cntxt), &len, &pval, true) < 0)
591 l = lng_nil;
592
593 if ((lng) GDK_int_min <= l && l <= (lng) GDK_int_max) {
594 cst->vtype = TYPE_int;
595 cst->val.ival = (int) l;
596 } else {
597 cst->vtype = TYPE_lng;
598 cst->val.lval = l;
599 if (is_lng_nil(l))
600 parseError(cntxt, "convertConstant: integer parse error\n");
601 }
602#endif
603 }
604 return i;
605
606 case 'f':
607 if (strncmp(s, "false", 5) == 0 && !isalnum((unsigned char) *(s + 5)) &&
608 *(s + 5) != '_') {
609 cst->vtype = TYPE_bit;
610 cst->val.btval = 0;
611 cst->len = 1;
612 return 5;
613 }
614 return 0;
615 case 't':
616 if (strncmp(s, "true", 4) == 0 && !isalnum((unsigned char) *(s + 4)) &&
617 *(s + 4) != '_') {
618 cst->vtype = TYPE_bit;
619 cst->val.btval = 1;
620 cst->len = 1;
621 return 4;
622 }
623 return 0;
624 case 'n':
625 if (strncmp(s, "nil", 3) == 0 && !isalnum((unsigned char) *(s + 3)) &&
626 *(s + 3) != '_') {
627 cst->vtype = TYPE_void;
628 cst->len = 0;
629 cst->val.oval = oid_nil;
630 return 3;
631 }
632 }
633 return 0;
634}
635
/* Copying a constant token is the same operation as copying an identifier:
 * cstCopy(C,I) simply forwards to idCopy(C,I). */
#define cstCopy(C,I) idCopy(C,I)
637
638/* Type qualifier
639 * Types are recognized as identifiers preceded by a colon.
640 *
641 * The type ANY matches any type specifier.
642 * Appending it with an alias turns it into a type variable.
643 * The type alias is \$DIGIT (1-9) and can be used to relate types
644 * by type equality.
645 * The type variable are defined within the context of a function
646 * scope.
647 * Additional information, such as a repetition factor,
648 * encoding tables, or type dependency should be modeled as properties.
649 *
 * The tpe parameter of typeAlias is an int, matching the int returned by simpleTypeId.
651 */
652static int
653typeAlias(Client cntxt, int tpe)
654{
655 int t;
656
657 if (tpe != TYPE_any)
658 return -1;
659 if (currChar(cntxt) == TMPMARKER) {
660 nextChar(cntxt);
661 t = currChar(cntxt) - '0';
662 if (t <= 0 || t > 9)
663 parseError(cntxt, "[1-9] expected\n");
664 else
665 nextChar(cntxt);
666 return t;
667 }
668 return -1;
669}
670
671/*
672 * The simple type analysis currently assumes a proper type identifier.
673 * We should change getMALtype to return a failure instead.
674 */
675static int
676simpleTypeId(Client cntxt)
677{
678 int tpe;
679 size_t l;
680
681 nextChar(cntxt);
682 l = typeidLength(cntxt);
683 if (l == 0) {
684 parseError(cntxt, "Type identifier expected\n");
685 cntxt->yycur--; /* keep it */
686 return -1;
687 }
688 tpe = getAtomIndex(CURRENT(cntxt), l, -1);
689 if (tpe < 0) {
690 parseError(cntxt, "Type identifier expected\n");
691 cntxt->yycur -= l; /* keep it */
692 return TYPE_void;
693 }
694 advance(cntxt, l);
695 return tpe;
696}
697
698static int
699parseTypeId(Client cntxt, int defaultType)
700{
701 int i = TYPE_any, kt = 0;
702 char *s = CURRENT(cntxt);
703 int tt;
704
705 if (s[0] == ':' && s[1] == 'b' && s[2] == 'a' && s[3] == 't' && s[4] == '[') {
706 /* parse :bat[:oid,:type] */
707 advance(cntxt, 5);
708 if (currChar(cntxt) == ':') {
709 tt = simpleTypeId(cntxt);
710 kt = typeAlias(cntxt, tt);
711 } else{
712 parseError(cntxt, "':bat[:any]' expected\n");
713 return TYPE_bat;
714 }
715
716 i = newBatType(tt);
717 if (kt > 0)
718 setTypeIndex(i, kt);
719
720 if (currChar(cntxt) != ']')
721 parseError(cntxt, "']' expected\n");
722 nextChar(cntxt); // skip ']'
723 skipSpace(cntxt);
724 return i;
725 }
726 if (currChar(cntxt) == ':') {
727 tt = simpleTypeId(cntxt);
728 kt = typeAlias(cntxt, tt);
729 if (kt > 0)
730 setTypeIndex(tt, kt);
731 return tt;
732 }
733 parseError(cntxt, "<type identifier> expected\n");
734 return defaultType;
735}
736
737static inline int
738typeElm(Client cntxt, int def)
739{
740 if (currChar(cntxt) != ':')
741 return def; /* no type qualifier */
742 return parseTypeId(cntxt, def);
743}
744
745 /*
746 * The Parser
747 * The client is responsible to collect the
748 * input for parsing in a single string before calling the parser.
 * Once the input is available parsing runs in a critical section for
750 * a single client thread.
751 *
752 * The parser uses the rigid structure of the language to speedup
753 * analysis. In particular, each input line is translated into
754 * a MAL instruction record as quickly as possible. Its context is
755 * manipulated during the parsing process, by keeping the curPrg,
756 * curBlk, and curInstr variables.
757 *
758 * The language statements of the parser are gradually introduced, with
759 * the overall integration framework last.
760 * The convention is to return a zero when an error has been
761 * reported or when the structure can not be recognized.
 * Furthermore, we assume that blanks have been skipped before entering
763 * recognition of a new token.
764 *
765 * Module statement.
766 * The module and import commands have immediate effect.
767 * The module statement switches the location for symbol table update
768 * to a specific named area. The effect is that all definitions may become
 * globally known (?) and symbol table should be temporarily locked
770 * for updates by concurrent users.
771 *
772 * @multitable @columnfractions 0.15 0.8
773 * @item moduleStmt
774 * @tab : @sc{atom} ident [':'ident]
775 * @item
776 * @tab | @sc{module} ident
777 * @end multitable
778 *
779 * An atom statement does not introduce a new module.
780*/
781static void
782helpInfo(Client cntxt, str *help)
783{
784 int l;
785
786 if (MALkeyword(cntxt, "comment", 7)) {
787 skipSpace(cntxt);
788 if ((l = stringLength(cntxt))) {
789 GDKfree(*help);
790 *help = strCopy(cntxt, l);
791 if (*help)
792 advance(cntxt, l - 1);
793 skipToEnd(cntxt);
794 } else {
795 parseError(cntxt, "<string> expected\n");
796 }
797 } else if (currChar(cntxt) != ';')
798 parseError(cntxt, "';' expected\n");
799}
800
801static InstrPtr
802binding(Client cntxt, MalBlkPtr curBlk, InstrPtr curInstr, int flag)
803{
804 int l, varid = -1;
805 malType type;
806
807 l = idLength(cntxt);
808 if (l > 0) {
809 varid = findVariableLength(curBlk, CURRENT(cntxt), l);
810 if (varid < 0) {
811 varid = newVariable(curBlk, CURRENT(cntxt), l, TYPE_any);
812 advance(cntxt, l);
813 if ( varid < 0)
814 return curInstr;
815 type = typeElm(cntxt, TYPE_any);
816 if (isPolymorphic(type))
817 setPolymorphic(curInstr, type, TRUE);
818 setVarType(curBlk, varid, type);
819 } else if (flag) {
820 parseError(cntxt, "Argument defined twice\n");
821 typeElm(cntxt, getVarType(curBlk, varid));
822 } else {
823 advance(cntxt, l);
824 type = typeElm(cntxt, getVarType(curBlk, varid));
825 if( type != getVarType(curBlk,varid))
826 parseError(cntxt, "Incompatible argument type\n");
827 if (isPolymorphic(type))
828 setPolymorphic(curInstr, type, TRUE);
829 setVarType(curBlk, varid, type);
830 }
831 } else if (currChar(cntxt) == ':') {
832 type = typeElm(cntxt, TYPE_any);
833 varid = newTmpVariable(curBlk, type);
834 if ( varid < 0)
835 return curInstr;
836 if ( isPolymorphic(type))
837 setPolymorphic(curInstr, type, TRUE);
838 setVarType(curBlk, varid, type);
839 } else {
840 parseError(cntxt, "argument expected\n");
841 return curInstr;
842 }
843 if( varid >=0)
844 curInstr = pushArgument(curBlk, curInstr, varid);
845 return curInstr;
846}
847
848/*
849 * At this stage the LHS part has been parsed and the destination
850 * variables have been set. Next step is to parse the expression,
851 * which starts with an operand.
852 * This code is used in both positions of the expression
853 */
854static int
855term(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr, int ret)
856{
857 int i, idx, flag, free = 1;
858 ValRecord cst;
859 int cstidx = -1;
860 malType tpe = TYPE_any;
861
862 if ((i = cstToken(cntxt, &cst))) {
863 advance(cntxt, i);
864 if (currChar(cntxt) != ':' && cst.vtype == TYPE_dbl && cst.val.dval > FLT_MIN && cst.val.dval <= FLT_MAX) {
865 cst.vtype = TYPE_flt;
866 cst.val.fval = (flt) cst.val.dval;
867 }
868 cstidx = fndConstant(curBlk, &cst, MAL_VAR_WINDOW);
869 if (cstidx >= 0) {
870
871 if (currChar(cntxt) == ':') {
872 tpe = typeElm(cntxt, getVarType(curBlk, cstidx));
873 if (tpe < 0)
874 return 3;
875 if(tpe == getVarType(curBlk,cstidx) ){
876 setVarUDFtype(curBlk, cstidx);
877 } else {
878 cstidx = defConstant(curBlk, tpe, &cst);
879 setPolymorphic(*curInstr, tpe, FALSE);
880 setVarUDFtype(curBlk, cstidx);
881 free = 0;
882 }
883 } else if (cst.vtype != getVarType(curBlk, cstidx)) {
884 cstidx = defConstant(curBlk, cst.vtype, &cst);
885 setPolymorphic(*curInstr, cst.vtype, FALSE);
886 free = 0;
887 }
888 /* protect against leaks coming from constant reuse */
889 if (free && ATOMextern(cst.vtype) && cst.val.pval)
890 VALclear(&cst);
891 *curInstr = pushArgument(curBlk, *curInstr, cstidx);
892 return ret;
893 } else {
894 /* add a new constant */
895 flag = currChar(cntxt) == ':';
896 tpe = typeElm(cntxt, cst.vtype);
897 if (tpe < 0)
898 return 3;
899 cstidx = defConstant(curBlk, tpe, &cst);
900 setPolymorphic(*curInstr, tpe, FALSE);
901 if (flag)
902 setVarUDFtype(curBlk, cstidx);
903 *curInstr = pushArgument(curBlk, *curInstr, cstidx);
904 return ret;
905 }
906 } else if ((i = idLength(cntxt))) {
907 if ((idx = findVariableLength(curBlk, CURRENT(cntxt), i)) == -1) {
908 idx = newVariable(curBlk, CURRENT(cntxt), i, TYPE_any);
909 advance(cntxt, i);
910 if( idx <0)
911 return 0;
912 } else {
913 advance(cntxt, i);
914 }
915 if (currChar(cntxt) == ':') {
916 /* skip the type description */
917 tpe = typeElm(cntxt, TYPE_any);
918 if (getVarType(curBlk, idx) == TYPE_any)
919 setVarType(curBlk,idx, tpe);
920 else if (getVarType(curBlk, idx) != tpe){
921 /* non-matching types */
922 return 4;
923 }
924 }
925 *curInstr = pushArgument(curBlk, *curInstr, idx);
926 } else if (currChar(cntxt) == ':') {
927 tpe = typeElm(cntxt, TYPE_any);
928 if (tpe < 0)
929 return 3;
930 setPolymorphic(*curInstr, tpe, FALSE);
931 idx = newTypeVariable(curBlk, tpe);
932 *curInstr = pushArgument(curBlk, *curInstr, idx);
933 return ret;
934 }
935 return 0;
936}
937
938static int
939parseAtom(Client cntxt)
940{
941 str modnme = 0;
942 int l, tpe;
943 char *nxt = CURRENT(cntxt);
944
945 if ((l = idLength(cntxt)) <= 0){
946 parseError(cntxt, "atom name expected\n");
947 return -1;
948 }
949
950 /* parse: ATOM id:type */
951 modnme = putNameLen(nxt, l);
952 advance(cntxt, l);
953 if (currChar(cntxt) != ':')
954 tpe = TYPE_void; /* no type qualifier */
955 else
956 tpe = parseTypeId(cntxt, TYPE_int);
957 if( ATOMindex(modnme) >= 0)
958 parseError(cntxt, "Atom redefinition\n");
959 else {
960 if(cntxt->curprg->def->errors)
961 GDKfree(cntxt->curprg->def->errors);
962 cntxt->curprg->def->errors = malAtomDefinition(modnme, tpe);
963 }
964 if( strcmp(modnme,"user"))
965 cntxt->curmodule = fixModule(modnme);
966 else cntxt->curmodule = cntxt->usermodule;
967 cntxt->usermodule->isAtomModule = TRUE;
968 skipSpace(cntxt);
969 helpInfo(cntxt, &cntxt->usermodule->help);
970 return 0;
971}
972
973/*
974 * All modules, except 'user', should be global
975 */
976static int
977parseModule(Client cntxt)
978{
979 str modnme = 0;
980 int l;
981 char *nxt;
982
983 nxt = CURRENT(cntxt);
984 if ((l = idLength(cntxt)) <= 0){
985 parseError(cntxt, "<module path> expected\n");
986 return -1;
987 }
988 modnme = putNameLen(nxt, l);
989 advance(cntxt, l);
990 if( strcmp(modnme, cntxt->usermodule->name) ==0){
991 // ignore this module definition
992 } else
993 if( getModule(modnme) == NULL){
994#ifdef _DEBUG_PARSER_
995 fprintf(stderr,"Module create %s\n",modnme);
996#endif
997 if( globalModule(modnme) == NULL)
998 parseError(cntxt,"<module> could not be created");
999 }
1000 if( strcmp(modnme,"user"))
1001 cntxt->curmodule = fixModule(modnme);
1002 else cntxt->curmodule = cntxt->usermodule;
1003 skipSpace(cntxt);
1004 helpInfo(cntxt, &cntxt->usermodule->help);
1005 return 0;
1006}
1007
1008/*
1009 * Include files should be handled in line with parsing. This way we
1010 * are ensured that any possible signature definition will be known
1011 * afterwards. The effect is that errors in the include sequence are
1012 * marked as warnings.
1013 */
1014static int
1015parseInclude(Client cntxt)
1016{
1017 str modnme = 0, s;
1018 int x;
1019 char *nxt;
1020
1021 nxt = CURRENT(cntxt);
1022
1023 if ((x = idLength(cntxt)) > 0) {
1024 modnme = putNameLen(nxt, x);
1025 advance(cntxt, x);
1026 } else if ((x = stringLength(cntxt)) > 0) {
1027 modnme = putNameLen(nxt + 1, x - 1);
1028 advance(cntxt, x);
1029 } else{
1030 parseError(cntxt, "<module name> expected\n");
1031 return -1;
1032 }
1033
1034 if (currChar(cntxt) != ';') {
1035 parseError(cntxt, "';' expected\n");
1036 return 0;
1037 }
1038 skipToEnd(cntxt);
1039
1040 if (!malLibraryEnabled(modnme)) {
1041 return 0;
1042 }
1043
1044 s = loadLibrary(modnme, FALSE);
1045 if (s) {
1046 parseError(cntxt, s);
1047 GDKfree(s);
1048 return 0;
1049 }
1050 if ((s = malInclude(cntxt, modnme, 0))) {
1051 parseError(cntxt, s);
1052 GDKfree(s);
1053 return 0;
1054 }
1055 return 0;
1056}
1057
1058/*
1059 * Definition
1060 * The definition statements share a lot in common, which calls for factoring
1061 * out the code in a few text macros. Upon encountering a definition, we
1062 * initialize a MAL instruction container. We should also check for
1063 * non-terminated definitions.
1064 *
1065 * Beware, a function signature f(a1..an):(b1..bn) is parsed in such a way that
1066 * the symbol table and stackframe contains the sequence
1067 * f,a1..an,b1..bn. This slightly complicates the implementation
1068 * of the return statement.
1069 *
1070 * Note, the function name could be mod.fcn, which calls for storing
1071 * the function definition in a particular module instead of the current one.
1072 */
1073static MalBlkPtr
1074fcnHeader(Client cntxt, int kind)
1075{
1076 int l;
1077 malType tpe;
1078 str fnme, modnme = NULL;
1079 char ch;
1080 Symbol curPrg;
1081 MalBlkPtr curBlk = 0;
1082 InstrPtr curInstr;
1083
1084 l = operatorLength(cntxt);
1085 if (l == 0)
1086 l = idLength(cntxt);
1087 if (l == 0) {
1088 parseError(cntxt, "<identifier> | <operator> expected\n");
1089 return 0;
1090 }
1091
1092 fnme = putNameLen(((char *) CURRENT(cntxt)), l);
1093 advance(cntxt, l);
1094
1095 if (currChar(cntxt) == '.') {
1096 nextChar(cntxt); /* skip '.' */
1097 modnme = fnme;
1098 if( strcmp(modnme,"user") && getModule(modnme) == NULL){
1099 parseError(cntxt, "<module> name not defined\n");
1100 return 0;
1101 }
1102 l = operatorLength(cntxt);
1103 if (l == 0)
1104 l = idLength(cntxt);
1105 if (l == 0){
1106 parseError(cntxt, "<identifier> | <operator> expected\n");
1107 return 0;
1108 }
1109 fnme = putNameLen(((char *) CURRENT(cntxt)), l);
1110 advance(cntxt, l);
1111 } else
1112 modnme= cntxt->curmodule->name;
1113
1114 /* temporary suspend capturing statements in main block */
1115 if (cntxt->backup){
1116 parseError(cntxt, "mal_parser: unexpected recursion\n");
1117 return 0;
1118 }
1119 if (currChar(cntxt) != '('){
1120 parseError(cntxt, "function header '(' expected\n");
1121 return curBlk;
1122 }
1123 advance(cntxt, 1);
1124
1125 assert(!cntxt->backup);
1126 cntxt->backup = cntxt->curprg;
1127 cntxt->curprg = newFunction( modnme, fnme, kind);
1128 if(cntxt->curprg == NULL) {
1129 cntxt->curprg = cntxt->backup;
1130 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1131 return 0;
1132 }
1133 cntxt->curprg->def->errors = cntxt->backup->def->errors;
1134 cntxt->backup->def->errors = 0;
1135 curPrg = cntxt->curprg;
1136 curBlk = curPrg->def;
1137 curInstr = getInstrPtr(curBlk, 0);
1138
1139 /* get calling parameters */
1140 ch = currChar(cntxt);
1141 while (ch != ')' && ch && !NL(ch)) {
1142 curInstr = binding(cntxt, curBlk, curInstr, 1);
1143 /* the last argument may be variable length */
1144 if (MALkeyword(cntxt, "...", 3)) {
1145 curInstr->varargs |= VARARGS;
1146 setPolymorphic(curInstr, TYPE_any, TRUE);
1147 break;
1148 }
1149 if ((ch = currChar(cntxt)) != ',') {
1150 if (ch == ')')
1151 break;
1152 if (cntxt->backup) {
1153 freeSymbol(cntxt->curprg);
1154 cntxt->curprg = cntxt->backup;
1155 cntxt->backup = 0;
1156 curBlk = NULL;
1157 }
1158 parseError(cntxt, "',' expected\n");
1159 return curBlk;
1160 } else
1161 nextChar(cntxt); /* skip ',' */
1162 skipSpace(cntxt);
1163 ch = currChar(cntxt);
1164 }
1165 if (currChar(cntxt) != ')') {
1166 pushInstruction(curBlk, curInstr);
1167 if (cntxt->backup) {
1168 freeSymbol(cntxt->curprg);
1169 cntxt->curprg = cntxt->backup;
1170 cntxt->backup = 0;
1171 curBlk = NULL;
1172 }
1173 parseError(cntxt, "')' expected\n");
1174 return curBlk;
1175 }
1176 advance(cntxt, 1); /* skip ')' */
1177/*
1178 The return type is either a single type or multiple return type structure.
1179 We simply keep track of the number of arguments added and
1180 during the final phase reshuffle the return values to the beginning (?)
1181 */
1182 if (currChar(cntxt) == ':') {
1183 tpe = typeElm(cntxt, TYPE_void);
1184 setPolymorphic(curInstr, tpe, TRUE);
1185 setVarType(curBlk, curInstr->argv[0], tpe);
1186 /* we may be confronted by a variable target type list */
1187 if (MALkeyword(cntxt, "...", 3)) {
1188 curInstr->varargs |= VARRETS;
1189 setPolymorphic(curInstr, TYPE_any, TRUE);
1190 }
1191
1192 } else if (keyphrase1(cntxt, "(")) { /* deal with compound return */
1193 int retc = curInstr->argc, i1, i2 = 0;
1194 int max;
1195 short *newarg;
1196 /* parse multi-target result */
1197 /* skipSpace(cntxt);*/
1198 ch = currChar(cntxt);
1199 while (ch != ')' && ch && !NL(ch)) {
1200 curInstr = binding(cntxt, curBlk, curInstr, 0);
1201 /* we may be confronted by a variable target type list */
1202 if (MALkeyword(cntxt, "...", 3)) {
1203 curInstr->varargs |= VARRETS;
1204 setPolymorphic(curInstr, TYPE_any, TRUE);
1205 }
1206 if ((ch = currChar(cntxt)) != ',') {
1207 if (ch == ')')
1208 break;
1209 if (cntxt->backup) {
1210 freeSymbol(cntxt->curprg);
1211 cntxt->curprg = cntxt->backup;
1212 cntxt->backup = 0;
1213 curBlk = NULL;
1214 }
1215 parseError(cntxt, "',' expected\n");
1216 return curBlk;
1217 } else {
1218 nextChar(cntxt); /* skip ',' */
1219 }
1220 skipSpace(cntxt);
1221 ch = currChar(cntxt);
1222 }
1223 /* re-arrange the parameters, results first*/
1224 max = curInstr->maxarg;
1225 newarg = (short *) GDKmalloc(max * sizeof(curInstr->argv[0]));
1226 if (newarg == NULL){
1227 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1228 if (cntxt->backup) {
1229 freeSymbol(cntxt->curprg);
1230 cntxt->curprg = cntxt->backup;
1231 cntxt->backup = 0;
1232 curBlk = NULL;
1233 }
1234 return curBlk;
1235 }
1236 for (i1 = retc; i1 < curInstr->argc; i1++)
1237 newarg[i2++] = curInstr->argv[i1];
1238 curInstr->retc = curInstr->argc - retc;
1239 for (i1 = 1; i1 < retc; i1++)
1240 newarg[i2++] = curInstr->argv[i1];
1241 curInstr->argc = i2;
1242 for (; i2 < max; i2++)
1243 newarg[i2] = 0;
1244 for (i1 = 0; i1 < max; i1++)
1245 curInstr->argv[i1] = newarg[i1];
1246 GDKfree(newarg);
1247 if (currChar(cntxt) != ')') {
1248 freeInstruction(curInstr);
1249 if (cntxt->backup) {
1250 freeSymbol(cntxt->curprg);
1251 cntxt->curprg = cntxt->backup;
1252 cntxt->backup = 0;
1253 curBlk = NULL;
1254 }
1255 parseError(cntxt, "')' expected\n");
1256 return curBlk;
1257 }
1258 nextChar(cntxt); /* skip ')' */
1259 } else { /* default */
1260 setVarType(curBlk, 0, TYPE_void);
1261 }
1262 if (curInstr != getInstrPtr(curBlk, 0)) {
1263 freeInstruction(getInstrPtr(curBlk, 0));
1264 getInstrPtr(curBlk, 0) = curInstr;
1265 }
1266 return curBlk;
1267}
1268
1269static MalBlkPtr
1270parseCommandPattern(Client cntxt, int kind)
1271{
1272 MalBlkPtr curBlk = 0;
1273 Symbol curPrg = 0;
1274 InstrPtr curInstr = 0;
1275 str modnme = NULL;
1276 size_t l = 0;
1277
1278 curBlk = fcnHeader(cntxt, kind);
1279 if (curBlk == NULL) {
1280 cntxt->blkmode = 0;
1281 return curBlk;
1282 }
1283 getInstrPtr(curBlk, 0)->token = kind;
1284 curPrg = cntxt->curprg;
1285 curPrg->kind = kind;
1286 curInstr = getInstrPtr(curBlk, 0);
1287
1288 modnme = getModuleId(getInstrPtr(curBlk, 0));
1289 if (modnme && (getModule(modnme) == FALSE && strcmp(modnme,"user"))){
1290 parseError(cntxt, "<module> not defined\n");
1291 cntxt->blkmode = 0;
1292 return curBlk;
1293 }
1294 modnme = modnme ? modnme : cntxt->usermodule->name;
1295
1296 l = strlen(modnme);
1297 modnme = putNameLen(modnme, l);
1298 if ( strcmp(modnme,"user")== 0 || getModule(modnme)){
1299 if ( strcmp(modnme,"user") == 0)
1300 insertSymbol(cntxt->usermodule, curPrg);
1301 else
1302 insertSymbol(getModule(modnme), curPrg);
1303 chkProgram(cntxt->usermodule, curBlk);
1304 if(cntxt->curprg->def->errors)
1305 GDKfree(cntxt->curprg->def->errors);
1306 cntxt->curprg->def->errors = cntxt->backup->def->errors;
1307 cntxt->backup->def->errors = 0;
1308 cntxt->curprg = cntxt->backup;
1309 cntxt->backup = 0;
1310 } else {
1311 freeSymbol(curPrg);
1312 cntxt->curprg = cntxt->backup;
1313 cntxt->backup = 0;
1314 parseError(cntxt, "<module> not found\n");
1315 return 0;
1316 }
1317/*
1318 * Short-cut function calls
1319 * Most functions are (dynamically) linked with the kernel as
1320 * commands or pattern definitions. This enables for fast execution.
1321 *
1322 * In addition we allow functions to be bound to both
1323 * a linked C-function and a MAL specification block.
1324 * It the function address is not available, the interpreter
1325 * will use the MAL block instead.
1326 * This scheme is intended for just-in-time compilation.
1327 *
1328 * [note, command and patterns do not have a MAL block]
1329 */
1330 if (MALkeyword(cntxt, "address", 7)) {
1331 int i;
1332 i = idLength(cntxt);
1333 if (i == 0) {
1334 parseError(cntxt, "address <identifier> expected\n");
1335 return 0;
1336 }
1337 cntxt->blkmode = 0;
1338 if (getModuleId(curInstr))
1339 setModuleId(curInstr, NULL);
1340 setModuleScope(curInstr,
1341 findModule(cntxt->usermodule, modnme));
1342
1343 memcpy(curBlk->binding, CURRENT(cntxt), (size_t)(i < IDLENGTH? i:IDLENGTH-1));
1344 curBlk->binding[(i< IDLENGTH? i:IDLENGTH-1)] = 0;
1345 /* avoid a clash with old temporaries */
1346 advance(cntxt, i);
1347 curInstr->fcn = getAddress(curBlk->binding);
1348
1349 if (cntxt->usermodule->isAtomModule) {
1350 if (curInstr->fcn == NULL) {
1351 parseError(cntxt, "<address> not found\n");
1352 return 0;
1353 }
1354 malAtomProperty(curBlk, curInstr);
1355 }
1356 skipSpace(cntxt);
1357 } else {
1358 parseError(cntxt, "'address' expected\n");
1359 return 0;
1360 }
1361 helpInfo(cntxt, &curBlk->help);
1362#ifdef HAVE_HGE
1363 if (!have_hge)
1364 have_hge = strcmp(modnme, "calc") == 0 && strcmp(getFunctionId(curInstr), "hge") == 0;
1365#endif
1366 return curBlk;
1367}
1368
1369static MalBlkPtr
1370parseFunction(Client cntxt, int kind)
1371{
1372 MalBlkPtr curBlk = 0;
1373
1374 curBlk = fcnHeader(cntxt, kind);
1375 if (curBlk == NULL)
1376 return curBlk;
1377 if (MALkeyword(cntxt, "address", 7)) {
1378 str nme;
1379 int i;
1380 InstrPtr curInstr = getInstrPtr(curBlk, 0);
1381 i = idLength(cntxt);
1382 if (i == 0) {
1383 parseError(cntxt, "<identifier> expected\n");
1384 return 0;
1385 }
1386 nme = idCopy(cntxt, i);
1387 if (nme == NULL) {
1388 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1389 return 0;
1390 }
1391 curInstr->fcn = getAddress(nme);
1392 GDKfree(nme);
1393 if (curInstr->fcn == NULL) {
1394 parseError(cntxt, "<address> not found\n");
1395 return 0;
1396 }
1397 skipSpace(cntxt);
1398 }
1399 /* block is terminated at the END statement */
1400 helpInfo(cntxt, &curBlk->help);
1401 return curBlk;
1402}
1403
1404/*
1405 * Functions and factories end with a labeled end-statement.
1406 * The routine below checks for misalignment of the closing statements.
1407 * Any instruction parsed after the function block is considered an error.
1408 */
1409static int
1410parseEnd(Client cntxt)
1411{
1412 Symbol curPrg = 0;
1413 size_t l;
1414 InstrPtr sig;
1415 str errors = MAL_SUCCEED;
1416
1417 if (MALkeyword(cntxt, "end", 3)) {
1418 curPrg = cntxt->curprg;
1419 l = idLength(cntxt);
1420 if (l == 0)
1421 l = operatorLength(cntxt);
1422 sig = getInstrPtr(cntxt->curprg->def,0);
1423 if (strncmp(CURRENT(cntxt), getModuleId(sig), l) == 0) {
1424 advance(cntxt, l);
1425 skipSpace(cntxt);
1426 if (currChar(cntxt) == '.')
1427 nextChar(cntxt);
1428 skipSpace(cntxt);
1429 l = idLength(cntxt);
1430 if (l == 0)
1431 l = operatorLength(cntxt);
1432 }
1433 /* parse fcn */
1434 if ((l == strlen(curPrg->name) &&
1435 strncmp(CURRENT(cntxt), curPrg->name, l) == 0) || l == 0)
1436 advance(cntxt, l);
1437 else
1438 parseError(cntxt, "non matching end label\n");
1439 pushEndInstruction(cntxt->curprg->def);
1440 cntxt->blkmode = 0;
1441 if ( strcmp(getModuleId(sig),"user")== 0 )
1442 insertSymbol(cntxt->usermodule, cntxt->curprg);
1443 else
1444 insertSymbol(getModule(getModuleId(sig)), cntxt->curprg);
1445
1446 if (cntxt->curprg->def->errors) {
1447 errors = cntxt->curprg->def->errors;
1448 cntxt->curprg->def->errors=0;
1449 }
1450 chkProgram(cntxt->usermodule, cntxt->curprg->def);
1451 // check for newly identified errors
1452 if (errors == NULL){
1453 errors = cntxt->curprg->def->errors;
1454 cntxt->curprg->def->errors=0;
1455 } else if (cntxt->curprg->def->errors) {
1456 //collect all errors for reporting
1457 str new = GDKzalloc(strlen(errors) + strlen(cntxt->curprg->def->errors) +16);
1458 if (new){
1459 strcpy(new, errors);
1460 if( new[strlen(new)-1] != '\n')
1461 strcat(new,"\n");
1462 strcat(new,"!");
1463 strcat(new,cntxt->curprg->def->errors);
1464
1465 freeException(errors);
1466 freeException(cntxt->curprg->def->errors);
1467
1468 cntxt->curprg->def->errors=0;
1469 errors = new;
1470 }
1471 }
1472
1473 if (cntxt->backup) {
1474 cntxt->curprg = cntxt->backup;
1475 cntxt->backup = 0;
1476 } else {
1477 str msg;
1478 if((msg = MSinitClientPrg(cntxt,cntxt->curmodule->name,"main")) != MAL_SUCCEED) {
1479 if(!errors)
1480 cntxt->curprg->def->errors = msg;
1481 else
1482 freeException(msg);
1483 return 1;
1484 }
1485 }
1486 // pass collected errors to context
1487 assert(cntxt->curprg->def->errors == NULL);
1488 cntxt->curprg->def->errors = errors;
1489 return 1;
1490 }
1491 return 0;
1492}
1493/*
1494 * Most instructions are simple assignments, possibly
1495 * modified with a barrier/catch tag.
1496 *
1497 * The basic types are also predefined as a variable.
1498 * This makes it easier to communicate types to MAL patterns.
1499 */
1500
/*
 * Resolve the identifier of length 'l' at the current parse position to
 * a variable of 'curBlk' and store its index in 'varid'.  An unknown
 * identifier is registered as a new variable of type TYPE_any.  The
 * parser is advanced past the identifier in both cases.
 *
 * The expansion uses 'cntxt', 'curBlk', 'l' and 'varid' from the
 * enclosing scope.  When the variable cannot be created (varid < 0),
 * FREE is executed and the enclosing function returns, so the macro can
 * only be used inside functions returning void.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and cannot capture a following 'else'.
 */
#define GETvariable(FREE) \
	do { \
		if ((varid = findVariableLength(curBlk, CURRENT(cntxt), l)) == -1) { \
			varid = newVariable(curBlk, CURRENT(cntxt), l, TYPE_any); \
			advance(cntxt, l); \
			if (varid < 0) { FREE; return; } \
		} else { \
			advance(cntxt, l); \
		} \
	} while (0)
1508
1509/* The parameter of parseArguments is the return value of the enclosing function. */
1510static int
1511parseArguments(Client cntxt, MalBlkPtr curBlk, InstrPtr *curInstr)
1512{
1513 while (currChar(cntxt) != ')') {
1514 switch (term(cntxt, curBlk, curInstr, 0)) {
1515 case 0:
1516 break;
1517 case 2: return 2;
1518 case 3: return 3;
1519 case 4:
1520 parseError(cntxt, "Argument type overwrites previous definition\n");
1521 return 0;
1522 default:
1523 parseError(cntxt, "<factor> expected\n");
1524 pushInstruction(curBlk, *curInstr);
1525 return 1;
1526 }
1527 if (currChar(cntxt) == ',')
1528 advance(cntxt, 1);
1529 else if (currChar(cntxt) != ')') {
1530 parseError(cntxt, "',' expected\n");
1531 cntxt->yycur--; /* keep it */
1532 break;
1533 }
1534 }
1535 if (currChar(cntxt) == ')')
1536 advance(cntxt, 1);
1537 return 0;
1538}
1539
1540static void
1541parseAssign(Client cntxt, int cntrl)
1542{
1543 InstrPtr curInstr;
1544 MalBlkPtr curBlk;
1545 Symbol curPrg;
1546 int i = 0, l, type = TYPE_any, varid = -1;
1547 str arg = 0;
1548 ValRecord cst;
1549
1550 curPrg = cntxt->curprg;
1551 curBlk = curPrg->def;
1552 if((curInstr = newInstruction(curBlk, NULL, NULL)) == NULL) {
1553 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1554 return;
1555 }
1556
1557 if( cntrl){
1558 curInstr->token = ASSIGNsymbol;
1559 curInstr->barrier = cntrl;
1560 }
1561
1562 /* start the parsing by recognition of the lhs of an assignment */
1563 if (currChar(cntxt) == '(') {
1564 /* parsing multi-assignment */
1565 advance(cntxt, 1);
1566 curInstr->argc = 0; /*reset to handle pushArg correctly !! */
1567 curInstr->retc = 0;
1568 while (currChar(cntxt) != ')' && currChar(cntxt)) {
1569 l = idLength(cntxt);
1570 i = cstToken(cntxt, &cst);
1571 if (l == 0 || i) {
1572 parseError(cntxt, "<identifier> expected\n");
1573 pushInstruction(curBlk, curInstr);
1574 return;
1575 }
1576 GETvariable(freeInstruction(curInstr));
1577 if (currChar(cntxt) == ':') {
1578 setVarUDFtype(curBlk, varid);
1579 type = typeElm(cntxt, getVarType(curBlk, varid));
1580 if (type < 0)
1581 goto part3;
1582 setPolymorphic(curInstr, type, FALSE);
1583 setVarType(curBlk, varid, type);
1584 }
1585 curInstr = pushArgument(curBlk, curInstr, varid);
1586 curInstr->retc++;
1587 if (currChar(cntxt) == ')')
1588 break;
1589 if (currChar(cntxt) == ',')
1590 keyphrase1(cntxt, ",");
1591 }
1592 advance(cntxt, 1); /* skip ')' */
1593 if (curInstr->retc == 0) {
1594 /* add dummy variable */
1595 curInstr = pushArgument(curBlk, curInstr, newTmpVariable(curBlk, TYPE_any));
1596 curInstr->retc++;
1597 }
1598 } else {
1599 /* are we dealing with a simple assignment? */
1600 l = idLength(cntxt);
1601 i = cstToken(cntxt, &cst);
1602 if (l == 0 || i) {
1603 /* we haven't seen a target variable */
1604 /* flow of control statements may end here. */
1605 /* shouldn't allow for nameless controls todo*/
1606 if (i && cst.vtype == TYPE_str)
1607 GDKfree(cst.val.sval);
1608 if (cntrl == LEAVEsymbol || cntrl == REDOsymbol ||
1609 cntrl == RETURNsymbol || cntrl == EXITsymbol) {
1610 curInstr->argv[0] = getBarrierEnvelop(curBlk);
1611 pushInstruction(curBlk, curInstr);
1612 if (currChar(cntxt) != ';')
1613 parseError(cntxt, "<identifier> expected in control statement\n");
1614 return;
1615 }
1616 getArg(curInstr, 0) = newTmpVariable(curBlk, TYPE_any);
1617 pushInstruction(curBlk, curInstr);
1618 parseError(cntxt, "<identifier> expected\n");
1619 return;
1620 }
1621 /* Check if we are dealing with module.fcn call*/
1622 if (CURRENT(cntxt)[l] == '.' || CURRENT(cntxt)[l] == '(') {
1623 curInstr->argv[0] = newTmpVariable(curBlk, TYPE_any);
1624 goto FCNcallparse;
1625 }
1626
1627 /* Get target variable details*/
1628 GETvariable(freeInstruction(curInstr));
1629 if (!(currChar(cntxt) == ':' && CURRENT(cntxt)[1] == '=')) {
1630 curInstr->argv[0] = varid;
1631 if (currChar(cntxt) == ':') {
1632 setVarUDFtype(curBlk, varid);
1633 type = typeElm(cntxt, getVarType(curBlk, varid));
1634 if (type < 0)
1635 goto part3;
1636 setPolymorphic(curInstr, type, FALSE);
1637 setVarType(curBlk, varid, type);
1638 }
1639 }
1640 curInstr->argv[0] = varid;
1641 }
1642 /* look for assignment operator */
1643 if (!keyphrase2(cntxt, ":=")) {
1644 /* no assignment !! a control variable is allowed */
1645 /* for the case RETURN X, we normalize it to include the function arguments */
1646 if (cntrl == RETURNsymbol || cntrl == YIELDsymbol) {
1647 int e;
1648 InstrPtr sig = getInstrPtr(curBlk,0);
1649 curInstr->retc = 0;
1650 for (e = 0; e < sig->retc; e++)
1651 curInstr = pushReturn(curBlk, curInstr, getArg(sig, e));
1652 }
1653
1654 goto part3;
1655 }
1656 if (currChar(cntxt) == '(') {
1657 /* parse multi assignment */
1658 advance(cntxt, 1);
1659 switch (parseArguments(cntxt, curBlk, &curInstr)) {
1660 case 2: goto part2;
1661 default:
1662 case 3: goto part3;
1663 }
1664 /* unreachable */
1665 }
1666/*
1667 * We have so far the LHS part of an assignment. The remainder is
1668 * either a simple term expression, a multi assignent, or the start
1669 * of a function call.
1670 */
1671FCNcallparse:
1672 if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '(') {
1673 /* parseError(cntxt,"<module> expected\n");*/
1674 setModuleId(curInstr, cntxt->curmodule->name);
1675 i = l;
1676 goto FCNcallparse2;
1677 } else if ((l = idLength(cntxt)) && CURRENT(cntxt)[l] == '.') {
1678 /* continue with parseing a function/operator call */
1679 arg = putNameLen(CURRENT(cntxt), l);
1680 advance(cntxt, l + 1); /* skip '.' too */
1681 setModuleId(curInstr, arg);
1682 i = idLength(cntxt);
1683 if (i == 0)
1684 i = operatorLength(cntxt);
1685FCNcallparse2:
1686 if (i) {
1687 setFunctionId(curInstr, putNameLen(((char *) CURRENT(cntxt)), i));
1688 advance(cntxt, i);
1689 } else {
1690 parseError(cntxt, "<functionname> expected\n");
1691 pushInstruction(curBlk, curInstr);
1692 return;
1693 }
1694 skipSpace(cntxt);
1695 if (currChar(cntxt) != '(') {
1696 parseError(cntxt, "'(' expected\n");
1697 pushInstruction(curBlk, curInstr);
1698 return;
1699 }
1700 advance(cntxt, 1);
1701 switch (parseArguments(cntxt, curBlk, &curInstr)) {
1702 case 2: goto part2;
1703 default:
1704 case 3: goto part3;
1705 }
1706 /* unreachable */
1707 }
1708 /* Handle the ordinary assignments and expressions */
1709 switch (term(cntxt, curBlk, &curInstr, 2)) {
1710 case 2: goto part2;
1711 case 3: goto part3;
1712 }
1713part2: /* consume <operator><term> part of expression */
1714 if ((i = operatorLength(cntxt))) {
1715 /* simple arithmetic operator expression */
1716 setFunctionId(curInstr, putNameLen(((char *) CURRENT(cntxt)), i));
1717 advance(cntxt, i);
1718 curInstr->modname = putName("calc");
1719 if ((l = idLength(cntxt)) && !(l == 3 && strncmp(CURRENT(cntxt), "nil", 3) == 0)) {
1720 GETvariable(freeInstruction(curInstr));
1721 curInstr = pushArgument(curBlk, curInstr, varid);
1722 goto part3;
1723 }
1724 switch (term(cntxt, curBlk, &curInstr, 3)) {
1725 case 2: goto part2;
1726 case 3: goto part3;
1727 }
1728 parseError(cntxt, "<term> expected\n");
1729 pushInstruction(curBlk, curInstr);
1730 return;
1731 } else {
1732 skipSpace(cntxt);
1733 if (currChar(cntxt) == '(')
1734 parseError(cntxt, "module name missing\n");
1735 else if (currChar(cntxt) != ';' && currChar(cntxt) != '#')
1736 parseError(cntxt, "operator expected\n");
1737 pushInstruction(curBlk, curInstr);
1738 return;
1739 }
1740part3:
1741 skipSpace(cntxt);
1742 if (currChar(cntxt) != ';') {
1743 parseError(cntxt, "';' expected\n");
1744 skipToEnd(cntxt);
1745 pushInstruction(curBlk, curInstr);
1746 return;
1747 }
1748 skipToEnd(cntxt);
1749 pushInstruction(curBlk, curInstr);
1750 if (cntrl == RETURNsymbol && !(curInstr->token == ASSIGNsymbol || getModuleId(curInstr) != 0))
1751 parseError(cntxt, "return assignment expected\n");
1752}
1753
1754void
1755parseMAL(Client cntxt, Symbol curPrg, int skipcomments, int lines)
1756{
1757 int cntrl = 0;
1758 /*Symbol curPrg= cntxt->curprg;*/
1759 char c;
1760 int inlineProp =0, unsafeProp = 0, sealedProp = 0;
1761
1762 (void) curPrg;
1763 echoInput(cntxt);
1764 /* here the work takes place */
1765 while ((c = currChar(cntxt)) && lines > 0) {
1766 switch (c) {
1767 case '\n': case '\r': case '\f':
1768 lines -= c =='\n';
1769 nextChar(cntxt);
1770 echoInput(cntxt);
1771 continue;
1772 case ';': case '\t': case ' ':
1773 nextChar(cntxt);
1774 continue;
1775 case '#':
1776 { /* keep the full line comments */
1777 char start[256], *e = start, c;
1778 MalBlkPtr curBlk = cntxt->curprg->def;
1779 InstrPtr curInstr;
1780
1781 *e = 0;
1782 nextChar(cntxt);
1783 while ((c = currChar(cntxt))) {
1784 if (e < start + 256 - 1)
1785 *e++ = c;
1786 nextChar(cntxt);
1787 if (c == '\n' || c == '\r') {
1788 *e = 0;
1789 if (e > start)
1790 e--;
1791 /* prevChar(cntxt);*/
1792 break;
1793 }
1794 }
1795 if (e > start)
1796 *e = 0;
1797 if (! skipcomments && e > start && curBlk->stop > 0 ) {
1798 ValRecord cst;
1799 if((curInstr = newInstruction(curBlk, NULL, NULL)) == NULL) {
1800 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1801 continue;
1802 }
1803 curInstr->token= REMsymbol;
1804 curInstr->barrier= 0;
1805 cst.vtype = TYPE_str;
1806 cst.len = strlen(start);
1807 if((cst.val.sval = GDKstrdup(start)) == NULL) {
1808 parseError(cntxt, SQLSTATE(HY001) MAL_MALLOC_FAIL);
1809 freeInstruction(curInstr);
1810 continue;
1811 }
1812 getArg(curInstr, 0) = defConstant(curBlk, TYPE_str, &cst);
1813 clrVarConstant(curBlk, getArg(curInstr, 0));
1814 setVarDisabled(curBlk, getArg(curInstr, 0));
1815 pushInstruction(curBlk, curInstr);
1816 }
1817 echoInput(cntxt);
1818 }
1819 continue;
1820 case 'A': case 'a':
1821 if (MALkeyword(cntxt, "atom", 4) &&
1822 parseAtom(cntxt) == 0)
1823 break;
1824 goto allLeft;
1825 case 'b': case 'B':
1826 if (MALkeyword(cntxt, "barrier", 7)) {
1827 cntxt->blkmode++;
1828 cntrl = BARRIERsymbol;
1829 }
1830 goto allLeft;
1831 case 'C': case 'c':
1832 if (MALkeyword(cntxt, "command", 7)) {
1833 MalBlkPtr p = parseCommandPattern(cntxt, COMMANDsymbol);
1834 if (p) {
1835 p->unsafeProp = unsafeProp;
1836 p->sealedProp = sealedProp;
1837 }
1838 cntxt->curprg->def->unsafeProp = unsafeProp;
1839 cntxt->curprg->def->sealedProp = sealedProp;
1840 if (inlineProp)
1841 parseError(cntxt, "<identifier> expected\n");
1842 inlineProp = 0;
1843 unsafeProp = 0;
1844 sealedProp = 0;
1845 continue;
1846 }
1847 if (MALkeyword(cntxt, "catch", 5)) {
1848 cntxt->blkmode++;
1849 cntrl = CATCHsymbol;
1850 goto allLeft;
1851 }
1852 goto allLeft;
1853 case 'E': case 'e':
1854 if (MALkeyword(cntxt, "exit", 4)) {
1855 if (cntxt->blkmode > 0)
1856 cntxt->blkmode--;
1857 cntrl = EXITsymbol;
1858 } else if (parseEnd(cntxt)) {
1859 break;
1860 }
1861 goto allLeft;
1862 case 'F': case 'f':
1863 if (MALkeyword(cntxt, "function", 8)) {
1864 MalBlkPtr p;
1865 cntxt->blkmode++;
1866 if ((p = parseFunction(cntxt, FUNCTIONsymbol))){
1867 p->unsafeProp = unsafeProp;
1868 p->sealedProp = sealedProp;
1869 cntxt->curprg->def->inlineProp = inlineProp;
1870 cntxt->curprg->def->unsafeProp = unsafeProp;
1871 cntxt->curprg->def->sealedProp = sealedProp;
1872 inlineProp = 0;
1873 unsafeProp = 0;
1874 sealedProp = 0;
1875 break;
1876 }
1877 } else if (MALkeyword(cntxt, "factory", 7)) {
1878 if( inlineProp )
1879 parseError(cntxt, "parseError:INLINE ignored\n");
1880 if( unsafeProp)
1881 parseError(cntxt, "parseError:UNSAFE ignored\n");
1882 if( sealedProp)
1883 parseError(cntxt, "parseError:SEALED ignored\n");
1884 inlineProp = 0;
1885 unsafeProp = 0;
1886 sealedProp = 0;
1887 cntxt->blkmode++;
1888 parseFunction(cntxt, FACTORYsymbol);
1889 break;
1890 }
1891 goto allLeft;
1892 case 'I': case 'i':
1893 if (MALkeyword(cntxt, "inline", 6)) {
1894 inlineProp= 1;
1895 skipSpace(cntxt);
1896 continue;
1897 } else
1898 if (MALkeyword(cntxt, "include", 7)){
1899 parseInclude(cntxt);
1900 break;;
1901 }
1902 goto allLeft;
1903 case 'L': case 'l':
1904 if (MALkeyword(cntxt, "leave", 5))
1905 cntrl = LEAVEsymbol;
1906 goto allLeft;
1907 case 'M': case 'm':
1908 if (MALkeyword(cntxt, "module", 6) &&
1909 parseModule(cntxt) == 0)
1910 break;
1911 goto allLeft;
1912 case 'P': case 'p':
1913 if (MALkeyword(cntxt, "pattern", 7)) {
1914 MalBlkPtr p;
1915 if( inlineProp )
1916 parseError(cntxt, "parseError:INLINE ignored\n");
1917 p = parseCommandPattern(cntxt, PATTERNsymbol);
1918 if (p) {
1919 p->unsafeProp = unsafeProp;
1920 p->sealedProp = sealedProp;
1921 }
1922 cntxt->curprg->def->unsafeProp = unsafeProp;
1923 cntxt->curprg->def->sealedProp = sealedProp;
1924 inlineProp = 0;
1925 unsafeProp = 0;
1926 sealedProp = 0;
1927 continue;
1928 }
1929 goto allLeft;
1930 case 'R': case 'r':
1931 if (MALkeyword(cntxt, "redo", 4)) {
1932 cntrl = REDOsymbol;
1933 goto allLeft;
1934 }
1935 if (MALkeyword(cntxt, "raise", 5)) {
1936 cntrl = RAISEsymbol;
1937 goto allLeft;
1938 }
1939 if (MALkeyword(cntxt, "return", 6)) {
1940 cntrl = RETURNsymbol;
1941 }
1942 goto allLeft;
1943 case 's':
1944 if (MALkeyword(cntxt, "sealed", 6)) {
1945 sealedProp= 1;
1946 skipSpace(cntxt);
1947 continue;
1948 }
1949 goto allLeft;
1950 case 'U': case 'u':
1951 if (MALkeyword(cntxt, "unsafe", 6)) {
1952 unsafeProp= 1;
1953 skipSpace(cntxt);
1954 continue;
1955 }
1956 goto allLeft;
1957 case 'Y': case 'y':
1958 if (MALkeyword(cntxt, "yield", 5)) {
1959 cntrl = YIELDsymbol;
1960 goto allLeft;
1961 }
1962 /* fall through */
1963 default: allLeft :
1964 parseAssign(cntxt, cntrl);
1965 cntrl = 0;
1966 }
1967 }
1968 skipSpace(cntxt);
1969}
1970