1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /* |
10 | * (c) M. Kersten |
11 | * MAL Type System |
12 | * The MAL type module overloads the atom structure managed in the GDK library. |
13 | * For the time being, we assume GDK to support at most 127 different atomic types. |
14 | * Type composition is limited to the builtin scalar type and a column type. |
15 | * Furthermore, the polymorphic MAL type :any can be qualified |
16 | * with a type variable index :any_I, where I is a digit (1-9). |
 * BEWARE, the TYPE_any is a pseudo type known within MAL only.
18 | * |
19 | * Within the MAL layer types are encoded in 32-bit integers using |
20 | * bit stuffing to save some space. |
21 | * The integer contains the following fields: |
22 | * anyHeadIndex (bit 25-22), anyTypeIndex (bit 21-18), |
23 | * batType (bit 17) headType (16-9) and tailType(8-0) |
24 | * This encoding scheme permits a limited number of different bat types. |
25 | * The headless case assumes all head types are TYPE_void/TYPE_oid |
26 | */ |
27 | #include "monetdb_config.h" |
28 | #include "mal_type.h" |
29 | |
30 | /* |
 * At any point we should be able to construct an ASCII representation of
 * the type descriptor, including the variable references.
33 | */ |
34 | str |
35 | getTypeName(malType tpe) |
36 | { |
37 | char buf[FILENAME_MAX]; |
38 | int k; |
39 | |
40 | if (tpe == TYPE_any) |
41 | return GDKstrdup("any" ); |
42 | if (isaBatType(tpe)) { |
43 | k = getTypeIndex(tpe); |
44 | if (k) |
45 | snprintf(buf, sizeof(buf), "bat[:any%c%d]" ,TMPMARKER, k); |
46 | else if (getBatType(tpe) == TYPE_any) |
47 | snprintf(buf, sizeof(buf), "bat[:any]" ); |
48 | else |
49 | snprintf(buf, sizeof(buf), "bat[:%s]" , ATOMname(getBatType(tpe))); |
50 | return GDKstrdup(buf); |
51 | } |
52 | if (isAnyExpression(tpe)) { |
53 | snprintf(buf, sizeof(buf), "any%c%d" , |
54 | TMPMARKER, getTypeIndex(tpe)); |
55 | return GDKstrdup(buf); |
56 | } |
57 | return GDKstrdup(ATOMname(tpe)); |
58 | } |
59 | /* |
60 | * It might be handy to encode the type information in an identifier |
61 | * string for ease of comparison later. |
62 | */ |
63 | str |
64 | getTypeIdentifier(malType tpe){ |
65 | str s,t,v; |
66 | s= getTypeName(tpe); |
67 | if (s == NULL) |
68 | return NULL; |
69 | for ( t=s; *t; t++) |
70 | if ( !isalnum((unsigned char) *t) ) |
71 | *t='_'; |
72 | t--; |
73 | if (*t == '_') |
74 | *t = 0; |
75 | for (v=s, t=s+1; *t; t++){ |
76 | if ( !(*t == '_' && *v == '_' ) ) |
77 | *++v = *t; |
78 | } |
79 | *++v =0; |
80 | return s; |
81 | } |
82 | |
83 | |
/*
 * In many places we need a confirmed type identifier.
 * GDK returns the next available index when it cannot find the type.
 * This is not sufficient here; an error message may have to be generated.
 * It is assumed that the type table does not change in the meantime.
 * Use the information that identifiers are at least one character long
 * and are terminated by a null to speed up the comparison.
 */
92 | |
93 | /* |
94 | * The ATOMindex routine is pretty slow, because it performs a |
95 | * linear search through the type table. This code should actually |
96 | * be integrated with the kernel. |
97 | */ |
98 | #define qt(x) (nme[1]==x[1] && nme[2]==x[2] ) |
99 | |
100 | int |
101 | getAtomIndex(const char *nme, size_t len, int deftype) |
102 | { |
103 | int i; |
104 | |
105 | if (len >= IDLENGTH) { |
106 | /* name too long: cannot match any atom name */ |
107 | return deftype; |
108 | } |
109 | if (len == 3) |
110 | switch (*nme) { |
111 | case 'a': |
112 | if (qt("any" )) |
113 | return TYPE_any; |
114 | break; |
115 | case 'b': |
116 | if (qt("bat" )) |
117 | return TYPE_bat; |
118 | if (qt("bit" )) |
119 | return TYPE_bit; |
120 | if (qt("bte" )) |
121 | return TYPE_bte; |
122 | break; |
123 | case 'd': |
124 | if (qt("dbl" )) |
125 | return TYPE_dbl; |
126 | break; |
127 | case 'i': |
128 | if (qt("int" )) |
129 | return TYPE_int; |
130 | break; |
131 | case 'f': |
132 | if (qt("flt" )) |
133 | return TYPE_flt; |
134 | break; |
135 | case 'l': |
136 | if (qt("lng" )) |
137 | return TYPE_lng; |
138 | break; |
139 | case 'p': |
140 | if (qt("ptr" )) |
141 | return TYPE_ptr; |
142 | break; |
143 | #ifdef HAVE_HGE |
144 | case 'h': |
145 | if (qt("hge" )) |
146 | return TYPE_hge; |
147 | break; |
148 | #endif |
149 | case 'o': |
150 | if (qt("oid" )) |
151 | return TYPE_oid; |
152 | break; |
153 | case 's': |
154 | if (qt("str" )) |
155 | return TYPE_str; |
156 | if (qt("sht" )) |
157 | return TYPE_sht; |
158 | break; |
159 | } |
160 | else if (len == 4 && nme[0]=='v' && qt("voi" ) && nme[3] == 'd') |
161 | return TYPE_void; |
162 | for (i = TYPE_str; i < GDKatomcnt; i++) |
163 | if (BATatoms[i].name[0] == nme[0] && |
164 | strncmp(nme, BATatoms[i].name, len) == 0 && |
165 | BATatoms[i].name[len] == 0) |
166 | return i; |
167 | return deftype; |
168 | } |
169 | |
170 | inline int |
171 | findGDKtype(int type) |
172 | { |
173 | if (type == TYPE_any || type== TYPE_void) |
174 | return TYPE_void; |
175 | if (isaBatType(type)) |
176 | return TYPE_bat; |
177 | return ATOMtype(type); |
178 | } |
179 | |
180 | int |
181 | isIdentifier(str s) |
182 | { |
183 | if (!isalpha((unsigned char) *s)) |
184 | return -1; |
185 | for (; s && *s; s++) |
186 | if (!isalnum((unsigned char) *s) && *s != '_') |
187 | return -1; |
188 | return 0; |
189 | } |
190 | |