1 | /* -*- c-basic-offset: 2 -*- */ |
2 | /* |
3 | Copyright(C) 2016 Brazil |
4 | |
5 | This library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License version 2.1 as published by the Free Software Foundation. |
8 | |
9 | This library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with this library; if not, write to the Free Software |
16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ |
18 | |
19 | #ifdef GRN_EMBEDDED |
20 | # define GRN_PLUGIN_FUNCTION_TAG functions_string |
21 | #endif |
22 | |
23 | #include <groonga/plugin.h> |
24 | |
25 | /* |
26 | * func_string_length() returns the number of characters in a string. |
27 | * If the string contains an invalid byte sequence, this function returns the |
28 | * number of characters before the invalid byte sequence. |
29 | */ |
30 | static grn_obj * |
31 | func_string_length(grn_ctx *ctx, int n_args, grn_obj **args, |
32 | grn_user_data *user_data) |
33 | { |
34 | grn_obj *target; |
35 | unsigned int length = 0; |
36 | grn_obj *grn_length; |
37 | |
38 | if (n_args != 1) { |
39 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
40 | "string_length(): wrong number of arguments (%d for 1)" , |
41 | n_args); |
42 | return NULL; |
43 | } |
44 | |
45 | target = args[0]; |
46 | if (!(target->header.type == GRN_BULK && |
47 | ((target->header.domain == GRN_DB_SHORT_TEXT) || |
48 | (target->header.domain == GRN_DB_TEXT) || |
49 | (target->header.domain == GRN_DB_LONG_TEXT)))) { |
50 | grn_obj inspected; |
51 | |
52 | GRN_TEXT_INIT(&inspected, 0); |
53 | grn_inspect(ctx, &inspected, target); |
54 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
55 | "string_length(): target object must be a text bulk: " |
56 | "<%.*s>" , |
57 | (int)GRN_TEXT_LEN(&inspected), |
58 | GRN_TEXT_VALUE(&inspected)); |
59 | GRN_OBJ_FIN(ctx, &inspected); |
60 | return NULL; |
61 | } |
62 | |
63 | { |
64 | const char *s = GRN_TEXT_VALUE(target); |
65 | const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target); |
66 | const char *p; |
67 | unsigned int cl = 0; |
68 | for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) { |
69 | length++; |
70 | } |
71 | } |
72 | |
73 | grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0); |
74 | if (!grn_length) { |
75 | return NULL; |
76 | } |
77 | |
78 | GRN_UINT32_SET(ctx, grn_length, length); |
79 | |
80 | return grn_length; |
81 | } |
82 | |
83 | static grn_obj * |
84 | func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args, |
85 | grn_user_data *user_data) |
86 | { |
87 | grn_obj *target; |
88 | grn_obj *from_raw; |
89 | grn_obj *length_raw = NULL; |
90 | int64_t from = 0; |
91 | int64_t length = -1; |
92 | const char *start = NULL; |
93 | const char *end = NULL; |
94 | grn_obj *substring; |
95 | |
96 | if (n_args < 2) { |
97 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
98 | "string_substring(): wrong number of arguments (%d for 2..3)" , |
99 | n_args); |
100 | return NULL; |
101 | } |
102 | |
103 | target = args[0]; |
104 | from_raw = args[1]; |
105 | if (n_args == 3) { |
106 | length_raw = args[2]; |
107 | } |
108 | |
109 | if (!(target->header.type == GRN_BULK && |
110 | grn_type_id_is_text_family(ctx, target->header.domain))) { |
111 | grn_obj inspected; |
112 | |
113 | GRN_TEXT_INIT(&inspected, 0); |
114 | grn_inspect(ctx, &inspected, target); |
115 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
116 | "string_substring(): target object must be a text bulk: " |
117 | "<%.*s>" , |
118 | (int)GRN_TEXT_LEN(&inspected), |
119 | GRN_TEXT_VALUE(&inspected)); |
120 | GRN_OBJ_FIN(ctx, &inspected); |
121 | return NULL; |
122 | } |
123 | |
124 | /* TODO: extract as grn_func_arg_int64() */ |
125 | if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) { |
126 | grn_obj inspected; |
127 | |
128 | GRN_TEXT_INIT(&inspected, 0); |
129 | grn_inspect(ctx, &inspected, from_raw); |
130 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
131 | "string_substring(): from must be a number: <%.*s>" , |
132 | (int)GRN_TEXT_LEN(&inspected), |
133 | GRN_TEXT_VALUE(&inspected)); |
134 | GRN_OBJ_FIN(ctx, &inspected); |
135 | return NULL; |
136 | } |
137 | if (from_raw->header.domain == GRN_DB_INT32) { |
138 | from = GRN_INT32_VALUE(from_raw); |
139 | } else if (from_raw->header.domain == GRN_DB_INT64) { |
140 | from = GRN_INT64_VALUE(from_raw); |
141 | } else { |
142 | grn_obj buffer; |
143 | grn_rc rc; |
144 | |
145 | GRN_INT64_INIT(&buffer, 0); |
146 | rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE); |
147 | if (rc == GRN_SUCCESS) { |
148 | from = GRN_INT64_VALUE(&buffer); |
149 | } |
150 | GRN_OBJ_FIN(ctx, &buffer); |
151 | |
152 | if (rc != GRN_SUCCESS) { |
153 | grn_obj inspected; |
154 | |
155 | GRN_TEXT_INIT(&inspected, 0); |
156 | grn_inspect(ctx, &inspected, from_raw); |
157 | GRN_PLUGIN_ERROR(ctx, rc, |
158 | "string_substring(): " |
159 | "failed to cast from value to number: <%.*s>" , |
160 | (int)GRN_TEXT_LEN(&inspected), |
161 | GRN_TEXT_VALUE(&inspected)); |
162 | GRN_OBJ_FIN(ctx, &inspected); |
163 | return NULL; |
164 | } |
165 | } |
166 | |
167 | if (length_raw) { |
168 | /* TODO: extract as grn_func_arg_int64() */ |
169 | if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) { |
170 | grn_obj inspected; |
171 | |
172 | GRN_TEXT_INIT(&inspected, 0); |
173 | grn_inspect(ctx, &inspected, length_raw); |
174 | GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT, |
175 | "string_substring(): length must be a number: <%.*s>" , |
176 | (int)GRN_TEXT_LEN(&inspected), |
177 | GRN_TEXT_VALUE(&inspected)); |
178 | GRN_OBJ_FIN(ctx, &inspected); |
179 | return NULL; |
180 | } |
181 | if (length_raw->header.domain == GRN_DB_INT32) { |
182 | length = GRN_INT32_VALUE(length_raw); |
183 | } else if (length_raw->header.domain == GRN_DB_INT64) { |
184 | length = GRN_INT64_VALUE(length_raw); |
185 | } else { |
186 | grn_obj buffer; |
187 | grn_rc rc; |
188 | |
189 | GRN_INT64_INIT(&buffer, 0); |
190 | rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE); |
191 | if (rc == GRN_SUCCESS) { |
192 | length = GRN_INT64_VALUE(&buffer); |
193 | } |
194 | GRN_OBJ_FIN(ctx, &buffer); |
195 | |
196 | if (rc != GRN_SUCCESS) { |
197 | grn_obj inspected; |
198 | |
199 | GRN_TEXT_INIT(&inspected, 0); |
200 | grn_inspect(ctx, &inspected, length_raw); |
201 | GRN_PLUGIN_ERROR(ctx, rc, |
202 | "string_substring(): " |
203 | "failed to cast length value to number: <%.*s>" , |
204 | (int)GRN_TEXT_LEN(&inspected), |
205 | GRN_TEXT_VALUE(&inspected)); |
206 | GRN_OBJ_FIN(ctx, &inspected); |
207 | return NULL; |
208 | } |
209 | } |
210 | } |
211 | |
212 | substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0); |
213 | if (!substring) { |
214 | return NULL; |
215 | } |
216 | |
217 | GRN_BULK_REWIND(substring); |
218 | |
219 | if (GRN_TEXT_LEN(target) == 0) { |
220 | return substring; |
221 | } |
222 | if (length == 0) { |
223 | return substring; |
224 | } |
225 | |
226 | while (from < 0) { |
227 | from += GRN_TEXT_LEN(target); |
228 | } |
229 | |
230 | { |
231 | const char *p; |
232 | |
233 | start = NULL; |
234 | p = GRN_TEXT_VALUE(target); |
235 | end = p + GRN_TEXT_LEN(target); |
236 | |
237 | if (from == 0) { |
238 | start = p; |
239 | } else { |
240 | unsigned int char_length = 0; |
241 | size_t n_chars = 0; |
242 | |
243 | for (; |
244 | p < end && (char_length = grn_charlen(ctx, p, end)); |
245 | p += char_length, n_chars++) { |
246 | if (n_chars == from) { |
247 | start = p; |
248 | break; |
249 | } |
250 | } |
251 | } |
252 | |
253 | if (start && length > 0) { |
254 | unsigned int char_length = 0; |
255 | size_t n_chars = 0; |
256 | |
257 | for (; |
258 | p < end && (char_length = grn_charlen(ctx, p, end)); |
259 | p += char_length, n_chars++) { |
260 | if (n_chars == length) { |
261 | end = p; |
262 | break; |
263 | } |
264 | } |
265 | } |
266 | } |
267 | |
268 | if (start) { |
269 | GRN_TEXT_SET(ctx, substring, start, end - start); |
270 | } |
271 | |
272 | return substring; |
273 | } |
274 | |
275 | grn_rc |
276 | GRN_PLUGIN_INIT(grn_ctx *ctx) |
277 | { |
278 | return ctx->rc; |
279 | } |
280 | |
281 | grn_rc |
282 | GRN_PLUGIN_REGISTER(grn_ctx *ctx) |
283 | { |
284 | grn_rc rc = GRN_SUCCESS; |
285 | |
286 | grn_proc_create(ctx, "string_length" , -1, GRN_PROC_FUNCTION, func_string_length, |
287 | NULL, NULL, 0, NULL); |
288 | |
289 | grn_proc_create(ctx, "string_substring" , -1, GRN_PROC_FUNCTION, func_string_substring, |
290 | NULL, NULL, 0, NULL); |
291 | |
292 | return rc; |
293 | } |
294 | |
295 | grn_rc |
296 | GRN_PLUGIN_FIN(grn_ctx *ctx) |
297 | { |
298 | return GRN_SUCCESS; |
299 | } |
300 | |