1/* -*- c-basic-offset: 2 -*- */
2/*
3 Copyright(C) 2016 Brazil
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License version 2.1 as published by the Free Software Foundation.
8
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with this library; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17*/
18
19#ifdef GRN_EMBEDDED
20# define GRN_PLUGIN_FUNCTION_TAG functions_string
21#endif
22
23#include <groonga/plugin.h>
24
25/*
26 * func_string_length() returns the number of characters in a string.
27 * If the string contains an invalid byte sequence, this function returns the
28 * number of characters before the invalid byte sequence.
29 */
30static grn_obj *
31func_string_length(grn_ctx *ctx, int n_args, grn_obj **args,
32 grn_user_data *user_data)
33{
34 grn_obj *target;
35 unsigned int length = 0;
36 grn_obj *grn_length;
37
38 if (n_args != 1) {
39 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
40 "string_length(): wrong number of arguments (%d for 1)",
41 n_args);
42 return NULL;
43 }
44
45 target = args[0];
46 if (!(target->header.type == GRN_BULK &&
47 ((target->header.domain == GRN_DB_SHORT_TEXT) ||
48 (target->header.domain == GRN_DB_TEXT) ||
49 (target->header.domain == GRN_DB_LONG_TEXT)))) {
50 grn_obj inspected;
51
52 GRN_TEXT_INIT(&inspected, 0);
53 grn_inspect(ctx, &inspected, target);
54 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
55 "string_length(): target object must be a text bulk: "
56 "<%.*s>",
57 (int)GRN_TEXT_LEN(&inspected),
58 GRN_TEXT_VALUE(&inspected));
59 GRN_OBJ_FIN(ctx, &inspected);
60 return NULL;
61 }
62
63 {
64 const char *s = GRN_TEXT_VALUE(target);
65 const char *e = GRN_TEXT_VALUE(target) + GRN_TEXT_LEN(target);
66 const char *p;
67 unsigned int cl = 0;
68 for (p = s; p < e && (cl = grn_charlen(ctx, p, e)); p += cl) {
69 length++;
70 }
71 }
72
73 grn_length = grn_plugin_proc_alloc(ctx, user_data, GRN_DB_UINT32, 0);
74 if (!grn_length) {
75 return NULL;
76 }
77
78 GRN_UINT32_SET(ctx, grn_length, length);
79
80 return grn_length;
81}
82
83static grn_obj *
84func_string_substring(grn_ctx *ctx, int n_args, grn_obj **args,
85 grn_user_data *user_data)
86{
87 grn_obj *target;
88 grn_obj *from_raw;
89 grn_obj *length_raw = NULL;
90 int64_t from = 0;
91 int64_t length = -1;
92 const char *start = NULL;
93 const char *end = NULL;
94 grn_obj *substring;
95
96 if (n_args < 2) {
97 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
98 "string_substring(): wrong number of arguments (%d for 2..3)",
99 n_args);
100 return NULL;
101 }
102
103 target = args[0];
104 from_raw = args[1];
105 if (n_args == 3) {
106 length_raw = args[2];
107 }
108
109 if (!(target->header.type == GRN_BULK &&
110 grn_type_id_is_text_family(ctx, target->header.domain))) {
111 grn_obj inspected;
112
113 GRN_TEXT_INIT(&inspected, 0);
114 grn_inspect(ctx, &inspected, target);
115 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
116 "string_substring(): target object must be a text bulk: "
117 "<%.*s>",
118 (int)GRN_TEXT_LEN(&inspected),
119 GRN_TEXT_VALUE(&inspected));
120 GRN_OBJ_FIN(ctx, &inspected);
121 return NULL;
122 }
123
124 /* TODO: extract as grn_func_arg_int64() */
125 if (!grn_type_id_is_number_family(ctx, from_raw->header.domain)) {
126 grn_obj inspected;
127
128 GRN_TEXT_INIT(&inspected, 0);
129 grn_inspect(ctx, &inspected, from_raw);
130 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
131 "string_substring(): from must be a number: <%.*s>",
132 (int)GRN_TEXT_LEN(&inspected),
133 GRN_TEXT_VALUE(&inspected));
134 GRN_OBJ_FIN(ctx, &inspected);
135 return NULL;
136 }
137 if (from_raw->header.domain == GRN_DB_INT32) {
138 from = GRN_INT32_VALUE(from_raw);
139 } else if (from_raw->header.domain == GRN_DB_INT64) {
140 from = GRN_INT64_VALUE(from_raw);
141 } else {
142 grn_obj buffer;
143 grn_rc rc;
144
145 GRN_INT64_INIT(&buffer, 0);
146 rc = grn_obj_cast(ctx, from_raw, &buffer, GRN_FALSE);
147 if (rc == GRN_SUCCESS) {
148 from = GRN_INT64_VALUE(&buffer);
149 }
150 GRN_OBJ_FIN(ctx, &buffer);
151
152 if (rc != GRN_SUCCESS) {
153 grn_obj inspected;
154
155 GRN_TEXT_INIT(&inspected, 0);
156 grn_inspect(ctx, &inspected, from_raw);
157 GRN_PLUGIN_ERROR(ctx, rc,
158 "string_substring(): "
159 "failed to cast from value to number: <%.*s>",
160 (int)GRN_TEXT_LEN(&inspected),
161 GRN_TEXT_VALUE(&inspected));
162 GRN_OBJ_FIN(ctx, &inspected);
163 return NULL;
164 }
165 }
166
167 if (length_raw) {
168 /* TODO: extract as grn_func_arg_int64() */
169 if (!grn_type_id_is_number_family(ctx, length_raw->header.domain)) {
170 grn_obj inspected;
171
172 GRN_TEXT_INIT(&inspected, 0);
173 grn_inspect(ctx, &inspected, length_raw);
174 GRN_PLUGIN_ERROR(ctx, GRN_INVALID_ARGUMENT,
175 "string_substring(): length must be a number: <%.*s>",
176 (int)GRN_TEXT_LEN(&inspected),
177 GRN_TEXT_VALUE(&inspected));
178 GRN_OBJ_FIN(ctx, &inspected);
179 return NULL;
180 }
181 if (length_raw->header.domain == GRN_DB_INT32) {
182 length = GRN_INT32_VALUE(length_raw);
183 } else if (length_raw->header.domain == GRN_DB_INT64) {
184 length = GRN_INT64_VALUE(length_raw);
185 } else {
186 grn_obj buffer;
187 grn_rc rc;
188
189 GRN_INT64_INIT(&buffer, 0);
190 rc = grn_obj_cast(ctx, length_raw, &buffer, GRN_FALSE);
191 if (rc == GRN_SUCCESS) {
192 length = GRN_INT64_VALUE(&buffer);
193 }
194 GRN_OBJ_FIN(ctx, &buffer);
195
196 if (rc != GRN_SUCCESS) {
197 grn_obj inspected;
198
199 GRN_TEXT_INIT(&inspected, 0);
200 grn_inspect(ctx, &inspected, length_raw);
201 GRN_PLUGIN_ERROR(ctx, rc,
202 "string_substring(): "
203 "failed to cast length value to number: <%.*s>",
204 (int)GRN_TEXT_LEN(&inspected),
205 GRN_TEXT_VALUE(&inspected));
206 GRN_OBJ_FIN(ctx, &inspected);
207 return NULL;
208 }
209 }
210 }
211
212 substring = grn_plugin_proc_alloc(ctx, user_data, target->header.domain, 0);
213 if (!substring) {
214 return NULL;
215 }
216
217 GRN_BULK_REWIND(substring);
218
219 if (GRN_TEXT_LEN(target) == 0) {
220 return substring;
221 }
222 if (length == 0) {
223 return substring;
224 }
225
226 while (from < 0) {
227 from += GRN_TEXT_LEN(target);
228 }
229
230 {
231 const char *p;
232
233 start = NULL;
234 p = GRN_TEXT_VALUE(target);
235 end = p + GRN_TEXT_LEN(target);
236
237 if (from == 0) {
238 start = p;
239 } else {
240 unsigned int char_length = 0;
241 size_t n_chars = 0;
242
243 for (;
244 p < end && (char_length = grn_charlen(ctx, p, end));
245 p += char_length, n_chars++) {
246 if (n_chars == from) {
247 start = p;
248 break;
249 }
250 }
251 }
252
253 if (start && length > 0) {
254 unsigned int char_length = 0;
255 size_t n_chars = 0;
256
257 for (;
258 p < end && (char_length = grn_charlen(ctx, p, end));
259 p += char_length, n_chars++) {
260 if (n_chars == length) {
261 end = p;
262 break;
263 }
264 }
265 }
266 }
267
268 if (start) {
269 GRN_TEXT_SET(ctx, substring, start, end - start);
270 }
271
272 return substring;
273}
274
275grn_rc
276GRN_PLUGIN_INIT(grn_ctx *ctx)
277{
278 return ctx->rc;
279}
280
281grn_rc
282GRN_PLUGIN_REGISTER(grn_ctx *ctx)
283{
284 grn_rc rc = GRN_SUCCESS;
285
286 grn_proc_create(ctx, "string_length", -1, GRN_PROC_FUNCTION, func_string_length,
287 NULL, NULL, 0, NULL);
288
289 grn_proc_create(ctx, "string_substring", -1, GRN_PROC_FUNCTION, func_string_substring,
290 NULL, NULL, 0, NULL);
291
292 return rc;
293}
294
295grn_rc
296GRN_PLUGIN_FIN(grn_ctx *ctx)
297{
298 return GRN_SUCCESS;
299}
300