1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Copyright (c) 1997-2012 University of Cambridge |
10 | |
11 | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without |
13 | modification, are permitted provided that the following conditions are met: |
14 | |
15 | * Redistributions of source code must retain the above copyright notice, |
16 | this list of conditions and the following disclaimer. |
17 | |
18 | * Redistributions in binary form must reproduce the above copyright |
19 | notice, this list of conditions and the following disclaimer in the |
20 | documentation and/or other materials provided with the distribution. |
21 | |
22 | * Neither the name of the University of Cambridge nor the names of its |
23 | contributors may be used to endorse or promote products derived from |
24 | this software without specific prior written permission. |
25 | |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 | POSSIBILITY OF SUCH DAMAGE. |
37 | ----------------------------------------------------------------------------- |
38 | */ |
39 | |
40 | |
41 | /* This module contains some convenience functions for extracting substrings |
42 | from the subject string after a regex match has succeeded. The original idea |
43 | for these functions came from Scott Wimer. */ |
44 | |
45 | |
46 | #include "pcre_config.h" |
47 | #include "pcre_internal.h" |
48 | |
49 | |
50 | /************************************************* |
51 | * Find number for named string * |
52 | *************************************************/ |
53 | |
54 | /* This function is used by the get_first_set() function below, as well |
55 | as being generally available. It assumes that names are unique. |
56 | |
57 | Arguments: |
58 | code the compiled regex |
59 | stringname the name whose number is required |
60 | |
61 | Returns: the number of the named parentheses, or a negative number |
62 | (PCRE_ERROR_NOSUBSTRING) if not found |
63 | */ |
64 | |
65 | #if defined COMPILE_PCRE8 |
66 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
67 | pcre_get_stringnumber(const pcre *code, const char *stringname) |
68 | #elif defined COMPILE_PCRE16 |
69 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
70 | pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) |
71 | #elif defined COMPILE_PCRE32 |
72 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
73 | pcre32_get_stringnumber(const pcre32 *code, PCRE_SPTR32 stringname) |
74 | #endif |
75 | { |
76 | int rc; |
77 | int entrysize; |
78 | int top, bot; |
79 | pcre_uchar *nametable; |
80 | |
81 | #ifdef COMPILE_PCRE8 |
82 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
83 | return rc; |
84 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
85 | |
86 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
87 | return rc; |
88 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
89 | return rc; |
90 | #endif |
91 | #ifdef COMPILE_PCRE16 |
92 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
93 | return rc; |
94 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
95 | |
96 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
97 | return rc; |
98 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
99 | return rc; |
100 | #endif |
101 | #ifdef COMPILE_PCRE32 |
102 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
103 | return rc; |
104 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
105 | |
106 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
107 | return rc; |
108 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
109 | return rc; |
110 | #endif |
111 | |
112 | bot = 0; |
113 | while (top > bot) |
114 | { |
115 | int mid = (top + bot) / 2; |
116 | pcre_uchar *entry = nametable + entrysize*mid; |
117 | int c = STRCMP_UC_UC((pcre_uchar *)stringname, |
118 | (pcre_uchar *)(entry + IMM2_SIZE)); |
119 | if (c == 0) return GET2(entry, 0); |
120 | if (c > 0) bot = mid + 1; else top = mid; |
121 | } |
122 | |
123 | return PCRE_ERROR_NOSUBSTRING; |
124 | } |
125 | |
126 | |
127 | |
128 | /************************************************* |
129 | * Find (multiple) entries for named string * |
130 | *************************************************/ |
131 | |
132 | /* This is used by the get_first_set() function below, as well as being |
133 | generally available. It is used when duplicated names are permitted. |
134 | |
135 | Arguments: |
136 | code the compiled regex |
137 | stringname the name whose entries required |
138 | firstptr where to put the pointer to the first entry |
139 | lastptr where to put the pointer to the last entry |
140 | |
141 | Returns: the length of each entry, or a negative number |
142 | (PCRE_ERROR_NOSUBSTRING) if not found |
143 | */ |
144 | |
145 | #if defined COMPILE_PCRE8 |
146 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
147 | pcre_get_stringtable_entries(const pcre *code, const char *stringname, |
148 | char **firstptr, char **lastptr) |
149 | #elif defined COMPILE_PCRE16 |
150 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
151 | pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname, |
152 | PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr) |
153 | #elif defined COMPILE_PCRE32 |
154 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
155 | pcre32_get_stringtable_entries(const pcre32 *code, PCRE_SPTR32 stringname, |
156 | PCRE_UCHAR32 **firstptr, PCRE_UCHAR32 **lastptr) |
157 | #endif |
158 | { |
159 | int rc; |
160 | int entrysize; |
161 | int top, bot; |
162 | pcre_uchar *nametable, *lastentry; |
163 | |
164 | #ifdef COMPILE_PCRE8 |
165 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
166 | return rc; |
167 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
168 | |
169 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
170 | return rc; |
171 | if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
172 | return rc; |
173 | #endif |
174 | #ifdef COMPILE_PCRE16 |
175 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
176 | return rc; |
177 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
178 | |
179 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
180 | return rc; |
181 | if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
182 | return rc; |
183 | #endif |
184 | #ifdef COMPILE_PCRE32 |
185 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) |
186 | return rc; |
187 | if (top <= 0) return PCRE_ERROR_NOSUBSTRING; |
188 | |
189 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) |
190 | return rc; |
191 | if ((rc = pcre32_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) |
192 | return rc; |
193 | #endif |
194 | |
195 | lastentry = nametable + entrysize * (top - 1); |
196 | bot = 0; |
197 | while (top > bot) |
198 | { |
199 | int mid = (top + bot) / 2; |
200 | pcre_uchar *entry = nametable + entrysize*mid; |
201 | int c = STRCMP_UC_UC((pcre_uchar *)stringname, |
202 | (pcre_uchar *)(entry + IMM2_SIZE)); |
203 | if (c == 0) |
204 | { |
205 | pcre_uchar *first = entry; |
206 | pcre_uchar *last = entry; |
207 | while (first > nametable) |
208 | { |
209 | if (STRCMP_UC_UC((pcre_uchar *)stringname, |
210 | (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break; |
211 | first -= entrysize; |
212 | } |
213 | while (last < lastentry) |
214 | { |
215 | if (STRCMP_UC_UC((pcre_uchar *)stringname, |
216 | (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break; |
217 | last += entrysize; |
218 | } |
219 | #if defined COMPILE_PCRE8 |
220 | *firstptr = (char *)first; |
221 | *lastptr = (char *)last; |
222 | #elif defined COMPILE_PCRE16 |
223 | *firstptr = (PCRE_UCHAR16 *)first; |
224 | *lastptr = (PCRE_UCHAR16 *)last; |
225 | #elif defined COMPILE_PCRE32 |
226 | *firstptr = (PCRE_UCHAR32 *)first; |
227 | *lastptr = (PCRE_UCHAR32 *)last; |
228 | #endif |
229 | return entrysize; |
230 | } |
231 | if (c > 0) bot = mid + 1; else top = mid; |
232 | } |
233 | |
234 | return PCRE_ERROR_NOSUBSTRING; |
235 | } |
236 | |
237 | |
238 | |
239 | /************************************************* |
240 | * Find first set of multiple named strings * |
241 | *************************************************/ |
242 | |
243 | /* This function allows for duplicate names in the table of named substrings. |
244 | It returns the number of the first one that was set in a pattern match. |
245 | |
246 | Arguments: |
247 | code the compiled regex |
248 | stringname the name of the capturing substring |
249 | ovector the vector of matched substrings |
250 | stringcount number of captured substrings |
251 | |
252 | Returns: the number of the first that is set, |
253 | or the number of the last one if none are set, |
254 | or a negative number on error |
255 | */ |
256 | |
257 | #if defined COMPILE_PCRE8 |
258 | static int |
259 | get_first_set(const pcre *code, const char *stringname, int *ovector, |
260 | int stringcount) |
261 | #elif defined COMPILE_PCRE16 |
262 | static int |
263 | get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector, |
264 | int stringcount) |
265 | #elif defined COMPILE_PCRE32 |
266 | static int |
267 | get_first_set(const pcre32 *code, PCRE_SPTR32 stringname, int *ovector, |
268 | int stringcount) |
269 | #endif |
270 | { |
271 | const REAL_PCRE *re = (const REAL_PCRE *)code; |
272 | int entrysize; |
273 | pcre_uchar *entry; |
274 | #if defined COMPILE_PCRE8 |
275 | char *first, *last; |
276 | #elif defined COMPILE_PCRE16 |
277 | PCRE_UCHAR16 *first, *last; |
278 | #elif defined COMPILE_PCRE32 |
279 | PCRE_UCHAR32 *first, *last; |
280 | #endif |
281 | |
282 | #if defined COMPILE_PCRE8 |
283 | if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) |
284 | return pcre_get_stringnumber(code, stringname); |
285 | entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); |
286 | #elif defined COMPILE_PCRE16 |
287 | if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) |
288 | return pcre16_get_stringnumber(code, stringname); |
289 | entrysize = pcre16_get_stringtable_entries(code, stringname, &first, &last); |
290 | #elif defined COMPILE_PCRE32 |
291 | if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) |
292 | return pcre32_get_stringnumber(code, stringname); |
293 | entrysize = pcre32_get_stringtable_entries(code, stringname, &first, &last); |
294 | #endif |
295 | if (entrysize <= 0) return entrysize; |
296 | for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) |
297 | { |
298 | int n = GET2(entry, 0); |
299 | if (n < stringcount && ovector[n*2] >= 0) return n; |
300 | } |
301 | return GET2(entry, 0); |
302 | } |
303 | |
304 | |
305 | |
306 | |
307 | /************************************************* |
308 | * Copy captured string to given buffer * |
309 | *************************************************/ |
310 | |
311 | /* This function copies a single captured substring into a given buffer. |
312 | Note that we use memcpy() rather than strncpy() in case there are binary zeros |
313 | in the string. |
314 | |
315 | Arguments: |
316 | subject the subject string that was matched |
317 | ovector pointer to the offsets table |
318 | stringcount the number of substrings that were captured |
319 | (i.e. the yield of the pcre_exec call, unless |
320 | that was zero, in which case it should be 1/3 |
321 | of the offset table size) |
322 | stringnumber the number of the required substring |
323 | buffer where to put the substring |
324 | size the size of the buffer |
325 | |
326 | Returns: if successful: |
327 | the length of the copied string, not including the zero |
328 | that is put on the end; can be zero |
329 | if not successful: |
330 | PCRE_ERROR_NOMEMORY (-6) buffer too small |
331 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
332 | */ |
333 | |
334 | #if defined COMPILE_PCRE8 |
335 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
336 | pcre_copy_substring(const char *subject, int *ovector, int stringcount, |
337 | int stringnumber, char *buffer, int size) |
338 | #elif defined COMPILE_PCRE16 |
339 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
340 | pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, |
341 | int stringnumber, PCRE_UCHAR16 *buffer, int size) |
342 | #elif defined COMPILE_PCRE32 |
343 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
344 | pcre32_copy_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, |
345 | int stringnumber, PCRE_UCHAR32 *buffer, int size) |
346 | #endif |
347 | { |
348 | int yield; |
349 | if (stringnumber < 0 || stringnumber >= stringcount) |
350 | return PCRE_ERROR_NOSUBSTRING; |
351 | stringnumber *= 2; |
352 | yield = ovector[stringnumber+1] - ovector[stringnumber]; |
353 | if (size < yield + 1) return PCRE_ERROR_NOMEMORY; |
354 | memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield)); |
355 | buffer[yield] = 0; |
356 | return yield; |
357 | } |
358 | |
359 | |
360 | |
361 | /************************************************* |
362 | * Copy named captured string to given buffer * |
363 | *************************************************/ |
364 | |
365 | /* This function copies a single captured substring into a given buffer, |
366 | identifying it by name. If the regex permits duplicate names, the first |
367 | substring that is set is chosen. |
368 | |
369 | Arguments: |
370 | code the compiled regex |
371 | subject the subject string that was matched |
372 | ovector pointer to the offsets table |
373 | stringcount the number of substrings that were captured |
374 | (i.e. the yield of the pcre_exec call, unless |
375 | that was zero, in which case it should be 1/3 |
376 | of the offset table size) |
377 | stringname the name of the required substring |
378 | buffer where to put the substring |
379 | size the size of the buffer |
380 | |
381 | Returns: if successful: |
382 | the length of the copied string, not including the zero |
383 | that is put on the end; can be zero |
384 | if not successful: |
385 | PCRE_ERROR_NOMEMORY (-6) buffer too small |
386 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
387 | */ |
388 | |
389 | #if defined COMPILE_PCRE8 |
390 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
391 | pcre_copy_named_substring(const pcre *code, const char *subject, |
392 | int *ovector, int stringcount, const char *stringname, |
393 | char *buffer, int size) |
394 | #elif defined COMPILE_PCRE16 |
395 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
396 | pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject, |
397 | int *ovector, int stringcount, PCRE_SPTR16 stringname, |
398 | PCRE_UCHAR16 *buffer, int size) |
399 | #elif defined COMPILE_PCRE32 |
400 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
401 | pcre32_copy_named_substring(const pcre32 *code, PCRE_SPTR32 subject, |
402 | int *ovector, int stringcount, PCRE_SPTR32 stringname, |
403 | PCRE_UCHAR32 *buffer, int size) |
404 | #endif |
405 | { |
406 | int n = get_first_set(code, stringname, ovector, stringcount); |
407 | if (n <= 0) return n; |
408 | #if defined COMPILE_PCRE8 |
409 | return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); |
410 | #elif defined COMPILE_PCRE16 |
411 | return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size); |
412 | #elif defined COMPILE_PCRE32 |
413 | return pcre32_copy_substring(subject, ovector, stringcount, n, buffer, size); |
414 | #endif |
415 | } |
416 | |
417 | |
418 | |
419 | /************************************************* |
420 | * Copy all captured strings to new store * |
421 | *************************************************/ |
422 | |
423 | /* This function gets one chunk of store and builds a list of pointers and all |
424 | of the captured substrings in it. A NULL pointer is put on the end of the list. |
425 | |
426 | Arguments: |
427 | subject the subject string that was matched |
428 | ovector pointer to the offsets table |
429 | stringcount the number of substrings that were captured |
430 | (i.e. the yield of the pcre_exec call, unless |
431 | that was zero, in which case it should be 1/3 |
432 | of the offset table size) |
433 | listptr set to point to the list of pointers |
434 | |
435 | Returns: if successful: 0 |
436 | if not successful: |
437 | PCRE_ERROR_NOMEMORY (-6) failed to get store |
438 | */ |
439 | |
440 | #if defined COMPILE_PCRE8 |
441 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
442 | pcre_get_substring_list(const char *subject, int *ovector, int stringcount, |
443 | const char ***listptr) |
444 | #elif defined COMPILE_PCRE16 |
445 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
446 | pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount, |
447 | PCRE_SPTR16 **listptr) |
448 | #elif defined COMPILE_PCRE32 |
449 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
450 | pcre32_get_substring_list(PCRE_SPTR32 subject, int *ovector, int stringcount, |
451 | PCRE_SPTR32 **listptr) |
452 | #endif |
453 | { |
454 | int i; |
455 | int size = sizeof(pcre_uchar *); |
456 | int double_count = stringcount * 2; |
457 | pcre_uchar **stringlist; |
458 | pcre_uchar *p; |
459 | |
460 | for (i = 0; i < double_count; i += 2) |
461 | { |
462 | size += sizeof(pcre_uchar *) + IN_UCHARS(1); |
463 | if (ovector[i+1] > ovector[i]) size += IN_UCHARS(ovector[i+1] - ovector[i]); |
464 | } |
465 | |
466 | stringlist = (pcre_uchar **)(PUBL(malloc))(size); |
467 | if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; |
468 | |
469 | #if defined COMPILE_PCRE8 |
470 | *listptr = (const char **)stringlist; |
471 | #elif defined COMPILE_PCRE16 |
472 | *listptr = (PCRE_SPTR16 *)stringlist; |
473 | #elif defined COMPILE_PCRE32 |
474 | *listptr = (PCRE_SPTR32 *)stringlist; |
475 | #endif |
476 | p = (pcre_uchar *)(stringlist + stringcount + 1); |
477 | |
478 | for (i = 0; i < double_count; i += 2) |
479 | { |
480 | int len = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0; |
481 | memcpy(p, subject + ovector[i], IN_UCHARS(len)); |
482 | *stringlist++ = p; |
483 | p += len; |
484 | *p++ = 0; |
485 | } |
486 | |
487 | *stringlist = NULL; |
488 | return 0; |
489 | } |
490 | |
491 | |
492 | |
493 | /************************************************* |
494 | * Free store obtained by get_substring_list * |
495 | *************************************************/ |
496 | |
497 | /* This function exists for the benefit of people calling PCRE from non-C |
498 | programs that can call its functions, but not free() or (PUBL(free))() |
499 | directly. |
500 | |
501 | Argument: the result of a previous pcre_get_substring_list() |
502 | Returns: nothing |
503 | */ |
504 | |
505 | #if defined COMPILE_PCRE8 |
506 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
507 | pcre_free_substring_list(const char **pointer) |
508 | #elif defined COMPILE_PCRE16 |
509 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
510 | pcre16_free_substring_list(PCRE_SPTR16 *pointer) |
511 | #elif defined COMPILE_PCRE32 |
512 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
513 | pcre32_free_substring_list(PCRE_SPTR32 *pointer) |
514 | #endif |
515 | { |
516 | (PUBL(free))((void *)pointer); |
517 | } |
518 | |
519 | |
520 | |
521 | /************************************************* |
522 | * Copy captured string to new store * |
523 | *************************************************/ |
524 | |
525 | /* This function copies a single captured substring into a piece of new |
526 | store |
527 | |
528 | Arguments: |
529 | subject the subject string that was matched |
530 | ovector pointer to the offsets table |
531 | stringcount the number of substrings that were captured |
532 | (i.e. the yield of the pcre_exec call, unless |
533 | that was zero, in which case it should be 1/3 |
534 | of the offset table size) |
535 | stringnumber the number of the required substring |
536 | stringptr where to put a pointer to the substring |
537 | |
538 | Returns: if successful: |
539 | the length of the string, not including the zero that |
540 | is put on the end; can be zero |
541 | if not successful: |
542 | PCRE_ERROR_NOMEMORY (-6) failed to get store |
543 | PCRE_ERROR_NOSUBSTRING (-7) substring not present |
544 | */ |
545 | |
546 | #if defined COMPILE_PCRE8 |
547 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
548 | pcre_get_substring(const char *subject, int *ovector, int stringcount, |
549 | int stringnumber, const char **stringptr) |
550 | #elif defined COMPILE_PCRE16 |
551 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
552 | pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, |
553 | int stringnumber, PCRE_SPTR16 *stringptr) |
554 | #elif defined COMPILE_PCRE32 |
555 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
556 | pcre32_get_substring(PCRE_SPTR32 subject, int *ovector, int stringcount, |
557 | int stringnumber, PCRE_SPTR32 *stringptr) |
558 | #endif |
559 | { |
560 | int yield; |
561 | pcre_uchar *substring; |
562 | if (stringnumber < 0 || stringnumber >= stringcount) |
563 | return PCRE_ERROR_NOSUBSTRING; |
564 | stringnumber *= 2; |
565 | yield = ovector[stringnumber+1] - ovector[stringnumber]; |
566 | substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); |
567 | if (substring == NULL) return PCRE_ERROR_NOMEMORY; |
568 | memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); |
569 | substring[yield] = 0; |
570 | #if defined COMPILE_PCRE8 |
571 | *stringptr = (const char *)substring; |
572 | #elif defined COMPILE_PCRE16 |
573 | *stringptr = (PCRE_SPTR16)substring; |
574 | #elif defined COMPILE_PCRE32 |
575 | *stringptr = (PCRE_SPTR32)substring; |
576 | #endif |
577 | return yield; |
578 | } |
579 | |
580 | |
581 | |
582 | /************************************************* |
583 | * Copy named captured string to new store * |
584 | *************************************************/ |
585 | |
586 | /* This function copies a single captured substring, identified by name, into |
587 | new store. If the regex permits duplicate names, the first substring that is |
588 | set is chosen. |
589 | |
590 | Arguments: |
591 | code the compiled regex |
592 | subject the subject string that was matched |
593 | ovector pointer to the offsets table |
594 | stringcount the number of substrings that were captured |
595 | (i.e. the yield of the pcre_exec call, unless |
596 | that was zero, in which case it should be 1/3 |
597 | of the offset table size) |
598 | stringname the name of the required substring |
599 | stringptr where to put the pointer |
600 | |
601 | Returns: if successful: |
602 | the length of the copied string, not including the zero |
603 | that is put on the end; can be zero |
604 | if not successful: |
605 | PCRE_ERROR_NOMEMORY (-6) couldn't get memory |
606 | PCRE_ERROR_NOSUBSTRING (-7) no such captured substring |
607 | */ |
608 | |
609 | #if defined COMPILE_PCRE8 |
610 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
611 | pcre_get_named_substring(const pcre *code, const char *subject, |
612 | int *ovector, int stringcount, const char *stringname, |
613 | const char **stringptr) |
614 | #elif defined COMPILE_PCRE16 |
615 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
616 | pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject, |
617 | int *ovector, int stringcount, PCRE_SPTR16 stringname, |
618 | PCRE_SPTR16 *stringptr) |
619 | #elif defined COMPILE_PCRE32 |
620 | PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
621 | pcre32_get_named_substring(const pcre32 *code, PCRE_SPTR32 subject, |
622 | int *ovector, int stringcount, PCRE_SPTR32 stringname, |
623 | PCRE_SPTR32 *stringptr) |
624 | #endif |
625 | { |
626 | int n = get_first_set(code, stringname, ovector, stringcount); |
627 | if (n <= 0) return n; |
628 | #if defined COMPILE_PCRE8 |
629 | return pcre_get_substring(subject, ovector, stringcount, n, stringptr); |
630 | #elif defined COMPILE_PCRE16 |
631 | return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); |
632 | #elif defined COMPILE_PCRE32 |
633 | return pcre32_get_substring(subject, ovector, stringcount, n, stringptr); |
634 | #endif |
635 | } |
636 | |
637 | |
638 | |
639 | |
640 | /************************************************* |
641 | * Free store obtained by get_substring * |
642 | *************************************************/ |
643 | |
644 | /* This function exists for the benefit of people calling PCRE from non-C |
645 | programs that can call its functions, but not free() or (PUBL(free))() |
646 | directly. |
647 | |
648 | Argument: the result of a previous pcre_get_substring() |
649 | Returns: nothing |
650 | */ |
651 | |
652 | #if defined COMPILE_PCRE8 |
653 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
654 | pcre_free_substring(const char *pointer) |
655 | #elif defined COMPILE_PCRE16 |
656 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
657 | pcre16_free_substring(PCRE_SPTR16 pointer) |
658 | #elif defined COMPILE_PCRE32 |
659 | PCRE_EXP_DEFN void PCRE_CALL_CONVENTION |
660 | pcre32_free_substring(PCRE_SPTR32 pointer) |
661 | #endif |
662 | { |
663 | (PUBL(free))((void *)pointer); |
664 | } |
665 | |
666 | /* End of pcre_get.c */ |
667 | |