| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2004-2016, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | * file name: uregex.h |
| 9 | * encoding: UTF-8 |
| 10 | * indentation:4 |
| 11 | * |
| 12 | * created on: 2004mar09 |
| 13 | * created by: Andy Heninger |
| 14 | * |
| 15 | * ICU Regular Expressions, API for C |
| 16 | */ |
| 17 | |
| 18 | /** |
| 19 | * \file |
| 20 | * \brief C API: Regular Expressions |
| 21 | * |
| 22 | * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p> |
| 23 | */ |
| 24 | |
| 25 | #ifndef UREGEX_H |
| 26 | #define UREGEX_H |
| 27 | |
| 28 | #include "unicode/utext.h" |
| 29 | #include "unicode/utypes.h" |
| 30 | |
| 31 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 32 | |
| 33 | #include "unicode/localpointer.h" |
| 34 | #include "unicode/parseerr.h" |
| 35 | |
| 36 | struct URegularExpression; |
| 37 | /** |
| 38 | * Structure representing a compiled regular expression, plus the results |
| 39 | * of a match operation. Only a forward declaration is visible at this point. |
| 40 | * @stable ICU 3.0 |
| 41 | */ |
| 42 | typedef struct URegularExpression URegularExpression; |
| 43 | |
| 44 | |
| 45 | /** |
| 46 | * Constants for Regular Expression Match Modes. Each value is a distinct bit. |
| 47 | * @stable ICU 2.4 |
| 48 | */ |
| 49 | typedef enum URegexpFlag{ |
| 50 | |
| 51 | #ifndef U_HIDE_DRAFT_API |
| 52 | /** Forces normalization of pattern and strings. |
| 53 | Not implemented yet, just a placeholder, hence draft. |
| 54 | @draft ICU 2.4 */ |
| 55 | UREGEX_CANON_EQ = 128, |
| 56 | #endif /* U_HIDE_DRAFT_API */ |
| 57 | /** Enable case insensitive matching. @stable ICU 2.4 */ |
| 58 | UREGEX_CASE_INSENSITIVE = 2, |
| 59 | |
| 60 | /** Allow white space and comments within patterns. @stable ICU 2.4 */ |
| 61 | UREGEX_COMMENTS = 4, |
| 62 | |
| 63 | /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. |
| 64 | * @stable ICU 2.4 */ |
| 65 | UREGEX_DOTALL = 32, |
| 66 | |
| 67 | /** If set, treat the entire pattern as a literal string. |
| 68 | * Metacharacters or escape sequences in the input sequence will be given |
| 69 | * no special meaning. |
| 70 | * |
| 71 | * The flag UREGEX_CASE_INSENSITIVE retains its impact |
| 72 | * on matching when used in conjunction with this flag. |
| 73 | * The other flags become superfluous. |
| 74 | * |
| 75 | * @stable ICU 4.0 |
| 76 | */ |
| 77 | UREGEX_LITERAL = 16, |
| 78 | |
| 79 | /** Control behavior of "$" and "^". |
| 80 | * If set, recognize line terminators within string, |
| 81 | * otherwise, match only at start and end of input string. |
| 82 | * @stable ICU 2.4 */ |
| 83 | UREGEX_MULTILINE = 8, |
| 84 | |
| 85 | /** Unix-only line endings. |
| 86 | * When this mode is enabled, only \\u000a is recognized as a line ending |
| 87 | * in the behavior of ., ^, and $. |
| 88 | * @stable ICU 4.0 |
| 89 | */ |
| 90 | UREGEX_UNIX_LINES = 1, |
| 91 | |
| 92 | /** Unicode word boundaries. |
| 93 | * If set, \b uses the Unicode TR 29 definition of word boundaries. |
| 94 | * Warning: Unicode word boundaries are quite different from |
| 95 | * traditional regular expression word boundaries. See |
| 96 | * http://unicode.org/reports/tr29/#Word_Boundaries |
| 97 | * @stable ICU 2.8 |
| 98 | */ |
| 99 | UREGEX_UWORD = 256, |
| 100 | |
| 101 | /** Error on unrecognized backslash escapes. |
| 102 | * If set, fail with an error on patterns that contain |
| 103 | * backslash-escaped ASCII letters without a known special |
| 104 | * meaning. If this flag is not set, these |
| 105 | * escaped letters represent themselves. |
| 106 | * @stable ICU 4.0 |
| 107 | */ |
| 108 | UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512 |
| 109 | |
| 110 | } URegexpFlag; |
| 111 | |
| 112 | /** |
| 113 | * Open (compile) an ICU regular expression. Compiles the regular expression in |
| 114 | * string form into an internal representation using the specified match mode flags. |
| 115 | * The resulting regular expression handle can then be used to perform various |
| 116 | * matching operations. |
| 117 | * |
| 118 | * Close the returned handle with uregex_close() to recover its resources. |
| 119 | * @param pattern The Regular Expression pattern to be compiled. |
| 120 | * @param patternLength The length of the pattern, or -1 if the pattern is |
| 121 | * NUL terminated. |
| 122 | * @param flags Flags that alter the default matching behavior for |
| 123 | * the regular expression, UREGEX_CASE_INSENSITIVE, for |
| 124 | * example. For default behavior, set this parameter to zero. |
| 125 | * See <code>enum URegexpFlag</code>. All desired flags |
| 126 | * are bitwise-ORed together. |
| 127 | * @param pe Receives the position (line and column numbers) of any syntax |
| 128 | * error within the source regular expression string. If this |
| 129 | * information is not wanted, pass NULL for this parameter. |
| 130 | * @param status Receives errors detected by this function. |
| 131 | * @return The URegularExpression handle for the compiled pattern. |
| 132 | * @stable ICU 3.0 |
| 133 | */ |
| 134 | U_STABLE URegularExpression * U_EXPORT2 |
| 135 | uregex_open( const UChar *pattern, |
| 136 | int32_t patternLength, |
| 137 | uint32_t flags, |
| 138 | UParseError *pe, |
| 139 | UErrorCode *status); |
| 140 | |
| 141 | /** |
| 142 | * Open (compile) an ICU regular expression. Compiles the regular expression in |
| 143 | * string form into an internal representation using the specified match mode flags. |
| 144 | * The resulting regular expression handle can then be used to perform various |
| 145 | * matching operations. |
| 146 | * <p> |
| 147 | * The contents of the pattern UText will be extracted and saved. Ownership of the |
| 148 | * UText struct itself remains with the caller. This is to match the behavior of |
| 149 | * uregex_open(). |
| 150 | * |
| 151 | * @param pattern The Regular Expression pattern to be compiled. |
| 152 | * @param flags Flags that alter the default matching behavior for |
| 153 | * the regular expression, UREGEX_CASE_INSENSITIVE, for |
| 154 | * example. For default behavior, set this parameter to zero. |
| 155 | * See <code>enum URegexpFlag</code>. All desired flags |
| 156 | * are bitwise-ORed together. |
| 157 | * @param pe Receives the position (line and column numbers) of any syntax |
| 158 | * error within the source regular expression string. If this |
| 159 | * information is not wanted, pass NULL for this parameter. |
| 160 | * @param status Receives errors detected by this function. |
| 161 | * @return The URegularExpression handle for the compiled pattern. |
| 162 | * @stable ICU 4.6 |
| 163 | */ |
| 164 | U_STABLE URegularExpression * U_EXPORT2 |
| 165 | uregex_openUText(UText *pattern, |
| 166 | uint32_t flags, |
| 167 | UParseError *pe, |
| 168 | UErrorCode *status); |
| 169 | |
| 170 | #if !UCONFIG_NO_CONVERSION |
| 171 | /** |
| 172 | * Open (compile) an ICU regular expression. The resulting regular expression |
| 173 | * handle can then be used to perform various matching operations. |
| 174 | * <p> |
| 175 | * This function is the same as uregex_open(), except that the pattern |
| 176 | * is supplied as an 8-bit char * string in the default code page. |
| 177 | * |
| 178 | * @param pattern The Regular Expression pattern to be compiled, |
| 179 | * NUL terminated. |
| 180 | * @param flags Flags that alter the default matching behavior for |
| 181 | * the regular expression, UREGEX_CASE_INSENSITIVE, for |
| 182 | * example. For default behavior, set this parameter to zero. |
| 183 | * See <code>enum URegexpFlag</code>. All desired flags |
| 184 | * are bitwise-ORed together. |
| 185 | * @param pe Receives the position (line and column numbers) of any syntax |
| 186 | * error within the source regular expression string. If this |
| 187 | * information is not wanted, pass NULL for this parameter. |
| 188 | * @param status Receives errors detected by this function. |
| 189 | * @return The URegularExpression object representing the compiled |
| 190 | * pattern. |
| 191 | * |
| 192 | * @stable ICU 3.0 |
| 193 | */ |
| 194 | U_STABLE URegularExpression * U_EXPORT2 |
| 195 | uregex_openC( const char *pattern, |
| 196 | uint32_t flags, |
| 197 | UParseError *pe, |
| 198 | UErrorCode *status); |
| 199 | #endif /* !UCONFIG_NO_CONVERSION */ |
| 200 | |
| 201 | |
| 202 | |
| 203 | /** |
| 204 | * Close the regular expression, recovering all resources (memory) it |
| 205 | * was holding. |
| 206 | * In C++, LocalURegularExpressionPointer uses this function as its closer. |
| 207 | * @param regexp The regular expression to be closed. |
| 208 | * @stable ICU 3.0 |
| 209 | */ |
| 210 | U_STABLE void U_EXPORT2 |
| 211 | uregex_close(URegularExpression *regexp); |
| 212 | |
| 213 | #if U_SHOW_CPLUSPLUS_API |
| 214 | |
| 215 | U_NAMESPACE_BEGIN |
| 216 | |
| 217 | /** |
| 218 | * \class LocalURegularExpressionPointer |
| 219 | * "Smart pointer" class, closes a URegularExpression via uregex_close(). |
| 220 | * For most methods see the LocalPointerBase base class. |
| 221 | * |
| 222 | * @see LocalPointerBase |
| 223 | * @see LocalPointer |
| 224 | * @stable ICU 4.4 |
| 225 | */ |
| 226 | U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close); |
| 227 | |
| 228 | U_NAMESPACE_END |
| 229 | |
| 230 | #endif /* U_SHOW_CPLUSPLUS_API */ |
| 231 | |
| 232 | /** |
| 233 | * Make a copy of a compiled regular expression. Cloning a regular |
| 234 | * expression is faster than opening a second instance from the source |
| 235 | * form of the expression, and requires less memory. |
| 236 | * <p> |
| 237 | * Note that the current input string and the position of any matched text |
| 238 | * within it are not cloned; only the pattern itself and the |
| 239 | * match mode flags are copied. |
| 240 | * <p> |
| 241 | * Cloning can be particularly useful to threaded applications that perform |
| 242 | * multiple match operations in parallel. Each concurrent RE |
| 243 | * operation requires its own instance of a URegularExpression. |
| 244 | * |
| 245 | * @param regexp The compiled regular expression to be cloned. |
| 246 | * @param status Receives indication of any errors encountered. |
| 247 | * @return The cloned copy of the compiled regular expression. |
| 248 | * @stable ICU 3.0 |
| 249 | */ |
| 250 | U_STABLE URegularExpression * U_EXPORT2 |
| 251 | uregex_clone(const URegularExpression *regexp, UErrorCode *status); |
| 252 | |
| 253 | /** |
| 254 | * Returns a pointer to the source form of the pattern for this regular expression. |
| 255 | * This function will work even if the pattern was originally specified as a UText. |
| 256 | * |
| 257 | * @param regexp The compiled regular expression. |
| 258 | * @param patLength This output parameter will be set to the length of the |
| 259 | * pattern string. A NULL pointer may be used here if the |
| 260 | * pattern length is not needed, as would be the case if |
| 261 | * the pattern is known in advance to be a NUL terminated |
| 262 | * string. |
| 263 | * @param status Receives errors detected by this function. |
| 264 | * @return A pointer to the pattern string. The storage for the string is |
| 265 | * owned by the regular expression object, and must not be |
| 266 | * altered or deleted by the application. The returned string |
| 267 | * will remain valid until the regular expression is closed. |
| 268 | * @stable ICU 3.0 |
| 269 | */ |
| 270 | U_STABLE const UChar * U_EXPORT2 |
| 271 | uregex_pattern(const URegularExpression *regexp, |
| 272 | int32_t *patLength, |
| 273 | UErrorCode *status); |
| 274 | |
| 275 | /** |
| 276 | * Returns the source text of the pattern for this regular expression. |
| 277 | * This function will work even if the pattern was originally specified as a UChar string. |
| 278 | * |
| 279 | * @param regexp The compiled regular expression. |
| 280 | * @param status Receives errors detected by this function. |
| 281 | * @return The pattern text. The storage for the text is owned by the regular expression |
| 282 | * object, and must not be altered or deleted by the application. |
| 283 | * |
| 284 | * @stable ICU 4.6 |
| 285 | */ |
| 286 | U_STABLE UText * U_EXPORT2 |
| 287 | uregex_patternUText(const URegularExpression *regexp, |
| 288 | UErrorCode *status); |
| 289 | |
| 290 | /** |
| 291 | * Get the match mode flags that were specified when compiling this regular expression. |
| 292 | * @param regexp The compiled regular expression. |
| 293 | * @param status Receives errors detected by this function. |
| 294 | * @return The match mode flags. |
| 295 | * @see URegexpFlag |
| 296 | * @stable ICU 3.0 |
| 297 | */ |
| 298 | U_STABLE int32_t U_EXPORT2 |
| 299 | uregex_flags(const URegularExpression *regexp, |
| 300 | UErrorCode *status); |
| 301 | |
| 302 | |
| 303 | /** |
| 304 | * Set the subject text string upon which the regular expression will look for matches. |
| 305 | * This function may be called any number of times, allowing the regular |
| 306 | * expression pattern to be applied to different strings. |
| 307 | * <p> |
| 308 | * Regular expression matching operations work directly on the application's |
| 309 | * string data. No copy is made. The subject string data must not be |
| 310 | * altered after calling this function until after all regular expression |
| 311 | * operations involving this string data are completed. |
| 312 | * <p> |
| 313 | * Zero-length strings are permitted. In this case, no subsequent match |
| 314 | * operation will dereference the text string pointer. |
| 315 | * |
| 316 | * @param regexp The compiled regular expression. |
| 317 | * @param text The subject text string. |
| 318 | * @param textLength The length of the subject text, or -1 if the string |
| 319 | * is NUL terminated. |
| 320 | * @param status Receives errors detected by this function. |
| 321 | * @stable ICU 3.0 |
| 322 | */ |
| 323 | U_STABLE void U_EXPORT2 |
| 324 | uregex_setText(URegularExpression *regexp, |
| 325 | const UChar *text, |
| 326 | int32_t textLength, |
| 327 | UErrorCode *status); |
| 328 | |
| 329 | |
| 330 | /** |
| 331 | * Set the subject text string upon which the regular expression will look for matches. |
| 332 | * This function may be called any number of times, allowing the regular |
| 333 | * expression pattern to be applied to different strings. |
| 334 | * <p> |
| 335 | * Regular expression matching operations work directly on the application's |
| 336 | * string data; only a shallow clone of the UText is made. The subject string data must |
| 337 | * not be altered after calling this function until after all regular expression |
| 338 | * operations involving this string data are completed. |
| 339 | * |
| 340 | * @param regexp The compiled regular expression. |
| 341 | * @param text The subject text string. |
| 342 | * @param status Receives errors detected by this function. |
| 343 | * |
| 344 | * @stable ICU 4.6 |
| 345 | */ |
| 346 | U_STABLE void U_EXPORT2 |
| 347 | uregex_setUText(URegularExpression *regexp, |
| 348 | UText *text, |
| 349 | UErrorCode *status); |
| 350 | |
| 351 | /** |
| 352 | * Get the subject text that is currently associated with this |
| 353 | * regular expression object. If the input was supplied using uregex_setText(), |
| 354 | * that pointer will be returned. Otherwise, the characters in the input will |
| 355 | * be extracted to a buffer and returned. In either case, ownership remains |
| 356 | * with the regular expression object. |
| 357 | * |
| 358 | * This function will work even if the input was originally specified as a UText. |
| 359 | * |
| 360 | * @param regexp The compiled regular expression. |
| 361 | * @param textLength Output parameter that receives the length of the string. |
| 362 | * A NULL pointer may be used here if the |
| 363 | * text length is not needed, as would be the case if |
| 364 | * the text is known in advance to be a NUL terminated |
| 365 | * string. |
| 366 | * @param status Receives errors detected by this function. |
| 367 | * @return Pointer to the subject text string currently associated with |
| 368 | * this regular expression. |
| 369 | * @stable ICU 3.0 |
| 370 | */ |
| 371 | U_STABLE const UChar * U_EXPORT2 |
| 372 | uregex_getText(URegularExpression *regexp, |
| 373 | int32_t *textLength, |
| 374 | UErrorCode *status); |
| 375 | |
| 376 | /** |
| 377 | * Get the subject text that is currently associated with this |
| 378 | * regular expression object, as a UText. |
| 379 | * |
| 380 | * This function will work even if the input was originally specified as a UChar string. |
| 381 | * |
| 382 | * @param regexp The compiled regular expression. |
| 383 | * @param dest A mutable UText in which to store the current input. |
| 384 | * If NULL, a new UText will be created as an immutable shallow clone |
| 385 | * of the actual input string. |
| 386 | * @param status Receives errors detected by this function. |
| 387 | * @return The subject text currently associated with this regular expression. |
| 388 | * If a pre-allocated UText was provided, it will always be used and returned. |
| 389 | * |
| 390 | * @stable ICU 4.6 |
| 391 | */ |
| 392 | U_STABLE UText * U_EXPORT2 |
| 393 | uregex_getUText(URegularExpression *regexp, |
| 394 | UText *dest, |
| 395 | UErrorCode *status); |
| 396 | |
| 397 | /** |
| 398 | * Set the subject text string upon which the regular expression is looking for matches |
| 399 | * without changing any other aspect of the matching state. |
| 400 | * The new and previous text strings must have the same content. |
| 401 | * |
| 402 | * This function is intended for use in environments where ICU is operating on |
| 403 | * strings that may move around in memory. It provides a mechanism for notifying |
| 404 | * ICU that the string has been relocated, and for providing a new UText to access the |
| 405 | * string in its new position. |
| 406 | * |
| 407 | * Note that the regular expression implementation never copies the underlying text |
| 408 | * of a string being matched, but always operates directly on the original text |
| 409 | * provided by the user. Refreshing simply drops the references to the old text |
| 410 | * and replaces them with references to the new. |
| 411 | * |
| 412 | * Caution: this function is normally used only by very specialized |
| 413 | * system-level code. One example use case is with garbage collection |
| 414 | * that moves the text in memory. |
| 415 | * |
| 416 | * @param regexp The compiled regular expression. |
| 417 | * @param text The new (moved) text string. |
| 418 | * @param status Receives errors detected by this function. |
| 419 | * |
| 420 | * @stable ICU 4.8 |
| 421 | */ |
| 422 | U_STABLE void U_EXPORT2 |
| 423 | uregex_refreshUText(URegularExpression *regexp, |
| 424 | UText *text, |
| 425 | UErrorCode *status); |
| 426 | |
| 427 | /** |
| 428 | * Attempts to match the input string against the pattern. |
| 429 | * To succeed, the match must extend to the end of the string, |
| 430 | * or cover the complete match region. |
| 431 | * |
| 432 | * If startIndex >= zero the match operation starts at the specified |
| 433 | * index and must extend to the end of the input string. Any region |
| 434 | * that has been specified is reset. |
| 435 | * |
| 436 | * If startIndex == -1 the match must cover the input region, or the entire |
| 437 | * input string if no region has been set. This directly corresponds to |
| 438 | * Matcher.matches() in Java. |
| 439 | * |
| 440 | * @param regexp The compiled regular expression. |
| 441 | * @param startIndex The input string (native) index at which to begin matching, or -1 |
| 442 | * to match the input Region. |
| 443 | * @param status Receives errors detected by this function. |
| 444 | * @return TRUE if there is a match. |
| 445 | * @stable ICU 3.0 |
| 446 | */ |
| 447 | U_STABLE UBool U_EXPORT2 |
| 448 | uregex_matches(URegularExpression *regexp, |
| 449 | int32_t startIndex, |
| 450 | UErrorCode *status); |
| 451 | |
| 452 | /** |
| 453 | * 64-bit version of uregex_matches(); startIndex is an int64_t. |
| 454 | * Attempts to match the input string against the pattern. |
| 455 | * To succeed, the match must extend to the end of the string, |
| 456 | * or cover the complete match region. |
| 457 | * |
| 458 | * If startIndex >= zero the match operation starts at the specified |
| 459 | * index and must extend to the end of the input string. Any region |
| 460 | * that has been specified is reset. |
| 461 | * |
| 462 | * If startIndex == -1 the match must cover the input region, or the entire |
| 463 | * input string if no region has been set. This directly corresponds to |
| 464 | * Matcher.matches() in Java. |
| 465 | * |
| 466 | * @param regexp The compiled regular expression. |
| 467 | * @param startIndex The input string (native) index at which to begin matching, or -1 |
| 468 | * to match the input Region. |
| 469 | * @param status Receives errors detected by this function. |
| 470 | * @return TRUE if there is a match. |
| 471 | * @stable ICU 4.6 |
| 472 | */ |
| 473 | U_STABLE UBool U_EXPORT2 |
| 474 | uregex_matches64(URegularExpression *regexp, |
| 475 | int64_t startIndex, |
| 476 | UErrorCode *status); |
| 477 | |
| 478 | /** |
| 479 | * Attempts to match the input string, starting from the specified index, against the pattern. |
| 480 | * The match may be of any length, and is not required to extend to the end |
| 481 | * of the input string. Contrast with uregex_matches(). |
| 482 | * |
| 483 | * <p>If startIndex is >= 0 any input region that was set for this |
| 484 | * URegularExpression is reset before the operation begins. |
| 485 | * |
| 486 | * <p>If the specified starting index == -1 the match begins at the start of the input |
| 487 | * region, or at the start of the full string if no region has been specified. |
| 488 | * This corresponds directly with Matcher.lookingAt() in Java. |
| 489 | * |
| 490 | * <p>If the match succeeds then more information can be obtained via the |
| 491 | * <code>uregex_start()</code>, <code>uregex_end()</code>, |
| 492 | * and <code>uregex_group()</code> functions.</p> |
| 493 | * |
| 494 | * @param regexp The compiled regular expression. |
| 495 | * @param startIndex The input string (native) index at which to begin matching, or |
| 496 | * -1 to match the Input Region. |
| 497 | * @param status A reference to a UErrorCode to receive any errors. |
| 498 | * @return TRUE if there is a match. |
| 499 | * @stable ICU 3.0 |
| 500 | */ |
| 501 | U_STABLE UBool U_EXPORT2 |
| 502 | uregex_lookingAt(URegularExpression *regexp, |
| 503 | int32_t startIndex, |
| 504 | UErrorCode *status); |
| 505 | |
| 506 | /** |
| 507 | * 64-bit version of uregex_lookingAt(); startIndex is an int64_t. |
| 508 | * Attempts to match the input string, starting from the specified index, against the pattern. |
| 509 | * The match may be of any length, and is not required to extend to the end |
| 510 | * of the input string. Contrast with uregex_matches(). |
| 511 | * |
| 512 | * <p>If startIndex is >= 0 any input region that was set for this |
| 513 | * URegularExpression is reset before the operation begins. |
| 514 | * |
| 515 | * <p>If the specified starting index == -1 the match begins at the start of the input |
| 516 | * region, or at the start of the full string if no region has been specified. |
| 517 | * This corresponds directly with Matcher.lookingAt() in Java. |
| 518 | * |
| 519 | * <p>If the match succeeds then more information can be obtained via the |
| 520 | * <code>uregex_start()</code>, <code>uregex_end()</code>, |
| 521 | * and <code>uregex_group()</code> functions.</p> |
| 522 | * |
| 523 | * @param regexp The compiled regular expression. |
| 524 | * @param startIndex The input string (native) index at which to begin matching, or |
| 525 | * -1 to match the Input Region. |
| 526 | * @param status A reference to a UErrorCode to receive any errors. |
| 527 | * @return TRUE if there is a match. |
| 528 | * @stable ICU 4.6 |
| 529 | */ |
| 530 | U_STABLE UBool U_EXPORT2 |
| 531 | uregex_lookingAt64(URegularExpression *regexp, |
| 532 | int64_t startIndex, |
| 533 | UErrorCode *status); |
| 534 | |
| 535 | /** |
| 536 | * Find the first matching substring of the input string that matches the pattern. |
| 537 | * If startIndex is >= zero the search for a match begins at the specified index, |
| 538 | * and any match region is reset. This corresponds directly with |
| 539 | * Matcher.find(startIndex) in Java. |
| 540 | * |
| 541 | * If startIndex == -1 the search begins at the start of the input region, |
| 542 | * or at the start of the full string if no region has been specified. |
| 543 | * |
| 544 | * If a match is found, <code>uregex_start()</code>, <code>uregex_end()</code>, and |
| 545 | * <code>uregex_group()</code> will provide more information regarding the match. |
| 546 | * |
| 547 | * @param regexp The compiled regular expression. |
| 548 | * @param startIndex The position (native) in the input string to begin the search, or |
| 549 | * -1 to search within the Input Region. |
| 550 | * @param status A reference to a UErrorCode to receive any errors. |
| 551 | * @return TRUE if a match is found. |
| 552 | * @stable ICU 3.0 |
| 553 | */ |
| 554 | U_STABLE UBool U_EXPORT2 |
| 555 | uregex_find(URegularExpression *regexp, |
| 556 | int32_t startIndex, |
| 557 | UErrorCode *status); |
| 558 | |
| 559 | /** |
| 560 | * 64-bit version of uregex_find(); startIndex is an int64_t. |
| 561 | * Find the first matching substring of the input string that matches the pattern. |
| 562 | * If startIndex is >= zero the search for a match begins at the specified index, |
| 563 | * and any match region is reset. This corresponds directly with |
| 564 | * Matcher.find(startIndex) in Java. |
| 565 | * |
| 566 | * If startIndex == -1 the search begins at the start of the input region, |
| 567 | * or at the start of the full string if no region has been specified. |
| 568 | * |
| 569 | * If a match is found, <code>uregex_start()</code>, <code>uregex_end()</code>, and |
| 570 | * <code>uregex_group()</code> will provide more information regarding the match. |
| 571 | * |
| 572 | * @param regexp The compiled regular expression. |
| 573 | * @param startIndex The position (native) in the input string to begin the search, or |
| 574 | * -1 to search within the Input Region. |
| 575 | * @param status A reference to a UErrorCode to receive any errors. |
| 576 | * @return TRUE if a match is found. |
| 577 | * @stable ICU 4.6 |
| 578 | */ |
| 579 | U_STABLE UBool U_EXPORT2 |
| 580 | uregex_find64(URegularExpression *regexp, |
| 581 | int64_t startIndex, |
| 582 | UErrorCode *status); |
| 583 | |
| 584 | /** |
| 585 | * Find the next pattern match in the input string. Begin searching |
| 586 | * the input at the location following the end of the previous match, |
| 587 | * or at the start of the string (or region) if there is no |
| 588 | * previous match. If a match is found, <code>uregex_start()</code>, <code>uregex_end()</code>, and |
| 589 | * <code>uregex_group()</code> will provide more information regarding the match. |
| 590 | * |
| 591 | * @param regexp The compiled regular expression. |
| 592 | * @param status A reference to a UErrorCode to receive any errors. |
| 593 | * @return TRUE if a match is found. |
| 594 | * @see uregex_reset |
| 595 | * @stable ICU 3.0 |
| 596 | */ |
| 597 | U_STABLE UBool U_EXPORT2 |
| 598 | uregex_findNext(URegularExpression *regexp, |
| 599 | UErrorCode *status); |
| 600 | |
| 601 | /** |
| 602 | * Get the number of capturing groups in this regular expression's pattern. |
| 603 | * @param regexp The compiled regular expression. |
| 604 | * @param status A reference to a UErrorCode to receive any errors. |
| 605 | * @return The number of capture groups. |
| 606 | * @stable ICU 3.0 |
| 607 | */ |
| 608 | U_STABLE int32_t U_EXPORT2 |
| 609 | uregex_groupCount(URegularExpression *regexp, |
| 610 | UErrorCode *status); |
| 611 | |
| 612 | /** |
| 613 | * Get the group number corresponding to a named capture group. |
| 614 | * The returned number can be used with any function that accesses |
| 615 | * capture groups by number. |
| 616 | * |
| 617 | * The function returns an error status if the specified name does not |
| 618 | * appear in the pattern. |
| 619 | * |
| 620 | * @param regexp The compiled regular expression. |
| 621 | * @param groupName The capture group name. |
| 622 | * @param nameLength The length of the name, or -1 if the name is a |
| 623 | * nul-terminated string. |
| 624 | * @param status A pointer to a UErrorCode to receive any errors. |
| 625 | * @return The group number corresponding to the named capture group. |
| 626 | * @stable ICU 55 |
| 627 | */ |
| 628 | U_STABLE int32_t U_EXPORT2 |
| 629 | uregex_groupNumberFromName(URegularExpression *regexp, |
| 630 | const UChar *groupName, |
| 631 | int32_t nameLength, |
| 632 | UErrorCode *status); |
| 633 | |
| 634 | |
| 635 | /** |
| 636 | * Get the group number corresponding to a named capture group. |
| 637 | * The returned number can be used with any function that accesses |
| 638 | * capture groups by number. |
| 639 | * |
| 640 | * The function returns an error status if the specified name does not |
| 641 | * appear in the pattern. |
| 642 | * |
| 643 | * @param regexp The compiled regular expression. |
| 644 | * @param groupName The capture group name, |
| 645 | * platform invariant characters only. |
| 646 | * @param nameLength The length of the name, or -1 if the name is |
| 647 | * nul-terminated. |
| 648 | * @param status A pointer to a UErrorCode to receive any errors. |
| 649 | * @return The group number corresponding to the named capture group. |
| 650 | * @stable ICU 55 |
| 651 | */ |
| 652 | U_STABLE int32_t U_EXPORT2 |
| 653 | uregex_groupNumberFromCName(URegularExpression *regexp, |
| 654 | const char *groupName, |
| 655 | int32_t nameLength, |
| 656 | UErrorCode *status); |
| 657 | |
| 658 | /** Extract the string for the specified matching expression or subexpression. |
| 659 | * Group #0 is the complete string of matched text. |
| 660 | * Group #1 is the text matched by the first set of capturing parentheses. |
| 661 | * |
| 662 | * @param regexp The compiled regular expression. |
| 663 | * @param groupNum The capture group to extract. Group 0 is the complete |
| 664 | * match. The value of this parameter must be |
| 665 | * less than or equal to the number of capture groups in |
| 666 | * the pattern. |
| 667 | * @param dest Buffer to receive the matching string data. |
| 668 | * @param destCapacity Capacity of the dest buffer. |
| 669 | * @param status A reference to a UErrorCode to receive any errors. |
| 670 | * @return Length of matching data, |
| 671 | * or -1 if no applicable match. |
| 672 | * @stable ICU 3.0 |
| 673 | */ |
| 674 | U_STABLE int32_t U_EXPORT2 |
| 675 | uregex_group(URegularExpression *regexp, |
| 676 | int32_t groupNum, |
| 677 | UChar *dest, |
| 678 | int32_t destCapacity, |
| 679 | UErrorCode *status); |
| 680 | |
| 681 | /** Returns a shallow immutable clone of the entire input string with the current index set |
| 682 | * to the beginning of the requested capture group. The capture group length is also |
| 683 | * returned via groupLength. |
| 684 | * Group #0 is the complete string of matched text. |
| 685 | * Group #1 is the text matched by the first set of capturing parentheses. |
| 686 | * |
| 687 | * @param regexp The compiled regular expression. |
| 688 | * @param groupNum The capture group to extract. Group 0 is the complete |
| 689 | * match. The value of this parameter must be |
| 690 | * less than or equal to the number of capture groups in |
| 691 | * the pattern. |
| 692 | * @param dest A mutable UText in which to store the current input. |
| 693 | * If NULL, a new UText will be created as an immutable shallow clone |
| 694 | * of the entire input string. |
| 695 | * @param groupLength The group length of the desired capture group. Output parameter. |
| 696 | * @param status A reference to a UErrorCode to receive any errors. |
| 697 | * @return The subject text currently associated with this regular expression. |
| 698 | * If a pre-allocated UText was provided, it will always be used and returned. |
| 699 | * |
| 700 | * |
| 701 | * @stable ICU 4.6 |
| 702 | */ |
| 703 | U_STABLE UText * U_EXPORT2 |
| 704 | uregex_groupUText(URegularExpression *regexp, |
| 705 | int32_t groupNum, |
| 706 | UText *dest, |
| 707 | int64_t *groupLength, |
| 708 | UErrorCode *status); |
| 709 | |
| 710 | /** |
| 711 | * Returns the index in the input string of the start of the text matched by the |
| 712 | * specified capture group during the previous match operation. Returns -1 if |
| 713 | * the capture group was not part of the last match. |
| 714 | * Group #0 refers to the complete range of matched text. |
| 715 | * Group #1 refers to the text matched by the first set of capturing parentheses. |
| 716 | * |
| 717 | * @param regexp The compiled regular expression. |
| 718 | * @param groupNum The capture group number. |
| 719 | * @param status A reference to a UErrorCode to receive any errors. |
| 720 | * @return The starting (native) position in the input of the text matched |
| 721 | * by the specified group, or -1 if the group was not part of the last match. |
| 722 | * @stable ICU 3.0 |
| 723 | */ |
| 724 | U_STABLE int32_t U_EXPORT2 |
| 725 | uregex_start(URegularExpression *regexp, |
| 726 | int32_t groupNum, |
| 727 | UErrorCode *status); |
| 728 | |
| 729 | /** |
| 730 | * 64bit version of uregex_start(). |
| 731 | * Returns the index in the input string of the start of the text matched by the |
| 732 | * specified capture group during the previous match operation. Returns -1 if |
| 733 | * the capture group was not part of the last match. |
| 734 | * Group #0 refers to the complete range of matched text. |
| 735 | * Group #1 refers to the text matched by the first set of capturing parentheses. |
| 736 | * |
| 737 | * @param regexp The compiled regular expression. |
| 738 | * @param groupNum The capture group number. |
| 739 | * @param status A reference to a UErrorCode to receive any errors. |
| 740 | * @return The starting (native) position in the input of the text matched |
| 741 | * by the specified group, or -1 if the group was not part of the last match. |
| 742 | * @stable ICU 4.6 |
| 743 | */ |
| 744 | U_STABLE int64_t U_EXPORT2 |
| 745 | uregex_start64(URegularExpression *regexp, |
| 746 | int32_t groupNum, |
| 747 | UErrorCode *status); |
| 748 | |
| 749 | /** |
| 750 | * Returns the index in the input string of the position following the end |
| 751 | * of the text matched by the specified capture group. |
| 752 | * Returns -1 if the capture group was not part of the last match. |
| 753 | * Group #0 refers to the complete range of matched text. |
| 754 | * Group #1 refers to the text matched by the first set of capturing parentheses. |
| 755 | * |
| 756 | * @param regexp The compiled regular expression. |
| 757 | * @param groupNum The capture group number. |
| 758 | * @param status A reference to a UErrorCode to receive any errors. |
| 759 | * @return The (native) index of the position following the last matched character, or -1 if the group was not part of the last match. |
| 760 | * @stable ICU 3.0 |
| 761 | */ |
| 762 | U_STABLE int32_t U_EXPORT2 |
| 763 | uregex_end(URegularExpression *regexp, |
| 764 | int32_t groupNum, |
| 765 | UErrorCode *status); |
| 766 | |
| 767 | /** |
| 768 | * 64bit version of uregex_end(). |
| 769 | * Returns the index in the input string of the position following the end |
| 770 | * of the text matched by the specified capture group. |
| 771 | * Returns -1 if the capture group was not part of the last match. |
| 772 | * Group #0 refers to the complete range of matched text. |
| 773 | * Group #1 refers to the text matched by the first set of capturing parentheses. |
| 774 | * |
| 775 | * @param regexp The compiled regular expression. |
| 776 | * @param groupNum The capture group number. |
| 777 | * @param status A reference to a UErrorCode to receive any errors. |
| 778 | * @return The (native) index of the position following the last matched character, or -1 if the group was not part of the last match. |
| 779 | * @stable ICU 4.6 |
| 780 | */ |
| 781 | U_STABLE int64_t U_EXPORT2 |
| 782 | uregex_end64(URegularExpression *regexp, |
| 783 | int32_t groupNum, |
| 784 | UErrorCode *status); |
| 785 | |
| 786 | /** |
| 787 | * Resets any saved state from the previous match. Has the effect of |
| 788 | * causing uregex_findNext() to begin at the specified index, and causing |
| 789 | * uregex_start(), uregex_end() and uregex_group() to return an error |
| 790 | * indicating that there is no match information available. Clears any |
| 791 | * match region that may have been set. |
| 792 | * |
| 793 | * @param regexp The compiled regular expression. |
| 794 | * @param index The position (native) in the text at which a |
| 795 | * uregex_findNext() should begin searching. |
| 796 | * @param status A reference to a UErrorCode to receive any errors. |
| 797 | * @stable ICU 3.0 |
| 798 | */ |
| 799 | U_STABLE void U_EXPORT2 |
| 800 | uregex_reset(URegularExpression *regexp, |
| 801 | int32_t index, |
| 802 | UErrorCode *status); |
| 803 | |
| 804 | /** |
| 805 | * 64bit version of uregex_reset(). |
| 806 | * Resets any saved state from the previous match. Has the effect of |
| 807 | * causing uregex_findNext() to begin at the specified index, and causing |
| 808 | * uregex_start(), uregex_end() and uregex_group() to return an error |
| 809 | * indicating that there is no match information available. Clears any |
| 810 | * match region that may have been set. |
| 811 | * |
| 812 | * @param regexp The compiled regular expression. |
| 813 | * @param index The position (native) in the text at which a |
| 814 | * uregex_findNext() should begin searching. |
| 815 | * @param status A reference to a UErrorCode to receive any errors. |
| 816 | * @stable ICU 4.6 |
| 817 | */ |
| 818 | U_STABLE void U_EXPORT2 |
| 819 | uregex_reset64(URegularExpression *regexp, |
| 820 | int64_t index, |
| 821 | UErrorCode *status); |
| 822 | |
| 823 | /** |
| 824 | * Sets the limits of the matching region for this URegularExpression. |
| 825 | * The region is the part of the input string that will be considered when matching. |
| 826 | * Invoking this function resets any saved state from the previous match, |
| 827 | * then sets the region to start at the index specified by the regionStart parameter |
| 828 | * and end at the index specified by the regionLimit parameter. |
| 829 | * |
| 830 | * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds() |
| 831 | * and uregex_useAnchoringBounds()), certain constructs such as anchors may behave differently |
| 832 | * at or around the boundaries of the region. |
| 833 | * |
| 834 | * The function will fail if regionStart is greater than regionLimit, or if either index |
| 835 | * is less than zero or greater than the length of the string being matched. |
| 836 | * |
| 837 | * @param regexp The compiled regular expression. |
| 838 | * @param regionStart The (native) index to begin searches at. |
| 839 | * @param regionLimit The (native) index to end searches at (exclusive). |
| 840 | * @param status A pointer to a UErrorCode to receive any errors. |
| 841 | * @stable ICU 4.0 |
| 842 | */ |
| 843 | U_STABLE void U_EXPORT2 |
| 844 | uregex_setRegion(URegularExpression *regexp, |
| 845 | int32_t regionStart, |
| 846 | int32_t regionLimit, |
| 847 | UErrorCode *status); |
| 848 | |
| 849 | /** |
| 850 | * 64bit version of uregex_setRegion(). |
| 851 | * Sets the limits of the matching region for this URegularExpression. |
| 852 | * The region is the part of the input string that will be considered when matching. |
| 853 | * Invoking this function resets any saved state from the previous match, |
| 854 | * then sets the region to start at the index specified by the regionStart parameter |
| 855 | * and end at the index specified by the regionLimit parameter. |
| 856 | * |
| 857 | * Depending on the transparency and anchoring being used (see uregex_useTransparentBounds() |
| 858 | * and uregex_useAnchoringBounds()), certain constructs such as anchors may behave differently |
| 859 | * at or around the boundaries of the region. |
| 860 | * |
| 861 | * The function will fail if regionStart is greater than regionLimit, or if either index |
| 862 | * is less than zero or greater than the length of the string being matched. |
| 863 | * |
| 864 | * @param regexp The compiled regular expression. |
| 865 | * @param regionStart The (native) index to begin searches at. |
| 866 | * @param regionLimit The (native) index to end searches at (exclusive). |
| 867 | * @param status A pointer to a UErrorCode to receive any errors. |
| 868 | * @stable ICU 4.6 |
| 869 | */ |
| 870 | U_STABLE void U_EXPORT2 |
| 871 | uregex_setRegion64(URegularExpression *regexp, |
| 872 | int64_t regionStart, |
| 873 | int64_t regionLimit, |
| 874 | UErrorCode *status); |
| 875 | |
| 876 | /** |
| 877 | * Sets the matching region and the starting index for subsequent matches |
| 878 | * in a single operation. |
| 879 | * This is useful because the usual function for setting the starting |
| 880 | * index, uregex_reset(), also resets any region limits. |
| 881 | * |
| 882 | * @param regexp The compiled regular expression. |
| 883 | * @param regionStart The (native) index to begin searches at. |
| 884 | * @param regionLimit The (native) index to end searches at (exclusive). |
| 885 | * @param startIndex The index in the input text at which the next |
| 886 | * match operation should begin. |
| 887 | * @param status A pointer to a UErrorCode to receive any errors. |
| 888 | * @stable ICU 4.6 |
| 889 | */ |
| 890 | U_STABLE void U_EXPORT2 |
| 891 | uregex_setRegionAndStart(URegularExpression *regexp, |
| 892 | int64_t regionStart, |
| 893 | int64_t regionLimit, |
| 894 | int64_t startIndex, |
| 895 | UErrorCode *status); |
| 896 | |
| 897 | /** |
| 898 | * Reports the start index of the matching region. Any matches found are limited |
| 899 | * to the region bounded by regionStart (inclusive) and regionEnd (exclusive). |
| 900 | * |
| 901 | * @param regexp The compiled regular expression. |
| 902 | * @param status A pointer to a UErrorCode to receive any errors. |
| 903 | * @return The starting (native) index of this matcher's region. |
| 904 | * @stable ICU 4.0 |
| 905 | */ |
| 906 | U_STABLE int32_t U_EXPORT2 |
| 907 | uregex_regionStart(const URegularExpression *regexp, |
| 908 | UErrorCode *status); |
| 909 | |
| 910 | /** |
| 911 | * 64bit version of uregex_regionStart(). |
| 912 | * Reports the start index of the matching region. Any matches found are limited |
| 913 | * to the region bounded by regionStart (inclusive) and regionEnd (exclusive). |
| 914 | * |
| 915 | * @param regexp The compiled regular expression. |
| 916 | * @param status A pointer to a UErrorCode to receive any errors. |
| 917 | * @return The starting (native) index of this matcher's region. |
| 918 | * @stable ICU 4.6 |
| 919 | */ |
| 920 | U_STABLE int64_t U_EXPORT2 |
| 921 | uregex_regionStart64(const URegularExpression *regexp, |
| 922 | UErrorCode *status); |
| 923 | |
| 924 | /** |
| 925 | * Reports the end index (exclusive) of the matching region for this URegularExpression. |
| 926 | * Any matches found are limited to the region bounded by regionStart (inclusive) |
| 927 | * and regionEnd (exclusive). |
| 928 | * |
| 929 | * @param regexp The compiled regular expression. |
| 930 | * @param status A pointer to a UErrorCode to receive any errors. |
| 931 | * @return The ending point (native) of this matcher's region. |
| 932 | * @stable ICU 4.0 |
| 933 | */ |
| 934 | U_STABLE int32_t U_EXPORT2 |
| 935 | uregex_regionEnd(const URegularExpression *regexp, |
| 936 | UErrorCode *status); |
| 937 | |
| 938 | /** |
| 939 | * 64bit version of uregex_regionEnd(). |
| 940 | * Reports the end index (exclusive) of the matching region for this URegularExpression. |
| 941 | * Any matches found are limited to the region bounded by regionStart (inclusive) |
| 942 | * and regionEnd (exclusive). |
| 943 | * |
| 944 | * @param regexp The compiled regular expression. |
| 945 | * @param status A pointer to a UErrorCode to receive any errors. |
| 946 | * @return The ending point (native) of this matcher's region. |
| 947 | * @stable ICU 4.6 |
| 948 | */ |
| 949 | U_STABLE int64_t U_EXPORT2 |
| 950 | uregex_regionEnd64(const URegularExpression *regexp, |
| 951 | UErrorCode *status); |
| 952 | |
| 953 | /** |
| 954 | * Queries the transparency of region bounds for this URegularExpression. |
| 955 | * See uregex_useTransparentBounds() for a description of transparent and opaque bounds. |
| 956 | * By default, matching boundaries are opaque. |
| 957 | * |
| 958 | * @param regexp The compiled regular expression. |
| 959 | * @param status A pointer to a UErrorCode to receive any errors. |
| 960 | * @return TRUE if this matcher is using transparent bounds, FALSE if it is using opaque bounds. |
| 961 | * @stable ICU 4.0 |
| 962 | */ |
| 963 | U_STABLE UBool U_EXPORT2 |
| 964 | uregex_hasTransparentBounds(const URegularExpression *regexp, |
| 965 | UErrorCode *status); |
| 966 | |
| 967 | |
| 968 | /** |
| 969 | * Sets the transparency of region bounds for this URegularExpression. |
| 970 | * Invoking this function with an argument of TRUE will cause matching to use transparent bounds. |
| 971 | * If the boolean argument is FALSE, then opaque bounds will be used. |
| 972 | * |
| 973 | * Using transparent bounds, the boundaries of the matching region are transparent |
| 974 | * to lookahead, lookbehind, and boundary matching constructs. Those constructs can |
| 975 | * see text beyond the boundaries of the region while checking for a match. |
| 976 | * |
| 977 | * With opaque bounds, no text outside of the matching region is visible to lookahead, |
| 978 | * lookbehind, and boundary matching constructs. |
| 979 | * |
| 980 | * By default, opaque bounds are used. |
| 981 | * |
| 982 | * @param regexp The compiled regular expression. |
| 983 | * @param b TRUE for transparent bounds; FALSE for opaque bounds. |
| 984 | * @param status A pointer to a UErrorCode to receive any errors. |
| 985 | * @stable ICU 4.0 |
| 986 | */ |
| 987 | U_STABLE void U_EXPORT2 |
| 988 | uregex_useTransparentBounds(URegularExpression *regexp, |
| 989 | UBool b, |
| 990 | UErrorCode *status); |
| 991 | |
| 992 | |
| 993 | /** |
| 994 | * Returns TRUE if this URegularExpression is using anchoring bounds. |
| 995 | * By default, anchoring region bounds are used (see uregex_useAnchoringBounds()). |
| 996 | * |
| 997 | * @param regexp The compiled regular expression. |
| 998 | * @param status A pointer to a UErrorCode to receive any errors. |
| 999 | * @return TRUE if this matcher is using anchoring bounds. |
| 1000 | * @stable ICU 4.0 |
| 1001 | */ |
| 1002 | U_STABLE UBool U_EXPORT2 |
| 1003 | uregex_hasAnchoringBounds(const URegularExpression *regexp, |
| 1004 | UErrorCode *status); |
| 1005 | |
| 1006 | |
| 1007 | /** |
| 1008 | * Sets whether this URegularExpression is using Anchoring Bounds for its region. |
| 1009 | * With anchoring bounds, pattern anchors such as ^ and $ will match at the start |
| 1010 | * and end of the region. Without Anchoring Bounds, anchors will only match at |
| 1011 | * the positions they would in the complete text. |
| 1012 | * |
| 1013 | * Anchoring Bounds are the default for regions. |
| 1014 | * |
| 1015 | * @param regexp The compiled regular expression. |
| 1016 | * @param b TRUE to enable anchoring bounds; FALSE to disable them. |
| 1017 | * @param status A pointer to a UErrorCode to receive any errors. |
| 1018 | * @stable ICU 4.0 |
| 1019 | */ |
| 1020 | U_STABLE void U_EXPORT2 |
| 1021 | uregex_useAnchoringBounds(URegularExpression *regexp, |
| 1022 | UBool b, |
| 1023 | UErrorCode *status); |
| 1024 | |
| 1025 | /** |
| 1026 | * Returns TRUE if the most recent matching operation touched the |
| 1027 | * end of the text being processed. In this case, additional input text could |
| 1028 | * change the results of that match. |
| 1029 | * |
| 1030 | * @param regexp The compiled regular expression. |
| 1031 | * @param status A pointer to a UErrorCode to receive any errors. |
| 1032 | * @return TRUE if the most recent match hit the end of input. |
| 1033 | * @stable ICU 4.0 |
| 1034 | */ |
| 1035 | U_STABLE UBool U_EXPORT2 |
| 1036 | uregex_hitEnd(const URegularExpression *regexp, |
| 1037 | UErrorCode *status); |
| 1038 | |
| 1039 | /** |
| 1040 | * Returns TRUE if the most recent match succeeded and additional input could cause |
| 1041 | * it to fail. If this function returns FALSE and a match was found, then more input |
| 1042 | * might change the match but the match won't be lost. If a match was not found, |
| 1043 | * then the result of this function has no meaning. |
| 1044 | * |
| 1045 | * @param regexp The compiled regular expression. |
| 1046 | * @param status A pointer to a UErrorCode to receive any errors. |
| 1047 | * @return TRUE if more input could cause the most recent match to no longer match. |
| 1048 | * @stable ICU 4.0 |
| 1049 | */ |
| 1050 | U_STABLE UBool U_EXPORT2 |
| 1051 | uregex_requireEnd(const URegularExpression *regexp, |
| 1052 | UErrorCode *status); |
| 1053 | |
| 1054 | |
| 1055 | |
| 1056 | |
| 1057 | |
| 1058 | /** |
| 1059 | * Replaces every substring of the input that matches the pattern |
| 1060 | * with the given replacement string. This is a convenience function that |
| 1061 | * provides a complete find-and-replace-all operation. |
| 1062 | * |
| 1063 | * This function scans the input string looking for matches of the pattern. |
| 1064 | * Input that is not part of any match is copied unchanged to the |
| 1065 | * destination buffer. Matched regions are replaced in the output |
| 1066 | * buffer by the replacement string. The replacement string may contain |
| 1067 | * references to capture groups; these take the form of $1, $2, etc. |
| 1068 | * |
| 1069 | * @param regexp The compiled regular expression. |
| 1070 | * @param replacementText A string containing the replacement text. |
| 1071 | * @param replacementLength The length of the replacement string, or |
| 1072 | * -1 if it is NUL terminated. |
| 1073 | * @param destBuf A (UChar *) buffer that will receive the result. |
| 1074 | * @param destCapacity The capacity of the destination buffer. |
| 1075 | * @param status A reference to a UErrorCode to receive any errors. |
| 1076 | * @return The length of the string resulting from the find |
| 1077 | * and replace operation. In the event that the |
| 1078 | * destination capacity is inadequate, the return value |
| 1079 | * is still the full length of the untruncated string. |
| 1080 | * @stable ICU 3.0 |
| 1081 | */ |
| 1082 | U_STABLE int32_t U_EXPORT2 |
| 1083 | uregex_replaceAll(URegularExpression *regexp, |
| 1084 | const UChar *replacementText, |
| 1085 | int32_t replacementLength, |
| 1086 | UChar *destBuf, |
| 1087 | int32_t destCapacity, |
| 1088 | UErrorCode *status); |
| 1089 | |
| 1090 | /** |
| 1091 | * Replaces every substring of the input that matches the pattern |
| 1092 | * with the given replacement string. This is a convenience function that |
| 1093 | * provides a complete find-and-replace-all operation. |
| 1094 | * |
| 1095 | * This function scans the input string looking for matches of the pattern. |
| 1096 | * Input that is not part of any match is copied unchanged to the |
| 1097 | * destination text. Matched regions are replaced in the output |
| 1098 | * text by the replacement string. The replacement string may contain |
| 1099 | * references to capture groups; these take the form of $1, $2, etc. |
| 1100 | * |
| 1101 | * @param regexp The compiled regular expression. |
| 1102 | * @param replacement A string containing the replacement text. |
| 1103 | * @param dest A mutable UText that will receive the result. |
| 1104 | * If NULL, a new UText will be created (which may not be mutable). |
| 1105 | * @param status A reference to a UErrorCode to receive any errors. |
| 1106 | * @return A UText containing the results of the find and replace. |
| 1107 | * If a pre-allocated UText was provided, it will always be used and returned. |
| 1108 | * |
| 1109 | * @stable ICU 4.6 |
| 1110 | */ |
| 1111 | U_STABLE UText * U_EXPORT2 |
| 1112 | uregex_replaceAllUText(URegularExpression *regexp, |
| 1113 | UText *replacement, |
| 1114 | UText *dest, |
| 1115 | UErrorCode *status); |
| 1116 | |
| 1117 | /** |
| 1118 | * Replaces the first substring of the input that matches the pattern |
| 1119 | * with the given replacement string. This is a convenience function that |
| 1120 | * provides a complete find-and-replace operation. |
| 1121 | * |
| 1122 | * This function scans the input string looking for a match of the pattern. |
| 1123 | * All input that is not part of the match is copied unchanged to the |
| 1124 | * destination buffer. The matched region is replaced in the output |
| 1125 | * buffer by the replacement string. The replacement string may contain |
| 1126 | * references to capture groups; these take the form of $1, $2, etc. |
| 1127 | * |
| 1128 | * @param regexp The compiled regular expression. |
| 1129 | * @param replacementText A string containing the replacement text. |
| 1130 | * @param replacementLength The length of the replacement string, or |
| 1131 | * -1 if it is NUL terminated. |
| 1132 | * @param destBuf A (UChar *) buffer that will receive the result. |
| 1133 | * @param destCapacity The capacity of the destination buffer. |
| 1134 | * @param status A reference to a UErrorCode to receive any errors. |
| 1135 | * @return The length of the string resulting from the find |
| 1136 | * and replace operation. In the event that the |
| 1137 | * destination capacity is inadequate, the return value |
| 1138 | * is still the full length of the untruncated string. |
| 1139 | * @stable ICU 3.0 |
| 1140 | */ |
| 1141 | U_STABLE int32_t U_EXPORT2 |
| 1142 | uregex_replaceFirst(URegularExpression *regexp, |
| 1143 | const UChar *replacementText, |
| 1144 | int32_t replacementLength, |
| 1145 | UChar *destBuf, |
| 1146 | int32_t destCapacity, |
| 1147 | UErrorCode *status); |
| 1148 | |
| 1149 | /** |
| 1150 | * Replaces the first substring of the input that matches the pattern |
| 1151 | * with the given replacement string. This is a convenience function that |
| 1152 | * provides a complete find-and-replace operation. |
| 1153 | * |
| 1154 | * This function scans the input string looking for a match of the pattern. |
| 1155 | * All input that is not part of the match is copied unchanged to the |
| 1156 | * destination text. The matched region is replaced in the output |
| 1157 | * text by the replacement string. The replacement string may contain |
| 1158 | * references to capture groups; these take the form of $1, $2, etc. |
| 1159 | * |
| 1160 | * @param regexp The compiled regular expression. |
| 1161 | * @param replacement A string containing the replacement text. |
| 1162 | * @param dest A mutable UText that will receive the result. |
| 1163 | * If NULL, a new UText will be created (which may not be mutable). |
| 1164 | * @param status A reference to a UErrorCode to receive any errors. |
| 1165 | * @return A UText containing the results of the find and replace. |
| 1166 | * If a pre-allocated UText was provided, it will always be used and returned. |
| 1167 | * |
| 1168 | * @stable ICU 4.6 |
| 1169 | */ |
| 1170 | U_STABLE UText * U_EXPORT2 |
| 1171 | uregex_replaceFirstUText(URegularExpression *regexp, |
| 1172 | UText *replacement, |
| 1173 | UText *dest, |
| 1174 | UErrorCode *status); |
| 1175 | |
| 1176 | /** |
| 1177 | * Implements a replace operation intended to be used as part of an |
| 1178 | * incremental find-and-replace. |
| 1179 | * |
| 1180 | * <p>The input string, starting from the end of the previous match and ending at |
| 1181 | * the start of the current match, is appended to the destination string. Then the |
| 1182 | * replacement string is appended to the output string, |
| 1183 | * including handling any substitutions of captured text.</p> |
| 1184 | * |
| 1185 | * <p>A note on preflight computation of buffer size and error handling: |
| 1186 | * Calls to uregex_appendReplacement() and uregex_appendTail() are |
| 1187 | * designed to be chained, one after another, with the destination |
| 1188 | * buffer pointer and buffer capacity updated after each in preparation |
| 1189 | * for the next. If the destination buffer is exhausted partway through such a |
| 1190 | * sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal |
| 1191 | * ICU conventions are for a function to perform no action if it is |
| 1192 | * called with an error status, but for this one case, uregex_appendReplacement() |
| 1193 | * will operate normally so that buffer size computations will complete |
| 1194 | * correctly.</p> |
| 1195 | * |
| 1196 | * <p>For simple, prepackaged, non-incremental find-and-replace |
| 1197 | * operations, see uregex_replaceFirst() or uregex_replaceAll().</p> |
| 1198 | * |
| 1199 | * @param regexp The regular expression object. |
| 1200 | * @param replacementText The string that will replace the matched portion of the |
| 1201 | * input string as it is copied to the destination buffer. |
| 1202 | * The replacement text may contain references ($1, for |
| 1203 | * example) to capture groups from the match. |
| 1204 | * @param replacementLength The length of the replacement text string, |
| 1205 | * or -1 if the string is NUL terminated. |
| 1206 | * @param destBuf The buffer into which the results of the |
| 1207 | * find-and-replace are placed. On return, this pointer |
| 1208 | * will be updated to refer to the beginning of the |
| 1209 | * unused portion of buffer, leaving it in position for |
| 1210 | * a subsequent call to this function. |
| 1211 | * @param destCapacity The size of the output buffer. On return, this |
| 1212 | * parameter will be updated to reflect the space remaining |
| 1213 | * unused in the output buffer. |
| 1214 | * @param status A reference to a UErrorCode to receive any errors. |
| 1215 | * @return The length of the result string. In the event that |
| 1216 | * destCapacity is inadequate, the full length of the |
| 1217 | * untruncated output string is returned. |
| 1218 | * |
| 1219 | * @stable ICU 3.0 |
| 1220 | * |
| 1221 | */ |
| 1222 | U_STABLE int32_t U_EXPORT2 |
| 1223 | uregex_appendReplacement(URegularExpression *regexp, |
| 1224 | const UChar *replacementText, |
| 1225 | int32_t replacementLength, |
| 1226 | UChar **destBuf, |
| 1227 | int32_t *destCapacity, |
| 1228 | UErrorCode *status); |
| 1229 | |
| 1230 | /** |
| 1231 | * Implements a replace operation intended to be used as part of an |
| 1232 | * incremental find-and-replace. |
| 1233 | * |
| 1234 | * <p>The input string, starting from the end of the previous match and ending at |
| 1235 | * the start of the current match, is appended to the destination string. Then the |
| 1236 | * replacement string is appended to the output string, |
| 1237 | * including handling any substitutions of captured text.</p> |
| 1238 | * |
| 1239 | * <p>For simple, prepackaged, non-incremental find-and-replace |
| 1240 | * operations, see uregex_replaceFirstUText() or uregex_replaceAllUText().</p> |
| 1241 | * |
| 1242 | * @param regexp The regular expression object. |
| 1243 | * @param replacementText The string that will replace the matched portion of the |
| 1244 | * input string as it is copied to the destination text. |
| 1245 | * The replacement text may contain references ($1, for |
| 1246 | * example) to capture groups from the match. |
| 1247 | * @param dest A mutable UText that will receive the result. Must not be NULL. |
| 1248 | * @param status A reference to a UErrorCode to receive any errors. |
| 1249 | * |
| 1250 | * @stable ICU 4.6 |
| 1251 | */ |
| 1252 | U_STABLE void U_EXPORT2 |
| 1253 | uregex_appendReplacementUText(URegularExpression *regexp, |
| 1254 | UText *replacementText, |
| 1255 | UText *dest, |
| 1256 | UErrorCode *status); |
| 1257 | |
| 1258 | /** |
| 1259 | * As the final step in a find-and-replace operation, append the remainder |
| 1260 | * of the input string, starting at the position following the last match, |
| 1261 | * to the destination string. <code>uregex_appendTail()</code> is intended |
| 1262 | * to be invoked after one or more invocations of the |
| 1263 | * <code>uregex_appendReplacement()</code> function. |
| 1264 | * |
| 1265 | * @param regexp The regular expression object. This is needed to |
| 1266 | * obtain the input string along with the position |
| 1267 | * of the last match within it. |
| 1268 | * @param destBuf The buffer in which the results of the |
| 1269 | * find-and-replace are placed. On return, the pointer |
| 1270 | * will be updated to refer to the beginning of the |
| 1271 | * unused portion of buffer. |
| 1272 | * @param destCapacity The size of the output buffer. On return, this |
| 1273 | * value will be updated to reflect the space remaining |
| 1274 | * unused in the output buffer. |
| 1275 | * @param status A reference to a UErrorCode to receive any errors. |
| 1276 | * @return The length of the result string. In the event that |
| 1277 | * destCapacity is inadequate, the full length of the |
| 1278 | * untruncated output string is returned. |
| 1279 | * |
| 1280 | * @stable ICU 3.0 |
| 1281 | */ |
| 1282 | U_STABLE int32_t U_EXPORT2 |
| 1283 | uregex_appendTail(URegularExpression *regexp, |
| 1284 | UChar **destBuf, |
| 1285 | int32_t *destCapacity, |
| 1286 | UErrorCode *status); |
| 1287 | |
| 1288 | /** |
| 1289 | * As the final step in a find-and-replace operation, append the remainder |
| 1290 | * of the input string, starting at the position following the last match, |
| 1291 | * to the destination string. <code>uregex_appendTailUText()</code> is intended |
| 1292 | * to be invoked after one or more invocations of the |
| 1293 | * <code>uregex_appendReplacementUText()</code> function. |
| 1294 | * |
| 1295 | * @param regexp The regular expression object. This is needed to |
| 1296 | * obtain the input string along with the position |
| 1297 | * of the last match within it. |
| 1298 | * @param dest A mutable UText that will receive the result. Must not be NULL. |
| 1299 | * |
| 1300 | * @param status A reference to a UErrorCode to receive any errors. |
| 1301 | * |
| 1302 | * @return The destination UText. |
| 1303 | * |
| 1304 | * @stable ICU 4.6 |
| 1305 | */ |
| 1306 | U_STABLE UText * U_EXPORT2 |
| 1307 | uregex_appendTailUText(URegularExpression *regexp, |
| 1308 | UText *dest, |
| 1309 | UErrorCode *status); |
| 1310 | |
| 1311 | /** |
| 1312 | * Split a string into fields. Somewhat like split() from Perl. |
| 1313 | * The pattern matches identify delimiters that separate the input |
| 1314 | * into fields. The input data between the matches becomes the |
| 1315 | * fields themselves. |
| 1316 | * |
| 1317 | * Each of the fields is copied from the input string to the destination |
| 1318 | * buffer, and NUL terminated. The position of each field within |
| 1319 | * the destination buffer is returned in the destFields array. |
| 1320 | * |
| 1321 | * If the delimiter pattern includes capture groups, the captured text will |
| 1322 | * also appear in the destination array of output strings, interspersed |
| 1323 | * with the fields. This is similar to Perl, but differs from Java, |
| 1324 | * which ignores the presence of capture groups in the pattern. |
| 1325 | * |
| 1326 | * Trailing empty fields will always be returned, assuming sufficient |
| 1327 | * destination capacity. This differs from the default behavior for Java |
| 1328 | * and Perl where trailing empty fields are not returned. |
| 1329 | * |
| 1330 | * The number of strings produced by the split operation is returned. |
| 1331 | * This count includes the strings from capture groups in the delimiter pattern. |
| 1332 | * This behavior differs from Java, which ignores capture groups. |
| 1333 | * |
| 1334 | * @param regexp The compiled regular expression. |
| 1335 | * @param destBuf A (UChar *) buffer to receive the fields that |
| 1336 | * are extracted from the input string. These |
| 1337 | * field pointers will refer to positions within the |
| 1338 | * destination buffer supplied by the caller. Any |
| 1339 | * extra positions within the destFields array will be |
| 1340 | * set to NULL. |
| 1341 | * @param destCapacity The capacity of the destBuf. |
| 1342 | * @param requiredCapacity The actual capacity required of the destBuf. |
| 1343 | * If destCapacity is too small, requiredCapacity will return |
| 1344 | * the total capacity required to hold all of the output, and |
| 1345 | * a U_BUFFER_OVERFLOW_ERROR will be returned. |
| 1346 | * @param destFields An array to be filled with the position of each |
| 1347 | * of the extracted fields within destBuf. |
| 1348 | * @param destFieldsCapacity The number of elements in the destFields array. |
| 1349 | * If the number of fields found is less than destFieldsCapacity, |
| 1350 | * the extra destFields elements are set to NULL. |
| 1351 | * If destFieldsCapacity is too small, the trailing part of the |
| 1352 | * input, including any field delimiters, is treated as if it |
| 1353 | * were the last field - it is copied to the destBuf, and |
| 1354 | * its position in the destBuf is stored in the last element |
| 1355 | * of destFields. This behavior mimics that of Perl. It is not |
| 1356 | * an error condition, and no error status is returned when all destField |
| 1357 | * positions are used. |
| 1358 | * @param status A reference to a UErrorCode to receive any errors. |
| 1359 | * @return The number of fields into which the input string was split. |
| 1360 | * @stable ICU 3.0 |
| 1361 | */ |
| 1362 | U_STABLE int32_t U_EXPORT2 |
| 1363 | uregex_split( URegularExpression *regexp, |
| 1364 | UChar *destBuf, |
| 1365 | int32_t destCapacity, |
| 1366 | int32_t *requiredCapacity, |
| 1367 | UChar *destFields[], |
| 1368 | int32_t destFieldsCapacity, |
| 1369 | UErrorCode *status); |
| 1370 | |
| 1371 | /** |
| 1372 | * Split a string into fields. Somewhat like split() from Perl. |
| 1373 | * The pattern matches identify delimiters that separate the input |
| 1374 | * into fields. The input data between the matches becomes the |
| 1375 | * fields themselves. |
| 1376 | * <p> |
| 1377 | * The behavior of this function is not very closely aligned with uregex_split(); |
| 1378 | * instead, it is based on (and implemented directly on top of) the C++ split method. |
| 1379 | * |
| 1380 | * @param regexp The compiled regular expression. |
| 1381 | * @param destFields An array of mutable UText structs to receive the results of the split. |
| 1382 | * If a field is NULL, a new UText is allocated to contain the results for |
| 1383 | * that field. This new UText is not guaranteed to be mutable. |
| 1384 | * @param destFieldsCapacity The number of elements in the destination array. |
| 1385 | * If the number of fields found is less than destFieldsCapacity, the |
| 1386 | * extra strings in the destination array are not altered. |
| 1387 | * If the number of destination strings is less than the number |
| 1388 | * of fields, the trailing part of the input string, including any |
| 1389 | * field delimiters, is placed in the last destination string. |
| 1390 | * This behavior mimics that of Perl. It is not an error condition, and no |
| 1391 | * error status is returned when all destFields positions are used. |
| 1392 | * @param status A reference to a UErrorCode to receive any errors. |
| 1393 | * @return The number of fields into which the input string was split. |
| 1394 | * |
| 1395 | * @stable ICU 4.6 |
| 1396 | */ |
| 1397 | U_STABLE int32_t U_EXPORT2 |
| 1398 | uregex_splitUText(URegularExpression *regexp, |
| 1399 | UText *destFields[], |
| 1400 | int32_t destFieldsCapacity, |
| 1401 | UErrorCode *status); |
| 1402 | |
| 1403 | /** |
| 1404 | * Set a processing time limit for match operations with this URegularExpression. |
| 1405 | * |
| 1406 | * Some patterns, when matching certain strings, can run in exponential time. |
| 1407 | * For practical purposes, the match operation may appear to be in an |
| 1408 | * infinite loop. |
| 1409 | * When a limit is set, a match operation will fail with an error if the |
| 1410 | * limit is exceeded. |
| 1411 | * <p> |
| 1412 | * The units of the limit are steps of the match engine. |
| 1413 | * Correspondence with actual processor time will depend on the speed |
| 1414 | * of the processor and the details of the specific pattern, but will |
| 1415 | * typically be on the order of milliseconds. |
| 1416 | * <p> |
| 1417 | * By default, the matching time is not limited. |
| 1418 | * A limit of 0 likewise means that the matching time is not limited. |
| 1419 | * |
| 1420 | * @param regexp The compiled regular expression. |
| 1421 | * @param limit The limit value, or 0 for no limit. |
| 1422 | * @param status A reference to a UErrorCode to receive any errors. |
| 1423 | * @stable ICU 4.0 |
| 1424 | */ |
| 1425 | U_STABLE void U_EXPORT2 |
| 1426 | uregex_setTimeLimit(URegularExpression *regexp, |
| 1427 | int32_t limit, |
| 1428 | UErrorCode *status); |
| 1429 | |
| 1430 | /** |
| 1431 | * Get the time limit for matches with this URegularExpression. |
| 1432 | * A return value of zero indicates that there is no limit. |
| 1433 | * |
| 1434 | * @param regexp The compiled regular expression. |
| 1435 | * @param status A reference to a UErrorCode to receive any errors. |
| 1436 | * @return the maximum allowed time for a match, in units of processing steps. |
| 1437 | * @stable ICU 4.0 |
| 1438 | */ |
| 1439 | U_STABLE int32_t U_EXPORT2 |
| 1440 | uregex_getTimeLimit(const URegularExpression *regexp, |
| 1441 | UErrorCode *status); |
| 1442 | |
| 1443 | /** |
| 1444 | * Set the amount of heap storage available for use by the match backtracking stack. |
| 1445 | * <p> |
| 1446 | * ICU uses a backtracking regular expression engine, with the backtrack stack |
| 1447 | * maintained on the heap. This function sets the limit on the amount of memory |
| 1448 | * that can be used for this purpose. A backtracking stack overflow will |
| 1449 | * result in an error from the match operation that caused it. |
| 1450 | * <p> |
| 1451 | * A limit is desirable because a malicious or poorly designed pattern can use |
| 1452 | * excessive memory, potentially crashing the process. A limit is enabled |
| 1453 | * by default. |
| 1454 | * |
| 1455 | * @param regexp The compiled regular expression. |
| 1456 | * @param limit The maximum size, in bytes, of the matching backtrack stack. |
| 1457 | * A value of zero means no limit. |
| 1458 | * The limit must be greater than or equal to zero. |
| 1459 | * @param status A reference to a UErrorCode to receive any errors. |
| 1460 | * |
| 1461 | * @stable ICU 4.0 |
| 1462 | */ |
| 1463 | U_STABLE void U_EXPORT2 |
| 1464 | uregex_setStackLimit(URegularExpression *regexp, |
| 1465 | int32_t limit, |
| 1466 | UErrorCode *status); |
| 1467 | |
| 1468 | /** |
| 1469 | * Get the size of the heap storage available for use by the backtracking stack. |
| 1470 | * @param regexp The compiled regular expression. |
| 1471 | * @param status A reference to a UErrorCode to receive any errors. |
| 1472 | * @return the maximum backtracking stack size, in bytes, or zero if the stack size is unlimited. |
| 1473 | * @stable ICU 4.0 |
| 1474 | */ |
| 1475 | U_STABLE int32_t U_EXPORT2 |
| 1476 | uregex_getStackLimit(const URegularExpression *regexp, |
| 1477 | UErrorCode *status); |
| 1478 | |
| 1479 | |
| 1480 | /** |
| 1481 | * Function pointer for a regular expression matching callback function. |
| 1482 | * When set, a callback function will be called periodically during matching |
| 1483 | * operations. If the callback function returns FALSE, the matching |
| 1484 | * operation will be terminated early. |
| 1485 | * |
| 1486 | * Note: the callback function must not call other functions on this |
| 1487 | * URegularExpression. |
| 1488 | * |
| 1489 | * @param context context pointer. The callback function will be invoked |
| 1490 | * with the context specified at the time that |
| 1491 | * uregex_setMatchCallback() is called. |
| 1492 | * @param steps the accumulated processing time, in match steps, |
| 1493 | * for this matching operation. |
| 1494 | * @return TRUE to continue the matching operation. |
| 1495 | * FALSE to terminate the matching operation. |
| 1496 | * @stable ICU 4.0 |
| 1497 | */ |
| 1498 | U_CDECL_BEGIN |
| 1499 | typedef UBool U_CALLCONV URegexMatchCallback ( |
| 1500 | const void *context, |
| 1501 | int32_t steps); |
| 1502 | U_CDECL_END |
| 1503 | |
| 1504 | /** |
| 1505 | * Set a callback function for this URegularExpression. |
| 1506 | * During matching operations the function will be called periodically, |
| 1507 | * giving the application the opportunity to terminate a long-running |
| 1508 | * match by returning FALSE from the callback. |
| 1509 | * |
| 1510 | * @param regexp The compiled regular expression. |
| 1511 | * @param callback A pointer to the user-supplied URegexMatchCallback function. |
| 1512 | * @param context User context pointer. The value supplied at the |
| 1513 | * time the callback function is set will be saved |
| 1514 | * and passed to the callback each time that it is called. |
| 1515 | * @param status A reference to a UErrorCode to receive any errors. |
| 1516 | * @stable ICU 4.0 |
| 1517 | */ |
| 1518 | U_STABLE void U_EXPORT2 |
| 1519 | uregex_setMatchCallback(URegularExpression *regexp, |
| 1520 | URegexMatchCallback *callback, |
| 1521 | const void *context, |
| 1522 | UErrorCode *status); |
| 1523 | |
| 1524 | |
| 1525 | /** |
| 1526 | * Get the match callback function and its context pointer for this URegularExpression. |
| 1527 | * |
| 1528 | * @param regexp The compiled regular expression. |
| 1529 | * @param callback Out parameter, receives a pointer to the user-supplied |
| 1530 | * URegexMatchCallback function. |
| 1531 | * @param context Out parameter, receives the user context pointer that |
| 1532 | * was set when uregex_setMatchCallback() was called. |
| 1533 | * @param status A reference to a UErrorCode to receive any errors. |
| 1534 | * @stable ICU 4.0 |
| 1535 | */ |
| 1536 | U_STABLE void U_EXPORT2 |
| 1537 | uregex_getMatchCallback(const URegularExpression *regexp, |
| 1538 | URegexMatchCallback **callback, |
| 1539 | const void **context, |
| 1540 | UErrorCode *status); |
| 1541 | |
| 1542 | /** |
| 1543 | * Function pointer for a regular expression find callback function. |
| 1544 | * |
| 1545 | * When set, a callback function will be called during a find operation |
| 1546 | * and for operations that depend on find, such as findNext, split and some replace |
| 1547 | * operations like replaceFirst. |
| 1548 | * The callback will usually be called after each attempt at a match, but this is not a |
| 1549 | * guarantee that the callback will be invoked at each character. For finds where the |
| 1550 | * match engine is invoked at each character, this may be close to true, but less likely |
| 1551 | * for more optimized loops where the pattern is known to only start, and the match |
| 1552 | * engine invoked, at certain characters. |
| 1553 | * When invoked, this callback will specify the index at which a match operation is about |
| 1554 | * to be attempted, giving the application the opportunity to terminate a long-running |
| 1555 | * find operation. |
| 1556 | * |
| 1557 | * If the callback function returns FALSE, the find operation will be terminated early. |
| 1558 | * |
| 1559 | * Note: the callback function must not call other functions on this |
| 1560 | * URegularExpression. |
| 1561 | * |
| 1562 | * @param context context pointer. The callback function will be invoked |
| 1563 | * with the context specified at the time that |
| 1564 | * uregex_setFindProgressCallback() is called. |
| 1565 | * @param matchIndex the next index at which a match will be attempted for this |
| 1566 | * find operation. If this callback interrupts the search, this is the |
| 1567 | * index at which a find/findNext operation may be re-initiated. |
| 1568 | * @return TRUE to continue the matching operation. |
| 1569 | * FALSE to terminate the matching operation. |
| 1570 | * @stable ICU 4.6 |
| 1571 | */ |
| 1572 | U_CDECL_BEGIN |
| 1573 | typedef UBool U_CALLCONV URegexFindProgressCallback ( |
| 1574 | const void *context, |
| 1575 | int64_t matchIndex); |
| 1576 | U_CDECL_END |
| 1577 | |
| 1578 | |
| 1579 | /** |
| 1580 | * Set the find progress callback function for this URegularExpression. |
| 1581 | * |
| 1582 | * @param regexp The compiled regular expression. |
| 1583 | * @param callback A pointer to the user-supplied URegexFindProgressCallback function. |
| 1584 | * @param context User context pointer. The value supplied at the |
| 1585 | * time the callback function is set will be saved |
| 1586 | * and passed to the callback each time that it is called. |
| 1587 | * @param status A reference to a UErrorCode to receive any errors. |
| 1588 | * @stable ICU 4.6 |
| 1589 | */ |
| 1590 | U_STABLE void U_EXPORT2 |
| 1591 | uregex_setFindProgressCallback(URegularExpression *regexp, |
| 1592 | URegexFindProgressCallback *callback, |
| 1593 | const void *context, |
| 1594 | UErrorCode *status); |
| 1595 | |
| 1596 | /** |
| 1597 | * Get the find progress callback function and its context pointer for this URegularExpression. |
| 1598 | * |
| 1599 | * @param regexp The compiled regular expression. |
| 1600 | * @param callback Out parameter, receives a pointer to the user-supplied |
| 1601 | * URegexFindProgressCallback function. |
| 1602 | * @param context Out parameter, receives the user context pointer that |
| 1603 | * was set when uregex_setFindProgressCallback() was called. |
| 1604 | * @param status A reference to a UErrorCode to receive any errors. |
| 1605 | * @stable ICU 4.6 |
| 1606 | */ |
| 1607 | U_STABLE void U_EXPORT2 |
| 1608 | uregex_getFindProgressCallback(const URegularExpression *regexp, |
| 1609 | URegexFindProgressCallback **callback, |
| 1610 | const void **context, |
| 1611 | UErrorCode *status); |
| 1612 | |
| 1613 | #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
| 1614 | #endif /* UREGEX_H */ |
| 1615 | |