/* GRegex -- regular expression API wrapper around PCRE.
 *
 * Copyright (C) 1999, 2000 Scott Wimer
 * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
 * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

21#ifndef __G_REGEX_H__
22#define __G_REGEX_H__
23
24#if !defined (__GLIB_H_INSIDE__) && !defined (GLIB_COMPILATION)
25#error "Only <glib.h> can be included directly."
26#endif
27
28#include <glib/gerror.h>
29#include <glib/gstring.h>
30
31G_BEGIN_DECLS
32
/**
 * GRegexError:
 * @G_REGEX_ERROR_COMPILE: Compilation of the regular expression failed.
 * @G_REGEX_ERROR_OPTIMIZE: Optimization of the regular expression failed.
 * @G_REGEX_ERROR_REPLACE: Replacement failed due to an ill-formed replacement
 *     string.
 * @G_REGEX_ERROR_MATCH: The match process failed.
 * @G_REGEX_ERROR_INTERNAL: Internal error of the regular expression engine.
 *     Since: 2.16
 * @G_REGEX_ERROR_STRAY_BACKSLASH: "\\" at end of pattern. Since: 2.16
 * @G_REGEX_ERROR_MISSING_CONTROL_CHAR: "\\c" at end of pattern. Since: 2.16
 * @G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: Unrecognized character follows "\\".
 *     Since: 2.16
 * @G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: Numbers out of order in "{}"
 *     quantifier. Since: 2.16
 * @G_REGEX_ERROR_QUANTIFIER_TOO_BIG: Number too big in "{}" quantifier.
 *     Since: 2.16
 * @G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: Missing terminating "]" for
 *     character class. Since: 2.16
 * @G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: Invalid escape sequence
 *     in character class. Since: 2.16
 * @G_REGEX_ERROR_RANGE_OUT_OF_ORDER: Range out of order in character class.
 *     Since: 2.16
 * @G_REGEX_ERROR_NOTHING_TO_REPEAT: Nothing to repeat. Since: 2.16
 * @G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: Unrecognized character after "(?",
 *     "(?<" or "(?P". Since: 2.16
 * @G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: POSIX named classes are
 *     supported only within a class. Since: 2.16
 * @G_REGEX_ERROR_UNMATCHED_PARENTHESIS: Missing terminating ")" or ")"
 *     without opening "(". Since: 2.16
 * @G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: Reference to non-existent
 *     subpattern. Since: 2.16
 * @G_REGEX_ERROR_UNTERMINATED_COMMENT: Missing terminating ")" after comment.
 *     Since: 2.16
 * @G_REGEX_ERROR_EXPRESSION_TOO_LARGE: Regular expression too large.
 *     Since: 2.16
 * @G_REGEX_ERROR_MEMORY_ERROR: Failed to get memory. Since: 2.16
 * @G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: Lookbehind assertion is not
 *     fixed length. Since: 2.16
 * @G_REGEX_ERROR_MALFORMED_CONDITION: Malformed number or name after "(?(".
 *     Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: Conditional group contains
 *     more than two branches. Since: 2.16
 * @G_REGEX_ERROR_ASSERTION_EXPECTED: Assertion expected after "(?(".
 *     Since: 2.16
 * @G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: Unknown POSIX class name.
 *     Since: 2.16
 * @G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: POSIX collating
 *     elements are not supported. Since: 2.16
 * @G_REGEX_ERROR_HEX_CODE_TOO_LARGE: Character value in "\\x{...}" sequence
 *     is too large. Since: 2.16
 * @G_REGEX_ERROR_INVALID_CONDITION: Invalid condition "(?(0)". Since: 2.16
 * @G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: \\C not allowed in
 *     lookbehind assertion. Since: 2.16
 * @G_REGEX_ERROR_INFINITE_LOOP: Recursive call could loop indefinitely.
 *     Since: 2.16
 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: Missing terminator
 *     in subpattern name. Since: 2.16
 * @G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: Two named subpatterns have
 *     the same name. Since: 2.16
 * @G_REGEX_ERROR_MALFORMED_PROPERTY: Malformed "\\P" or "\\p" sequence.
 *     Since: 2.16
 * @G_REGEX_ERROR_UNKNOWN_PROPERTY: Unknown property name after "\\P" or
 *     "\\p". Since: 2.16
 * @G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: Subpattern name is too long
 *     (maximum 32 characters). Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: Too many named subpatterns (maximum
 *     10,000). Since: 2.16
 * @G_REGEX_ERROR_INVALID_OCTAL_VALUE: Octal value is greater than "\\377".
 *     Since: 2.16
 * @G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: "DEFINE" group contains more
 *     than one branch. Since: 2.16
 * @G_REGEX_ERROR_DEFINE_REPETION: Repeating a "DEFINE" group is not allowed.
 *     This error is never raised. The spelling "REPETION" is part of the
 *     public identifier and is kept as is. Since: 2.16 Deprecated: 2.34
 * @G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: Inconsistent newline options.
 *     Since: 2.16
 * @G_REGEX_ERROR_MISSING_BACK_REFERENCE: "\\g" is not followed by a braced,
 *     angle-bracketed, or quoted name or number, or by a plain number.
 *     Since: 2.16
 * @G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: Relative reference must not be
 *     zero. Since: 2.34
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: The
 *     backtracking control verb used does not allow an argument. Since: 2.34
 * @G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: Unknown backtracking
 *     control verb. Since: 2.34
 * @G_REGEX_ERROR_NUMBER_TOO_BIG: Number is too big in escape sequence.
 *     Since: 2.34
 * @G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: Missing subpattern name.
 *     Since: 2.34
 * @G_REGEX_ERROR_MISSING_DIGIT: Missing digit. Since: 2.34
 * @G_REGEX_ERROR_INVALID_DATA_CHARACTER: In JavaScript compatibility mode,
 *     "[" is an invalid data character. Since: 2.34
 * @G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: Different names for subpatterns of
 *     the same number are not allowed. Since: 2.34
 * @G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: The
 *     backtracking control verb requires an argument. Since: 2.34
 * @G_REGEX_ERROR_INVALID_CONTROL_CHAR: "\\c" must be followed by an ASCII
 *     character. Since: 2.34
 * @G_REGEX_ERROR_MISSING_NAME: "\\k" is not followed by a braced,
 *     angle-bracketed, or quoted name. Since: 2.34
 * @G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: "\\N" is not supported in a class.
 *     Since: 2.34
 * @G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: Too many forward references.
 *     Since: 2.34
 * @G_REGEX_ERROR_NAME_TOO_LONG: The name is too long in "(*MARK)",
 *     "(*PRUNE)", "(*SKIP)", or "(*THEN)". Since: 2.34
 * @G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: The character value in the \\u
 *     sequence is too large. Since: 2.34
 *
 * Error codes returned by regular expressions functions.
 *
 * Since: 2.14
 */
typedef enum
{
  /* Generic codes; these take the implicit values 0 through 4. */
  G_REGEX_ERROR_COMPILE,
  G_REGEX_ERROR_OPTIMIZE,
  G_REGEX_ERROR_REPLACE,
  G_REGEX_ERROR_MATCH,
  G_REGEX_ERROR_INTERNAL,

  /* These are the error codes from PCRE + 100.  The numbering is therefore
   * explicit and deliberately not contiguous; do not renumber. */
  G_REGEX_ERROR_STRAY_BACKSLASH = 101,
  G_REGEX_ERROR_MISSING_CONTROL_CHAR = 102,
  G_REGEX_ERROR_UNRECOGNIZED_ESCAPE = 103,
  G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER = 104,
  G_REGEX_ERROR_QUANTIFIER_TOO_BIG = 105,
  G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS = 106,
  G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS = 107,
  G_REGEX_ERROR_RANGE_OUT_OF_ORDER = 108,
  G_REGEX_ERROR_NOTHING_TO_REPEAT = 109,
  G_REGEX_ERROR_UNRECOGNIZED_CHARACTER = 112,
  G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS = 113,
  G_REGEX_ERROR_UNMATCHED_PARENTHESIS = 114,
  G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE = 115,
  G_REGEX_ERROR_UNTERMINATED_COMMENT = 118,
  G_REGEX_ERROR_EXPRESSION_TOO_LARGE = 120,
  G_REGEX_ERROR_MEMORY_ERROR = 121,
  G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND = 125,
  G_REGEX_ERROR_MALFORMED_CONDITION = 126,
  G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES = 127,
  G_REGEX_ERROR_ASSERTION_EXPECTED = 128,
  G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME = 130,
  G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED = 131,
  G_REGEX_ERROR_HEX_CODE_TOO_LARGE = 134,
  G_REGEX_ERROR_INVALID_CONDITION = 135,
  G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND = 136,
  G_REGEX_ERROR_INFINITE_LOOP = 140,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR = 142,
  G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME = 143,
  G_REGEX_ERROR_MALFORMED_PROPERTY = 146,
  G_REGEX_ERROR_UNKNOWN_PROPERTY = 147,
  G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG = 148,
  G_REGEX_ERROR_TOO_MANY_SUBPATTERNS = 149,
  G_REGEX_ERROR_INVALID_OCTAL_VALUE = 151,
  G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE = 154,
  G_REGEX_ERROR_DEFINE_REPETION = 155,
  G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS = 156,
  G_REGEX_ERROR_MISSING_BACK_REFERENCE = 157,
  G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE = 158,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN = 159,
  G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB = 160,
  G_REGEX_ERROR_NUMBER_TOO_BIG = 161,
  G_REGEX_ERROR_MISSING_SUBPATTERN_NAME = 162,
  G_REGEX_ERROR_MISSING_DIGIT = 163,
  G_REGEX_ERROR_INVALID_DATA_CHARACTER = 164,
  G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME = 165,
  G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED = 166,
  G_REGEX_ERROR_INVALID_CONTROL_CHAR = 168,
  G_REGEX_ERROR_MISSING_NAME = 169,
  G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS = 171,
  G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES = 172,
  G_REGEX_ERROR_NAME_TOO_LONG = 175,
  G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE = 176
} GRegexError;

203/**
204 * G_REGEX_ERROR:
205 *
206 * Error domain for regular expressions. Errors in this domain will be
207 * from the #GRegexError enumeration. See #GError for information on
208 * error domains.
209 *
210 * Since: 2.14
211 */
212#define G_REGEX_ERROR g_regex_error_quark ()
213
214GLIB_AVAILABLE_IN_ALL
215GQuark g_regex_error_quark (void);
216
/**
 * GRegexCompileFlags:
 * @G_REGEX_CASELESS: Letters in the pattern match both upper- and
 *     lowercase letters. This option can be changed within a pattern
 *     by a "(?i)" option setting.
 * @G_REGEX_MULTILINE: By default, GRegex treats the strings as consisting
 *     of a single line of characters (even if it actually contains
 *     newlines). The "start of line" metacharacter ("^") matches only
 *     at the start of the string, while the "end of line" metacharacter
 *     ("$") matches only at the end of the string, or before a terminating
 *     newline (unless #G_REGEX_DOLLAR_ENDONLY is set). When
 *     #G_REGEX_MULTILINE is set, the "start of line" and "end of line"
 *     constructs match immediately following or immediately before any
 *     newline in the string, respectively, as well as at the very start
 *     and end. This can be changed within a pattern by a "(?m)" option
 *     setting.
 * @G_REGEX_DOTALL: A dot metacharacter (".") in the pattern matches all
 *     characters, including newlines. Without it, newlines are excluded.
 *     This option can be changed within a pattern by a "(?s)" option
 *     setting.
 * @G_REGEX_EXTENDED: Whitespace data characters in the pattern are
 *     totally ignored except when escaped or inside a character class.
 *     Whitespace does not include the VT character (code 11). In addition,
 *     characters between an unescaped "#" outside a character class and
 *     the next newline character, inclusive, are also ignored. This can
 *     be changed within a pattern by a "(?x)" option setting.
 * @G_REGEX_ANCHORED: The pattern is forced to be "anchored", that is,
 *     it is constrained to match only at the first matching point in the
 *     string that is being searched. This effect can also be achieved by
 *     appropriate constructs in the pattern itself such as the "^"
 *     metacharacter.
 * @G_REGEX_DOLLAR_ENDONLY: A dollar metacharacter ("$") in the pattern
 *     matches only at the end of the string. Without this option, a
 *     dollar also matches immediately before the final character if
 *     it is a newline (but not before any other newlines). This option
 *     is ignored if #G_REGEX_MULTILINE is set.
 * @G_REGEX_UNGREEDY: Inverts the "greediness" of the quantifiers so that
 *     they are not greedy by default, but become greedy if followed by "?".
 *     It can also be set by a "(?U)" option setting within the pattern.
 * @G_REGEX_RAW: Usually strings must be valid UTF-8 strings, using this
 *     flag they are considered as a raw sequence of bytes.
 * @G_REGEX_NO_AUTO_CAPTURE: Disables the use of numbered capturing
 *     parentheses in the pattern. Any opening parenthesis that is not
 *     followed by "?" behaves as if it were followed by "?:" but named
 *     parentheses can still be used for capturing (and they acquire numbers
 *     in the usual way).
 * @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
 *     be used many times, then it may be worth the effort to optimize it
 *     to improve the speed of matches.
 * @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at)
 *     the first newline. Since: 2.34
 * @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
 *     be unique. This can be helpful for certain types of pattern when it
 *     is known that only one instance of the named subpattern can ever be
 *     matched.
 * @G_REGEX_NEWLINE_CR: Usually any newline character or character sequence
 *     is recognized. If this option is set, the only recognized newline
 *     character is '\r'.
 * @G_REGEX_NEWLINE_LF: Usually any newline character or character sequence
 *     is recognized. If this option is set, the only recognized newline
 *     character is '\n'.
 * @G_REGEX_NEWLINE_CRLF: Usually any newline character or character
 *     sequence is recognized. If this option is set, the only recognized
 *     newline character sequence is '\r\n'.
 * @G_REGEX_NEWLINE_ANYCRLF: Usually any newline character or character
 *     sequence is recognized. If this option is set, the only recognized
 *     newline character sequences are '\r', '\n', and '\r\n'. Since: 2.34
 * @G_REGEX_BSR_ANYCRLF: Usually any newline character or character sequence
 *     is recognized. If this option is set, then "\R" only recognizes the
 *     newline characters '\r', '\n' and '\r\n'. Since: 2.34
 * @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible
 *     with JavaScript rather than PCRE. Since: 2.34
 *
 * Flags specifying compile-time options.
 *
 * Since: 2.14
 */
/* Remember to update G_REGEX_COMPILE_MASK in gregex.c after
 * adding a new flag.
 *
 * NOTE(review): the bit positions are not contiguous; presumably they mirror
 * the option bits of the underlying PCRE library -- confirm against gregex.c
 * before reusing a free bit.
 */
typedef enum
{
  G_REGEX_CASELESS          = 1 << 0,
  G_REGEX_MULTILINE         = 1 << 1,
  G_REGEX_DOTALL            = 1 << 2,
  G_REGEX_EXTENDED          = 1 << 3,
  G_REGEX_ANCHORED          = 1 << 4,
  G_REGEX_DOLLAR_ENDONLY    = 1 << 5,
  G_REGEX_UNGREEDY          = 1 << 9,
  G_REGEX_RAW               = 1 << 11,
  G_REGEX_NO_AUTO_CAPTURE   = 1 << 12,
  G_REGEX_OPTIMIZE          = 1 << 13,
  G_REGEX_FIRSTLINE         = 1 << 18,
  G_REGEX_DUPNAMES          = 1 << 19,
  G_REGEX_NEWLINE_CR        = 1 << 20,
  G_REGEX_NEWLINE_LF        = 1 << 21,
  /* The two composite newline modes are bit combinations, not new bits. */
  G_REGEX_NEWLINE_CRLF      = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
  G_REGEX_NEWLINE_ANYCRLF   = G_REGEX_NEWLINE_CR | (1 << 22),
  G_REGEX_BSR_ANYCRLF       = 1 << 23,
  G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
} GRegexCompileFlags;

/**
 * GRegexMatchFlags:
 * @G_REGEX_MATCH_ANCHORED: The pattern is forced to be "anchored", that is,
 *     it is constrained to match only at the first matching point in the
 *     string that is being searched. This effect can also be achieved by
 *     appropriate constructs in the pattern itself such as the "^"
 *     metacharacter.
 * @G_REGEX_MATCH_NOTBOL: Specifies that first character of the string is
 *     not the beginning of a line, so the circumflex metacharacter should
 *     not match before it. Setting this without #G_REGEX_MULTILINE (at
 *     compile time) causes circumflex never to match. This option affects
 *     only the behaviour of the circumflex metacharacter, it does not
 *     affect "\A".
 * @G_REGEX_MATCH_NOTEOL: Specifies that the end of the subject string is
 *     not the end of a line, so the dollar metacharacter should not match
 *     it nor (except in multiline mode) a newline immediately before it.
 *     Setting this without #G_REGEX_MULTILINE (at compile time) causes
 *     dollar never to match. This option affects only the behaviour of
 *     the dollar metacharacter, it does not affect "\Z" or "\z".
 * @G_REGEX_MATCH_NOTEMPTY: An empty string is not considered to be a valid
 *     match if this option is set. If there are alternatives in the pattern,
 *     they are tried. If all the alternatives match the empty string, the
 *     entire match fails. For example, if the pattern "a?b?" is applied to
 *     a string not beginning with "a" or "b", it matches the empty string
 *     at the start of the string. With this flag set, this match is not
 *     valid, so GRegex searches further into the string for occurrences
 *     of "a" or "b".
 * @G_REGEX_MATCH_PARTIAL: Turns on the partial matching feature, for more
 *     documentation on partial matching see g_match_info_is_partial_match().
 * @G_REGEX_MATCH_NEWLINE_CR: Overrides the newline definition set when
 *     creating a new #GRegex, setting the '\r' character as line terminator.
 * @G_REGEX_MATCH_NEWLINE_LF: Overrides the newline definition set when
 *     creating a new #GRegex, setting the '\n' character as line terminator.
 * @G_REGEX_MATCH_NEWLINE_CRLF: Overrides the newline definition set when
 *     creating a new #GRegex, setting the '\r\n' character sequence as line
 *     terminator.
 * @G_REGEX_MATCH_NEWLINE_ANY: Overrides the newline definition set when
 *     creating a new #GRegex, any Unicode newline sequence
 *     is recognised as a newline. These are '\r', '\n' and '\r\n', and the
 *     single characters U+000B LINE TABULATION, U+000C FORM FEED (FF),
 *     U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR and
 *     U+2029 PARAGRAPH SEPARATOR.
 * @G_REGEX_MATCH_NEWLINE_ANYCRLF: Overrides the newline definition set when
 *     creating a new #GRegex; any '\r', '\n', or '\r\n' character sequence
 *     is recognized as a newline. Since: 2.34
 * @G_REGEX_MATCH_BSR_ANYCRLF: Overrides the newline definition for "\R" set
 *     when creating a new #GRegex; only '\r', '\n', or '\r\n' character
 *     sequences are recognized as a newline by "\R". Since: 2.34
 * @G_REGEX_MATCH_BSR_ANY: Overrides the newline definition for "\R" set when
 *     creating a new #GRegex; any Unicode newline character or character
 *     sequence is recognized as a newline by "\R". These are '\r', '\n' and
 *     '\r\n', and the single characters U+000B LINE TABULATION,
 *     U+000C FORM FEED (FF), U+0085 NEXT LINE (NEL), U+2028 LINE SEPARATOR
 *     and U+2029 PARAGRAPH SEPARATOR. Since: 2.34
 * @G_REGEX_MATCH_PARTIAL_SOFT: An alias for #G_REGEX_MATCH_PARTIAL.
 *     Since: 2.34
 * @G_REGEX_MATCH_PARTIAL_HARD: Turns on the partial matching feature. In
 *     contrast to #G_REGEX_MATCH_PARTIAL_SOFT, this stops matching as soon
 *     as a partial match is found, without continuing to search for a
 *     possible complete match. See g_match_info_is_partial_match() for more
 *     information. Since: 2.34
 * @G_REGEX_MATCH_NOTEMPTY_ATSTART: Like #G_REGEX_MATCH_NOTEMPTY, but only
 *     applied to the start of the matched string. For anchored
 *     patterns this can only happen for patterns containing "\K".
 *     Since: 2.34
 *
 * Flags specifying match-time options.
 *
 * Since: 2.14
 */
/* Remember to update G_REGEX_MATCH_MASK in gregex.c after
 * adding a new flag. */
typedef enum
{
  G_REGEX_MATCH_ANCHORED         = 1 << 4,
  G_REGEX_MATCH_NOTBOL           = 1 << 7,
  G_REGEX_MATCH_NOTEOL           = 1 << 8,
  G_REGEX_MATCH_NOTEMPTY         = 1 << 10,
  G_REGEX_MATCH_PARTIAL          = 1 << 15,
  G_REGEX_MATCH_NEWLINE_CR       = 1 << 20,
  G_REGEX_MATCH_NEWLINE_LF       = 1 << 21,
  /* CRLF and ANYCRLF are combinations of the bits around them, not
   * independent bits, so test them with an equality on the masked value. */
  G_REGEX_MATCH_NEWLINE_CRLF     = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_LF,
  G_REGEX_MATCH_NEWLINE_ANY      = 1 << 22,
  G_REGEX_MATCH_NEWLINE_ANYCRLF  = G_REGEX_MATCH_NEWLINE_CR | G_REGEX_MATCH_NEWLINE_ANY,
  G_REGEX_MATCH_BSR_ANYCRLF      = 1 << 23,
  G_REGEX_MATCH_BSR_ANY          = 1 << 24,
  G_REGEX_MATCH_PARTIAL_SOFT     = G_REGEX_MATCH_PARTIAL,
  G_REGEX_MATCH_PARTIAL_HARD     = 1 << 27,
  G_REGEX_MATCH_NOTEMPTY_ATSTART = 1 << 28
} GRegexMatchFlags;

/**
 * GRegex:
 *
 * A GRegex is the "compiled" form of a regular expression pattern.
 * This structure is opaque and its fields cannot be accessed directly.
 *
 * Since: 2.14
 */
/* Only the struct tag is declared here, so the type is incomplete in this
 * header and can be used through pointers only. */
typedef struct _GRegex GRegex;


/**
 * GMatchInfo:
 *
 * A GMatchInfo is an opaque struct used to return information about
 * matches.
 */
/* Only the struct tag is declared here, so the type is incomplete in this
 * header and can be used through pointers only. */
typedef struct _GMatchInfo GMatchInfo;

424/**
425 * GRegexEvalCallback:
426 * @match_info: the #GMatchInfo generated by the match.
427 * Use g_match_info_get_regex() and g_match_info_get_string() if you
428 * need the #GRegex or the matched string.
429 * @result: a #GString containing the new string
430 * @user_data: user data passed to g_regex_replace_eval()
431 *
432 * Specifies the type of the function passed to g_regex_replace_eval().
433 * It is called for each occurrence of the pattern in the string passed
434 * to g_regex_replace_eval(), and it should append the replacement to
435 * @result.
436 *
437 * Returns: %FALSE to continue the replacement process, %TRUE to stop it
438 *
439 * Since: 2.14
440 */
441typedef gboolean (*GRegexEvalCallback) (const GMatchInfo *match_info,
442 GString *result,
443 gpointer user_data);
444
445
446GLIB_AVAILABLE_IN_ALL
447GRegex *g_regex_new (const gchar *pattern,
448 GRegexCompileFlags compile_options,
449 GRegexMatchFlags match_options,
450 GError **error);
451GLIB_AVAILABLE_IN_ALL
452GRegex *g_regex_ref (GRegex *regex);
453GLIB_AVAILABLE_IN_ALL
454void g_regex_unref (GRegex *regex);
455GLIB_AVAILABLE_IN_ALL
456const gchar *g_regex_get_pattern (const GRegex *regex);
457GLIB_AVAILABLE_IN_ALL
458gint g_regex_get_max_backref (const GRegex *regex);
459GLIB_AVAILABLE_IN_ALL
460gint g_regex_get_capture_count (const GRegex *regex);
461GLIB_AVAILABLE_IN_ALL
462gboolean g_regex_get_has_cr_or_lf (const GRegex *regex);
463GLIB_AVAILABLE_IN_2_38
464gint g_regex_get_max_lookbehind (const GRegex *regex);
465GLIB_AVAILABLE_IN_ALL
466gint g_regex_get_string_number (const GRegex *regex,
467 const gchar *name);
468GLIB_AVAILABLE_IN_ALL
469gchar *g_regex_escape_string (const gchar *string,
470 gint length);
471GLIB_AVAILABLE_IN_ALL
472gchar *g_regex_escape_nul (const gchar *string,
473 gint length);
474
475GLIB_AVAILABLE_IN_ALL
476GRegexCompileFlags g_regex_get_compile_flags (const GRegex *regex);
477GLIB_AVAILABLE_IN_ALL
478GRegexMatchFlags g_regex_get_match_flags (const GRegex *regex);
479
480/* Matching. */
481GLIB_AVAILABLE_IN_ALL
482gboolean g_regex_match_simple (const gchar *pattern,
483 const gchar *string,
484 GRegexCompileFlags compile_options,
485 GRegexMatchFlags match_options);
486GLIB_AVAILABLE_IN_ALL
487gboolean g_regex_match (const GRegex *regex,
488 const gchar *string,
489 GRegexMatchFlags match_options,
490 GMatchInfo **match_info);
491GLIB_AVAILABLE_IN_ALL
492gboolean g_regex_match_full (const GRegex *regex,
493 const gchar *string,
494 gssize string_len,
495 gint start_position,
496 GRegexMatchFlags match_options,
497 GMatchInfo **match_info,
498 GError **error);
499GLIB_AVAILABLE_IN_ALL
500gboolean g_regex_match_all (const GRegex *regex,
501 const gchar *string,
502 GRegexMatchFlags match_options,
503 GMatchInfo **match_info);
504GLIB_AVAILABLE_IN_ALL
505gboolean g_regex_match_all_full (const GRegex *regex,
506 const gchar *string,
507 gssize string_len,
508 gint start_position,
509 GRegexMatchFlags match_options,
510 GMatchInfo **match_info,
511 GError **error);
512
513/* String splitting. */
514GLIB_AVAILABLE_IN_ALL
515gchar **g_regex_split_simple (const gchar *pattern,
516 const gchar *string,
517 GRegexCompileFlags compile_options,
518 GRegexMatchFlags match_options);
519GLIB_AVAILABLE_IN_ALL
520gchar **g_regex_split (const GRegex *regex,
521 const gchar *string,
522 GRegexMatchFlags match_options);
523GLIB_AVAILABLE_IN_ALL
524gchar **g_regex_split_full (const GRegex *regex,
525 const gchar *string,
526 gssize string_len,
527 gint start_position,
528 GRegexMatchFlags match_options,
529 gint max_tokens,
530 GError **error);
531
532/* String replacement. */
533GLIB_AVAILABLE_IN_ALL
534gchar *g_regex_replace (const GRegex *regex,
535 const gchar *string,
536 gssize string_len,
537 gint start_position,
538 const gchar *replacement,
539 GRegexMatchFlags match_options,
540 GError **error);
541GLIB_AVAILABLE_IN_ALL
542gchar *g_regex_replace_literal (const GRegex *regex,
543 const gchar *string,
544 gssize string_len,
545 gint start_position,
546 const gchar *replacement,
547 GRegexMatchFlags match_options,
548 GError **error);
549GLIB_AVAILABLE_IN_ALL
550gchar *g_regex_replace_eval (const GRegex *regex,
551 const gchar *string,
552 gssize string_len,
553 gint start_position,
554 GRegexMatchFlags match_options,
555 GRegexEvalCallback eval,
556 gpointer user_data,
557 GError **error);
558GLIB_AVAILABLE_IN_ALL
559gboolean g_regex_check_replacement (const gchar *replacement,
560 gboolean *has_references,
561 GError **error);
562
563/* Match info */
564GLIB_AVAILABLE_IN_ALL
565GRegex *g_match_info_get_regex (const GMatchInfo *match_info);
566GLIB_AVAILABLE_IN_ALL
567const gchar *g_match_info_get_string (const GMatchInfo *match_info);
568
569GLIB_AVAILABLE_IN_ALL
570GMatchInfo *g_match_info_ref (GMatchInfo *match_info);
571GLIB_AVAILABLE_IN_ALL
572void g_match_info_unref (GMatchInfo *match_info);
573GLIB_AVAILABLE_IN_ALL
574void g_match_info_free (GMatchInfo *match_info);
575GLIB_AVAILABLE_IN_ALL
576gboolean g_match_info_next (GMatchInfo *match_info,
577 GError **error);
578GLIB_AVAILABLE_IN_ALL
579gboolean g_match_info_matches (const GMatchInfo *match_info);
580GLIB_AVAILABLE_IN_ALL
581gint g_match_info_get_match_count (const GMatchInfo *match_info);
582GLIB_AVAILABLE_IN_ALL
583gboolean g_match_info_is_partial_match (const GMatchInfo *match_info);
584GLIB_AVAILABLE_IN_ALL
585gchar *g_match_info_expand_references(const GMatchInfo *match_info,
586 const gchar *string_to_expand,
587 GError **error);
588GLIB_AVAILABLE_IN_ALL
589gchar *g_match_info_fetch (const GMatchInfo *match_info,
590 gint match_num);
591GLIB_AVAILABLE_IN_ALL
592gboolean g_match_info_fetch_pos (const GMatchInfo *match_info,
593 gint match_num,
594 gint *start_pos,
595 gint *end_pos);
596GLIB_AVAILABLE_IN_ALL
597gchar *g_match_info_fetch_named (const GMatchInfo *match_info,
598 const gchar *name);
599GLIB_AVAILABLE_IN_ALL
600gboolean g_match_info_fetch_named_pos (const GMatchInfo *match_info,
601 const gchar *name,
602 gint *start_pos,
603 gint *end_pos);
604GLIB_AVAILABLE_IN_ALL
605gchar **g_match_info_fetch_all (const GMatchInfo *match_info);
606
607G_END_DECLS
608
609#endif /* __G_REGEX_H__ */
610