| 1 | /************************************************* | 
|---|
| 2 | *      Perl-Compatible Regular Expressions       * | 
|---|
| 3 | *************************************************/ | 
|---|
| 4 |  | 
|---|
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax | 
|---|
| 6 | and semantics are as close as possible to those of the Perl 5 language. | 
|---|
| 7 |  | 
|---|
| 8 | Written by Philip Hazel | 
|---|
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge | 
|---|
| 10 | New API code Copyright (c) 2016-2021 University of Cambridge | 
|---|
| 11 |  | 
|---|
| 12 | ----------------------------------------------------------------------------- | 
|---|
| 13 | Redistribution and use in source and binary forms, with or without | 
|---|
| 14 | modification, are permitted provided that the following conditions are met: | 
|---|
| 15 |  | 
|---|
| 16 | * Redistributions of source code must retain the above copyright notice, | 
|---|
| 17 | this list of conditions and the following disclaimer. | 
|---|
| 18 |  | 
|---|
| 19 | * Redistributions in binary form must reproduce the above copyright | 
|---|
| 20 | notice, this list of conditions and the following disclaimer in the | 
|---|
| 21 | documentation and/or other materials provided with the distribution. | 
|---|
| 22 |  | 
|---|
| 23 | * Neither the name of the University of Cambridge nor the names of its | 
|---|
| 24 | contributors may be used to endorse or promote products derived from | 
|---|
| 25 | this software without specific prior written permission. | 
|---|
| 26 |  | 
|---|
| 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|---|
| 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|---|
| 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|---|
| 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|---|
| 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|---|
| 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|---|
| 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|---|
| 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|---|
| 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|---|
| 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|---|
| 37 | POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 38 | ----------------------------------------------------------------------------- | 
|---|
| 39 | */ | 
|---|
| 40 |  | 
|---|
| 41 | /* This module contains an internal function that is used to match a Unicode | 
|---|
| 42 | extended grapheme sequence. It is used by both pcre2_match() and | 
|---|
| 43 | pcre2_dfa_match(). However, it is called only when Unicode support is being | 
|---|
| 44 | compiled. Nevertheless, we provide a dummy function when there is no Unicode | 
|---|
| 45 | support, because some compilers do not like functionless source files. */ | 
|---|
| 46 |  | 
|---|
| 47 |  | 
|---|
| 48 | #ifdef HAVE_CONFIG_H | 
|---|
| 49 | #include "config.h" | 
|---|
| 50 | #endif | 
|---|
| 51 |  | 
|---|
| 52 |  | 
|---|
| 53 | #include "pcre2_internal.h" | 
|---|
| 54 |  | 
|---|
| 55 |  | 
|---|
| 56 | /* Dummy function */ | 
|---|
| 57 |  | 
|---|
| 58 | #ifndef SUPPORT_UNICODE | 
|---|
| 59 | PCRE2_SPTR | 
|---|
| 60 | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, | 
|---|
| 61 | PCRE2_SPTR end_subject, BOOL utf, int *xcount) | 
|---|
| 62 | { | 
|---|
| 63 | (void)c; | 
|---|
| 64 | (void)eptr; | 
|---|
| 65 | (void)start_subject; | 
|---|
| 66 | (void)end_subject; | 
|---|
| 67 | (void)utf; | 
|---|
| 68 | (void)xcount; | 
|---|
| 69 | return NULL; | 
|---|
| 70 | } | 
|---|
| 71 | #else | 
|---|
| 72 |  | 
|---|
| 73 |  | 
|---|
| 74 | /************************************************* | 
|---|
| 75 | *      Match an extended grapheme sequence       * | 
|---|
| 76 | *************************************************/ | 
|---|
| 77 |  | 
|---|
| 78 | /* | 
|---|
| 79 | Arguments: | 
|---|
| 80 | c              the first character | 
|---|
| 81 | eptr           pointer to next character | 
|---|
| 82 | start_subject  pointer to start of subject | 
|---|
| 83 | end_subject    pointer to end of subject | 
|---|
| 84 | utf            TRUE if in UTF mode | 
|---|
| 85 | xcount         pointer to count of additional characters, | 
|---|
| 86 | or NULL if count not needed | 
|---|
| 87 |  | 
|---|
| 88 | Returns:         pointer after the end of the sequence | 
|---|
| 89 | */ | 
|---|
| 90 |  | 
|---|
| 91 | PCRE2_SPTR | 
|---|
| 92 | PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject, | 
|---|
| 93 | PCRE2_SPTR end_subject, BOOL utf, int *xcount) | 
|---|
| 94 | { | 
|---|
| 95 | int lgb = UCD_GRAPHBREAK(c); | 
|---|
| 96 |  | 
|---|
| 97 | while (eptr < end_subject) | 
|---|
| 98 | { | 
|---|
| 99 | int rgb; | 
|---|
| 100 | int len = 1; | 
|---|
| 101 | if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } | 
|---|
| 102 | rgb = UCD_GRAPHBREAK(c); | 
|---|
| 103 | if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break; | 
|---|
| 104 |  | 
|---|
| 105 | /* Not breaking between Regional Indicators is allowed only if there | 
|---|
| 106 | are an even number of preceding RIs. */ | 
|---|
| 107 |  | 
|---|
| 108 | if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) | 
|---|
| 109 | { | 
|---|
| 110 | int ricount = 0; | 
|---|
| 111 | PCRE2_SPTR bptr = eptr - 1; | 
|---|
| 112 | if (utf) BACKCHAR(bptr); | 
|---|
| 113 |  | 
|---|
| 114 | /* bptr is pointing to the left-hand character */ | 
|---|
| 115 |  | 
|---|
| 116 | while (bptr > start_subject) | 
|---|
| 117 | { | 
|---|
| 118 | bptr--; | 
|---|
| 119 | if (utf) | 
|---|
| 120 | { | 
|---|
| 121 | BACKCHAR(bptr); | 
|---|
| 122 | GETCHAR(c, bptr); | 
|---|
| 123 | } | 
|---|
| 124 | else | 
|---|
| 125 | c = *bptr; | 
|---|
| 126 | if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; | 
|---|
| 127 | ricount++; | 
|---|
| 128 | } | 
|---|
| 129 | if ((ricount & 1) != 0) break;  /* Grapheme break required */ | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this | 
|---|
| 133 | allows any number of them before a following Extended_Pictographic. */ | 
|---|
| 134 |  | 
|---|
| 135 | if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) || | 
|---|
| 136 | lgb != ucp_gbExtended_Pictographic) | 
|---|
| 137 | lgb = rgb; | 
|---|
| 138 |  | 
|---|
| 139 | eptr += len; | 
|---|
| 140 | if (xcount != NULL) *xcount += 1; | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | return eptr; | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | #endif  /* SUPPORT_UNICODE */ | 
|---|
| 147 |  | 
|---|
| 148 | /* End of pcre2_extuni.c */ | 
|---|
| 149 |  | 
|---|