1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Copyright (c) 1997-2012 University of Cambridge |
10 | |
11 | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without |
13 | modification, are permitted provided that the following conditions are met: |
14 | |
15 | * Redistributions of source code must retain the above copyright notice, |
16 | this list of conditions and the following disclaimer. |
17 | |
18 | * Redistributions in binary form must reproduce the above copyright |
19 | notice, this list of conditions and the following disclaimer in the |
20 | documentation and/or other materials provided with the distribution. |
21 | |
22 | * Neither the name of the University of Cambridge nor the names of its |
23 | contributors may be used to endorse or promote products derived from |
24 | this software without specific prior written permission. |
25 | |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 | POSSIBILITY OF SUCH DAMAGE. |
37 | ----------------------------------------------------------------------------- |
38 | */ |
39 | |
40 | |
41 | /* This module contains internal functions for testing newlines when more than |
42 | one kind of newline is to be recognized. When a newline is found, its length is |
43 | returned. In principle, we could implement several newline "types", each |
44 | referring to a different set of newline characters. At present, PCRE supports |
45 | only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, |
46 | and NLTYPE_ANY. The full list of Unicode newline characters is taken from |
47 | http://unicode.org/unicode/reports/tr18/. */ |
48 | |
49 | |
50 | #include "pcre_config.h" |
51 | #include "pcre_internal.h" |
52 | |
53 | |
54 | |
55 | /************************************************* |
56 | * Check for newline at given position * |
57 | *************************************************/ |
58 | |
59 | /* It is guaranteed that the initial value of ptr is less than the end of the |
60 | string that is being processed. |
61 | |
62 | Arguments: |
63 | ptr pointer to possible newline |
64 | type the newline type |
65 | endptr pointer to the end of the string |
66 | lenptr where to return the length |
67 | utf TRUE if in utf mode |
68 | |
69 | Returns: TRUE or FALSE |
70 | */ |
71 | |
72 | BOOL |
73 | PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, |
74 | BOOL utf) |
75 | { |
76 | pcre_uint32 c; |
77 | (void)utf; |
78 | #ifdef SUPPORT_UTF |
79 | if (utf) |
80 | { |
81 | GETCHAR(c, ptr); |
82 | } |
83 | else |
84 | #endif /* SUPPORT_UTF */ |
85 | c = *ptr; |
86 | |
87 | /* Note that this function is called only for ANY or ANYCRLF. */ |
88 | |
89 | if (type == NLTYPE_ANYCRLF) switch(c) |
90 | { |
91 | case CHAR_LF: *lenptr = 1; return TRUE; |
92 | case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
93 | return TRUE; |
94 | default: return FALSE; |
95 | } |
96 | |
97 | /* NLTYPE_ANY */ |
98 | |
99 | else switch(c) |
100 | { |
101 | #ifdef EBCDIC |
102 | case CHAR_NEL: |
103 | #endif |
104 | case CHAR_LF: |
105 | case CHAR_VT: |
106 | case CHAR_FF: *lenptr = 1; return TRUE; |
107 | |
108 | case CHAR_CR: |
109 | *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; |
110 | return TRUE; |
111 | |
112 | #ifndef EBCDIC |
113 | #ifdef COMPILE_PCRE8 |
114 | case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
115 | case 0x2028: /* LS */ |
116 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
117 | #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
118 | case CHAR_NEL: |
119 | case 0x2028: /* LS */ |
120 | case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
121 | #endif /* COMPILE_PCRE8 */ |
122 | #endif /* Not EBCDIC */ |
123 | |
124 | default: return FALSE; |
125 | } |
126 | } |
127 | |
128 | |
129 | |
130 | /************************************************* |
131 | * Check for newline at previous position * |
132 | *************************************************/ |
133 | |
134 | /* It is guaranteed that the initial value of ptr is greater than the start of |
135 | the string that is being processed. |
136 | |
137 | Arguments: |
138 | ptr pointer to possible newline |
139 | type the newline type |
140 | startptr pointer to the start of the string |
141 | lenptr where to return the length |
142 | utf TRUE if in utf mode |
143 | |
144 | Returns: TRUE or FALSE |
145 | */ |
146 | |
147 | BOOL |
148 | PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, |
149 | BOOL utf) |
150 | { |
151 | pcre_uint32 c; |
152 | (void)utf; |
153 | ptr--; |
154 | #ifdef SUPPORT_UTF |
155 | if (utf) |
156 | { |
157 | BACKCHAR(ptr); |
158 | GETCHAR(c, ptr); |
159 | } |
160 | else |
161 | #endif /* SUPPORT_UTF */ |
162 | c = *ptr; |
163 | |
164 | /* Note that this function is called only for ANY or ANYCRLF. */ |
165 | |
166 | if (type == NLTYPE_ANYCRLF) switch(c) |
167 | { |
168 | case CHAR_LF: |
169 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
170 | return TRUE; |
171 | |
172 | case CHAR_CR: *lenptr = 1; return TRUE; |
173 | default: return FALSE; |
174 | } |
175 | |
176 | /* NLTYPE_ANY */ |
177 | |
178 | else switch(c) |
179 | { |
180 | case CHAR_LF: |
181 | *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; |
182 | return TRUE; |
183 | |
184 | #ifdef EBCDIC |
185 | case CHAR_NEL: |
186 | #endif |
187 | case CHAR_VT: |
188 | case CHAR_FF: |
189 | case CHAR_CR: *lenptr = 1; return TRUE; |
190 | |
191 | #ifndef EBCDIC |
192 | #ifdef COMPILE_PCRE8 |
193 | case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE; |
194 | case 0x2028: /* LS */ |
195 | case 0x2029: *lenptr = 3; return TRUE; /* PS */ |
196 | #else /* COMPILE_PCRE16 || COMPILE_PCRE32 */ |
197 | case CHAR_NEL: |
198 | case 0x2028: /* LS */ |
199 | case 0x2029: *lenptr = 1; return TRUE; /* PS */ |
200 | #endif /* COMPILE_PCRE8 */ |
201 | #endif /* NotEBCDIC */ |
202 | |
203 | default: return FALSE; |
204 | } |
205 | } |
206 | |
207 | /* End of pcre_newline.c */ |
208 | |