1 | // |
2 | // Copyright 2017 The Abseil Authors. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // https://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | // |
16 | |
17 | // These routines provide mem versions of standard C string routines, |
18 | // such as strpbrk. They function exactly the same as the str versions, |
19 | // so if you wonder what they are, replace the word "mem" by |
20 | // "str" and check out the man page. I could return void*, as the |
21 | // strutil.h mem*() routines tend to do, but I return char* instead |
22 | // since this is by far the most common way these functions are called. |
23 | // |
24 | // The difference between the mem and str versions is the mem version |
25 | // takes a pointer and a length, rather than a '\0'-terminated string. |
26 | // The memcase* routines defined here assume the locale is "C" |
27 | // (they use absl::ascii_tolower instead of tolower). |
28 | // |
29 | // These routines are based on the BSD library. |
30 | // |
31 | // Here's a list of routines from string.h, and their mem analogues. |
32 | // Functions in lowercase are defined in string.h; those in UPPERCASE |
33 | // are defined here: |
34 | // |
//    strlen                        --
//    strcat        strncat         MEMCAT
//    strcpy        strncpy         memcpy
//    --                            memccpy   (very cool function, btw)
//    --                            memmove
//    --                            memset
//    strcmp        strncmp         memcmp
//    strcasecmp    strncasecmp     MEMCASECMP
//    strchr                        memchr
//    strcoll                       --
//    strxfrm                       --
//    strdup        strndup         MEMDUP
//    strrchr                       MEMRCHR
//    strspn                        MEMSPN
//    strcspn                       MEMCSPN
//    strpbrk                       MEMPBRK
//    strstr                        MEMSTR  MEMMEM
//    (g)strcasestr                 MEMCASESTR  MEMCASEMEM
//    strtok                        --
//    strprefix                     MEMPREFIX      (strprefix is from strutil.h)
//    strcaseprefix                 MEMCASEPREFIX  (strcaseprefix is from strutil.h)
//    strsuffix                     MEMSUFFIX      (strsuffix is from strutil.h)
//    strcasesuffix                 MEMCASESUFFIX  (strcasesuffix is from strutil.h)
//    --                            MEMIS
//    --                            MEMCASEIS
//    strcount                      MEMCOUNT       (strcount is from strutil.h)
//
// Note: the UPPERCASE names appear below in lowercase (MEMCAT is memcat(),
// and so on). This file declares memcat, memcasecmp, memdup, memrchr,
// memspn, memcspn, mempbrk, memstr, memmem, memcasestr and memcasemem; the
// prefix/suffix/is/count entries in the table are not declared in this file.
61 | |
62 | #ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |
63 | #define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |
64 | |
65 | #include <cstddef> |
66 | #include <cstring> |
67 | |
68 | #include "absl/base/port.h" // disable some warnings on Windows |
69 | #include "absl/strings/ascii.h" // for absl::ascii_tolower |
70 | |
71 | namespace absl { |
72 | namespace strings_internal { |
73 | |
// mem analogue of strcat(): copies the srclen bytes at src to dest + destlen,
// i.e. appends them to a buffer that already holds destlen bytes.
//
// The caller must make sure dest has room for destlen + srclen bytes and that
// the source and destination ranges do not overlap (the copy is done with
// memcpy). No terminating '\0' is written.
//
// Returns a pointer to the first appended byte (dest + destlen). Note that
// this differs from strcat(), which returns dest itself.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;
  if (srclen == 0) {
    // memcpy requires valid pointers even for a zero-byte copy, so an empty
    // (possibly null) source must never reach it.
    return tail;
  }
  // memcpy hands back its destination as void*; that conversion is a
  // static_cast, not a reinterpret_cast.
  return static_cast<char*>(memcpy(tail, src, srclen));
}
78 | |
// Out-of-line mem analogues of the str routines named in the table at the top
// of this file. Each takes an explicit byte count for `s` instead of relying
// on a '\0' terminator; the definitions are not part of this header.

// Analogue of strcasecmp()/strncasecmp() over len bytes of s1 and s2. As the
// file comment notes, the memcase* routines assume the "C" locale.
int memcasecmp(const char* s1, const char* s2, size_t len);

// Analogue of strdup()/strndup() for the slen bytes at s.
// NOTE(review): how the returned buffer is allocated, and therefore how it
// must be released, is decided by the out-of-line definition -- confirm there.
char* memdup(const char* s, size_t slen);

// Analogue of strrchr() for the character c within the slen bytes at s.
// Returns a non-const char* even though s is const.
char* memrchr(const char* s, int c, size_t slen);

// Analogue of strspn(). `accept` carries no length, so it is presumably a
// '\0'-terminated string -- verify against the definition.
size_t memspn(const char* s, size_t slen, const char* accept);

// Analogue of strcspn(). `reject` carries no length, so it is presumably a
// '\0'-terminated string -- verify against the definition.
size_t memcspn(const char* s, size_t slen, const char* reject);

// Analogue of strpbrk(). `accept` carries no length, so it is presumably a
// '\0'-terminated string -- verify against the definition.
// Returns a non-const char* even though s is const.
char* mempbrk(const char* s, size_t slen, const char* accept);
85 | |
86 | // This is for internal use only. Don't call this directly |
87 | template <bool case_sensitive> |
88 | const char* int_memmatch(const char* haystack, size_t haylen, |
89 | const char* needle, size_t neelen) { |
90 | if (0 == neelen) { |
91 | return haystack; // even if haylen is 0 |
92 | } |
93 | const char* hayend = haystack + haylen; |
94 | const char* needlestart = needle; |
95 | const char* needleend = needlestart + neelen; |
96 | |
97 | for (; haystack < hayend; ++haystack) { |
98 | char hay = case_sensitive |
99 | ? *haystack |
100 | : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); |
101 | char nee = case_sensitive |
102 | ? *needle |
103 | : absl::ascii_tolower(static_cast<unsigned char>(*needle)); |
104 | if (hay == nee) { |
105 | if (++needle == needleend) { |
106 | return haystack + 1 - neelen; |
107 | } |
108 | } else if (needle != needlestart) { |
109 | // must back up haystack in case a prefix matched (find "aab" in "aaab") |
110 | haystack -= needle - needlestart; // for loop will advance one more |
111 | needle = needlestart; |
112 | } |
113 | } |
114 | return nullptr; |
115 | } |
116 | |
// These are the functions you can call directly.
118 | inline const char* memstr(const char* phaystack, size_t haylen, |
119 | const char* pneedle) { |
120 | return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); |
121 | } |
122 | |
123 | inline const char* memcasestr(const char* phaystack, size_t haylen, |
124 | const char* pneedle) { |
125 | return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); |
126 | } |
127 | |
128 | inline const char* memmem(const char* phaystack, size_t haylen, |
129 | const char* pneedle, size_t needlelen) { |
130 | return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); |
131 | } |
132 | |
133 | inline const char* memcasemem(const char* phaystack, size_t haylen, |
134 | const char* pneedle, size_t needlelen) { |
135 | return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); |
136 | } |
137 | |
// Substring search that is declared here and defined out of line.
// It is significantly faster for case-sensitive matches with very few
// possible matches; see the unit test for benchmarks.
// NOTE(review): presumably it searches phaystack[0, haylen) for
// pneedle[0, neelen) with the same result contract as memmem() -- confirm
// against the definition.
const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
                     size_t neelen);
142 | |
143 | } // namespace strings_internal |
144 | } // namespace absl |
145 | |
146 | #endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |
147 | |