compare.h source code [ClickHouse/contrib/hyperscan/src/util/compare.h]

1	/*
2	* Copyright (c) 2015-2016, Intel Corporation
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions are met:
6	*
7	* * Redistributions of source code must retain the above copyright notice,
8	* this list of conditions and the following disclaimer.
9	* * Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	* * Neither the name of Intel Corporation nor the names of its contributors
13	* may be used to endorse or promote products derived from this software
14	* without specific prior written permission.
15	*
16	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26	* POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#ifndef COMPARE_H
30	#define COMPARE_H
31
32	#include "unaligned.h"
33	#include "ue2common.h"
34
35	/ Our own definitions of tolower, toupper and isalpha are provided to prevent*
36	* us from going out to libc for these tests. */
37
38	static really_inline
39	char myisupper(const char c) {
40	return ((c >= `'A'`) && (c <= `'Z'`));
41	}
42
43	static really_inline
44	char myislower(const char c) {
45	return ((c >= `'a'`) && (c <= `'z'`));
46	}
47
48	static really_inline
49	char mytolower(const char c) {
50	if (myisupper(c)) {
51	return c + `0x20`;
52	}
53	return c;
54	}
55
56	static really_inline
57	char mytoupper(const char c) {
58	if (myislower(c)) {
59	return c - `0x20`;
60	}
61	return c;
62	}
63
64	/ this is a slightly warped definition of `alpha'. What we really*
65	* mean is: does this character have different uppercase and lowercase forms?
66	*/
67	static really_inline char ourisalpha(const char c) {
68	return mytolower(c) != mytoupper(c);
69	}
70
71	static really_inline char ourisprint(const char c) {
72	return c >= `0x20` && c <= `0x7e`;
73	}
74
75	// Paul Hsieh's SWAR toupper; used because it doesn't
76	// matter whether we go toupper or tolower. We should
77	// probably change the other one
78	static really_inline
79	u32 theirtoupper32(const u32 x) {
80	u32 b = `0x80808080ul` \| x;
81	u32 c = b - `0x61616161ul`;
82	u32 d = ~(b - `0x7b7b7b7bul`);
83	u32 e = (c & d) & (~x & `0x80808080ul`);
84	return x - (e >> `2`);
85	}
86
87	// 64-bit variant.
88	static really_inline
89	u64a theirtoupper64(const u64a x) {
90	u64a b = `0x8080808080808080ull` \| x;
91	u64a c = b - `0x6161616161616161ull`;
92	u64a d = ~(b - `0x7b7b7b7b7b7b7b7bull`);
93	u64a e = (c & d) & (~x & `0x8080808080808080ull`);
94	u64a v = x - (e >> `2`);
95	return v;
96	}
97
98	static really_inline
99	int cmpNocaseNaive(const u8 p1, const* u8 *p2, size_t len) {
100	const u8 *pEnd = p1 + len;
101	for (; p1 < pEnd; p1++, p2++) {
102	assert(!ourisalpha(p2) \|\| myisupper(p2)); // Already upper-case.
103	if ((u8)mytoupper(p1) != p2) {
104	return `1`;
105	}
106	}
107	return `0`;
108	}
109
110	static really_inline
111	int cmpCaseNaive(const u8 p1, const* u8 *p2, size_t len) {
112	const u8 *pEnd = p1 + len;
113	for (; p1 < pEnd; p1++, p2++) {
114	if (p1 != p2) {
115	return `1`;
116	}
117	}
118	return `0`;
119	}
120
121	#ifdef ARCH_64_BIT
122	# define CMP_T u64a
123	# define ULOAD(x) unaligned_load_u64a(x)
124	# define TOUPPER(x) theirtoupper64(x)
125	#else
126	# define CMP_T u32
127	# define ULOAD(x) unaligned_load_u32(x)
128	# define TOUPPER(x) theirtoupper32(x)
129	#endif
130
131	#define CMP_SIZE sizeof(CMP_T)
132
133	/**
134	* \brief Compare two strings, optionally caselessly.
135	*
136	* Note: If nocase is true, p2 is assumed to be already upper-case.
137	*/
138	#if defined(ARCH_IA32)
139	static UNUSED never_inline
140	#else
141	static really_inline
142	#endif
143	int cmpForward(const u8 p1, const* u8 p2, size_t len, char* nocase) {
144	if (len < CMP_SIZE) {
145	return nocase ? cmpNocaseNaive(p1, p2, len)
146	: cmpCaseNaive(p1, p2, len);
147	}
148
149	const u8 *p1_end = p1 + len - CMP_SIZE;
150	const u8 *p2_end = p2 + len - CMP_SIZE;
151
152	if (nocase) { // Case-insensitive version.
153	for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
154	assert(ULOAD(p2) == TOUPPER(ULOAD(p2))); // Already upper-case.
155	if (TOUPPER(ULOAD(p1)) != ULOAD(p2)) {
156	return `1`;
157	}
158	}
159	assert(ULOAD(p2_end) == TOUPPER(ULOAD(p2_end))); // Already upper-case.
160	if (TOUPPER(ULOAD(p1_end)) != ULOAD(p2_end)) {
161	return `1`;
162	}
163	} else { // Case-sensitive version.
164	for (; p1 < p1_end; p1 += CMP_SIZE, p2 += CMP_SIZE) {
165	if (ULOAD(p1) != ULOAD(p2)) {
166	return `1`;
167	}
168	}
169	if (ULOAD(p1_end) != ULOAD(p2_end)) {
170	return `1`;
171	}
172	}
173
174	return `0`;
175	}
176
177	#undef CMP_T
178	#undef ULOAD
179	#undef TOUPPER
180	#undef CMP_SIZE
181
182	#endif
183
184

Browse the source code of ClickHouse/contrib/hyperscan/src/util/compare.h