1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pg_bitutils.c |
4 | * Miscellaneous functions for bit-wise operations. |
5 | * |
6 | * Copyright (c) 2019, PostgreSQL Global Development Group |
7 | * |
8 | * IDENTIFICATION |
9 | * src/port/pg_bitutils.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | #include "c.h" |
14 | |
15 | #ifdef HAVE__GET_CPUID |
16 | #include <cpuid.h> |
17 | #endif |
18 | #ifdef HAVE__CPUID |
19 | #include <intrin.h> |
20 | #endif |
21 | |
22 | #include "port/pg_bitutils.h" |
23 | |
24 | |
25 | /* |
26 | * Array giving the position of the left-most set bit for each possible |
27 | * byte value. We count the right-most position as the 0th bit, and the |
28 | * left-most the 7th bit. The 0th entry of the array should not be used. |
29 | * |
30 | * Note: this is not used by the functions in pg_bitutils.h when |
31 | * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that |
32 | * extensions possibly compiled with a different compiler can use it. |
33 | */ |
34 | const uint8 pg_leftmost_one_pos[256] = { |
35 | 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, |
36 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
37 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
38 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
39 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
40 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
41 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
42 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
43 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
44 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
45 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
46 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
47 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
48 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
49 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
50 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 |
51 | }; |
52 | |
53 | /* |
54 | * Array giving the position of the right-most set bit for each possible |
55 | * byte value. We count the right-most position as the 0th bit, and the |
56 | * left-most the 7th bit. The 0th entry of the array should not be used. |
57 | * |
58 | * Note: this is not used by the functions in pg_bitutils.h when |
59 | * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that |
60 | * extensions possibly compiled with a different compiler can use it. |
61 | */ |
62 | const uint8 pg_rightmost_one_pos[256] = { |
63 | 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
64 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
65 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
66 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
67 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
68 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
69 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
70 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
71 | 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
72 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
73 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
74 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
75 | 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
76 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
77 | 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, |
78 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 |
79 | }; |
80 | |
81 | /* |
82 | * Array giving the number of 1-bits in each possible byte value. |
83 | * |
84 | * Note: we export this for use by functions in which explicit use |
85 | * of the popcount functions seems unlikely to be a win. |
86 | */ |
87 | const uint8 pg_number_of_ones[256] = { |
88 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, |
89 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
90 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
91 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
92 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
93 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
94 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
95 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
96 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, |
97 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
98 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
99 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
100 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, |
101 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
102 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, |
103 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 |
104 | }; |
105 | |
106 | /* |
107 | * On x86_64, we can use the hardware popcount instruction, but only if |
108 | * we can verify that the CPU supports it via the cpuid instruction. |
109 | * |
110 | * Otherwise, we fall back to __builtin_popcount if the compiler has that, |
111 | * or a hand-rolled implementation if not. |
112 | */ |
113 | #ifdef HAVE_X86_64_POPCNTQ |
114 | #if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) |
115 | #define USE_POPCNT_ASM 1 |
116 | #endif |
117 | #endif |
118 | |
119 | static int pg_popcount32_slow(uint32 word); |
120 | static int pg_popcount64_slow(uint64 word); |
121 | |
122 | #ifdef USE_POPCNT_ASM |
123 | static bool pg_popcount_available(void); |
124 | static int pg_popcount32_choose(uint32 word); |
125 | static int pg_popcount64_choose(uint64 word); |
126 | static int pg_popcount32_asm(uint32 word); |
127 | static int pg_popcount64_asm(uint64 word); |
128 | |
129 | int (*pg_popcount32) (uint32 word) = pg_popcount32_choose; |
130 | int (*pg_popcount64) (uint64 word) = pg_popcount64_choose; |
131 | #else |
132 | int (*pg_popcount32) (uint32 word) = pg_popcount32_slow; |
133 | int (*pg_popcount64) (uint64 word) = pg_popcount64_slow; |
134 | #endif /* USE_POPCNT_ASM */ |
135 | |
136 | #ifdef USE_POPCNT_ASM |
137 | |
138 | /* |
139 | * Return true if CPUID indicates that the POPCNT instruction is available. |
140 | */ |
141 | static bool |
142 | pg_popcount_available(void) |
143 | { |
144 | unsigned int exx[4] = {0, 0, 0, 0}; |
145 | |
146 | #if defined(HAVE__GET_CPUID) |
147 | __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); |
148 | #elif defined(HAVE__CPUID) |
149 | __cpuid(exx, 1); |
150 | #else |
151 | #error cpuid instruction not available |
152 | #endif |
153 | |
154 | return (exx[2] & (1 << 23)) != 0; /* POPCNT */ |
155 | } |
156 | |
157 | /* |
158 | * These functions get called on the first call to pg_popcount32 etc. |
159 | * They detect whether we can use the asm implementations, and replace |
160 | * the function pointers so that subsequent calls are routed directly to |
161 | * the chosen implementation. |
162 | */ |
163 | static int |
164 | pg_popcount32_choose(uint32 word) |
165 | { |
166 | if (pg_popcount_available()) |
167 | { |
168 | pg_popcount32 = pg_popcount32_asm; |
169 | pg_popcount64 = pg_popcount64_asm; |
170 | } |
171 | else |
172 | { |
173 | pg_popcount32 = pg_popcount32_slow; |
174 | pg_popcount64 = pg_popcount64_slow; |
175 | } |
176 | |
177 | return pg_popcount32(word); |
178 | } |
179 | |
180 | static int |
181 | pg_popcount64_choose(uint64 word) |
182 | { |
183 | if (pg_popcount_available()) |
184 | { |
185 | pg_popcount32 = pg_popcount32_asm; |
186 | pg_popcount64 = pg_popcount64_asm; |
187 | } |
188 | else |
189 | { |
190 | pg_popcount32 = pg_popcount32_slow; |
191 | pg_popcount64 = pg_popcount64_slow; |
192 | } |
193 | |
194 | return pg_popcount64(word); |
195 | } |
196 | |
197 | /* |
198 | * pg_popcount32_asm |
199 | * Return the number of 1 bits set in word |
200 | */ |
201 | static int |
202 | pg_popcount32_asm(uint32 word) |
203 | { |
204 | uint32 res; |
205 | |
206 | __asm__ __volatile__(" popcntl %1,%0\n" :"=q" (res):"rm" (word):"cc" ); |
207 | return (int) res; |
208 | } |
209 | |
210 | /* |
211 | * pg_popcount64_asm |
212 | * Return the number of 1 bits set in word |
213 | */ |
214 | static int |
215 | pg_popcount64_asm(uint64 word) |
216 | { |
217 | uint64 res; |
218 | |
219 | __asm__ __volatile__(" popcntq %1,%0\n" :"=q" (res):"rm" (word):"cc" ); |
220 | return (int) res; |
221 | } |
222 | |
223 | #endif /* USE_POPCNT_ASM */ |
224 | |
225 | |
226 | /* |
227 | * pg_popcount32_slow |
228 | * Return the number of 1 bits set in word |
229 | */ |
230 | static int |
231 | pg_popcount32_slow(uint32 word) |
232 | { |
233 | #ifdef HAVE__BUILTIN_POPCOUNT |
234 | return __builtin_popcount(word); |
235 | #else /* !HAVE__BUILTIN_POPCOUNT */ |
236 | int result = 0; |
237 | |
238 | while (word != 0) |
239 | { |
240 | result += pg_number_of_ones[word & 255]; |
241 | word >>= 8; |
242 | } |
243 | |
244 | return result; |
245 | #endif /* HAVE__BUILTIN_POPCOUNT */ |
246 | } |
247 | |
248 | /* |
249 | * pg_popcount64_slow |
250 | * Return the number of 1 bits set in word |
251 | */ |
252 | static int |
253 | pg_popcount64_slow(uint64 word) |
254 | { |
255 | #ifdef HAVE__BUILTIN_POPCOUNT |
256 | #if defined(HAVE_LONG_INT_64) |
257 | return __builtin_popcountl(word); |
258 | #elif defined(HAVE_LONG_LONG_INT_64) |
259 | return __builtin_popcountll(word); |
260 | #else |
261 | #error must have a working 64-bit integer datatype |
262 | #endif |
263 | #else /* !HAVE__BUILTIN_POPCOUNT */ |
264 | int result = 0; |
265 | |
266 | while (word != 0) |
267 | { |
268 | result += pg_number_of_ones[word & 255]; |
269 | word >>= 8; |
270 | } |
271 | |
272 | return result; |
273 | #endif /* HAVE__BUILTIN_POPCOUNT */ |
274 | } |
275 | |
276 | |
277 | /* |
278 | * pg_popcount |
279 | * Returns the number of 1-bits in buf |
280 | */ |
281 | uint64 |
282 | pg_popcount(const char *buf, int bytes) |
283 | { |
284 | uint64 popcnt = 0; |
285 | |
286 | #if SIZEOF_VOID_P >= 8 |
287 | /* Process in 64-bit chunks if the buffer is aligned. */ |
288 | if (buf == (const char *) TYPEALIGN(8, buf)) |
289 | { |
290 | const uint64 *words = (const uint64 *) buf; |
291 | |
292 | while (bytes >= 8) |
293 | { |
294 | popcnt += pg_popcount64(*words++); |
295 | bytes -= 8; |
296 | } |
297 | |
298 | buf = (const char *) words; |
299 | } |
300 | #else |
301 | /* Process in 32-bit chunks if the buffer is aligned. */ |
302 | if (buf == (const char *) TYPEALIGN(4, buf)) |
303 | { |
304 | const uint32 *words = (const uint32 *) buf; |
305 | |
306 | while (bytes >= 4) |
307 | { |
308 | popcnt += pg_popcount32(*words++); |
309 | bytes -= 4; |
310 | } |
311 | |
312 | buf = (const char *) words; |
313 | } |
314 | #endif |
315 | |
316 | /* Process any remaining bytes */ |
317 | while (bytes--) |
318 | popcnt += pg_number_of_ones[(unsigned char) *buf++]; |
319 | |
320 | return popcnt; |
321 | } |
322 | |