1/*-------------------------------------------------------------------------
2 *
3 * pg_bitutils.c
4 * Miscellaneous functions for bit-wise operations.
5 *
6 * Copyright (c) 2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/port/pg_bitutils.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "c.h"
14
15#ifdef HAVE__GET_CPUID
16#include <cpuid.h>
17#endif
18#ifdef HAVE__CPUID
19#include <intrin.h>
20#endif
21
22#include "port/pg_bitutils.h"
23
24
25/*
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
29 *
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
33 */
34const uint8 pg_leftmost_one_pos[256] = {
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
51};
52
53/*
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
57 *
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
61 */
62const uint8 pg_rightmost_one_pos[256] = {
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
79};
80
81/*
82 * Array giving the number of 1-bits in each possible byte value.
83 *
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
86 */
87const uint8 pg_number_of_ones[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104};
105
106/*
107 * On x86_64, we can use the hardware popcount instruction, but only if
108 * we can verify that the CPU supports it via the cpuid instruction.
109 *
110 * Otherwise, we fall back to __builtin_popcount if the compiler has that,
111 * or a hand-rolled implementation if not.
112 */
113#ifdef HAVE_X86_64_POPCNTQ
114#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
115#define USE_POPCNT_ASM 1
116#endif
117#endif
118
119static int pg_popcount32_slow(uint32 word);
120static int pg_popcount64_slow(uint64 word);
121
122#ifdef USE_POPCNT_ASM
123static bool pg_popcount_available(void);
124static int pg_popcount32_choose(uint32 word);
125static int pg_popcount64_choose(uint64 word);
126static int pg_popcount32_asm(uint32 word);
127static int pg_popcount64_asm(uint64 word);
128
129int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
130int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
131#else
132int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
133int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
134#endif /* USE_POPCNT_ASM */
135
136#ifdef USE_POPCNT_ASM
137
138/*
139 * Return true if CPUID indicates that the POPCNT instruction is available.
140 */
141static bool
142pg_popcount_available(void)
143{
144 unsigned int exx[4] = {0, 0, 0, 0};
145
146#if defined(HAVE__GET_CPUID)
147 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
148#elif defined(HAVE__CPUID)
149 __cpuid(exx, 1);
150#else
151#error cpuid instruction not available
152#endif
153
154 return (exx[2] & (1 << 23)) != 0; /* POPCNT */
155}
156
157/*
158 * These functions get called on the first call to pg_popcount32 etc.
159 * They detect whether we can use the asm implementations, and replace
160 * the function pointers so that subsequent calls are routed directly to
161 * the chosen implementation.
162 */
163static int
164pg_popcount32_choose(uint32 word)
165{
166 if (pg_popcount_available())
167 {
168 pg_popcount32 = pg_popcount32_asm;
169 pg_popcount64 = pg_popcount64_asm;
170 }
171 else
172 {
173 pg_popcount32 = pg_popcount32_slow;
174 pg_popcount64 = pg_popcount64_slow;
175 }
176
177 return pg_popcount32(word);
178}
179
180static int
181pg_popcount64_choose(uint64 word)
182{
183 if (pg_popcount_available())
184 {
185 pg_popcount32 = pg_popcount32_asm;
186 pg_popcount64 = pg_popcount64_asm;
187 }
188 else
189 {
190 pg_popcount32 = pg_popcount32_slow;
191 pg_popcount64 = pg_popcount64_slow;
192 }
193
194 return pg_popcount64(word);
195}
196
197/*
198 * pg_popcount32_asm
199 * Return the number of 1 bits set in word
200 */
201static int
202pg_popcount32_asm(uint32 word)
203{
204 uint32 res;
205
206__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
207 return (int) res;
208}
209
210/*
211 * pg_popcount64_asm
212 * Return the number of 1 bits set in word
213 */
214static int
215pg_popcount64_asm(uint64 word)
216{
217 uint64 res;
218
219__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
220 return (int) res;
221}
222
223#endif /* USE_POPCNT_ASM */
224
225
226/*
227 * pg_popcount32_slow
228 * Return the number of 1 bits set in word
229 */
230static int
231pg_popcount32_slow(uint32 word)
232{
233#ifdef HAVE__BUILTIN_POPCOUNT
234 return __builtin_popcount(word);
235#else /* !HAVE__BUILTIN_POPCOUNT */
236 int result = 0;
237
238 while (word != 0)
239 {
240 result += pg_number_of_ones[word & 255];
241 word >>= 8;
242 }
243
244 return result;
245#endif /* HAVE__BUILTIN_POPCOUNT */
246}
247
248/*
249 * pg_popcount64_slow
250 * Return the number of 1 bits set in word
251 */
252static int
253pg_popcount64_slow(uint64 word)
254{
255#ifdef HAVE__BUILTIN_POPCOUNT
256#if defined(HAVE_LONG_INT_64)
257 return __builtin_popcountl(word);
258#elif defined(HAVE_LONG_LONG_INT_64)
259 return __builtin_popcountll(word);
260#else
261#error must have a working 64-bit integer datatype
262#endif
263#else /* !HAVE__BUILTIN_POPCOUNT */
264 int result = 0;
265
266 while (word != 0)
267 {
268 result += pg_number_of_ones[word & 255];
269 word >>= 8;
270 }
271
272 return result;
273#endif /* HAVE__BUILTIN_POPCOUNT */
274}
275
276
277/*
278 * pg_popcount
279 * Returns the number of 1-bits in buf
280 */
281uint64
282pg_popcount(const char *buf, int bytes)
283{
284 uint64 popcnt = 0;
285
286#if SIZEOF_VOID_P >= 8
287 /* Process in 64-bit chunks if the buffer is aligned. */
288 if (buf == (const char *) TYPEALIGN(8, buf))
289 {
290 const uint64 *words = (const uint64 *) buf;
291
292 while (bytes >= 8)
293 {
294 popcnt += pg_popcount64(*words++);
295 bytes -= 8;
296 }
297
298 buf = (const char *) words;
299 }
300#else
301 /* Process in 32-bit chunks if the buffer is aligned. */
302 if (buf == (const char *) TYPEALIGN(4, buf))
303 {
304 const uint32 *words = (const uint32 *) buf;
305
306 while (bytes >= 4)
307 {
308 popcnt += pg_popcount32(*words++);
309 bytes -= 4;
310 }
311
312 buf = (const char *) words;
313 }
314#endif
315
316 /* Process any remaining bytes */
317 while (bytes--)
318 popcnt += pg_number_of_ones[(unsigned char) *buf++];
319
320 return popcnt;
321}
322