#ifndef SIMDJSON_ARM64_BITMASK_H
#define SIMDJSON_ARM64_BITMASK_H

namespace simdjson {
namespace SIMDJSON_IMPLEMENTATION {
namespace {

//
// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
//
// For example, prefix_xor(00100100) == 00011100
//
simdjson_inline uint64_t prefix_xor(uint64_t bitmask) {
  /////////////
  // We could do this with PMULL, but it is apparently slow.
  //
  //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
  //return vmull_p64(-1ULL, bitmask);
  //#else
  // Analysis by @sebpop:
  // When diffing the assembly for src/stage1_find_marks.cpp, I see that the eors are all spread
  // out in between other vector code, so effectively the extra cycles of the sequence do not
  // matter: the GPR units are otherwise idle and the critical path is on the FP side.
  // Also, PMULL requires two extra fmovs: GPR->FP (3 cycles on N1, 5 cycles on A72)
  // and FP->GPR (2 cycles on N1, 5 cycles on A72).
  /////////////
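  // Parallel-prefix xor: each shift-xor step doubles the span of bits folded
  // into every position, so six steps cover all 64 bits (log2(64) == 6).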
  bitmask ^= bitmask << 1;
  bitmask ^= bitmask << 2;
  bitmask ^= bitmask << 4;
  bitmask ^= bitmask << 8;
  bitmask ^= bitmask << 16;
  bitmask ^= bitmask << 32;
  return bitmask;
}
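
// Illustrative check (a minimal sketch, not part of the simdjson build): the
// example above implies the following self-contained program would pass.
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     // 00100100 -> 00011100, as in the comment above prefix_xor.
//     assert(prefix_xor(0x24ULL) == 0x1CULL);
//     return 0;
//   }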

} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
} // namespace simdjson

#endif // SIMDJSON_ARM64_BITMASK_H