1#ifndef SIMDJSON_ARM64_BITMASK_H
2#define SIMDJSON_ARM64_BITMASK_H
3
4namespace simdjson {
5namespace SIMDJSON_IMPLEMENTATION {
6namespace {
7
//
// Cumulative (prefix) bitwise xor: output bit i is the xor of input bits 0..i,
// so the output toggles every time a set bit is encountered, scanning from the
// least significant bit upward.
//
// Example: prefix_xor(00100100) == 00011100
//
simdjson_inline uint64_t prefix_xor(uint64_t bitmask) {
  //
  // Why this is not done with PMULL:
  //
  // A carry-less multiply by all ones, i.e. vmull_p64(-1ULL, bitmask) guarded by
  // __ARM_FEATURE_CRYPTO (some ARM processors lack this extension), would give
  // the same result, but it is apparently slow.
  //
  // Analysis by @sebpop:
  // In the assembly for src/stage1_find_marks.cpp the eors end up interleaved
  // with other vector code, so the extra cycles of this sequence effectively do
  // not matter: the GPR units are otherwise idle and the critical path is on the
  // FP side. PMULL would also need two extra fmovs: GPR->FP (3 cycles on N1,
  // 5 cycles on A72) and FP->GPR (2 cycles on N1, 5 cycles on A72).
  //
  uint64_t acc = bitmask;
  // Doubling shifts 1, 2, 4, 8, 16, 32: after the step with shift s, each bit
  // holds the xor of the 2*s input bits ending at its own position.
  for (unsigned shift = 1; shift < 64; shift *= 2) {
    acc ^= acc << shift;
  }
  return acc;
}
35
36} // unnamed namespace
} // namespace SIMDJSON_IMPLEMENTATION
38} // namespace simdjson
39
40#endif
41