/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef SkChecksum_opts_DEFINED
#define SkChecksum_opts_DEFINED

#include "include/core/SkTypes.h"
#include "include/private/SkChecksum.h"
#include "src/core/SkUtils.h" // sk_unaligned_load

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    #include <immintrin.h>
#elif defined(SK_ARM_HAS_CRC32)
    #include <arm_acle.h>
#endif

namespace SK_OPTS_NS {

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64))
    // This is not a CRC32. It's Just A Hash that uses those instructions because they're fast.
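    // Note that each backend in this file therefore produces a different digest for the
    // same input, so the result is assumed to be a process-local hash, not a portable
    // checksum.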
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) {
        auto data = (const uint8_t*)vdata;

        // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while.
        uint64_t hash = seed;
        if (bytes >= 24) {
            // We'll create 3 independent hashes, each using _mm_crc32_u64()
            // to hash 8 bytes per step. Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
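            // (On recent x86 cores the crc32 instruction is assumed to have roughly
            // 3-cycle latency and 1-per-cycle throughput, so three independent chains
            // keep the unit close to saturated.)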
            uint64_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = _mm_crc32_u64(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = _mm_crc32_u64(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = _mm_crc32_u64(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
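            // Each lane holds a 32-bit CRC zero-extended to 64 bits, so folding the
            // lanes with the 32-bit instruction below discards nothing.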
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // Consume 8 bytes here; the (bytes & 8) test below consumes 8 more.
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = _mm_crc32_u64(hash, sk_unaligned_load<uint64_t>(data));
            data += 8;
        }

        // The remaining _mm_crc32_u*() calls operate on a 32-bit register.
        // We don't lose anything here: only the bottom 32 bits were populated.
        auto hash32 = (uint32_t)hash;

        if (bytes & 4) {
            hash32 = _mm_crc32_u32(hash32, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash32 = _mm_crc32_u16(hash32, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash32 = _mm_crc32_u8(hash32, sk_unaligned_load<uint8_t>(data));
        }
        return hash32;
    }

#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
    // 32-bit version of the above, using _mm_crc32_u32() but not _mm_crc32_u64().
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        if (bytes >= 12) {
            // We'll create 3 independent hashes, each using _mm_crc32_u32()
            // to hash 4 bytes per step. Both 3 and independent are important:
            // we can execute 3 of these instructions in parallel on a single core.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/12;
            while (steps --> 0) {
                a = _mm_crc32_u32(a, sk_unaligned_load<uint32_t>(data+0));
                b = _mm_crc32_u32(b, sk_unaligned_load<uint32_t>(data+4));
                c = _mm_crc32_u32(c, sk_unaligned_load<uint32_t>(data+8));
                data += 12;
            }
            bytes %= 12;
            hash = _mm_crc32_u32(a, _mm_crc32_u32(b, c));
        }

        SkASSERT(bytes < 12);
        if (bytes >= 8) {
            // Consume 4 bytes here; the (bytes & 4) test below consumes 4 more.
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            bytes -= 4;
            data  += 4;
        }

        SkASSERT(bytes < 8);
        if (bytes & 4) {
            hash = _mm_crc32_u32(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = _mm_crc32_u16(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = _mm_crc32_u8(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#elif defined(SK_ARM_HAS_CRC32)
    // ARMv8 CRC32-extension version of the above.
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;
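        // Unlike _mm_crc32_u64(), __crc32d() keeps a 32-bit accumulator even though it
        // consumes 8 bytes of data per call, so no narrowing step is needed at the end.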
        if (bytes >= 24) {
            // Same trick as above: 3 independent 8-byte-per-step hashes for ILP.
            uint32_t a = hash,
                     b = hash,
                     c = hash;
            size_t steps = bytes/24;
            while (steps --> 0) {
                a = __crc32d(a, sk_unaligned_load<uint64_t>(data+ 0));
                b = __crc32d(b, sk_unaligned_load<uint64_t>(data+ 8));
                c = __crc32d(c, sk_unaligned_load<uint64_t>(data+16));
                data += 24;
            }
            bytes %= 24;
            hash = __crc32w(a, __crc32w(b, c));
        }

        SkASSERT(bytes < 24);
        if (bytes >= 16) {
            // Consume 8 bytes here; the (bytes & 8) test below consumes 8 more.
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            bytes -= 8;
            data  += 8;
        }

        SkASSERT(bytes < 16);
        if (bytes & 8) {
            hash = __crc32d(hash, sk_unaligned_load<uint64_t>(data));
            data += 8;
        }
        if (bytes & 4) {
            hash = __crc32w(hash, sk_unaligned_load<uint32_t>(data));
            data += 4;
        }
        if (bytes & 2) {
            hash = __crc32h(hash, sk_unaligned_load<uint16_t>(data));
            data += 2;
        }
        if (bytes & 1) {
            hash = __crc32b(hash, sk_unaligned_load<uint8_t>(data));
        }
        return hash;
    }

#else
    // This is Murmur3.
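    // (Specifically, the 4-byte body of 32-bit MurmurHash3; SkChecksum::Mix is assumed
    // to supply the fmix32-style finalizer.)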
    /*not static*/ inline uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
        auto data = (const uint8_t*)vdata;

        size_t original_bytes = bytes;

        // Handle 4 bytes at a time while possible.
        while (bytes >= 4) {
            uint32_t k = sk_unaligned_load<uint32_t>(data);
            k *= 0xcc9e2d51;
            k = (k << 15) | (k >> 17);    // rotate left by 15
            k *= 0x1b873593;

            hash ^= k;
            hash = (hash << 13) | (hash >> 19);    // rotate left by 13
            hash *= 5;
            hash += 0xe6546b64;

            bytes -= 4;
            data  += 4;
        }

        // Handle the last 0-3 bytes.
        uint32_t k = 0;
        switch (bytes & 3) {
            case 3: k ^= data[2] << 16; [[fallthrough]];
            case 2: k ^= data[1] <<  8; [[fallthrough]];
            case 1: k ^= data[0] <<  0;
                    k *= 0xcc9e2d51;
                    k = (k << 15) | (k >> 17);
                    k *= 0x1b873593;
                    hash ^= k;
        }

        // Mix in the length (truncated to 32 bits), then finalize.
        hash ^= (uint32_t)original_bytes;
        return SkChecksum::Mix(hash);
    }
#endif
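
// A minimal usage sketch (hypothetical call site; in practice SkOpts is expected to
// dispatch to whichever hash_fn was compiled in, see src/core/SkOpts.cpp):
//
//     uint32_t h = SK_OPTS_NS::hash_fn(buf, len, /*seed=*/0);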

} // namespace SK_OPTS_NS

#endif//SkChecksum_opts_DEFINED