1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include "arrow/util/int-util.h" |
19 | |
20 | #include <algorithm> |
21 | #include <cstring> |
22 | #include <limits> |
23 | |
24 | #include "arrow/util/bit-util.h" |
25 | #include "arrow/util/logging.h" |
26 | |
27 | namespace arrow { |
28 | namespace internal { |
29 | |
// Maximum values representable by unsigned integers of 8, 16, 32 and 64 bits.
static constexpr uint64_t max_uint8 =
    static_cast<uint64_t>(std::numeric_limits<uint8_t>::max());
static constexpr uint64_t max_uint16 =
    static_cast<uint64_t>(std::numeric_limits<uint16_t>::max());
static constexpr uint64_t max_uint32 =
    static_cast<uint64_t>(std::numeric_limits<uint32_t>::max());
static constexpr uint64_t max_uint64 = std::numeric_limits<uint64_t>::max();

// Masks selecting the bits that do NOT fit in 8, 16 or 32 bits: a nonzero
// result when ANDed with one of these means the value needs a wider type.
static constexpr uint64_t mask_uint8 = ~0xffULL;
static constexpr uint64_t mask_uint16 = ~0xffffULL;
static constexpr uint64_t mask_uint32 = ~0xffffffffULL;

//
// Unsigned integer width detection
//

// Largest value representable for a given byte width, indexed by that width.
// Entries at indices 0, 3, 5, 6 and 7 are unused placeholders.
static const uint64_t max_uints[] = {0, max_uint8, max_uint16, 0, max_uint32,
                                     0, 0, 0, max_uint64};
48 | |
49 | // Check if we would need to expand the underlying storage type |
50 | inline uint8_t ExpandedUIntWidth(uint64_t val, uint8_t current_width) { |
51 | // Optimize for the common case where width doesn't change |
52 | if (ARROW_PREDICT_TRUE(val <= max_uints[current_width])) { |
53 | return current_width; |
54 | } |
55 | if (current_width == 1 && val <= max_uint8) { |
56 | return 1; |
57 | } else if (current_width <= 2 && val <= max_uint16) { |
58 | return 2; |
59 | } else if (current_width <= 4 && val <= max_uint32) { |
60 | return 4; |
61 | } else { |
62 | return 8; |
63 | } |
64 | } |
65 | |
66 | uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width) { |
67 | uint8_t width = min_width; |
68 | if (min_width < 8) { |
69 | auto p = values; |
70 | const auto end = p + length; |
71 | while (p <= end - 16) { |
72 | // This is probably SIMD-izable |
73 | auto u = p[0]; |
74 | auto v = p[1]; |
75 | auto w = p[2]; |
76 | auto x = p[3]; |
77 | u |= p[4]; |
78 | v |= p[5]; |
79 | w |= p[6]; |
80 | x |= p[7]; |
81 | u |= p[8]; |
82 | v |= p[9]; |
83 | w |= p[10]; |
84 | x |= p[11]; |
85 | u |= p[12]; |
86 | v |= p[13]; |
87 | w |= p[14]; |
88 | x |= p[15]; |
89 | p += 16; |
90 | width = ExpandedUIntWidth(u | v | w | x, width); |
91 | if (ARROW_PREDICT_FALSE(width == 8)) { |
92 | break; |
93 | } |
94 | } |
95 | if (p <= end - 8) { |
96 | auto u = p[0]; |
97 | auto v = p[1]; |
98 | auto w = p[2]; |
99 | auto x = p[3]; |
100 | u |= p[4]; |
101 | v |= p[5]; |
102 | w |= p[6]; |
103 | x |= p[7]; |
104 | p += 8; |
105 | width = ExpandedUIntWidth(u | v | w | x, width); |
106 | } |
107 | while (p < end) { |
108 | width = ExpandedUIntWidth(*p++, width); |
109 | } |
110 | } |
111 | return width; |
112 | } |
113 | |
114 | uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes, |
115 | int64_t length, uint8_t min_width) { |
116 | if (valid_bytes == nullptr) { |
117 | return DetectUIntWidth(values, length, min_width); |
118 | } |
119 | uint8_t width = min_width; |
120 | if (min_width < 8) { |
121 | auto p = values; |
122 | const auto end = p + length; |
123 | auto b = valid_bytes; |
124 | |
125 | #define MASK(p, b, i) p[i] * (b[i] != 0) |
126 | |
127 | while (p <= end - 8) { |
128 | // This is probably be SIMD-izable |
129 | auto u = MASK(p, b, 0); |
130 | auto v = MASK(p, b, 1); |
131 | auto w = MASK(p, b, 2); |
132 | auto x = MASK(p, b, 3); |
133 | u |= MASK(p, b, 4); |
134 | v |= MASK(p, b, 5); |
135 | w |= MASK(p, b, 6); |
136 | x |= MASK(p, b, 7); |
137 | b += 8; |
138 | p += 8; |
139 | width = ExpandedUIntWidth(u | v | w | x, width); |
140 | if (ARROW_PREDICT_FALSE(width == 8)) { |
141 | break; |
142 | } |
143 | } |
144 | uint64_t mask = 0; |
145 | while (p < end) { |
146 | mask |= MASK(p, b, 0); |
147 | ++b; |
148 | ++p; |
149 | } |
150 | width = ExpandedUIntWidth(mask, width); |
151 | |
152 | #undef MASK |
153 | } |
154 | return width; |
155 | } |
156 | |
157 | // |
158 | // Signed integer width detection |
159 | // |
160 | |
161 | uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width) { |
162 | if (min_width == 8) { |
163 | return min_width; |
164 | } |
165 | uint8_t width = min_width; |
166 | |
167 | auto p = values; |
168 | const auto end = p + length; |
169 | // Strategy: to determine whether `x` is between -0x80 and 0x7f, |
170 | // we determine whether `x + 0x80` is between 0x00 and 0xff. The |
171 | // latter can be done with a simple AND mask with ~0xff and, more |
172 | // importantly, can be computed in a single step over multiple ORed |
173 | // values (so we can branch once every N items instead of once every item). |
174 | // This strategy could probably lend itself to explicit SIMD-ization, |
175 | // if more performance is needed. |
176 | constexpr uint64_t addend8 = 0x80ULL; |
177 | constexpr uint64_t addend16 = 0x8000ULL; |
178 | constexpr uint64_t addend32 = 0x80000000ULL; |
179 | |
180 | auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool { |
181 | auto v = *p++; |
182 | if (ARROW_PREDICT_FALSE(((v + addend) & test_mask) != 0)) { |
183 | --p; |
184 | return false; |
185 | } else { |
186 | return true; |
187 | } |
188 | }; |
189 | |
190 | auto test_four_items = [&](uint64_t addend, uint64_t test_mask) -> bool { |
191 | auto mask = (p[0] + addend) | (p[1] + addend) | (p[2] + addend) | (p[3] + addend); |
192 | p += 4; |
193 | if (ARROW_PREDICT_FALSE((mask & test_mask) != 0)) { |
194 | p -= 4; |
195 | return false; |
196 | } else { |
197 | return true; |
198 | } |
199 | }; |
200 | |
201 | if (width == 1) { |
202 | while (p <= end - 4) { |
203 | if (!test_four_items(addend8, mask_uint8)) { |
204 | width = 2; |
205 | goto width2; |
206 | } |
207 | } |
208 | while (p < end) { |
209 | if (!test_one_item(addend8, mask_uint8)) { |
210 | width = 2; |
211 | goto width2; |
212 | } |
213 | } |
214 | return 1; |
215 | } |
216 | width2: |
217 | if (width == 2) { |
218 | while (p <= end - 4) { |
219 | if (!test_four_items(addend16, mask_uint16)) { |
220 | width = 4; |
221 | goto width4; |
222 | } |
223 | } |
224 | while (p < end) { |
225 | if (!test_one_item(addend16, mask_uint16)) { |
226 | width = 4; |
227 | goto width4; |
228 | } |
229 | } |
230 | return 2; |
231 | } |
232 | width4: |
233 | if (width == 4) { |
234 | while (p <= end - 4) { |
235 | if (!test_four_items(addend32, mask_uint32)) { |
236 | width = 8; |
237 | goto width8; |
238 | } |
239 | } |
240 | while (p < end) { |
241 | if (!test_one_item(addend32, mask_uint32)) { |
242 | width = 8; |
243 | goto width8; |
244 | } |
245 | } |
246 | return 4; |
247 | } |
248 | width8: |
249 | return 8; |
250 | } |
251 | |
252 | uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length, |
253 | uint8_t min_width) { |
254 | if (valid_bytes == nullptr) { |
255 | return DetectIntWidth(values, length, min_width); |
256 | } |
257 | |
258 | if (min_width == 8) { |
259 | return min_width; |
260 | } |
261 | uint8_t width = min_width; |
262 | |
263 | auto p = values; |
264 | const auto end = p + length; |
265 | auto b = valid_bytes; |
266 | // Strategy is similar to the no-nulls case above, but we also |
267 | // have to zero any incoming items that have a zero validity byte. |
268 | constexpr uint64_t addend8 = 0x80ULL; |
269 | constexpr uint64_t addend16 = 0x8000ULL; |
270 | constexpr uint64_t addend32 = 0x80000000ULL; |
271 | |
272 | #define MASK(p, b, addend, i) (p[i] + addend) * (b[i] != 0) |
273 | |
274 | auto test_one_item = [&](uint64_t addend, uint64_t test_mask) -> bool { |
275 | auto v = MASK(p, b, addend, 0); |
276 | ++b; |
277 | ++p; |
278 | if (ARROW_PREDICT_FALSE((v & test_mask) != 0)) { |
279 | --b; |
280 | --p; |
281 | return false; |
282 | } else { |
283 | return true; |
284 | } |
285 | }; |
286 | |
287 | auto test_eight_items = [&](uint64_t addend, uint64_t test_mask) -> bool { |
288 | auto mask1 = MASK(p, b, addend, 0) | MASK(p, b, addend, 1) | MASK(p, b, addend, 2) | |
289 | MASK(p, b, addend, 3); |
290 | auto mask2 = MASK(p, b, addend, 4) | MASK(p, b, addend, 5) | MASK(p, b, addend, 6) | |
291 | MASK(p, b, addend, 7); |
292 | b += 8; |
293 | p += 8; |
294 | if (ARROW_PREDICT_FALSE(((mask1 | mask2) & test_mask) != 0)) { |
295 | b -= 8; |
296 | p -= 8; |
297 | return false; |
298 | } else { |
299 | return true; |
300 | } |
301 | }; |
302 | |
303 | #undef MASK |
304 | |
305 | if (width == 1) { |
306 | while (p <= end - 8) { |
307 | if (!test_eight_items(addend8, mask_uint8)) { |
308 | width = 2; |
309 | goto width2; |
310 | } |
311 | } |
312 | while (p < end) { |
313 | if (!test_one_item(addend8, mask_uint8)) { |
314 | width = 2; |
315 | goto width2; |
316 | } |
317 | } |
318 | return 1; |
319 | } |
320 | width2: |
321 | if (width == 2) { |
322 | while (p <= end - 8) { |
323 | if (!test_eight_items(addend16, mask_uint16)) { |
324 | width = 4; |
325 | goto width4; |
326 | } |
327 | } |
328 | while (p < end) { |
329 | if (!test_one_item(addend16, mask_uint16)) { |
330 | width = 4; |
331 | goto width4; |
332 | } |
333 | } |
334 | return 2; |
335 | } |
336 | width4: |
337 | if (width == 4) { |
338 | while (p <= end - 8) { |
339 | if (!test_eight_items(addend32, mask_uint32)) { |
340 | width = 8; |
341 | goto width8; |
342 | } |
343 | } |
344 | while (p < end) { |
345 | if (!test_one_item(addend32, mask_uint32)) { |
346 | width = 8; |
347 | goto width8; |
348 | } |
349 | } |
350 | return 4; |
351 | } |
352 | width8: |
353 | return 8; |
354 | } |
355 | |
// Element-wise static_cast of `length` values from Source to Dest.
// Callers guarantee every value fits in the destination type.
template <typename Source, typename Dest>
inline void DowncastIntsInternal(const Source* src, Dest* dest, int64_t length) {
  int64_t i = 0;
  // Process four entries per iteration to amortize loop overhead.
  for (; i + 4 <= length; i += 4) {
    dest[i] = static_cast<Dest>(src[i]);
    dest[i + 1] = static_cast<Dest>(src[i + 1]);
    dest[i + 2] = static_cast<Dest>(src[i + 2]);
    dest[i + 3] = static_cast<Dest>(src[i + 3]);
  }
  // Remaining (< 4) entries.
  for (; i < length; ++i) {
    dest[i] = static_cast<Dest>(src[i]);
  }
}
372 | |
373 | void DowncastInts(const int64_t* source, int8_t* dest, int64_t length) { |
374 | DowncastIntsInternal(source, dest, length); |
375 | } |
376 | |
377 | void DowncastInts(const int64_t* source, int16_t* dest, int64_t length) { |
378 | DowncastIntsInternal(source, dest, length); |
379 | } |
380 | |
381 | void DowncastInts(const int64_t* source, int32_t* dest, int64_t length) { |
382 | DowncastIntsInternal(source, dest, length); |
383 | } |
384 | |
// Same-width "downcast": a plain element-wise copy.
void DowncastInts(const int64_t* source, int64_t* dest, int64_t length) {
  std::copy(source, source + length, dest);
}
388 | |
389 | void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length) { |
390 | DowncastIntsInternal(source, dest, length); |
391 | } |
392 | |
393 | void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length) { |
394 | DowncastIntsInternal(source, dest, length); |
395 | } |
396 | |
397 | void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length) { |
398 | DowncastIntsInternal(source, dest, length); |
399 | } |
400 | |
// Same-width "downcast": a raw memory copy.
void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length) {
  // Use sizeof(uint64_t), not sizeof(int64_t): same size, but consistent
  // with the unsigned element type being copied.
  memcpy(dest, source, length * sizeof(uint64_t));
}
404 | |
// For each of the `length` input values, look up `transpose_map[value]`
// and store the result in `dest`.  Every input value must be a valid
// (in-bounds) index into `transpose_map`.
template <typename InputInt, typename OutputInt>
void TransposeInts(const InputInt* src, OutputInt* dest, int64_t length,
                   const int32_t* transpose_map) {
  int64_t i = 0;
  // Process four entries per iteration to amortize loop overhead.
  for (; i + 4 <= length; i += 4) {
    dest[i] = static_cast<OutputInt>(transpose_map[src[i]]);
    dest[i + 1] = static_cast<OutputInt>(transpose_map[src[i + 1]]);
    dest[i + 2] = static_cast<OutputInt>(transpose_map[src[i + 2]]);
    dest[i + 3] = static_cast<OutputInt>(transpose_map[src[i + 3]]);
  }
  // Remaining (< 4) entries.
  for (; i < length; ++i) {
    dest[i] = static_cast<OutputInt>(transpose_map[src[i]]);
  }
}
422 | |
// Explicitly instantiate (and export from the shared library) TransposeInts
// for every combination of signed integer source and destination widths
// (8/16/32/64 bits), so the template definition can live in this .cc file.
#define INSTANTIATE(SRC, DEST)              \
  template ARROW_EXPORT void TransposeInts( \
      const SRC* source, DEST* dest, int64_t length, const int32_t* transpose_map);

// All source widths for a fixed destination width.
#define INSTANTIATE_ALL_DEST(DEST) \
  INSTANTIATE(int8_t, DEST)        \
  INSTANTIATE(int16_t, DEST)       \
  INSTANTIATE(int32_t, DEST)       \
  INSTANTIATE(int64_t, DEST)

// All 16 source/destination combinations.
#define INSTANTIATE_ALL()        \
  INSTANTIATE_ALL_DEST(int8_t)   \
  INSTANTIATE_ALL_DEST(int16_t)  \
  INSTANTIATE_ALL_DEST(int32_t)  \
  INSTANTIATE_ALL_DEST(int64_t)

INSTANTIATE_ALL()

#undef INSTANTIATE
#undef INSTANTIATE_ALL
#undef INSTANTIATE_ALL_DEST
444 | |
445 | } // namespace internal |
446 | } // namespace arrow |
447 | |