1/* Copyright (C) 2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_POPCNT_H
9#define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_POPCNT_H
10
11#ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13#endif
14
15#include <simdpp/types.h>
16#include <simdpp/detail/null/bitwise.h>
17#include <simdpp/core/i_popcnt.h>
18#include <simdpp/core/i_reduce_add.h>
19
20namespace simdpp {
21namespace SIMDPP_ARCH_NAMESPACE {
22namespace detail {
23namespace insn {
24
25static SIMDPP_INL
26uint32_t i_reduce_popcnt(const uint32<4>& a)
27{
28#if SIMDPP_USE_NULL
29 uint32_t r = 0;
30 for (unsigned i = 0; i < a.length; i++) {
31 r += detail::null::el_popcnt32(a.el(i));
32 }
33 return r;
34#elif SIMDPP_USE_X86_POPCNT_INSN
35 uint32_t r = 0;
36#if SIMDPP_64_BITS
37 uint64<2> a64; a64 = a;
38 r += _mm_popcnt_u64(extract<0>(a64));
39 r += _mm_popcnt_u64(extract<1>(a64));
40#else
41 r += _mm_popcnt_u32(extract<0>(a));
42 r += _mm_popcnt_u32(extract<1>(a));
43 r += _mm_popcnt_u32(extract<2>(a));
44 r += _mm_popcnt_u32(extract<3>(a));
45#endif
46 return r;
47#elif SIMDPP_USE_NEON
48 uint8<16> r = vcntq_u8(vreinterpretq_u8_u32(a.native()));
49 return reduce_add(r);
50#elif SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA
51 uint64<2> a64; a64 = a;
52 a64 = popcnt(a64);
53 return reduce_add(a64);
54#elif SIMDPP_USE_SSE2
55 uint64<2> r = popcnt((uint64<2>)a);
56 return (uint32_t) reduce_add(r);
57#else
58 uint32<4> r = popcnt(a);
59 return reduce_add(r);
60#endif
61}
62
#if SIMDPP_USE_AVX2
// Returns the total number of set bits over all eight 32-bit elements of `a`.
static SIMDPP_INL
uint32_t i_reduce_popcnt(const uint32<8>& a)
{
#if SIMDPP_USE_X86_POPCNT_INSN && SIMDPP_64_BITS
    // With the scalar popcnt instruction on a 64-bit target, split the
    // vector into two 4-element halves and defer to the uint32<4> overload.
    uint32<4> lower, upper;
    split(a, lower, upper);
    uint32_t total = i_reduce_popcnt(lower);
    total += i_reduce_popcnt(upper);
    return total;
#else
    // Otherwise count bits on 64-bit elements and sum them; the sum is
    // explicitly narrowed to the 32-bit return type.
    uint64<4> counts = popcnt((uint64<4>)a);
    return (uint32_t) reduce_add(counts);
#endif
}
#endif
77
#if SIMDPP_USE_AVX512F
// Returns the total number of set bits over all sixteen 32-bit elements of
// `a`.
static SIMDPP_INL
uint32_t i_reduce_popcnt(const uint32<16>& a)
{
#if SIMDPP_USE_X86_POPCNT_INSN && SIMDPP_64_BITS
    // With the scalar popcnt instruction on a 64-bit target, split the
    // vector into two 8-element halves and sum the counts of each half.
    uint32<8> a0, a1;
    split(a, a0, a1);
    return i_reduce_popcnt(a0) + i_reduce_popcnt(a1);
#else
    // TODO: support AVX512VPOPCNTDQ
    // Count bits on 64-bit elements and sum them. The sum is narrowed
    // explicitly to the 32-bit return type, as the other overloads that
    // reduce 64-bit elements in this file do.
    uint64<8> r = popcnt((uint64<8>)a);
    return (uint32_t) reduce_add(r);
#endif
}
#endif
93
94template<unsigned N> SIMDPP_INL
95uint32_t i_reduce_popcnt(const uint32<N>& a)
96{
97 uint32_t r = 0;
98 for (unsigned j = 0; j < a.vec_length; ++j) {
99 r += i_reduce_popcnt(a.vec(j));
100 }
101 return r;
102}
103
104// -----------------------------------------------------------------------------
105
106} // namespace insn
107} // namespace detail
108} // namespace SIMDPP_ARCH_NAMESPACE
109} // namespace simdpp
110
111#endif
112
113