1 | /* Copyright (C) 2016 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_MAX_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_INSN_I_REDUCE_MAX_H |
10 | |
11 | #ifndef LIBSIMDPP_SIMD_H |
12 | #error "This file must be included through simd.h" |
13 | #endif |
14 | |
15 | #include <simdpp/types.h> |
16 | #include <simdpp/core/i_max.h> |
17 | #include <simdpp/core/extract.h> |
18 | #include <simdpp/core/move_l.h> |
19 | #include <simdpp/core/make_uint.h> |
20 | #include <simdpp/detail/mem_block.h> |
21 | #include <simdpp/detail/extract128.h> |
22 | #include <limits> |
23 | |
24 | namespace simdpp { |
25 | namespace SIMDPP_ARCH_NAMESPACE { |
26 | namespace detail { |
27 | namespace insn { |
28 | |
29 | static SIMDPP_INL |
30 | uint8_t i_reduce_max(const uint8x16& a) |
31 | { |
32 | #if SIMDPP_USE_NULL |
33 | uint8_t r = a.el(0); |
34 | for (unsigned i = 0; i < a.length; i++) { |
35 | r = r > a.el(i) ? r : a.el(i); |
36 | } |
37 | return r; |
38 | #elif SIMDPP_USE_NEON64 |
39 | return vmaxvq_u8(a.native()); |
40 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
41 | uint8x16 r = max(a, move16_l<8>(a)); |
42 | r = max(r, move16_l<4>(r)); |
43 | r = max(r, move16_l<2>(r)); |
44 | r = max(r, move16_l<1>(r)); |
45 | return extract<0>(r); |
46 | #endif |
47 | } |
48 | |
#if SIMDPP_USE_AVX2
// Reduces a 32 x uint8 vector: fold the two 128-bit lanes together, then
// delegate to the 128-bit reduction.
static SIMDPP_INL
uint8_t i_reduce_max(const uint8<32>& a)
{
    const uint8x16 lo = detail::extract128<0>(a);
    const uint8x16 hi = detail::extract128<1>(a);
    uint8x16 folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
58 | |
#if SIMDPP_USE_AVX512BW
// Reduces a 64 x uint8 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
SIMDPP_INL uint8_t i_reduce_max(const uint8<64>& a)
{
    const uint8<32> lo = detail::extract256<0>(a);
    const uint8<32> hi = detail::extract256<1>(a);
    uint8<32> folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
67 | |
68 | template<unsigned N> |
69 | SIMDPP_INL uint8_t i_reduce_max(const uint8<N>& a) |
70 | { |
71 | #if SIMDPP_USE_NULL |
72 | uint8_t r = std::numeric_limits<uint8_t>::min(); |
73 | for (unsigned j = 0; j < a.vec_length; ++j) { |
74 | for (unsigned i = 0; i < a.base_length; i++) { |
75 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
76 | } |
77 | } |
78 | return r; |
79 | #else |
80 | uint8v r = a.vec(0); |
81 | for (unsigned j = 1; j < a.vec_length; ++j) { |
82 | r = max(r, a.vec(j)); |
83 | } |
84 | return i_reduce_max(r); |
85 | #endif |
86 | } |
87 | |
88 | // ----------------------------------------------------------------------------- |
89 | |
90 | static SIMDPP_INL |
91 | int8_t i_reduce_max(const int8x16& a) |
92 | { |
93 | #if SIMDPP_USE_NULL |
94 | int8_t r = a.el(0); |
95 | for (unsigned i = 0; i < a.length; i++) { |
96 | r = r > a.el(i) ? r : a.el(i); |
97 | } |
98 | return r; |
99 | #elif SIMDPP_USE_NEON64 |
100 | return vmaxvq_s8(a.native()); |
101 | #elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
102 | int8x16 r = a; |
103 | r = max(r, move16_l<8>(r)); |
104 | r = max(r, move16_l<4>(r)); |
105 | r = max(r, move16_l<2>(r)); |
106 | r = max(r, move16_l<1>(r)); |
107 | return extract<0>(r); |
108 | #elif SIMDPP_USE_SSE2 |
109 | // no instruction for int8 max available, only for uint8 |
110 | uint8x16 ca = bit_xor(a, 0x80); |
111 | return i_reduce_max(ca) ^ 0x80; |
112 | #endif |
113 | } |
114 | |
#if SIMDPP_USE_AVX2
// Reduces a 32 x int8 vector: fold the two 128-bit lanes together, then
// delegate to the 128-bit reduction.
static SIMDPP_INL
int8_t i_reduce_max(const int8<32>& a)
{
    const int8x16 lo = detail::extract128<0>(a);
    const int8x16 hi = detail::extract128<1>(a);
    int8x16 folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
124 | |
#if SIMDPP_USE_AVX512BW
// Reduces a 64 x int8 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
SIMDPP_INL int8_t i_reduce_max(const int8<64>& a)
{
    const int8<32> lo = detail::extract256<0>(a);
    const int8<32> hi = detail::extract256<1>(a);
    int8<32> folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
133 | |
134 | template<unsigned N> |
135 | SIMDPP_INL int8_t i_reduce_max(const int8<N>& a) |
136 | { |
137 | #if SIMDPP_USE_NULL |
138 | int8_t r = std::numeric_limits<int8_t>::min();; |
139 | for (unsigned j = 0; j < a.vec_length; ++j) { |
140 | for (unsigned i = 0; i < a.base_length; i++) { |
141 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
142 | } |
143 | } |
144 | return r; |
145 | #elif SIMDPP_USE_SSE2 && !SIMDPP_USE_SSE4_1 |
146 | // no instruction for int8 max available, only for uint8 |
147 | uint8x16 r = bit_xor(a.vec(0), 0x80); |
148 | for (unsigned j = 1; j < a.vec_length; ++j) { |
149 | uint8x16 ca = bit_xor(a.vec(j), 0x80); |
150 | r = max(r, ca); |
151 | } |
152 | return i_reduce_max(r) ^ 0x80; |
153 | #else |
154 | int8v r = a.vec(0); |
155 | for (unsigned j = 1; j < a.vec_length; ++j) { |
156 | r = max(r, a.vec(j)); |
157 | } |
158 | return i_reduce_max(r); |
159 | #endif |
160 | } |
161 | |
162 | // ----------------------------------------------------------------------------- |
163 | static SIMDPP_INL |
164 | int16_t i_reduce_max(const int16x8& a); |
165 | |
166 | static SIMDPP_INL |
167 | uint16_t i_reduce_max(const uint16x8& a) |
168 | { |
169 | #if SIMDPP_USE_NULL |
170 | uint16_t r = a.el(0); |
171 | for (unsigned i = 0; i < a.length; i++) { |
172 | r = r > a.el(i) ? r : a.el(i); |
173 | } |
174 | return r; |
175 | #elif SIMDPP_USE_NEON64 |
176 | return vmaxvq_u16(a.native()); |
177 | #elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
178 | uint16x8 r = max(a, move8_l<4>(a)); |
179 | r = max(r, move8_l<2>(r)); |
180 | r = max(r, move8_l<1>(r)); |
181 | return extract<0>(r); |
182 | #elif SIMDPP_USE_SSE2 |
183 | // no instruction for uint16 max available, only for int16 |
184 | int16x8 ca = bit_xor(a, 0x8000); |
185 | return i_reduce_max(ca) ^ 0x8000; |
186 | #endif |
187 | } |
188 | |
#if SIMDPP_USE_AVX2
// Reduces a 16 x uint16 vector: fold the two 128-bit lanes together, then
// delegate to the 128-bit reduction.
static SIMDPP_INL
uint16_t i_reduce_max(const uint16x16& a)
{
    const uint16x8 lo = detail::extract128<0>(a);
    const uint16x8 hi = detail::extract128<1>(a);
    uint16x8 folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
198 | |
#if SIMDPP_USE_AVX512BW
// Reduces a 32 x uint16 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
SIMDPP_INL uint16_t i_reduce_max(const uint16<32>& a)
{
    const uint16<16> lo = detail::extract256<0>(a);
    const uint16<16> hi = detail::extract256<1>(a);
    uint16<16> folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
207 | |
208 | template<unsigned N> |
209 | SIMDPP_INL uint16_t i_reduce_max(const uint16<N>& a) |
210 | { |
211 | #if SIMDPP_USE_NULL |
212 | uint16_t r = std::numeric_limits<uint16_t>::min();; |
213 | for (unsigned j = 0; j < a.vec_length; ++j) { |
214 | for (unsigned i = 0; i < a.base_length; i++) { |
215 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
216 | } |
217 | } |
218 | return r; |
219 | #elif SIMDPP_USE_SSE2 && !SIMDPP_USE_SSE4_1 |
220 | // no instruction for uint16 max available, only for int16 |
221 | int16x8 r = bit_xor(a.vec(0), 0x8000); |
222 | for (unsigned j = 1; j < a.vec_length; ++j) { |
223 | int16x8 ca = bit_xor(a.vec(j), 0x8000); |
224 | r = max(r, ca); |
225 | } |
226 | return i_reduce_max(r) ^ 0x8000; |
227 | #else |
228 | uint16v r = a.vec(0); |
229 | for (unsigned j = 1; j < a.vec_length; ++j) { |
230 | r = max(r, a.vec(j)); |
231 | } |
232 | return i_reduce_max(r); |
233 | #endif |
234 | } |
235 | |
236 | // ----------------------------------------------------------------------------- |
237 | |
238 | static SIMDPP_INL |
239 | int16_t i_reduce_max(const int16x8& a) |
240 | { |
241 | #if SIMDPP_USE_NULL |
242 | int16_t r = a.el(0); |
243 | for (unsigned i = 0; i < a.length; i++) { |
244 | r = r > a.el(i) ? r : a.el(i); |
245 | } |
246 | return r; |
247 | #elif SIMDPP_USE_NEON64 |
248 | return vmaxvq_s16(a.native()); |
249 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
250 | int16x8 r = max(a, move8_l<4>(a)); |
251 | r = max(r, move8_l<2>(r)); |
252 | r = max(r, move8_l<1>(r)); |
253 | return extract<0>(r); |
254 | #endif |
255 | } |
256 | |
#if SIMDPP_USE_AVX2
// Reduces a 16 x int16 vector: fold the two 128-bit lanes together, then
// delegate to the 128-bit reduction.
static SIMDPP_INL
int16_t i_reduce_max(const int16x16& a)
{
    const int16x8 lo = detail::extract128<0>(a);
    const int16x8 hi = detail::extract128<1>(a);
    int16x8 folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
266 | |
#if SIMDPP_USE_AVX512BW
// Reduces a 32 x int16 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
SIMDPP_INL int16_t i_reduce_max(const int16<32>& a)
{
    const int16<16> lo = detail::extract256<0>(a);
    const int16<16> hi = detail::extract256<1>(a);
    int16<16> folded = max(lo, hi);
    return i_reduce_max(folded);
}
#endif
275 | |
276 | template<unsigned N> |
277 | SIMDPP_INL int16_t i_reduce_max(const int16<N>& a) |
278 | { |
279 | #if SIMDPP_USE_NULL |
280 | int16_t r = std::numeric_limits<int16_t>::min();; |
281 | for (unsigned j = 0; j < a.vec_length; ++j) { |
282 | for (unsigned i = 0; i < a.base_length; i++) { |
283 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
284 | } |
285 | } |
286 | return r; |
287 | #else |
288 | int16v r = a.vec(0); |
289 | for (unsigned j = 1; j < a.vec_length; ++j) { |
290 | r = max(r, a.vec(j)); |
291 | } |
292 | return i_reduce_max(r); |
293 | #endif |
294 | } |
295 | |
296 | // ----------------------------------------------------------------------------- |
297 | |
298 | static SIMDPP_INL |
299 | uint32_t i_reduce_max(const uint32x4& a) |
300 | { |
301 | #if SIMDPP_USE_NULL |
302 | uint32_t r = a.el(0); |
303 | for (unsigned i = 0; i < a.length; i++) { |
304 | r = r > a.el(i) ? r : a.el(i); |
305 | } |
306 | return r; |
307 | #elif SIMDPP_USE_NEON64 |
308 | return vmaxvq_u32(a.native()); |
309 | #elif SIMDPP_USE_SSE4_1 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
310 | uint32x4 r = max(a, move4_l<2>(a)); |
311 | r = max(r, move4_l<1>(r)); |
312 | return extract<0>(r); |
313 | #elif SIMDPP_USE_SSE2 |
314 | mem_block<uint32x4> b = a; |
315 | uint32_t r = b[0]; |
316 | for (unsigned i = 1; i < b.length; i++) { |
317 | r = r > b[i] ? r : b[i]; |
318 | } |
319 | return r; |
320 | #endif |
321 | } |
322 | |
#if SIMDPP_USE_AVX2
// Reduces an 8 x uint32 vector: fold the 128-bit lanes, then tree-reduce
// within the remaining lane.
static SIMDPP_INL
uint32_t i_reduce_max(const uint32x8& a)
{
    const uint32x4 lo = detail::extract128<0>(a);
    const uint32x4 hi = detail::extract128<1>(a);
    uint32x4 m = max(lo, hi);
    m = max(m, move4_l<2>(m));
    m = max(m, move4_l<1>(m));
    return extract<0>(m);
}
#endif
334 | |
#if SIMDPP_USE_AVX512F
// Reduces a 16 x uint32 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
static SIMDPP_INL
uint32_t i_reduce_max(const uint32<16>& a)
{
    uint32<8> folded = max(extract256<0>(a), extract256<1>(a));
    return i_reduce_max(folded);
}
#endif
342 | |
343 | template<unsigned N> |
344 | SIMDPP_INL uint32_t i_reduce_max(const uint32<N>& a) |
345 | { |
346 | #if SIMDPP_USE_NULL |
347 | uint32_t r = std::numeric_limits<uint32_t>::min();; |
348 | for (unsigned j = 0; j < a.vec_length; ++j) { |
349 | for (unsigned i = 0; i < a.base_length; i++) { |
350 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
351 | } |
352 | } |
353 | return r; |
354 | #else |
355 | uint32v r = a.vec(0); |
356 | for (unsigned j = 1; j < a.vec_length; ++j) { |
357 | r = max(r, a.vec(j)); |
358 | } |
359 | return i_reduce_max(r); |
360 | #endif |
361 | } |
362 | |
363 | // ----------------------------------------------------------------------------- |
364 | |
365 | static SIMDPP_INL |
366 | int32_t i_reduce_max(const int32x4& a) |
367 | { |
368 | #if SIMDPP_USE_NULL |
369 | int32_t r = a.el(0); |
370 | for (unsigned i = 0; i < a.length; i++) { |
371 | r = r > a.el(i) ? r : a.el(i); |
372 | } |
373 | return r; |
374 | #elif SIMDPP_USE_NEON64 |
375 | return vmaxvq_s32(a.native()); |
376 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC || SIMDPP_USE_MSA |
377 | int32x4 r = max(a, move4_l<2>(a)); |
378 | r = max(r, move4_l<1>(r)); |
379 | return extract<0>(r); |
380 | #endif |
381 | } |
382 | |
#if SIMDPP_USE_AVX2
// Reduces an 8 x int32 vector: fold the 128-bit lanes, then tree-reduce
// within the remaining lane.
static SIMDPP_INL
int32_t i_reduce_max(const int32x8& a)
{
    const int32x4 lo = detail::extract128<0>(a);
    const int32x4 hi = detail::extract128<1>(a);
    int32x4 m = max(lo, hi);
    m = max(m, move4_l<2>(m));
    m = max(m, move4_l<1>(m));
    return extract<0>(m);
}
#endif
394 | |
#if SIMDPP_USE_AVX512F
// Reduces a 16 x int32 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
static SIMDPP_INL
int32_t i_reduce_max(const int32<16>& a)
{
    int32<8> folded = max(extract256<0>(a), extract256<1>(a));
    return i_reduce_max(folded);
}
#endif
402 | |
403 | template<unsigned N> |
404 | SIMDPP_INL int32_t i_reduce_max(const int32<N>& a) |
405 | { |
406 | #if SIMDPP_USE_NULL |
407 | int32_t r = std::numeric_limits<int32_t>::min();; |
408 | for (unsigned j = 0; j < a.vec_length; ++j) { |
409 | for (unsigned i = 0; i < a.base_length; i++) { |
410 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
411 | } |
412 | } |
413 | return r; |
414 | #else |
415 | int32v r = a.vec(0); |
416 | for (unsigned j = 1; j < a.vec_length; ++j) { |
417 | r = max(r, a.vec(j)); |
418 | } |
419 | return i_reduce_max(r); |
420 | #endif |
421 | } |
422 | |
423 | // ----------------------------------------------------------------------------- |
424 | |
425 | static SIMDPP_INL |
426 | uint64_t i_reduce_max(const uint64x2& a) |
427 | { |
428 | #if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 |
429 | uint64x2 r = max(a, move2_l<1>(a)); |
430 | return extract<0>(r); |
431 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32 |
432 | mem_block<uint64x2> b = a; |
433 | return b[0] > b[1] ? b[0] : b[1]; |
434 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
435 | uint64_t r = a.el(0); |
436 | for (unsigned i = 0; i < a.length; i++) { |
437 | r = r > a.el(i) ? r : a.el(i); |
438 | } |
439 | return r; |
440 | #else |
441 | return SIMDPP_NOT_IMPLEMENTED1(a); |
442 | #endif |
443 | } |
444 | |
#if SIMDPP_USE_AVX2
// Reduces a 4 x uint64 vector: fold the 128-bit lanes, then one more
// shift-and-max step within the remaining lane.
static SIMDPP_INL
uint64_t i_reduce_max(const uint64x4& a)
{
    const uint64x2 lo = detail::extract128<0>(a);
    const uint64x2 hi = detail::extract128<1>(a);
    uint64x2 m = max(lo, hi);
    m = max(m, move2_l<1>(m));
    return extract<0>(m);
}
#endif
455 | |
#if SIMDPP_USE_AVX512F
// Reduces an 8 x uint64 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
static SIMDPP_INL
uint64_t i_reduce_max(const uint64<8>& a)
{
    uint64<4> folded = max(extract256<0>(a), extract256<1>(a));
    return i_reduce_max(folded);
}
#endif
463 | |
464 | template<unsigned N> |
465 | SIMDPP_INL uint64_t i_reduce_max(const uint64<N>& a) |
466 | { |
467 | #if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA |
468 | uint64v r = a.vec(0); |
469 | for (unsigned j = 1; j < a.vec_length; ++j) { |
470 | r = max(r, a.vec(j)); |
471 | } |
472 | return i_reduce_max(r); |
473 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32 |
474 | uint64_t r = std::numeric_limits<uint64_t>::min(); |
475 | for (unsigned j = 0; j < a.vec_length; ++j) { |
476 | mem_block<uint64v> b = a.vec(j); |
477 | for (unsigned i = 0; i < a.base_length; i++) { |
478 | r = r > b[i] ? r : b[i]; |
479 | } |
480 | } |
481 | return r; |
482 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
483 | uint64_t r = std::numeric_limits<uint64_t>::min();; |
484 | for (unsigned j = 0; j < a.vec_length; ++j) { |
485 | for (unsigned i = 0; i < a.base_length; i++) { |
486 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
487 | } |
488 | } |
489 | return r; |
490 | #else |
491 | return SIMDPP_NOT_IMPLEMENTED1(a); |
492 | #endif |
493 | } |
494 | |
495 | // ----------------------------------------------------------------------------- |
496 | |
497 | static SIMDPP_INL |
498 | int64_t i_reduce_max(const int64x2& a) |
499 | { |
500 | #if SIMDPP_USE_AVX2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA |
501 | int64x2 r = max(a, move2_l<1>(a)); |
502 | return extract<0>(r); |
503 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32 |
504 | mem_block<int64x2> b = a; |
505 | return b[0] > b[1] ? b[0] : b[1]; |
506 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
507 | int64_t r = a.el(0); |
508 | for (unsigned i = 0; i < a.length; i++) { |
509 | r = r > a.el(i) ? r : a.el(i); |
510 | } |
511 | return r; |
512 | #else |
513 | return SIMDPP_NOT_IMPLEMENTED1(a); |
514 | #endif |
515 | } |
516 | |
#if SIMDPP_USE_AVX2
// Reduces a 4 x int64 vector: fold the 128-bit lanes, then one more
// shift-and-max step within the remaining lane.
static SIMDPP_INL
int64_t i_reduce_max(const int64x4& a)
{
    const int64x2 lo = detail::extract128<0>(a);
    const int64x2 hi = detail::extract128<1>(a);
    int64x2 m = max(lo, hi);
    m = max(m, move2_l<1>(m));
    return extract<0>(m);
}
#endif
527 | |
#if SIMDPP_USE_AVX512F
// Reduces an 8 x int64 vector: fold the two 256-bit halves, then delegate
// to the 256-bit reduction.
static SIMDPP_INL
int64_t i_reduce_max(const int64<8>& a)
{
    int64<4> folded = max(extract256<0>(a), extract256<1>(a));
    return i_reduce_max(folded);
}
#endif
535 | |
536 | template<unsigned N> |
537 | SIMDPP_INL int64_t i_reduce_max(const int64<N>& a) |
538 | { |
539 | #if SIMDPP_USE_AXV2 || SIMDPP_USE_NEON64 || SIMDPP_USE_VSX_207 || SIMDPP_USE_MSA |
540 | int64v r = a.vec(0); |
541 | for (unsigned j = 1; j < a.vec_length; ++j) { |
542 | r = max(r, a.vec(j)); |
543 | } |
544 | return i_reduce_max(r); |
545 | #elif SIMDPP_USE_SSE2 || SIMDPP_USE_NEON32 |
546 | int64_t r = std::numeric_limits<int64_t>::min();; |
547 | for (unsigned j = 0; j < a.vec_length; ++j) { |
548 | mem_block<int64v> b = a.vec(j); |
549 | for (unsigned i = 0; i < a.base_length; i++) { |
550 | r = r > b[i] ? r : b[i]; |
551 | } |
552 | } |
553 | return r; |
554 | #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC |
555 | int64_t r = std::numeric_limits<int64_t>::min();; |
556 | for (unsigned j = 0; j < a.vec_length; ++j) { |
557 | for (unsigned i = 0; i < a.base_length; i++) { |
558 | r = r > a.vec(j).el(i) ? r : a.vec(j).el(i); |
559 | } |
560 | } |
561 | return r; |
562 | #else |
563 | return SIMDPP_NOT_IMPLEMENTED1(a); |
564 | #endif |
565 | } |
566 | |
567 | // ----------------------------------------------------------------------------- |
568 | |
569 | } // namespace insn |
570 | } // namespace detail |
571 | } // namespace SIMDPP_ARCH_NAMESPACE |
572 | } // namespace simdpp |
573 | |
574 | #endif |
575 | |
576 | |