| 1 | /******************************************************************************* |
| 2 | * Copyright 2018 Intel Corporation |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *******************************************************************************/ |
| 16 | |
| 17 | #ifndef CPU_ISA_TRAITS_HPP |
| 18 | #define CPU_ISA_TRAITS_HPP |
| 19 | |
| 20 | #include <type_traits> |
| 21 | |
| 22 | #define XBYAK64 |
| 23 | #define XBYAK_NO_OP_NAMES |
/* To keep SELinux happy, memory that will be marked with the X (executable)
 * bit should be obtained with mmap. */
| 26 | #define XBYAK_USE_MMAP_ALLOCATOR |
| 27 | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) |
| 28 | /* turn off `size_t to other-type implicit casting` warning |
| 29 | * currently we have a lot of jit-generated instructions that |
| 30 | * take uint32_t, but we pass size_t (e.g. due to using sizeof). |
| 31 | * FIXME: replace size_t parameters with the appropriate ones */ |
| 32 | #pragma warning (disable: 4267) |
| 33 | #endif |
| 34 | #include "xbyak/xbyak.h" |
| 35 | #include "xbyak/xbyak_util.h" |
| 36 | |
| 37 | namespace mkldnn { |
| 38 | namespace impl { |
| 39 | namespace cpu { |
| 40 | |
/**
 * Identifiers of the instruction sets this library distinguishes.
 *
 * Enumerators rely on their implicit values (isa_any == 0, increasing by one
 * in declaration order), so the order must not be changed. The feature sets
 * noted per enumerator are the ones tested by mayiuse() in this header.
 */
enum cpu_isa_t {
    isa_any,          /* no requirement: always usable */
    sse41,            /* SSE4.1 */
    sse42,            /* currently checked as SSE4.1 only */
    avx,              /* AVX */
    avx2,             /* AVX2 */
    avx512_common,    /* AVX-512 F */
    avx512_core,      /* AVX-512 F + BW + VL + DQ */
    avx512_core_vnni, /* avx512_core + AVX-512 VNNI */
    avx512_mic,       /* AVX-512 F + CD + ER + PF */
    avx512_mic_4ops,  /* avx512_mic + 4FMAPS + 4VNNIW */
};
| 53 | |
| 54 | template <cpu_isa_t> struct cpu_isa_traits {}; /* ::vlen -> 32 (for avx2) */ |
| 55 | |
| 56 | template <> struct cpu_isa_traits<sse42> { |
| 57 | typedef Xbyak::Xmm Vmm; |
| 58 | static constexpr int vlen_shift = 4; |
| 59 | static constexpr int vlen = 16; |
| 60 | static constexpr int n_vregs = 16; |
| 61 | }; |
| 62 | template <> struct cpu_isa_traits<avx> { |
| 63 | typedef Xbyak::Ymm Vmm; |
| 64 | static constexpr int vlen_shift = 5; |
| 65 | static constexpr int vlen = 32; |
| 66 | static constexpr int n_vregs = 16; |
| 67 | }; |
| 68 | template <> struct cpu_isa_traits<avx2>: |
| 69 | public cpu_isa_traits<avx> {}; |
| 70 | |
| 71 | template <> struct cpu_isa_traits<avx512_common> { |
| 72 | typedef Xbyak::Zmm Vmm; |
| 73 | static constexpr int vlen_shift = 6; |
| 74 | static constexpr int vlen = 64; |
| 75 | static constexpr int n_vregs = 32; |
| 76 | }; |
| 77 | template <> struct cpu_isa_traits<avx512_core>: |
| 78 | public cpu_isa_traits<avx512_common> {}; |
| 79 | |
| 80 | template <> struct cpu_isa_traits<avx512_mic>: |
| 81 | public cpu_isa_traits<avx512_common> {}; |
| 82 | |
| 83 | template <> struct cpu_isa_traits<avx512_mic_4ops>: |
| 84 | public cpu_isa_traits<avx512_common> {}; |
| 85 | |
| 86 | namespace { |
| 87 | |
| 88 | static Xbyak::util::Cpu cpu; |
| 89 | static inline bool mayiuse(const cpu_isa_t cpu_isa) { |
| 90 | using namespace Xbyak::util; |
| 91 | |
| 92 | switch (cpu_isa) { |
| 93 | case sse41: |
| 94 | case sse42: |
| 95 | // FIXME: SSE4.2 is actually NOT required |
| 96 | //return cpu.has(Cpu::tSSE42); |
| 97 | return cpu.has(Cpu::tSSE41); |
| 98 | case avx: |
| 99 | return cpu.has(Cpu::tAVX); |
| 100 | case avx2: |
| 101 | return cpu.has(Cpu::tAVX2); |
| 102 | case avx512_common: |
| 103 | return cpu.has(Cpu::tAVX512F); |
| 104 | case avx512_core: |
| 105 | return true |
| 106 | && cpu.has(Cpu::tAVX512F) |
| 107 | && cpu.has(Cpu::tAVX512BW) |
| 108 | && cpu.has(Cpu::tAVX512VL) |
| 109 | && cpu.has(Cpu::tAVX512DQ); |
| 110 | case avx512_core_vnni: |
| 111 | return true |
| 112 | && cpu.has(Cpu::tAVX512F) |
| 113 | && cpu.has(Cpu::tAVX512BW) |
| 114 | && cpu.has(Cpu::tAVX512VL) |
| 115 | && cpu.has(Cpu::tAVX512DQ) |
| 116 | && cpu.has(Cpu::tAVX512_VNNI); |
| 117 | case avx512_mic: |
| 118 | return true |
| 119 | && cpu.has(Cpu::tAVX512F) |
| 120 | && cpu.has(Cpu::tAVX512CD) |
| 121 | && cpu.has(Cpu::tAVX512ER) |
| 122 | && cpu.has(Cpu::tAVX512PF); |
| 123 | case avx512_mic_4ops: |
| 124 | return true |
| 125 | && mayiuse(avx512_mic) |
| 126 | && cpu.has(Cpu::tAVX512_4FMAPS) |
| 127 | && cpu.has(Cpu::tAVX512_4VNNIW); |
| 128 | case isa_any: |
| 129 | return true; |
| 130 | } |
| 131 | return false; |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | /* whatever is required to generate string literals... */ |
| 136 | #include "z_magic.hpp" |
/*
 * JIT_IMPL_NAME_HELPER(prefix, isa, suffix_if_any)
 *
 * Expands to a conditional expression that selects an implementation-name
 * string literal:
 *   - when `isa` equals sse42, avx, avx2, avx512_common, avx512_core,
 *     avx512_mic or avx512_mic_4ops, the result is `prefix` immediately
 *     followed by that ISA's name (e.g. prefix "avx2");
 *   - for any other value (isa_any, sse41, avx512_core_vnni, ...) the result
 *     is `prefix` immediately followed by `suffix_if_any`.
 *
 * `prefix` and `suffix_if_any` must be string literals, because they are
 * joined by adjacent-literal concatenation. STRINGIFY must be defined where
 * the macro is used (presumably provided by z_magic.hpp, included above).
 *
 * `isa` is parenthesized at every use so that an expression argument such as
 * `cond ? avx2 : avx` is compared as a whole. It is evaluated up to seven
 * times, so it must be free of side effects.
 */
#define JIT_IMPL_NAME_HELPER(prefix, isa, suffix_if_any) \
    ((isa) == sse42 ? prefix STRINGIFY(sse42) : \
    ((isa) == avx ? prefix STRINGIFY(avx) : \
    ((isa) == avx2 ? prefix STRINGIFY(avx2) : \
    ((isa) == avx512_common ? prefix STRINGIFY(avx512_common) : \
    ((isa) == avx512_core ? prefix STRINGIFY(avx512_core) : \
    ((isa) == avx512_mic ? prefix STRINGIFY(avx512_mic) : \
    ((isa) == avx512_mic_4ops ? prefix STRINGIFY(avx512_mic_4ops) : \
    prefix suffix_if_any)))))))
| 146 | |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | #endif |
| 152 | |