1 | /******************************************************************************* |
2 | * Copyright 2018 Intel Corporation |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | *******************************************************************************/ |
16 | |
17 | #ifndef CPU_ISA_TRAITS_HPP |
18 | #define CPU_ISA_TRAITS_HPP |
19 | |
20 | #include <type_traits> |
21 | |
22 | #define XBYAK64 |
23 | #define XBYAK_NO_OP_NAMES |
/* To keep SELinux happy, memory that will be marked with the X-bit
 * (executable) should be obtained with mmap. */
26 | #define XBYAK_USE_MMAP_ALLOCATOR |
27 | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) |
/* Turn off the `size_t to other-type implicit casting` warning.
 * Currently we have a lot of jit-generated instructions that
 * take uint32_t, but we pass size_t (e.g. due to using sizeof).
 * FIXME: replace size_t parameters with the appropriate ones */
32 | #pragma warning (disable: 4267) |
33 | #endif |
34 | #include "xbyak/xbyak.h" |
35 | #include "xbyak/xbyak_util.h" |
36 | |
37 | namespace mkldnn { |
38 | namespace impl { |
39 | namespace cpu { |
40 | |
41 | typedef enum { |
42 | isa_any, |
43 | sse41, |
44 | sse42, |
45 | avx, |
46 | avx2, |
47 | avx512_common, |
48 | avx512_core, |
49 | avx512_core_vnni, |
50 | avx512_mic, |
51 | avx512_mic_4ops, |
52 | } cpu_isa_t; |
53 | |
54 | template <cpu_isa_t> struct cpu_isa_traits {}; /* ::vlen -> 32 (for avx2) */ |
55 | |
56 | template <> struct cpu_isa_traits<sse42> { |
57 | typedef Xbyak::Xmm Vmm; |
58 | static constexpr int vlen_shift = 4; |
59 | static constexpr int vlen = 16; |
60 | static constexpr int n_vregs = 16; |
61 | }; |
62 | template <> struct cpu_isa_traits<avx> { |
63 | typedef Xbyak::Ymm Vmm; |
64 | static constexpr int vlen_shift = 5; |
65 | static constexpr int vlen = 32; |
66 | static constexpr int n_vregs = 16; |
67 | }; |
68 | template <> struct cpu_isa_traits<avx2>: |
69 | public cpu_isa_traits<avx> {}; |
70 | |
71 | template <> struct cpu_isa_traits<avx512_common> { |
72 | typedef Xbyak::Zmm Vmm; |
73 | static constexpr int vlen_shift = 6; |
74 | static constexpr int vlen = 64; |
75 | static constexpr int n_vregs = 32; |
76 | }; |
77 | template <> struct cpu_isa_traits<avx512_core>: |
78 | public cpu_isa_traits<avx512_common> {}; |
79 | |
80 | template <> struct cpu_isa_traits<avx512_mic>: |
81 | public cpu_isa_traits<avx512_common> {}; |
82 | |
83 | template <> struct cpu_isa_traits<avx512_mic_4ops>: |
84 | public cpu_isa_traits<avx512_common> {}; |
85 | |
86 | namespace { |
87 | |
88 | static Xbyak::util::Cpu cpu; |
89 | static inline bool mayiuse(const cpu_isa_t cpu_isa) { |
90 | using namespace Xbyak::util; |
91 | |
92 | switch (cpu_isa) { |
93 | case sse41: |
94 | case sse42: |
95 | // FIXME: SSE4.2 is actually NOT required |
96 | //return cpu.has(Cpu::tSSE42); |
97 | return cpu.has(Cpu::tSSE41); |
98 | case avx: |
99 | return cpu.has(Cpu::tAVX); |
100 | case avx2: |
101 | return cpu.has(Cpu::tAVX2); |
102 | case avx512_common: |
103 | return cpu.has(Cpu::tAVX512F); |
104 | case avx512_core: |
105 | return true |
106 | && cpu.has(Cpu::tAVX512F) |
107 | && cpu.has(Cpu::tAVX512BW) |
108 | && cpu.has(Cpu::tAVX512VL) |
109 | && cpu.has(Cpu::tAVX512DQ); |
110 | case avx512_core_vnni: |
111 | return true |
112 | && cpu.has(Cpu::tAVX512F) |
113 | && cpu.has(Cpu::tAVX512BW) |
114 | && cpu.has(Cpu::tAVX512VL) |
115 | && cpu.has(Cpu::tAVX512DQ) |
116 | && cpu.has(Cpu::tAVX512_VNNI); |
117 | case avx512_mic: |
118 | return true |
119 | && cpu.has(Cpu::tAVX512F) |
120 | && cpu.has(Cpu::tAVX512CD) |
121 | && cpu.has(Cpu::tAVX512ER) |
122 | && cpu.has(Cpu::tAVX512PF); |
123 | case avx512_mic_4ops: |
124 | return true |
125 | && mayiuse(avx512_mic) |
126 | && cpu.has(Cpu::tAVX512_4FMAPS) |
127 | && cpu.has(Cpu::tAVX512_4VNNIW); |
128 | case isa_any: |
129 | return true; |
130 | } |
131 | return false; |
132 | } |
133 | } |
134 | |
135 | /* whatever is required to generate string literals... */ |
136 | #include "z_magic.hpp" |
137 | #define JIT_IMPL_NAME_HELPER(prefix, isa, suffix_if_any) \ |
138 | (isa == sse42 ? prefix STRINGIFY(sse42) : \ |
139 | (isa == avx ? prefix STRINGIFY(avx) : \ |
140 | (isa == avx2 ? prefix STRINGIFY(avx2) : \ |
141 | (isa == avx512_common ? prefix STRINGIFY(avx512_common) : \ |
142 | (isa == avx512_core ? prefix STRINGIFY(avx512_core) : \ |
143 | (isa == avx512_mic ? prefix STRINGIFY(avx512_mic) : \ |
144 | (isa == avx512_mic_4ops ? prefix STRINGIFY(avx512_mic_4ops) : \ |
145 | prefix suffix_if_any))))))) |
146 | |
147 | } |
148 | } |
149 | } |
150 | |
151 | #endif |
152 | |