1/*******************************************************************************
2* Copyright 2018 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
17#ifndef CPU_ISA_TRAITS_HPP
18#define CPU_ISA_TRAITS_HPP
19
20#include <type_traits>
21
22#define XBYAK64
23#define XBYAK_NO_OP_NAMES
/* To keep SELinux happy, memory that will be marked executable (X-bit) must
 * be obtained with mmap. */
26#define XBYAK_USE_MMAP_ALLOCATOR
27#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
/* Turn off the `size_t to other-type implicit casting` warning (C4267).
 * Currently a lot of jit-generated instructions take uint32_t, but we pass
 * size_t (e.g. due to using sizeof).
 * FIXME: replace size_t parameters with the appropriate ones */
32#pragma warning (disable: 4267)
33#endif
34#include "xbyak/xbyak.h"
35#include "xbyak/xbyak_util.h"
36
37namespace mkldnn {
38namespace impl {
39namespace cpu {
40
/* Identifiers of the instruction-set levels known to this header. They are
 * used as the template argument of cpu_isa_traits<> and as the argument of
 * mayiuse(). Enumerators keep their implicit values 0..9 in this order. */
enum cpu_isa_t {
    isa_any, /* no particular requirement; mayiuse() always returns true */
    sse41,
    sse42,
    avx,
    avx2,
    avx512_common,
    avx512_core,
    avx512_core_vnni,
    avx512_mic,
    avx512_mic_4ops,
};
53
54template <cpu_isa_t> struct cpu_isa_traits {}; /* ::vlen -> 32 (for avx2) */
55
56template <> struct cpu_isa_traits<sse42> {
57 typedef Xbyak::Xmm Vmm;
58 static constexpr int vlen_shift = 4;
59 static constexpr int vlen = 16;
60 static constexpr int n_vregs = 16;
61};
62template <> struct cpu_isa_traits<avx> {
63 typedef Xbyak::Ymm Vmm;
64 static constexpr int vlen_shift = 5;
65 static constexpr int vlen = 32;
66 static constexpr int n_vregs = 16;
67};
68template <> struct cpu_isa_traits<avx2>:
69 public cpu_isa_traits<avx> {};
70
71template <> struct cpu_isa_traits<avx512_common> {
72 typedef Xbyak::Zmm Vmm;
73 static constexpr int vlen_shift = 6;
74 static constexpr int vlen = 64;
75 static constexpr int n_vregs = 32;
76};
77template <> struct cpu_isa_traits<avx512_core>:
78 public cpu_isa_traits<avx512_common> {};
79
80template <> struct cpu_isa_traits<avx512_mic>:
81 public cpu_isa_traits<avx512_common> {};
82
83template <> struct cpu_isa_traits<avx512_mic_4ops>:
84 public cpu_isa_traits<avx512_common> {};
85
86namespace {
87
88static Xbyak::util::Cpu cpu;
89static inline bool mayiuse(const cpu_isa_t cpu_isa) {
90 using namespace Xbyak::util;
91
92 switch (cpu_isa) {
93 case sse41:
94 case sse42:
95 // FIXME: SSE4.2 is actually NOT required
96 //return cpu.has(Cpu::tSSE42);
97 return cpu.has(Cpu::tSSE41);
98 case avx:
99 return cpu.has(Cpu::tAVX);
100 case avx2:
101 return cpu.has(Cpu::tAVX2);
102 case avx512_common:
103 return cpu.has(Cpu::tAVX512F);
104 case avx512_core:
105 return true
106 && cpu.has(Cpu::tAVX512F)
107 && cpu.has(Cpu::tAVX512BW)
108 && cpu.has(Cpu::tAVX512VL)
109 && cpu.has(Cpu::tAVX512DQ);
110 case avx512_core_vnni:
111 return true
112 && cpu.has(Cpu::tAVX512F)
113 && cpu.has(Cpu::tAVX512BW)
114 && cpu.has(Cpu::tAVX512VL)
115 && cpu.has(Cpu::tAVX512DQ)
116 && cpu.has(Cpu::tAVX512_VNNI);
117 case avx512_mic:
118 return true
119 && cpu.has(Cpu::tAVX512F)
120 && cpu.has(Cpu::tAVX512CD)
121 && cpu.has(Cpu::tAVX512ER)
122 && cpu.has(Cpu::tAVX512PF);
123 case avx512_mic_4ops:
124 return true
125 && mayiuse(avx512_mic)
126 && cpu.has(Cpu::tAVX512_4FMAPS)
127 && cpu.has(Cpu::tAVX512_4VNNIW);
128 case isa_any:
129 return true;
130 }
131 return false;
132}
133}
134
135/* whatever is required to generate string literals... */
136#include "z_magic.hpp"
/* Selects an implementation-name string for isa.
 *
 * prefix and suffix_if_any must be string literals: they are pasted next to
 * another literal and rely on adjacent-literal concatenation, which is why
 * they are NOT parenthesized.
 *
 * For sse42, avx, avx2, avx512_common, avx512_core, avx512_mic and
 * avx512_mic_4ops the result is prefix followed by STRINGIFY(<isa name>)
 * (STRINGIFY comes from z_magic.hpp). Any other isa value yields
 * prefix followed by suffix_if_any.
 *
 * isa is parenthesized at every use so that an expression argument cannot
 * re-associate with the == comparisons. */
#define JIT_IMPL_NAME_HELPER(prefix, isa, suffix_if_any) \
    ((isa) == sse42 ? prefix STRINGIFY(sse42) : \
    ((isa) == avx ? prefix STRINGIFY(avx) : \
    ((isa) == avx2 ? prefix STRINGIFY(avx2) : \
    ((isa) == avx512_common ? prefix STRINGIFY(avx512_common) : \
    ((isa) == avx512_core ? prefix STRINGIFY(avx512_core) : \
    ((isa) == avx512_mic ? prefix STRINGIFY(avx512_mic) : \
    ((isa) == avx512_mic_4ops ? prefix STRINGIFY(avx512_mic_4ops) : \
    prefix suffix_if_any)))))))
146
147}
148}
149}
150
151#endif
152