| 1 | /******************************************************************************* |
| 2 | * Copyright 2017-2018 Intel Corporation |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *******************************************************************************/ |
| 16 | |
| 17 | #include <assert.h> |
| 18 | |
| 19 | #include "cpu_barrier.hpp" |
| 20 | |
| 21 | namespace mkldnn { |
| 22 | namespace impl { |
| 23 | namespace cpu { |
| 24 | |
| 25 | namespace simple_barrier { |
| 26 | |
| 27 | void generate(jit_generator &code, Xbyak::Reg64 reg_ctx, |
| 28 | Xbyak::Reg64 reg_nthr) { |
| 29 | # define BAR_CTR_OFF offsetof(ctx_t, ctr) |
| 30 | # define BAR_SENSE_OFF offsetof(ctx_t, sense) |
| 31 | using namespace Xbyak; |
| 32 | |
| 33 | Xbyak::Reg64 reg_tmp = [&]() { |
| 34 | /* returns register which is neither reg_ctx nor reg_nthr */ |
| 35 | Xbyak::Reg64 regs[] = { util::rax, util::rbx, util::rcx }; |
| 36 | for (size_t i = 0; i < sizeof(regs) / sizeof(regs[0]); ++i) |
| 37 | if (!utils::one_of(regs[i], reg_ctx, reg_nthr)) |
| 38 | return regs[i]; |
| 39 | return regs[0]; /* should not happen */ |
| 40 | }(); |
| 41 | |
| 42 | Label barrier_exit_label, barrier_exit_restore_label, spin_label; |
| 43 | |
| 44 | code.cmp(reg_nthr, 1); |
| 45 | code.jbe(barrier_exit_label); |
| 46 | |
| 47 | code.push(reg_tmp); |
| 48 | |
| 49 | /* take and save current sense */ |
| 50 | code.mov(reg_tmp, code.ptr[reg_ctx + BAR_SENSE_OFF]); |
| 51 | code.push(reg_tmp); |
| 52 | code.mov(reg_tmp, 1); |
| 53 | |
| 54 | if (mayiuse(avx512_mic)) { |
| 55 | code.prefetchwt1(code.ptr[reg_ctx + BAR_CTR_OFF]); |
| 56 | code.prefetchwt1(code.ptr[reg_ctx + BAR_CTR_OFF]); |
| 57 | } |
| 58 | |
| 59 | code.lock(); code.xadd(code.ptr[reg_ctx + BAR_CTR_OFF], reg_tmp); |
| 60 | code.add(reg_tmp, 1); |
| 61 | code.cmp(reg_tmp, reg_nthr); |
| 62 | code.pop(reg_tmp); /* restore previous sense */ |
| 63 | code.jne(spin_label); |
| 64 | |
| 65 | /* the last thread {{{ */ |
| 66 | code.mov(code.qword[reg_ctx + BAR_CTR_OFF], 0); // reset ctx |
| 67 | |
| 68 | // notify waiting threads |
| 69 | code.not_(reg_tmp); |
| 70 | code.mov(code.ptr[reg_ctx + BAR_SENSE_OFF], reg_tmp); |
| 71 | code.jmp(barrier_exit_restore_label); |
| 72 | /* }}} the last thread */ |
| 73 | |
| 74 | code.CodeGenerator::L(spin_label); |
| 75 | code.pause(); |
| 76 | code.cmp(reg_tmp, code.ptr[reg_ctx + BAR_SENSE_OFF]); |
| 77 | code.je(spin_label); |
| 78 | |
| 79 | code.CodeGenerator::L(barrier_exit_restore_label); |
| 80 | code.pop(reg_tmp); |
| 81 | |
| 82 | code.CodeGenerator::L(barrier_exit_label); |
| 83 | # undef BAR_CTR_OFF |
| 84 | # undef BAR_SENSE_OFF |
| 85 | } |
| 86 | |
| 87 | /** jit barrier generator */ |
| 88 | struct jit_t: public jit_generator { |
| 89 | void (*barrier)(ctx_t *ctx, size_t nthr); |
| 90 | |
| 91 | jit_t() { |
| 92 | generate(*this, abi_param1, abi_param2); |
| 93 | ret(); |
| 94 | barrier = reinterpret_cast<decltype(barrier)>(const_cast<uint8_t*>( |
| 95 | this->getCode())); |
| 96 | } |
| 97 | |
| 98 | DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_t) |
| 99 | }; |
| 100 | |
| 101 | void barrier(ctx_t *ctx, int nthr) { |
| 102 | static jit_t j; /* XXX: constructed on load ... */ |
| 103 | j.barrier(ctx, nthr); |
| 104 | } |
| 105 | |
| 106 | } |
| 107 | |
| 108 | } |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | // vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s |
| 113 | |