| 1 | /******************************************************************************* |
| 2 | * Copyright 2017-2018 Intel Corporation |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *******************************************************************************/ |
| 16 | |
| 17 | #ifndef CPU_JIT_SSE42_CONVOLUTION_HPP |
| 18 | #define CPU_JIT_SSE42_CONVOLUTION_HPP |
| 19 | |
| 20 | #include "c_types_map.hpp" |
| 21 | #include "utils.hpp" |
| 22 | |
| 23 | #include "cpu_convolution_pd.hpp" |
| 24 | #include "cpu_primitive.hpp" |
| 25 | |
| 26 | #include "jit_primitive_conf.hpp" |
| 27 | #include "jit_sse42_conv_kernel_f32.hpp" |
| 28 | |
| 29 | namespace mkldnn { |
| 30 | namespace impl { |
| 31 | namespace cpu { |
| 32 | |
| 33 | struct jit_sse42_convolution_fwd_t: public cpu_primitive_t { |
| 34 | struct pd_t: public cpu_convolution_fwd_pd_t { |
| 35 | pd_t(engine_t *engine, |
| 36 | const convolution_desc_t *adesc, |
| 37 | const primitive_attr_t *attr, |
| 38 | const typename pd_t::base_class *hint_fwd_pd) |
| 39 | : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) |
| 40 | , jcp_() {} |
| 41 | |
| 42 | DECLARE_COMMON_PD_T( |
| 43 | JIT_IMPL_NAME_HELPER("jit:" , sse42, "" ), |
| 44 | jit_sse42_convolution_fwd_t); |
| 45 | |
| 46 | status_t init() { |
| 47 | bool ok = true |
| 48 | && is_fwd() |
| 49 | && set_default_alg_kind(alg_kind::convolution_direct) |
| 50 | && expect_data_types(data_type::f32, data_type::f32, |
| 51 | data_type::f32, data_type::f32, data_type::f32) |
| 52 | && !has_zero_dim_memory() |
| 53 | && set_default_formats(); |
| 54 | if (!ok) return status::unimplemented; |
| 55 | |
| 56 | return jit_sse42_conv_fwd_kernel_f32::init_conf(jcp_, *desc(), |
| 57 | *src_md(), *weights_md(), *dst_md(), *attr()); |
| 58 | } |
| 59 | |
| 60 | jit_conv_conf_t jcp_; |
| 61 | |
| 62 | protected: |
| 63 | bool set_default_formats() { |
| 64 | using namespace format_tag; |
| 65 | |
| 66 | const bool flat = IC() == 3; |
| 67 | auto src_tag = flat |
| 68 | ? utils::pick(ndims() - 3, ncw, nchw, ncdhw) |
| 69 | : utils::pick(ndims() - 3, nCw8c, nChw8c, nCdhw8c); |
| 70 | auto dst_tag = |
| 71 | utils::pick(ndims() - 3, nCw8c, nChw8c, nCdhw8c); |
| 72 | auto wei_tag = with_groups() |
| 73 | ? utils::pick(2 * ndims() - 6 + flat, gOIw8i8o, gOwi8o, |
| 74 | gOIhw8i8o, gOhwi8o, gOIdhw8i8o, gOdhwi8o) |
| 75 | : utils::pick(2 * ndims() - 6 + flat, OIw8i8o, Owi8o, |
| 76 | OIhw8i8o, Ohwi8o, OIdhw8i8o, Odhwi8o); |
| 77 | |
| 78 | return set_default_formats_common(src_tag, wei_tag, dst_tag); |
| 79 | } |
| 80 | }; |
| 81 | |
| 82 | jit_sse42_convolution_fwd_t(const pd_t *apd): cpu_primitive_t(apd) |
| 83 | { kernel_ = new jit_sse42_conv_fwd_kernel_f32(pd()->jcp_, *pd()->attr()); } |
| 84 | ~jit_sse42_convolution_fwd_t() { delete kernel_; }; |
| 85 | |
| 86 | typedef typename prec_traits<data_type::f32>::type data_t; |
| 87 | |
| 88 | virtual status_t execute(const exec_ctx_t &ctx) const override { |
| 89 | execute_forward(ctx); |
| 90 | return status::success; |
| 91 | } |
| 92 | |
| 93 | private: |
| 94 | void execute_forward(const exec_ctx_t &ctx) const; |
| 95 | const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); } |
| 96 | jit_sse42_conv_fwd_kernel_f32 *kernel_; |
| 97 | }; |
| 98 | |
| 99 | } |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | #endif |
| 104 | |