| 1 | /******************************************************************************* |
| 2 | * Copyright 2017-2018 Intel Corporation |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | *******************************************************************************/ |
| 16 | |
| 17 | #include "mkldnn_thread.hpp" |
| 18 | #include "utils.hpp" |
| 19 | |
| 20 | #include "scratchpad.hpp" |
| 21 | |
| 22 | namespace mkldnn { |
| 23 | namespace impl { |
| 24 | |
/* Alignment, in bytes, requested for every scratchpad allocation (2 MiB).
   Buffers are placed on a page boundary to reduce TLB/page misses. */
const size_t page_size = 2 * 1024 * 1024;
| 27 | |
| 28 | /* |
| 29 | Implementation of the scratchpad_t interface that is compatible with |
| 30 | a concurrent execution |
| 31 | */ |
| 32 | struct concurent_scratchpad_t : public scratchpad_t { |
| 33 | concurent_scratchpad_t(size_t size) { |
| 34 | size_ = size; |
| 35 | scratchpad_ = (char *) malloc(size, page_size); |
| 36 | assert(scratchpad_ != nullptr); |
| 37 | } |
| 38 | |
| 39 | ~concurent_scratchpad_t() { |
| 40 | free(scratchpad_); |
| 41 | } |
| 42 | |
| 43 | virtual char *get() const { |
| 44 | return scratchpad_; |
| 45 | } |
| 46 | |
| 47 | private: |
| 48 | char *scratchpad_; |
| 49 | size_t size_; |
| 50 | }; |
| 51 | |
| 52 | /* |
| 53 | Implementation of the scratchpad_t interface that uses a global |
| 54 | scratchpad |
| 55 | */ |
| 56 | |
| 57 | struct global_scratchpad_t : public scratchpad_t { |
| 58 | global_scratchpad_t(size_t size) { |
| 59 | if (size > size_) { |
| 60 | if (scratchpad_ != nullptr) free(scratchpad_); |
| 61 | size_ = size; |
| 62 | scratchpad_ = (char *) malloc(size, page_size); |
| 63 | assert(scratchpad_ != nullptr); |
| 64 | } |
| 65 | reference_count_++; |
| 66 | } |
| 67 | |
| 68 | ~global_scratchpad_t() { |
| 69 | reference_count_--; |
| 70 | if (reference_count_ == 0) { |
| 71 | free(scratchpad_); |
| 72 | scratchpad_ = nullptr; |
| 73 | size_ = 0; |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | virtual char *get() const { |
| 78 | return scratchpad_; |
| 79 | } |
| 80 | |
| 81 | private: |
| 82 | /* |
| 83 | Using thread-local here is unnecessary and even buggy! All threads |
| 84 | actually share the same scratchpad, which is created and queried only |
| 85 | on the main thread. If the scratchpad is queried on some thread other |
| 86 | than the one it was created on (e.g. the application calls the API from |
| 87 | multiple threads), thread-local causes a segfault because the scratchpad |
| 88 | is uninitialized on the current thread. |
| 89 | */ |
| 90 | /*thread_local*/ static char *scratchpad_; |
| 91 | /*thread_local*/ static size_t size_; |
| 92 | /*thread_local*/ static unsigned int reference_count_; |
| 93 | }; |
| 94 | |
| 95 | /*thread_local*/ char *global_scratchpad_t::scratchpad_ = nullptr; |
| 96 | /*thread_local*/ size_t global_scratchpad_t::size_ = 0; |
| 97 | /*thread_local*/ unsigned int global_scratchpad_t::reference_count_ = 0; |
| 98 | |
| 99 | |
| 100 | /* |
| 101 | Scratchpad creation routine |
| 102 | */ |
| 103 | scratchpad_t *create_scratchpad(size_t size) { |
| 104 | #ifndef MKLDNN_ENABLE_CONCURRENT_EXEC |
| 105 | return new global_scratchpad_t(size); |
| 106 | #else |
| 107 | return new concurent_scratchpad_t(size); |
| 108 | #endif |
| 109 | } |
| 110 | |
| 111 | } |
| 112 | } |
| 113 | |