| 1 | /* |
| 2 | * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef OS_LINUX_OS_LINUX_HPP |
| 26 | #define OS_LINUX_OS_LINUX_HPP |
| 27 | |
| 28 | // Linux_OS defines the interface to Linux operating systems |
| 29 | |
// Describes how this OS protects the page at address '0'.
// Always answers true in this port.
static bool zero_page_read_protected() {
  const bool read_protected = true;
  return read_protected;
}
| 32 | |
| 33 | class Linux { |
| 34 | friend class os; |
| 35 | friend class OSContainer; |
| 36 | friend class TestReserveMemorySpecial; |
| 37 | |
| 38 | static bool libjsig_is_loaded; // libjsig that interposes sigaction(), |
| 39 | // __sigaction(), signal() is loaded |
| 40 | static struct sigaction *(*get_signal_action)(int); |
| 41 | |
| 42 | static void check_signal_handler(int sig); |
| 43 | |
| 44 | static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *); |
| 45 | static int (*_pthread_setname_np)(pthread_t, const char*); |
| 46 | |
| 47 | static address _initial_thread_stack_bottom; |
| 48 | static uintptr_t _initial_thread_stack_size; |
| 49 | |
| 50 | static const char *_glibc_version; |
| 51 | static const char *_libpthread_version; |
| 52 | |
| 53 | static bool _supports_fast_thread_cpu_time; |
| 54 | |
| 55 | static GrowableArray<int>* _cpu_to_node; |
| 56 | static GrowableArray<int>* _nindex_to_node; |
| 57 | |
| 58 | // 0x00000000 = uninitialized, |
| 59 | // 0x01000000 = kernel version unknown, |
| 60 | // otherwise a 32-bit number: |
| 61 | // Ox00AABBCC |
| 62 | // AA, Major Version |
| 63 | // BB, Minor Version |
| 64 | // CC, Fix Version |
| 65 | static uint32_t _os_version; |
| 66 | |
| 67 | protected: |
| 68 | |
| 69 | static julong _physical_memory; |
| 70 | static pthread_t _main_thread; |
| 71 | static Mutex* _createThread_lock; |
| 72 | static int _page_size; |
| 73 | |
| 74 | static julong available_memory(); |
| 75 | static julong physical_memory() { return _physical_memory; } |
| 76 | static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; } |
| 77 | static int active_processor_count(); |
| 78 | |
| 79 | static void initialize_system_info(); |
| 80 | |
| 81 | static int commit_memory_impl(char* addr, size_t bytes, bool exec); |
| 82 | static int commit_memory_impl(char* addr, size_t bytes, |
| 83 | size_t alignment_hint, bool exec); |
| 84 | |
| 85 | static void set_glibc_version(const char *s) { _glibc_version = s; } |
| 86 | static void set_libpthread_version(const char *s) { _libpthread_version = s; } |
| 87 | |
| 88 | static void rebuild_cpu_to_node_map(); |
| 89 | static void rebuild_nindex_to_node_map(); |
| 90 | static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; } |
| 91 | static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; } |
| 92 | |
| 93 | static size_t find_large_page_size(); |
| 94 | static size_t setup_large_page_size(); |
| 95 | |
| 96 | static bool setup_large_page_type(size_t page_size); |
| 97 | static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size); |
| 98 | static bool hugetlbfs_sanity_check(bool warn, size_t page_size); |
| 99 | |
| 100 | static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec); |
| 101 | static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec); |
| 102 | static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec); |
| 103 | static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec); |
| 104 | |
| 105 | static bool release_memory_special_impl(char* base, size_t bytes); |
| 106 | static bool release_memory_special_shm(char* base, size_t bytes); |
| 107 | static bool release_memory_special_huge_tlbfs(char* base, size_t bytes); |
| 108 | |
| 109 | static void print_full_memory_info(outputStream* st); |
| 110 | static void print_container_info(outputStream* st); |
| 111 | static void print_steal_info(outputStream* st); |
| 112 | static void print_distro_info(outputStream* st); |
| 113 | static void print_libversion_info(outputStream* st); |
| 114 | static void print_proc_sys_info(outputStream* st); |
| 115 | static void print_ld_preload_file(outputStream* st); |
| 116 | |
| 117 | public: |
| 118 | struct CPUPerfTicks { |
| 119 | uint64_t used; |
| 120 | uint64_t usedKernel; |
| 121 | uint64_t total; |
| 122 | uint64_t steal; |
| 123 | bool has_steal_ticks; |
| 124 | }; |
| 125 | |
| 126 | // which_logical_cpu=-1 returns accumulated ticks for all cpus. |
| 127 | static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu); |
| 128 | static bool _stack_is_executable; |
| 129 | static void *dlopen_helper(const char *name, char *ebuf, int ebuflen); |
| 130 | static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen); |
| 131 | |
| 132 | static void init_thread_fpu_state(); |
| 133 | static int get_fpu_control_word(); |
| 134 | static void set_fpu_control_word(int fpu_control); |
| 135 | static pthread_t main_thread(void) { return _main_thread; } |
| 136 | // returns kernel thread id (similar to LWP id on Solaris), which can be |
| 137 | // used to access /proc |
| 138 | static pid_t gettid(); |
| 139 | static void set_createThread_lock(Mutex* lk) { _createThread_lock = lk; } |
| 140 | static Mutex* createThread_lock(void) { return _createThread_lock; } |
| 141 | static void hotspot_sigmask(Thread* thread); |
| 142 | |
| 143 | static address initial_thread_stack_bottom(void) { return _initial_thread_stack_bottom; } |
| 144 | static uintptr_t initial_thread_stack_size(void) { return _initial_thread_stack_size; } |
| 145 | |
| 146 | static int page_size(void) { return _page_size; } |
| 147 | static void set_page_size(int val) { _page_size = val; } |
| 148 | |
| 149 | static address ucontext_get_pc(const ucontext_t* uc); |
| 150 | static void ucontext_set_pc(ucontext_t* uc, address pc); |
| 151 | static intptr_t* ucontext_get_sp(const ucontext_t* uc); |
| 152 | static intptr_t* ucontext_get_fp(const ucontext_t* uc); |
| 153 | |
| 154 | // For Analyzer Forte AsyncGetCallTrace profiling support: |
| 155 | // |
| 156 | // This interface should be declared in os_linux_i486.hpp, but |
| 157 | // that file provides extensions to the os class and not the |
| 158 | // Linux class. |
| 159 | static ExtendedPC fetch_frame_from_ucontext(Thread* thread, const ucontext_t* uc, |
| 160 | intptr_t** ret_sp, intptr_t** ret_fp); |
| 161 | |
| 162 | static bool get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr); |
| 163 | |
| 164 | // This boolean allows users to forward their own non-matching signals |
| 165 | // to JVM_handle_linux_signal, harmlessly. |
| 166 | static bool signal_handlers_are_installed; |
| 167 | |
| 168 | static int get_our_sigflags(int); |
| 169 | static void set_our_sigflags(int, int); |
| 170 | static void signal_sets_init(); |
| 171 | static void install_signal_handlers(); |
| 172 | static void set_signal_handler(int, bool); |
| 173 | |
| 174 | static sigset_t* unblocked_signals(); |
| 175 | static sigset_t* vm_signals(); |
| 176 | |
| 177 | // For signal-chaining |
| 178 | static struct sigaction *get_chained_signal_action(int sig); |
| 179 | static bool chained_handler(int sig, siginfo_t* siginfo, void* context); |
| 180 | |
| 181 | // GNU libc and libpthread version strings |
| 182 | static const char *glibc_version() { return _glibc_version; } |
| 183 | static const char *libpthread_version() { return _libpthread_version; } |
| 184 | |
| 185 | static void libpthread_init(); |
| 186 | static void sched_getcpu_init(); |
| 187 | static bool libnuma_init(); |
| 188 | static void* libnuma_dlsym(void* handle, const char* name); |
| 189 | // libnuma v2 (libnuma_1.2) symbols |
| 190 | static void* libnuma_v2_dlsym(void* handle, const char* name); |
| 191 | |
| 192 | // Return default guard size for the specified thread type |
| 193 | static size_t default_guard_size(os::ThreadType thr_type); |
| 194 | |
| 195 | static void capture_initial_stack(size_t max_size); |
| 196 | |
| 197 | // Stack overflow handling |
| 198 | static bool manually_expand_stack(JavaThread * t, address addr); |
| 199 | static int max_register_window_saves_before_flushing(); |
| 200 | |
| 201 | // fast POSIX clocks support |
| 202 | static void fast_thread_clock_init(void); |
| 203 | |
| 204 | static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) { |
| 205 | return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1; |
| 206 | } |
| 207 | |
| 208 | static bool supports_fast_thread_cpu_time() { |
| 209 | return _supports_fast_thread_cpu_time; |
| 210 | } |
| 211 | |
| 212 | static jlong fast_thread_cpu_time(clockid_t clockid); |
| 213 | |
| 214 | static void initialize_os_info(); |
| 215 | static bool os_version_is_known(); |
| 216 | static uint32_t os_version(); |
| 217 | |
| 218 | // Stack repair handling |
| 219 | |
| 220 | // none present |
| 221 | |
| 222 | private: |
| 223 | static void numa_init(); |
| 224 | static void expand_stack_to(address bottom); |
| 225 | |
| 226 | typedef int (*sched_getcpu_func_t)(void); |
| 227 | typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen); |
| 228 | typedef int (*numa_max_node_func_t)(void); |
| 229 | typedef int (*numa_num_configured_nodes_func_t)(void); |
| 230 | typedef int (*numa_available_func_t)(void); |
| 231 | typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); |
| 232 | typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); |
| 233 | typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask); |
| 234 | typedef struct bitmask* (*numa_get_membind_func_t)(void); |
| 235 | typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void); |
| 236 | |
| 237 | typedef void (*numa_set_bind_policy_func_t)(int policy); |
| 238 | typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); |
| 239 | typedef int (*numa_distance_func_t)(int node1, int node2); |
| 240 | |
| 241 | static sched_getcpu_func_t _sched_getcpu; |
| 242 | static numa_node_to_cpus_func_t _numa_node_to_cpus; |
| 243 | static numa_max_node_func_t _numa_max_node; |
| 244 | static numa_num_configured_nodes_func_t _numa_num_configured_nodes; |
| 245 | static numa_available_func_t _numa_available; |
| 246 | static numa_tonode_memory_func_t _numa_tonode_memory; |
| 247 | static numa_interleave_memory_func_t _numa_interleave_memory; |
| 248 | static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2; |
| 249 | static numa_set_bind_policy_func_t _numa_set_bind_policy; |
| 250 | static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset; |
| 251 | static numa_distance_func_t _numa_distance; |
| 252 | static numa_get_membind_func_t _numa_get_membind; |
| 253 | static numa_get_interleave_mask_func_t _numa_get_interleave_mask; |
| 254 | static unsigned long* _numa_all_nodes; |
| 255 | static struct bitmask* _numa_all_nodes_ptr; |
| 256 | static struct bitmask* _numa_nodes_ptr; |
| 257 | static struct bitmask* _numa_interleave_bitmask; |
| 258 | static struct bitmask* _numa_membind_bitmask; |
| 259 | |
| 260 | static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } |
| 261 | static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } |
| 262 | static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; } |
| 263 | static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; } |
| 264 | static void set_numa_available(numa_available_func_t func) { _numa_available = func; } |
| 265 | static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } |
| 266 | static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; } |
| 267 | static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; } |
| 268 | static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; } |
| 269 | static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; } |
| 270 | static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; } |
| 271 | static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; } |
| 272 | static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; } |
| 273 | static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } |
| 274 | static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); } |
| 275 | static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); } |
| 276 | static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; } |
| 277 | static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; } |
| 278 | static int sched_getcpu_syscall(void); |
| 279 | |
| 280 | enum NumaAllocationPolicy{ |
| 281 | NotInitialized, |
| 282 | Membind, |
| 283 | Interleave |
| 284 | }; |
| 285 | static NumaAllocationPolicy _current_numa_policy; |
| 286 | |
| 287 | public: |
| 288 | static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } |
| 289 | static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) { |
| 290 | return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1; |
| 291 | } |
| 292 | static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; } |
| 293 | static int numa_num_configured_nodes() { |
| 294 | return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1; |
| 295 | } |
| 296 | static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; } |
| 297 | static int numa_tonode_memory(void *start, size_t size, int node) { |
| 298 | return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; |
| 299 | } |
| 300 | |
| 301 | static bool is_running_in_interleave_mode() { |
| 302 | return _current_numa_policy == Interleave; |
| 303 | } |
| 304 | |
| 305 | static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) { |
| 306 | _current_numa_policy = numa_policy; |
| 307 | } |
| 308 | |
| 309 | static NumaAllocationPolicy identify_numa_policy() { |
| 310 | for (int node = 0; node <= Linux::numa_max_node(); node++) { |
| 311 | if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) { |
| 312 | return Interleave; |
| 313 | } |
| 314 | } |
| 315 | return Membind; |
| 316 | } |
| 317 | |
| 318 | static void numa_interleave_memory(void *start, size_t size) { |
| 319 | // Prefer v2 API |
| 320 | if (_numa_interleave_memory_v2 != NULL) { |
| 321 | if (is_running_in_interleave_mode()) { |
| 322 | _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask); |
| 323 | } else if (_numa_membind_bitmask != NULL) { |
| 324 | _numa_interleave_memory_v2(start, size, _numa_membind_bitmask); |
| 325 | } |
| 326 | } else if (_numa_interleave_memory != NULL) { |
| 327 | _numa_interleave_memory(start, size, _numa_all_nodes); |
| 328 | } |
| 329 | } |
| 330 | static void numa_set_bind_policy(int policy) { |
| 331 | if (_numa_set_bind_policy != NULL) { |
| 332 | _numa_set_bind_policy(policy); |
| 333 | } |
| 334 | } |
| 335 | static int numa_distance(int node1, int node2) { |
| 336 | return _numa_distance != NULL ? _numa_distance(node1, node2) : -1; |
| 337 | } |
| 338 | static int get_node_by_cpu(int cpu_id); |
| 339 | static int get_existing_num_nodes(); |
| 340 | // Check if numa node is configured (non-zero memory node). |
| 341 | static bool is_node_in_configured_nodes(unsigned int n) { |
| 342 | if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) { |
| 343 | return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n); |
| 344 | } else |
| 345 | return false; |
| 346 | } |
| 347 | // Check if numa node exists in the system (including zero memory nodes). |
| 348 | static bool is_node_in_existing_nodes(unsigned int n) { |
| 349 | if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) { |
| 350 | return _numa_bitmask_isbitset(_numa_nodes_ptr, n); |
| 351 | } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) { |
| 352 | // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible |
| 353 | // to trust the API version for checking its absence. On the other hand, |
| 354 | // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get |
| 355 | // a complete view of all numa nodes in the system, hence numa_nodes_ptr |
| 356 | // is used to handle CPU and nodes on architectures (like PowerPC) where |
| 357 | // there can exist nodes with CPUs but no memory or vice-versa and the |
| 358 | // nodes may be non-contiguous. For most of the architectures, like |
| 359 | // x86_64, numa_node_ptr presents the same node set as found in |
| 360 | // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a |
| 361 | // substitute. |
| 362 | return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n); |
| 363 | } else |
| 364 | return false; |
| 365 | } |
| 366 | // Check if node is in bound node set. |
| 367 | static bool is_node_in_bound_nodes(int node) { |
| 368 | if (_numa_bitmask_isbitset != NULL) { |
| 369 | if (is_running_in_interleave_mode()) { |
| 370 | return _numa_bitmask_isbitset(_numa_interleave_bitmask, node); |
| 371 | } else { |
| 372 | return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false; |
| 373 | } |
| 374 | } |
| 375 | return false; |
| 376 | } |
| 377 | // Check if bound to only one numa node. |
| 378 | // Returns true if bound to a single numa node, otherwise returns false. |
| 379 | static bool is_bound_to_single_node() { |
| 380 | int nodes = 0; |
| 381 | struct bitmask* bmp = NULL; |
| 382 | unsigned int node = 0; |
| 383 | unsigned int highest_node_number = 0; |
| 384 | |
| 385 | if (_numa_get_membind != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) { |
| 386 | bmp = _numa_get_membind(); |
| 387 | highest_node_number = _numa_max_node(); |
| 388 | } else { |
| 389 | return false; |
| 390 | } |
| 391 | |
| 392 | for (node = 0; node <= highest_node_number; node++) { |
| 393 | if (_numa_bitmask_isbitset(bmp, node)) { |
| 394 | nodes++; |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | if (nodes == 1) { |
| 399 | return true; |
| 400 | } else { |
| 401 | return false; |
| 402 | } |
| 403 | } |
| 404 | }; |
| 405 | |
| 406 | #endif // OS_LINUX_OS_LINUX_HPP |
| 407 | |