| 1 | /* |
| 2 | * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include "precompiled.hpp" |
| 26 | #include "jfr/jfrEvents.hpp" |
| 27 | #include "jfr/jni/jfrJavaSupport.hpp" |
| 28 | #include "jfr/leakprofiler/leakProfiler.hpp" |
| 29 | #include "jfr/recorder/repository/jfrEmergencyDump.hpp" |
| 30 | #include "jfr/recorder/service/jfrPostBox.hpp" |
| 31 | #include "jfr/recorder/service/jfrRecorderService.hpp" |
| 32 | #include "jfr/utilities/jfrTypes.hpp" |
| 33 | #include "logging/log.hpp" |
| 34 | #include "memory/resourceArea.hpp" |
| 35 | #include "runtime/atomic.hpp" |
| 36 | #include "runtime/handles.inline.hpp" |
| 37 | #include "runtime/globals.hpp" |
| 38 | #include "runtime/mutexLocker.hpp" |
| 39 | #include "runtime/os.hpp" |
| 40 | #include "runtime/thread.inline.hpp" |
| 41 | #include "utilities/growableArray.hpp" |
| 42 | |
// File name templates for the emergency dump file; which one is used depends
// on the failure cause reported by JfrJavaSupport::cause().
// NOTE(review): "%p" is presumably expanded to the process id by
// Arguments::copy_expand_pid() -- confirm against that helper.
static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr";
static const char vm_oom_filename_fmt[]   = "hs_oom_pid%p.jfr";
static const char vm_soe_filename_fmt[]   = "hs_soe_pid%p.jfr";

// Extension given to emergency chunk files.
static const char chunk_file_jfr_ext[]    = ".jfr";

// Number of characters in a "YYYY-MM-DDTHH:MM:SS" timestamp (terminator excluded).
static const size_t iso8601_len = sizeof("YYYY-MM-DDTHH:MM:SS") - 1;
| 48 | |
| 49 | static fio_fd open_exclusivly(const char* path) { |
| 50 | return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE); |
| 51 | } |
| 52 | |
| 53 | static int file_sort(const char** const file1, const char** file2) { |
| 54 | assert(NULL != *file1 && NULL != *file2, "invariant" ); |
| 55 | int cmp = strncmp(*file1, *file2, iso8601_len); |
| 56 | if (0 == cmp) { |
| 57 | const char* const dot1 = strchr(*file1, '.'); |
| 58 | assert(NULL != dot1, "invariant" ); |
| 59 | const char* const dot2 = strchr(*file2, '.'); |
| 60 | assert(NULL != dot2, "invariant" ); |
| 61 | ptrdiff_t file1_len = dot1 - *file1; |
| 62 | ptrdiff_t file2_len = dot2 - *file2; |
| 63 | if (file1_len < file2_len) { |
| 64 | return -1; |
| 65 | } |
| 66 | if (file1_len > file2_len) { |
| 67 | return 1; |
| 68 | } |
| 69 | assert(file1_len == file2_len, "invariant" ); |
| 70 | cmp = strncmp(*file1, *file2, file1_len); |
| 71 | } |
| 72 | assert(cmp != 0, "invariant" ); |
| 73 | return cmp; |
| 74 | } |
| 75 | |
| 76 | static void iso8601_to_date_time(char* iso8601_str) { |
| 77 | assert(iso8601_str != NULL, "invariant" ); |
| 78 | assert(strlen(iso8601_str) == iso8601_len, "invariant" ); |
| 79 | // "YYYY-MM-DDTHH:MM:SS" |
| 80 | for (size_t i = 0; i < iso8601_len; ++i) { |
| 81 | switch (iso8601_str[i]) { |
| 82 | case 'T': |
| 83 | case '-': |
| 84 | case ':': |
| 85 | iso8601_str[i] = '_'; |
| 86 | break; |
| 87 | } |
| 88 | } |
| 89 | // "YYYY_MM_DD_HH_MM_SS" |
| 90 | } |
| 91 | |
| 92 | static void date_time(char* buffer, size_t buffer_len) { |
| 93 | assert(buffer != NULL, "invariant" ); |
| 94 | assert(buffer_len >= iso8601_len, "buffer too small" ); |
| 95 | os::iso8601_time(buffer, buffer_len); |
| 96 | assert(strlen(buffer) >= iso8601_len + 1, "invariant" ); |
| 97 | // "YYYY-MM-DDTHH:MM:SS" |
| 98 | buffer[iso8601_len] = '\0'; |
| 99 | iso8601_to_date_time(buffer); |
| 100 | } |
| 101 | |
| 102 | static int64_t file_size(fio_fd fd) { |
| 103 | assert(fd != invalid_fd, "invariant" ); |
| 104 | const int64_t current_offset = os::current_file_offset(fd); |
| 105 | const int64_t size = os::lseek(fd, 0, SEEK_END); |
| 106 | os::seek_to_file_offset(fd, current_offset); |
| 107 | return size; |
| 108 | } |
| 109 | |
| 110 | class RepositoryIterator : public StackObj { |
| 111 | private: |
| 112 | const char* const _repo; |
| 113 | const size_t _repository_len; |
| 114 | GrowableArray<const char*>* _files; |
| 115 | const char* const fully_qualified(const char* entry) const; |
| 116 | mutable int _iterator; |
| 117 | |
| 118 | public: |
| 119 | RepositoryIterator(const char* repository, size_t repository_len); |
| 120 | ~RepositoryIterator() {} |
| 121 | const char* const filter(const char* entry) const; |
| 122 | bool has_next() const; |
| 123 | const char* const next() const; |
| 124 | }; |
| 125 | |
| 126 | const char* const RepositoryIterator::fully_qualified(const char* entry) const { |
| 127 | assert(NULL != entry, "invariant" ); |
| 128 | char* file_path_entry = NULL; |
| 129 | // only use files that have content, not placeholders |
| 130 | const char* const file_separator = os::file_separator(); |
| 131 | if (NULL != file_separator) { |
| 132 | const size_t entry_len = strlen(entry); |
| 133 | const size_t file_separator_length = strlen(file_separator); |
| 134 | const size_t file_path_entry_length = _repository_len + file_separator_length + entry_len; |
| 135 | file_path_entry = NEW_RESOURCE_ARRAY_RETURN_NULL(char, file_path_entry_length + 1); |
| 136 | if (NULL == file_path_entry) { |
| 137 | return NULL; |
| 138 | } |
| 139 | int position = 0; |
| 140 | position += jio_snprintf(&file_path_entry[position], _repository_len + 1, "%s" , _repo); |
| 141 | position += jio_snprintf(&file_path_entry[position], file_separator_length + 1, "%s" , os::file_separator()); |
| 142 | position += jio_snprintf(&file_path_entry[position], entry_len + 1, "%s" , entry); |
| 143 | file_path_entry[position] = '\0'; |
| 144 | assert((size_t)position == file_path_entry_length, "invariant" ); |
| 145 | assert(strlen(file_path_entry) == (size_t)position, "invariant" ); |
| 146 | } |
| 147 | return file_path_entry; |
| 148 | } |
| 149 | |
| 150 | const char* const RepositoryIterator::filter(const char* entry) const { |
| 151 | if (entry == NULL) { |
| 152 | return NULL; |
| 153 | } |
| 154 | const size_t entry_len = strlen(entry); |
| 155 | if (entry_len <= 2) { |
| 156 | // for "." and ".." |
| 157 | return NULL; |
| 158 | } |
| 159 | char* entry_name = NEW_RESOURCE_ARRAY_RETURN_NULL(char, entry_len + 1); |
| 160 | if (entry_name == NULL) { |
| 161 | return NULL; |
| 162 | } |
| 163 | strncpy(entry_name, entry, entry_len + 1); |
| 164 | const char* const fully_qualified_path_entry = fully_qualified(entry_name); |
| 165 | if (NULL == fully_qualified_path_entry) { |
| 166 | return NULL; |
| 167 | } |
| 168 | const fio_fd entry_fd = open_exclusivly(fully_qualified_path_entry); |
| 169 | if (invalid_fd == entry_fd) { |
| 170 | return NULL; |
| 171 | } |
| 172 | const int64_t entry_size = file_size(entry_fd); |
| 173 | os::close(entry_fd); |
| 174 | if (0 == entry_size) { |
| 175 | return NULL; |
| 176 | } |
| 177 | return entry_name; |
| 178 | } |
| 179 | |
| 180 | RepositoryIterator::RepositoryIterator(const char* repository, size_t repository_len) : |
| 181 | _repo(repository), |
| 182 | _repository_len(repository_len), |
| 183 | _files(NULL), |
| 184 | _iterator(0) { |
| 185 | if (NULL != _repo) { |
| 186 | assert(strlen(_repo) == _repository_len, "invariant" ); |
| 187 | _files = new GrowableArray<const char*>(10); |
| 188 | DIR* dirp = os::opendir(_repo); |
| 189 | if (dirp == NULL) { |
| 190 | log_error(jfr, system)("Unable to open repository %s" , _repo); |
| 191 | return; |
| 192 | } |
| 193 | struct dirent* dentry; |
| 194 | while ((dentry = os::readdir(dirp)) != NULL) { |
| 195 | const char* const entry_path = filter(dentry->d_name); |
| 196 | if (NULL != entry_path) { |
| 197 | _files->append(entry_path); |
| 198 | } |
| 199 | } |
| 200 | os::closedir(dirp); |
| 201 | if (_files->length() > 1) { |
| 202 | _files->sort(file_sort); |
| 203 | } |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | bool RepositoryIterator::has_next() const { |
| 208 | return (_files != NULL && _iterator < _files->length()); |
| 209 | } |
| 210 | |
| 211 | const char* const RepositoryIterator::next() const { |
| 212 | return _iterator >= _files->length() ? NULL : fully_qualified(_files->at(_iterator++)); |
| 213 | } |
| 214 | |
| 215 | static void write_emergency_file(fio_fd emergency_fd, const RepositoryIterator& iterator) { |
| 216 | assert(emergency_fd != invalid_fd, "invariant" ); |
| 217 | const size_t size_of_file_copy_block = 1 * M; // 1 mb |
| 218 | jbyte* const file_copy_block = NEW_RESOURCE_ARRAY_RETURN_NULL(jbyte, size_of_file_copy_block); |
| 219 | if (file_copy_block == NULL) { |
| 220 | return; |
| 221 | } |
| 222 | while (iterator.has_next()) { |
| 223 | fio_fd current_fd = invalid_fd; |
| 224 | const char* const fqn = iterator.next(); |
| 225 | if (fqn != NULL) { |
| 226 | current_fd = open_exclusivly(fqn); |
| 227 | if (current_fd != invalid_fd) { |
| 228 | const int64_t current_filesize = file_size(current_fd); |
| 229 | assert(current_filesize > 0, "invariant" ); |
| 230 | int64_t bytes_read = 0; |
| 231 | int64_t bytes_written = 0; |
| 232 | while (bytes_read < current_filesize) { |
| 233 | const ssize_t read_result = os::read_at(current_fd, file_copy_block, size_of_file_copy_block, bytes_read); |
| 234 | if (-1 == read_result) { |
| 235 | log_info(jfr)( // For user, should not be "jfr, system" |
| 236 | "Unable to recover JFR data" ); |
| 237 | break; |
| 238 | } |
| 239 | bytes_read += (int64_t)read_result; |
| 240 | assert(bytes_read - bytes_written <= (int64_t)size_of_file_copy_block, "invariant" ); |
| 241 | bytes_written += (int64_t)os::write(emergency_fd, file_copy_block, bytes_read - bytes_written); |
| 242 | assert(bytes_read == bytes_written, "invariant" ); |
| 243 | } |
| 244 | os::close(current_fd); |
| 245 | } |
| 246 | } |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | static const char* create_emergency_dump_path() { |
| 251 | assert(JfrStream_lock->owned_by_self(), "invariant" ); |
| 252 | char* buffer = NEW_RESOURCE_ARRAY_RETURN_NULL(char, JVM_MAXPATHLEN); |
| 253 | if (NULL == buffer) { |
| 254 | return NULL; |
| 255 | } |
| 256 | const char* const cwd = os::get_current_directory(buffer, JVM_MAXPATHLEN); |
| 257 | if (NULL == cwd) { |
| 258 | return NULL; |
| 259 | } |
| 260 | size_t pos = strlen(cwd); |
| 261 | const int fsep_len = jio_snprintf(&buffer[pos], JVM_MAXPATHLEN - pos, "%s" , os::file_separator()); |
| 262 | const char* filename_fmt = NULL; |
| 263 | // fetch specific error cause |
| 264 | switch (JfrJavaSupport::cause()) { |
| 265 | case JfrJavaSupport::OUT_OF_MEMORY: |
| 266 | filename_fmt = vm_oom_filename_fmt; |
| 267 | break; |
| 268 | case JfrJavaSupport::STACK_OVERFLOW: |
| 269 | filename_fmt = vm_soe_filename_fmt; |
| 270 | break; |
| 271 | default: |
| 272 | filename_fmt = vm_error_filename_fmt; |
| 273 | } |
| 274 | char* emergency_dump_path = NULL; |
| 275 | pos += fsep_len; |
| 276 | if (Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), &buffer[pos], JVM_MAXPATHLEN - pos)) { |
| 277 | const size_t emergency_filename_length = strlen(buffer); |
| 278 | emergency_dump_path = NEW_RESOURCE_ARRAY_RETURN_NULL(char, emergency_filename_length + 1); |
| 279 | if (NULL == emergency_dump_path) { |
| 280 | return NULL; |
| 281 | } |
| 282 | strncpy(emergency_dump_path, buffer, emergency_filename_length + 1); |
| 283 | } |
| 284 | if (emergency_dump_path != NULL) { |
| 285 | log_info(jfr)( // For user, should not be "jfr, system" |
| 286 | "Attempting to recover JFR data, emergency jfr file: %s" , emergency_dump_path); |
| 287 | } |
| 288 | return emergency_dump_path; |
| 289 | } |
| 290 | |
| 291 | // Caller needs ResourceMark |
| 292 | static const char* create_emergency_chunk_path(const char* repository_path) { |
| 293 | assert(repository_path != NULL, "invariant" ); |
| 294 | assert(JfrStream_lock->owned_by_self(), "invariant" ); |
| 295 | const size_t repository_path_len = strlen(repository_path); |
| 296 | // date time |
| 297 | char date_time_buffer[32] = { 0 }; |
| 298 | date_time(date_time_buffer, sizeof(date_time_buffer)); |
| 299 | size_t date_time_len = strlen(date_time_buffer); |
| 300 | size_t chunkname_max_len = repository_path_len // repository_base_path |
| 301 | + 1 // "/" |
| 302 | + date_time_len // date_time |
| 303 | + strlen(chunk_file_jfr_ext) // .jfr |
| 304 | + 1; |
| 305 | char* chunk_path = NEW_RESOURCE_ARRAY_RETURN_NULL(char, chunkname_max_len); |
| 306 | if (chunk_path == NULL) { |
| 307 | return NULL; |
| 308 | } |
| 309 | // append the individual substrings |
| 310 | jio_snprintf(chunk_path, chunkname_max_len, "%s%s%s%s" , repository_path_len, os::file_separator(), date_time_buffer, chunk_file_jfr_ext); |
| 311 | return chunk_path; |
| 312 | } |
| 313 | |
| 314 | static fio_fd emergency_dump_file_descriptor() { |
| 315 | assert(JfrStream_lock->owned_by_self(), "invariant" ); |
| 316 | ResourceMark rm; |
| 317 | const char* const emergency_dump_path = create_emergency_dump_path(); |
| 318 | return emergency_dump_path != NULL ? open_exclusivly(emergency_dump_path) : invalid_fd; |
| 319 | } |
| 320 | |
| 321 | const char* JfrEmergencyDump::build_dump_path(const char* repository_path) { |
| 322 | return repository_path == NULL ? create_emergency_dump_path() : create_emergency_chunk_path(repository_path); |
| 323 | } |
| 324 | |
| 325 | void JfrEmergencyDump::on_vm_error(const char* repository_path) { |
| 326 | assert(repository_path != NULL, "invariant" ); |
| 327 | ResourceMark rm; |
| 328 | MutexLocker stream_lock(JfrStream_lock, Mutex::_no_safepoint_check_flag); |
| 329 | const fio_fd emergency_fd = emergency_dump_file_descriptor(); |
| 330 | if (emergency_fd != invalid_fd) { |
| 331 | RepositoryIterator iterator(repository_path, strlen(repository_path)); |
| 332 | write_emergency_file(emergency_fd, iterator); |
| 333 | os::close(emergency_fd); |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | /* |
| 338 | * We are just about to exit the VM, so we will be very aggressive |
| 339 | * at this point in order to increase overall success of dumping jfr data: |
| 340 | * |
| 341 | * 1. if the thread state is not "_thread_in_vm", we will quick transition |
| 342 | * it to "_thread_in_vm". |
| 343 | * 2. the nesting state for both resource and handle areas are unknown, |
| 344 | * so we allocate new fresh arenas, discarding the old ones. |
| 345 | * 3. if the thread is the owner of some critical lock(s), unlock them. |
| 346 | * |
| 347 | * If we end up deadlocking in the attempt of dumping out jfr data, |
| 348 | * we rely on the WatcherThread task "is_error_reported()", |
| 349 | * to exit the VM after a hard-coded timeout. |
| 350 | * This "safety net" somewhat explains the aggressiveness in this attempt. |
| 351 | * |
| 352 | */ |
| 353 | static void prepare_for_emergency_dump(Thread* thread) { |
| 354 | if (thread->is_Java_thread()) { |
| 355 | ((JavaThread*)thread)->set_thread_state(_thread_in_vm); |
| 356 | } |
| 357 | |
| 358 | #ifdef ASSERT |
| 359 | Monitor* owned_lock = thread->owned_locks(); |
| 360 | while (owned_lock != NULL) { |
| 361 | Monitor* next = owned_lock->next(); |
| 362 | owned_lock->unlock(); |
| 363 | owned_lock = next; |
| 364 | } |
| 365 | #endif // ASSERT |
| 366 | |
| 367 | if (Threads_lock->owned_by_self()) { |
| 368 | Threads_lock->unlock(); |
| 369 | } |
| 370 | |
| 371 | if (Module_lock->owned_by_self()) { |
| 372 | Module_lock->unlock(); |
| 373 | } |
| 374 | |
| 375 | if (ClassLoaderDataGraph_lock->owned_by_self()) { |
| 376 | ClassLoaderDataGraph_lock->unlock(); |
| 377 | } |
| 378 | |
| 379 | if (Heap_lock->owned_by_self()) { |
| 380 | Heap_lock->unlock(); |
| 381 | } |
| 382 | |
| 383 | if (VMOperationQueue_lock->owned_by_self()) { |
| 384 | VMOperationQueue_lock->unlock(); |
| 385 | } |
| 386 | |
| 387 | if (VMOperationRequest_lock->owned_by_self()) { |
| 388 | VMOperationRequest_lock->unlock(); |
| 389 | } |
| 390 | |
| 391 | |
| 392 | if (Service_lock->owned_by_self()) { |
| 393 | Service_lock->unlock(); |
| 394 | } |
| 395 | |
| 396 | if (CodeCache_lock->owned_by_self()) { |
| 397 | CodeCache_lock->unlock(); |
| 398 | } |
| 399 | |
| 400 | if (PeriodicTask_lock->owned_by_self()) { |
| 401 | PeriodicTask_lock->unlock(); |
| 402 | } |
| 403 | |
| 404 | if (JfrMsg_lock->owned_by_self()) { |
| 405 | JfrMsg_lock->unlock(); |
| 406 | } |
| 407 | |
| 408 | if (JfrBuffer_lock->owned_by_self()) { |
| 409 | JfrBuffer_lock->unlock(); |
| 410 | } |
| 411 | |
| 412 | if (JfrStream_lock->owned_by_self()) { |
| 413 | JfrStream_lock->unlock(); |
| 414 | } |
| 415 | |
| 416 | if (JfrStacktrace_lock->owned_by_self()) { |
| 417 | JfrStacktrace_lock->unlock(); |
| 418 | } |
| 419 | } |
| 420 | |
| 421 | static volatile int jfr_shutdown_lock = 0; |
| 422 | |
| 423 | static bool guard_reentrancy() { |
| 424 | return Atomic::cmpxchg(1, &jfr_shutdown_lock, 0) == 0; |
| 425 | } |
| 426 | |
| 427 | void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) { |
| 428 | if (!guard_reentrancy()) { |
| 429 | return; |
| 430 | } |
| 431 | // function made non-reentrant |
| 432 | Thread* thread = Thread::current(); |
| 433 | if (exception_handler) { |
| 434 | // we are crashing |
| 435 | if (thread->is_Watcher_thread()) { |
| 436 | // The Watcher thread runs the periodic thread sampling task. |
| 437 | // If it has crashed, it is likely that another thread is |
| 438 | // left in a suspended state. This would mean the system |
| 439 | // will not be able to ever move to a safepoint. We try |
| 440 | // to avoid issuing safepoint operations when attempting |
| 441 | // an emergency dump, but a safepoint might be already pending. |
| 442 | return; |
| 443 | } |
| 444 | prepare_for_emergency_dump(thread); |
| 445 | } |
| 446 | EventDumpReason event; |
| 447 | if (event.should_commit()) { |
| 448 | event.set_reason(exception_handler ? "Crash" : "Out of Memory" ); |
| 449 | event.set_recordingId(-1); |
| 450 | event.commit(); |
| 451 | } |
| 452 | if (!exception_handler) { |
| 453 | // OOM |
| 454 | LeakProfiler::emit_events(max_jlong, false); |
| 455 | } |
| 456 | const int messages = MSGBIT(MSG_VM_ERROR); |
| 457 | ResourceMark rm(thread); |
| 458 | HandleMark hm(thread); |
| 459 | JfrRecorderService service; |
| 460 | service.rotate(messages); |
| 461 | } |
| 462 | |