1/*
2 * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "jfr/jfrEvents.hpp"
27#include "jfr/jni/jfrJavaSupport.hpp"
28#include "jfr/leakprofiler/leakProfiler.hpp"
29#include "jfr/recorder/repository/jfrEmergencyDump.hpp"
30#include "jfr/recorder/service/jfrPostBox.hpp"
31#include "jfr/recorder/service/jfrRecorderService.hpp"
32#include "jfr/utilities/jfrTypes.hpp"
33#include "logging/log.hpp"
34#include "memory/resourceArea.hpp"
35#include "runtime/atomic.hpp"
36#include "runtime/handles.inline.hpp"
37#include "runtime/globals.hpp"
38#include "runtime/mutexLocker.hpp"
39#include "runtime/os.hpp"
40#include "runtime/thread.inline.hpp"
41#include "utilities/growableArray.hpp"
42
// File name templates for the emergency dump written to the current working
// directory; "%p" is expanded to the pid by Arguments::copy_expand_pid().
static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr";
static const char vm_oom_filename_fmt[] = "hs_oom_pid%p.jfr";
static const char vm_soe_filename_fmt[] = "hs_soe_pid%p.jfr";
// Extension of JFR chunk files in the repository.
static const char chunk_file_jfr_ext[] = ".jfr";
static const size_t iso8601_len = 19; // "YYYY-MM-DDTHH:MM:SS"
48
49static fio_fd open_exclusivly(const char* path) {
50 return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE);
51}
52
53static int file_sort(const char** const file1, const char** file2) {
54 assert(NULL != *file1 && NULL != *file2, "invariant");
55 int cmp = strncmp(*file1, *file2, iso8601_len);
56 if (0 == cmp) {
57 const char* const dot1 = strchr(*file1, '.');
58 assert(NULL != dot1, "invariant");
59 const char* const dot2 = strchr(*file2, '.');
60 assert(NULL != dot2, "invariant");
61 ptrdiff_t file1_len = dot1 - *file1;
62 ptrdiff_t file2_len = dot2 - *file2;
63 if (file1_len < file2_len) {
64 return -1;
65 }
66 if (file1_len > file2_len) {
67 return 1;
68 }
69 assert(file1_len == file2_len, "invariant");
70 cmp = strncmp(*file1, *file2, file1_len);
71 }
72 assert(cmp != 0, "invariant");
73 return cmp;
74}
75
76static void iso8601_to_date_time(char* iso8601_str) {
77 assert(iso8601_str != NULL, "invariant");
78 assert(strlen(iso8601_str) == iso8601_len, "invariant");
79 // "YYYY-MM-DDTHH:MM:SS"
80 for (size_t i = 0; i < iso8601_len; ++i) {
81 switch (iso8601_str[i]) {
82 case 'T':
83 case '-':
84 case ':':
85 iso8601_str[i] = '_';
86 break;
87 }
88 }
89 // "YYYY_MM_DD_HH_MM_SS"
90}
91
// Write the current time into 'buffer' as "YYYY_MM_DD_HH_MM_SS".
// 'buffer_len' must be large enough for the ISO-8601 string produced
// by os::iso8601_time() plus its terminator.
static void date_time(char* buffer, size_t buffer_len) {
  assert(buffer != NULL, "invariant");
  assert(buffer_len >= iso8601_len, "buffer too small");
  os::iso8601_time(buffer, buffer_len);
  assert(strlen(buffer) >= iso8601_len + 1, "invariant");
  // "YYYY-MM-DDTHH:MM:SS"
  // truncate after the seconds field, dropping any trailing suffix
  buffer[iso8601_len] = '\0';
  iso8601_to_date_time(buffer);
}
101
102static int64_t file_size(fio_fd fd) {
103 assert(fd != invalid_fd, "invariant");
104 const int64_t current_offset = os::current_file_offset(fd);
105 const int64_t size = os::lseek(fd, 0, SEEK_END);
106 os::seek_to_file_offset(fd, current_offset);
107 return size;
108}
109
// Iterates the content-bearing files of a JFR repository directory in
// sorted (timestamp) order, yielding fully qualified, resource-allocated
// paths. Caller needs ResourceMark spanning the iterator's use.
class RepositoryIterator : public StackObj {
 private:
  const char* const _repo;            // repository directory path (not owned)
  const size_t _repository_len;       // strlen(_repo)
  GrowableArray<const char*>* _files; // filtered, sorted entry names
  const char* const fully_qualified(const char* entry) const;
  mutable int _iterator;              // index of the next entry to return

 public:
  RepositoryIterator(const char* repository, size_t repository_len);
  ~RepositoryIterator() {}
  // Returns a resource-allocated copy of 'entry' if it should be included,
  // NULL otherwise (empty/placeholder files, "." and "..", failures).
  const char* const filter(const char* entry) const;
  bool has_next() const;
  const char* const next() const;
};
125
// Build "<repository><file-separator><entry>" in resource-area memory.
// Returns NULL if the file separator is unavailable or allocation fails.
// Caller needs ResourceMark.
const char* const RepositoryIterator::fully_qualified(const char* entry) const {
  assert(NULL != entry, "invariant");
  char* file_path_entry = NULL;
  // only use files that have content, not placeholders
  const char* const file_separator = os::file_separator();
  if (NULL != file_separator) {
    const size_t entry_len = strlen(entry);
    const size_t file_separator_length = strlen(file_separator);
    const size_t file_path_entry_length = _repository_len + file_separator_length + entry_len;
    file_path_entry = NEW_RESOURCE_ARRAY_RETURN_NULL(char, file_path_entry_length + 1);
    if (NULL == file_path_entry) {
      return NULL;
    }
    // concatenate repository path, separator and entry name in place
    int position = 0;
    position += jio_snprintf(&file_path_entry[position], _repository_len + 1, "%s", _repo);
    position += jio_snprintf(&file_path_entry[position], file_separator_length + 1, "%s", os::file_separator());
    position += jio_snprintf(&file_path_entry[position], entry_len + 1, "%s", entry);
    file_path_entry[position] = '\0';
    assert((size_t)position == file_path_entry_length, "invariant");
    assert(strlen(file_path_entry) == (size_t)position, "invariant");
  }
  return file_path_entry;
}
149
150const char* const RepositoryIterator::filter(const char* entry) const {
151 if (entry == NULL) {
152 return NULL;
153 }
154 const size_t entry_len = strlen(entry);
155 if (entry_len <= 2) {
156 // for "." and ".."
157 return NULL;
158 }
159 char* entry_name = NEW_RESOURCE_ARRAY_RETURN_NULL(char, entry_len + 1);
160 if (entry_name == NULL) {
161 return NULL;
162 }
163 strncpy(entry_name, entry, entry_len + 1);
164 const char* const fully_qualified_path_entry = fully_qualified(entry_name);
165 if (NULL == fully_qualified_path_entry) {
166 return NULL;
167 }
168 const fio_fd entry_fd = open_exclusivly(fully_qualified_path_entry);
169 if (invalid_fd == entry_fd) {
170 return NULL;
171 }
172 const int64_t entry_size = file_size(entry_fd);
173 os::close(entry_fd);
174 if (0 == entry_size) {
175 return NULL;
176 }
177 return entry_name;
178}
179
// Scan 'repository' and collect the names of all chunk files that have
// content (see filter()), sorted chronologically via file_sort.
// Caller needs ResourceMark for the collected names.
RepositoryIterator::RepositoryIterator(const char* repository, size_t repository_len) :
  _repo(repository),
  _repository_len(repository_len),
  _files(NULL),
  _iterator(0) {
  if (NULL != _repo) {
    assert(strlen(_repo) == _repository_len, "invariant");
    _files = new GrowableArray<const char*>(10);
    DIR* dirp = os::opendir(_repo);
    if (dirp == NULL) {
      log_error(jfr, system)("Unable to open repository %s", _repo);
      return;
    }
    struct dirent* dentry;
    while ((dentry = os::readdir(dirp)) != NULL) {
      // filter() returns NULL for entries we should skip
      const char* const entry_path = filter(dentry->d_name);
      if (NULL != entry_path) {
        _files->append(entry_path);
      }
    }
    os::closedir(dirp);
    if (_files->length() > 1) {
      _files->sort(file_sort);
    }
  }
}
206
207bool RepositoryIterator::has_next() const {
208 return (_files != NULL && _iterator < _files->length());
209}
210
211const char* const RepositoryIterator::next() const {
212 return _iterator >= _files->length() ? NULL : fully_qualified(_files->at(_iterator++));
213}
214
// Concatenate the contents of every file yielded by 'iterator' into
// 'emergency_fd', copying in 1M blocks. Best-effort: a failed read logs
// a message and skips the remainder of that file. Caller needs ResourceMark.
static void write_emergency_file(fio_fd emergency_fd, const RepositoryIterator& iterator) {
  assert(emergency_fd != invalid_fd, "invariant");
  const size_t size_of_file_copy_block = 1 * M; // 1 mb
  jbyte* const file_copy_block = NEW_RESOURCE_ARRAY_RETURN_NULL(jbyte, size_of_file_copy_block);
  if (file_copy_block == NULL) {
    return;
  }
  while (iterator.has_next()) {
    fio_fd current_fd = invalid_fd;
    const char* const fqn = iterator.next();
    if (fqn != NULL) {
      current_fd = open_exclusivly(fqn);
      if (current_fd != invalid_fd) {
        const int64_t current_filesize = file_size(current_fd);
        assert(current_filesize > 0, "invariant");
        int64_t bytes_read = 0;
        int64_t bytes_written = 0;
        while (bytes_read < current_filesize) {
          // read the next block starting at offset 'bytes_read'
          const ssize_t read_result = os::read_at(current_fd, file_copy_block, size_of_file_copy_block, bytes_read);
          if (-1 == read_result) {
            log_info(jfr)( // For user, should not be "jfr, system"
              "Unable to recover JFR data");
            break;
          }
          bytes_read += (int64_t)read_result;
          // the unwritten tail always fits within one copy block
          assert(bytes_read - bytes_written <= (int64_t)size_of_file_copy_block, "invariant");
          bytes_written += (int64_t)os::write(emergency_fd, file_copy_block, bytes_read - bytes_written);
          assert(bytes_read == bytes_written, "invariant");
        }
        os::close(current_fd);
      }
    }
  }
}
249
// Compose "<cwd><file-separator>hs_{err|oom|soe}_pid<pid>.jfr" in
// resource-area memory, choosing the file name template from the recorded
// error cause. Returns NULL on allocation or cwd-lookup failure.
// Caller needs ResourceMark.
static const char* create_emergency_dump_path() {
  assert(JfrStream_lock->owned_by_self(), "invariant");
  char* buffer = NEW_RESOURCE_ARRAY_RETURN_NULL(char, JVM_MAXPATHLEN);
  if (NULL == buffer) {
    return NULL;
  }
  const char* const cwd = os::get_current_directory(buffer, JVM_MAXPATHLEN);
  if (NULL == cwd) {
    return NULL;
  }
  size_t pos = strlen(cwd);
  const int fsep_len = jio_snprintf(&buffer[pos], JVM_MAXPATHLEN - pos, "%s", os::file_separator());
  const char* filename_fmt = NULL;
  // fetch specific error cause
  switch (JfrJavaSupport::cause()) {
    case JfrJavaSupport::OUT_OF_MEMORY:
      filename_fmt = vm_oom_filename_fmt;
      break;
    case JfrJavaSupport::STACK_OVERFLOW:
      filename_fmt = vm_soe_filename_fmt;
      break;
    default:
      filename_fmt = vm_error_filename_fmt;
  }
  char* emergency_dump_path = NULL;
  pos += fsep_len;
  // expand "%p" in the template to the current pid, right after the separator
  if (Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), &buffer[pos], JVM_MAXPATHLEN - pos)) {
    const size_t emergency_filename_length = strlen(buffer);
    emergency_dump_path = NEW_RESOURCE_ARRAY_RETURN_NULL(char, emergency_filename_length + 1);
    if (NULL == emergency_dump_path) {
      return NULL;
    }
    strncpy(emergency_dump_path, buffer, emergency_filename_length + 1);
  }
  if (emergency_dump_path != NULL) {
    log_info(jfr)( // For user, should not be "jfr, system"
      "Attempting to recover JFR data, emergency jfr file: %s", emergency_dump_path);
  }
  return emergency_dump_path;
}
290
291// Caller needs ResourceMark
292static const char* create_emergency_chunk_path(const char* repository_path) {
293 assert(repository_path != NULL, "invariant");
294 assert(JfrStream_lock->owned_by_self(), "invariant");
295 const size_t repository_path_len = strlen(repository_path);
296 // date time
297 char date_time_buffer[32] = { 0 };
298 date_time(date_time_buffer, sizeof(date_time_buffer));
299 size_t date_time_len = strlen(date_time_buffer);
300 size_t chunkname_max_len = repository_path_len // repository_base_path
301 + 1 // "/"
302 + date_time_len // date_time
303 + strlen(chunk_file_jfr_ext) // .jfr
304 + 1;
305 char* chunk_path = NEW_RESOURCE_ARRAY_RETURN_NULL(char, chunkname_max_len);
306 if (chunk_path == NULL) {
307 return NULL;
308 }
309 // append the individual substrings
310 jio_snprintf(chunk_path, chunkname_max_len, "%s%s%s%s", repository_path_len, os::file_separator(), date_time_buffer, chunk_file_jfr_ext);
311 return chunk_path;
312}
313
314static fio_fd emergency_dump_file_descriptor() {
315 assert(JfrStream_lock->owned_by_self(), "invariant");
316 ResourceMark rm;
317 const char* const emergency_dump_path = create_emergency_dump_path();
318 return emergency_dump_path != NULL ? open_exclusivly(emergency_dump_path) : invalid_fd;
319}
320
321const char* JfrEmergencyDump::build_dump_path(const char* repository_path) {
322 return repository_path == NULL ? create_emergency_dump_path() : create_emergency_chunk_path(repository_path);
323}
324
325void JfrEmergencyDump::on_vm_error(const char* repository_path) {
326 assert(repository_path != NULL, "invariant");
327 ResourceMark rm;
328 MutexLocker stream_lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
329 const fio_fd emergency_fd = emergency_dump_file_descriptor();
330 if (emergency_fd != invalid_fd) {
331 RepositoryIterator iterator(repository_path, strlen(repository_path));
332 write_emergency_file(emergency_fd, iterator);
333 os::close(emergency_fd);
334 }
335}
336
337/*
338* We are just about to exit the VM, so we will be very aggressive
339* at this point in order to increase overall success of dumping jfr data:
340*
341* 1. if the thread state is not "_thread_in_vm", we will quick transition
342* it to "_thread_in_vm".
343* 2. the nesting state for both resource and handle areas are unknown,
344* so we allocate new fresh arenas, discarding the old ones.
345* 3. if the thread is the owner of some critical lock(s), unlock them.
346*
347* If we end up deadlocking in the attempt of dumping out jfr data,
348* we rely on the WatcherThread task "is_error_reported()",
349* to exit the VM after a hard-coded timeout.
350* This "safety net" somewhat explains the aggressiveness in this attempt.
351*
352*/
// Aggressively put 'thread' into a state from which a JFR dump can be
// attempted (see the block comment above for the full rationale).
static void prepare_for_emergency_dump(Thread* thread) {
  // 1. force a Java thread into _thread_in_vm so VM-internal code can run
  if (thread->is_Java_thread()) {
    ((JavaThread*)thread)->set_thread_state(_thread_in_vm);
  }

#ifdef ASSERT
  // debug builds track lock ownership per thread: walk the owned-locks
  // chain and release everything this thread still holds
  Monitor* owned_lock = thread->owned_locks();
  while (owned_lock != NULL) {
    Monitor* next = owned_lock->next();
    owned_lock->unlock();
    owned_lock = next;
  }
#endif // ASSERT

  // explicitly release each lock the dump path may need to acquire;
  // each unlock is a no-op guarded by owned_by_self()
  if (Threads_lock->owned_by_self()) {
    Threads_lock->unlock();
  }

  if (Module_lock->owned_by_self()) {
    Module_lock->unlock();
  }

  if (ClassLoaderDataGraph_lock->owned_by_self()) {
    ClassLoaderDataGraph_lock->unlock();
  }

  if (Heap_lock->owned_by_self()) {
    Heap_lock->unlock();
  }

  if (VMOperationQueue_lock->owned_by_self()) {
    VMOperationQueue_lock->unlock();
  }

  if (VMOperationRequest_lock->owned_by_self()) {
    VMOperationRequest_lock->unlock();
  }


  if (Service_lock->owned_by_self()) {
    Service_lock->unlock();
  }

  if (CodeCache_lock->owned_by_self()) {
    CodeCache_lock->unlock();
  }

  if (PeriodicTask_lock->owned_by_self()) {
    PeriodicTask_lock->unlock();
  }

  // JFR-internal locks last, in the same order the recorder service uses them
  if (JfrMsg_lock->owned_by_self()) {
    JfrMsg_lock->unlock();
  }

  if (JfrBuffer_lock->owned_by_self()) {
    JfrBuffer_lock->unlock();
  }

  if (JfrStream_lock->owned_by_self()) {
    JfrStream_lock->unlock();
  }

  if (JfrStacktrace_lock->owned_by_self()) {
    JfrStacktrace_lock->unlock();
  }
}
420
421static volatile int jfr_shutdown_lock = 0;
422
423static bool guard_reentrancy() {
424 return Atomic::cmpxchg(1, &jfr_shutdown_lock, 0) == 0;
425}
426
// Last-ditch dump of JFR data on VM shutdown. 'exception_handler' is true
// when invoked from the error (crash) handler, false on Out of Memory.
void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) {
  if (!guard_reentrancy()) {
    return;
  }
  // function made non-reentrant
  Thread* thread = Thread::current();
  if (exception_handler) {
    // we are crashing
    if (thread->is_Watcher_thread()) {
      // The Watcher thread runs the periodic thread sampling task.
      // If it has crashed, it is likely that another thread is
      // left in a suspended state. This would mean the system
      // will not be able to ever move to a safepoint. We try
      // to avoid issuing safepoint operations when attempting
      // an emergency dump, but a safepoint might be already pending.
      return;
    }
    prepare_for_emergency_dump(thread);
  }
  // record why this dump happened
  EventDumpReason event;
  if (event.should_commit()) {
    event.set_reason(exception_handler ? "Crash" : "Out of Memory");
    event.set_recordingId(-1);
    event.commit();
  }
  if (!exception_handler) {
    // OOM
    LeakProfiler::emit_events(max_jlong, false);
  }
  const int messages = MSGBIT(MSG_VM_ERROR);
  // fresh arenas; the nesting state of the existing ones is unknown
  ResourceMark rm(thread);
  HandleMark hm(thread);
  JfrRecorderService service;
  service.rotate(messages);
}
462