/*
 * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP
#define SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "oops/oop.hpp"

class ParallelScavengeHeap;
class PSAdaptiveSizePolicy;
class PSYoungGen;
class PSOldGen;
class ParCompactionManager;
class ParallelTaskTerminator;
class PSParallelCompact;
class GCTaskManager;
class GCTaskQueue;
class PreGCValues;
class MoveAndUpdateClosure;
class RefProcTaskExecutor;
class ParallelOldTracer;
class STWGCTimer;

// The SplitInfo class holds the information needed to 'split' a source region
// so that the live data can be copied to two destination *spaces*. Normally,
// all the live data in a region is copied to a single destination space (e.g.,
// everything live in a region in eden is copied entirely into the old gen).
// However, when the heap is nearly full, all the live data in eden may not fit
// into the old gen. Copying only some of the regions from eden to old gen
// requires finding a region that does not contain a partial object (i.e., no
// live object crosses the region boundary) somewhere near the last object that
// does fit into the old gen. Since it's not always possible to find such a
// region, splitting is necessary for predictable behavior.
//
// A region is always split at the end of the partial object. This avoids
// additional tests when calculating the new location of a pointer, which is a
// very hot code path. The partial object and everything to its left will be
// copied to another space (call it dest_space_1). The live data to the right
// of the partial object will be copied either within the space itself, or to a
// different destination space (distinct from dest_space_1).
//
// Split points are identified during the summary phase, when region
// destinations are computed: data about the split, including the
// partial_object_size, is recorded in a SplitInfo record and the
// partial_object_size field in the summary data is set to zero. The zeroing is
// possible (and necessary) since the partial object will move to a different
// destination space than anything to its right, thus the partial object should
// not affect the locations of any objects to its right.
//
// The recorded data is used during the compaction phase, but only rarely: when
// the partial object on the split region will be copied across a destination
// region boundary. This test is made once each time a region is filled, and is
// a simple address comparison, so the overhead is negligible (see
// PSParallelCompact::first_src_addr()).
//
// Notes:
//
// Only regions with partial objects are split; a region without a partial
// object does not need any extra bookkeeping.
//
// At most one region is split per space, so the amount of data required is
// constant.
//
// A region is split only when the destination space would overflow. Once that
// happens, the destination space is abandoned and no other data (even from
// other source spaces) is targeted to that destination space. Abandoning the
// destination space may leave a somewhat large unused area at the end, if a
// large object caused the overflow.
//
// Future work:
//
// More bookkeeping would be required to continue to use the destination space.
// The most general solution would allow data from regions in two different
// source spaces to be "joined" in a single destination region. At the very
// least, additional code would be required in next_src_region() to detect the
// join and skip to an out-of-order source region. If the join region was also
// the last destination region to which a split region was copied (the most
// likely case), then additional work would be needed to get fill_region() to
// stop iteration and switch to a new source region at the right point. Basic
// idea would be to use a fake value for the top of the source space. It is
// doable, if a bit tricky.
//
// A simpler (but less general) solution would fill the remainder of the
// destination region with a dummy object and continue filling the next
// destination region.

class SplitInfo
{
public:
  // Return true if this split info is valid (i.e., if a split has been
  // recorded). The very first region cannot have a partial object and thus is
  // never split, so 0 is the 'invalid' value.
  bool is_valid() const { return _src_region_idx > 0; }

  // Return true if this split holds data for the specified source region.
  inline bool is_split(size_t source_region) const;

  // The index of the split region, the size of the partial object on that
  // region and the destination of the partial object.
  size_t src_region_idx() const { return _src_region_idx; }
  size_t partial_obj_size() const { return _partial_obj_size; }
  HeapWord* destination() const { return _destination; }

  // The destination count of the partial object referenced by this split
  // (either 1 or 2). This must be added to the destination count of the
  // remainder of the source region.
  unsigned int destination_count() const { return _destination_count; }

  // If a word within the partial object will be written to the first word of a
  // destination region, this is the address of the destination region;
  // otherwise this is NULL.
  HeapWord* dest_region_addr() const { return _dest_region_addr; }

  // If a word within the partial object will be written to the first word of a
  // destination region, this is the address of that word within the partial
  // object; otherwise this is NULL.
  HeapWord* first_src_addr() const { return _first_src_addr; }

  // Record the data necessary to split the region src_region_idx.
  void record(size_t src_region_idx, size_t partial_obj_size,
              HeapWord* destination);

  void clear();

  DEBUG_ONLY(void verify_clear();)

private:
  size_t       _src_region_idx;
  size_t       _partial_obj_size;
  HeapWord*    _destination;
  unsigned int _destination_count;
  HeapWord*    _dest_region_addr;
  HeapWord*    _first_src_addr;
};

inline bool SplitInfo::is_split(size_t region_idx) const
{
  return _src_region_idx == region_idx && is_valid();
}
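
// Illustrative sketch (not part of the implementation): how the summary phase
// might record and later query a split. The region index, size and 'dest'
// address below are hypothetical values chosen only to show the API shape; see
// ParallelCompactData::summarize_split_space() for the real logic.
//
//   SplitInfo split;
//   split.clear();
//   // Suppose region 17 ends with a 6-word partial object whose destination
//   // is 'dest' in the other space:
//   split.record(17, 6, dest);
//   if (split.is_split(17)) {
//     assert(split.partial_obj_size() == 6, "recorded size");
//     // Everything to the right of the partial object is summarized to a
//     // different destination space.
//   }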

class SpaceInfo
{
 public:
  MutableSpace* space() const { return _space; }

  // Where the free space will start after the collection. Valid only after the
  // summary phase completes.
  HeapWord* new_top() const { return _new_top; }

  // Allows new_top to be set.
  HeapWord** new_top_addr() { return &_new_top; }

  // Where the smallest allowable dense prefix ends (used only for perm gen).
  HeapWord* min_dense_prefix() const { return _min_dense_prefix; }

  // Where the dense prefix ends, or the compacted region begins.
  HeapWord* dense_prefix() const { return _dense_prefix; }

  // The start array for the (generation containing the) space, or NULL if there
  // is no start array.
  ObjectStartArray* start_array() const { return _start_array; }

  SplitInfo& split_info() { return _split_info; }

  void set_space(MutableSpace* s) { _space = s; }
  void set_new_top(HeapWord* addr) { _new_top = addr; }
  void set_min_dense_prefix(HeapWord* addr) { _min_dense_prefix = addr; }
  void set_dense_prefix(HeapWord* addr) { _dense_prefix = addr; }
  void set_start_array(ObjectStartArray* s) { _start_array = s; }

  void publish_new_top() const { _space->set_top(_new_top); }

 private:
  MutableSpace*     _space;
  HeapWord*         _new_top;
  HeapWord*         _min_dense_prefix;
  HeapWord*         _dense_prefix;
  ObjectStartArray* _start_array;
  SplitInfo         _split_info;
};

class ParallelCompactData
{
public:
  // Sizes are in HeapWords, unless indicated otherwise.
  static const size_t Log2RegionSize;
  static const size_t RegionSize;
  static const size_t RegionSizeBytes;

  // Mask for the bits in a size_t to get an offset within a region.
  static const size_t RegionSizeOffsetMask;
  // Mask for the bits in a pointer to get an offset within a region.
  static const size_t RegionAddrOffsetMask;
  // Mask for the bits in a pointer to get the address of the start of a region.
  static const size_t RegionAddrMask;

  static const size_t Log2BlockSize;
  static const size_t BlockSize;
  static const size_t BlockSizeBytes;

  static const size_t BlockSizeOffsetMask;
  static const size_t BlockAddrOffsetMask;
  static const size_t BlockAddrMask;

  static const size_t BlocksPerRegion;
  static const size_t Log2BlocksPerRegion;

  class RegionData
  {
  public:
    // Destination address of the region.
    HeapWord* destination() const { return _destination; }

    // The first region containing data destined for this region.
    size_t source_region() const { return _source_region; }

    // The object (if any) starting in this region and ending in a different
    // region that could not be updated during the main (parallel) compaction
    // phase. This is different from _partial_obj_addr, which is an object that
    // extends onto a source region. However, the two uses do not overlap in
    // time, so the same field is used to save space.
    HeapWord* deferred_obj_addr() const { return _partial_obj_addr; }

    // The starting address of the partial object extending onto the region.
    HeapWord* partial_obj_addr() const { return _partial_obj_addr; }

    // Size of the partial object extending onto the region (words).
    size_t partial_obj_size() const { return _partial_obj_size; }

    // Size of live data that lies within this region due to objects that start
    // in this region (words). This does not include the partial object
    // extending onto the region (if any), or the part of an object that extends
    // onto the next region (if any).
    size_t live_obj_size() const { return _dc_and_los & los_mask; }

    // Total live data that lies within the region (words).
    size_t data_size() const { return partial_obj_size() + live_obj_size(); }

    // The destination_count is the number of other regions to which data from
    // this region will be copied. At the end of the summary phase, the valid
    // values of destination_count are
    //
    // 0 - data from the region will be compacted completely into itself, or the
    //     region is empty. The region can be claimed and then filled.
    // 1 - data from the region will be compacted into 1 other region; some
    //     data from the region may also be compacted into the region itself.
    // 2 - data from the region will be copied to 2 other regions.
    //
    // During compaction as regions are emptied, the destination_count is
    // decremented (atomically) and when it reaches 0, it can be claimed and
    // then filled.
    //
    // A region is claimed for processing by atomically changing the
    // destination_count to the claimed value (dc_claimed). After a region has
    // been filled, the destination_count should be set to the completed value
    // (dc_completed).
    inline uint destination_count() const;
    inline uint destination_count_raw() const;

    // Whether the block table for this region has been filled.
    inline bool blocks_filled() const;

    // Number of times the block table was filled.
    DEBUG_ONLY(inline size_t blocks_filled_count() const;)

    // The location of the java heap data that corresponds to this region.
    inline HeapWord* data_location() const;

    // The highest address referenced by objects in this region.
    inline HeapWord* highest_ref() const;

    // Whether this region is available to be claimed, has been claimed, or has
    // been completed.
    //
    // Minor subtlety: claimed() returns true if the region is marked
    // completed(), which is desirable since a region must be claimed before it
    // can be completed.
    bool available() const { return _dc_and_los < dc_one; }
    bool claimed() const { return _dc_and_los >= dc_claimed; }
    bool completed() const { return _dc_and_los >= dc_completed; }

    // These are not atomic.
    void set_destination(HeapWord* addr) { _destination = addr; }
    void set_source_region(size_t region) { _source_region = region; }
    void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
    void set_partial_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
    void set_partial_obj_size(size_t words) {
      _partial_obj_size = (region_sz_t) words;
    }
    inline void set_blocks_filled();

    inline void set_destination_count(uint count);
    inline void set_live_obj_size(size_t words);
    inline void set_data_location(HeapWord* addr);
    inline void set_completed();
    inline bool claim_unsafe();

    // These are atomic.
    inline void add_live_obj(size_t words);
    inline void set_highest_ref(HeapWord* addr);
    inline void decrement_destination_count();
    inline bool claim();

  private:
    // The type used to represent object sizes within a region.
    typedef uint region_sz_t;

    // Constants for manipulating the _dc_and_los field, which holds both the
    // destination count and live obj size. The live obj size lives at the
    // least significant end so no masking is necessary when adding.
    static const region_sz_t dc_shift;     // Shift amount.
    static const region_sz_t dc_mask;      // Mask for destination count.
    static const region_sz_t dc_one;       // 1, shifted appropriately.
    static const region_sz_t dc_claimed;   // Region has been claimed.
    static const region_sz_t dc_completed; // Region has been completed.
    static const region_sz_t los_mask;     // Mask for live obj size.

    HeapWord*            _destination;
    size_t               _source_region;
    HeapWord*            _partial_obj_addr;
    region_sz_t          _partial_obj_size;
    region_sz_t volatile _dc_and_los;
    bool        volatile _blocks_filled;

#ifdef ASSERT
    size_t               _blocks_filled_count; // Number of block table fills.

    // These enable optimizations that are only partially implemented. Use
    // debug builds to prevent the code fragments from breaking.
    HeapWord*            _data_location;
    HeapWord*            _highest_ref;
#endif // #ifdef ASSERT

#ifdef ASSERT
  public:
    uint                 _pushed; // 0 until region is pushed onto a stack
  private:
#endif
  };

  // "Blocks" allow shorter sections of the bitmap to be searched. Each Block
  // holds an offset, which is the amount of live data in the Region to the left
  // of the first live object that starts in the Block.
  class BlockData
  {
  public:
    typedef unsigned short int blk_ofs_t;

    blk_ofs_t offset() const { return _offset; }
    void set_offset(size_t val) { _offset = (blk_ofs_t)val; }

  private:
    blk_ofs_t _offset;
  };
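
  // Illustrative sketch (not part of the implementation): how a block offset is
  // intended to be combined with the region destination and the mark bitmap
  // when computing a new address (see calc_new_pointer() for the real logic).
  // 'sd' and 'bitmap' are hypothetical locals and the bitmap call is
  // abbreviated:
  //
  //   RegionData* region  = sd.addr_to_region_ptr(addr);
  //   HeapWord* block_beg = sd.block_align_down(addr);
  //   size_t live_in_blk  = bitmap->live_words_in_range(/* block_beg..addr */);
  //   HeapWord* new_addr  = region->destination()
  //                         + sd.addr_to_block_ptr(addr)->offset()
  //                         + live_in_blk;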

public:
  ParallelCompactData();
  bool initialize(MemRegion covered_region);

  size_t region_count() const { return _region_count; }
  size_t reserved_byte_size() const { return _reserved_byte_size; }

  // Convert region indices to/from RegionData pointers.
  inline RegionData* region(size_t region_idx) const;
  inline size_t region(const RegionData* const region_ptr) const;

  size_t block_count() const { return _block_count; }
  inline BlockData* block(size_t block_idx) const;
  inline size_t block(const BlockData* block_ptr) const;

  void add_obj(HeapWord* addr, size_t len);
  void add_obj(oop p, size_t len) { add_obj((HeapWord*)p, len); }

  // Fill in the regions covering [beg, end) so that no data moves; i.e., the
  // destination of region n is simply the start of region n. The argument beg
  // must be region-aligned; end need not be.
  void summarize_dense_prefix(HeapWord* beg, HeapWord* end);

  HeapWord* summarize_split_space(size_t src_region, SplitInfo& split_info,
                                  HeapWord* destination, HeapWord* target_end,
                                  HeapWord** target_next);
  bool summarize(SplitInfo& split_info,
                 HeapWord* source_beg, HeapWord* source_end,
                 HeapWord** source_next,
                 HeapWord* target_beg, HeapWord* target_end,
                 HeapWord** target_next);

  void clear();
  void clear_range(size_t beg_region, size_t end_region);
  void clear_range(HeapWord* beg, HeapWord* end) {
    clear_range(addr_to_region_idx(beg), addr_to_region_idx(end));
  }

  // Return the number of words between addr and the start of the region
  // containing addr.
  inline size_t region_offset(const HeapWord* addr) const;

  // Convert addresses to/from a region index or region pointer.
  inline size_t addr_to_region_idx(const HeapWord* addr) const;
  inline RegionData* addr_to_region_ptr(const HeapWord* addr) const;
  inline HeapWord* region_to_addr(size_t region) const;
  inline HeapWord* region_to_addr(size_t region, size_t offset) const;
  inline HeapWord* region_to_addr(const RegionData* region) const;

  inline HeapWord* region_align_down(HeapWord* addr) const;
  inline HeapWord* region_align_up(HeapWord* addr) const;
  inline bool is_region_aligned(HeapWord* addr) const;

  // Analogous to region_offset() for blocks.
  size_t block_offset(const HeapWord* addr) const;
  size_t addr_to_block_idx(const HeapWord* addr) const;
  size_t addr_to_block_idx(const oop obj) const {
    return addr_to_block_idx((HeapWord*) obj);
  }
  inline BlockData* addr_to_block_ptr(const HeapWord* addr) const;
  inline HeapWord* block_to_addr(size_t block) const;
  inline size_t region_to_block_idx(size_t region) const;

  inline HeapWord* block_align_down(HeapWord* addr) const;
  inline HeapWord* block_align_up(HeapWord* addr) const;
  inline bool is_block_aligned(HeapWord* addr) const;

  // Return the address one past the end of the partial object.
  HeapWord* partial_obj_end(size_t region_idx) const;

  // Return the location of the object after compaction.
  HeapWord* calc_new_pointer(HeapWord* addr, ParCompactionManager* cm);

  HeapWord* calc_new_pointer(oop p, ParCompactionManager* cm) {
    return calc_new_pointer((HeapWord*) p, cm);
  }

#ifdef ASSERT
  void verify_clear(const PSVirtualSpace* vspace);
  void verify_clear();
#endif // #ifdef ASSERT

private:
  bool initialize_block_data();
  bool initialize_region_data(size_t region_size);
  PSVirtualSpace* create_vspace(size_t count, size_t element_size);

private:
  HeapWord*       _region_start;
#ifdef ASSERT
  HeapWord*       _region_end;
#endif // #ifdef ASSERT

  PSVirtualSpace* _region_vspace;
  size_t          _reserved_byte_size;
  RegionData*     _region_data;
  size_t          _region_count;

  PSVirtualSpace* _block_vspace;
  BlockData*      _block_data;
  size_t          _block_count;
};

inline uint
ParallelCompactData::RegionData::destination_count_raw() const
{
  return _dc_and_los & dc_mask;
}

inline uint
ParallelCompactData::RegionData::destination_count() const
{
  return destination_count_raw() >> dc_shift;
}

inline bool
ParallelCompactData::RegionData::blocks_filled() const
{
  bool result = _blocks_filled;
  OrderAccess::acquire();
  return result;
}

#ifdef ASSERT
inline size_t
ParallelCompactData::RegionData::blocks_filled_count() const
{
  return _blocks_filled_count;
}
#endif // #ifdef ASSERT

inline void
ParallelCompactData::RegionData::set_blocks_filled()
{
  OrderAccess::release();
  _blocks_filled = true;
  // Debug builds count the number of times the table was filled.
  DEBUG_ONLY(Atomic::inc(&_blocks_filled_count));
}

inline void
ParallelCompactData::RegionData::set_destination_count(uint count)
{
  assert(count <= (dc_completed >> dc_shift), "count too large");
  const region_sz_t live_sz = (region_sz_t) live_obj_size();
  _dc_and_los = (count << dc_shift) | live_sz;
}

inline void ParallelCompactData::RegionData::set_live_obj_size(size_t words)
{
  assert(words <= los_mask, "would overflow");
  _dc_and_los = destination_count_raw() | (region_sz_t)words;
}
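
// Illustrative sketch of the _dc_and_los packing (not part of the
// implementation). The constants are defined in psParallelCompact.cpp; the
// shift value below is an assumption used only for this example. The live
// object size occupies the low-order bits, so it can be added without masking,
// while the destination count occupies the high-order bits. Assuming, for
// illustration, dc_shift == 27:
//
//   dc_one   == 1u << 27
//   los_mask == (1u << 27) - 1
//
//   // A region with destination count 2 and 1000 live words would hold
//   _dc_and_los == (2u << 27) | 1000
//   // so destination_count() == 2 and live_obj_size() == 1000.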

inline void ParallelCompactData::RegionData::decrement_destination_count()
{
  assert(_dc_and_los < dc_claimed, "already claimed");
  assert(_dc_and_los >= dc_one, "count would go negative");
  Atomic::add(dc_mask, &_dc_and_los);
}

inline HeapWord* ParallelCompactData::RegionData::data_location() const
{
  DEBUG_ONLY(return _data_location;)
  NOT_DEBUG(return NULL;)
}

inline HeapWord* ParallelCompactData::RegionData::highest_ref() const
{
  DEBUG_ONLY(return _highest_ref;)
  NOT_DEBUG(return NULL;)
}

inline void ParallelCompactData::RegionData::set_data_location(HeapWord* addr)
{
  DEBUG_ONLY(_data_location = addr;)
}

inline void ParallelCompactData::RegionData::set_completed()
{
  assert(claimed(), "must be claimed first");
  _dc_and_los = dc_completed | (region_sz_t) live_obj_size();
}

// MT-unsafe claiming of a region. Should only be used during single threaded
// execution.
inline bool ParallelCompactData::RegionData::claim_unsafe()
{
  if (available()) {
    _dc_and_los |= dc_claimed;
    return true;
  }
  return false;
}

inline void ParallelCompactData::RegionData::add_live_obj(size_t words)
{
  assert(words <= (size_t)los_mask - live_obj_size(), "overflow");
  Atomic::add(static_cast<region_sz_t>(words), &_dc_and_los);
}

inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
{
#ifdef ASSERT
  HeapWord* tmp = _highest_ref;
  while (addr > tmp) {
    tmp = Atomic::cmpxchg(addr, &_highest_ref, tmp);
  }
#endif // #ifdef ASSERT
}

inline bool ParallelCompactData::RegionData::claim()
{
  const region_sz_t los = static_cast<region_sz_t>(live_obj_size());
  const region_sz_t old = Atomic::cmpxchg(dc_claimed | los, &_dc_and_los, los);
  return old == los;
}
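
// Illustrative sketch of the region lifecycle during compaction (not part of
// the implementation; 'rd' is a hypothetical RegionData*). Each time one of a
// region's destination regions is filled, the region's own destination count
// is decremented (see PSParallelCompact::decrement_destination_counts()); once
// it reaches zero the region holds no data that is still needed and can itself
// be claimed and filled:
//
//   if (rd->claim()) {        // atomic cmpxchg: succeeds only when count == 0
//     // ... copy data into the region, see PSParallelCompact::fill_region ...
//     rd->set_completed();    // claimed -> completed
//   }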

inline ParallelCompactData::RegionData*
ParallelCompactData::region(size_t region_idx) const
{
  assert(region_idx <= region_count(), "bad arg");
  return _region_data + region_idx;
}

inline size_t
ParallelCompactData::region(const RegionData* const region_ptr) const
{
  assert(region_ptr >= _region_data, "bad arg");
  assert(region_ptr <= _region_data + region_count(), "bad arg");
  return pointer_delta(region_ptr, _region_data, sizeof(RegionData));
}

inline ParallelCompactData::BlockData*
ParallelCompactData::block(size_t n) const {
  assert(n < block_count(), "bad arg");
  return _block_data + n;
}

inline size_t
ParallelCompactData::region_offset(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  return (size_t(addr) & RegionAddrOffsetMask) >> LogHeapWordSize;
}

inline size_t
ParallelCompactData::addr_to_region_idx(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr " PTR_FORMAT " _region_start " PTR_FORMAT, p2i(addr), p2i(_region_start));
  assert(addr <= _region_end, "bad addr " PTR_FORMAT " _region_end " PTR_FORMAT, p2i(addr), p2i(_region_end));
  return pointer_delta(addr, _region_start) >> Log2RegionSize;
}

inline ParallelCompactData::RegionData*
ParallelCompactData::addr_to_region_ptr(const HeapWord* addr) const
{
  return region(addr_to_region_idx(addr));
}

inline HeapWord*
ParallelCompactData::region_to_addr(size_t region) const
{
  assert(region <= _region_count, "region out of range");
  return _region_start + (region << Log2RegionSize);
}

inline HeapWord*
ParallelCompactData::region_to_addr(const RegionData* region) const
{
  return region_to_addr(pointer_delta(region, _region_data,
                                      sizeof(RegionData)));
}

inline HeapWord*
ParallelCompactData::region_to_addr(size_t region, size_t offset) const
{
  assert(region <= _region_count, "region out of range");
  assert(offset < RegionSize, "offset too big"); // This may be too strict.
  return region_to_addr(region) + offset;
}

inline HeapWord*
ParallelCompactData::region_align_down(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr < _region_end + RegionSize, "bad addr");
  return (HeapWord*)(size_t(addr) & RegionAddrMask);
}

inline HeapWord*
ParallelCompactData::region_align_up(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  return region_align_down(addr + RegionSizeOffsetMask);
}

inline bool
ParallelCompactData::is_region_aligned(HeapWord* addr) const
{
  return region_offset(addr) == 0;
}
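
// Worked example of the region address arithmetic above (illustrative only;
// the real RegionSize and Log2RegionSize are defined in psParallelCompact.cpp).
// Suppose, purely for illustration, RegionSize were 1024 words and R were
// _region_start:
//
//   addr_to_region_idx(R + 3000) == 3000 >> 10  == 2
//   region_to_addr(2)            == R + 2048
//   region_offset(R + 3000)      == 3000 - 2048 == 952
//   region_align_down(R + 3000)  == R + 2048
//   region_align_up(R + 3000)    == R + 3072
//   is_region_aligned(R + 2048)  == true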

inline size_t
ParallelCompactData::block_offset(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  return (size_t(addr) & BlockAddrOffsetMask) >> LogHeapWordSize;
}

inline size_t
ParallelCompactData::addr_to_block_idx(const HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  return pointer_delta(addr, _region_start) >> Log2BlockSize;
}

inline ParallelCompactData::BlockData*
ParallelCompactData::addr_to_block_ptr(const HeapWord* addr) const
{
  return block(addr_to_block_idx(addr));
}

inline HeapWord*
ParallelCompactData::block_to_addr(size_t block) const
{
  assert(block < _block_count, "block out of range");
  return _region_start + (block << Log2BlockSize);
}

inline size_t
ParallelCompactData::region_to_block_idx(size_t region) const
{
  return region << Log2BlocksPerRegion;
}

inline HeapWord*
ParallelCompactData::block_align_down(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr < _region_end + RegionSize, "bad addr");
  return (HeapWord*)(size_t(addr) & BlockAddrMask);
}

inline HeapWord*
ParallelCompactData::block_align_up(HeapWord* addr) const
{
  assert(addr >= _region_start, "bad addr");
  assert(addr <= _region_end, "bad addr");
  return block_align_down(addr + BlockSizeOffsetMask);
}

inline bool
ParallelCompactData::is_block_aligned(HeapWord* addr) const
{
  return block_offset(addr) == 0;
}

// Abstract closure for use with ParMarkBitMap::iterate(), which will invoke the
// do_addr() method.
//
// The closure is initialized with the number of heap words to process
// (words_remaining()), and becomes 'full' when it reaches 0. The do_addr()
// methods in subclasses should update the total as words are processed. Since
// only one subclass actually uses this mechanism to terminate iteration, the
// default initial value is > 0. The implementation is here and not in the
// single subclass that uses it to avoid making is_full() virtual, and thus
// adding a virtual call per live object.

class ParMarkBitMapClosure: public StackObj {
 public:
  typedef ParMarkBitMap::idx_t idx_t;
  typedef ParMarkBitMap::IterationStatus IterationStatus;

 public:
  inline ParMarkBitMapClosure(ParMarkBitMap* mbm, ParCompactionManager* cm,
                              size_t words = max_uintx);

  inline ParCompactionManager* compaction_manager() const;
  inline ParMarkBitMap* bitmap() const;
  inline size_t words_remaining() const;
  inline bool is_full() const;
  inline HeapWord* source() const;

  inline void set_source(HeapWord* addr);

  virtual IterationStatus do_addr(HeapWord* addr, size_t words) = 0;

 protected:
  inline void decrement_words_remaining(size_t words);

 private:
  ParMarkBitMap* const _bitmap;
  ParCompactionManager* const _compaction_manager;
  DEBUG_ONLY(const size_t _initial_words_remaining;) // Useful in debugger.
  size_t _words_remaining; // Words left to copy.

 protected:
  HeapWord* _source; // Next addr that would be read.
};

inline
ParMarkBitMapClosure::ParMarkBitMapClosure(ParMarkBitMap* bitmap,
                                           ParCompactionManager* cm,
                                           size_t words):
  _bitmap(bitmap), _compaction_manager(cm)
#ifdef ASSERT
  , _initial_words_remaining(words)
#endif
{
  _words_remaining = words;
  _source = NULL;
}

inline ParCompactionManager* ParMarkBitMapClosure::compaction_manager() const {
  return _compaction_manager;
}

inline ParMarkBitMap* ParMarkBitMapClosure::bitmap() const {
  return _bitmap;
}

inline size_t ParMarkBitMapClosure::words_remaining() const {
  return _words_remaining;
}

inline bool ParMarkBitMapClosure::is_full() const {
  return words_remaining() == 0;
}

inline HeapWord* ParMarkBitMapClosure::source() const {
  return _source;
}

inline void ParMarkBitMapClosure::set_source(HeapWord* addr) {
  _source = addr;
}

inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
  assert(_words_remaining >= words, "processed too many words");
  _words_remaining -= words;
}
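
// Illustrative sketch (not part of the implementation): a minimal
// ParMarkBitMapClosure subclass that counts live words and stops iterating once
// the word budget passed to the constructor is exhausted. The class name is
// hypothetical; see MoveAndUpdateClosure below for a real subclass used during
// compaction.
//
//   class CountLiveClosure: public ParMarkBitMapClosure {
//    public:
//     CountLiveClosure(ParMarkBitMap* bm, ParCompactionManager* cm, size_t budget)
//       : ParMarkBitMapClosure(bm, cm, budget), _counted(0) { }
//
//     virtual IterationStatus do_addr(HeapWord* addr, size_t words) {
//       if (words > words_remaining()) {
//         return ParMarkBitMap::would_overflow;
//       }
//       _counted += words;
//       decrement_words_remaining(words);
//       return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
//     }
//
//    private:
//     size_t _counted;
//   };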

// The UseParallelOldGC collector is a stop-the-world garbage collector that
// does parts of the collection using parallel threads. The collection includes
// the tenured generation and the young generation. The permanent generation is
// collected at the same time as the other two generations but the permanent
// generation is collected by a single GC thread. The permanent generation is
// collected serially because of the requirement that during the processing of a
// klass AAA, any objects referenced by AAA must already have been processed.
// This requirement is enforced by a left (lower address) to right (higher
// address) sliding compaction.
//
// There are four phases of the collection.
//
//      - marking phase
//      - summary phase
//      - compacting phase
//      - clean up phase
//
// Roughly speaking these phases correspond, respectively, to
//      - mark all the live objects
//      - calculate the destination of each object at the end of the collection
//      - move the objects to their destination
//      - update some references and reinitialize some variables
//
// These phases are invoked in PSParallelCompact::invoke_no_policy(). The
// marking phase is implemented in PSParallelCompact::marking_phase() and does a
// complete marking of the heap. The summary phase is implemented in
// PSParallelCompact::summary_phase(). The move and update phase is implemented
// in PSParallelCompact::compact().
//
// A space that is being collected is divided into regions, and each region has
// an associated summary object of type ParallelCompactData::RegionData. Each
// region is of a fixed size, typically contains more than one object, and may
// have parts of objects at the front and back of the region.
//
// region           -----+---------------------+----------
// objects covered  [ AAA  )[ BBB )[ CCC   )[ DDD     )
//
// The marking phase does a complete marking of all live objects in the heap.
// The marking also accumulates the size of the data for all live objects covered
// by each region. This size includes the part of any live object spanning onto
// the region (part of AAA if it is live) from the front, all live objects
// contained in the region (BBB and/or CCC if they are live), and the part of
// any live objects covered by the region that extends off the region (part of
// DDD if it is live). The marking phase uses multiple GC threads and marking
// is done in a bit array of type ParMarkBitMap. The marking of the bit map is
// done atomically, as is the accumulation of the size of the live objects
// covered by a region.
//
// The summary phase calculates the total live data to the left of each region
// XXX. Based on that total and the bottom of the space, it can calculate the
// starting location of the live data in XXX. The summary phase calculates for
// each region XXX quantities such as
//
//      - the amount of live data at the beginning of a region from an object
//        entering the region.
//      - the location of the first live data on the region
//      - a count of the number of regions receiving live data from XXX.
//
// See ParallelCompactData for precise details. The summary phase also
// calculates the dense prefix for the compaction. The dense prefix is a
// portion at the beginning of the space that is not moved. The objects in the
// dense prefix do need to have their object references updated. See method
// summarize_dense_prefix().
//
// The summary phase is done using a single GC thread.
//
// The compaction phase moves objects to their new location and updates all
// references in the object.
//
// A current exception is that objects that cross a region boundary are moved
// but do not have their references updated. References are not updated because
// it cannot easily be determined if the klass pointer KKK for the object AAA
// has been updated. KKK likely resides in a region to the left of the region
// containing AAA. These AAA's have their references updated at the end in a
// clean up phase. See the method PSParallelCompact::update_deferred_objects().
// An alternate strategy is being investigated for this deferral of updating.
//
// Compaction is done on a region basis. A region that is ready to be filled is
// put on a ready list and GC threads take regions off the list and fill them. A
// region is ready to be filled if it is empty of live objects. Such a region may
// have been initially empty (only contained dead objects) or may have had all
// its live objects copied out already. A region that compacts into itself is
// also ready for filling. The ready list is initially filled with empty
// regions and regions compacting into themselves. There is always at least one
// region that can be put on the ready list. The regions are atomically added
// and removed from the ready list.
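
// Illustrative outline (simplified) of the order in which the phases described
// above are driven from PSParallelCompact::invoke_no_policy(); the real control
// flow in psParallelCompact.cpp also handles GC policy decisions, reference
// processing, adaptive sizing and failure paths, and the local names below
// (e.g. 'vmthread_cm') are approximate:
//
//   pre_compact();
//   marking_phase(vmthread_cm, maximum_heap_compaction, &_gc_tracer);
//   summary_phase(vmthread_cm, maximum_heap_compaction);
//   adjust_roots(vmthread_cm);     // update references held in roots
//   compact();                     // move objects and update their references
//   post_compact();                // clean up, including deferred updates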

class PSParallelCompact : AllStatic {
 public:
  // Convenient access to type names.
  typedef ParMarkBitMap::idx_t idx_t;
  typedef ParallelCompactData::RegionData RegionData;
  typedef ParallelCompactData::BlockData BlockData;

  typedef enum {
    old_space_id, eden_space_id,
    from_space_id, to_space_id, last_space_id
  } SpaceId;

 public:
  // Inline closure decls
  //
  class IsAliveClosure: public BoolObjectClosure {
   public:
    virtual bool do_object_b(oop p);
  };

  friend class RefProcTaskProxy;
  friend class PSParallelCompactTest;

 private:
  static STWGCTimer           _gc_timer;
  static ParallelOldTracer    _gc_tracer;
  static elapsedTimer         _accumulated_time;
  static unsigned int         _total_invocations;
  static unsigned int         _maximum_compaction_gc_num;
  static jlong                _time_of_last_gc; // ms
  static CollectorCounters*   _counters;
  static ParMarkBitMap        _mark_bitmap;
  static ParallelCompactData  _summary_data;
  static IsAliveClosure       _is_alive_closure;
  static SpaceInfo            _space_info[last_space_id];

  // Reference processing (used in ...follow_contents)
  static SpanSubjectToDiscoveryClosure _span_based_discoverer;
  static ReferenceProcessor*  _ref_processor;

  // Values computed at initialization and used by dead_wood_limiter().
  static double _dwl_mean;
  static double _dwl_std_dev;
  static double _dwl_first_term;
  static double _dwl_adjustment;
#ifdef ASSERT
  static bool   _dwl_initialized;
#endif // #ifdef ASSERT

 public:
  static ParallelOldTracer* gc_tracer() { return &_gc_tracer; }

 private:

  static void initialize_space_info();

  // Clear the marking bitmap and summary data that cover the specified space.
  static void clear_data_covering_space(SpaceId id);

  static void pre_compact();
  static void post_compact();

  // Mark live objects
  static void marking_phase(ParCompactionManager* cm,
                            bool maximum_heap_compaction,
                            ParallelOldTracer *gc_tracer);

  // Compute the dense prefix for the designated space. This is an experimental
  // implementation currently not used in production.
  static HeapWord* compute_dense_prefix_via_density(const SpaceId id,
                                                    bool maximum_compaction);

  // Methods used to compute the dense prefix.

  // Compute the value of the normal distribution at x = density. The mean and
  // standard deviation are values saved by initialize_dead_wood_limiter().
  static inline double normal_distribution(double density);

  // Initialize the static vars used by dead_wood_limiter().
  static void initialize_dead_wood_limiter();

  // Return the percentage of space that can be treated as "dead wood" (i.e.,
  // not reclaimed).
  static double dead_wood_limiter(double density, size_t min_percent);

  // Find the first (left-most) region in the range [beg, end) that has at least
  // dead_words of dead space to the left. The argument beg must be the first
  // region in the space that is not completely live.
  static RegionData* dead_wood_limit_region(const RegionData* beg,
                                            const RegionData* end,
                                            size_t dead_words);

  // Return a pointer to the first region in the range [beg, end) that is not
  // completely full.
  static RegionData* first_dead_space_region(const RegionData* beg,
                                             const RegionData* end);

  // Return a value indicating the benefit or 'yield' if the compacted region
  // were to start (or equivalently if the dense prefix were to end) at the
  // candidate region. Higher values are better.
  //
  // The value is based on the amount of space reclaimed vs. the costs of (a)
  // updating references in the dense prefix plus (b) copying objects and
  // updating references in the compacted region.
  static inline double reclaimed_ratio(const RegionData* const candidate,
                                       HeapWord* const bottom,
                                       HeapWord* const top,
                                       HeapWord* const new_top);

  // Compute the dense prefix for the designated space.
  static HeapWord* compute_dense_prefix(const SpaceId id,
                                        bool maximum_compaction);

  // Return true if dead space crosses onto the specified Region; bit must be
  // the bit index corresponding to the first word of the Region.
  static inline bool dead_space_crosses_boundary(const RegionData* region,
                                                 idx_t bit);

  // Summary phase utility routine to fill dead space (if any) at the dense
  // prefix boundary. Should only be called if the dense prefix is
  // non-empty.
  static void fill_dense_prefix_end(SpaceId id);

  static void summarize_spaces_quick();
  static void summarize_space(SpaceId id, bool maximum_compaction);
  static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);

  // Adjust addresses in roots. Does not adjust addresses in heap.
  static void adjust_roots(ParCompactionManager* cm);

  DEBUG_ONLY(static void write_block_fill_histogram();)

  // Move objects to new locations.
  static void compact_perm(ParCompactionManager* cm);
  static void compact();

  // Add available regions to the stack and draining tasks to the task queue.
  static void prepare_region_draining_tasks(GCTaskQueue* q,
                                            uint parallel_gc_threads);

  // Add dense prefix update tasks to the task queue.
  static void enqueue_dense_prefix_tasks(GCTaskQueue* q,
                                         uint parallel_gc_threads);

  // Add region stealing tasks to the task queue.
  static void enqueue_region_stealing_tasks(
                                       GCTaskQueue* q,
                                       ParallelTaskTerminator* terminator_ptr,
                                       uint parallel_gc_threads);

  // If objects are left in eden after a collection, try to move the boundary
  // and absorb them into the old gen. Returns true if eden was emptied.
  static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
                                         PSYoungGen* young_gen,
                                         PSOldGen* old_gen);

  // Reset time since last full gc
  static void reset_millis_since_last_gc();

#ifndef PRODUCT
  // Print generic summary data
  static void print_generic_summary_data(ParallelCompactData& summary_data,
                                         HeapWord* const beg_addr,
                                         HeapWord* const end_addr);
#endif // #ifndef PRODUCT

 public:

  PSParallelCompact();

  static void invoke(bool maximum_heap_compaction);
  static bool invoke_no_policy(bool maximum_heap_compaction);

  static void post_initialize();
  // Perform initialization for PSParallelCompact that requires
  // allocations. This should be called during the VM initialization
  // at a point where it would be appropriate to return a JNI_ENOMEM
  // in the event of a failure.
  static bool initialize();

  // Closure accessors
  static BoolObjectClosure* is_alive_closure() { return (BoolObjectClosure*)&_is_alive_closure; }

  // Public accessors
  static elapsedTimer* accumulated_time() { return &_accumulated_time; }
  static unsigned int total_invocations() { return _total_invocations; }
  static CollectorCounters* counters()    { return _counters; }

  // Used to add tasks
  static GCTaskManager* const gc_task_manager();

  // Marking support
  static inline bool mark_obj(oop obj);
  static inline bool is_marked(oop obj);

  template <class T> static inline void adjust_pointer(T* p, ParCompactionManager* cm);

  // Compaction support.
  // Return true if p is in the range [beg_addr, end_addr).
  static inline bool is_in(HeapWord* p, HeapWord* beg_addr, HeapWord* end_addr);
  static inline bool is_in(oop* p, HeapWord* beg_addr, HeapWord* end_addr);

  // Convenience wrappers for per-space data kept in _space_info.
  static inline MutableSpace*     space(SpaceId space_id);
  static inline HeapWord*         new_top(SpaceId space_id);
  static inline HeapWord*         dense_prefix(SpaceId space_id);
  static inline ObjectStartArray* start_array(SpaceId space_id);

  // Move and update the live objects in the specified space.
  static void move_and_update(ParCompactionManager* cm, SpaceId space_id);

  // Process the end of the given region range in the dense prefix.
  // This includes saving any object not updated.
  static void dense_prefix_regions_epilogue(ParCompactionManager* cm,
                                            size_t region_start_index,
                                            size_t region_end_index,
                                            idx_t exiting_object_offset,
                                            idx_t region_offset_start,
                                            idx_t region_offset_end);

  // Update a region in the dense prefix. For each live object
  // in the region, update its interior references. For each
  // dead object, fill it with deadwood. Dead space at the end
  // of a region range will be filled to the start of the next
  // live object regardless of the region_index_end. None of the
  // objects in the dense prefix move and dead space is dead
  // (holds only dead objects that don't need any processing), so
  // dead space can be filled in any order.
  static void update_and_deadwood_in_dense_prefix(ParCompactionManager* cm,
                                                  SpaceId space_id,
                                                  size_t region_index_start,
                                                  size_t region_index_end);

  // Return the address of the count + 1st live word in the range [beg, end).
  static HeapWord* skip_live_words(HeapWord* beg, HeapWord* end, size_t count);

  // Return the address of the word to be copied to dest_addr, which must be
  // aligned to a region boundary.
  static HeapWord* first_src_addr(HeapWord* const dest_addr,
                                  SpaceId src_space_id,
                                  size_t src_region_idx);

  // Determine the next source region, set closure.source() to the start of the
  // new region, and return the region index. Parameter end_addr is the address
  // one beyond the end of the source range just processed. If necessary, switch
  // to a new source space and set src_space_id (in-out parameter) and
  // src_space_top (out parameter) accordingly.
  static size_t next_src_region(MoveAndUpdateClosure& closure,
                                SpaceId& src_space_id,
                                HeapWord*& src_space_top,
                                HeapWord* end_addr);

  // Decrement the destination count for each non-empty source region in the
  // range [beg_region, region(region_align_up(end_addr))). If the destination
  // count for a region goes to 0 and it needs to be filled, enqueue it.
  static void decrement_destination_counts(ParCompactionManager* cm,
                                           SpaceId src_space_id,
                                           size_t beg_region,
                                           HeapWord* end_addr);

  // Fill a region, copying objects from one or more source regions.
  static void fill_region(ParCompactionManager* cm, size_t region_idx);
  static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
    fill_region(cm, region);
  }

  // Fill in the block table for the specified region.
  static void fill_blocks(size_t region_idx);

  // Update the deferred objects in the space.
  static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);

  static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
  static ParallelCompactData& summary_data() { return _summary_data; }

  // Reference Processing
  static ReferenceProcessor* const ref_processor() { return _ref_processor; }

  static STWGCTimer* gc_timer() { return &_gc_timer; }

  // Return the SpaceId for the given address.
  static SpaceId space_id(HeapWord* addr);

  // Time since last full gc (in milliseconds).
  static jlong millis_since_last_gc();

  static void print_on_error(outputStream* st);

#ifndef PRODUCT
  // Debugging support.
  static const char* space_names[last_space_id];
  static void print_region_ranges();
  static void print_dense_prefix_stats(const char* const algorithm,
                                       const SpaceId id,
                                       const bool maximum_compaction,
                                       HeapWord* const addr);
  static void summary_phase_msg(SpaceId dst_space_id,
                                HeapWord* dst_beg, HeapWord* dst_end,
                                SpaceId src_space_id,
                                HeapWord* src_beg, HeapWord* src_end);
#endif // #ifndef PRODUCT

#ifdef ASSERT
  // Sanity check the new location of a word in the heap.
  static inline void check_new_location(HeapWord* old_addr, HeapWord* new_addr);
  // Verify that all the regions have been emptied.
  static void verify_complete(SpaceId space_id);
#endif // #ifdef ASSERT
};

class MoveAndUpdateClosure: public ParMarkBitMapClosure {
 public:
  inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
                              ObjectStartArray* start_array,
                              HeapWord* destination, size_t words);

  // Accessors.
  HeapWord* destination() const { return _destination; }

  // If the object will fit (size <= words_remaining()), copy it to the current
  // destination, update the interior oops and the start array and return either
  // full (if the closure is full) or incomplete. If the object will not fit,
  // return would_overflow.
  virtual IterationStatus do_addr(HeapWord* addr, size_t size);

  // Copy enough words to fill this closure, starting at source(). Interior
  // oops and the start array are not updated. Return full.
  IterationStatus copy_until_full();

  // Copy enough words to fill this closure or to the end of an object,
  // whichever is smaller, starting at source(). Interior oops and the start
  // array are not updated.
  void copy_partial_obj();

 protected:
  // Update variables to indicate that word_count words were processed.
  inline void update_state(size_t word_count);

 protected:
  ObjectStartArray* const _start_array;
  HeapWord* _destination; // Next addr to be written.
};

inline
MoveAndUpdateClosure::MoveAndUpdateClosure(ParMarkBitMap* bitmap,
                                           ParCompactionManager* cm,
                                           ObjectStartArray* start_array,
                                           HeapWord* destination,
                                           size_t words) :
  ParMarkBitMapClosure(bitmap, cm, words), _start_array(start_array)
{
  _destination = destination;
}

inline void MoveAndUpdateClosure::update_state(size_t words)
{
  decrement_words_remaining(words);
  _source += words;
  _destination += words;
}
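
// Illustrative sketch (not part of the implementation): the general shape of
// how PSParallelCompact::fill_region() drives a MoveAndUpdateClosure over live
// data via the mark bitmap. The locals below are hypothetical, the iterate()
// call is abbreviated, and the real logic in psParallelCompact.cpp also handles
// partial objects, split regions and switching between source regions:
//
//   MoveAndUpdateClosure closure(mark_bitmap(), cm, start_array(dest_space_id),
//                                dest_addr, ParallelCompactData::RegionSize);
//   closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));
//   // Iterate over live objects in the source range; 'full' means the
//   // destination region has been filled, 'would_overflow' means the next
//   // object must be copied into the following destination region.
//   ParMarkBitMap::IterationStatus status =
//     mark_bitmap()->iterate(&closure, closure.source(), src_space_top);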

class UpdateOnlyClosure: public ParMarkBitMapClosure {
 private:
  const PSParallelCompact::SpaceId _space_id;
  ObjectStartArray* const _start_array;

 public:
  UpdateOnlyClosure(ParMarkBitMap* mbm,
                    ParCompactionManager* cm,
                    PSParallelCompact::SpaceId space_id);

  // Update the object.
  virtual IterationStatus do_addr(HeapWord* addr, size_t words);

  inline void do_addr(HeapWord* addr);
};

class FillClosure: public ParMarkBitMapClosure {
 public:
  FillClosure(ParCompactionManager* cm, PSParallelCompact::SpaceId space_id);

  virtual IterationStatus do_addr(HeapWord* addr, size_t size);

 private:
  ObjectStartArray* const _start_array;
};

#endif // SHARE_GC_PARALLEL_PSPARALLELCOMPACT_HPP