| 1 | /* |
| 2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
| 26 | #define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
| 27 | |
| 28 | #include "oops/array.hpp" |
| 29 | #include "oops/symbol.hpp" |
| 30 | #include "utilities/growableArray.hpp" |
| 31 | |
| 32 | |
| 33 | template < |
| 34 | typename K, |
| 35 | typename V, |
| 36 | V (*DECODE)(address base_address, u4 offset), |
| 37 | bool (*EQUALS)(V value, K key, int len) |
| 38 | > |
| 39 | class CompactHashtable; |
| 40 | class NumberSeq; |
| 41 | class SimpleCompactHashtable; |
| 42 | class SerializeClosure; |
| 43 | |
| 44 | // Stats for symbol tables in the CDS archive |
| 45 | class CompactHashtableStats { |
| 46 | public: |
| 47 | int hashentry_count; |
| 48 | int hashentry_bytes; |
| 49 | int bucket_count; |
| 50 | int bucket_bytes; |
| 51 | }; |
| 52 | |
| 53 | #if INCLUDE_CDS |
| 54 | ///////////////////////////////////////////////////////////////////////// |
| 55 | // |
| 56 | // The compact hash table writer. Used at dump time for writing out |
| 57 | // the compact table to the shared archive. |
| 58 | // |
| 59 | // At dump time, the CompactHashtableWriter obtains all entries from the |
| 60 | // symbol/string table and adds them to a new temporary hash table. The hash |
| 61 | // table size (number of buckets) is calculated using |
| 62 | // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket |
| 63 | // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. |
| 64 | // 4 is chosen because it produces smaller sized bucket on average for |
| 65 | // faster lookup. It also has relatively small number of empty buckets and |
| 66 | // good distribution of the entries. |
| 67 | // |
| 68 | // We use a simple hash function (hash % num_bucket) for the table. |
| 69 | // The new table is compacted when written out. Please see comments |
| 70 | // above the CompactHashtable class for the table layout detail. The bucket |
| 71 | // offsets are written to the archive as part of the compact table. The |
| 72 | // bucket offset is encoded in the low 30-bit (0-29) and the bucket type |
| 73 | // (regular or compact) are encoded in bit[31, 30]. For buckets with more |
| 74 | // than one entry, both hash and entry offset are written to the |
| 75 | // table. For buckets with only one entry, only the entry offset is written |
| 76 | // to the table and the buckets are tagged as compact in their type bits. |
| 77 | // Buckets without entry are skipped from the table. Their offsets are |
| 78 | // still written out for faster lookup. |
| 79 | // |
| 80 | class CompactHashtableWriter: public StackObj { |
| 81 | public: |
| 82 | class Entry { |
| 83 | unsigned int _hash; |
| 84 | u4 _value; |
| 85 | |
| 86 | public: |
| 87 | Entry() {} |
| 88 | Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} |
| 89 | |
| 90 | u4 value() { |
| 91 | return _value; |
| 92 | } |
| 93 | unsigned int hash() { |
| 94 | return _hash; |
| 95 | } |
| 96 | |
| 97 | bool operator==(const CompactHashtableWriter::Entry& other) { |
| 98 | return (_value == other._value && _hash == other._hash); |
| 99 | } |
| 100 | }; // class CompactHashtableWriter::Entry |
| 101 | |
| 102 | private: |
| 103 | int _num_entries_written; |
| 104 | int _num_buckets; |
| 105 | int _num_empty_buckets; |
| 106 | int _num_value_only_buckets; |
| 107 | int _num_other_buckets; |
| 108 | GrowableArray<Entry>** _buckets; |
| 109 | CompactHashtableStats* _stats; |
| 110 | Array<u4>* _compact_buckets; |
| 111 | Array<u4>* _compact_entries; |
| 112 | |
| 113 | public: |
| 114 | // This is called at dump-time only |
| 115 | CompactHashtableWriter(int num_entries, CompactHashtableStats* stats); |
| 116 | ~CompactHashtableWriter(); |
| 117 | |
| 118 | void add(unsigned int hash, u4 value); |
| 119 | |
| 120 | private: |
| 121 | void allocate_table(); |
| 122 | void dump_table(NumberSeq* summary); |
| 123 | static int calculate_num_buckets(int num_entries) { |
| 124 | int num_buckets = num_entries / SharedSymbolTableBucketSize; |
| 125 | // calculation of num_buckets can result in zero buckets, we need at least one |
| 126 | return (num_buckets < 1) ? 1 : num_buckets; |
| 127 | } |
| 128 | |
| 129 | public: |
| 130 | void dump(SimpleCompactHashtable *cht, const char* table_name); |
| 131 | |
| 132 | static size_t estimate_size(int num_entries); |
| 133 | }; |
| 134 | #endif // INCLUDE_CDS |
| 135 | |
| 136 | #define REGULAR_BUCKET_TYPE 0 |
| 137 | #define VALUE_ONLY_BUCKET_TYPE 1 |
| 138 | #define TABLEEND_BUCKET_TYPE 3 |
| 139 | #define BUCKET_OFFSET_MASK 0x3FFFFFFF |
| 140 | #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) |
| 141 | #define BUCKET_TYPE_SHIFT 30 |
| 142 | #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) |
| 143 | #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) |
| 144 | |
| 145 | ///////////////////////////////////////////////////////////////////////////// |
| 146 | // |
| 147 | // CompactHashtable is used to store the CDS archive's symbol/string tables. |
| 148 | // |
| 149 | // Because these tables are read-only (no entries can be added/deleted) at run-time |
| 150 | // and tend to have large number of entries, we try to minimize the footprint |
| 151 | // cost per entry. |
| 152 | // |
| 153 | // The CompactHashtable is split into two arrays |
| 154 | // |
| 155 | // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset |
| 156 | // u4 entries[<variable size>] |
| 157 | // |
| 158 | // The size of buckets[] is 'num_buckets + 1'. Each entry of |
| 159 | // buckets[] is a 32-bit encoding of the bucket type and bucket offset, |
| 160 | // with the type in the left-most 2-bit and offset in the remaining 30-bit. |
| 161 | // The last entry is a special type. It contains the end of the last |
| 162 | // bucket. |
| 163 | // |
| 164 | // There are two types of buckets, regular buckets and value_only buckets. The |
| 165 | // value_only buckets have '01' in their highest 2-bit, and regular buckets have |
| 166 | // '00' in their highest 2-bit. |
| 167 | // |
| 168 | // For normal buckets, each entry is 8 bytes in the entries[]: |
| 169 | // u4 hash; /* symbol/string hash */ |
| 170 | // union { |
| 171 | // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ |
| 172 | // narrowOop str; /* String narrowOop encoding */ |
| 173 | // } |
| 174 | // |
| 175 | // |
| 176 | // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. |
| 177 | // |
| 178 | // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code |
| 179 | // is skipped. |
| 180 | // buckets[0, 4, 5, ....] |
| 181 | // | | | |
| 182 | // | | +---+ |
| 183 | // | | | |
| 184 | // | +----+ | |
| 185 | // v v v |
| 186 | // entries[H,O,H,O,O,H,O,H,O.....] |
| 187 | // |
| 188 | // See CompactHashtable::lookup() for how the table is searched at runtime. |
| 189 | // See CompactHashtableWriter::dump() for how the table is written at CDS |
| 190 | // dump time. |
| 191 | // |
| 192 | class SimpleCompactHashtable { |
| 193 | protected: |
| 194 | address _base_address; |
| 195 | u4 _bucket_count; |
| 196 | u4 _entry_count; |
| 197 | u4* _buckets; |
| 198 | u4* _entries; |
| 199 | |
| 200 | public: |
| 201 | SimpleCompactHashtable() { |
| 202 | _entry_count = 0; |
| 203 | _bucket_count = 0; |
| 204 | _buckets = 0; |
| 205 | _entries = 0; |
| 206 | } |
| 207 | |
| 208 | void reset() { |
| 209 | _bucket_count = 0; |
| 210 | _entry_count = 0; |
| 211 | _buckets = 0; |
| 212 | _entries = 0; |
| 213 | } |
| 214 | |
| 215 | void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries); |
| 216 | |
| 217 | // Read/Write the table's header from/to the CDS archive |
| 218 | void (SerializeClosure* soc) NOT_CDS_RETURN; |
| 219 | |
| 220 | inline bool empty() { |
| 221 | return (_entry_count == 0); |
| 222 | } |
| 223 | |
| 224 | static size_t (); |
| 225 | }; |
| 226 | |
| 227 | template < |
| 228 | typename K, |
| 229 | typename V, |
| 230 | V (*DECODE)(address base_address, u4 offset), |
| 231 | bool (*EQUALS)(V value, K key, int len) |
| 232 | > |
| 233 | class CompactHashtable : public SimpleCompactHashtable { |
| 234 | friend class VMStructs; |
| 235 | |
| 236 | V decode(u4 offset) const { |
| 237 | return DECODE(_base_address, offset); |
| 238 | } |
| 239 | |
| 240 | public: |
| 241 | // Lookup a value V from the compact table using key K |
| 242 | inline V lookup(K key, unsigned int hash, int len) const { |
| 243 | if (_entry_count > 0) { |
| 244 | int index = hash % _bucket_count; |
| 245 | u4 bucket_info = _buckets[index]; |
| 246 | u4 bucket_offset = BUCKET_OFFSET(bucket_info); |
| 247 | int bucket_type = BUCKET_TYPE(bucket_info); |
| 248 | u4* entry = _entries + bucket_offset; |
| 249 | |
| 250 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
| 251 | V value = decode(entry[0]); |
| 252 | if (EQUALS(value, key, len)) { |
| 253 | return value; |
| 254 | } |
| 255 | } else { |
| 256 | // This is a regular bucket, which has more than one |
| 257 | // entries. Each entry is a pair of entry (hash, offset). |
| 258 | // Seek until the end of the bucket. |
| 259 | u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]); |
| 260 | while (entry < entry_max) { |
| 261 | unsigned int h = (unsigned int)(entry[0]); |
| 262 | if (h == hash) { |
| 263 | V value = decode(entry[1]); |
| 264 | if (EQUALS(value, key, len)) { |
| 265 | return value; |
| 266 | } |
| 267 | } |
| 268 | entry += 2; |
| 269 | } |
| 270 | } |
| 271 | } |
| 272 | return NULL; |
| 273 | } |
| 274 | |
| 275 | template <class ITER> |
| 276 | inline void iterate(ITER* iter) const { |
| 277 | for (u4 i = 0; i < _bucket_count; i++) { |
| 278 | u4 bucket_info = _buckets[i]; |
| 279 | u4 bucket_offset = BUCKET_OFFSET(bucket_info); |
| 280 | int bucket_type = BUCKET_TYPE(bucket_info); |
| 281 | u4* entry = _entries + bucket_offset; |
| 282 | |
| 283 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
| 284 | iter->do_value(decode(entry[0])); |
| 285 | } else { |
| 286 | u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); |
| 287 | while (entry < entry_max) { |
| 288 | iter->do_value(decode(entry[1])); |
| 289 | entry += 2; |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | void print_table_statistics(outputStream* st, const char* name) { |
| 296 | st->print_cr("%s statistics:" , name); |
| 297 | int total_entries = 0; |
| 298 | int max_bucket = 0; |
| 299 | for (u4 i = 0; i < _bucket_count; i++) { |
| 300 | u4 bucket_info = _buckets[i]; |
| 301 | int bucket_type = BUCKET_TYPE(bucket_info); |
| 302 | int bucket_size; |
| 303 | |
| 304 | if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { |
| 305 | bucket_size = 1; |
| 306 | } else { |
| 307 | bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2; |
| 308 | } |
| 309 | total_entries += bucket_size; |
| 310 | if (max_bucket < bucket_size) { |
| 311 | max_bucket = bucket_size; |
| 312 | } |
| 313 | } |
| 314 | st->print_cr("Number of buckets : %9d" , _bucket_count); |
| 315 | st->print_cr("Number of entries : %9d" , total_entries); |
| 316 | st->print_cr("Maximum bucket size : %9d" , max_bucket); |
| 317 | } |
| 318 | }; |
| 319 | |
| 320 | //////////////////////////////////////////////////////////////////////// |
| 321 | // |
| 322 | // OffsetCompactHashtable -- This is used to store many types of objects |
| 323 | // in the CDS archive. On 64-bit platforms, we save space by using a 32-bit |
| 324 | // offset from the CDS base address. |
| 325 | |
| 326 | template <typename V> |
| 327 | inline V read_value_from_compact_hashtable(address base_address, u4 offset) { |
| 328 | return (V)(base_address + offset); |
| 329 | } |
| 330 | |
| 331 | template < |
| 332 | typename K, |
| 333 | typename V, |
| 334 | bool (*EQUALS)(V value, K key, int len) |
| 335 | > |
| 336 | class OffsetCompactHashtable : public CompactHashtable< |
| 337 | K, V, read_value_from_compact_hashtable<V>, EQUALS> { |
| 338 | }; |
| 339 | |
| 340 | |
| 341 | //////////////////////////////////////////////////////////////////////// |
| 342 | // |
| 343 | // Read/Write the contents of a hashtable textual dump (created by |
| 344 | // SymbolTable::dump and StringTable::dump). |
| 345 | // Because the dump file may be big (hundred of MB in extreme cases), |
| 346 | // we use mmap for fast access when reading it. |
| 347 | // |
| 348 | class HashtableTextDump { |
| 349 | int _fd; |
| 350 | const char* _base; |
| 351 | const char* _p; |
| 352 | const char* _end; |
| 353 | const char* _filename; |
| 354 | size_t _size; |
| 355 | int _prefix_type; |
| 356 | int _line_no; |
| 357 | public: |
| 358 | HashtableTextDump(const char* filename); |
| 359 | ~HashtableTextDump(); |
| 360 | |
| 361 | enum { |
| 362 | SymbolPrefix = 1 << 0, |
| 363 | StringPrefix = 1 << 1, |
| 364 | Unknown = 1 << 2 |
| 365 | }; |
| 366 | |
| 367 | void quit(const char* err, const char* msg); |
| 368 | |
| 369 | inline int remain() { |
| 370 | return (int)(_end - _p); |
| 371 | } |
| 372 | int last_line_no() { |
| 373 | return _line_no - 1; |
| 374 | } |
| 375 | |
| 376 | void corrupted(const char *p, const char *msg); |
| 377 | |
| 378 | inline void corrupted_if(bool cond, const char *msg) { |
| 379 | if (cond) { |
| 380 | corrupted(_p, msg); |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | bool skip_newline(); |
| 385 | int skip(char must_be_char); |
| 386 | void skip_past(char c); |
| 387 | void check_version(const char* ver); |
| 388 | |
| 389 | inline void get_num(char delim, int *num) { |
| 390 | const char* p = _p; |
| 391 | const char* end = _end; |
| 392 | u8 n = 0; |
| 393 | |
| 394 | while (p < end) { |
| 395 | char c = *p++; |
| 396 | if ('0' <= c && c <= '9') { |
| 397 | n = n * 10 + (c - '0'); |
| 398 | if (n > (u8)INT_MAX) { |
| 399 | corrupted(_p, "Num overflow" ); |
| 400 | } |
| 401 | } else if (c == delim) { |
| 402 | _p = p; |
| 403 | *num = (int)n; |
| 404 | return; |
| 405 | } else { |
| 406 | // Not [0-9], not 'delim' |
| 407 | corrupted(_p, "Unrecognized format" );; |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | corrupted(_end, "Incorrect format" ); |
| 412 | ShouldNotReachHere(); |
| 413 | } |
| 414 | |
| 415 | void scan_prefix_type(); |
| 416 | int scan_prefix(int* utf8_length); |
| 417 | int scan_string_prefix(); |
| 418 | int scan_symbol_prefix(); |
| 419 | |
| 420 | jchar unescape(const char* from, const char* end, int count); |
| 421 | void get_utf8(char* utf8_buffer, int utf8_length); |
| 422 | static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); |
| 423 | }; |
| 424 | |
| 425 | #endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP |
| 426 | |