compactHashtable.cpp (original) (raw)

1 /* 2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 / 24 25 #include "precompiled.hpp" 26 #include "classfile/javaClasses.hpp" 27 #include "memory/metaspaceShared.hpp" 28 #include "prims/jvm.h" 29 #include "utilities/numberSeq.hpp" 30 #include <sys/stat.h> 31 32 ///////////////////////////////////////////////////// 33 // 34 // The compact hash table writer implementations 35 // 36 CompactHashtableWriter::CompactHashtableWriter(int table_type, 37 int num_entries, 38 CompactHashtableStats stats) { 39 assert(DumpSharedSpaces, "dump-time only"); 40 _type = table_type; 41 _num_entries = num_entries; 42 _num_buckets = number_of_buckets(_num_entries); 43 _buckets = NEW_C_HEAP_ARRAY(Entry*, _num_buckets, mtSymbol); 44 memset(_buckets, 0, sizeof(Entry*) * _num_buckets); 45 46 /* bucket sizes table / 47 _bucket_sizes = NEW_C_HEAP_ARRAY(juint, _num_buckets, mtSymbol); 48 memset(_bucket_sizes, 0, sizeof(juint) * _num_buckets); 49 50 stats->hashentry_count = _num_entries; 51 // Compact buckets' entries will have only the 4-byte offset, but 52 // we don't know how many there will be at this point. So use a 53 // conservative estimate here. The size is adjusted later when we 54 // write out the buckets. 55 stats->hashentry_bytes = _num_entries * 8; 56 stats->bucket_count = _num_buckets; 57 stats->bucket_bytes = (_num_buckets + 1) * (sizeof(juint)); 58 _stats = stats; 59 60 // See compactHashtable.hpp for table layout 61 _required_bytes = sizeof(juint) * 2; // _base_address, written as 2 juints 62 _required_bytes+= sizeof(juint) + // num_entries 63 sizeof(juint) + // num_buckets 64 stats->hashentry_bytes + 65 stats->bucket_bytes; 66 } 67 68 CompactHashtableWriter::~CompactHashtableWriter() { 69 for (int index = 0; index < _num_buckets; index++) { 70 Entry* next = NULL; 71 for (Entry* tent = _buckets[index]; tent; tent = next) { 72 next = tent->next(); 73 delete tent; 74 } 75 } 76 77 FREE_C_HEAP_ARRAY(juint, _bucket_sizes); 78 FREE_C_HEAP_ARRAY(Entry, _buckets); 79 } 80 81 // Calculate the number of buckets in the temporary hash table 82 int CompactHashtableWriter::number_of_buckets(int num_entries) { 83 const int buksize = (int)SharedSymbolTableBucketSize; 84 int num_buckets = (num_entries + buksize - 1) / buksize; 85 num_buckets = (num_buckets + 1) & (0x01); 86 87 return num_buckets; 88 } 89 90 // Add a symbol entry to the temporary hash table 91 void CompactHashtableWriter::add(unsigned int hash, Entry* entry) { 92 int index = hash % _num_buckets; 93 entry->set_next(_buckets[index]); 94 _buckets[index] = entry; 95 _bucket_sizes[index] ++; 96 } 97 98 // Write the compact table's bucket infos 99 juint* CompactHashtableWriter::dump_table(juint* p, juint** first_bucket, 100 NumberSeq* summary) { 101 int index; 102 juint* compact_table = p; 103 // Compute the start of the buckets, include the compact_bucket_infos table 104 // and the table end offset. 105 juint offset = _num_buckets + 1; 106 first_bucket = compact_table + offset; 107 108 for (index = 0; index < _num_buckets; index++) { 109 int bucket_size = _bucket_sizes[index]; 110 if (bucket_size == 1) { 111 // bucket with one entry is compacted and only has the symbol offset 112 compact_table[index] = BUCKET_INFO(offset, COMPACT_BUCKET_TYPE); 113 offset += bucket_size; // each entry contains symbol offset only 114 } else { 115 // regular bucket, each entry is a symbol (hash, offset) pair 116 compact_table[index] = BUCKET_INFO(offset, REGULAR_BUCKET_TYPE); 117 offset += bucket_size * 2; // each hash entry is 2 juints 118 } 119 if (offset & ~BUCKET_OFFSET_MASK) { 120 vm_exit_during_initialization("CompactHashtableWriter::dump_table: Overflow! " 121 "Too many symbols."); 122 } 123 summary->add(bucket_size); 124 } 125 // Mark the end of the table 126 compact_table[_num_buckets] = BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE); 127 128 return compact_table; 129 } 130 131 // Write the compact table's entries 132 juint CompactHashtableWriter::dump_buckets(juint* compact_table, juint* p, 133 NumberSeq* summary) { 134 uintx base_address = 0; 135 uintx max_delta = 0; 136 int num_compact_buckets = 0; 137 if (_type == CompactHashtable<Symbol*, char>::_symbol_table) { 138 base_address = uintx(MetaspaceShared::shared_rs()->base()); 139 max_delta = uintx(MetaspaceShared::shared_rs()->size()); 140 assert(max_delta <= 0x7fffffff, "range check"); 141 } else { 142 assert((_type == CompactHashtable<oop, char>::_string_table), "unknown table"); 143 assert(UseCompressedOops, "UseCompressedOops is required"); 144 } 145 146 assert(p != NULL, "sanity"); 147 for (int index = 0; index < _num_buckets; index++) { 148 juint count = 0; 149 int bucket_size = _bucket_sizes[index]; 150 int bucket_type = BUCKET_TYPE(compact_table[index]); 151 152 if (bucket_size == 1) { 153 assert(bucket_type == COMPACT_BUCKET_TYPE, "Bad bucket type"); 154 num_compact_buckets ++; 155 } 156 for (Entry* tent = _buckets[index]; tent; 157 tent = tent->next()) { 158 if (bucket_type == REGULAR_BUCKET_TYPE) { 159 p++ = juint(tent->hash()); // write entry hash 160 } 161 if (_type == CompactHashtable<Symbol*, char>::_symbol_table) { 162 uintx deltax = uintx(tent->value()) - base_address; 163 assert(deltax < max_delta, "range check"); 164 juint delta = juint(deltax); 165 *p++ = delta; // write entry offset 166 } else { 167 *p++ = oopDesc::encode_heap_oop(tent->string()); 168 } 169 count ++; 170 } 171 assert(count == _bucket_sizes[index], "sanity"); 172 } 173 174 // Adjust the hashentry_bytes in CompactHashtableStats. Each compact 175 // bucket saves 4-byte. 176 _stats->hashentry_bytes -= num_compact_buckets * 4; 177 178 return p; 179 } 180 181 // Write the compact table 182 void CompactHashtableWriter::dump(char* top, char* end) { 183 NumberSeq summary; 184 char* old_top = top; 185 juint p = (juint*)(top); 186 187 uintx base_address = uintx(MetaspaceShared::shared_rs()->base()); 188 189 // Now write the following at the beginning of the table: 190 // base_address (uintx) 191 // num_entries (juint) 192 // num_buckets (juint) 193 p++ = high(base_address); 194 p++ = low (base_address); // base address 195 p++ = _num_entries; // number of entries in the table 196 p++ = _num_buckets; // number of buckets in the table 197 198 juint first_bucket = NULL; 199 juint compact_table = dump_table(p, &first_bucket, &summary); 200 juint bucket_end = dump_buckets(compact_table, first_bucket, &summary); 201 202 assert(bucket_end <= (juint*)end, "cannot write past end"); 203 *top = (char*)bucket_end; 204 205 if (PrintSharedSpaces) { 206 double avg_cost = 0.0; 207 if (_num_entries > 0) { 208 avg_cost = double(_required_bytes)/double(_num_entries); 209 } 210 tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT, 211 table_name(), (intptr_t)base_address); 212 tty->print_cr("Number of entries : %9d", _num_entries); 213 tty->print_cr("Total bytes used : %9d", (int)((top) - old_top)); 214 tty->print_cr("Average bytes per entry : %9.3f", avg_cost); 215 tty->print_cr("Average bucket size : %9.3f", summary.avg()); 216 tty->print_cr("Variance of bucket size : %9.3f", summary.variance()); 217 tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd()); 218 tty->print_cr("Maximum bucket size : %9d", (int)summary.maximum()); 219 } 220 } 221 222 const char CompactHashtableWriter::table_name() { 223 switch (_type) { 224 case CompactHashtable<Symbol*, char>::_symbol_table: return "symbol"; 225 case CompactHashtable<oop, char>::_string_table: return "string"; 226 default: 227 ; 228 } 229 return "unknown"; 230 } 231 232 ///////////////////////////////////////////////////////////// 233 // 234 // The CompactHashtable implementation 235 // 236 template <class T, class N> const char CompactHashtable<T, N>::init( 237 CompactHashtableType type, const char buffer) { 238 assert(!DumpSharedSpaces, "run-time only"); 239 _type = type; 240 juintp = (juint)buffer; 241 juint upper = *p++; 242 juint lower = *p++; 243 _base_address = uintx(jlong_from(upper, lower)); 244 _entry_count = *p++; 245 _bucket_count = p++; 246 _buckets = p; 247 _table_end_offset = BUCKET_OFFSET(p[_bucket_count]); // located at the end of the bucket_info table 248 249 juint end = _buckets + _table_end_offset; 250 return (const char)end; 251 } 252 253 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure cl) { 254 assert(!DumpSharedSpaces, "run-time only"); 255 for (juint i = 0; i < _bucket_count; i ++) { 256 juint bucket_info = _buckets[i]; 257 juint bucket_offset = BUCKET_OFFSET(bucket_info); 258 int bucket_type = BUCKET_TYPE(bucket_info); 259 juint* bucket = _buckets + bucket_offset; 260 juint* bucket_end = _buckets; 261 262 Symbol* sym; 263 if (bucket_type == COMPACT_BUCKET_TYPE) { 264 sym = (Symbol*)((void*)(_base_address + bucket[0])); 265 cl->do_symbol(&sym); 266 } else { 267 bucket_end += BUCKET_OFFSET(_buckets[i + 1]); 268 while (bucket < bucket_end) { 269 sym = (Symbol*)((void*)(_base_address + bucket[1])); 270 cl->do_symbol(&sym); 271 bucket += 2; 272 } 273 } 274 } 275 } 276 277 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure f) { 278 assert(!DumpSharedSpaces, "run-time only"); 279 assert(_type == _string_table || _bucket_count == 0, "sanity"); 280 for (juint i = 0; i < _bucket_count; i ++) { 281 juint bucket_info = _buckets[i]; 282 juint bucket_offset = BUCKET_OFFSET(bucket_info); 283 int bucket_type = BUCKET_TYPE(bucket_info); 284 juint* bucket = _buckets + bucket_offset; 285 juint* bucket_end = _buckets; 286 287 narrowOop o; 288 if (bucket_type == COMPACT_BUCKET_TYPE) { 289 o = (narrowOop)bucket[0]; 290 f->do_oop(&o); 291 } else { 292 bucket_end += BUCKET_OFFSET(_buckets[i + 1]); 293 while (bucket < bucket_end) { 294 o = (narrowOop)bucket[1]; 295 f->do_oop(&o); 296 bucket += 2; 297 } 298 } 299 } 300 } 301 302 // Explicitly instantiate these types 303 template class CompactHashtable<Symbol*, char>; 304 template class CompactHashtable<oop, char>; 305 306 #ifndef O_BINARY // if defined (Win32) use binary files. 307 #define O_BINARY 0 // otherwise do nothing. 308 #endif 309 310 //////////////////////////////////////////////////////// 311 // 312 // HashtableTextDump 313 // 314 HashtableTextDump::HashtableTextDump(const char filename) : _fd(-1) { 315 struct stat st; 316 if (os::stat(filename, &st) != 0) { 317 quit("Unable to get hashtable dump file size", filename); 318 } 319 _size = st.st_size; 320 _fd = open(filename, O_RDONLY | O_BINARY, 0); 321 if (_fd < 0) { 322 quit("Unable to open hashtable dump file", filename); 323 } 324 _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false); 325 if (_base == NULL) { 326 quit("Unable to map hashtable dump file", filename); 327 } 328 _p = _base; 329 _end = _base + st.st_size; 330 _filename = filename; 331 _prefix_type = Unknown; 332 _line_no = 1; 333 } 334 335 HashtableTextDump::HashtableTextDump() { 336 os::unmap_memory((char*)_base, _size); 337 if (_fd >= 0) { 338 close(_fd); 339 } 340 } 341 342 void HashtableTextDump::quit(const char* err, const char* msg) { 343 vm_exit_during_initialization(err, msg); 344 } 345 346 void HashtableTextDump::corrupted(const char p, const char msg) { 347 char info[100]; 348 jio_snprintf(info, sizeof(info), 349 "%s. Corrupted at line %d (file pos %d)", 350 msg, _line_no, (int)(p - _base)); 351 quit(info, _filename); 352 } 353 354 bool HashtableTextDump::skip_newline() { 355 if (_p[0] == '\r' && _p[1] == '\n') { 356 _p += 2; 357 } else if (_p[0] == '\n') { 358 _p += 1; 359 } else { 360 corrupted(_p, "Unexpected character"); 361 } 362 _line_no ++; 363 return true; 364 } 365 366 int HashtableTextDump::skip(char must_be_char) { 367 corrupted_if(remain() < 1); 368 corrupted_if(_p++ != must_be_char); 369 return 0; 370 } 371 372 void HashtableTextDump::skip_past(char c) { 373 for (;;) { 374 corrupted_if(remain() < 1); 375 if (*_p++ == c) { 376 return; 377 } 378 } 379 } 380 381 void HashtableTextDump::check_version(const char* ver) { 382 int len = (int)strlen(ver); 383 corrupted_if(remain() < len); 384 if (strncmp(_p, ver, len) != 0) { 385 quit("wrong version of hashtable dump file", _filename); 386 } 387 _p += len; 388 skip_newline(); 389 } 390 391 void HashtableTextDump::scan_prefix_type() { 392 _p ++; 393 if (strncmp(_p, "SECTION: String", 15) == 0) { 394 _p += 15; 395 _prefix_type = StringPrefix; 396 } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) { 397 _p += 15; 398 _prefix_type = SymbolPrefix; 399 } else { 400 _prefix_type = Unknown; 401 } 402 skip_newline(); 403 } 404 405 int HashtableTextDump::scan_prefix(int* utf8_length) { 406 if (*_p == '@') { 407 scan_prefix_type(); 408 } 409 410 switch (_prefix_type) { 411 case SymbolPrefix: 412 *utf8_length = scan_symbol_prefix(); break; 413 case StringPrefix: 414 *utf8_length = scan_string_prefix(); break; 415 default: 416 tty->print_cr("Shared input data type: Unknown."); 417 corrupted(_p, "Unknown data type"); 418 } 419 420 return _prefix_type; 421 } 422 423 int HashtableTextDump::scan_string_prefix() { 424 // Expect /[0-9]+: / 425 int utf8_length = 0; 426 get_num(':', &utf8_length); 427 if (_p != ' ') { 428 corrupted(_p, "Wrong prefix format for string"); 429 } 430 _p++; 431 return utf8_length; 432 } 433 434 int HashtableTextDump::scan_symbol_prefix() { 435 // Expect /[0-9]+ (-|)[0-9]+: / 436 int utf8_length = 0; 437 get_num(' ', &utf8_length); 438 if (_p == '-') { 439 _p++; 440 } 441 int ref_num; 442 (void)get_num(':', &ref_num); 443 if (_p != ' ') { 444 corrupted(_p, "Wrong prefix format for symbol"); 445 } 446 _p++; 447 return utf8_length; 448 } 449 450 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) { 451 jchar value = 0; 452 453 corrupted_if(from + count > end); 454 455 for (int i=0; i<count; i++) { 456 char c = from++; 457 switch (c) { 458 case '0': case '1': case '2': case '3': case '4': 459 case '5': case '6': case '7': case '8': case '9': 460 value = (value << 4) + c - '0'; 461 break; 462 case 'a': case 'b': case 'c': 463 case 'd': case 'e': case 'f': 464 value = (value << 4) + 10 + c - 'a'; 465 break; 466 case 'A': case 'B': case 'C': 467 case 'D': case 'E': case 'F': 468 value = (value << 4) + 10 + c - 'A'; 469 break; 470 default: 471 ShouldNotReachHere(); 472 } 473 } 474 return value; 475 } 476 477 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) { 478 // cache in local vars 479 const char* from = _p; 480 const char* end = _end; 481 char* to = utf8_buffer; 482 int n = utf8_length; 483 484 for (; n > 0 && from < end; n--) { 485 if (*from != '\') { 486 *to++ = *from++; 487 } else { 488 corrupted_if(from + 2 > end); 489 char c = from[1]; 490 from += 2; 491 switch (c) { 492 case 'x': 493 { 494 jchar value = unescape(from, end, 2); 495 from += 2; 496 assert(value <= 0xff, "sanity"); 497 *to++ = (char)(value & 0xff); 498 } 499 break; 500 case 't': *to++ = '\t'; break; 501 case 'n': *to++ = '\n'; break; 502 case 'r': *to++ = '\r'; break; 503 case '\': *to++ = '\'; break; 504 default: 505 corrupted(_p, "Unsupported character"); 506 } 507 } 508 } 509 corrupted_if(n > 0); // expected more chars but file has ended 510 _p = from; 511 skip_newline(); 512 } 513 514 // NOTE: the content is NOT the same as 515 // UTF8::as_quoted_ascii(const char utf8_str, int utf8_length, char* buf, int buflen). 516 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily 517 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8() 518 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) { 519 const char *c = utf8_string; 520 const char *end = c + utf8_length; 521 for (; c < end; c++) { 522 switch (*c) { 523 case '\t': st->print("\t"); break; 524 case '\r': st->print("\r"); break; 525 case '\n': st->print("\n"); break; 526 case '\': st->print("\\"); break; 527 default: 528 if (isprint(*c)) { 529 st->print("%c", *c); 530 } else { 531 st->print("\x%02x", ((unsigned int)*c) & 0xff); 532 } 533 } 534 } 535 }