New src/share/vm/classfile/compactHashtable.cpp (original) (raw)
1 /*
2 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "classfile/javaClasses.hpp"
27 #include "memory/metaspaceShared.hpp"
28 #include "prims/jvm.h"
29 #include "utilities/numberSeq.hpp"
30 #include <sys/stat.h>
31
32 /////////////////////////////////////////////////////
33 //
34 // The compact hash table writer implementations
35 //
36 CompactHashtableWriter::CompactHashtableWriter(int table_type,
37 int num_entries,
38 CompactHashtableStats stats) {
39 assert(DumpSharedSpaces, "dump-time only");
40 _type = table_type;
41 _num_entries = num_entries;
42 _num_buckets = number_of_buckets(_num_entries);
43 _buckets = NEW_C_HEAP_ARRAY(Entry*, _num_buckets, mtSymbol);
44 memset(_buckets, 0, sizeof(Entry*) * _num_buckets);
45
46 /* bucket sizes table /
47 _bucket_sizes = NEW_C_HEAP_ARRAY(juint, _num_buckets, mtSymbol);
48 memset(_bucket_sizes, 0, sizeof(juint) * _num_buckets);
49
50 stats->hashentry_count = _num_entries;
51 // Compact buckets' entries will have only the 4-byte offset, but
52 // we don't know how many there will be at this point. So use a
53 // conservative estimate here. The size is adjusted later when we
54 // write out the buckets.
55 stats->hashentry_bytes = _num_entries * 8;
56 stats->bucket_count = _num_buckets;
57 stats->bucket_bytes = (_num_buckets + 1) * (sizeof(juint));
58 _stats = stats;
59
60 // See compactHashtable.hpp for table layout
61 _required_bytes = sizeof(juint) * 2; // _base_address, written as 2 juints
62 _required_bytes+= sizeof(juint) + // num_entries
63 sizeof(juint) + // num_buckets
64 stats->hashentry_bytes +
65 stats->bucket_bytes;
66 }
67
68 CompactHashtableWriter::~CompactHashtableWriter() {
69 for (int index = 0; index < _num_buckets; index++) {
70 Entry* next = NULL;
71 for (Entry* tent = _buckets[index]; tent; tent = next) {
72 next = tent->next();
73 delete tent;
74 }
75 }
76
77 FREE_C_HEAP_ARRAY(juint, _bucket_sizes);
78 FREE_C_HEAP_ARRAY(Entry, _buckets);
79 }
80
81 // Calculate the number of buckets in the temporary hash table
82 int CompactHashtableWriter::number_of_buckets(int num_entries) {
83 const int buksize = (int)SharedSymbolTableBucketSize;
84 int num_buckets = (num_entries + buksize - 1) / buksize;
85 num_buckets = (num_buckets + 1) & (0x01);
86
87 return num_buckets;
88 }
89
90 // Add a symbol entry to the temporary hash table
91 void CompactHashtableWriter::add(unsigned int hash, Entry* entry) {
92 int index = hash % _num_buckets;
93 entry->set_next(_buckets[index]);
94 _buckets[index] = entry;
95 _bucket_sizes[index] ++;
96 }
97
98 // Write the compact table's bucket infos
99 juint* CompactHashtableWriter::dump_table(juint* p, juint** first_bucket,
100 NumberSeq* summary) {
101 int index;
102 juint* compact_table = p;
103 // Compute the start of the buckets, include the compact_bucket_infos table
104 // and the table end offset.
105 juint offset = _num_buckets + 1;
106 first_bucket = compact_table + offset;
107
108 for (index = 0; index < _num_buckets; index++) {
109 int bucket_size = _bucket_sizes[index];
110 if (bucket_size == 1) {
111 // bucket with one entry is compacted and only has the symbol offset
112 compact_table[index] = BUCKET_INFO(offset, COMPACT_BUCKET_TYPE);
113 offset += bucket_size; // each entry contains symbol offset only
114 } else {
115 // regular bucket, each entry is a symbol (hash, offset) pair
116 compact_table[index] = BUCKET_INFO(offset, REGULAR_BUCKET_TYPE);
117 offset += bucket_size * 2; // each hash entry is 2 juints
118 }
119 if (offset & ~BUCKET_OFFSET_MASK) {
120 vm_exit_during_initialization("CompactHashtableWriter::dump_table: Overflow! "
121 "Too many symbols.");
122 }
123 summary->add(bucket_size);
124 }
125 // Mark the end of the table
126 compact_table[_num_buckets] = BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE);
127
128 return compact_table;
129 }
130
131 // Write the compact table's entries
132 juint CompactHashtableWriter::dump_buckets(juint* compact_table, juint* p,
133 NumberSeq* summary) {
134 uintx base_address = 0;
135 uintx max_delta = 0;
136 int num_compact_buckets = 0;
137 if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
138 base_address = uintx(MetaspaceShared::shared_rs()->base());
139 max_delta = uintx(MetaspaceShared::shared_rs()->size());
140 assert(max_delta <= 0x7fffffff, "range check");
141 } else {
142 assert((_type == CompactHashtable<oop, char>::_string_table), "unknown table");
143 assert(UseCompressedOops, "UseCompressedOops is required");
144 }
145
146 assert(p != NULL, "sanity");
147 for (int index = 0; index < _num_buckets; index++) {
148 juint count = 0;
149 int bucket_size = _bucket_sizes[index];
150 int bucket_type = BUCKET_TYPE(compact_table[index]);
151
152 if (bucket_size == 1) {
153 assert(bucket_type == COMPACT_BUCKET_TYPE, "Bad bucket type");
154 num_compact_buckets ++;
155 }
156 for (Entry* tent = _buckets[index]; tent;
157 tent = tent->next()) {
158 if (bucket_type == REGULAR_BUCKET_TYPE) {
159 p++ = juint(tent->hash()); // write entry hash
160 }
161 if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
162 uintx deltax = uintx(tent->value()) - base_address;
163 assert(deltax < max_delta, "range check");
164 juint delta = juint(deltax);
165 *p++ = delta; // write entry offset
166 } else {
167 *p++ = oopDesc::encode_heap_oop(tent->string());
168 }
169 count ++;
170 }
171 assert(count == _bucket_sizes[index], "sanity");
172 }
173
174 // Adjust the hashentry_bytes in CompactHashtableStats. Each compact
175 // bucket saves 4-byte.
176 _stats->hashentry_bytes -= num_compact_buckets * 4;
177
178 return p;
179 }
180
181 // Write the compact table
182 void CompactHashtableWriter::dump(char* top, char* end) {
183 NumberSeq summary;
184 char* old_top = top;
185 juint p = (juint*)(top);
186
187 uintx base_address = uintx(MetaspaceShared::shared_rs()->base());
188
189 // Now write the following at the beginning of the table:
190 // base_address (uintx)
191 // num_entries (juint)
192 // num_buckets (juint)
193 p++ = high(base_address);
194 p++ = low (base_address); // base address
195 p++ = _num_entries; // number of entries in the table
196 p++ = _num_buckets; // number of buckets in the table
197
198 juint first_bucket = NULL;
199 juint compact_table = dump_table(p, &first_bucket, &summary);
200 juint bucket_end = dump_buckets(compact_table, first_bucket, &summary);
201
202 assert(bucket_end <= (juint*)end, "cannot write past end");
203 *top = (char*)bucket_end;
204
205 if (PrintSharedSpaces) {
206 double avg_cost = 0.0;
207 if (_num_entries > 0) {
208 avg_cost = double(_required_bytes)/double(_num_entries);
209 }
210 tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT,
211 table_name(), (intptr_t)base_address);
212 tty->print_cr("Number of entries : %9d", _num_entries);
213 tty->print_cr("Total bytes used : %9d", (int)((top) - old_top));
214 tty->print_cr("Average bytes per entry : %9.3f", avg_cost);
215 tty->print_cr("Average bucket size : %9.3f", summary.avg());
216 tty->print_cr("Variance of bucket size : %9.3f", summary.variance());
217 tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
218 tty->print_cr("Maximum bucket size : %9d", (int)summary.maximum());
219 }
220 }
221
222 const char CompactHashtableWriter::table_name() {
223 switch (_type) {
224 case CompactHashtable<Symbol*, char>::_symbol_table: return "symbol";
225 case CompactHashtable<oop, char>::_string_table: return "string";
226 default:
227 ;
228 }
229 return "unknown";
230 }
231
232 /////////////////////////////////////////////////////////////
233 //
234 // The CompactHashtable implementation
235 //
236 template <class T, class N> const char CompactHashtable<T, N>::init(
237 CompactHashtableType type, const char buffer) {
238 assert(!DumpSharedSpaces, "run-time only");
239 _type = type;
240 juintp = (juint)buffer;
241 juint upper = *p++;
242 juint lower = *p++;
243 _base_address = uintx(jlong_from(upper, lower));
244 _entry_count = *p++;
245 _bucket_count = p++;
246 _buckets = p;
247 _table_end_offset = BUCKET_OFFSET(p[_bucket_count]); // located at the end of the bucket_info table
248
249 juint end = _buckets + _table_end_offset;
250 return (const char)end;
251 }
252
253 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure cl) {
254 assert(!DumpSharedSpaces, "run-time only");
255 for (juint i = 0; i < _bucket_count; i ++) {
256 juint bucket_info = _buckets[i];
257 juint bucket_offset = BUCKET_OFFSET(bucket_info);
258 int bucket_type = BUCKET_TYPE(bucket_info);
259 juint* bucket = _buckets + bucket_offset;
260 juint* bucket_end = _buckets;
261
262 Symbol* sym;
263 if (bucket_type == COMPACT_BUCKET_TYPE) {
264 sym = (Symbol*)((void*)(_base_address + bucket[0]));
265 cl->do_symbol(&sym);
266 } else {
267 bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
268 while (bucket < bucket_end) {
269 sym = (Symbol*)((void*)(_base_address + bucket[1]));
270 cl->do_symbol(&sym);
271 bucket += 2;
272 }
273 }
274 }
275 }
276
277 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure f) {
278 assert(!DumpSharedSpaces, "run-time only");
279 assert(_type == _string_table || _bucket_count == 0, "sanity");
280 for (juint i = 0; i < _bucket_count; i ++) {
281 juint bucket_info = _buckets[i];
282 juint bucket_offset = BUCKET_OFFSET(bucket_info);
283 int bucket_type = BUCKET_TYPE(bucket_info);
284 juint* bucket = _buckets + bucket_offset;
285 juint* bucket_end = _buckets;
286
287 narrowOop o;
288 if (bucket_type == COMPACT_BUCKET_TYPE) {
289 o = (narrowOop)bucket[0];
290 f->do_oop(&o);
291 } else {
292 bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
293 while (bucket < bucket_end) {
294 o = (narrowOop)bucket[1];
295 f->do_oop(&o);
296 bucket += 2;
297 }
298 }
299 }
300 }
301
302 // Explicitly instantiate these types
303 template class CompactHashtable<Symbol*, char>;
304 template class CompactHashtable<oop, char>;
305
// O_BINARY is OR-ed into the flags passed to open() in this file. Where the
// platform headers do not provide it (it is a Win32 flag), define it as 0 so
// that it has no effect.
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
309
310 ////////////////////////////////////////////////////////
311 //
312 // HashtableTextDump
313 //
314 HashtableTextDump::HashtableTextDump(const char filename) : _fd(-1) {
315 struct stat st;
316 if (os::stat(filename, &st) != 0) {
317 quit("Unable to get hashtable dump file size", filename);
318 }
319 _size = st.st_size;
320 _fd = open(filename, O_RDONLY | O_BINARY, 0);
321 if (_fd < 0) {
322 quit("Unable to open hashtable dump file", filename);
323 }
324 _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
325 if (_base == NULL) {
326 quit("Unable to map hashtable dump file", filename);
327 }
328 _p = _base;
329 _end = _base + st.st_size;
330 _filename = filename;
331 _prefix_type = Unknown;
332 _line_no = 1;
333 }
334
335 HashtableTextDump::HashtableTextDump() {
336 os::unmap_memory((char*)_base, _size);
337 if (_fd >= 0) {
338 close(_fd);
339 }
340 }
341
342 void HashtableTextDump::quit(const char* err, const char* msg) {
343 vm_exit_during_initialization(err, msg);
344 }
345
346 void HashtableTextDump::corrupted(const char p, const char msg) {
347 char info[100];
348 jio_snprintf(info, sizeof(info),
349 "%s. Corrupted at line %d (file pos %d)",
350 msg, _line_no, (int)(p - _base));
351 quit(info, _filename);
352 }
353
354 bool HashtableTextDump::skip_newline() {
355 if (_p[0] == '\r' && _p[1] == '\n') {
356 _p += 2;
357 } else if (_p[0] == '\n') {
358 _p += 1;
359 } else {
360 corrupted(_p, "Unexpected character");
361 }
362 _line_no ++;
363 return true;
364 }
365
366 int HashtableTextDump::skip(char must_be_char) {
367 corrupted_if(remain() < 1);
368 corrupted_if(_p++ != must_be_char);
369 return 0;
370 }
371
372 void HashtableTextDump::skip_past(char c) {
373 for (;;) {
374 corrupted_if(remain() < 1);
375 if (*_p++ == c) {
376 return;
377 }
378 }
379 }
380
381 void HashtableTextDump::check_version(const char* ver) {
382 int len = (int)strlen(ver);
383 corrupted_if(remain() < len);
384 if (strncmp(_p, ver, len) != 0) {
385 quit("wrong version of hashtable dump file", _filename);
386 }
387 _p += len;
388 skip_newline();
389 }
390
391 void HashtableTextDump::scan_prefix_type() {
392 _p ++;
393 if (strncmp(_p, "SECTION: String", 15) == 0) {
394 _p += 15;
395 _prefix_type = StringPrefix;
396 } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
397 _p += 15;
398 _prefix_type = SymbolPrefix;
399 } else {
400 _prefix_type = Unknown;
401 }
402 skip_newline();
403 }
404
405 int HashtableTextDump::scan_prefix(int* utf8_length) {
406 if (*_p == '@') {
407 scan_prefix_type();
408 }
409
410 switch (_prefix_type) {
411 case SymbolPrefix:
412 *utf8_length = scan_symbol_prefix(); break;
413 case StringPrefix:
414 *utf8_length = scan_string_prefix(); break;
415 default:
416 tty->print_cr("Shared input data type: Unknown.");
417 corrupted(_p, "Unknown data type");
418 }
419
420 return _prefix_type;
421 }
422
423 int HashtableTextDump::scan_string_prefix() {
424 // Expect /[0-9]+: /
425 int utf8_length = 0;
426 get_num(':', &utf8_length);
427 if (_p != ' ') {
428 corrupted(_p, "Wrong prefix format for string");
429 }
430 _p++;
431 return utf8_length;
432 }
433
434 int HashtableTextDump::scan_symbol_prefix() {
435 // Expect /[0-9]+ (-|)[0-9]+: /
436 int utf8_length = 0;
437 get_num(' ', &utf8_length);
438 if (_p == '-') {
439 _p++;
440 }
441 int ref_num;
442 (void)get_num(':', &ref_num);
443 if (_p != ' ') {
444 corrupted(_p, "Wrong prefix format for symbol");
445 }
446 _p++;
447 return utf8_length;
448 }
449
450 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
451 jchar value = 0;
452
453 corrupted_if(from + count > end);
454
455 for (int i=0; i<count; i++) {
456 char c = from++;
457 switch (c) {
458 case '0': case '1': case '2': case '3': case '4':
459 case '5': case '6': case '7': case '8': case '9':
460 value = (value << 4) + c - '0';
461 break;
462 case 'a': case 'b': case 'c':
463 case 'd': case 'e': case 'f':
464 value = (value << 4) + 10 + c - 'a';
465 break;
466 case 'A': case 'B': case 'C':
467 case 'D': case 'E': case 'F':
468 value = (value << 4) + 10 + c - 'A';
469 break;
470 default:
471 ShouldNotReachHere();
472 }
473 }
474 return value;
475 }
476
477 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
478 // cache in local vars
479 const char* from = _p;
480 const char* end = _end;
481 char* to = utf8_buffer;
482 int n = utf8_length;
483
484 for (; n > 0 && from < end; n--) {
485 if (*from != '\') {
486 *to++ = *from++;
487 } else {
488 corrupted_if(from + 2 > end);
489 char c = from[1];
490 from += 2;
491 switch (c) {
492 case 'x':
493 {
494 jchar value = unescape(from, end, 2);
495 from += 2;
496 assert(value <= 0xff, "sanity");
497 *to++ = (char)(value & 0xff);
498 }
499 break;
500 case 't': *to++ = '\t'; break;
501 case 'n': *to++ = '\n'; break;
502 case 'r': *to++ = '\r'; break;
503 case '\': *to++ = '\'; break;
504 default:
505 corrupted(_p, "Unsupported character");
506 }
507 }
508 }
509 corrupted_if(n > 0); // expected more chars but file has ended
510 _p = from;
511 skip_newline();
512 }
513
514 // NOTE: the content is NOT the same as
515 // UTF8::as_quoted_ascii(const char utf8_str, int utf8_length, char* buf, int buflen).
516 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
517 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
518 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
519 const char *c = utf8_string;
520 const char *end = c + utf8_length;
521 for (; c < end; c++) {
522 switch (*c) {
523 case '\t': st->print("\t"); break;
524 case '\r': st->print("\r"); break;
525 case '\n': st->print("\n"); break;
526 case '\': st->print("\\"); break;
527 default:
528 if (isprint(*c)) {
529 st->print("%c", *c);
530 } else {
531 st->print("\x%02x", ((unsigned int)*c) & 0xff);
532 }
533 }
534 }
535 }