// New src/hotspot/share/runtime/threadHeapSampler.cpp (original) (raw)
1 /* 2 * Copyright (c) 2018, Google and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 / 24 25 #include "runtime/sharedRuntime.hpp" 26 #include "runtime/threadHeapSampler.hpp" 27 28 // Cheap random number generator 29 uint64_t ThreadHeapSampler::_rnd; 30 // Default is 512kb. 31 int ThreadHeapSampler::_sampling_rate = 512 * 1024; 32 int ThreadHeapSampler::_enabled; 33 34 // Statics for the fast log 35 static const int FastLogNumBits = 10; 36 static const int FastLogMask = (1 << FastLogNumBits) - 1; 37 static double log_table[1<<FastLogNumBits]; // Constant 38 static bool log_table_initialized; 39 40 // Returns the next prng value. 41 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 42 // This is the lrand64 generator. 
43 static uint64_t next_random(uint64_t rnd) { 44 const uint64_t PrngMult = 0x5DEECE66DLL; 45 const uint64_t PrngAdd = 0xB; 46 const uint64_t PrngModPower = 48; 47 const uint64_t PrngModMask = right_n_bits(PrngModPower); 48 return (PrngMult * rnd + PrngAdd) & PrngModMask; 49 } 50 51 static double fast_log2(const double & d) { 52 assert(d>0, "bad value passed to assert"); 53 uint64_t x = 0; 54 memcpy(&x, &d, sizeof(uint64_t)); 55 const uint32_t x_high = x >> 32; 56 const uint32_t y = x_high >> (20 - FastLogNumBits) & FastLogMask; 57 const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023; 58 return exponent + log_table[y]; 59 } 60 61 // Generates a geometric variable with the specified mean (512K by default). 62 // This is done by generating a random number between 0 and 1 and applying 63 // the inverse cumulative distribution function for an exponential. 64 // Specifically: Let m be the inverse of the sample rate, then 65 // the probability distribution function is mexp(-mx) so the CDF is 66 // p = 1 - exp(-mx), so 67 // q = 1 - p = exp(-mx) 68 // log_e(q) = -mx 69 // -log_e(q)/m = x 70 // log_2(q) * (-log_e(2) * 1/m) = x 71 // In the code, q is actually in the range 1 to 226, hence the -26 below 72 void ThreadHeapSampler::pick_next_geometric_sample() { 73 _rnd = next_random(_rnd); 74 // Take the top 26 bits as the random number 75 // (This plus a 1<<58 sampling bound gives a max possible step of 76 // 5194297183973780480 bytes. In this case, 77 // for sample_parameter = 1<<19, max possible step is 78 // 9448372 bytes (24 bits). 79 const uint64_t PrngModPower = 48; // Number of bits in prng 80 // The uint32_t cast is to prevent a (hard-to-reproduce) NAN 81 // under piii debug for some binaries. 82 double q = static_cast(_rnd >> (PrngModPower - 26)) + 1.0; 83 // Put the computed p-value through the CDF of a geometric. 
84 // For faster performance (save ~1/20th exec time), replace 85 // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705) 86 // The value 26.000705 is used rather than 26 to compensate 87 // for inaccuracies in FastLog2 which otherwise result in a 88 // negative answer. 89 double log_val = (fast_log2(q) - 26); 90 size_t rate = static_cast( 91 (0.0 < log_val ? 0.0 : log_val) * (-log(2.0) * (_sampling_rate)) + 1); 92 _bytes_until_sample = rate; 93 } 94 95 void ThreadHeapSampler::pick_next_sample(size_t overflowed_bytes) { 96 if (_sampling_rate == 1) { 97 _bytes_until_sample = 1; 98 return; 99 } 100 101 pick_next_geometric_sample(); 102 103 // Try to correct sample size by removing extra space from last allocation. 104 if (overflowed_bytes > 0 && _bytes_until_sample > overflowed_bytes) { 105 _bytes_until_sample -= overflowed_bytes; 106 } 107 } 108 109 void ThreadHeapSampler::check_for_sampling(HeapWord ptr, size_t allocation_size, size_t bytes_since_allocation) { 110 oopDesc oop = reinterpret_cast<oopDesc*>(ptr); 111 size_t total_allocated_bytes = bytes_since_allocation + allocation_size; 112 113 // If not yet time for a sample, skip it. 114 if (total_allocated_bytes < _bytes_until_sample) { 115 _bytes_until_sample -= total_allocated_bytes; 116 return; 117 } 118 119 JvmtiExport::sampled_object_alloc_event_collector(oop); 120 121 size_t overflow_bytes = total_allocated_bytes - _bytes_until_sample; 122 pick_next_sample(overflow_bytes); 123 } 124 125 void ThreadHeapSampler::init_log_table() { 126 MutexLocker mu(ThreadHeapSampler_lock); 127 128 if (log_table_initialized) { 129 return; 130 } 131 132 for (int i = 0; i < (1 << FastLogNumBits); i++) { 133 log_table[i] = (log(1.0 + static_cast(i+0.5) / (1 << FastLogNumBits)) 134 / log(2.0)); 135 } 136 137 log_table_initialized = true; 138 } 139 140 void ThreadHeapSampler::set_tlab_heap_sampling(int sampling_rate) { 141 MutexLocker mu(ThreadHeapSampler_lock); 142 _sampling_rate = sampling_rate; 143 }