// With 512K average sample step (the default):
// the probability of sampling a 4K allocation is about 0.00778
// the probability of sampling a 1MB allocation is about 0.865
// the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 1M) is:
// 1 - e^(-X/Y)
//
// With 128K average sample step:
// the probability of sampling a 1MB allocation is about 0.99966
// the probability of sampling a 1GB allocation is about 1.0
// (about 1 - 2**(-26))
// With 1M average sample step:
// the probability of sampling a 4K allocation is about 0.00390
// the probability of sampling a 1MB allocation is about 0.632
// the probability of sampling a 1GB allocation is about 1.0
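To double-check the numbers quoted above, here is a small standalone program (my own sketch, not part of tcmalloc) that evaluates 1 - e^(-X/Y) for the example sizes and sample steps:

#include <cmath>
#include <cstdio>

int main() {
  // Sample steps and allocation sizes taken from the comment above.
  const double steps[] = {512.0 * 1024, 128.0 * 1024, 1024.0 * 1024};
  const double sizes[] = {4.0 * 1024, 1024.0 * 1024, 1024.0 * 1024 * 1024};
  for (double y : steps) {
    for (double x : sizes) {
      // Probability that at least one sample point lands inside an
      // allocation of x bytes when the gaps between sample points are
      // exponentially distributed with mean y.
      std::printf("step=%6.0fK size=%8.0fK p=%.5f\n",
                  y / 1024, x / 1024, 1.0 - std::exp(-x / y));
    }
  }
  return 0;
}

Running it reproduces the figures in the comment, e.g. about 0.00778 for a 4K allocation with a 512K step and about 0.865 for a 1MB allocation.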
The sampling logic is also an auxiliary feature and should only rarely be turned on.
What is more interesting is the function itself: after subtracting the requested size k from bytes_until_sample_, the check for whether bytes_until_sample_ has dropped below zero leans on a CPU instruction-level optimization, a sub <reg>, <mem> followed by a conditional jump on a flag (sign in the generated code, 'carry' in the ideal case described in the comment). I do not fully understand this mechanism yet, so I will set it aside for now.
inline bool Sampler::TryRecordAllocationFast(size_t k) {
// For efficiency reason, we're testing bytes_until_sample_ after
// decrementing it by k. This allows compiler to do sub <reg>, <mem>
// followed by conditional jump on sign. But it is correct only if k
// is actually smaller than largest ssize_t value. Otherwise
// converting k to signed value overflows.
//
// It would be great for generated code to be sub <reg>, <mem>
// followed by conditional jump on 'carry', which would work for
// arbitrary values of k, but there seem to be no way to express
// that in C++.
//
// Our API contract explicitly states that only small values of k
// are permitted. And thus it makes sense to assert on that.
ASSERT(static_cast<ssize_t>(k) >= 0);
bytes_until_sample_ -= static_cast<ssize_t>(k);
if (PREDICT_FALSE(bytes_until_sample_ < 0)) {
// Note, we undo sampling counter update, since we're not actually
// handling slow path in the "needs sampling" case (calling
// RecordAllocationSlow to reset counter). And we do that in order
// to avoid non-tail calls in malloc fast-path. See also comments
// on declaration inside Sampler class.
//
// volatile is used here to improve compiler's choice of
// instructions. We know that this path is very rare and that there
// is no need to keep previous value of bytes_until_sample_ in
// register. This helps compiler generate slightly more efficient
// sub <reg>, <mem> instruction for subtraction above.
volatile ssize_t *ptr =
const_cast<volatile ssize_t *>(&bytes_until_sample_);
*ptr += k;
return false;
}
return true;
}
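The comment's overflow caveat, that k must fit in ssize_t, can be made concrete with a standalone toy (my own sketch, not tcmalloc code):

#include <sys/types.h>  // ssize_t (POSIX)
#include <cstdint>
#include <cstdio>

int main() {
  ssize_t bytes_until_sample = 512 * 1024;

  // Normal case: k is small, the subtraction moves the counter down.
  size_t small_k = 4096;
  bytes_until_sample -= static_cast<ssize_t>(small_k);
  std::printf("after 4K allocation: %zd\n", bytes_until_sample);

  // Contract violation: a size_t larger than the biggest ssize_t converts
  // to a negative value (-1 here on two's-complement targets), so the
  // "subtraction" would actually increase the counter and the sign test
  // would silently skip sampling. This is exactly what the ASSERT guards.
  size_t huge_k = SIZE_MAX;
  std::printf("SIZE_MAX cast to ssize_t: %zd\n",
              static_cast<ssize_t>(huge_k));
  return 0;
}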
// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(uint32 cl, int32_t byte_size,
void *(*oom_handler)(size_t size)) {
FreeList* list = &list_[cl];
ASSERT(list->empty());
const int batch_size = Static::sizemap()->num_objects_to_move(cl);
const int num_to_move = min<int>(list->max_length(), batch_size);
void *start, *end;
int fetch_count = Static::central_cache()[cl].RemoveRange(
&start, &end, num_to_move);
if (fetch_count == 0) {
ASSERT(start == NULL);
return oom_handler(byte_size);
}
ASSERT(start != NULL);
if (--fetch_count >= 0) {
size_ += byte_size * fetch_count;
list->PushRange(fetch_count, SLL_Next(start), end);
}
// Increase max length slowly up to batch_size. After that,
// increase by batch_size in one shot so that the length is a
// multiple of batch_size.
if (list->max_length() < batch_size) {
list->set_max_length(list->max_length() + 1);
} else {
// Don't let the list get too long. In 32 bit builds, the length
// is represented by a 16 bit int, so we need to watch out for
// integer overflow.
int new_length = min<int>(list->max_length() + batch_size,
kMaxDynamicFreeListLength);
// The list's max_length must always be a multiple of batch_size,
// and kMaxDynamicFreeListLength is not necessarily a multiple
// of batch_size.
new_length -= new_length % batch_size;
ASSERT(new_length % batch_size == 0);
list->set_max_length(new_length);
}
return start;
}
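The max_length growth policy is easier to see with a quick simulation (my own sketch; batch_size, the starting max_length of 1, and kMaxDynamicFreeListLength = 8192 are assumptions based on typical gperftools values, not taken from this file):

#include <algorithm>
#include <cstdio>

int main() {
  const int batch_size = 32;                   // assumed num_objects_to_move(cl)
  const int kMaxDynamicFreeListLength = 8192;  // assumed cap
  int max_length = 1;                          // assumed initial FreeList value

  for (int miss = 1; max_length < kMaxDynamicFreeListLength; ++miss) {
    if (max_length < batch_size) {
      ++max_length;  // "slow start": grow by one object per miss
    } else {
      // Grow by a whole batch, capped and rounded down to a multiple of
      // batch_size, mirroring the branch in FetchFromCentralCache above.
      int new_length = std::min(max_length + batch_size,
                                kMaxDynamicFreeListLength);
      new_length -= new_length % batch_size;
      max_length = new_length;
    }
    if (miss <= 35 || max_length == kMaxDynamicFreeListLength)
      std::printf("miss %3d -> max_length %d\n", miss, max_length);
  }
  return 0;
}

The output shows the list limit ramping up one object per cache miss until it reaches a full batch, then jumping a batch at a time until it hits the cap.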