issue #151 memory allocator updates

pull/167/head
Dibyendu Majumdar 6 years ago
parent 4ccf6cb2ef
commit b584ce9862

@ -173,7 +173,7 @@ set(LUA_CORE_SRCS src/lapi.c src/lcode.c src/lctype.c src/ldebug.c src/ldo.c src
src/lfunc.c src/lgc.c src/llex.c src/lmem.c src/lobject.c src/lopcodes.c
src/lparser.c src/lstate.c src/lstring.c src/ltable.c src/ltm.c src/lundump.c
src/lvm.c src/lzio.c src/ravijit.cpp src/ltests.c src/ravi_profile.c src/ravi_ast.c src/ravi_membuf.c
src/ravi_jitshared.c src/bit.c) # src/ravi_alloc.c)
src/ravi_jitshared.c src/bit.c src/ravi_alloc.c)
if (COMPUTED_GOTO AND NOT MSVC)
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
set_source_files_properties(src/lvm.c PROPERTIES COMPILE_FLAGS -DRAVI_USE_COMPUTED_GOTO)

@ -0,0 +1,50 @@
local function BottomUpTree(depth)
if depth > 0 then
depth = depth - 1
local left, right = BottomUpTree(depth), BottomUpTree(depth)
return { left, right }
else
return { }
end
end
local function ItemCheck(tree)
if tree[1] then
return 1 + ItemCheck(tree[1]) + ItemCheck(tree[2])
else
return 1
end
end
local N = tonumber(arg and arg[1]) or 0
local mindepth = 4
local maxdepth = mindepth + 2
if maxdepth < N then maxdepth = N end
local start = os.clock()
do
local stretchdepth = maxdepth + 1
local stretchtree = BottomUpTree(stretchdepth)
io.write(string.format("stretch tree of depth %d\t check: %d\n",
stretchdepth, ItemCheck(stretchtree)))
end
local longlivedtree = BottomUpTree(maxdepth)
for depth=mindepth,maxdepth,2 do
local iterations = 2 ^ (maxdepth - depth + mindepth)
local check = 0
for i=1,iterations do
check = check + ItemCheck(BottomUpTree(depth))
end
io.write(string.format("%d\t trees of depth %d\t check: %d\n",
iterations, depth, check))
end
io.write(string.format("long lived tree of depth %d\t check: %d\n",
maxdepth, ItemCheck(longlivedtree)))
local finish = os.clock()
io.write(string.format("Time taken %f\n", finish-start))

@ -25,7 +25,7 @@
#include "lua.h"
#include "lauxlib.h"
#include "ravi_alloc.h"
/*
** {======================================================
@ -1052,7 +1052,8 @@ static int panic (lua_State *L) {
LUALIB_API lua_State *luaL_newstate (void) {
lua_State *L = lua_newstate(l_alloc, NULL);
void *ud = ravi_alloc_create(); /* Use LuaJIT's allocator */
lua_State *L = lua_newstate(ravi_alloc_f, ud);
if (L) lua_atpanic(L, &panic);
return L;
}

@ -34,6 +34,7 @@
#include "ravijit.h"
#include "ravi_profile.h"
#include "ravi_alloc.h"
#if !defined(LUAI_GCPAUSE)
#define LUAI_GCPAUSE 200 /* 200% */
@ -264,7 +265,10 @@ static void close_state (lua_State *L) {
freestack(L);
lua_assert(gettotalbytes(g) == sizeof(LG));
raviV_close(L);
(*g->frealloc)(g->ud, fromstate(L), sizeof(LG), 0); /* free main block */
if (g->frealloc == ravi_alloc_f) /* Using LuaJIt allocator? */
ravi_alloc_destroy(g->ud);
else
(*g->frealloc)(g->ud, fromstate(L), sizeof(LG), 0); /* free main block */
}

@ -505,6 +505,95 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */
/* ---------------------- Overlaid data structures ----------------------- */
/*
When chunks are not in use, they are treated as nodes of either
lists or trees.
"Small" chunks are stored in circular doubly-linked lists, and look
like this:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`head:' | Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk in list |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space (may be 0 bytes long) .
. .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`foot:' | Size of chunk, in bytes |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Larger chunks are kept in a form of bitwise digital trees (aka
tries) keyed on chunksizes. Because malloc_tree_chunks are only for
free chunks greater than 256 bytes, their size doesn't impose any
constraints on user chunk sizes. Each node looks like:
chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Size of previous chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`head:' | Size of chunk, in bytes |P|
mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Forward pointer to next chunk of same size |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Back pointer to previous chunk of same size |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to left child (child[0]) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to right child (child[1]) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Pointer to parent |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| bin index of this chunk |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Unused space .
. |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
`foot:' | Size of chunk, in bytes |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Each tree holding treenodes is a tree of unique chunk sizes. Chunks
of the same size are arranged in a circularly-linked list, with only
the oldest chunk (the next to be used, in our FIFO ordering)
actually in the tree. (Tree members are distinguished by a non-null
parent pointer.) If a chunk with the same size an an existing node
is inserted, it is linked off the existing node using pointers that
work in the same way as fd/bk pointers of small chunks.
Each tree contains a power of 2 sized range of chunk sizes (the
smallest is 0x100 <= x < 0x180), which is is divided in half at each
tree level, with the chunks in the smaller half of the range (0x100
<= x < 0x140 for the top nose) in the left subtree and the larger
half (0x140 <= x < 0x180) in the right subtree. This is, of course,
done by inspecting individual bits.
Using these rules, each node's left subtree contains all smaller
sizes than its right subtree. However, the node at the root of each
subtree has no particular ordering relationship to either. (The
dividing line between the subtree sizes is based on trie relation.)
If we remove the last chunk of a given size from the interior of the
tree, we need to replace it with a leaf node. The tree ordering
rules permit a node to be replaced by any leaf below it.
The smallest chunk in a tree (a common operation in a best-fit
allocator) can be found by walking a path to the leftmost leaf in
the tree. Unlike a usual binary tree, where we follow left child
pointers until we reach a null, here we follow the right child
pointer any time the left one is null, until we reach a leaf with
both child pointers null. The smallest chunk in the tree will be
somewhere along that path.
The worst case number of steps to add, find, or remove a node is
bounded by the number of bits differentiating chunks within
bins. Under current bin calculations, this ranges from 6 up to 21
(for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case
is of course much better.
*/
struct malloc_tree_chunk {
/* The first four fields must be compatible with malloc_chunk */
size_t prev_foot;
@ -526,6 +615,61 @@ typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */
/* ----------------------------- Segments -------------------------------- */
/*
Each malloc space may include non-contiguous segments, held in a
list headed by an embedded malloc_segment record representing the
top-most space. Segments also include flags holding properties of
the space. Large chunks that are directly allocated by mmap are not
included in this list. They are instead independently created and
destroyed without otherwise keeping track of them.
Segment management mainly comes into play for spaces allocated by
MMAP. Any call to MMAP might or might not return memory that is
adjacent to an existing segment. MORECORE normally contiguously
extends the current space, so this space is almost always adjacent,
which is simpler and faster to deal with. (This is why MORECORE is
used preferentially to MMAP when both are available -- see
sys_alloc.) When allocating using MMAP, we don't use any of the
hinting mechanisms (inconsistently) supported in various
implementations of unix mmap, or distinguish reserving from
committing memory. Instead, we just ask for space, and exploit
contiguity when we get it. It is probably possible to do
better than this on some systems, but no general scheme seems
to be significantly better.
Management entails a simpler variant of the consolidation scheme
used for chunks to reduce fragmentation -- new adjacent memory is
normally prepended or appended to an existing segment. However,
there are limitations compared to chunk consolidation that mostly
reflect the fact that segment processing is relatively infrequent
(occurring only when getting memory from system) and that we
don't expect to have huge numbers of segments:
* Segments are not indexed, so traversal requires linear scans. (It
would be possible to index these, but is not worth the extra
overhead and complexity for most programs on most platforms.)
* New segments are only appended to old ones when holding top-most
memory; if they cannot be prepended to others, they are held in
different segments.
Except for the top-most segment of an mstate, each segment record
is kept at the tail of its segment. Segments are added by pushing
segment records onto the list headed by &mstate.seg for the
containing mstate.
Segment flags control allocation/merge/deallocation policies:
* If EXTERN_BIT set, then we did not allocate this segment,
and so should not try to deallocate or merge with others.
(This currently holds only for the initial segment passed
into create_mspace_with_base.)
* If USE_MMAP_BIT set, the segment may be merged with
other surrounding mmapped segments and trimmed/de-allocated
using munmap.
* If neither bit is set, then the segment was obtained using
MORECORE so can be merged with surrounding MORECORE'd segments
and deallocated/trimmed using MORECORE with negative arguments.
*/
struct malloc_segment {
char *base; /* base address */
size_t size; /* allocated size */
@ -537,6 +681,91 @@ typedef struct malloc_segment *msegmentptr;
/* ---------------------------- malloc_state ----------------------------- */
/*
A malloc_state holds all of the bookkeeping for a space.
The main fields are:
Top
The topmost chunk of the currently active segment. Its size is
cached in topsize. The actual size of topmost space is
topsize+TOP_FOOT_SIZE, which includes space reserved for adding
fenceposts and segment records if necessary when getting more
space from the system. The size at which to autotrim top is
cached from mparams in trim_check, except that it is disabled if
an autotrim fails.
Designated victim (dv)
This is the preferred chunk for servicing small requests that
don't have exact fits. It is normally the chunk split off most
recently to service another small request. Its size is cached in
dvsize. The link fields of this chunk are not maintained since it
is not kept in a bin.
SmallBins
An array of bin headers for free chunks. These bins hold chunks
with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
chunks of all the same size, spaced 8 bytes apart. To simplify
use in double-linked lists, each bin header acts as a malloc_chunk
pointing to the real first node, if it exists (else pointing to
itself). This avoids special-casing for headers. But to avoid
waste, we allocate only the fd/bk pointers of bins, and then use
repositioning tricks to treat these as the fields of a chunk.
TreeBins
Treebins are pointers to the roots of trees holding a range of
sizes. There are 2 equally spaced treebins for each power of two
from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
larger.
Bin maps
There is one bit map for small bins ("smallmap") and one for
treebins ("treemap). Each bin sets its bit when non-empty, and
clears the bit when empty. Bit operations are then used to avoid
bin-by-bin searching -- nearly all "search" is done without ever
looking at bins that won't be selected. The bit maps
conservatively use 32 bits per map word, even if on 64bit system.
For a good description of some of the bit-based techniques used
here, see Henry S. Warren Jr's book "Hacker's Delight" (and
supplement at http://hackersdelight.org/). Many of these are
intended to reduce the branchiness of paths through malloc etc, as
well as to reduce the number of memory locations read or written.
Segments
A list of segments headed by an embedded malloc_segment record
representing the initial space.
Address check support
The least_addr field is the least address ever obtained from
MORECORE or MMAP. Attempted frees and reallocs of any address less
than this are trapped (unless INSECURE is defined).
Magic tag
A cross-check field that should always hold same value as mparams.magic.
Max allowed footprint
The maximum allowed bytes to allocate from system (zero means no limit)
Flags
Bits recording whether to use MMAP, locks, or contiguous MORECORE
Statistics
Each space keeps track of current and maximum system memory
obtained via MORECORE or MMAP.
Trim support
Fields holding the amount of unused topmost memory that should trigger
trimming, and a counter to force periodic scanning to release unused
non-topmost segments.
Locking
If USE_LOCKS is defined, the "mutex" lock is acquired and released
around every public call using this mspace.
Extension support
A void* pointer and a size_t field that can be used to help implement
extensions to this malloc.
*/
/* Bin types, widths and sizes */
#define NSMALLBINS (32U)
#define NTREEBINS (32U)
@ -690,6 +919,13 @@ static int has_segment_link(mstate m, msegmentptr ss)
/* ----------------------- Operations on smallbins ----------------------- */
/*
Various forms of linking and unlinking are defined as macros. Even
the ones for trees, which are very long but have very short typical
paths. This is ugly but reduces reliance on inlining support of
compilers.
*/
/* Link a free chunk into a smallbin */
#define insert_small_chunk(M, P, S) {\
bindex_t I = small_index(S);\
@ -783,6 +1019,23 @@ static int has_segment_link(mstate m, msegmentptr ss)
}\
}
/*
Unlink steps:
1. If x is a chained node, unlink it from its same-sized fd/bk links
and choose its bk node as its replacement.
2. If x was the last node of its size, but not a leaf node, it must
be replaced with a leaf node (not merely one with an open left or
right), to make sure that lefts and rights of descendents
correspond properly to bit masks. We use the rightmost descendent
of x. We could use any other leaf, but this is easy to locate and
tends to counteract removal of leftmosts elsewhere, and so keeps
paths shorter than minimally guaranteed. This doesn't loop much
because on average a node in a tree is near the bottom.
3. If x is the base of a chain (i.e., has parent links) relink
x's parent and children to x's replacement (or null if none).
*/
#define unlink_large_chunk(M, X) {\
tchunkptr XP = X->parent;\
tchunkptr R;\

Loading…
Cancel
Save