From 28838aeea7c309e0a122c28f655fff9e37dfa4d3 Mon Sep 17 00:00:00 2001 From: Robert Konrad Date: Thu, 5 Sep 2024 00:46:42 +0200 Subject: [PATCH] Copy in the offset allocator for now (beware the license) --- Sources/kope/util/offalloc/LICENSE | 21 + Sources/kope/util/offalloc/README.md | 84 ++++ Sources/kope/util/offalloc/offalloc.c | 528 ++++++++++++++++++++++++++ Sources/kope/util/offalloc/offalloc.h | 75 ++++ 4 files changed, 708 insertions(+) create mode 100644 Sources/kope/util/offalloc/LICENSE create mode 100644 Sources/kope/util/offalloc/README.md create mode 100644 Sources/kope/util/offalloc/offalloc.c create mode 100644 Sources/kope/util/offalloc/offalloc.h diff --git a/Sources/kope/util/offalloc/LICENSE b/Sources/kope/util/offalloc/LICENSE new file mode 100644 index 000000000..b34d3a0e3 --- /dev/null +++ b/Sources/kope/util/offalloc/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Sebastian Aaltonen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Sources/kope/util/offalloc/README.md b/Sources/kope/util/offalloc/README.md new file mode 100644 index 000000000..a89a6d06e --- /dev/null +++ b/Sources/kope/util/offalloc/README.md @@ -0,0 +1,84 @@ +# offalloc +Fast hard realtime O(1) offset allocator with minimal fragmentation. + +Uses 256 bins with 8 bit floating point distribution (3 bit mantissa + 5 bit exponent) and a two level bitfield to find the next available bin using 2x LZCNT instructions to make all operations O(1). Bin sizes following the floating point distribution ensure hard bounds for the memory overhead percentage regardless of size class. Pow2 bins would waste up to +100% memory (+50% on average). Our float bins waste up to +12.5% (+6.25% on average). + +The allocation metadata is stored in a separate data structure, making this allocator suitable for sub-allocating any resources, such as GPU heaps, buffers and arrays. Returns an offset to the first element of the allocated contiguous range. + +**Bin size table:** +``` +0->0 1->1 2->2 3->3 4->4 5->5 6->6 7->7 +8->8 9->9 10->10 11->11 12->12 13->13 14->14 15->15 +16->16 17->18 18->20 19->22 20->24 21->26 22->28 23->30 +24->32 25->36 26->40 27->44 28->48 29->52 30->56 31->60 +32->64 33->72 34->80 35->88 36->96 37->104 38->112 39->120 +40->128 41->144 42->160 43->176 44->192 45->208 46->224 47->240 +48->256 49->288 50->320 51->352 52->384 53->416 54->448 55->480 +56->512 57->576 58->640 59->704 60->768 61->832 62->896 63->960 +64->1024 65->1152 66->1280 67->1408 68->1536 69->1664 70->1792 71->1920 +72->2048 73->2304 74->2560 75->2816 76->3072 77->3328 78->3584 79->3840 +80->4096 81->4608 82->5120 83->5632 84->6144 85->6656 86->7168 87->7680 +88->8192 89->9216 90->10240 91->11264 92->12288 93->13312 94->14336 95->15360 +96->16384 97->18432 98->20480 99->22528 100->24576 101->26624 102->28672 103->30720 +104->32768 105->36864 106->40960 107->45056 108->49152 109->53248 110->57344 111->61440 +112->65536 113->73728 114->81920 115->90112 
116->98304 117->106496 118->114688 119->122880 +120->131072 121->147456 122->163840 123->180224 124->196608 125->212992 126->229376 127->245760 +128->262144 129->294912 130->327680 131->360448 132->393216 133->425984 134->458752 135->491520 +136->524288 137->589824 138->655360 139->720896 140->786432 141->851968 142->917504 143->983040 +144->1048576 145->1179648 146->1310720 147->1441792 148->1572864 149->1703936 150->1835008 151->1966080 +152->2097152 153->2359296 154->2621440 155->2883584 156->3145728 157->3407872 158->3670016 159->3932160 +160->4194304 161->4718592 162->5242880 163->5767168 164->6291456 165->6815744 166->7340032 167->7864320 +168->8388608 169->9437184 170->10485760 171->11534336 172->12582912 173->13631488 174->14680064 175->15728640 +176->16777216 177->18874368 178->20971520 179->23068672 180->25165824 181->27262976 182->29360128 183->31457280 +184->33554432 185->37748736 186->41943040 187->46137344 188->50331648 189->54525952 190->58720256 191->62914560 +192->67108864 193->75497472 194->83886080 195->92274688 196->100663296 197->109051904 198->117440512 199->125829120 +200->134217728 201->150994944 202->167772160 203->184549376 204->201326592 205->218103808 206->234881024 207->251658240 +208->268435456 209->301989888 210->335544320 211->369098752 212->402653184 213->436207616 214->469762048 215->503316480 +216->536870912 217->603979776 218->671088640 219->738197504 220->805306368 221->872415232 222->939524096 223->1006632960 +224->1073741824 225->1207959552 226->1342177280 227->1476395008 228->1610612736 229->1744830464 230->1879048192 231->2013265920 +232->2147483648 233->2415919104 234->2684354560 235->2952790016 236->3221225472 237->3489660928 238->3758096384 239->4026531840 +``` + +## Integration +CMakeLists.txt exists for cmake folder include. Alternatively, just copy the offalloc.c and offalloc.h in your project. No other files are needed. 
+ +## How to use + +``` +#include "offalloc.h" + +// Allocator with 65536 contiguous elements in total +// and 1024 maximum simultaneous allocations +oa_allocator_t allocator = { 0 }; +oa_create(&allocator, 65536, 1024); + +oa_allocation_t a; +oa_allocate(&allocator, 1337, &a); // Allocate a 1337 element contiguous range +do_something(a.offset); // Provides offset to the first element of the range + +oa_allocation_t b; +oa_allocate(&allocator, 123, &b); // Allocate a 123 element contiguous range +do_something(b.offset); // Provides offset to the first element of the range + +oa_storage_report_t report; +oa_storage_report(&allocator, &report); + +oa_free(&allocator, &b); +oa_free(&allocator, &a); + +oa_destroy(&allocator); +``` + +## References +This allocator is similar to the two-level segregated fit (TLSF) algorithm. + +**Comparison paper shows that TLSF algorithm provides best in class performance and fragmentation:** +https://www.researchgate.net/profile/Alfons-Crespo/publication/234785757_A_comparison_of_memory_allocators_for_real-time_applications/links/5421d8550cf2a39f4af765f4/A-comparison-of-memory-allocators-for-real-time-applications.pdf + +## Disclaimer +Early one weekend prototype. Unit tests are green, but coverage is still not 100%. Use at your own risk! 
+
+C99 version: Hasn't been thoroughly tested yet
+
+## License
+MIT license (see file: LICENSE)
diff --git a/Sources/kope/util/offalloc/offalloc.c b/Sources/kope/util/offalloc/offalloc.c
new file mode 100644
index 000000000..b26be75e4
--- /dev/null
+++ b/Sources/kope/util/offalloc/offalloc.c
@@ -0,0 +1,604 @@
+// (C) Sebastian Aaltonen 2023
+// MIT License (see file: LICENSE)
+// C99 conversion by Aarni Gratseff
+// https://github.com/aarni57/offalloc
+
+#include "offalloc.h"
+
+#include <stdlib.h>
+
+#if !defined(NDEBUG)
+#   define OA_DEBUG
+
+#   include <assert.h>
+#   define oa_assert(x) assert(x)
+#else
+#   define oa_assert(x)
+#endif
+
+#if defined(OA_VERBOSE)
+#   include <stdio.h>
+#endif
+
+// Counts the leading zero bits of v. v must be non-zero, as the name says:
+// the bit-scan primitives used here give no defined result for 0.
+static uint32_t lzcnt_nonzero(uint32_t v)
+{
+#if defined(_MSC_VER)
+    unsigned long retVal;
+    _BitScanReverse(&retVal, v);
+    return 31 - (uint32_t)retVal;
+#else
+    return (uint32_t)__builtin_clz(v);
+#endif
+}
+
+// Counts the trailing zero bits of v. v must be non-zero (see lzcnt_nonzero).
+static uint32_t tzcnt_nonzero(uint32_t v)
+{
+#if defined(_MSC_VER)
+    unsigned long retVal;
+    _BitScanForward(&retVal, v);
+    return (uint32_t)retVal;
+#else
+    return (uint32_t)__builtin_ctz(v);
+#endif
+}
+
+#define SMALLFLOAT_MANTISSA_BITS 3
+#define SMALLFLOAT_MANTISSA_VALUE (1u << SMALLFLOAT_MANTISSA_BITS)
+#define SMALLFLOAT_MANTISSA_MASK (SMALLFLOAT_MANTISSA_VALUE - 1)
+
+// Bin sizes follow floating point (exponent + mantissa) distribution
+// (piecewise linear log approx)
+// This ensures that for each size class,
+// the average overhead percentage stays the same
+//
+// Returns the index of the smallest bin whose size is >= size.
+static uint32_t smallfloat_uint_to_float_round_up(uint32_t size)
+{
+    uint32_t exp = 0;
+    uint32_t mantissa = 0;
+
+    if (size < SMALLFLOAT_MANTISSA_VALUE) {
+        // Denorm: 0..(MANTISSA_VALUE-1)
+        mantissa = size;
+    } else {
+        // Normalized: Hidden high bit always 1. Not stored. Just like float.
+        uint32_t leadingZeros = lzcnt_nonzero(size);
+        uint32_t highestSetBit = 31 - leadingZeros;
+
+        uint32_t mantissaStartBit = highestSetBit - SMALLFLOAT_MANTISSA_BITS;
+        exp = mantissaStartBit + 1;
+        mantissa = (size >> mantissaStartBit) & SMALLFLOAT_MANTISSA_MASK;
+
+        uint32_t lowBitsMask = ((uint32_t)1 << mantissaStartBit) - 1;
+
+        // Round up!
+        if ((size & lowBitsMask) != 0)
+            mantissa++;
+    }
+
+    // + allows mantissa->exp overflow for round up
+    return (exp << SMALLFLOAT_MANTISSA_BITS) + mantissa;
+}
+
+// Returns the index of the largest bin whose size is <= size.
+static uint32_t smallfloat_uint_to_float_round_down(uint32_t size)
+{
+    uint32_t exp = 0;
+    uint32_t mantissa = 0;
+
+    if (size < SMALLFLOAT_MANTISSA_VALUE) {
+        // Denorm: 0..(MANTISSA_VALUE-1)
+        mantissa = size;
+    } else {
+        // Normalized: Hidden high bit always 1. Not stored. Just like float.
+        uint32_t leadingZeros = lzcnt_nonzero(size);
+        uint32_t highestSetBit = 31 - leadingZeros;
+
+        uint32_t mantissaStartBit = highestSetBit - SMALLFLOAT_MANTISSA_BITS;
+        exp = mantissaStartBit + 1;
+        mantissa = (size >> mantissaStartBit) & SMALLFLOAT_MANTISSA_MASK;
+    }
+
+    return (exp << SMALLFLOAT_MANTISSA_BITS) | mantissa;
+}
+
+// Returns the size of the bin with index floatValue (inverse of the above).
+static uint32_t smallfloat_float_to_uint(uint32_t floatValue)
+{
+    uint32_t exponent = floatValue >> SMALLFLOAT_MANTISSA_BITS;
+    uint32_t mantissa = floatValue & SMALLFLOAT_MANTISSA_MASK;
+    if (exponent == 0) // Denorms
+        return mantissa;
+    else
+        return (mantissa | SMALLFLOAT_MANTISSA_VALUE) << (exponent - 1);
+}
+
+// Utility functions
+
+// Returns the index of the lowest set bit of bitmask at or above
+// start_bit_index, or OA_NO_SPACE when there is none.
+static uint32_t find_lowest_set_bit_after(uint32_t bitmask,
+    uint32_t start_bit_index)
+{
+    // A 32-bit mask has no bit at index 32 or above. Returning early also
+    // keeps the shift count below in range.
+    if (start_bit_index >= 32) return OA_NO_SPACE;
+
+    // Shift an unsigned 1: start_bit_index can be 31, which a signed int
+    // cannot represent.
+    uint32_t mask_before_start_index = ((uint32_t)1 << start_bit_index) - 1;
+    uint32_t mask_after_start_index = ~mask_before_start_index;
+    uint32_t bits_after = bitmask & mask_after_start_index;
+    if (bits_after == 0) return OA_NO_SPACE;
+    return tzcnt_nonzero(bits_after);
+}
+
+//
+
+// Initial state of a node: empty range, not linked into any list.
+static const oa_node_t AO_NODE_DEFAULTS = {
+    .data_offset = 0,
+    .data_size = 0,
+    .bin_list_prev = OA_INVALID_INDEX,
+    .bin_list_next = OA_INVALID_INDEX,
+    .neighbor_prev = OA_INVALID_INDEX,
+    .neighbor_next = OA_INVALID_INDEX,
+};
+
+#define OA_TOP_BINS_INDEX_SHIFT 3
+#define OA_LEAF_BINS_INDEX_MASK 0x7
+
+// The top bit of oa_node_t.data_size marks a node as allocated. The flag is
+// built from an unsigned 1 because 1 << 31 overflows a signed int.
+#define OA_NODE_USED_FLAG ((uint32_t)1 << 31)
+#define OA_NODE_DATA_SIZE_MASK (~OA_NODE_USED_FLAG)
+
+//
+
+// Takes a node from the freelist, makes it describe the free range
+// [data_offset, data_offset + size) and pushes it on top of the bin that
+// matches size. Returns the index of the node.
+static uint32_t insert_node_into_bin(oa_allocator_t *self, uint32_t size,
+    uint32_t data_offset)
+{
+    // Round down to bin index to ensure that bin >= alloc
+    uint32_t bin_index = smallfloat_uint_to_float_round_down(size);
+    oa_assert(bin_index < OA_NUM_LEAF_BINS);
+
+    uint32_t top_bin_index = bin_index >> OA_TOP_BINS_INDEX_SHIFT;
+    uint32_t leaf_bin_index = bin_index & OA_LEAF_BINS_INDEX_MASK;
+
+    // Bin was empty before?
+    if (self->bin_indices[bin_index] == OA_UNUSED) {
+        // Set bin mask bits
+        self->used_bins[top_bin_index] |= (uint8_t)(1u << leaf_bin_index);
+        self->used_bins_top |= (uint32_t)1 << top_bin_index;
+    }
+
+    // Take a freelist node and insert on top of the bin linked list
+    // (next = old top)
+    uint32_t top_node_index = self->bin_indices[bin_index];
+    oa_assert(self->free_offset < self->max_allocs);
+    uint32_t node_index = self->free_nodes[self->free_offset--];
+    oa_assert(node_index < self->max_allocs);
+
+#if defined(OA_VERBOSE)
+    printf("Getting node %u from freelist[%u]\n", node_index,
+        self->free_offset + 1);
+#endif
+
+    self->nodes[node_index] = AO_NODE_DEFAULTS;
+    self->nodes[node_index].data_offset = data_offset;
+    self->nodes[node_index].data_size = size;
+    self->nodes[node_index].bin_list_next = (oa_index_t)top_node_index;
+
+    if (top_node_index != OA_UNUSED) {
+        oa_assert(top_node_index < self->max_allocs);
+        self->nodes[top_node_index].bin_list_prev = (oa_index_t)node_index;
+    }
+
+    self->bin_indices[bin_index] = (oa_index_t)node_index;
+
+    self->free_storage += size;
+
+#if defined(OA_VERBOSE)
+    printf("Free storage: %u (+%u) (insert_node_into_bin)\n",
+        self->free_storage, size);
+#endif
+
+    return node_index;
+}
+
+// Unlinks a free node from its bin, returns it to the freelist and
+// subtracts its size from the free storage total.
+static void remove_node_from_bin(oa_allocator_t *self, uint32_t node_index)
+{
+    oa_assert(node_index < self->max_allocs);
+    oa_node_t *node = &self->nodes[node_index];
+
+    if (node->bin_list_prev != OA_UNUSED) {
+        // Easy case: We have previous node.
+        // Just remove this node from the middle of the list.
+        self->nodes[node->bin_list_prev].bin_list_next = node->bin_list_next;
+        if (node->bin_list_next != OA_UNUSED)
+            self->nodes[node->bin_list_next].bin_list_prev =
+                node->bin_list_prev;
+    } else {
+        // Hard case: We are the first node in a bin. Find the bin.
+
+        // Round down to bin index to ensure that bin >= alloc
+        uint32_t bin_index = smallfloat_uint_to_float_round_down(
+            node->data_size & OA_NODE_DATA_SIZE_MASK);
+
+        uint32_t top_bin_index = bin_index >> OA_TOP_BINS_INDEX_SHIFT;
+        uint32_t leaf_bin_index = bin_index & OA_LEAF_BINS_INDEX_MASK;
+
+        self->bin_indices[bin_index] = node->bin_list_next;
+        if (node->bin_list_next != OA_UNUSED)
+            self->nodes[node->bin_list_next].bin_list_prev = OA_UNUSED;
+
+        // Bin empty?
+        if (self->bin_indices[bin_index] == OA_UNUSED) {
+            // Remove a leaf bin mask bit
+            self->used_bins[top_bin_index] &= (uint8_t)~(1u << leaf_bin_index);
+
+            // All leaf bins empty?
+            if (self->used_bins[top_bin_index] == 0)
+                // Remove a top bin mask bit
+                self->used_bins_top &= ~((uint32_t)1 << top_bin_index);
+        }
+    }
+
+    // Insert the node to freelist
+#if defined(OA_VERBOSE)
+    printf("Putting node %u into freelist[%u] (remove_node_from_bin)\n",
+        node_index, self->free_offset + 1);
+#endif
+    oa_assert(self->free_offset + 1 < self->max_allocs);
+    self->free_nodes[++self->free_offset] = (oa_index_t)node_index;
+
+    self->free_storage -= node->data_size & OA_NODE_DATA_SIZE_MASK;
+#if defined(OA_VERBOSE)
+    printf("Free storage: %u (-%u) (remove_node_from_bin)\n",
+        self->free_storage, node->data_size & OA_NODE_DATA_SIZE_MASK);
+#endif
+}
+
+//
+
+// Initializes self to manage size contiguous elements using at most
+// max_allocs nodes (clamped to OA_INVALID_INDEX). self must be zero-
+// initialized or previously destroyed. Returns 0 on success, or -1 when
+// max_allocs is 0 or memory allocation fails.
+int oa_create(oa_allocator_t *self, uint32_t size, uint32_t max_allocs)
+{
+    oa_assert(!self->nodes);
+    oa_assert(!self->free_nodes);
+    oa_assert(self->used_bins_top == 0);
+
+    // The node for the initial whole-storage range is taken from the freelist
+    // below, so an empty freelist cannot work.
+    if (max_allocs == 0)
+        return -1;
+
+    // Node indices are stored as oa_index_t and OA_INVALID_INDEX is the "no
+    // node" marker, so only OA_INVALID_INDEX nodes (indices 0..0xfffe) are
+    // addressable. Clamp instead of letting larger indices wrap.
+    if (max_allocs > OA_INVALID_INDEX)
+        max_allocs = OA_INVALID_INDEX;
+
+    self->size = size;
+    self->max_allocs = max_allocs;
+    self->free_storage = 0;
+    self->used_bins_top = 0;
+
+    self->free_offset = self->max_allocs - 1;
+
+    for (uint32_t i = 0; i < OA_NUM_TOP_BINS; ++i)
+        self->used_bins[i] = 0;
+
+    for (uint32_t i = 0; i < OA_NUM_LEAF_BINS; ++i)
+        self->bin_indices[i] = OA_INVALID_INDEX;
+
+    self->nodes = (oa_node_t *)malloc(sizeof(oa_node_t) * self->max_allocs);
+    if (!self->nodes) {
+        return -1;
+    }
+
+    self->free_nodes = (oa_index_t *)malloc(
+        sizeof(oa_index_t) * self->max_allocs);
+    if (!self->free_nodes) {
+        free(self->nodes);
+        self->nodes = NULL;
+        return -1;
+    }
+
+    for (uint32_t i = 0; i < self->max_allocs; ++i) {
+        self->nodes[i] = AO_NODE_DEFAULTS;
+    }
+
+    // Freelist is a stack. Nodes in inverse order so that [0] pops first.
+    for (uint32_t i = 0; i < self->max_allocs; ++i)
+        self->free_nodes[i] = (oa_index_t)(self->max_allocs - i - 1);
+
+    // Start state: Whole storage as one big node
+    // Algorithm will split remainders and push them back as smaller nodes
+    insert_node_into_bin(self, self->size, 0);
+
+    return 0;
+}
+
+// Releases the memory owned by self and returns it to the zeroed state that
+// oa_create() expects. Debug builds assert that no allocation is outstanding.
+void oa_destroy(oa_allocator_t *self)
+{
+#if defined(OA_DEBUG)
+    // Leak check. When every allocation has been freed, oa_free() has merged
+    // the storage back into one free node, so all of it is free and exactly
+    // one node is missing from the freelist. That node is not necessarily
+    // node 0 (oa_free() reuses the index of the node being freed), so check
+    // the totals instead of touching a specific node.
+    if (self->nodes) {
+        oa_assert(self->free_storage == self->size);
+        oa_assert(self->free_offset == self->max_allocs - 2);
+    }
+#endif
+
+    free(self->free_nodes);
+    self->free_nodes = NULL;
+
+    free(self->nodes);
+    self->nodes = NULL;
+
+    self->free_storage = 0;
+    self->used_bins_top = 0;
+    for (uint32_t i = 0; i < OA_NUM_TOP_BINS; ++i)
+        self->used_bins[i] = 0;
+}
+
+// Allocates a contiguous range of size elements (size must be non-zero).
+// On success fills *allocation and returns 0. When no free range is large
+// enough or the allocator is out of nodes, sets allocation->offset to
+// OA_NO_SPACE and allocation->index to OA_INVALID_INDEX and returns -1.
+int oa_allocate(oa_allocator_t *self, uint32_t size,
+    oa_allocation_t *allocation)
+{
+    oa_assert(size != 0);
+
+    // Out of allocations?
+    if (self->free_offset == 0) {
+        allocation->offset = OA_NO_SPACE;
+        allocation->index = OA_INVALID_INDEX;
+        return -1;
+    }
+
+    // Round up to bin index to ensure that alloc >= bin
+    // Gives us min bin index that fits the size
+    uint32_t min_bin_index = smallfloat_uint_to_float_round_up(size);
+
+    uint32_t min_top_bin_index = min_bin_index >> OA_TOP_BINS_INDEX_SHIFT;
+    uint32_t min_leaf_bin_index = min_bin_index & OA_LEAF_BINS_INDEX_MASK;
+
+    uint32_t top_bin_index = min_top_bin_index;
+    uint32_t leaf_bin_index = OA_NO_SPACE;
+
+    // If top bin exists, scan its leaf bin. This can fail (NO_SPACE).
+    if (self->used_bins_top & ((uint32_t)1 << top_bin_index))
+        leaf_bin_index = find_lowest_set_bit_after(
+            self->used_bins[top_bin_index], min_leaf_bin_index);
+
+    // If we didn't find space in top bin, we search top bin from +1
+    if (leaf_bin_index == OA_NO_SPACE) {
+        top_bin_index = find_lowest_set_bit_after(
+            self->used_bins_top, min_top_bin_index + 1);
+
+        // Out of space?
+        if (top_bin_index == OA_NO_SPACE) {
+#if defined(OA_VERBOSE)
+            printf("oa_allocate: No space; trying to allocate %u\n", size);
+#endif
+            allocation->offset = OA_NO_SPACE;
+            allocation->index = OA_INVALID_INDEX;
+            return -1;
+        }
+
+        // All leaf bins here fit the alloc, since the top bin was rounded up.
+        // Start leaf search from bit 0. NOTE: This search can't fail since at
+        // least one leaf bit was set because the top bit was set.
+        leaf_bin_index = tzcnt_nonzero(self->used_bins[top_bin_index]);
+    }
+
+    uint32_t bin_index =
+        (top_bin_index << OA_TOP_BINS_INDEX_SHIFT) | leaf_bin_index;
+    oa_assert(bin_index < OA_NUM_LEAF_BINS);
+
+    // Pop the top node of the bin. Bin top = node->next.
+    uint32_t node_index = self->bin_indices[bin_index];
+    oa_assert(node_index < self->max_allocs);
+    oa_node_t *node = &self->nodes[node_index];
+    uint32_t node_total_size = node->data_size & OA_NODE_DATA_SIZE_MASK;
+    node->data_size = size | OA_NODE_USED_FLAG;
+    self->bin_indices[bin_index] = node->bin_list_next;
+
+    if (node->bin_list_next != OA_INVALID_INDEX)
+        self->nodes[node->bin_list_next].bin_list_prev = OA_INVALID_INDEX;
+
+    oa_assert(self->free_storage >= node_total_size);
+    self->free_storage -= node_total_size;
+
+#if defined(OA_VERBOSE)
+    printf("Free storage: %u (-%u) (oa_allocate)\n", self->free_storage,
+        node_total_size);
+#endif
+
+    // Bin empty?
+    if (self->bin_indices[bin_index] == OA_UNUSED) {
+        // Remove a leaf bin mask bit
+        self->used_bins[top_bin_index] &= (uint8_t)~(1u << leaf_bin_index);
+
+        // All leaf bins empty?
+        if (self->used_bins[top_bin_index] == 0)
+            // Remove a top bin mask bit
+            self->used_bins_top &= ~((uint32_t)1 << top_bin_index);
+    }
+
+    // Push back remainder N elements to a lower bin
+    oa_assert(node_total_size >= size);
+    uint32_t remainder_size = node_total_size - size;
+    if (remainder_size != 0) {
+        uint32_t new_node_index = insert_node_into_bin(self, remainder_size,
+            node->data_offset + size);
+
+        // Link nodes next to each other so that we can merge them
+        // later if both are free. And update the old next neighbor to point
+        // to the new node (in middle)
+        if (node->neighbor_next != OA_UNUSED)
+            self->nodes[node->neighbor_next].neighbor_prev =
+                (oa_index_t)new_node_index;
+
+        self->nodes[new_node_index].neighbor_prev = (oa_index_t)node_index;
+        self->nodes[new_node_index].neighbor_next = node->neighbor_next;
+        node->neighbor_next = (oa_index_t)new_node_index;
+    }
+
+    allocation->offset = node->data_offset;
+    allocation->index = (oa_index_t)node_index;
+    return 0;
+}
+
+// Returns an allocation to the allocator, merging it with free neighbors.
+// Does nothing for a failed allocation (index == OA_INVALID_INDEX) or when
+// the allocator holds no node storage.
+void oa_free(oa_allocator_t *self, oa_allocation_t *allocation)
+{
+    if (allocation->index == OA_INVALID_INDEX) return;
+    if (!self->nodes) return;
+
+    uint32_t node_index = allocation->index;
+    oa_assert(node_index < self->max_allocs);
+    oa_node_t *node = &self->nodes[node_index];
+
+    // Double delete check
+    oa_assert(node->data_size & OA_NODE_USED_FLAG);
+
+    // Merge with neighbors...
+    uint32_t offset = node->data_offset;
+    uint32_t size = node->data_size & OA_NODE_DATA_SIZE_MASK;
+
+    oa_assert(node->neighbor_prev != node_index);
+    if ((node->neighbor_prev != OA_UNUSED) &&
+        (!(self->nodes[node->neighbor_prev].data_size & OA_NODE_USED_FLAG))) {
+        // Previous (contiguous) free node:
+        // Change offset to previous node offset. Sum sizes
+        oa_node_t *prev_node = &self->nodes[node->neighbor_prev];
+        offset = prev_node->data_offset;
+        size += prev_node->data_size & OA_NODE_DATA_SIZE_MASK;
+
+        // Remove node from the bin linked list and put it in the freelist
+        remove_node_from_bin(self, node->neighbor_prev);
+
+        oa_assert(prev_node->neighbor_next == node_index);
+        node->neighbor_prev = prev_node->neighbor_prev;
+    }
+
+    oa_assert(node->neighbor_next != node_index);
+    if ((node->neighbor_next != OA_UNUSED) &&
+        (!(self->nodes[node->neighbor_next].data_size & OA_NODE_USED_FLAG))) {
+        // Next (contiguous) free node: Offset remains the same. Sum sizes.
+        oa_node_t *next_node = &self->nodes[node->neighbor_next];
+        size += next_node->data_size & OA_NODE_DATA_SIZE_MASK;
+
+        // Remove node from the bin linked list and put it in the freelist
+        remove_node_from_bin(self, node->neighbor_next);
+
+        oa_assert(next_node->neighbor_prev == node_index);
+        node->neighbor_next = next_node->neighbor_next;
+    }
+
+    uint32_t neighbor_next = node->neighbor_next;
+    uint32_t neighbor_prev = node->neighbor_prev;
+
+    // Insert the removed node to freelist
+#if defined(OA_VERBOSE)
+    printf("Putting node %u into freelist[%u] (oa_free)\n", node_index,
+        self->free_offset + 1);
+#endif
+
+    oa_assert(self->free_offset + 1 < self->max_allocs);
+    self->free_nodes[++self->free_offset] = (oa_index_t)node_index;
+
+    // Insert the (combined) free node to bin
+    uint32_t combined_node_index = insert_node_into_bin(self, size, offset);
+
+    // Connect neighbors with the new combined node
+    if (neighbor_next != OA_UNUSED) {
+        self->nodes[combined_node_index].neighbor_next =
+            (oa_index_t)neighbor_next;
+        self->nodes[neighbor_next].neighbor_prev =
+            (oa_index_t)combined_node_index;
+    }
+
+    if (neighbor_prev != OA_UNUSED) {
+        self->nodes[combined_node_index].neighbor_prev =
+            (oa_index_t)neighbor_prev;
+        self->nodes[neighbor_prev].neighbor_next =
+            (oa_index_t)combined_node_index;
+    }
+}
+
+// Returns the size that was requested for the allocation, or 0 for a failed
+// allocation or when the allocator holds no node storage.
+uint32_t oa_allocation_size(oa_allocator_t *self,
+    const oa_allocation_t *allocation)
+{
+    if (allocation->index == OA_INVALID_INDEX) return 0;
+    if (!self->nodes) return 0;
+    return self->nodes[allocation->index].data_size & OA_NODE_DATA_SIZE_MASK;
+}
+
+// Fills report with the total free storage and the size of the largest
+// non-empty bin, which is a lower bound for the largest free range. Both are
+// reported as 0 when the allocator is out of nodes.
+void oa_storage_report(const oa_allocator_t *self, oa_storage_report_t *report)
+{
+    uint32_t largest_free_region = 0;
+    uint32_t free_storage = 0;
+
+    // Out of allocations? -> Zero free space
+    if (self->free_offset != 0) {
+        free_storage = self->free_storage;
+        if (self->used_bins_top) {
+            uint32_t top_bin_index =
+                31 - lzcnt_nonzero(self->used_bins_top);
+            uint32_t leaf_bin_index =
+                31 - lzcnt_nonzero(self->used_bins[top_bin_index]);
+            largest_free_region = smallfloat_float_to_uint(
+                (top_bin_index << OA_TOP_BINS_INDEX_SHIFT) | leaf_bin_index);
+            oa_assert(free_storage >= largest_free_region);
+        }
+    }
+
+    report->total_free_space = free_storage;
+    report->largest_free_region = largest_free_region;
+}
+
+// For every bin, reports the bin size and the number of free nodes in it.
+void oa_storage_report_full(const oa_allocator_t *self,
+    oa_storage_report_full_t *report)
+{
+    for (uint32_t i = 0; i < OA_NUM_LEAF_BINS; ++i) {
+        uint32_t count = 0;
+        uint32_t node_index = self->bin_indices[i];
+        while (node_index != OA_UNUSED) {
+            oa_assert(node_index < self->max_allocs);
+            node_index = self->nodes[node_index].bin_list_next;
+            count++;
+        }
+
+        report->free_regions[i].size = smallfloat_float_to_uint(i);
+        report->free_regions[i].count = count;
+    }
+}
diff --git a/Sources/kope/util/offalloc/offalloc.h b/Sources/kope/util/offalloc/offalloc.h
new file mode 100644
index 000000000..b3cc5417b
--- /dev/null
+++ b/Sources/kope/util/offalloc/offalloc.h
@@ -0,0 +1,75 @@
+// (C) Sebastian Aaltonen 2023
+// MIT License (see file: LICENSE)
+// C99 conversion by Aarni Gratseff
+// https://github.com/aarni57/offalloc
+
+#ifndef OFFALLOC_H
+#define OFFALLOC_H
+
+#include <stdint.h>
+
+#define OA_NUM_TOP_BINS 32
+#define OA_BINS_PER_LEAF 8
+#define OA_NUM_LEAF_BINS (OA_NUM_TOP_BINS * OA_BINS_PER_LEAF)
+
+#define OA_NO_SPACE 0xffffffff
+
+typedef uint16_t oa_index_t;
+#define OA_INVALID_INDEX ((oa_index_t)0xffff)
+#define OA_UNUSED (oa_index_t)0xffff // same value as OA_INVALID_INDEX
+
+typedef struct oa_allocation_t {
+    uint32_t offset; // first element of the range; OA_NO_SPACE if oa_allocate() failed
+    oa_index_t index; // internal: node index; OA_INVALID_INDEX if oa_allocate() failed
+} oa_allocation_t;
+
+typedef struct ao_storage_report_t {
+    uint32_t total_free_space; // sum of all free ranges; reported as 0 when out of nodes
+    uint32_t largest_free_region; // size of the largest non-empty bin
+} oa_storage_report_t;
+
+typedef struct ao_region_t {
+    uint32_t size; // bin size
+    uint32_t count; // number of free nodes currently in the bin
+} oa_region_t;
+
+typedef struct ao_storage_report_full_t {
+    oa_region_t free_regions[OA_NUM_LEAF_BINS]; // one entry per bin, by bin index
+} oa_storage_report_full_t;
+
+typedef struct ao_node_t {
+    uint32_t data_offset; // first element of the range this node describes
+    uint32_t data_size; // 'used' flag stored as high bit
+    oa_index_t bin_list_prev; // previous free node in the same bin, or OA_UNUSED
+    oa_index_t bin_list_next; // next free node in the same bin, or OA_UNUSED
+    oa_index_t neighbor_prev; // node of the range directly before this one, or OA_UNUSED
+    oa_index_t neighbor_next; // node of the range directly after this one, or OA_UNUSED
+} oa_node_t;
+
+typedef struct ao_allocator_t {
+    uint32_t size; // total number of elements managed
+    uint32_t max_allocs; // capacity of nodes[] and free_nodes[]
+    uint32_t free_storage; // sum of the sizes of all free nodes
+
+    uint32_t used_bins_top; // bit t is set while used_bins[t] is non-zero
+    uint8_t used_bins[OA_NUM_TOP_BINS]; // bit l of [t] is set while bin (t << 3) | l is non-empty
+    oa_index_t bin_indices[OA_NUM_LEAF_BINS]; // first free node of each bin, or OA_UNUSED
+
+    oa_node_t *nodes; // node storage, allocated by oa_create()
+    oa_index_t *free_nodes; // stack of unused node indices
+    uint32_t free_offset; // index of the top entry of free_nodes
+} oa_allocator_t;
+
+// Public API. The allocator struct must be zero-initialized before the first oa_create().
+
+int oa_create(oa_allocator_t *self, uint32_t size, uint32_t max_allocs); // 0 on success, -1 on failure
+void oa_destroy(oa_allocator_t *self); // frees the node storage
+
+int oa_allocate(oa_allocator_t *self, uint32_t size, oa_allocation_t *allocation); // 0 on success, -1 if it cannot be satisfied
+void oa_free(oa_allocator_t *self, oa_allocation_t *allocation); // merges the range with free neighbors
+
+uint32_t oa_allocation_size(oa_allocator_t *self, const oa_allocation_t *allocation); // requested size, 0 for a failed allocation
+void oa_storage_report(const oa_allocator_t *self, oa_storage_report_t *report);
+void oa_storage_report_full(const oa_allocator_t *self, oa_storage_report_full_t *report);
+
+#endif // OFFALLOC_H