Skip to content

Commit

Permalink
#15535: implement optimized memory allocator
Browse files Browse the repository at this point in the history
  • Loading branch information
marty1885 committed Nov 28, 2024
1 parent aac41a3 commit 0057eef
Show file tree
Hide file tree
Showing 9 changed files with 942 additions and 7 deletions.
1 change: 1 addition & 0 deletions tests/tt_metal/tt_metal/api/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(UNIT_TESTS_API_SRC
${CMAKE_CURRENT_SOURCE_DIR}/allocator/test_free_list_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/test_free_list_opt_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/test_l1_banking_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/circular_buffer/test_CircularBuffer_allocation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/circular_buffer/test_CircularBuffer_creation.cpp
Expand Down
258 changes: 258 additions & 0 deletions tests/tt_metal/tt_metal/api/allocator/test_free_list_opt_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#include <gtest/gtest.h>
#include "allocator/allocator.hpp"
#include "tt_metal/impl/allocator/algorithms/free_list_opt.hpp"

// UDL to convert integer literals to SI units
constexpr size_t operator"" _KiB(unsigned long long x) { return x * 1024; }
constexpr size_t operator"" _MiB(unsigned long long x) { return x * 1024 * 1024; }
constexpr size_t operator"" _GiB(unsigned long long x) { return x * 1024 * 1024 * 1024; }

TEST(FreeListOptTest, Allocation) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);

auto b = allocator.allocate(1_KiB);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
}

TEST(FreeListOptTest, Alignment) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1, 1_KiB);
auto a = allocator.allocate(64);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);
auto b = allocator.allocate(64);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
}

TEST(FreeListOptTest, MinAllocationSize) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1);
auto a = allocator.allocate(1);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);
auto b = allocator.allocate(1);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
}

TEST(FreeListOptTest, Clear) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_TRUE(b.has_value());
allocator.clear();
auto c = allocator.allocate(1_KiB);
ASSERT_TRUE(c.has_value());
ASSERT_EQ(c.value(), 0);
}

TEST(FreeListOptTest, AllocationAndDeallocation) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
std::vector<std::optional<tt::tt_metal::DeviceAddr>> allocations(10);

// Deallocate in order
for(size_t i = 0; i < allocations.size(); i++) {
allocations[i] = allocator.allocate(1_KiB);
ASSERT_TRUE(allocations[i].has_value());
}

for(size_t i = allocations.size(); i > 0; i--) {
allocator.deallocate(allocations[i - 1].value());
}

// Deallocate in reverse order
for(size_t i = 0; i < allocations.size(); i++) {
allocations[i] = allocator.allocate(1_KiB);
ASSERT_TRUE(allocations[i].has_value());
}

for(size_t i = 0; i < allocations.size(); i++) {
allocator.deallocate(allocations[i].value());
}
}

TEST(FreeListOptTest, AllocateAtAddress) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);

auto b = allocator.allocate_at_address(1_KiB, 1_KiB);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);

// Address is already allocated
auto c = allocator.allocate_at_address(1_KiB, 1_KiB);
ASSERT_FALSE(c.has_value());

auto d = allocator.allocate_at_address(2_KiB, 1_KiB);
ASSERT_TRUE(d.has_value());
ASSERT_EQ(d.value(), 2_KiB);

allocator.deallocate(a.value());
auto e = allocator.allocate_at_address(0, 1_KiB);
ASSERT_TRUE(e.has_value());
ASSERT_EQ(e.value(), 0);
}

TEST(FreeListOptTest, AllocateAtAddressInteractions) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto wedge = allocator.allocate_at_address(32_KiB, 1_KiB);

auto a = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);

auto z = allocator.allocate(1_KiB, false);
ASSERT_TRUE(z.has_value());
ASSERT_EQ(z.value(), 32_KiB - 1_KiB); // Counterintuitive, but because we use BestFit, it will find the smaller block at the beginning

auto b = allocator.allocate(1_KiB);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
}

TEST(FreeListOptTest, ShrinkAndReset) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_TRUE(b.has_value());
allocator.deallocate(a.value());

allocator.shrink_size(1_KiB);
auto c = allocator.allocate_at_address(0, 1_KiB);
ASSERT_FALSE(c.has_value());

auto d = allocator.allocate_at_address(1_KiB, 1_KiB);
ASSERT_FALSE(d.has_value());

allocator.reset_size();
allocator.deallocate(b.value());

auto e = allocator.allocate(2_KiB);
ASSERT_TRUE(e.has_value());
}

TEST(FreeListOptTest, Statistics) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_TRUE(b.has_value());
allocator.deallocate(a.value());

auto stats = allocator.get_statistics();
ASSERT_EQ(stats.total_allocated_bytes, 1_KiB);
}

TEST(FreeListOptTest, AllocateFromTop) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB, false);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 1_GiB - 1_KiB);

auto b = allocator.allocate(1_KiB, false);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_GiB - 2_KiB);

auto c = allocator.allocate(1_KiB);
ASSERT_TRUE(c.has_value());
ASSERT_EQ(c.value(), 0);
}

TEST(FreeListOptTest, Coalescing) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
auto c = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_TRUE(b.has_value());
ASSERT_TRUE(c.has_value());
allocator.deallocate(b.value());
allocator.deallocate(a.value());

auto d = allocator.allocate(2_KiB);
ASSERT_TRUE(d.has_value());
ASSERT_EQ(d.value(), 0);
}

TEST(FreeListOptTest, CoalescingAfterResetShrink) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
auto c = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_TRUE(b.has_value());
ASSERT_TRUE(c.has_value());
allocator.deallocate(b.value());
allocator.deallocate(a.value());

allocator.shrink_size(1_KiB);
auto d = allocator.allocate(2_KiB);
allocator.reset_size();
auto e = allocator.allocate(2_KiB);
ASSERT_TRUE(e.has_value());
ASSERT_EQ(e.value(), 0);
}

TEST(FreeListOptTest, OutOfMemory) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_GiB);
ASSERT_TRUE(a.has_value());
auto b = allocator.allocate(1_KiB);
ASSERT_FALSE(b.has_value());

allocator.clear();
auto c = allocator.allocate(1_GiB - 1_KiB);
ASSERT_TRUE(c.has_value());
auto d = allocator.allocate(2_KiB);
ASSERT_FALSE(d.has_value());
}

TEST(FreeListOptTest, AvailableAddresses) {
auto allocator = tt::tt_metal::allocator::FreeListOpt(1_GiB, 0, 1_KiB, 1_KiB);
auto a = allocator.allocate(1_KiB);
auto aval = allocator.available_addresses(1_KiB);
ASSERT_EQ(aval.size(), 1);
ASSERT_EQ(aval[0].first, 1_KiB); // Start address
ASSERT_EQ(aval[0].second, 1_GiB); // End address
allocator.clear();

a = allocator.allocate(1_KiB);
auto b = allocator.allocate(1_KiB);
auto c = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
ASSERT_TRUE(c.has_value());
ASSERT_EQ(c.value(), 2_KiB);
allocator.deallocate(b.value());
aval = allocator.available_addresses(1_KiB);
ASSERT_EQ(aval.size(), 2);
ASSERT_EQ(aval[0].first, 1_KiB); // Start address
ASSERT_EQ(aval[0].second, 2_KiB); // End address
ASSERT_EQ(aval[1].first, 3_KiB); // Start address
ASSERT_EQ(aval[1].second, 1_GiB); // End address

allocator.clear();
a = allocator.allocate(1_KiB);
b = allocator.allocate(1_KiB);
c = allocator.allocate(1_KiB);
ASSERT_TRUE(a.has_value());
ASSERT_EQ(a.value(), 0);
ASSERT_TRUE(b.has_value());
ASSERT_EQ(b.value(), 1_KiB);
ASSERT_TRUE(c.has_value());
ASSERT_EQ(c.value(), 2_KiB);
allocator.deallocate(b.value());
aval = allocator.available_addresses(10_KiB);
ASSERT_EQ(aval.size(), 1);
ASSERT_EQ(aval[0].first, 3_KiB); // Start address
ASSERT_EQ(aval[0].second, 1_GiB); // End address
}
1 change: 1 addition & 0 deletions tt_metal/impl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(IMPL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/buffers/semaphore.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernels/kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/algorithms/free_list.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/algorithms/free_list_opt.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/basic_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/allocator/l1_banking_allocator.cpp
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/impl/allocator/algorithms/allocator_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class Algorithm {

virtual Statistics get_statistics() const = 0;

virtual void dump_blocks(std::ofstream& out) const = 0;
virtual void dump_blocks(std::ostream& out) const = 0;

virtual void shrink_size(DeviceAddr shrink_size, bool bottom_up = true) = 0;

Expand Down
4 changes: 2 additions & 2 deletions tt_metal/impl/allocator/algorithms/free_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,12 +431,12 @@ Statistics FreeList::get_statistics() const {
return stats;
}

void FreeList::dump_block(const boost::local_shared_ptr<Block>& block, std::ofstream& out) const {
void FreeList::dump_block(const boost::local_shared_ptr<Block>& block, std::ostream& out) const {
auto alloc_status = this->is_allocated(block) ? "Y" : "N";
out << ",,," << (block->address + this->offset_bytes_) << "," << (block->size) << "," << alloc_status << "\n";
}

void FreeList::dump_blocks(std::ofstream& out) const {
void FreeList::dump_blocks(std::ostream& out) const {
out << ",,Blocks:,Address (B),Size (B),Allocated (Y/N)\n";
boost::local_shared_ptr<Block> curr_block = this->block_head_;
while (curr_block != nullptr) {
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/impl/allocator/algorithms/free_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class FreeList : public Algorithm {

Statistics get_statistics() const;

void dump_blocks(std::ofstream& out) const;
void dump_blocks(std::ostream& out) const;

void shrink_size(DeviceAddr shrink_size, bool bottom_up = true);

Expand Down Expand Up @@ -66,7 +66,7 @@ class FreeList : public Algorithm {
boost::local_shared_ptr<Block> next_free = nullptr;
};

void dump_block(const boost::local_shared_ptr<Block>& block, std::ofstream& out) const;
void dump_block(const boost::local_shared_ptr<Block>& block, std::ostream& out) const;

bool is_allocated(const boost::local_shared_ptr<Block>& block) const;

Expand Down
Loading

0 comments on commit 0057eef

Please sign in to comment.