Skip to content

Commit

Permalink
ggml : sync ggml-alloc
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Aug 22, 2023
1 parent bcc3d05 commit 9bdb10b
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 9 deletions.
4 changes: 4 additions & 0 deletions include/ggml/ggml-alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ extern "C" {
GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);

// tell the allocator to parse nodes following the order described in the list
// you should call this if your graph are optimized to execute out-of-order
GGML_API void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n);

GGML_API void ggml_allocr_free(struct ggml_allocr * alloc);
GGML_API bool ggml_allocr_is_measure(struct ggml_allocr * alloc);
GGML_API void ggml_allocr_reset(struct ggml_allocr * alloc);
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-llama.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

cp -rpv ../llama.cpp/ggml.c src/ggml.c
cp -rpv ../llama.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../llama.cpp/ggml-cuda.h src/ggml-cuda.h
cp -rpv ../llama.cpp/ggml-cuda.cu src/ggml-cuda.cu
cp -rpv ../llama.cpp/ggml-opencl.h src/ggml-opencl.h
Expand All @@ -9,6 +10,7 @@ cp -rpv ../llama.cpp/ggml-metal.h src/ggml-metal.h
cp -rpv ../llama.cpp/ggml-metal.m src/ggml-metal.m
cp -rpv ../llama.cpp/ggml-metal.metal src/ggml-metal.metal
cp -rpv ../llama.cpp/ggml.h include/ggml/ggml.h
cp -rpv ../llama.cpp/ggml-alloc.h include/ggml/ggml-alloc.h

cp -rpv ../llama.cpp/tests/test-opt.cpp tests/test-opt.cpp
cp -rpv ../llama.cpp/tests/test-grad0.cpp tests/test-grad0.cpp
Expand Down
48 changes: 39 additions & 9 deletions src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ struct ggml_allocr {
struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
size_t max_size;
bool measure;
int parse_seq[GGML_MAX_NODES];
bool has_parse_seq;

#ifdef GGML_ALLOCATOR_DEBUG
struct ggml_tensor * allocated_tensors[1024];
Expand Down Expand Up @@ -111,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)

size_t max_avail = 0;

// find the best fitting free block
// find the best fitting free block besides the last block
int best_fit_block = -1;
size_t best_fit_size = SIZE_MAX;
for (int i = 0; i < alloc->n_free_blocks; i++) {
for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
struct free_block * block = &alloc->free_blocks[i];
max_avail = MAX(max_avail, block->size);
if (block->size >= size && block->size <= best_fit_size) {
Expand All @@ -126,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
AT_PRINTF("block %d\n", best_fit_block);

if (best_fit_block == -1) {
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
__func__, size, max_avail);
GGML_ASSERT(!"not enough space in the buffer");
// the last block is our last resort
struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
if (block->size >= size) {
best_fit_block = alloc->n_free_blocks - 1;
max_avail = MAX(max_avail, block->size);
} else {
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
__func__, size, max_avail);
GGML_ASSERT(!"not enough space in the buffer");
return;
}
}
struct free_block * block = &alloc->free_blocks[best_fit_block];
void * addr = block->addr;
Expand Down Expand Up @@ -229,6 +238,17 @@ static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_t
alloc->n_free_blocks++;
}

void ggml_allocr_set_parse_seq(struct ggml_allocr * alloc, int * list, int n) {
int pos = 0;
for (int i = 0; i < n; i++) {
if (list[i] != -1) {
alloc->parse_seq[pos] = list[i];
pos++;
}
}
alloc->has_parse_seq = true;
}

void ggml_allocr_reset(struct ggml_allocr * alloc) {
alloc->n_free_blocks = 1;
size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
Expand All @@ -248,8 +268,10 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
/*.hash_table = */ {{0}},
/*.max_size = */ 0,
/*.measure = */ false,
/*.parse_seq = */ {0},
/*.has_parse_seq = */ false,
#ifdef GGML_ALLOCATOR_DEBUG
/* .allocated_tensors = */ {0},
/*.allocated_tensors = */ = {0},
#endif
};

Expand All @@ -275,8 +297,10 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
/*.hash_table = */ {{0}},
/*.max_size = */ 0,
/*.measure = */ true,
/*.parse_seq = */ {0},
/*.has_parse_seq = */ false,
#ifdef GGML_ALLOCATOR_DEBUG
/*.allocated_tensors = */ {0},
/*.allocated_tensors = */ = {0},
#endif
};

Expand Down Expand Up @@ -473,7 +497,13 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
allocate_node(alloc, input);
}
}
for (int i = 0; i < gf->n_nodes; i++) {
for (int ind = 0; ind < gf->n_nodes; ind++) {
int i;
if (alloc->has_parse_seq) {
i = alloc->parse_seq[ind];
} else {
i = ind;
}
struct ggml_tensor * node = gf->nodes[i];

// allocate parents (leafs)
Expand Down Expand Up @@ -517,7 +547,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
struct ggml_tensor * view_src = get_view_source(parent);
struct hash_node * view_src_hn = hash_get(ht, view_src);
view_src_hn->n_views -= 1;
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src->n_children, view_src->n_views);
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
ggml_allocator_free_tensor(alloc, view_src);
}
Expand Down

0 comments on commit 9bdb10b

Please sign in to comment.