From a0402ca7fdcb5d36922108eb1af9f97b255e55b5 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Fri, 18 Oct 2024 21:31:35 -0500 Subject: [PATCH] Improve: Faster concatenation This commit deprecates `string.h` function usage in the Python binding implementation in favor of faster StringZilla operations. --- python/lib.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/python/lib.c b/python/lib.c index 1130271..b6f5764 100644 --- a/python/lib.c +++ b/python/lib.c @@ -38,7 +38,6 @@ typedef SSIZE_T ssize_t; #include // `errno` #include // `fopen` #include // `rand`, `srand` -#include // `memset`, `memcpy` #include // `time` #include @@ -1034,7 +1033,7 @@ static PyObject *Strs_subscript(Strs *self, PyObject *key) { Py_XDECREF(result); return NULL; } - memcpy(to->parts, from->parts + start, sizeof(sz_string_view_t) * to->count); + sz_copy(to->parts, from->parts + start, sizeof(sz_string_view_t) * to->count); Py_INCREF(to->parent_string); break; } @@ -1365,7 +1364,7 @@ static PyObject *Str_write_to(PyObject *self, PyObject *args, PyObject *kwargs) PyErr_SetString(PyExc_MemoryError, "Unable to allocate memory for the path"); return NULL; } - memcpy(path_buffer, path.start, path.length); + sz_copy(path_buffer, path.start, path.length); path_buffer[path.length] = '\0'; // Unlock the Global Interpreter Lock (GIL) to allow other threads to run @@ -2233,7 +2232,7 @@ static PyObject *Str_translate(PyObject *self, PyObject *args, PyObject *kwargs) PyErr_SetString(PyExc_ValueError, "The look-up table must be exactly 256 bytes long"); return NULL; } - memcpy(&look_up_table[0], look_up_table_str.start, look_up_table_str.length); + sz_copy(&look_up_table[0], look_up_table_str.start, look_up_table_str.length); } else { wrap_current_exception("The look-up table must be string-like or a dictionary"); @@ -2888,8 +2887,8 @@ static PyObject *Str_concat(PyObject *self, PyObject *other) { } // Perform 
the string concatenation - memcpy(result_str->memory.start, self_str.start, self_str.length); - memcpy(result_str->memory.start + self_str.length, other_str.start, other_str.length); + sz_copy(result_str->memory.start, self_str.start, self_str.length); + sz_copy(result_str->memory.start + self_str.length, other_str.start, other_str.length); return (PyObject *)result_str; } @@ -3142,7 +3141,7 @@ static sz_bool_t Strs_sort_(Strs *self, sz_string_view_t **parts_output, sz_sort // Call our sorting algorithm sz_sequence_t sequence; - memset(&sequence, 0, sizeof(sequence)); + sz_fill(&sequence, sizeof(sequence), 0); sequence.order = (sz_sorted_idx_t *)temporary_memory.start; sequence.count = count; sequence.handle = parts; @@ -3250,7 +3249,7 @@ static PyObject *Strs_order(Strs *self, PyObject *args, PyObject *kwargs) { // return NULL; // } // sz_sorted_idx_t *numpy_data_ptr = (sz_sorted_idx_t *)PyArray_DATA((PyArrayObject *)array); - // memcpy(numpy_data_ptr, order, count * sizeof(sz_sorted_idx_t)); + // sz_copy(numpy_data_ptr, order, count * sizeof(sz_sorted_idx_t)); // // There are compilation issues with NumPy. 
// Here is an example for `cp312-musllinux_s390x`: https://x.com/ashvardanian/status/1757880762278531447?s=20 @@ -3382,7 +3381,7 @@ char const *export_escaped_unquoted_to_utf8_buffer(char const *cstr, size_t cstr else { *(buffer++) = *(cstr++); } } else if (buffer + rune_length < buffer_end) { - memcpy(buffer, cstr, rune_length); + sz_copy(buffer, cstr, rune_length); buffer += rune_length; cstr += rune_length; } @@ -3412,7 +3411,7 @@ static PyObject *Strs_repr(Strs *self) { char const *const repr_buffer_end = repr_buffer_ptr + 1024; // Start of the array - memcpy(repr_buffer_ptr, "sz.Strs([", 9); + sz_copy(repr_buffer_ptr, "sz.Strs([", 9); repr_buffer_ptr += 9; size_t count = Strs_len(self); @@ -3439,7 +3438,7 @@ static PyObject *Strs_repr(Strs *self) { &did_fit); // If it didn't fit, let's put an ellipsis if (!did_fit) { - memcpy(repr_buffer_ptr, non_fitting_array_tail, non_fitting_array_tail_length); + sz_copy(repr_buffer_ptr, non_fitting_array_tail, non_fitting_array_tail_length); repr_buffer_ptr += non_fitting_array_tail_length; return PyUnicode_FromStringAndSize(repr_buffer, repr_buffer_ptr - repr_buffer); }