Enter an OpenAI-formatted logit bias dictionary. Each key is an integer token ID and each value is a bias (-100.0 to 100.0). Leave blank to disable. Input is a JSON object, reference here.
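For illustration, a minimal sketch of such an input (the token IDs below are made up and only show the shape of the JSON object):

import json

# JSON keys are token IDs written as strings; values are biases in [-100.0, 100.0]
logit_bias = json.loads('{"15043": -100.0, "50256": 5.0}')
print(logit_bias)  # {'15043': -100.0, '50256': 5.0}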
-
-
+
+
@@ -15478,14 +15579,26 @@ Current version: 136
-Token Bans ?
+Token Filter ?
-Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||
-Token bans may be unavailable.
-Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||
+Token filter may be unavailable.
+Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$|| Note: If you're trying to ban a specific token ID, you should use Logit Bias instead!
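As an aside, a small sketch (not part of the patch) of how a filter string in the format described above could be applied, using the ||$|| delimiter and a made-up toy vocabulary:

def parse_token_filter(raw: str) -> list[str]:
    # split the user input on the documented delimiter and drop empty entries
    return [s for s in raw.split("||$||") if s != ""]

substrings = parse_token_filter("foo||$||bar")
vocab = {0: "foobar", 1: "baz", 2: "barista"}  # toy vocabulary, IDs are made up
banned_ids = [tid for tid, tok in vocab.items() if any(s in tok for s in substrings)]
print(banned_ids)  # [0, 2]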
From 46c9785bb5f56b398da82753dd48b4cb31b70da3 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 28 Apr 2024 15:49:45 +0800
Subject: [PATCH 06/35] default max ctx to true max ctx if found
---
klite.embd | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/klite.embd b/klite.embd
index bb175f13e3550..5a5bb8a13c193 100644
--- a/klite.embd
+++ b/klite.embd
@@ -3740,7 +3740,7 @@ Current version: 136
passed_ai_warning: false, //used to store AI safety panel acknowledgement state
entersubmit: true, //enter sends the prompt
- max_context_length: 1800,
+ max_context_length: (localflag?2048:1600),
max_length: 140,
auto_ctxlen: true,
auto_genamt: true,
@@ -7427,6 +7427,10 @@ Current version: 136
{
document.getElementById("max_context_length_slide").max = ep_maxctx;
document.getElementById("max_context_length_slide_label").innerText = ep_maxctx;
+ if(localflag && localsettings.max_context_length==2048 && ep_maxctx>2048)
+ {
+ localsettings.max_context_length = ep_maxctx;
+ }
}
}).catch(error => {
console.log("Failed to get true max ctx: " + error);
From 6e472f58e40cd4acf6023e15c75a2700535c5f0b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Sun, 28 Apr 2024 00:18:27 +0000
Subject: [PATCH 07/35] flake.lock: Update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Flake lock file updates:
• Updated input 'nixpkgs':
'github:NixOS/nixpkgs/5c24cf2f0a12ad855f444c30b2421d044120c66f?narHash=sha256-XtTSSIB2DA6tOv%2Bl0FhvfDMiyCmhoRbNB%2B0SeInZkbk%3D' (2024-04-19)
→ 'github:NixOS/nixpkgs/7bb2ccd8cdc44c91edba16c48d2c8f331fb3d856?narHash=sha256-Drmja/f5MRHZCskS6mvzFqxEaZMeciScCTFxWVLqWEY%3D' (2024-04-25)
---
flake.lock | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/flake.lock b/flake.lock
index 9c1b0af3797a7..b738da7c69455 100644
--- a/flake.lock
+++ b/flake.lock
@@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
- "lastModified": 1713537308,
- "narHash": "sha256-XtTSSIB2DA6tOv+l0FhvfDMiyCmhoRbNB+0SeInZkbk=",
+ "lastModified": 1714076141,
+ "narHash": "sha256-Drmja/f5MRHZCskS6mvzFqxEaZMeciScCTFxWVLqWEY=",
"owner": "NixOS",
"repo": "nixpkgs",
- "rev": "5c24cf2f0a12ad855f444c30b2421d044120c66f",
+ "rev": "7bb2ccd8cdc44c91edba16c48d2c8f331fb3d856",
"type": "github"
},
"original": {
From ce023f6f2ff34fbe840e32e65d443d2fed7393de Mon Sep 17 00:00:00 2001
From: Neo Zhang <14088817+arthw@users.noreply.github.com>
Date: Sun, 28 Apr 2024 22:40:31 +0800
Subject: [PATCH 08/35] add device version in device list (#6959)
Co-authored-by: arthw <>
---
ggml-sycl.cpp | 18 ++++++++++++------
1 file changed, 12 insertions(+), 6 deletions(-)
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index a9b310243f04f..2b76b3ebd64f7 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -13416,11 +13416,16 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
version += std::to_string(prop.get_minor_version());
device_type = std::regex_replace(device_type, std::regex("ext_oneapi_"), "");
+ std::string name = std::string(prop.get_name());
+ name = std::regex_replace(name, std::regex("\\(R\\)"), "");
+ name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
- fprintf(stderr, "|%2d|%18s|%45s|%10s|%11d|%8d|%7d|%15lu|\n", id, device_type.c_str(),
- prop.get_name(), version.c_str(), prop.get_max_compute_units(),
+ auto global_mem_size = prop.get_global_mem_size()/1000000;
+
+ fprintf(stderr, "|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
+ name.c_str(), version.c_str(), prop.get_max_compute_units(),
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
- prop.get_global_mem_size());
+ global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
}
void ggml_backend_sycl_print_sycl_devices() {
@@ -13428,9 +13433,10 @@ void ggml_backend_sycl_print_sycl_devices() {
int device_count = dpct::dev_mgr::instance().device_count();
std::map DeviceNums;
fprintf(stderr, "found %d SYCL devices:\n", device_count);
- fprintf(stderr, "| | | |Compute |Max compute|Max work|Max sub| |\n");
- fprintf(stderr, "|ID| Device Type| Name|capability|units |group |group |Global mem size|\n");
- fprintf(stderr, "|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|\n");
+ fprintf(stderr, "| | | | |Max | |Max |Global | |\n");
+ fprintf(stderr, "| | | | |compute|Max work|sub |mem | |\n");
+ fprintf(stderr, "|ID| Device Type| Name|Version|units |group |group|size | Driver version|\n");
+ fprintf(stderr, "|--|-------------------|---------------------------------------|-------|-------|--------|-----|-------|---------------------|\n");
for (int id = 0; id < device_count; ++id) {
sycl::device device = dpct::dev_mgr::instance().get_device(id);
sycl::backend backend = device.get_backend();
From 7bb36ccf91b8a2e92b182dd75624f1fd7cb205ac Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Sun, 28 Apr 2024 17:36:18 +0200
Subject: [PATCH 09/35] gguf : enforce that tensor names are unique (#6905)
* not allow adding duplicated tensor name
* no duplicated tensor while reading gguf
* typo
* throw exception inside llama_model_loader
Co-authored-by: slaren
---------
Co-authored-by: slaren
---
ggml.c | 12 ++++++++++++
gguf-py/gguf/gguf_reader.py | 8 +++++++-
gguf-py/gguf/gguf_writer.py | 5 +++++
llama.cpp | 8 ++++++++
4 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/ggml.c b/ggml.c
index 34eef23fcf90f..cb273061c5c53 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20819,6 +20819,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// TODO: return an error instead of crashing with GGML_ASSERT
gguf_tensor_info_sanitize(info);
+ // make sure there is no duplicated tensor names
+ for (uint64_t j = 0; j < i; ++j) {
+ if (strcmp(info->name.data, ctx->infos[j].name.data) == 0) {
+ fprintf(stderr, "%s: duplicated tensor name %s\n", __func__, info->name.data);
+ ok = false;
+ }
+ }
+
if (!ok) {
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
fclose(file);
@@ -21355,6 +21363,10 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
void gguf_add_tensor(
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
+ if (gguf_find_tensor(ctx, tensor->name) != -1) {
+ GGML_ASSERT(false && "duplicated tensor name");
+ }
+
const int idx = ctx->header.n_tensors;
ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 33afac552ca75..48ef6d4ae45df 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -234,8 +234,14 @@ def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[Reader
def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
tensors = []
+ tensor_names = set() # keep track of name to prevent duplicated tensors
for field in fields:
_name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+ # check if there's any tensor having same name already in the list
+ tensor_name = str(bytes(name_data), encoding = 'utf-8')
+ if tensor_name in tensor_names:
+ raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+ tensor_names.add(tensor_name)
ggml_type = GGMLQuantizationType(raw_dtype[0])
n_elems = np.prod(dims)
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +273,7 @@ def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
item_count = n_bytes
item_type = np.uint8
tensors.append(ReaderTensor(
- name = str(bytes(name_data), encoding = 'utf-8'),
+ name = tensor_name,
tensor_type = ggml_type,
shape = dims,
n_elements = n_elems,
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index e3dbca454ae05..ec44ac9f3813d 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -63,6 +63,7 @@ def __init__(
self.kv_data_count = 0
self.ti_data = bytearray()
self.ti_data_count = 0
+ self.ti_names = set()
self.use_temp_file = use_temp_file
self.temp_file = None
self.tensors = []
@@ -197,6 +198,10 @@ def add_tensor_info(
if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}')
+ if name in self.ti_names:
+ raise ValueError(f'Duplicated tensor name {name}')
+ self.ti_names.add(name)
+
encoded_name = name.encode("utf8")
self.ti_data += self._pack("Q", len(encoded_name))
self.ti_data += encoded_name
diff --git a/llama.cpp b/llama.cpp
index 49f2b559e965e..3c64622d7c8dc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3120,9 +3120,17 @@ struct llama_model_loader {
fver = (enum llama_fver) gguf_get_version(meta);
+ std::set<std::string> tensor_names;
for (auto & w : weights) {
n_elements += ggml_nelements(w.tensor);
n_bytes += ggml_nbytes(w.tensor);
+ // make sure there is no duplicated tensor names
+ const std::string name(w.tensor->name);
+ auto found = tensor_names.find(name);
+ if (found != tensor_names.end()) {
+ throw std::runtime_error(format("invalid model: tensor '%s' is duplicated", w.tensor->name));
+ }
+ tensor_names.insert(name);
}
LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
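As a usage-level illustration of the new duplicate-name check, a sketch (not part of the patch) assuming the gguf-py package from this tree is importable; the file name and tensor names are made up:

import numpy as np
import gguf

writer = gguf.GGUFWriter("dup-test.gguf", "llama")
writer.add_tensor("blk.0.attn_q.weight", np.zeros((4, 4), dtype=np.float32))
try:
    writer.add_tensor("blk.0.attn_q.weight", np.zeros((4, 4), dtype=np.float32))  # same name again
except ValueError as e:
    print(e)  # Duplicated tensor name blk.0.attn_q.weight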
From e00b4a8f816ebc45b98a46e5f5231359b9a017e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?DAN=E2=84=A2?=
Date: Sun, 28 Apr 2024 18:38:44 -0400
Subject: [PATCH 10/35] Fix more int overflow during quant (PPL/CUDA). (#6563)
* Fix more int overflow during quant.
* Fix some more int overflow in softmax.
* Revert back to int64_t.
---
ggml-cuda/convert.cu | 168 +++++++++++++++++++++----------------------
ggml-cuda/softmax.cu | 8 +--
2 files changed, 88 insertions(+), 88 deletions(-)
diff --git a/ggml-cuda/convert.cu b/ggml-cuda/convert.cu
index b15e3578267b3..75e50c9856123 100644
--- a/ggml-cuda/convert.cu
+++ b/ggml-cuda/convert.cu
@@ -5,16 +5,16 @@
template <int qk, int qr, dequantize_kernel_t dequantize_kernel, typename dst_t>
static __global__ void dequantize_block(const void * __restrict__ vx, dst_t * __restrict__ y, const int64_t k) {
- const int64_t i = 2*(blockDim.x*blockIdx.x + threadIdx.x);
+ const int64_t i = (int64_t)2*(blockDim.x*blockIdx.x + threadIdx.x);
if (i >= k) {
return;
}
const int64_t ib = i/qk; // block index
- const int iqs = (i%qk)/qr; // quant index
- const int iybs = i - i%qk; // y block start index
- const int y_offset = qr == 1 ? 1 : qk/2;
+ const int64_t iqs = (i%qk)/qr; // quant index
+ const int64_t iybs = i - i%qk; // y block start index
+ const int64_t y_offset = qr == 1 ? 1 : qk/2;
// dequantize
dfloat2 v;
@@ -29,7 +29,7 @@ static __global__ void dequantize_block_q8_0_f16(const void * __restrict__ vx, h
#if __CUDA_ARCH__ >= CC_PASCAL
constexpr int nint = CUDA_Q8_0_NE_ALIGN/sizeof(int) + WARP_SIZE;
- const int i0 = CUDA_Q8_0_NE_ALIGN*blockIdx.x;
+ const int64_t i0 = CUDA_Q8_0_NE_ALIGN*blockIdx.x;
const int * x0 = ((int *) vx) + blockIdx.x * nint;
half2 * y2 = (half2 *) (y + i0);
@@ -73,9 +73,9 @@ static __global__ void dequantize_block_q4_0(const void * __restrict__ vx, dst_t
const int64_t i = blockIdx.x;
// assume 32 threads
- const int tid = threadIdx.x;
- const int il = tid/8;
- const int ir = tid%8;
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/8;
+ const int64_t ir = tid%8;
const int64_t ib = 8*i + ir;
if (ib >= nb32) {
return;
@@ -101,9 +101,9 @@ static __global__ void dequantize_block_q4_1(const void * __restrict__ vx, dst_t
const int64_t i = blockIdx.x;
// assume 32 threads
- const int tid = threadIdx.x;
- const int il = tid/8;
- const int ir = tid%8;
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/8;
+ const int64_t ir = tid%8;
const int64_t ib = 8*i + ir;
if (ib >= nb32) {
return;
@@ -127,14 +127,14 @@ static __global__ void dequantize_block_q4_1(const void * __restrict__ vx, dst_t
template<typename dst_t>
static __global__ void dequantize_block_q2_K(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_q2_K * x = (const block_q2_K *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int n = tid/32;
- const int l = tid - 32*n;
- const int is = 8*n + l/16;
+ const int64_t n = tid/32;
+ const int64_t l = tid - 32*n;
+ const int64_t is = 8*n + l/16;
const uint8_t q = x[i].qs[32*n + l];
dst_t * y = yy + i*QK_K + 128*n;
@@ -146,8 +146,8 @@ static __global__ void dequantize_block_q2_K(const void * __restrict__ vx, dst_t
y[l+64] = dall * (x[i].scales[is+4] & 0xF) * ((q >> 4) & 3) - dmin * (x[i].scales[is+4] >> 4);
y[l+96] = dall * (x[i].scales[is+6] & 0xF) * ((q >> 6) & 3) - dmin * (x[i].scales[is+6] >> 4);
#else
- const int is = tid/16; // 0 or 1
- const int il = tid%16; // 0...15
+ const int64_t is = tid/16; // 0 or 1
+ const int64_t il = tid%16; // 0...15
const uint8_t q = x[i].qs[il] >> (2*is);
dst_t * y = yy + i*QK_K + 16*is + il;
float dall = __low2half(x[i].dm);
@@ -161,19 +161,19 @@ static __global__ void dequantize_block_q2_K(const void * __restrict__ vx, dst_t
template<typename dst_t>
static __global__ void dequantize_block_q3_K(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_q3_K * x = (const block_q3_K *) vx;
#if QK_K == 256
- const int r = threadIdx.x/4;
- const int tid = r/2;
- const int is0 = r%2;
- const int l0 = 16*is0 + 4*(threadIdx.x%4);
- const int n = tid / 4;
- const int j = tid - 4*n;
+ const int64_t r = threadIdx.x/4;
+ const int64_t tid = r/2;
+ const int64_t is0 = r%2;
+ const int64_t l0 = 16*is0 + 4*(threadIdx.x%4);
+ const int64_t n = tid / 4;
+ const int64_t j = tid - 4*n;
uint8_t m = 1 << (4*n + j);
- int is = 8*n + 2*j + is0;
+ int64_t is = 8*n + 2*j + is0;
int shift = 2*j;
int8_t us = is < 4 ? (x[i].scales[is-0] & 0xF) | (((x[i].scales[is+8] >> 0) & 3) << 4) :
@@ -189,11 +189,11 @@ static __global__ void dequantize_block_q3_K(const void * __restrict__ vx, dst_t
for (int l = l0; l < l0+4; ++l) y[l] = dl * ((int8_t)((q[l] >> shift) & 3) - ((hm[l] & m) ? 0 : 4));
#else
- const int tid = threadIdx.x;
- const int is = tid/16; // 0 or 1
- const int il = tid%16; // 0...15
- const int im = il/8; // 0...1
- const int in = il%8; // 0...7
+ const int64_t tid = threadIdx.x;
+ const int64_t is = tid/16; // 0 or 1
+ const int64_t il = tid%16; // 0...15
+ const int64_t im = il/8; // 0...1
+ const int64_t in = il%8; // 0...7
dst_t * y = yy + i*QK_K + 16*is + il;
@@ -227,15 +227,15 @@ template<typename dst_t>
static __global__ void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restrict__ yy) {
const block_q4_K * x = (const block_q4_K *) vx;
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
#if QK_K == 256
// assume 32 threads
- const int tid = threadIdx.x;
- const int il = tid/8;
- const int ir = tid%8;
- const int is = 2*il;
- const int n = 4;
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/8;
+ const int64_t ir = tid%8;
+ const int64_t is = 2*il;
+ const int64_t n = 4;
dst_t * y = yy + i*QK_K + 64*il + n*ir;
@@ -254,7 +254,7 @@ static __global__ void dequantize_block_q4_K(const void * __restrict__ vx, dst_t
y[l +32] = d2 * (q[l] >> 4) - m2;
}
#else
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
const uint8_t * q = x[i].qs;
dst_t * y = yy + i*QK_K;
const float d = (float)x[i].dm[0];
@@ -268,14 +268,14 @@ template<typename dst_t>
static __global__ void dequantize_block_q5_K(const void * __restrict__ vx, dst_t * __restrict__ yy) {
const block_q5_K * x = (const block_q5_K *) vx;
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
#if QK_K == 256
// assume 64 threads - this is very slightly better than the one below
- const int tid = threadIdx.x;
- const int il = tid/16; // il is in 0...3
- const int ir = tid%16; // ir is in 0...15
- const int is = 2*il; // is is in 0...6
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/16; // il is in 0...3
+ const int64_t ir = tid%16; // ir is in 0...15
+ const int64_t is = 2*il; // is is in 0...6
dst_t * y = yy + i*QK_K + 64*il + 2*ir;
@@ -298,11 +298,11 @@ static __global__ void dequantize_block_q5_K(const void * __restrict__ vx, dst_t
y[32] = d2 * ((ql[ 0] >> 4) + (qh[ 0] & hm ? 16 : 0)) - m2;
y[33] = d2 * ((ql[ 1] >> 4) + (qh[ 1] & hm ? 16 : 0)) - m2;
#else
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
const uint8_t q = x[i].qs[tid];
- const int im = tid/8; // 0...3
- const int in = tid%8; // 0...7
- const int is = tid/16; // 0 or 1
+ const int64_t im = tid/8; // 0...3
+ const int64_t in = tid%8; // 0...7
+ const int64_t is = tid/16; // 0 or 1
const uint8_t h = x[i].qh[in] >> im;
const float d = x[i].d;
dst_t * y = yy + i*QK_K + tid;
@@ -359,13 +359,13 @@ static __global__ void dequantize_block_q6_K(const void * __restrict__ vx, dst_t
template<typename dst_t>
static __global__ void dequantize_block_iq2_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq2_xxs * x = (const block_iq2_xxs *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint16_t * q2 = x[i].qs + 4*ib;
const uint8_t * aux8 = (const uint8_t *)q2;
@@ -383,13 +383,13 @@ static __global__ void dequantize_block_iq2_xxs(const void * __restrict__ vx, ds
template<typename dst_t>
static __global__ void dequantize_block_iq2_xs(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq2_xs * x = (const block_iq2_xs *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint16_t * q2 = x[i].qs + 4*ib;
const uint8_t * grid = (const uint8_t *)(iq2xs_grid + (q2[il] & 511));
@@ -405,13 +405,13 @@ static __global__ void dequantize_block_iq2_xs(const void * __restrict__ vx, dst
template<typename dst_t>
static __global__ void dequantize_block_iq2_s(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq2_s * x = (const block_iq2_s *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint8_t * grid = (const uint8_t *)(iq2s_grid + (x[i].qs[4*ib+il] | ((x[i].qh[ib] << (8-2*il)) & 0x300)));
const float d = (float)x[i].d * (0.5f + ((x[i].scales[ib] >> 4*(il/2)) & 0xf)) * 0.25f;
@@ -426,13 +426,13 @@ static __global__ void dequantize_block_iq2_s(const void * __restrict__ vx, dst_
template<typename dst_t>
static __global__ void dequantize_block_iq3_xxs(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq3_xxs * x = (const block_iq3_xxs *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint8_t * q3 = x[i].qs + 8*ib;
const uint16_t * gas = (const uint16_t *)(x[i].qs + QK_K/4) + 2*ib;
@@ -454,13 +454,13 @@ static __global__ void dequantize_block_iq3_xxs(const void * __restrict__ vx, ds
template<typename dst_t>
static __global__ void dequantize_block_iq3_s(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq3_s * x = (const block_iq3_s *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint8_t * qs = x[i].qs + 8*ib;
const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*il+0] | ((x[i].qh[ib] << (8-2*il)) & 256)));
@@ -480,13 +480,13 @@ static __global__ void dequantize_block_iq3_s(const void * __restrict__ vx, dst_
template<typename dst_t>
static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq1_s * x = (const block_iq1_s *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
@@ -506,18 +506,18 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
template<typename dst_t>
static __global__ void dequantize_block_iq1_m(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq1_m * x = (const block_iq1_m *) vx;
- const int tid = threadIdx.x;
+ const int64_t tid = threadIdx.x;
#if QK_K == 256
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint16_t * sc = (const uint16_t *)x[i].scales;
iq1m_scale_t scale;
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
- const int ib16 = 2*ib + il/2; // sc[ib16/4] >> 3*(ib16%4) -> sc[ib/2] >> 3*((2*ib+il/2)%4);
+ const int64_t ib16 = 2*ib + il/2; // sc[ib16/4] >> 3*(ib16%4) -> sc[ib/2] >> 3*((2*ib+il/2)%4);
const float d = (float)scale.f16 * (2*((sc[ib16/4] >> 3*(ib16%4)) & 0x7) + 1);
const float delta = x[i].qh[2*ib+il/2] & (0x08 << 4*(il%2)) ? -1 - IQ1M_DELTA : -1 + IQ1M_DELTA;
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
@@ -537,12 +537,12 @@ static __global__ void dequantize_block_iq1_m(const void * __restrict__ vx, dst_
template<typename dst_t>
static __global__ void dequantize_block_iq4_nl(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq4_nl * x = (const block_iq4_nl *) vx + i*(QK_K/QK4_NL);
- const int tid = threadIdx.x;
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 4*il;
const uint8_t * q4 = x[ib].qs + 4*il;
const float d = (float)x[ib].d;
@@ -556,12 +556,12 @@ static __global__ void dequantize_block_iq4_nl(const void * __restrict__ vx, dst
#if QK_K != 64
template<typename dst_t>
static __global__ void dequantize_block_iq4_xs(const void * __restrict__ vx, dst_t * __restrict__ yy) {
- const int i = blockIdx.x;
+ const int64_t i = blockIdx.x;
const block_iq4_xs * x = (const block_iq4_xs *)vx;
- const int tid = threadIdx.x;
- const int il = tid/8; // 0...3
- const int ib = tid%8; // 0...7
+ const int64_t tid = threadIdx.x;
+ const int64_t il = tid/8; // 0...3
+ const int64_t ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 4*il;
const uint8_t * q4 = x[i].qs + 16*ib + 4*il;
const float d = (float)x[i].d * ((((x[i].scales_l[ib/2] >> 4*(ib%2)) & 0xf) | (((x[i].scales_h >> 2*ib) & 3) << 4)) - 32);
diff --git a/ggml-cuda/softmax.cu b/ggml-cuda/softmax.cu
index 9bda18e581c75..fa8f987cf7c1d 100644
--- a/ggml-cuda/softmax.cu
+++ b/ggml-cuda/softmax.cu
@@ -28,7 +28,7 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
extern __shared__ float data_soft_max_f32[];
float * buf_iw = data_soft_max_f32; // shared memory buffer for inter-warp communication
// shared memory buffer to cache values between iterations:
- float * vals = vals_smem ? buf_iw + WARP_SIZE : dst + rowx*ncols;
+ float * vals = vals_smem ? buf_iw + WARP_SIZE : dst + (int64_t)rowx*ncols;
float max_val = -INFINITY;
@@ -40,8 +40,8 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
break;
}
- const int ix = rowx*ncols + col;
- const int iy = rowy*ncols + col;
+ const int64_t ix = (int64_t)rowx*ncols + col;
+ const int64_t iy = (int64_t)rowy*ncols + col;
const float val = x[ix]*scale + (mask ? mask[iy] : 0.0f) + (pos ? slope*pos[col] : 0.0f);
@@ -109,7 +109,7 @@ static __global__ void soft_max_f32(const float * x, const float * mask, const f
return;
}
- const int idst = rowx*ncols + col;
+ const int64_t idst = (int64_t)rowx*ncols + col;
dst[idst] = vals[col] * inv_sum;
}
}
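To see why the patch widens index arithmetic to int64_t, a small illustration (not from the patch) using NumPy int32 scalars as a stand-in for CUDA int:

import numpy as np

rowx, ncols = 70_000, 70_000                  # rowx*ncols = 4.9e9 > 2**31 - 1
i32 = np.int32(rowx) * np.int32(ncols)        # wraps around modulo 2**32 (NumPy warns), giving a wrong index
i64 = np.int64(rowx) * np.int64(ncols)        # 4900000000, the intended index
print(i32, i64)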
From 35930ab85cdef8723ba34aa7a95a0d6dfe430c60 Mon Sep 17 00:00:00 2001
From: henk717
Date: Mon, 29 Apr 2024 05:25:18 +0200
Subject: [PATCH 11/35] Update README.md (#814)
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 7517e40cef4a6..2066652005919 100644
--- a/README.md
+++ b/README.md
@@ -11,10 +11,10 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
- **[Download the latest .exe release here](https://github.com/LostRuins/koboldcpp/releases/latest)** or clone the git repo.
- Windows binaries are provided in the form of **koboldcpp.exe**, which is a pyinstaller wrapper for a few **.dll** files and **koboldcpp.py**. You can also rebuild it yourself with the provided makefiles and scripts.
- Weights are not included, you can use the official llama.cpp `quantize.exe` to generate them from your official weight files (or download them from other places such as [TheBloke's Huggingface](https://huggingface.co/TheBloke).
-- To run, execute **koboldcpp.exe** or drag and drop your quantized `ggml_model.bin` file onto the .exe, and then connect with Kobold or Kobold Lite. If you're not on windows, then run the script **KoboldCpp.py** after compiling the libraries.
+- To run, simply execute **koboldcpp.exe**.
- Launching with no command line arguments displays a GUI containing a subset of configurable settings. Generally you dont have to change much besides the `Presets` and `GPU Layers`. Read the `--help` for more info about each settings.
- By default, you can connect to http://localhost:5001
-- You can also run it using the command line `koboldcpp.exe [ggml_model.bin] [port]`. For info, please check `koboldcpp.exe --help`
+- You can also run it using the command line. For info, please check `koboldcpp.exe --help`
### Improving Performance
- **(Nvidia Only) GPU Acceleration**: If you're on Windows with an Nvidia GPU you can get CUDA support out of the box using the `--usecublas` flag, make sure you select the correct .exe with CUDA support.
From c4f708a93f1df5e35167f9313e000d381298be7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Mon, 29 Apr 2024 14:36:22 +0200
Subject: [PATCH 12/35] llama : fix typo LAMMAFILE -> LLAMAFILE (#6974)
---
llama.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 3c64622d7c8dc..30af5991fe8f0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -17729,9 +17729,9 @@ const char * llama_print_system_info(void) {
s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
s += "MATMUL_INT8 = " + std::to_string(ggml_cpu_has_matmul_int8()) + " | ";
#ifdef GGML_USE_LLAMAFILE
- s += "LAMMAFILE = 1 | ";
+ s += "LLAMAFILE = 1 | ";
#else
- s += "LAMMAFILE = 0 | ";
+ s += "LLAMAFILE = 0 | ";
#endif
return s.c_str();
From ca7f29f568803bee4c92d1b3e41c7d721b0dc570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Przemys=C5=82aw=20Pawe=C5=82czyk?=
Date: Mon, 29 Apr 2024 14:59:47 +0200
Subject: [PATCH 13/35] ci : add building in MSYS2 environments (Windows)
(#6967)
---
.github/workflows/build.yml | 57 +++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 50f76db3c0d46..2d747e688437a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -593,6 +593,63 @@ jobs:
run: |
make swift
+ windows-msys2:
+ runs-on: windows-latest
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
+
+ steps:
+ - name: Clone
+ uses: actions/checkout@v4
+
+ - name: Setup ${{ matrix.sys }}
+ uses: msys2/setup-msys2@v2
+ with:
+ update: true
+ msystem: ${{matrix.sys}}
+ install: >-
+ base-devel
+ mingw-w64-${{matrix.env}}-toolchain
+ mingw-w64-${{matrix.env}}-cmake
+ mingw-w64-${{matrix.env}}-openblas
+
+ - name: Build using make
+ shell: msys2 {0}
+ run: |
+ make -j $(nproc)
+
+ - name: Clean after building using make
+ shell: msys2 {0}
+ run: |
+ make clean
+
+ - name: Build using make w/ OpenBLAS
+ shell: msys2 {0}
+ run: |
+ make LLAMA_OPENBLAS=1 -j $(nproc)
+
+ - name: Build using CMake
+ shell: msys2 {0}
+ run: |
+ cmake -B build
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+ - name: Clean after building using CMake
+ shell: msys2 {0}
+ run: |
+ rm -rf build
+
+ - name: Build using CMake w/ OpenBLAS
+ shell: msys2 {0}
+ run: |
+ cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
windows-latest-cmake:
runs-on: windows-latest
From 577277ffd203b190c3dc2ab3e737946dc432132c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Przemys=C5=82aw=20Pawe=C5=82czyk?=
Date: Mon, 29 Apr 2024 15:08:20 +0200
Subject: [PATCH 14/35] make : change GNU make default CXX from g++ to c++
(#6966)
---
Makefile | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/Makefile b/Makefile
index 9dc35410a2945..e169cae88895a 100644
--- a/Makefile
+++ b/Makefile
@@ -27,6 +27,17 @@ ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
+# In GNU make default CXX is g++ instead of c++. Let's fix that so that users
+# of non-gcc compilers don't have to provide g++ alias or wrapper.
+DEFCC := cc
+DEFCXX := c++
+ifeq ($(origin CC),default)
+CC := $(DEFCC)
+endif
+ifeq ($(origin CXX),default)
+CXX := $(DEFCXX)
+endif
+
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
From 3055a4180557c6cbe29eacc8284c9e070ac10eab Mon Sep 17 00:00:00 2001
From: Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
Date: Mon, 29 Apr 2024 09:34:41 -0400
Subject: [PATCH 15/35] convert : fix conversion of some BERT embedding models
(#6937)
---
convert-hf-to-gguf.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 5763b6664e832..3b9fa264aa22c 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -2482,6 +2482,10 @@ def write_tensors(self):
print(f"Can not map tensor {name!r}")
sys.exit()
+ # convert any unsupported data types to float32
+ if data_torch.dtype not in (torch.float16, torch.float32):
+ data_torch = data_torch.to(torch.float32)
+
data = data_torch.squeeze().numpy()
n_dims = len(data.shape)
new_dtype: type[np.floating[Any]]
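A small illustration (not from the patch) of the dtype normalization the added lines perform, using torch and a stand-in tensor; numpy has no bfloat16, so such tensors must be widened before .numpy():

import torch

data_torch = torch.zeros(4, dtype=torch.bfloat16)     # e.g. an embedding weight stored as bfloat16
if data_torch.dtype not in (torch.float16, torch.float32):
    data_torch = data_torch.to(torch.float32)         # mirrors the check added above
data = data_torch.squeeze().numpy()                   # now safe
print(data.dtype)                                     # float32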
From 3f167476b11efa7ab08f6cacdeb8cab0935c1249 Mon Sep 17 00:00:00 2001
From: David Renshaw
Date: Mon, 29 Apr 2024 09:35:45 -0400
Subject: [PATCH 16/35] sampling : use std::random_device{}() for default
random seed (#6962)
---
common/sampling.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/sampling.cpp b/common/sampling.cpp
index f2466550168a7..cc83600d9926e 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -68,7 +68,7 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed) {
if (seed == LLAMA_DEFAULT_SEED) {
- seed = time(NULL);
+ seed = std::random_device{}();
}
ctx->rng.seed(seed);
}
From f4ab2a41476600a98067a9474ea8f9e6db41bcfa Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 29 Apr 2024 16:58:41 +0300
Subject: [PATCH 17/35] llama : fix BPE pre-tokenization (#6920)
* merged the changes from deepseeker models to main branch
* Moved regex patterns to unicode.cpp and updated unicode.h
* Moved header files
* Resolved issues
* added and refactored unicode_regex_split and related functions
* Updated/merged the deepseek coder pr
* Refactored code
* Adding unicode regex mappings
* Adding unicode regex function
* Added needed functionality, testing remains
* Fixed issues
* Fixed issue with gpt2 regex custom preprocessor
* unicode : fix? unicode_wstring_to_utf8
* lint : fix whitespaces
* tests : add tokenizer tests for numbers
* unicode : remove redundant headers
* tests : remove and rename tokenizer test scripts
* tests : add sample usage
* gguf-py : reader prints warnings on duplicate keys
* llama : towards llama3 tokenization support (wip)
* unicode : shot in the dark to fix tests on Windows
* unicode : first try custom implementations
* convert : add "tokenizer.ggml.pre" GGUF KV (wip)
* llama : use new pre-tokenizer type
* convert : fix pre-tokenizer type writing
* lint : fix
* make : add test-tokenizer-0-llama-v3
* wip
* models : add llama v3 vocab file
* llama : adapt punctuation regex + add llama 3 regex
* minor
* unicode : set bomb
* unicode : set bomb
* unicode : always use std::wregex
* unicode : support \p{N}, \p{L} and \p{P} natively
* unicode : try fix windows
* unicode : category support via std::regex
* unicode : clean-up
* unicode : simplify
* convert : add convert-hf-to-gguf-update.py
ggml-ci
* lint : update
* convert : add falcon
ggml-ci
* unicode : normalize signatures
* lint : fix
* lint : fix
* convert : remove unused functions
* convert : add comments
* convert : exercise contractions
ggml-ci
* lint : fix
* cmake : refactor test targets
* tests : refactor vocab tests
ggml-ci
* tests : add more vocabs and tests
ggml-ci
* unicode : cleanup
* scripts : ignore new update script in check-requirements.sh
* models : add phi-3, mpt, gpt-2, starcoder
* tests : disable obsolete
ggml-ci
* tests : use faster bpe test
ggml-ci
* llama : more prominent warning for old BPE models
* tests : disable test-tokenizer-1-bpe due to slowness
ggml-ci
---------
Co-authored-by: Jaggzh
Co-authored-by: Kazim Abrar Mahi
---
.github/workflows/python-lint.yml | 2 +-
.gitignore | 15 +
Makefile | 44 +-
common/common.cpp | 12 +
common/common.h | 1 +
convert-hf-to-gguf-update.py | 275 ++++++++++
convert-hf-to-gguf.py | 93 +++-
convert-llama-ggml-to-gguf.py | 1 +
convert-persimmon-to-gguf.py | 1 +
gguf-py/gguf/constants.py | 2 +
gguf-py/gguf/gguf_reader.py | 9 +-
gguf-py/gguf/gguf_writer.py | 3 +
llama.cpp | 311 ++++++-----
llama.h | 12 +
models/ggml-vocab-bert-bge.gguf | Bin 0 -> 627549 bytes
models/ggml-vocab-bert-bge.gguf.inp | 102 ++++
models/ggml-vocab-bert-bge.gguf.out | 41 ++
models/ggml-vocab-deepseek-coder.gguf | Bin 0 -> 1156067 bytes
models/ggml-vocab-deepseek-coder.gguf.inp | 102 ++++
models/ggml-vocab-deepseek-coder.gguf.out | 41 ++
models/ggml-vocab-deepseek-llm.gguf | Bin 0 -> 3970167 bytes
models/ggml-vocab-deepseek-llm.gguf.inp | 102 ++++
models/ggml-vocab-deepseek-llm.gguf.out | 41 ++
models/ggml-vocab-falcon.gguf | Bin 2547782 -> 2287728 bytes
models/ggml-vocab-falcon.gguf.inp | 102 ++++
models/ggml-vocab-falcon.gguf.out | 41 ++
models/ggml-vocab-gpt-2.gguf | Bin 0 -> 1766807 bytes
models/ggml-vocab-gpt-2.gguf.inp | 102 ++++
models/ggml-vocab-gpt-2.gguf.out | 41 ++
models/ggml-vocab-llama-bpe.gguf | Bin 0 -> 7818140 bytes
models/ggml-vocab-llama-bpe.gguf.inp | 102 ++++
models/ggml-vocab-llama-bpe.gguf.out | 41 ++
...b-llama.gguf => ggml-vocab-llama-spm.gguf} | Bin 723676 -> 723869 bytes
models/ggml-vocab-llama-spm.gguf.inp | 102 ++++
models/ggml-vocab-llama-spm.gguf.out | 41 ++
models/ggml-vocab-mpt.gguf | Bin 1771406 -> 1771393 bytes
models/ggml-vocab-mpt.gguf.inp | 102 ++++
models/ggml-vocab-mpt.gguf.out | 41 ++
models/ggml-vocab-phi-3.gguf | Bin 0 -> 725945 bytes
models/ggml-vocab-phi-3.gguf.inp | 102 ++++
models/ggml-vocab-phi-3.gguf.out | 41 ++
...-3b-4e1t.gguf => ggml-vocab-stablelm.gguf} | Bin
models/ggml-vocab-starcoder.gguf | Bin 1719281 -> 1719346 bytes
models/ggml-vocab-starcoder.gguf.inp | 102 ++++
models/ggml-vocab-starcoder.gguf.out | 41 ++
requirements.txt | 1 +
...requirements-convert-hf-to-gguf-update.txt | 3 +
scripts/check-requirements.sh | 5 +
tests/CMakeLists.txt | 128 +++--
...er-0-falcon.py => test-tokenizer-0-bpe.py} | 37 +-
tests/test-tokenizer-0-falcon.cpp | 187 -------
tests/test-tokenizer-0-llama.cpp | 190 -------
...zer-0-llama.py => test-tokenizer-0-spm.py} | 22 +
tests/test-tokenizer-0.cpp | 271 ++++++++++
...r-1-llama.cpp => test-tokenizer-1-spm.cpp} | 2 +-
unicode-data.cpp | 2 +-
unicode-data.h | 4 +-
unicode.cpp | 487 ++++++++++++++++--
unicode.h | 3 +-
59 files changed, 2909 insertions(+), 644 deletions(-)
create mode 100644 convert-hf-to-gguf-update.py
create mode 100644 models/ggml-vocab-bert-bge.gguf
create mode 100644 models/ggml-vocab-bert-bge.gguf.inp
create mode 100644 models/ggml-vocab-bert-bge.gguf.out
create mode 100644 models/ggml-vocab-deepseek-coder.gguf
create mode 100644 models/ggml-vocab-deepseek-coder.gguf.inp
create mode 100644 models/ggml-vocab-deepseek-coder.gguf.out
create mode 100644 models/ggml-vocab-deepseek-llm.gguf
create mode 100644 models/ggml-vocab-deepseek-llm.gguf.inp
create mode 100644 models/ggml-vocab-deepseek-llm.gguf.out
create mode 100644 models/ggml-vocab-falcon.gguf.inp
create mode 100644 models/ggml-vocab-falcon.gguf.out
create mode 100644 models/ggml-vocab-gpt-2.gguf
create mode 100644 models/ggml-vocab-gpt-2.gguf.inp
create mode 100644 models/ggml-vocab-gpt-2.gguf.out
create mode 100644 models/ggml-vocab-llama-bpe.gguf
create mode 100644 models/ggml-vocab-llama-bpe.gguf.inp
create mode 100644 models/ggml-vocab-llama-bpe.gguf.out
rename models/{ggml-vocab-llama.gguf => ggml-vocab-llama-spm.gguf} (99%)
create mode 100644 models/ggml-vocab-llama-spm.gguf.inp
create mode 100644 models/ggml-vocab-llama-spm.gguf.out
create mode 100644 models/ggml-vocab-mpt.gguf.inp
create mode 100644 models/ggml-vocab-mpt.gguf.out
create mode 100644 models/ggml-vocab-phi-3.gguf
create mode 100644 models/ggml-vocab-phi-3.gguf.inp
create mode 100644 models/ggml-vocab-phi-3.gguf.out
rename models/{ggml-vocab-stablelm-3b-4e1t.gguf => ggml-vocab-stablelm.gguf} (100%)
create mode 100644 models/ggml-vocab-starcoder.gguf.inp
create mode 100644 models/ggml-vocab-starcoder.gguf.out
create mode 100644 requirements/requirements-convert-hf-to-gguf-update.txt
rename tests/{test-tokenizer-0-falcon.py => test-tokenizer-0-bpe.py} (59%)
delete mode 100644 tests/test-tokenizer-0-falcon.cpp
delete mode 100644 tests/test-tokenizer-0-llama.cpp
rename tests/{test-tokenizer-0-llama.py => test-tokenizer-0-spm.py} (86%)
create mode 100644 tests/test-tokenizer-0.cpp
rename tests/{test-tokenizer-1-llama.cpp => test-tokenizer-1-spm.cpp} (98%)
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
index f4ae654959be3..5be17f1576ebb 100644
--- a/.github/workflows/python-lint.yml
+++ b/.github/workflows/python-lint.yml
@@ -21,4 +21,4 @@ jobs:
uses: py-actions/flake8@v2
with:
ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
- exclude: "examples/*,examples/*/**,*/**/__init__.py"
+ exclude: "examples/*,examples/*/**,*/**/__init__.py,convert-hf-to-gguf-update.py"
diff --git a/.gitignore b/.gitignore
index 5c14900844435..60f9d1f8d04b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,3 +108,18 @@ examples/server/*.mjs.hpp
poetry.lock
poetry.toml
nppBackup
+
+# Test binaries
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-spm
+/tests/test-tokenizer-1-bpe
+/tests/test-rope
+/tests/test-backend-ops
diff --git a/Makefile b/Makefile
index e169cae88895a..0a73f2a582a20 100644
--- a/Makefile
+++ b/Makefile
@@ -6,11 +6,23 @@ BUILD_TARGETS = \
# Binaries only useful for tests
TEST_TARGETS = \
- tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
- tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
- tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope \
- tests/test-backend-ops tests/test-model-load-cancel tests/test-autorelease \
- tests/test-json-schema-to-grammar tests/test-grammar-integration
+ tests/test-autorelease \
+ tests/test-backend-ops \
+ tests/test-double-float \
+ tests/test-grad0 \
+ tests/test-grammar-integration \
+ tests/test-grammar-parser \
+ tests/test-json-schema-to-grammar \
+ tests/test-llama-grammar \
+ tests/test-model-load-cancel \
+ tests/test-opt \
+ tests/test-quantize-fns \
+ tests/test-quantize-perf \
+ tests/test-rope \
+ tests/test-sampling \
+ tests/test-tokenizer-0 \
+ tests/test-tokenizer-1-bpe \
+ tests/test-tokenizer-1-spm
# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -60,11 +72,17 @@ default: $(BUILD_TARGETS)
test: $(TEST_TARGETS)
@failures=0; \
for test_target in $(TEST_TARGETS); do \
- if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
- ./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
- elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
+ if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
- elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-coder.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-deepseek-llm.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
+ ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
+ elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
continue; \
elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
continue; \
@@ -982,11 +1000,7 @@ tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
-tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+tests/test-tokenizer-0: tests/test-tokenizer-0.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -994,7 +1008,7 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
+tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp ggml.o llama.o $(COMMON_DEPS) console.o $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
diff --git a/common/common.cpp b/common/common.cpp
index d42fa131de3f6..aa494291dd52b 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1693,6 +1693,18 @@ std::vector<std::string> string_split(std::string input, char separator) {
return parts;
}
+std::string string_strip(const std::string & str) {
+ size_t start = 0;
+ size_t end = str.size();
+ while (start < end && std::isspace(str[start])) {
+ start++;
+ }
+ while (end > start && std::isspace(str[end - 1])) {
+ end--;
+ }
+ return str.substr(start, end - start);
+}
+
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
{"top_k", llama_sampler_type::TOP_K},
diff --git a/common/common.h b/common/common.h
index 96a28a6ce1b7f..eea63a1142a4d 100644
--- a/common/common.h
+++ b/common/common.h
@@ -196,6 +196,7 @@ bool validate_file_name(const std::string & filename);
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
std::vector<std::string> string_split(std::string input, char separator);
+std::string string_strip(const std::string & str);
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
//
diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
new file mode 100644
index 0000000000000..1c559c3f693be
--- /dev/null
+++ b/convert-hf-to-gguf-update.py
@@ -0,0 +1,275 @@
+# This script downloads the tokenizer models of the specified models from Huggingface and
+# generates the get_vocab_base_pre() function for convert-hf-to-gguf.py
+#
+# This is necessary in order to analyze the type of pre-tokenizer used by the model and
+# provide the necessary information to llama.cpp via the GGUF header in order to implement
+# the same pre-tokenizer.
+#
+# ref: https://github.com/ggerganov/llama.cpp/pull/6920
+#
+# Instructions:
+#
+# - Add a new model to the "models" list
+# - Run the script with your huggingface token:
+#
+# python3 convert-hf-to-gguf-update.py
+#
+# - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py
+# - Update llama.cpp with the new pre-tokenizer if necessary
+#
+# TODO: generate tokenizer tests for llama.cpp
+# TODO: automate the update of convert-hf-to-gguf.py
+#
+
+import os
+import requests
+import sys
+import json
+
+from hashlib import sha256
+from enum import IntEnum, auto
+
+class TOKENIZER_TYPE(IntEnum):
+ SPM = auto()
+ BPE = auto()
+ WPM = auto()
+
+# TODO: this string has to exercise as much pre-tokenizer functionality as possible
+# will be updated with time - contributions welcome
+chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
+
+if len(sys.argv) == 2:
+ token = sys.argv[1]
+else:
+ print("Usage: python convert-hf-to-gguf-update.py ")
+ sys.exit(1)
+
+# TODO: add models here, base models preferred
+models = [
+ { "name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+ { "name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+ { "name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+ { "name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+ { "name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+ { "name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+ { "name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+ { "name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+ { "name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+ { "name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+ ]
+
+# make directory "models/tokenizers" if it doesn't exist
+if not os.path.exists("models/tokenizers"):
+ os.makedirs("models/tokenizers")
+
+def download_file_with_auth(url, token, save_path):
+ headers = {"Authorization": f"Bearer {token}"}
+ response = requests.get(url, headers=headers)
+ if response.status_code == 200:
+ with open(save_path, 'wb') as f:
+ f.write(response.content)
+ print(f"File {save_path} downloaded successfully")
+ else:
+ print(f"Failed to download file. Status code: {response.status_code}")
+
+# download the tokenizer models
+for model in models:
+ name = model["name"]
+ repo = model["repo"]
+ tokt = model["tokt"]
+
+ if not os.path.exists(f"models/tokenizers/{name}"):
+ os.makedirs(f"models/tokenizers/{name}")
+ else:
+ print(f"Directory models/tokenizers/{name} already exists - skipping")
+ continue
+
+ print(f"Downloading {name} to models/tokenizers/{name}")
+
+ url = f"{repo}/raw/main/config.json"
+ save_path = f"models/tokenizers/{name}/config.json"
+ download_file_with_auth(url, token, save_path)
+
+ url = f"{repo}/raw/main/tokenizer.json"
+ save_path = f"models/tokenizers/{name}/tokenizer.json"
+ download_file_with_auth(url, token, save_path)
+
+ if tokt == TOKENIZER_TYPE.SPM:
+ url = f"{repo}/resolve/main/tokenizer.model"
+ save_path = f"models/tokenizers/{name}/tokenizer.model"
+ download_file_with_auth(url, token, save_path)
+
+ url = f"{repo}/raw/main/tokenizer_config.json"
+ save_path = f"models/tokenizers/{name}/tokenizer_config.json"
+ download_file_with_auth(url, token, save_path)
+
+# generate the source code for the convert-hf-to-gguf.py:get_vocab_base_pre() function:
+# TODO: auto-update convert-hf-to-gguf.py with the generated function
+
+src_ifs = ""
+for model in models:
+ name = model["name"]
+ tokt = model["tokt"]
+
+ if tokt == TOKENIZER_TYPE.SPM:
+ continue
+
+ # create the tokenizer
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+
+ chktok = tokenizer.encode(chktxt)
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+ print(f"model: {name}")
+ print(f"tokt: {tokt}")
+ print(f"repo: {model['repo']}")
+ print(f"chktok: {chktok}")
+ print(f"chkhsh: {chkhsh}")
+
+ # print the "pre_tokenizer" content from the tokenizer.json
+ with open(f"models/tokenizers/{name}/tokenizer.json", "r") as f:
+ cfg = json.load(f)
+ pre_tokenizer = cfg["pre_tokenizer"]
+ print("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
+
+ print(f"\n")
+
+ src_ifs += f" if chkhsh == \"{chkhsh}\":\n"
+ src_ifs += f" # ref: {model['repo']}\n"
+ src_ifs += f" res = \"{name}\"\n"
+
+src_func = ""
+src_func += " def get_vocab_base_pre(self, tokenizer) -> str:\n"
+src_func += " # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that\n"
+src_func += " # is specific for the BPE pre-tokenizer used by the model\n"
+src_func += " # we will use this unique identifier to write a \"tokenizer.ggml.pre\" entry in the GGUF file which we can\n"
+src_func += " # use in llama.cpp to implement the same pre-tokenizer\n"
+src_func += "\n"
+src_func += f" chktxt = {repr(chktxt)}\n"
+src_func += "\n"
+src_func += " chktok = tokenizer.encode(chktxt)\n"
+src_func += " chkhsh = sha256(str(chktok).encode()).hexdigest()\n"
+src_func += "\n"
+src_func += " print(f\"chktok: {chktok}\")\n"
+src_func += " print(f\"chkhsh: {chkhsh}\")\n"
+src_func += "\n"
+src_func += " res = None\n"
+src_func += "\n"
+src_func += " # NOTE: if you get an error here, you need to add the model to the if-elif chain below\n"
+src_func += " # don't do this manually - use the convert-hf-to-gguf-update.py script!\n"
+src_func += f"{src_ifs}\n"
+src_func += " if res is None:\n"
+src_func += " print(\"\\n\")\n"
+src_func += " print(\"**************************************************************************************\")\n"
+src_func += " print(\"** WARNING: The BPE pre-tokenizer was not recognized!\")\n"
+src_func += " print(\"** This means that it was not added yet or you are using an older version.\")\n"
+src_func += " print(\"** Check convert-hf-to-gguf-update.py and update it accordingly.\")\n"
+src_func += " print(\"**\")\n"
+src_func += " print(f\"** chkhsh: {chkhsh}\")\n"
+src_func += " print(\"**************************************************************************************\")\n"
+src_func += " print(\"\\n\")\n"
+src_func += " raise NotImplementedError(\"BPE pre-tokenizer was not recognized - update get_vocab_base_pre()\")\n"
+src_func += "\n"
+src_func += " print(f\"tokenizer.ggml.pre: {res}\")\n"
+src_func += " print(f\"chkhsh: {chkhsh}\")\n"
+src_func += "\n"
+src_func += " return res\n"
+
+print(src_func)
+
+print("\n")
+print("!!! Copy-paste the function above into convert-hf-to-gguf.py !!!")
+print("\n")
+
+# generate tests for each tokenizer model
+
+tests = [
+ "",
+ " ",
+ " ",
+ " ",
+ "\t",
+ "\n",
+ "\n\n",
+ "\n\n\n",
+ "\t\n",
+ "Hello world",
+ " Hello world",
+ "Hello World",
+ " Hello World",
+ " Hello World!",
+ "Hello, world!",
+ " Hello, world!",
+ " this is 🦙.cpp",
+ "w048 7tuijk dsdfhu",
+ "нещо на Български",
+ "កាន់តែពិសេសអាចខលចេញ",
+ "🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)",
+ "Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello",
+ " Hello\n Hello",
+ " (",
+ "\n =",
+ "' era",
+ "Hello, y'all! How are you 😁 ?我想在apple工作1314151天~",
+ "3",
+ "33",
+ "333",
+ "3333",
+ "33333",
+ "333333",
+ "3333333",
+ "33333333",
+ "333333333",
+ chktxt,
+]
+
+# write the tests to ./models/ggml-vocab-{name}.gguf.inp
+# the format is:
+#
+# test0
+# __ggml_vocab_test__
+# test1
+# __ggml_vocab_test__
+# ...
+#
+
+# with each model, encode all tests and write the results in ./models/ggml-vocab-{name}.gguf.out
+# for each test, write the resulting tokens on a separate line
+
+for model in models:
+ name = model["name"]
+ tokt = model["tokt"]
+
+ # create the tokenizer
+ from transformers import AutoTokenizer
+ tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+
+ with open(f"models/ggml-vocab-{name}.gguf.inp", "w") as f:
+ for text in tests:
+ f.write(f"{text}")
+ f.write("\n__ggml_vocab_test__\n")
+
+ with open(f"models/ggml-vocab-{name}.gguf.out", "w") as f:
+ for text in tests:
+ res = tokenizer.encode(text, add_special_tokens=False)
+ for r in res:
+ f.write(f" {r}")
+ f.write("\n")
+
+ print(f"Tests for {name} written in ./models/ggml-vocab-{name}.gguf.*")
+
+# generate commands for creating vocab files
+
+print("\nRun the following commands to generate the vocab files for testing:\n")
+
+for model in models:
+ name = model["name"]
+
+ print(f"python3 convert-hf-to-gguf.py models/tokenizers/{name}/ --outfile models/ggml-vocab-{name}.gguf --vocab-only")
+
+print("\n")
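For reference, a minimal sketch (not part of the patch) of how a consumer might read one of the generated *.gguf.inp files back, assuming the __ggml_vocab_test__ separator format described in the comments above:

def read_vocab_tests(path: str) -> list[str]:
    # each test case is followed by a line containing only __ggml_vocab_test__
    with open(path, "r", encoding="utf-8") as f:
        raw = f.read()
    parts = raw.split("\n__ggml_vocab_test__\n")
    return parts[:-1]  # drop the empty chunk after the trailing separator

tests = read_vocab_tests("models/ggml-vocab-llama-bpe.gguf.inp")
print(len(tests), repr(tests[0]))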
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3b9fa264aa22c..d1b8cef11277d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -11,6 +11,7 @@
from abc import ABC, abstractmethod
from enum import IntEnum
from pathlib import Path
+from hashlib import sha256
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterator, Sequence, TypeVar, cast
import numpy as np
@@ -229,7 +230,7 @@ def _get_part_names(self):
return (f"pytorch_model-{n:05}-of-{self.num_parts:05}.bin" for n in range(1, self.num_parts + 1))
# used for GPT-2 BPE and WordPiece vocabs
- def get_basic_vocab(self) -> tuple[list[str], list[int]]:
+ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
tokens: list[str] = []
toktypes: list[int] = []
@@ -238,6 +239,8 @@ def get_basic_vocab(self) -> tuple[list[str], list[int]]:
vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab))
assert max(tokenizer.vocab.values()) < vocab_size
+ tokpre = self.get_vocab_base_pre(tokenizer)
+
reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
added_vocab = tokenizer.get_added_vocab()
@@ -255,11 +258,75 @@ def get_basic_vocab(self) -> tuple[list[str], list[int]]:
tokens.append(reverse_vocab[i])
toktypes.append(gguf.TokenType.NORMAL)
- return tokens, toktypes
+ return tokens, toktypes, tokpre
+
+ # NOTE: this function is generated by convert-hf-to-gguf-update.py
+ # do not modify it manually!
+ # ref: https://github.com/ggerganov/llama.cpp/pull/6920
+ def get_vocab_base_pre(self, tokenizer) -> str:
+ # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
+ # is specific for the BPE pre-tokenizer used by the model
+ # we will use this unique identifier to write a "tokenizer.ggml.pre" entry in the GGUF file which we can
+ # use in llama.cpp to implement the same pre-tokenizer
+
+ chktxt = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶\u200d🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````""""......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
+
+ chktok = tokenizer.encode(chktxt)
+ chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+ print(f"chktok: {chktok}")
+ print(f"chkhsh: {chkhsh}")
+
+ res = None
+
+ # NOTE: if you get an error here, you need to add the model to the if-elif chain below
+ # don't do this manually - use the convert-hf-to-gguf-update.py script!
+ if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
+ # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
+ res = "llama-bpe"
+ if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
+ # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
+ res = "deepseek-llm"
+ if chkhsh == "347715f544604f9118bb75ed199f68779f423cabb20db6de6f31b908d04d7821":
+ # ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base
+ res = "deepseek-coder"
+ if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed":
+ # ref: https://huggingface.co/tiiuae/falcon-7b
+ res = "falcon"
+ if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+ # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
+ res = "bert-bge"
+ if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
+ # ref: https://huggingface.co/mosaicml/mpt-7b
+ res = "mpt"
+ if chkhsh == "35d91631860c815f952d711435f48d356ebac988362536bed955d43bfa436e34":
+ # ref: https://huggingface.co/bigcode/starcoder2-3b
+ res = "starcoder"
+ if chkhsh == "3ce83efda5659b07b1ad37ca97ca5797ea4285d9b9ab0dc679e4a720c9da7454":
+ # ref: https://huggingface.co/openai-community/gpt2
+ res = "gpt-2"
+
+ if res is None:
+ print("\n")
+ print("**************************************************************************************")
+ print("** WARNING: The BPE pre-tokenizer was not recognized!")
+ print("** This means that it was not added yet or you are using an older version.")
+ print("** Check convert-hf-to-gguf-update.py and update it accordingly.")
+ print("**")
+ print(f"** chkhsh: {chkhsh}")
+ print("**************************************************************************************")
+ print("\n")
+ raise NotImplementedError("BPE pre-tokenizer was not recognized - update get_vocab_base_pre()")
+
+ print(f"tokenizer.ggml.pre: {res}")
+ print(f"chkhsh: {chkhsh}")
+
+ return res
def _set_vocab_gpt2(self) -> None:
- tokens, toktypes = self.get_basic_vocab()
+ tokens, toktypes, tokpre = self.get_vocab_base()
self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
@@ -277,6 +344,8 @@ def _set_vocab_qwen(self):
vocab_size = hparams["vocab_size"]
assert max(tokenizer.get_vocab().values()) < vocab_size
+ tokpre = self.get_vocab_base_pre(tokenizer)
+
merges = []
vocab = {}
mergeable_ranks = tokenizer.mergeable_ranks
@@ -304,6 +373,7 @@ def _set_vocab_qwen(self):
toktypes.append(gguf.TokenType.NORMAL)
self.gguf_writer.add_tokenizer_model("gpt2")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
@@ -376,6 +446,7 @@ def _set_vocab_sentencepiece(self):
assert len(tokens) == vocab_size
self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
@@ -397,6 +468,7 @@ def _set_vocab_llama_hf(self):
assert len(tokens) == vocab.vocab_size
self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
@@ -840,6 +912,7 @@ def set_vocab(self):
toktypes.append(toktype)
self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
@@ -1335,6 +1408,11 @@ def set_gguf_parameters(self):
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
self.gguf_writer.add_rope_dimension_count(hparams["hidden_size"] // hparams["num_attention_heads"])
+ if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+ if self.hparams["rope_scaling"].get("type") == "linear":
+ self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+ self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+
# Same as super class, but permuting q_proj, k_proj
def write_tensors(self):
block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
@@ -2052,6 +2130,7 @@ def set_vocab(self):
toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
@@ -2294,6 +2373,7 @@ def set_vocab(self):
toktypes.append(SentencePieceTokenTypes.USER_DEFINED)
self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_tokenizer_pre("default")
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_scores(scores)
self.gguf_writer.add_token_types(toktypes)
@@ -2443,7 +2523,7 @@ def set_gguf_parameters(self):
self.gguf_writer.add_pooling_type(pooling_type)
def set_vocab(self):
- tokens, toktypes = self.get_basic_vocab()
+ tokens, toktypes, tokpre = self.get_vocab_base()
self.vocab_size = len(tokens)
# we need this to validate the size of the token_type embeddings
@@ -2461,6 +2541,7 @@ def phantom(tok):
# add vocab to gguf
self.gguf_writer.add_tokenizer_model("bert")
+ self.gguf_writer.add_tokenizer_pre(tokpre)
self.gguf_writer.add_token_list(tokens)
self.gguf_writer.add_token_types(toktypes)
@@ -2642,6 +2723,9 @@ def set_vocab(self):
field = neox_reader.get_field(gguf.Keys.Tokenizer.MODEL)
self.gguf_writer.add_tokenizer_model(bytes(field.parts[-1]))
+ field = neox_reader.get_field(gguf.Keys.Tokenizer.PRE)
+ self.gguf_writer.add_tokenizer_pre(bytes(field.parts[-1]))
+
field = neox_reader.get_field(gguf.Keys.Tokenizer.LIST)
self.gguf_writer.add_token_list([bytes(field.parts[i]) for i in field.data][:vocab_size])
@@ -2847,6 +2931,7 @@ def parse_args() -> argparse.Namespace:
help="directory containing model file",
)
parser.add_argument("--use-temp-file", action="store_true", help="use the tempfile library while processing (helpful when running out of memory, process killed)")
+ parser.add_argument("--model-name", type=str, default=None, help="name of the model")
return parser.parse_args()
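
The fingerprint that drives the if-chain in get_vocab_base_pre() is simply the SHA-256 of the stringified token-ID list obtained by encoding chktxt with the model's own tokenizer. A minimal standalone sketch of the same computation; the helper name and model_dir argument are illustrative:

# sketch: reproduce the chkhsh fingerprint used by get_vocab_base_pre()
from hashlib import sha256
from transformers import AutoTokenizer

def pre_tokenizer_hash(model_dir: str, chktxt: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    chktok = tokenizer.encode(chktxt)                # list of token IDs
    return sha256(str(chktok).encode()).hexdigest()  # compare against the chkhsh values above
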
diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
index cd9644fcb5213..5354b748b259f 100755
--- a/convert-llama-ggml-to-gguf.py
+++ b/convert-llama-ggml-to-gguf.py
@@ -281,6 +281,7 @@ def add_params(self, gguf_writer):
def add_vocab(self, gguf_writer):
hp = self.model.hyperparameters
gguf_writer.add_tokenizer_model('llama')
+ gguf_writer.add_tokenizer_pre('default')
tokens = []
scores = []
toktypes = []
diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py
index 69be17f94efd9..aba575426b492 100755
--- a/convert-persimmon-to-gguf.py
+++ b/convert-persimmon-to-gguf.py
@@ -99,6 +99,7 @@ def main():
tokens, scores, toktypes = _get_sentencepiece_tokenizer_info(args.model_dir)
gguf_writer.add_tokenizer_model('llama')
+ gguf_writer.add_tokenizer_pre('default')
gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index d2f1de19831e3..6d597bfd9d621 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -72,6 +72,7 @@ class SSM:
class Tokenizer:
MODEL = "tokenizer.ggml.model"
+ PRE = "tokenizer.ggml.pre"
LIST = "tokenizer.ggml.tokens"
TOKEN_TYPE = "tokenizer.ggml.token_type"
TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
@@ -940,6 +941,7 @@ def get_type(val: Any) -> GGUFValueType:
# tokenization
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index 48ef6d4ae45df..2bdb15525b1a1 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -139,8 +139,13 @@ def _get(
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
if field.name in self.fields:
- raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
- self.fields[field.name] = field
+ # TODO: add option to generate error on duplicate keys
+ # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+
+ print(f'Warning: Duplicate key {field.name} at offset {field.offset}')
+ self.fields[field.name + '_{}'.format(field.offset)] = field
+ else:
+ self.fields[field.name] = field
return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
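
With the reader change above, a converted file can be checked for the new key without loading it in llama.cpp. A minimal sketch using gguf.GGUFReader, mirroring the field-access pattern used with neox_reader earlier in this patch; the file path is illustrative, and the None check assumes get_field() returns None for a missing key:

# sketch: inspect which pre-tokenizer a converted GGUF declares
from gguf import GGUFReader

reader = GGUFReader("models/ggml-vocab-llama-bpe.gguf")  # illustrative path
field = reader.get_field("tokenizer.ggml.pre")
if field is None:
    print("no tokenizer.ggml.pre key - llama.cpp will warn and fall back to 'default'")
else:
    print("tokenizer.ggml.pre =", bytes(field.parts[-1]).decode("utf-8"))
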
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index ec44ac9f3813d..089aece876a93 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -427,6 +427,9 @@ def add_ssm_time_step_rank(self, value: int) -> None:
def add_tokenizer_model(self, model: str) -> None:
self.add_string(Keys.Tokenizer.MODEL, model)
+ def add_tokenizer_pre(self, pre: str) -> None:
+ self.add_string(Keys.Tokenizer.PRE, pre)
+
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
self.add_array(Keys.Tokenizer.LIST, tokens)
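
On the writing side, any converter can tag its output the same way the scripts above now do. A minimal vocab-only sketch of the relevant GGUFWriter calls; the output path, architecture, and two-token placeholder vocab are illustrative, not a real conversion:

# sketch: emit the new tokenizer.ggml.pre key alongside the usual vocab metadata
import gguf

tokens   = ["hello", "world"]                              # placeholder vocab
toktypes = [gguf.TokenType.NORMAL, gguf.TokenType.NORMAL]

writer = gguf.GGUFWriter("ggml-vocab-example.gguf", arch="llama")
writer.add_architecture()
writer.add_tokenizer_model("gpt2")
writer.add_tokenizer_pre("llama-bpe")   # must be a name llm_load_vocab() recognizes
writer.add_token_list(tokens)
writer.add_token_types(toktypes)
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()
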
diff --git a/llama.cpp b/llama.cpp
index 30af5991fe8f0..72c10ffc202fc 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -317,6 +317,7 @@ enum llm_kv {
LLM_KV_SSM_TIME_STEP_RANK,
LLM_KV_TOKENIZER_MODEL,
+ LLM_KV_TOKENIZER_PRE,
LLM_KV_TOKENIZER_LIST,
LLM_KV_TOKENIZER_TOKEN_TYPE,
LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
@@ -393,6 +394,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" },
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
+ { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
{ LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
{ LLM_KV_TOKENIZER_TOKEN_TYPE, "tokenizer.ggml.token_type" },
{ LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, "tokenizer.ggml.token_type_count" },
@@ -2115,7 +2117,8 @@ struct llama_vocab {
ttype type;
};
- enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
+ enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
+ enum llama_vocab_pre_type type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
std::unordered_map<token, id> token_to_id;
std::vector<token_data> id_to_token;
@@ -4214,11 +4217,13 @@ static void llm_load_vocab(
// determine vocab type
{
- std::string tokenizer_name;
+ std::string tokenizer_model;
+ std::string tokenizer_pre;
- ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_name);
+ ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
+ ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
- if (tokenizer_name == "no_vocab") {
+ if (tokenizer_model == "no_vocab") {
vocab.type = LLAMA_VOCAB_TYPE_NONE;
// default special tokens
@@ -4232,7 +4237,7 @@ static void llm_load_vocab(
vocab.linefeed_id = -1;
return;
- } else if (tokenizer_name == "llama") {
+ } else if (tokenizer_model == "llama") {
vocab.type = LLAMA_VOCAB_TYPE_SPM;
// default special tokens
@@ -4277,9 +4282,27 @@ static void llm_load_vocab(
if (add_space_prefix_keyidx != -1) {
vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
} // The default value of add_space_prefix is true.
- } else if (tokenizer_name == "gpt2") {
- vocab.type = LLAMA_VOCAB_TYPE_BPE;
+ } else if (tokenizer_model == "bert") {
+ vocab.type = LLAMA_VOCAB_TYPE_WPM;
+ // default special tokens
+ vocab.special_bos_id = -1;
+ vocab.special_eos_id = -1;
+ vocab.special_unk_id = 100;
+ vocab.special_sep_id = 102;
+ vocab.special_pad_id = 0;
+ vocab.special_cls_id = 101;
+ vocab.special_mask_id = 103;
+ vocab.add_space_prefix = false;
+ } else {
+ if (tokenizer_model == "gpt2") {
+ vocab.type = LLAMA_VOCAB_TYPE_BPE;
+ } else {
+ LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_model.c_str());
+ LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
+ vocab.type = LLAMA_VOCAB_TYPE_SPM;
+ return;
+ }
// read bpe merges and populate bpe ranks
const int merges_keyidx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_MERGES).c_str());
if (merges_keyidx == -1) {
@@ -4313,23 +4336,50 @@ static void llm_load_vocab(
vocab.special_pad_id = -1;
vocab.special_cls_id = -1;
vocab.special_mask_id = -1;
- } else if (tokenizer_name == "bert") {
- vocab.type = LLAMA_VOCAB_TYPE_WPM;
+ }
- // default special tokens
- vocab.special_bos_id = -1;
- vocab.special_eos_id = -1;
- vocab.special_unk_id = 100;
- vocab.special_sep_id = 102;
- vocab.special_pad_id = 0;
- vocab.special_cls_id = 101;
- vocab.special_mask_id = 103;
- vocab.add_space_prefix = false;
+ // for now, only BPE models have pre-tokenizers
+ if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
+ if (tokenizer_pre.empty()) {
+ LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
+ LLAMA_LOG_WARN("%s: \n", __func__);
+ LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
+ LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! \n", __func__);
+ LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
+ LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
+ LLAMA_LOG_WARN("%s: \n", __func__);
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ } else if (
+ tokenizer_pre == "default") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+ } else if (
+ tokenizer_pre == "llama3" ||
+ tokenizer_pre == "llama-v3" ||
+ tokenizer_pre == "llama-bpe") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
+ } else if (
+ tokenizer_pre == "deepseek-llm") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM;
+ } else if (
+ tokenizer_pre == "deepseek-coder") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
+ } else if (
+ tokenizer_pre == "falcon") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
+ } else if (
+ tokenizer_pre == "mpt") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
+ } else if (
+ tokenizer_pre == "starcoder") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_STARCODER;
+ } else if (
+ tokenizer_pre == "gpt-2") {
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
+ } else {
+ throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+ }
} else {
- LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str());
- LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
-
- vocab.type = LLAMA_VOCAB_TYPE_SPM;
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
}
}
@@ -11834,7 +11884,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
}
case LLAMA_VOCAB_TYPE_BPE: {
GGML_ASSERT(false);
- return unicode_utf8_to_byte(token_data.text);
+ return unicode_utf8_to_byte(token_data.text); // TODO: why is this here after GGML_ASSERT?
}
case LLAMA_VOCAB_TYPE_WPM: {
GGML_ASSERT(false);
@@ -12056,7 +12106,79 @@ struct llm_tokenizer_bpe {
void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
int final_prev_index = -1;
- auto word_collection = bpe_gpt2_preprocess(text);
+
+ std::vector<std::string> word_collection;
+ switch (vocab.type) {
+ case LLAMA_VOCAB_TYPE_BPE:
+ switch (vocab.type_pre) {
+ case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
+ word_collection = unicode_regex_split(text, {
+ // original regex from tokenizer.json
+ //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+
+ // adapted: https://github.com/ggerganov/llama.cpp/pull/6920#issuecomment-2080233989
+ "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ });
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM:
+ word_collection = unicode_regex_split(text, {
+ "[\r\n]",
+ "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA-Za-z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
+ "\\s?[!-/:-~!-/:-~‘-‟ -。]+",
+ "\\s+$",
+ "[一-龥ࠀ-一가-]+",
+ "\\p{N}+",
+ });
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
+ word_collection = unicode_regex_split(text, {
+ "[\r\n]",
+ "\\s?\\p{L}+",
+ "\\s?\\p{P}+",
+ "[一-龥ࠀ-一가-]+",
+ "\\p{N}+",
+ });
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_FALCON:
+ word_collection = unicode_regex_split(text, {
+ "[\\p{P}\\$\\+<=>\\^~\\|]+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ "\\p{N}+",
+ "[0-9][0-9][0-9]",
+ });
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_MPT:
+ // TODO: MPT pre-tokenization regexes are unknown
+ // the following are close, but not exact. run the following:
+ // ./bin/test-tokenizer-0 ../models/ggml-vocab-mpt.gguf
+ GGML_ASSERT("MPT pre-tokenization regexes are unknown - fixes needed");
+ word_collection = unicode_regex_split(text, {
+ "\\s?\\p{L}+",
+ "\\s?\\p{P}+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ });
+ break;
+ case LLAMA_VOCAB_PRE_TYPE_STARCODER:
+ case LLAMA_VOCAB_PRE_TYPE_GPT2:
+ word_collection = unicode_regex_split(text, {
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ });
+ break;
+ default:
+ // default regex for BPE tokenization pre-processing
+ word_collection = unicode_regex_split(text, {
+ "[\\p{P}\\$\\+<=>\\^~\\|]+",
+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+ "\\p{N}+",
+ "[0-9][0-9][0-9]",
+ });
+ break;
+ }
+ break;
+ default:
+ GGML_ASSERT(false);
+ break;
+ }
symbols_final.clear();
@@ -12183,145 +12305,6 @@ struct llm_tokenizer_bpe {
work_queue.push(bigram);
}
- std::vector<std::string> bpe_gpt2_preprocess(const std::string & text) {
- std::vector<std::string> bpe_words;
- std::vector<std::string> bpe_encoded_words;
-
- std::string token = "";
- // GPT2 system regex: 's|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+
- bool collecting_numeric = false;
- bool collecting_letter = false;
- bool collecting_special = false;
- bool collecting_whitespace_lookahead = false;
- bool collecting = false;
-
- std::vector<std::string> text_utf;
- text_utf.reserve(text.size());
- bpe_words.reserve(text.size());
- bpe_encoded_words.reserve(text.size());
-
- const auto cpts = unicode_cpts_from_utf8(text);
- for (size_t i = 0; i < cpts.size(); ++i)
- text_utf.emplace_back(unicode_cpt_to_utf8(cpts[i]));
-
- for (int i = 0; i < (int)text_utf.size(); i++) {
- const std::string & utf_char = text_utf[i];
- bool split_condition = false;
- int bytes_remain = text_utf.size() - i;
- // forward backward lookups
- const std::string & utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : "";
- const std::string & utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : "";
-
- // handling contractions
- if (!split_condition && bytes_remain >= 2) {
- // 's|'t|'m|'d
- if (utf_char == "\'" && (utf_char_next == "s" || utf_char_next == "t" || utf_char_next == "m" || utf_char_next == "d")) {
- split_condition = true;
- }
- if (split_condition) {
- if (token.size()) {
- bpe_words.emplace_back(token); // push previous content as token
- }
- token = utf_char + utf_char_next;
- bpe_words.emplace_back(token);
- token = "";
- i++;
- continue;
- }
- }
- if (!split_condition && bytes_remain >= 3) {
- // 're|'ve|'ll
- if (utf_char == "\'" && (
- (utf_char_next == "r" && utf_char_next_next == "e") ||
- (utf_char_next == "v" && utf_char_next_next == "e") ||
- (utf_char_next == "l" && utf_char_next_next == "l"))
- ) {
- split_condition = true;
- }
- if (split_condition) {
- // current token + next token can be defined
- if (token.size()) {
- bpe_words.emplace_back(token); // push previous content as token
- }
- token = utf_char + utf_char_next + utf_char_next_next;
- bpe_words.emplace_back(token); // the contraction
- token = "";
- i += 2;
- continue;
- }
- }
-
- if (!split_condition && !collecting) {
- if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_LETTER || (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_LETTER)) {
- collecting_letter = true;
- collecting = true;
- }
- else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_DIGIT || (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) {
- collecting_numeric = true;
- collecting = true;
- }
- else if (
- ((unicode_cpt_type(utf_char) != CODEPOINT_TYPE_LETTER && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_DIGIT) && (unicode_cpt_type(utf_char) != CODEPOINT_TYPE_WHITESPACE)) ||
- (!token.size() && utf_char == " " && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_LETTER && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_DIGIT && unicode_cpt_type(utf_char_next) != CODEPOINT_TYPE_WHITESPACE)
- ) {
- collecting_special = true;
- collecting = true;
- }
- else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE && unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_WHITESPACE) {
- collecting_whitespace_lookahead = true;
- collecting = true;
- }
- else if (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE) {
- split_condition = true;
- }
- }
- else if (!split_condition && collecting) {
- if (collecting_letter && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_LETTER) {
- split_condition = true;
- }
- else if (collecting_numeric && unicode_cpt_type(utf_char) != CODEPOINT_TYPE_DIGIT) {
- split_condition = true;
- }
- else if (collecting_special && (unicode_cpt_type(utf_char) == CODEPOINT_TYPE_LETTER || unicode_cpt_type(utf_char) == CODEPOINT_TYPE_DIGIT || unicode_cpt_type(utf_char) == CODEPOINT_TYPE_WHITESPACE)) {
- split_condition = true;
- }
- else if (collecting_whitespace_lookahead && (unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_LETTER || unicode_cpt_type(utf_char_next) == CODEPOINT_TYPE_DIGIT)) {
- split_condition = true;
- }
- }
-
- if (utf_char_next == "") {
- split_condition = true; // final
- token += utf_char;
- }
-
- if (split_condition) {
- if (token.size()) {
- bpe_words.emplace_back(token);
- }
- token = utf_char;
- collecting = false;
- collecting_letter = false;
- collecting_numeric = false;
- collecting_special = false;
- collecting_whitespace_lookahead = false;
- }
- else {
- token += utf_char;
- }
- }
-
- for (std::string & word : bpe_words) {
- std::string encoded_token = "";
- for (char & c : word) {
- encoded_token += unicode_byte_to_utf8(c);
- }
- bpe_encoded_words.emplace_back(encoded_token);
- }
-
- return bpe_encoded_words;
- }
-
const llama_vocab & vocab;
std::vector<llm_symbol> symbols;
@@ -12641,7 +12624,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
} break;
case LLAMA_VOCAB_TYPE_BPE:
{
- if (add_special && vocab.special_add_bos == 1) {
+ if (add_special && vocab.special_add_bos != 0) {
GGML_ASSERT(vocab.special_bos_id != -1);
output.push_back(vocab.special_bos_id);
}
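
For intuition about what the new unicode_regex_split() calls produce, the GPT-2 style pattern used for the STARCODER and GPT2 cases can be exercised directly with Python's third-party `regex` module, which understands \p{L}, \p{N} and the negative lookahead. This is only an illustration of the expected splits, not the C++ code path:

# sketch: the GPT-2 style pre-tokenizer split used for the STARCODER/GPT2 cases
import regex  # third-party module; unlike `re` it supports \p{...} classes

GPT2_PRE = r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)"

print(regex.findall(GPT2_PRE, "Hello, world!  I'll take 33 llamas."))
# ['Hello', ',', ' world', '!', ' ', ' I', "'ll", ' take', ' 33', ' llamas', '.']
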
diff --git a/llama.h b/llama.h
index 8b1b15ed4ad55..30835de5f9433 100644
--- a/llama.h
+++ b/llama.h
@@ -69,6 +69,18 @@ extern "C" {
LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece
};
+ // pre-tokenization types
+ enum llama_vocab_pre_type {
+ LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
+ LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
+ LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
+ LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
+ LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
+ LLAMA_VOCAB_PRE_TYPE_MPT = 5,
+ LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
+ LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
+ };
+
// note: these values should be synchronized with ggml_rope
// TODO: maybe move this enum to ggml.h (ggml_rope_type)
enum llama_rope_type {
diff --git a/models/ggml-vocab-bert-bge.gguf b/models/ggml-vocab-bert-bge.gguf
new file mode 100644
index 0000000000000000000000000000000000000000..b2cbd5df6882d8e581368d494bb86aa449f212a7
GIT binary patch
literal 627549
[base85-encoded binary patch data omitted]