From 2aca1ef483dace34be161fb4f31e970cf4dce5ea Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Mon, 2 Sep 2024 14:15:40 +0100 Subject: [PATCH] Improve checking of *out_size in rans4x16 & arith These should be set to a minimum of at least rans_compress_bound_4x16() or arith_compress_bound() but as they're user supplied parameters we ought to consider the possibility of them being smaller and the potential for an encode failure. (Although arguably it's a user error too as we're not using the API as intended.) This happens in many places, but not consistently when very small sizes are given such that we don't have room for the compression header meta-data. --- htscodecs/arith_dynamic.c | 70 ++++++++++++++++++++++----- htscodecs/rANS_static4x16pr.c | 90 ++++++++++++++++++++++++++++++++--- htscodecs/tokenise_name3.c | 1 + 3 files changed, 143 insertions(+), 18 deletions(-) diff --git a/htscodecs/arith_dynamic.c b/htscodecs/arith_dynamic.c index 37aca77..0591958 100644 --- a/htscodecs/arith_dynamic.c +++ b/htscodecs/arith_dynamic.c @@ -733,15 +733,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, unsigned int c_meta_len; uint8_t *rle = NULL, *packed = NULL; - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } if (!out) { *out_size = arith_compress_bound(in_size, order); - if (!(out = malloc(*out_size))) + if (!(out = malloc(*out_size))) { + *out_size = 0; return NULL; + } } unsigned char *out_end = out + *out_size; @@ -751,6 +753,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, if (order & X_CAT) { out[0] = X_CAT; c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size); + if (c_meta_len + in_size > *out_size) { + *out_size = 0; + return NULL; + } memcpy(out+c_meta_len, in, in_size); *out_size = in_size+c_meta_len; } @@ -759,16 +765,23 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, int N = (order>>8); if (N == 0) N = 4; // default for compatibility with old tests - if (N > 255) + if (N > in_size) + N = in_size; + + if (N > 255) { + *out_size = 0; return NULL; + } unsigned char *transposed = malloc(in_size); unsigned int part_len[256]; unsigned int idx[256]; - if (!transposed) + if (!transposed) { + *out_size = 0; return NULL; - int i, j, x; + } + int i, j, x; for (i = 0; i < N; i++) { part_len[i] = in_size / N + ((in_size % N) > i); idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index @@ -788,6 +801,12 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, c_meta_len = 1; *out = order & ~X_NOSZ; c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); + if (c_meta_len >= *out_size) { + free(transposed); + *out_size = 0; + return NULL; + } + out[c_meta_len++] = N; out2_start = out2 = out+7+5*N; // shares a buffer with c_meta @@ -843,6 +862,9 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // {1, 128}}; for (j = 1; j <= m[MIN(i,3)][0]; j++) { + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); //fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]); if ((order&3) == 0 && (m[MIN(i,3)][j]&1)) @@ -850,13 +872,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, arith_compress_to(transposed+idx[i], part_len[i], out2, &olen2, m[MIN(i,3)][j] | X_NOSZ); - if (best_sz > olen2) { + if (olen2 && best_sz > olen2) { best_sz = olen2; best_j = j; } } -// if (best_j == 0) // none desireable -// return NULL; + + if (best_sz == INT_MAX) { + free(transposed); + *out_size = 0; + return NULL; + } if (best_j != j-1) { olen2 = *out_size - (out2 - out); arith_compress_to(transposed+idx[i], part_len[i], @@ -893,6 +919,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // PACK 2, 4 or 8 symbols into one byte. int pmeta_len; uint64_t packed_len; + if (c_meta_len + 256 > *out_size) { + *out_size = 0; + return NULL; + } packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); if (!packed) { out[0] &= ~X_PACK; @@ -934,6 +964,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, #else fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n"); free(out); + *out_size = 0; return NULL; #endif @@ -945,25 +976,40 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // *out_size = lzma_size; } else { + uint8_t *r; if (do_rle) { if (order == 0) - arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size); else - arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size); } else { //if (order == 2) // arith_compress_O2(in, in_size, out+c_meta_len, out_size); //else if (order == 1) - arith_compress_O1(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O1(in, in_size, out+c_meta_len, out_size); else - arith_compress_O0(in, in_size, out+c_meta_len, out_size); + r=arith_compress_O0(in, in_size, out+c_meta_len, out_size); + } + + if (!r) { + free(rle); + free(packed); + *out_size = 0; + return NULL; } } if (*out_size >= in_size) { out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK out[0] |= X_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } memcpy(out+c_meta_len, in, in_size); *out_size = in_size; } diff --git a/htscodecs/rANS_static4x16pr.c b/htscodecs/rANS_static4x16pr.c index 8c9a64a..d208f1c 100644 --- a/htscodecs/rANS_static4x16pr.c +++ b/htscodecs/rANS_static4x16pr.c @@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) { unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, unsigned char *out,unsigned int *out_size, int order) { - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } @@ -1177,8 +1177,10 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, *out_size = rans_compress_bound_4x16(in_size, order); if (*out_size == 0) return NULL; - if (!(out_free = out = malloc(*out_size))) + if (!(out_free = out = malloc(*out_size))) { + *out_size = 0; return NULL; + } } unsigned char *out_end = out + *out_size; @@ -1199,11 +1201,20 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, int N = (order>>8) & 0xff; if (N == 0) N = 4; // default for compatibility with old tests + if (N > in_size) + N = in_size; + + if (N > 255) { + *out_size = 0; + return NULL; + } + unsigned char *transposed = malloc(in_size); unsigned int part_len[256]; unsigned int idx[256]; if (!transposed) { free(out_free); + *out_size = 0; return NULL; } int i, j, x; @@ -1241,6 +1252,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, c_meta_len = 1; *out = order & ~RANS_ORDER_NOSZ; c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size); + if (c_meta_len >= *out_size) { + free(out_free); + free(transposed); + *out_size = 0; + return NULL; + } + out[c_meta_len++] = N; unsigned char *out_best = NULL; @@ -1249,7 +1267,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out2_start = out2 = out+7+5*N; // shares a buffer with c_meta for (i = 0; i < N; i++) { // Brute force try all methods. - int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; + int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX; for (j = 0; j < sizeof(m)/sizeof(*m); j++) { if ((order & m[j]) != m[j]) continue; @@ -1257,18 +1275,24 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // order-1 *only*; bit check above cannot elide order-0 if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) continue; + + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); rans_compress_to_4x16(transposed+idx[i], part_len[i], out2, &olen2, m[j] | RANS_ORDER_NOSZ | (order&RANS_ORDER_X32)); - if (best_sz > olen2) { + if (olen2 && best_sz > olen2) { best_sz = olen2; best_j = j; if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { unsigned char *tmp = realloc(out_best, olen2); if (!tmp) { free(out_free); + free(transposed); + *out_size = 0; return NULL; } out_best = tmp; @@ -1279,6 +1303,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, memcpy(out_best, out2, olen2); } } + + if (best_sz == INT_MAX) { + free(out_best); + free(out_free); + free(transposed); + *out_size = 0; + return NULL; + } + if (best_j < sizeof(m)/sizeof(*m)) { // Copy the best compression to output buffer if not current memcpy(out2, out_best, best_sz); @@ -1301,6 +1334,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out[0] = RANS_ORDER_CAT; c_meta_len = 1; c_meta_len += var_put_u32(&out[1], out_end, in_size); + + if (c_meta_len + in_size > *out_size) { + *out_size = 0; + return NULL; + } if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = c_meta_len + in_size; @@ -1329,6 +1367,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // PACK 2, 4 or 8 symbols into one byte. int pmeta_len; uint64_t packed_len; + if (c_meta_len + 256 > *out_size) { + free(out_free); + *out_size = 0; + return NULL; + } packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len); if (!packed) { out[0] &= ~RANS_ORDER_PACK; @@ -1357,6 +1400,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, c_rmeta_len = in_size+257; if (!(meta = malloc(c_rmeta_len))) { free(out_free); + *out_size = 0; return NULL; } @@ -1380,8 +1424,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // Compress lengths with O0 and literals with O0/O1 ("order" param) int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2; sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len); + if ((c_meta_len+sz+5) > *out_size) { + free(out_free); + free(rle); + free(meta); + free(packed); + *out_size = 0; + return NULL; + } c_rmeta_len = *out_size - (c_meta_len+sz+5); - rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len); + if (!rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len)) { + free(out_free); + free(rle); + free(meta); + free(packed); + *out_size = 0; + return NULL; + } if (c_rmeta_len < rmeta_len) { sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len); memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len); @@ -1404,17 +1463,36 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out[0] &= ~RANS_ORDER_RLE; } + if (c_meta_len > *out_size) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } + *out_size -= c_meta_len; if (order && in_size < 8) { out[0] &= ~1; order &= ~1; } - rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size); + if (!rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size)) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } if (*out_size >= in_size) { out[0] &= ~3; out[0] |= RANS_ORDER_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = in_size; diff --git a/htscodecs/tokenise_name3.c b/htscodecs/tokenise_name3.c index 7493579..08e6261 100644 --- a/htscodecs/tokenise_name3.c +++ b/htscodecs/tokenise_name3.c @@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, use_arith, out, &out_len) < 0) { free_context(ctx); + free(out); return NULL; }