Skip to content

Commit

Permalink
Improve checking of *out_size in rans4x16 & arith
Browse files Browse the repository at this point in the history
These should be set to a minimum of at least rans_compress_bound_4x16()
or arith_compress_bound()  but as they're user supplied parameters we
ought to consider the possibility of them being smaller and the
potential for an encode failure.  (Although arguably it's a user error
too as we're not using the API as intended.)

This happens in many places, but not consistently when very small
sizes are given such that we don't have room for the compression
header meta-data.
  • Loading branch information
jkbonfield committed Sep 4, 2024
1 parent 5179428 commit 027ebf6
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 28 deletions.
84 changes: 66 additions & 18 deletions htscodecs/arith_dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -733,15 +733,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
unsigned int c_meta_len;
uint8_t *rle = NULL, *packed = NULL;

if (in_size > INT_MAX) {
if (in_size > INT_MAX || (out && *out_size == 0)) {
*out_size = 0;
return NULL;
}

if (!out) {
*out_size = arith_compress_bound(in_size, order);
if (!(out = malloc(*out_size)))
if (!(out = malloc(*out_size))) {
*out_size = 0;
return NULL;
}
}
unsigned char *out_end = out + *out_size;

Expand All @@ -751,24 +753,30 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
if (order & X_CAT) {
out[0] = X_CAT;
c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size);
if (c_meta_len + in_size > *out_size) {
*out_size = 0;
return NULL;
}
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size+c_meta_len;
}

if (order & X_STRIPE) {
int N = (order>>8);
int N = (order>>8) & 0xff;
if (N == 0) N = 4; // default for compatibility with old tests

if (N > 255)
return NULL;
if (N > in_size)
N = in_size;

unsigned char *transposed = malloc(in_size);
unsigned int part_len[256];
unsigned int idx[256];
if (!transposed)
if (!transposed) {
*out_size = 0;
return NULL;
int i, j, x;
}

int i, j, x;
for (i = 0; i < N; i++) {
part_len[i] = in_size / N + ((in_size % N) > i);
idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index
Expand All @@ -788,13 +796,20 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
c_meta_len = 1;
*out = order & ~X_NOSZ;
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
if (c_meta_len >= *out_size) {
free(transposed);
*out_size = 0;
return NULL;
}

out[c_meta_len++] = N;

out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
for (i = 0; i < N; i++) {
// Brute force try all methods.
// FIXME: optimise this bit. Maybe learn over time?
int j, best_j = 0, best_sz = INT_MAX;
uint8_t *r;

// Works OK with read names. The first byte is the most important,
// as it has most variability (little-endian). After that it's
Expand Down Expand Up @@ -843,24 +858,37 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// {1, 128}};

for (j = 1; j <= m[MIN(i,3)][0]; j++) {
if (out2 - out > *out_size)
continue; // an error, but caught in best_sz check later

olen2 = *out_size - (out2 - out);
//fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]);
if ((order&3) == 0 && (m[MIN(i,3)][j]&1))
continue;

arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
if (best_sz > olen2) {
r = arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
if (r && olen2 && best_sz > olen2) {
best_sz = olen2;
best_j = j;
}
}
// if (best_j == 0) // none desireable
// return NULL;

if (best_sz == INT_MAX) {
free(transposed);
*out_size = 0;
return NULL;
}
if (best_j != j-1) {
olen2 = *out_size - (out2 - out);
arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][best_j] | X_NOSZ);
r = arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2,
m[MIN(i,3)][best_j] | X_NOSZ);
if (!r) {
free(transposed);
*out_size = 0;
return NULL;
}
}
out2 += olen2;
c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2);
Expand Down Expand Up @@ -893,6 +921,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// PACK 2, 4 or 8 symbols into one byte.
int pmeta_len;
uint64_t packed_len;
if (c_meta_len + 256 > *out_size) {
*out_size = 0;
return NULL;
}
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
if (!packed) {
out[0] &= ~X_PACK;
Expand Down Expand Up @@ -934,6 +966,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
#else
fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n");
free(out);
*out_size = 0;
return NULL;
#endif

Expand All @@ -945,25 +978,40 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// *out_size = lzma_size;

} else {
uint8_t *r;
if (do_rle) {
if (order == 0)
arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
else
arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
} else {
//if (order == 2)
// arith_compress_O2(in, in_size, out+c_meta_len, out_size);
//else
if (order == 1)
arith_compress_O1(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O1(in, in_size, out+c_meta_len, out_size);
else
arith_compress_O0(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O0(in, in_size, out+c_meta_len, out_size);
}

if (!r) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
}

if (*out_size >= in_size) {
out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK
out[0] |= X_CAT | no_size;

if (out + c_meta_len + in_size > out_end) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size;
}
Expand Down
94 changes: 84 additions & 10 deletions htscodecs/rANS_static4x16pr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) {
unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
unsigned char *out,unsigned int *out_size,
int order) {
if (in_size > INT_MAX) {
if (in_size > INT_MAX || (out && *out_size == 0)) {
*out_size = 0;
return NULL;
}
Expand All @@ -1177,8 +1177,10 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
*out_size = rans_compress_bound_4x16(in_size, order);
if (*out_size == 0)
return NULL;
if (!(out_free = out = malloc(*out_size)))
if (!(out_free = out = malloc(*out_size))) {
*out_size = 0;
return NULL;
}
}

unsigned char *out_end = out + *out_size;
Expand All @@ -1199,11 +1201,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
int N = (order>>8) & 0xff;
if (N == 0) N = 4; // default for compatibility with old tests

if (N > in_size)
N = in_size;

unsigned char *transposed = malloc(in_size);
unsigned int part_len[256];
unsigned int idx[256];
if (!transposed) {
free(out_free);
*out_size = 0;
return NULL;
}
int i, j, x;
Expand Down Expand Up @@ -1241,6 +1247,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
c_meta_len = 1;
*out = order & ~RANS_ORDER_NOSZ;
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
if (c_meta_len >= *out_size) {
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}

out[c_meta_len++] = N;

unsigned char *out_best = NULL;
Expand All @@ -1249,26 +1262,33 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
for (i = 0; i < N; i++) {
// Brute force try all methods.
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10;
uint8_t *r;
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX;
for (j = 0; j < sizeof(m)/sizeof(*m); j++) {
if ((order & m[j]) != m[j])
continue;

// order-1 *only*; bit check above cannot elide order-0
if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0)
continue;

if (out2 - out > *out_size)
continue; // an error, but caught in best_sz check later

olen2 = *out_size - (out2 - out);
rans_compress_to_4x16(transposed+idx[i], part_len[i],
out2, &olen2,
m[j] | RANS_ORDER_NOSZ
| (order&RANS_ORDER_X32));
if (best_sz > olen2) {
r = rans_compress_to_4x16(transposed+idx[i], part_len[i],
out2, &olen2,
m[j] | RANS_ORDER_NOSZ
| (order&RANS_ORDER_X32));
if (r && olen2 && best_sz > olen2) {
best_sz = olen2;
best_j = j;
if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) {
unsigned char *tmp = realloc(out_best, olen2);
if (!tmp) {
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}
out_best = tmp;
Expand All @@ -1279,6 +1299,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
memcpy(out_best, out2, olen2);
}
}

if (best_sz == INT_MAX) {
free(out_best);
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}

if (best_j < sizeof(m)/sizeof(*m)) {
// Copy the best compression to output buffer if not current
memcpy(out2, out_best, best_sz);
Expand All @@ -1301,6 +1330,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out[0] = RANS_ORDER_CAT;
c_meta_len = 1;
c_meta_len += var_put_u32(&out[1], out_end, in_size);

if (c_meta_len + in_size > *out_size) {
*out_size = 0;
return NULL;
}
if (in_size)
memcpy(out+c_meta_len, in, in_size);
*out_size = c_meta_len + in_size;
Expand Down Expand Up @@ -1329,6 +1363,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
// PACK 2, 4 or 8 symbols into one byte.
int pmeta_len;
uint64_t packed_len;
if (c_meta_len + 256 > *out_size) {
free(out_free);
*out_size = 0;
return NULL;
}
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
if (!packed) {
out[0] &= ~RANS_ORDER_PACK;
Expand Down Expand Up @@ -1357,6 +1396,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
c_rmeta_len = in_size+257;
if (!(meta = malloc(c_rmeta_len))) {
free(out_free);
*out_size = 0;
return NULL;
}

Expand All @@ -1380,8 +1420,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
// Compress lengths with O0 and literals with O0/O1 ("order" param)
int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2;
sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len);
if ((c_meta_len+sz+5) > *out_size) {
free(out_free);
free(rle);
free(meta);
free(packed);
*out_size = 0;
return NULL;
}
c_rmeta_len = *out_size - (c_meta_len+sz+5);
rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len);
if (!rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len)) {
free(out_free);
free(rle);
free(meta);
free(packed);
*out_size = 0;
return NULL;
}
if (c_rmeta_len < rmeta_len) {
sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len);
memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len);
Expand All @@ -1404,17 +1459,36 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out[0] &= ~RANS_ORDER_RLE;
}

if (c_meta_len > *out_size) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}

*out_size -= c_meta_len;
if (order && in_size < 8) {
out[0] &= ~1;
order &= ~1;
}

rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size);
if (!rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size)) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}

if (*out_size >= in_size) {
out[0] &= ~3;
out[0] |= RANS_ORDER_CAT | no_size;

if (out + c_meta_len + in_size > out_end) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
if (in_size)
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size;
Expand Down
1 change: 1 addition & 0 deletions htscodecs/tokenise_name3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith,
if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level,
use_arith, out, &out_len) < 0) {
free_context(ctx);
free(out);
return NULL;
}

Expand Down

0 comments on commit 027ebf6

Please sign in to comment.