From 59ad21a8b0ea47b8346250a2caec212c6c552f79 Mon Sep 17 00:00:00 2001 From: jeffro256 Date: Mon, 4 Nov 2024 13:06:35 -0600 Subject: [PATCH] blake2b: compress and wipe keys immediately Changes keyed hashing such that: - Keys are not copied into a temporary 128-byte stack buffer, nor into the hash state buffer - Keys are compressed immediately during initialization, not during `blake2b_final` or `blake2b_update` - Internal calculation vectors `m` and `v` are wiped in the same stack frame in which they are created These changes should make keyed Blake2b hashing more memory-secure. Also, as an optimization, we use a single `memcpy` call for regular copying into the message vector `m` instead of 16 `load64` calls when building for a little-endian system. --- src/crypto/blake2b.c | 117 ++++++++++++++++++++++++++++++++++++------- src/crypto/blake2b.h | 3 +- 2 files changed, 102 insertions(+), 18 deletions(-) diff --git a/src/crypto/blake2b.c b/src/crypto/blake2b.c index 7e8be78905..e9435b9ec4 100644 --- a/src/crypto/blake2b.c +++ b/src/crypto/blake2b.c @@ -286,7 +286,83 @@ int blake2b_init(blake2b_state *S, size_t outlen) { return blake2b_init_param(S, &P); } -int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) { + +/** + * The difference from `blake2b_compress()` is that this function directly takes + * a variable-sized input, treating it as a block where the remaining bytes are + * zeros, and also wipes stack variables `m` and `v`. The intended use case is + * initializing hash states with keys, hence the stricter memory handling. I + * don't attempt to prevent swapping of the variables `m` and `v` since the OS + * would have to be dumber than a box of rocks to swap stack variables used just + * microseconds ago. `keylen` is assumed to be less than or equal to 128.
+ */ +static void blake2b_compress_key(blake2b_state *S, const uint8_t *key, size_t keylen) { + uint64_t m[16]; + uint64_t v[16]; + unsigned int i, r; + + memset(m, 0, sizeof(m)); +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(m, key, keylen); +#else + for (i = 0; i < keylen; ++i) { + m[i / 8] |= (((uint64_t) key[i]) << ((i % 8) * 8)); + } +#endif + + for (i = 0; i < 8; ++i) { + v[i] = S->h[i]; + } + + v[8] = blake2b_IV[0]; + v[9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + +#define G(r, i, a, b, c, d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while ((void)0, 0) + +#define ROUND(r) \ + do { \ + G(r, 0, v[0], v[4], v[8], v[12]); \ + G(r, 1, v[1], v[5], v[9], v[13]); \ + G(r, 2, v[2], v[6], v[10], v[14]); \ + G(r, 3, v[3], v[7], v[11], v[15]); \ + G(r, 4, v[0], v[5], v[10], v[15]); \ + G(r, 5, v[1], v[6], v[11], v[12]); \ + G(r, 6, v[2], v[7], v[8], v[13]); \ + G(r, 7, v[3], v[4], v[9], v[14]); \ + } while ((void)0, 0) + + for (r = 0; r < 12; ++r) { + ROUND(r); + } + + for (i = 0; i < 8; ++i) { + S->h[i] = S->h[i] ^ v[i] ^ v[i + 8]; + } + + clear_internal_memory(m, sizeof(m)); + clear_internal_memory(v, sizeof(v)); + +#undef G +#undef ROUND +} + +int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen, int has_data) { blake2b_param P; if (S == NULL) { @@ -321,14 +397,13 @@ int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t ke return -1; } - { - uint8_t block[BLAKE2B_BLOCKBYTES]; - memset(block, 0, BLAKE2B_BLOCKBYTES); - memcpy(block, key, keylen); - blake2b_update(S, block, BLAKE2B_BLOCKBYTES); - /* Burn the key from stack */ - 
clear_internal_memory(block, BLAKE2B_BLOCKBYTES); + blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES); + if (!has_data) { + blake2b_set_lastblock(S); } + blake2b_compress_key(S, key, keylen); + S->skip_final_compress = 1; + return 0; } @@ -337,9 +412,13 @@ static void blake2b_compress(blake2b_state *S, const uint8_t *block) { uint64_t v[16]; unsigned int i, r; +#if defined(NATIVE_LITTLE_ENDIAN) + memcpy(m, block, BLAKE2B_BLOCKBYTES); +#else for (i = 0; i < 16; ++i) { m[i] = load64(block + i * sizeof(m[i])); } +#endif for (i = 0; i < 8; ++i) { v[i] = S->h[i]; @@ -427,6 +506,7 @@ int blake2b_update(blake2b_state *S, const void *in, size_t inlen) { } memcpy(&S->buf[S->buflen], pin, inlen); S->buflen += (unsigned int)inlen; + S->skip_final_compress = 0; return 0; } @@ -439,15 +519,18 @@ int blake2b_final(blake2b_state *S, void *out, size_t outlen) { return -1; } - /* Is this a reused state? */ - if (S->f[0] != 0) { - return -1; - } + if (!S->skip_final_compress) { + /* Is this a reused state? 
*/ + if (S->f[0] != 0) { + return -1; + } - blake2b_increment_counter(S, S->buflen); - blake2b_set_lastblock(S); - memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ - blake2b_compress(S, S->buf); + blake2b_increment_counter(S, S->buflen); + blake2b_set_lastblock(S); + memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */ + blake2b_compress(S, S->buf); + } + S->skip_final_compress = 0; for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */ store64(buffer + sizeof(S->h[i]) * i, S->h[i]); @@ -479,7 +562,7 @@ int blake2b(void *out, size_t outlen, const void *in, size_t inlen, } if (keylen > 0) { - if (blake2b_init_key(&S, outlen, key, keylen) < 0) { + if (blake2b_init_key(&S, outlen, key, keylen, inlen != 0) < 0) { goto fail; } } diff --git a/src/crypto/blake2b.h b/src/crypto/blake2b.h index ea00f381be..c53daddbe6 100644 --- a/src/crypto/blake2b.h +++ b/src/crypto/blake2b.h @@ -76,6 +76,7 @@ extern "C" { unsigned buflen; unsigned outlen; uint8_t last_node; + uint8_t skip_final_compress; } blake2b_state; /* Ensure param structs have not been wrongly padded */ @@ -98,7 +99,7 @@ extern "C" { /* Streaming API */ int blake2b_init(blake2b_state *S, size_t outlen); int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, - size_t keylen); + size_t keylen, int has_data); int blake2b_init_param(blake2b_state *S, const blake2b_param *P); int blake2b_update(blake2b_state *S, const void *in, size_t inlen); int blake2b_final(blake2b_state *S, void *out, size_t outlen);