Skip to content

Commit

Permalink
Unify Context .has() and .offset() into .find()
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
  • Loading branch information
jviotti committed Oct 9, 2024
1 parent d27ae67 commit d2a13f4
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 87 deletions.
18 changes: 9 additions & 9 deletions src/runtime/encoder_any.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,26 +110,26 @@ auto Encoder::ANY_PACKED_TYPE_TAG_BYTE_PREFIX(
} else if (document.is_string()) {
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
const auto size{document.byte_size()};
const bool is_shared{this->context_.has(value, Context::Type::Standalone)};
const auto shared{this->context_.find(value, Context::Type::Standalone)};
if (size < uint_max<5>) {
const std::uint8_t type{is_shared ? TYPE_SHARED_STRING : TYPE_STRING};
const std::uint8_t type{shared.has_value() ? TYPE_SHARED_STRING
: TYPE_STRING};
this->put_byte(
static_cast<std::uint8_t>(type | ((size + 1) << type_size)));
if (is_shared) {
this->put_varint(
this->position() -
this->context_.offset(value, Context::Type::Standalone));
if (shared.has_value()) {
this->put_varint(this->position() - shared.value());
} else {
this->context_.record(value, this->position(),
Context::Type::Standalone);
this->put_string_utf8(value, size);
}
} else if (size >= uint_max<5> && size < uint_max<5> * 2 && !is_shared) {
} else if (size >= uint_max<5> && size < uint_max<5> * 2 &&
!shared.has_value()) {
this->put_byte(static_cast<std::uint8_t>(
TYPE_LONG_STRING | ((size - uint_max<5>) << type_size)));
this->put_string_utf8(value, size);
} else if (size >= 2 << (SUBTYPE_LONG_STRING_BASE_EXPONENT_7 - 1) &&
!is_shared) {
!shared.has_value()) {
const std::uint8_t exponent{closest_smallest_exponent(
size, 2, SUBTYPE_LONG_STRING_BASE_EXPONENT_7,
SUBTYPE_LONG_STRING_BASE_EXPONENT_10)};
Expand All @@ -141,7 +141,7 @@ auto Encoder::ANY_PACKED_TYPE_TAG_BYTE_PREFIX(
// Exploit the fact that a shared string always starts
// with an impossible length marker (0) to avoid having
// to encode an additional tag
if (!is_shared) {
if (!shared.has_value()) {
this->put_byte(TYPE_STRING);
}

Expand Down
35 changes: 16 additions & 19 deletions src/runtime/encoder_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,19 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED(
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
const auto size{value.size()};
assert(document.size() == size);
const bool is_shared{this->context_.has(value, Context::Type::Standalone)};
const auto shared{this->context_.find(value, Context::Type::Standalone)};

// (1) Write 0x00 if shared, else do nothing
if (is_shared) {
if (shared.has_value()) {
this->put_byte(0);
}

// (2) Write length of the string + 1 (so it will never be zero)
this->put_varint(size - options.minimum + 1);

// (3) Write relative offset if shared, else write plain string
if (is_shared) {
this->put_varint(this->position() -
this->context_.offset(value, Context::Type::Standalone));
if (shared.has_value()) {
this->put_varint(this->position() - shared.value());
} else {
this->context_.record(value, this->position(), Context::Type::Standalone);
this->put_string_utf8(value, size);
Expand All @@ -49,20 +48,19 @@ auto Encoder::ROOF_VARINT_PREFIX_UTF8_STRING_SHARED(
const auto size{value.size()};
assert(document.size() == size);
assert(size <= options.maximum);
const bool is_shared{this->context_.has(value, Context::Type::Standalone)};
const auto shared{this->context_.find(value, Context::Type::Standalone)};

// (1) Write 0x00 if shared, else do nothing
if (is_shared) {
if (shared.has_value()) {
this->put_byte(0);
}

// (2) Write length of the string + 1 (so it will never be zero)
this->put_varint(options.maximum - size + 1);

// (3) Write relative offset if shared, else write plain string
if (is_shared) {
this->put_varint(this->position() -
this->context_.offset(value, Context::Type::Standalone));
if (shared.has_value()) {
this->put_varint(this->position() - shared.value());
} else {
this->context_.record(value, this->position(), Context::Type::Standalone);
this->put_string_utf8(value, size);
Expand All @@ -79,20 +77,19 @@ auto Encoder::BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED(
assert(options.minimum <= options.maximum);
assert(is_byte(options.maximum - options.minimum + 1));
assert(is_within(size, options.minimum, options.maximum));
const bool is_shared{this->context_.has(value, Context::Type::Standalone)};
const auto shared{this->context_.find(value, Context::Type::Standalone)};

// (1) Write 0x00 if shared, else do nothing
if (is_shared) {
if (shared.has_value()) {
this->put_byte(0);
}

// (2) Write length of the string + 1 (so it will never be zero)
this->put_byte(static_cast<std::uint8_t>(size - options.minimum + 1));

// (3) Write relative offset if shared, else write plain string
if (is_shared) {
this->put_varint(this->position() -
this->context_.offset(value, Context::Type::Standalone));
if (shared.has_value()) {
this->put_varint(this->position() - shared.value());
} else {
this->context_.record(value, this->position(), Context::Type::Standalone);
this->put_string_utf8(value, size);
Expand Down Expand Up @@ -129,12 +126,12 @@ auto Encoder::PREFIX_VARINT_LENGTH_STRING_SHARED(
assert(document.is_string());
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};

if (this->context_.has(value, Context::Type::PrefixLengthVarintPlusOne)) {
const auto shared{
this->context_.find(value, Context::Type::PrefixLengthVarintPlusOne)};
if (shared.has_value()) {
const auto new_offset{this->position()};
this->put_byte(0);
this->put_varint(
this->position() -
this->context_.offset(value, Context::Type::PrefixLengthVarintPlusOne));
this->put_varint(this->position() - shared.value());
// Bump the context cache for locality purposes
this->context_.record(value, new_offset,
Context::Type::PrefixLengthVarintPlusOne);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

#include <sourcemeta/jsontoolkit/json.h>

#include <cassert> // assert
#include <iterator> // std::cbegin, std::cend
#include <map> // std::map
#include <stdexcept> // std::logic_error
#include <utility> // std::pair, std::make_pair
#include <cassert> // assert
#include <iterator> // std::cbegin, std::cend
#include <map> // std::map
#include <optional> // std::optional, std::nullopt
#include <utility> // std::pair, std::make_pair

// Encoding a shared string has some overhead, such as the
// shared string marker + the offset, so its not worth
Expand Down Expand Up @@ -47,7 +47,8 @@ class SOURCEMETA_JSONBINPACK_RUNTIME_EXPORT Context {

// If the string already exists, we want to
// bump the offset for locality purposes.
if (this->has(value, type)) {
const auto maybe_entry{this->find(value, type)};
if (maybe_entry.has_value()) {
const auto key{std::make_pair(value, type)};
const auto previous_offset{this->strings[key]};
if (offset > previous_offset) {
Expand Down Expand Up @@ -79,16 +80,14 @@ class SOURCEMETA_JSONBINPACK_RUNTIME_EXPORT Context {
this->offsets.erase(iterator);
}

auto has(const sourcemeta::jsontoolkit::JSON::String &value,
const Type type) const -> bool {
return this->strings.contains(std::make_pair(value, type));
}
auto find(const sourcemeta::jsontoolkit::JSON::String &value,
const Type type) const -> std::optional<std::uint64_t> {
const auto result{this->strings.find(std::make_pair(value, type))};
if (result == this->strings.cend()) {
return std::nullopt;
}

auto offset(const sourcemeta::jsontoolkit::JSON::String &value,
const Type type) const -> std::uint64_t {
// This method assumes the value indeed exists for performance reasons
assert(this->has(value, type));
return this->strings.at(std::make_pair(value, type));
return result->second;
}

private:
Expand Down
109 changes: 65 additions & 44 deletions test/runtime/encode_context_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,59 @@
TEST(JSONBinPack_Encoder, context_record_string) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
EXPECT_FALSE(context.has("foo", ContextType::Standalone));
const auto result_1{context.find("foo", ContextType::Standalone)};
EXPECT_FALSE(result_1.has_value());
context.record("foo", 2, ContextType::Standalone);
EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_EQ(context.offset("foo", ContextType::Standalone), 2);
const auto result_2{context.find("foo", ContextType::Standalone)};
EXPECT_TRUE(result_2.has_value());
EXPECT_EQ(result_2.value(), 2);
}

TEST(JSONBinPack_Encoder, context_record_string_too_short) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
EXPECT_FALSE(context.has("fo", ContextType::Standalone));
const auto result_1{context.find("fo", ContextType::Standalone)};
EXPECT_FALSE(result_1.has_value());
context.record("fo", 2, ContextType::Standalone);
EXPECT_FALSE(context.has("fo", ContextType::Standalone));
const auto result_2{context.find("fo", ContextType::Standalone)};
EXPECT_FALSE(result_2.has_value());
}

TEST(JSONBinPack_Encoder, context_record_string_empty) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
EXPECT_FALSE(context.has("", ContextType::Standalone));
const auto result_1{context.find("fo", ContextType::Standalone)};
EXPECT_FALSE(result_1.has_value());
context.record("", 2, ContextType::Standalone);
EXPECT_FALSE(context.has("", ContextType::Standalone));
const auto result_2{context.find("", ContextType::Standalone)};
EXPECT_FALSE(result_2.has_value());
}

TEST(JSONBinPack_Encoder, context_has_on_unknown_string) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
EXPECT_FALSE(context.has("foobarbaz", ContextType::Standalone));
const auto result{context.find("foobarbaz", ContextType::Standalone)};
EXPECT_FALSE(result.has_value());
}

TEST(JSONBinPack_Encoder, context_increase_offset) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
context.record("foo", 2, ContextType::Standalone);
context.record("foo", 4, ContextType::Standalone);
EXPECT_EQ(context.offset("foo", ContextType::Standalone), 4);
const auto result{context.find("foo", ContextType::Standalone)};
EXPECT_TRUE(result.has_value());
EXPECT_EQ(result.value(), 4);
}

TEST(JSONBinPack_Encoder, context_do_not_decrease_offset) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
context.record("foo", 4, ContextType::Standalone);
context.record("foo", 2, ContextType::Standalone);
EXPECT_EQ(context.offset("foo", ContextType::Standalone), 4);
const auto result{context.find("foo", ContextType::Standalone)};
EXPECT_TRUE(result.has_value());
EXPECT_EQ(result.value(), 4);
}

TEST(JSONBinPack_Encoder, context_not_record_too_big) {
Expand All @@ -60,7 +71,8 @@ TEST(JSONBinPack_Encoder, context_not_record_too_big) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
context.record(too_big, 1, ContextType::Standalone);
EXPECT_FALSE(context.has(too_big, ContextType::Standalone));
const auto result{context.find(too_big, ContextType::Standalone)};
EXPECT_FALSE(result.has_value());
}

TEST(JSONBinPack_Encoder, context_remove_oldest) {
Expand All @@ -70,27 +82,31 @@ TEST(JSONBinPack_Encoder, context_remove_oldest) {
context.record("bar", 3, ContextType::Standalone);
context.record("baz", 7, ContextType::PrefixLengthVarintPlusOne);

EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_TRUE(context.has("bar", ContextType::Standalone));
EXPECT_TRUE(context.has("baz", ContextType::PrefixLengthVarintPlusOne));
EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value());
EXPECT_TRUE(context.find("bar", ContextType::Standalone).has_value());
EXPECT_TRUE(
context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value());

context.remove_oldest();

EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_FALSE(context.has("bar", ContextType::Standalone));
EXPECT_TRUE(context.has("baz", ContextType::PrefixLengthVarintPlusOne));
EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value());
EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value());
EXPECT_TRUE(
context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value());

context.remove_oldest();

EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_FALSE(context.has("bar", ContextType::Standalone));
EXPECT_FALSE(context.has("baz", ContextType::PrefixLengthVarintPlusOne));
EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value());
EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value());
EXPECT_FALSE(
context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value());

context.remove_oldest();

EXPECT_FALSE(context.has("foo", ContextType::Standalone));
EXPECT_FALSE(context.has("bar", ContextType::Standalone));
EXPECT_FALSE(context.has("baz", ContextType::PrefixLengthVarintPlusOne));
EXPECT_FALSE(context.find("foo", ContextType::Standalone).has_value());
EXPECT_FALSE(context.find("bar", ContextType::Standalone).has_value());
EXPECT_FALSE(
context.find("baz", ContextType::PrefixLengthVarintPlusOne).has_value());
}

TEST(JSONBinPack_Encoder, context_is_a_circular_buffer) {
Expand All @@ -117,27 +133,27 @@ TEST(JSONBinPack_Encoder, context_is_a_circular_buffer) {
context.record(string_3, length * 2, ContextType::Standalone);
context.record(string_4, length * 3, ContextType::Standalone);

EXPECT_TRUE(context.has(string_1, ContextType::Standalone));
EXPECT_TRUE(context.has(string_2, ContextType::Standalone));
EXPECT_TRUE(context.has(string_3, ContextType::Standalone));
EXPECT_TRUE(context.has(string_4, ContextType::Standalone));
EXPECT_TRUE(context.find(string_1, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_2, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value());

context.record(string_5, length * 4, ContextType::Standalone);

EXPECT_FALSE(context.has(string_1, ContextType::Standalone));
EXPECT_TRUE(context.has(string_2, ContextType::Standalone));
EXPECT_TRUE(context.has(string_3, ContextType::Standalone));
EXPECT_TRUE(context.has(string_4, ContextType::Standalone));
EXPECT_TRUE(context.has(string_5, ContextType::Standalone));
EXPECT_FALSE(context.find(string_1, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_2, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_5, ContextType::Standalone).has_value());

context.record(string_6, length * 5, ContextType::Standalone);

EXPECT_FALSE(context.has(string_1, ContextType::Standalone));
EXPECT_FALSE(context.has(string_2, ContextType::Standalone));
EXPECT_TRUE(context.has(string_3, ContextType::Standalone));
EXPECT_TRUE(context.has(string_4, ContextType::Standalone));
EXPECT_TRUE(context.has(string_5, ContextType::Standalone));
EXPECT_TRUE(context.has(string_6, ContextType::Standalone));
EXPECT_FALSE(context.find(string_1, ContextType::Standalone).has_value());
EXPECT_FALSE(context.find(string_2, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_3, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_4, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_5, ContextType::Standalone).has_value());
EXPECT_TRUE(context.find(string_6, ContextType::Standalone).has_value());
}

TEST(JSONBinPack_Encoder, context_same_string_different_type) {
Expand All @@ -146,17 +162,22 @@ TEST(JSONBinPack_Encoder, context_same_string_different_type) {
context.record("foo", 10, ContextType::Standalone);
context.record("foo", 20, ContextType::PrefixLengthVarintPlusOne);

EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_TRUE(context.has("foo", ContextType::PrefixLengthVarintPlusOne));
const auto result_1{context.find("foo", ContextType::Standalone)};
const auto result_2{
context.find("foo", ContextType::PrefixLengthVarintPlusOne)};

EXPECT_EQ(context.offset("foo", ContextType::Standalone), 10);
EXPECT_EQ(context.offset("foo", ContextType::PrefixLengthVarintPlusOne), 20);
EXPECT_TRUE(result_1.has_value());
EXPECT_TRUE(result_2.has_value());

EXPECT_EQ(result_1.value(), 10);
EXPECT_EQ(result_2.value(), 20);
}

TEST(JSONBinPack_Encoder, context_no_fallback_type) {
sourcemeta::jsonbinpack::Context context;
using ContextType = sourcemeta::jsonbinpack::Context::Type;
context.record("foo", 10, ContextType::Standalone);
EXPECT_TRUE(context.has("foo", ContextType::Standalone));
EXPECT_FALSE(context.has("foo", ContextType::PrefixLengthVarintPlusOne));
EXPECT_TRUE(context.find("foo", ContextType::Standalone).has_value());
EXPECT_FALSE(
context.find("foo", ContextType::PrefixLengthVarintPlusOne).has_value());
}

0 comments on commit d2a13f4

Please sign in to comment.