Skip to content

Commit

Permalink
Correctly encode unicode strings
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
  • Loading branch information
jviotti committed Oct 9, 2024
1 parent 5b0b335 commit caf45ab
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/runtime/encoder_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ auto Encoder::FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED(
assert(document.is_string());
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
const auto size{value.size()};
assert(document.size() == size);
assert(document.byte_size() == size);
const auto shared{this->cache_.find(value, Cache::Type::Standalone)};

// (1) Write 0x00 if shared, else do nothing
Expand All @@ -46,7 +46,7 @@ auto Encoder::ROOF_VARINT_PREFIX_UTF8_STRING_SHARED(
assert(document.is_string());
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
const auto size{value.size()};
assert(document.size() == size);
assert(document.byte_size() == size);
assert(size <= options.maximum);
const auto shared{this->cache_.find(value, Cache::Type::Standalone)};

Expand All @@ -73,7 +73,7 @@ auto Encoder::BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED(
assert(document.is_string());
const sourcemeta::jsontoolkit::JSON::String value{document.to_string()};
const auto size{value.size()};
assert(document.size() == size);
assert(document.byte_size() == size);
assert(options.minimum <= options.maximum);
assert(is_byte(options.maximum - options.minimum + 1));
assert(is_within(size, options.minimum, options.maximum));
Expand Down Expand Up @@ -137,7 +137,7 @@ auto Encoder::PREFIX_VARINT_LENGTH_STRING_SHARED(
Cache::Type::PrefixLengthVarintPlusOne);
} else {
const auto size{value.size()};
assert(document.size() == size);
assert(document.byte_size() == size);
this->cache_.record(value, this->position(),
Cache::Type::PrefixLengthVarintPlusOne);
this->put_varint(size + 1);
Expand Down
32 changes: 32 additions & 0 deletions test/runtime/decode_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ TEST(JSONBinPack_Decoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_foo_0_foo_3) {
EXPECT_EQ(result2, expected);
}

// Decodes a FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED string (minimum 1) whose
// payload is not pure ASCII: the four bytes after the prefix hold only three
// characters, because U+00F8 takes two bytes in UTF-8
TEST(JSONBinPack_Decoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_1) {
  const sourcemeta::jsontoolkit::JSON expected{"foø"};
  InputByteStream input{
      0x04,       // Prefix byte
      0x66, 0x6f, // "fo"
      0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
  };
  sourcemeta::jsonbinpack::Decoder decoder{input};
  const auto decoded = decoder.FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED({1});
  EXPECT_EQ(decoded, expected);
}

TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_4) {
InputByteStream stream{0x02, 0x66, 0x6f, 0x6f};
sourcemeta::jsonbinpack::Decoder decoder{stream};
Expand All @@ -61,6 +69,14 @@ TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_3_foo_5) {
EXPECT_EQ(result2, expected);
}

// Decodes a ROOF_VARINT_PREFIX_UTF8_STRING_SHARED string (maximum 4) whose
// payload is not pure ASCII: the four bytes after the prefix hold only three
// characters, because U+00F8 takes two bytes in UTF-8. The input bytes are
// the ones the matching ROOF encoder test expects for the same options
TEST(JSONBinPack_Decoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_4) {
  InputByteStream stream{
      0x01,       // Prefix byte
      0x66, 0x6f, // "fo"
      0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
  };
  sourcemeta::jsonbinpack::Decoder decoder{stream};
  // Must call the ROOF decoder, which is the encoding this test is named for
  const auto result = decoder.ROOF_VARINT_PREFIX_UTF8_STRING_SHARED({4});
  const sourcemeta::jsontoolkit::JSON expected{"foø"};
  EXPECT_EQ(result, expected);
}

TEST(JSONBinPack_Decoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_foo_3_5) {
InputByteStream stream{0x01, 0x66, 0x6f, 0x6f};
sourcemeta::jsonbinpack::Decoder decoder{stream};
Expand Down Expand Up @@ -94,6 +110,14 @@ TEST(JSONBinPack_Decoder,
EXPECT_EQ(result2, expected);
}

// Decodes a BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED string (bounds 0 to 6)
// whose payload is not pure ASCII: the four bytes after the prefix hold only
// three characters, because U+00F8 takes two bytes in UTF-8
TEST(JSONBinPack_Decoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_unicode_0_6) {
  const sourcemeta::jsontoolkit::JSON expected{"foø"};
  InputByteStream input{
      0x05,       // Prefix byte
      0x66, 0x6f, // "fo"
      0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
  };
  sourcemeta::jsonbinpack::Decoder decoder{input};
  const auto decoded = decoder.BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED({0, 6});
  EXPECT_EQ(decoded, expected);
}

TEST(JSONBinPack_Decoder, RFC3339_DATE_INTEGER_TRIPLET_2014_10_01) {
InputByteStream stream{0xde, 0x07, 0x0a, 0x01};
sourcemeta::jsonbinpack::Decoder decoder{stream};
Expand Down Expand Up @@ -155,3 +179,11 @@ TEST(JSONBinPack_Decoder,
EXPECT_EQ(result1, expected);
EXPECT_EQ(result2, expected);
}

// Decodes a PREFIX_VARINT_LENGTH_STRING_SHARED string whose payload is not
// pure ASCII: the four bytes after the prefix hold only three characters,
// because U+00F8 takes two bytes in UTF-8
TEST(JSONBinPack_Decoder, PREFIX_VARINT_LENGTH_STRING_SHARED_unicode) {
  const sourcemeta::jsontoolkit::JSON expected{"foø"};
  InputByteStream input{
      0x05,       // Prefix byte
      0x66, 0x6f, // "fo"
      0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
  };
  sourcemeta::jsonbinpack::Decoder decoder{input};
  const auto decoded = decoder.PREFIX_VARINT_LENGTH_STRING_SHARED({});
  EXPECT_EQ(decoded, expected);
}
32 changes: 32 additions & 0 deletions test/runtime/encode_string_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ TEST(JSONBinPack_Encoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_foo_0_foo_3) {
EXPECT_BYTES(stream, {0x04, 0x66, 0x6f, 0x6f, 0x00, 0x01, 0x05});
}

// Encodes a three-character string with FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED
// (minimum 1). The last character, U+00F8, takes two bytes in UTF-8, so the
// payload written after the prefix is four bytes long
TEST(JSONBinPack_Encoder, FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_1) {
  OutputByteStream output{};
  sourcemeta::jsonbinpack::Encoder encoder{output};
  const sourcemeta::jsontoolkit::JSON input{"foø"};
  encoder.FLOOR_VARINT_PREFIX_UTF8_STRING_SHARED(input, {1});
  EXPECT_BYTES(output, {
                           0x04,       // Prefix byte
                           0x66, 0x6f, // "fo"
                           0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
                       });
}

TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_4) {
const sourcemeta::jsontoolkit::JSON document{"foo"};
OutputByteStream stream{};
Expand All @@ -46,6 +54,14 @@ TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_foo_3_foo_5) {
EXPECT_BYTES(stream, {0x01, 0x66, 0x6f, 0x6f, 0x00, 0x03, 0x05});
}

// Encodes a three-character string with ROOF_VARINT_PREFIX_UTF8_STRING_SHARED
// (maximum 4). The last character, U+00F8, takes two bytes in UTF-8, so the
// payload written after the prefix is four bytes long
TEST(JSONBinPack_Encoder, ROOF_VARINT_PREFIX_UTF8_STRING_SHARED_unicode_4) {
  OutputByteStream output{};
  sourcemeta::jsonbinpack::Encoder encoder{output};
  const sourcemeta::jsontoolkit::JSON input{"foø"};
  encoder.ROOF_VARINT_PREFIX_UTF8_STRING_SHARED(input, {4});
  EXPECT_BYTES(output, {
                           0x01,       // Prefix byte
                           0x66, 0x6f, // "fo"
                           0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
                       });
}

TEST(JSONBinPack_Encoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_foo_3_5) {
const sourcemeta::jsontoolkit::JSON document{"foo"};
OutputByteStream stream{};
Expand All @@ -72,6 +88,14 @@ TEST(JSONBinPack_Encoder,
EXPECT_BYTES(stream, {0x04, 0x66, 0x6f, 0x6f, 0x00, 0x01, 0x05});
}

// Encodes a three-character string with BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED
// (bounds 0 to 6). The last character, U+00F8, takes two bytes in UTF-8, so
// the payload written after the prefix is four bytes long
TEST(JSONBinPack_Encoder, BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED_unicode_0_6) {
  OutputByteStream output{};
  sourcemeta::jsonbinpack::Encoder encoder{output};
  const sourcemeta::jsontoolkit::JSON input{"foø"};
  encoder.BOUNDED_8BIT_PREFIX_UTF8_STRING_SHARED(input, {0, 6});
  EXPECT_BYTES(output, {
                           0x05,       // Prefix byte
                           0x66, 0x6f, // "fo"
                           0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
                       });
}

TEST(JSONBinPack_Encoder, RFC3339_DATE_INTEGER_TRIPLET_2014_10_01) {
const sourcemeta::jsontoolkit::JSON document{"2014-10-01"};
OutputByteStream stream{};
Expand Down Expand Up @@ -144,3 +168,11 @@ TEST(JSONBinPack_Encoder,
0x05 // Pointer (6 - 1 = 5)
});
}

// Encodes a three-character string with PREFIX_VARINT_LENGTH_STRING_SHARED.
// The last character, U+00F8, takes two bytes in UTF-8, so the payload
// written after the prefix is four bytes long
TEST(JSONBinPack_Encoder, PREFIX_VARINT_LENGTH_STRING_SHARED_unicode) {
  OutputByteStream output{};
  sourcemeta::jsonbinpack::Encoder encoder{output};
  const sourcemeta::jsontoolkit::JSON input{"foø"};
  encoder.PREFIX_VARINT_LENGTH_STRING_SHARED(input, {});
  EXPECT_BYTES(output, {
                           0x05,       // Prefix: byte length (4) + 1
                           0x66, 0x6f, // "fo"
                           0xc3, 0xb8  // U+00F8 as two UTF-8 bytes
                       });
}

0 comments on commit caf45ab

Please sign in to comment.