Skip to content

Commit

Permalink
Update vendor
Browse files Browse the repository at this point in the history
  • Loading branch information
tekezo committed Apr 15, 2023
1 parent 5a8d9f6 commit 2646974
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace utf8
uint8_t u8;
public:
invalid_utf8 (uint8_t u) : u8(u) {}
invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {}
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
uint8_t utf8_octet() const {return u8;}
};
Expand All @@ -75,24 +76,7 @@ namespace utf8
if (!utf8::internal::is_code_point_valid(cp))
throw invalid_code_point(cp);

if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,55 @@ namespace internal
return utf8::internal::validate_next(it, end, ignored);
}

// Internal implementation of both checked and unchecked append() function
// This function will be invoked by the overloads below, as they will know
// the octet_type.
template <typename octet_iterator, typename octet_type>
octet_iterator append(uint32_t cp, octet_iterator result) {
if (cp < 0x80) // one octet
*(result++) = static_cast<octet_type>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<octet_type>((cp >> 6) | 0xc0);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<octet_type>((cp >> 12) | 0xe0);
*(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<octet_type>((cp >> 18) | 0xf0);
*(result++) = static_cast<octet_type>(((cp >> 12) & 0x3f)| 0x80);
*(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
return result;
}

// One of the following overloads will be invoked from the API calls

// A simple (but dangerous) case: the caller appends byte(s) to a char array
inline char* append(uint32_t cp, char* result) {
return append<char*, char>(cp, result);
}

// Hopefully, most common case: the caller uses back_inserter
// i.e. append(cp, std::back_inserter(str));
template<typename container_type>
std::back_insert_iterator<container_type> append
(uint32_t cp, std::back_insert_iterator<container_type> result) {
return append<std::back_insert_iterator<container_type>,
typename container_type::value_type>(cp, result);
}

// The caller uses some other kind of output operator - not covered above
// Note that in this case we are not able to determine octet_type
// so we assume it's uint_8; that can cause a conversion warning if we are wrong.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result) {
return append<octet_iterator, uint8_t>(cp, result);
}

} // namespace internal

/// The library API - functions intended to be called by the users
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,7 @@ namespace utf8
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ namespace utf8
uint8_t u8;
public:
invalid_utf8 (uint8_t u) : u8(u) {}
invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {}
virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
uint8_t utf8_octet() const {return u8;}
};
Expand All @@ -75,24 +76,7 @@ namespace utf8
if (!utf8::internal::is_code_point_valid(cp))
throw invalid_code_point(cp);

if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,55 @@ namespace internal
return utf8::internal::validate_next(it, end, ignored);
}

// Internal implementation of both checked and unchecked append() function
// This function will be invoked by the overloads below, as they will know
// the octet_type.
template <typename octet_iterator, typename octet_type>
octet_iterator append(uint32_t cp, octet_iterator result) {
if (cp < 0x80) // one octet
*(result++) = static_cast<octet_type>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<octet_type>((cp >> 6) | 0xc0);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<octet_type>((cp >> 12) | 0xe0);
*(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<octet_type>((cp >> 18) | 0xf0);
*(result++) = static_cast<octet_type>(((cp >> 12) & 0x3f)| 0x80);
*(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<octet_type>((cp & 0x3f) | 0x80);
}
return result;
}

// One of the following overloads will be invoked from the API calls

// A simple (but dangerous) case: the caller appends byte(s) to a char array
inline char* append(uint32_t cp, char* result) {
return append<char*, char>(cp, result);
}

// Hopefully, most common case: the caller uses back_inserter
// i.e. append(cp, std::back_inserter(str));
template<typename container_type>
std::back_insert_iterator<container_type> append
(uint32_t cp, std::back_insert_iterator<container_type> result) {
return append<std::back_insert_iterator<container_type>,
typename container_type::value_type>(cp, result);
}

// The caller uses some other kind of output operator - not covered above
// Note that in this case we are not able to determine octet_type
// so we assume it's uint_8; that can cause a conversion warning if we are wrong.
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result) {
return append<octet_iterator, uint8_t>(cp, result);
}

} // namespace internal

/// The library API - functions intended to be called by the users
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,7 @@ namespace utf8
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
if (cp < 0x80) // one octet
*(result++) = static_cast<uint8_t>(cp);
else if (cp < 0x800) { // two octets
*(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else if (cp < 0x10000) { // three octets
*(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
else { // four octets
*(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
*(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
*(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
*(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
}
return result;
return internal::append(cp, result);
}

template <typename octet_iterator, typename output_iterator>
Expand Down

0 comments on commit 2646974

Please sign in to comment.