diff --git a/FireDog/config.h b/FireDog/config.h index 29bfeb1..1d5f4dd 100644 --- a/FireDog/config.h +++ b/FireDog/config.h @@ -3,7 +3,7 @@ //main version -#define FIREDOG_VERSION "1.3.1" +#define FIREDOG_VERSION "1.3.3" //feature library version #define FIREDOG_FEATURE_LIBRARY_VERSION "1.2.1" diff --git a/FireDog/featurelibrary.cpp b/FireDog/featurelibrary.cpp index a7b0c38..21ef8fb 100644 --- a/FireDog/featurelibrary.cpp +++ b/FireDog/featurelibrary.cpp @@ -1,3 +1,5 @@ +#pragma warning (disable: 4996) + #include "featurelibrary.h" using namespace nlohmann; diff --git a/FireDog/yaml/yaml.hpp b/FireDog/yaml/yaml.hpp index a790ed8..ff077e6 100644 --- a/FireDog/yaml/yaml.hpp +++ b/FireDog/yaml/yaml.hpp @@ -1,6 +1,5 @@ #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ #define _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ - // // Rapid YAML - a library to parse and emit YAML, and do it fast. // @@ -343,10 +342,10 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # else # error "Unknown Apple platform" # endif -#elif defined(__linux) +#elif defined(__linux__) || defined(__linux) # define C4_UNIX # define C4_LINUX -#elif defined(__unix) +#elif defined(__unix__) || defined(__unix) # define C4_UNIX #elif defined(__arm__) || defined(__aarch64__) # define C4_ARM @@ -356,7 +355,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); # error "unknown platform" #endif -#if defined(__posix) || defined(__unix__) || defined(__linux) +#if defined(__posix) || defined(C4_UNIX) || defined(C4_LINUX) # define C4_POSIX #endif @@ -420,21 +419,25 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); #else #define C4_CPU_ARM #define C4_WORDSIZE 4 - #if defined(__ARM_ARCH_8__) || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8) + #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) \ + || (defined(__ARCH_ARM) && __ARCH_ARM >= 8) + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8) \ #define C4_CPU_ARMV8 #elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7) \ || defined(__ARM_ARCH_7A__) || 
defined(__ARM_ARCH_7R__) \ || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) \ + || defined(__ARM_ARCH_7EM__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 7) \ || (defined(_M_ARM) && _M_ARM >= 7) #define C4_CPU_ARMV7 #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) \ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) \ - || defined(__ARM_ARCH_6M__) \ + || defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6KZ__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 6) #define C4_CPU_ARMV6 #elif defined(__ARM_ARCH_5TEJ__) \ + || defined(__ARM_ARCH_5TE__) \ || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 5) #define C4_CPU_ARMV5 #elif defined(__ARM_ARCH_4T__) \ @@ -636,9 +639,9 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); // amalgamate: removed include of // https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp //# include "c4/gcc-4.8.hpp" -#if !defined(C4_GCC_4_8_HPP_) && !defined(_C4_GCC_4_8_HPP_) -#error "amalgamate: file c4/gcc-4.8.hpp must have been included at this point" -#endif /* C4_GCC_4_8_HPP_ */ +// #if !defined(C4_GCC_4_8_HPP_) && !defined(_C4_GCC_4_8_HPP_) +// #error "amalgamate: file c4/gcc-4.8.hpp must have been included at this point" +// #endif /* C4_GCC_4_8_HPP_ */ # else // we do not support GCC < 4.8: @@ -960,6 +963,8 @@ typedef long double max_align_t ; # define C4_RESTRICT_FN __attribute__((restrict)) # define C4_NO_INLINE __attribute__((noinline)) # define C4_ALWAYS_INLINE inline __attribute__((always_inline)) +# define C4_CONST __attribute__((const)) +# define C4_PURE __attribute__((pure)) /** force inlining of every callee function */ # define C4_FLATTEN __atribute__((flatten)) /** mark a function as hot, ie as having a visible impact in CPU time @@ -981,6 +986,8 @@ typedef long double max_align_t ; # define C4_NO_INLINE __declspec(noinline) # define C4_ALWAYS_INLINE inline __forceinline /** these are not available in VS AFAIK */ +# define 
C4_CONST +# define C4_PURE # define C4_FLATTEN # define C4_HOT /** @todo */ # define C4_COLD /** @todo */ @@ -1667,18 +1674,18 @@ using index_sequence_for = make_index_sequence; //******************************************************************************** /* Copyright (c) 2011-2021, Scott Tsai - * + * * All rights reserved. - * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: - * + * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -1709,96 +1716,96 @@ extern "C" { #define DEBUG_BREAK_USE_SIGTRAP 3 #if defined(__i386__) || defined(__x86_64__) - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION __inline__ static void trap_instruction(void) { - __asm__ volatile("int $0x03"); + __asm__ volatile("int $0x03"); } #elif defined(__thumb__) - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION /* FIXME: handle __THUMB_INTERWORK__ */ __attribute__((always_inline)) __inline__ static void trap_instruction(void) { - /* See 'arm-linux-tdep.c' in GDB source. - * Both instruction sequences below work. */ + /* See 'arm-linux-tdep.c' in GDB source. + * Both instruction sequences below work. 
*/ #if 1 - /* 'eabi_linux_thumb_le_breakpoint' */ - __asm__ volatile(".inst 0xde01"); + /* 'eabi_linux_thumb_le_breakpoint' */ + __asm__ volatile(".inst 0xde01"); #else - /* 'eabi_linux_thumb2_le_breakpoint' */ - __asm__ volatile(".inst.w 0xf7f0a000"); + /* 'eabi_linux_thumb2_le_breakpoint' */ + __asm__ volatile(".inst.w 0xf7f0a000"); #endif - /* Known problem: - * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. - * 'step' would keep getting stuck on the same instruction. - * - * Workaround: use the new GDB commands 'debugbreak-step' and - * 'debugbreak-continue' that become available - * after you source the script from GDB: - * - * $ gdb -x debugbreak-gdb.py <... USUAL ARGUMENTS ...> - * - * 'debugbreak-step' would jump over the breakpoint instruction with - * roughly equivalent of: - * (gdb) set $instruction_len = 2 - * (gdb) tbreak *($pc + $instruction_len) - * (gdb) jump *($pc + $instruction_len) - */ + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' would keep getting stuck on the same instruction. + * + * Workaround: use the new GDB commands 'debugbreak-step' and + * 'debugbreak-continue' that become available + * after you source the script from GDB: + * + * $ gdb -x debugbreak-gdb.py <... 
USUAL ARGUMENTS ...> + * + * 'debugbreak-step' would jump over the breakpoint instruction with + * roughly equivalent of: + * (gdb) set $instruction_len = 2 + * (gdb) tbreak *($pc + $instruction_len) + * (gdb) jump *($pc + $instruction_len) + */ } #elif defined(__arm__) && !defined(__thumb__) - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION __attribute__((always_inline)) __inline__ static void trap_instruction(void) { - /* See 'arm-linux-tdep.c' in GDB source, - * 'eabi_linux_arm_le_breakpoint' */ - __asm__ volatile(".inst 0xe7f001f0"); - /* Known problem: - * Same problem and workaround as Thumb mode */ + /* See 'arm-linux-tdep.c' in GDB source, + * 'eabi_linux_arm_le_breakpoint' */ + __asm__ volatile(".inst 0xe7f001f0"); + /* Known problem: + * Same problem and workaround as Thumb mode */ } #elif defined(__aarch64__) && defined(__APPLE__) - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_DEBUGTRAP + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_DEBUGTRAP #elif defined(__aarch64__) - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION __attribute__((always_inline)) __inline__ static void trap_instruction(void) { - /* See 'aarch64-tdep.c' in GDB source, - * 'aarch64_default_breakpoint' */ - __asm__ volatile(".inst 0xd4200000"); + /* See 'aarch64-tdep.c' in GDB source, + * 'aarch64_default_breakpoint' */ + __asm__ volatile(".inst 0xd4200000"); } #elif defined(__powerpc__) - /* PPC 32 or 64-bit, big or little endian */ - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + /* PPC 32 or 64-bit, big or little endian */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION __attribute__((always_inline)) __inline__ static void trap_instruction(void) { - /* See 'rs6000-tdep.c' in GDB source, - * 'rs6000_breakpoint' */ - __asm__ volatile(".4byte 0x7d821008"); + /* See 'rs6000-tdep.c' in GDB source, + * 'rs6000_breakpoint' 
*/ + __asm__ volatile(".4byte 0x7d821008"); - /* Known problem: - * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. - * 'step' stuck on the same instruction ("twge r2,r2"). - * - * The workaround is the same as ARM Thumb mode: use debugbreak-gdb.py - * or manually jump over the instruction. */ + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' stuck on the same instruction ("twge r2,r2"). + * + * The workaround is the same as ARM Thumb mode: use debugbreak-gdb.py + * or manually jump over the instruction. */ } #elif defined(__riscv) - /* RISC-V 32 or 64-bit, whether the "C" extension - * for compressed, 16-bit instructions are supported or not */ - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION + /* RISC-V 32 or 64-bit, whether the "C" extension + * for compressed, 16-bit instructions are supported or not */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION __attribute__((always_inline)) __inline__ static void trap_instruction(void) { - /* See 'riscv-tdep.c' in GDB source, - * 'riscv_sw_breakpoint_from_kind' */ - __asm__ volatile(".4byte 0x00100073"); + /* See 'riscv-tdep.c' in GDB source, + * 'riscv_sw_breakpoint_from_kind' */ + __asm__ volatile(".4byte 0x00100073"); } #else - #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_SIGTRAP + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_SIGTRAP #endif @@ -1808,26 +1815,26 @@ __inline__ static void trap_instruction(void) __attribute__((always_inline)) __inline__ static void debug_break(void) { - trap_instruction(); + trap_instruction(); } #elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_DEBUGTRAP __attribute__((always_inline)) __inline__ static void debug_break(void) { - __builtin_debugtrap(); + __builtin_debugtrap(); } #elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_TRAP __attribute__((always_inline)) __inline__ static void debug_break(void) { - __builtin_trap(); + __builtin_trap(); } #elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_SIGTRAP 
#include __attribute__((always_inline)) __inline__ static void debug_break(void) { - raise(SIGTRAP); + raise(SIGTRAP); } #else #error "invalid DEBUG_BREAK_IMPL value" @@ -1919,7 +1926,7 @@ struct fail_type__ {}; #endif // _DOXYGEN_ -#ifdef NDEBUG +#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) # define C4_DEBUG_BREAK() #else # ifdef __clang__ @@ -2347,7 +2354,7 @@ struct srcloc //included above: //#include -#if (defined(__GNUC__) && __GNUC_MAJOR >= 10) || defined(__has_builtin) +#if (defined(__GNUC__) && __GNUC__ >= 10) || defined(__has_builtin) #define _C4_USE_LSB_INTRINSIC(which) __has_builtin(which) #define _C4_USE_MSB_INTRINSIC(which) __has_builtin(which) #elif defined(C4_MSVC) @@ -2382,7 +2389,11 @@ C4_ALWAYS_INLINE void mem_zero(T* mem) memset(mem, 0, sizeof(T)); } -bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb); +C4_ALWAYS_INLINE C4_CONST bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb) +{ + // thanks @timwynants + return (((const char*)b + szb) > a && b < ((const char*)a+sza)); +} void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times); @@ -2392,9 +2403,9 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num //----------------------------------------------------------------------------- template -bool is_aligned(T *ptr, size_t alignment=alignof(T)) +C4_ALWAYS_INLINE C4_CONST bool is_aligned(T *ptr, uintptr_t alignment=alignof(T)) { - return (uintptr_t(ptr) & (alignment - 1)) == 0u; + return (uintptr_t(ptr) & (alignment - uintptr_t(1))) == uintptr_t(0); } @@ -2793,6 +2804,118 @@ struct msb11 #undef _C4_USE_LSB_INTRINSIC #undef _C4_USE_MSB_INTRINSIC +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// there is an implicit conversion below; it happens when 
E or B are +// narrower than int, and thus any operation will upcast the result to +// int, and then downcast to assign +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wconversion") + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= base; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= base; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= base; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= base; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + B bbase = B(base); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= bbase; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= bbase; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + for(E e = 0; e < exponent; ++e) + r *= base; + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) 
noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + for(E e = 0; e < exponent; ++e) + r *= base; + return r; +} +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + B bbase = B(base); + for(E e = 0; e < exponent; ++e) + r *= bbase; + return r; +} + +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -3139,21 +3262,21 @@ struct MemoryResource const char *name = nullptr; virtual ~MemoryResource() {} - void* allocate(size_t sz, size_t alignment=alignof(max_align_t), void *hint=nullptr) + void* allocate(size_t sz, size_t alignment=alignof(std::max_align_t), void *hint=nullptr) { void *mem = this->do_allocate(sz, alignment, hint); C4_CHECK_MSG(mem != nullptr, "could not allocate %lu bytes", sz); return mem; } - void* reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment=alignof(max_align_t)) + void* reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment=alignof(std::max_align_t)) { void *mem = this->do_reallocate(ptr, oldsz, newsz, alignment); C4_CHECK_MSG(mem != nullptr, "could not reallocate from %lu to %lu bytes", oldsz, newsz); return mem; } - void deallocate(void* ptr, size_t sz, size_t alignment=alignof(max_align_t)) + void deallocate(void* ptr, size_t sz, size_t alignment=alignof(std::max_align_t)) { this->do_deallocate(ptr, sz, alignment); } @@ -3389,7 +3512,7 @@ struct MemoryResourceLinearArr : public MemoryResourceLinear #pragma warning(push) #pragma warning(disable: 4324) // structure was padded due to alignment specifier #endif - 
alignas(alignof(max_align_t)) char m_arr[N]; + alignas(alignof(std::max_align_t)) char m_arr[N]; #ifdef _MSC_VER #pragma warning(pop) #endif @@ -5174,28 +5297,28 @@ struct C4CORE_EXPORT basic_substring /** @name Standard accessor methods */ /** @{ */ - bool has_str() const { return ! empty() && str[0] != C(0); } - bool empty() const { return (len == 0 || str == nullptr); } - bool not_empty() const { return (len != 0 && str != nullptr); } - size_t size() const { return len; } + C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); } + C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); } + C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); } + C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; } - iterator begin() { return str; } - iterator end () { return str + len; } + C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; } - const_iterator begin() const { return str; } - const_iterator end () const { return str + len; } + C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; } - C * data() { return str; } - C const* data() const { return str; } + C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; } - inline C & operator[] (size_t i) { C4_ASSERT(i >= 0 && i < len); return str[i]; } - inline C const& operator[] (size_t i) const { C4_ASSERT(i >= 0 && i < len); return str[i]; } + C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; } + C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; } - inline C & front() { C4_ASSERT(len > 0 && str != 
nullptr); return *str; } - inline C const& front() const { C4_ASSERT(len > 0 && str != nullptr); return *str; } + C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; } + C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; } - inline C & back() { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } - inline C const& back() const { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } + C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } + C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } /** @} */ @@ -5204,28 +5327,35 @@ struct C4CORE_EXPORT basic_substring /** @name Comparison methods */ /** @{ */ - int compare(C const c) const + C4_PURE int compare(C const c) const noexcept { C4_XASSERT((str != nullptr) || len == 0); - if( ! len) + if(C4_LIKELY(str != nullptr && len > 0)) + return (*str != c) ? *str - c : (static_cast(len) - 1); + else return -1; - if(*str == c) - return static_cast(len - 1); - return *str - c; } - int compare(const char *that, size_t sz) const + C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept { C4_XASSERT(that || sz == 0); C4_XASSERT(str || len == 0); if(C4_LIKELY(str && that)) { - int ret = strncmp(str, that, len < sz ? len : sz); - if(ret == 0 && len != sz) - ret = len < sz ? -1 : 1; - return ret; + { + const size_t min = len < sz ? len : sz; + for(size_t i = 0; i < min; ++i) + if(str[i] != that[i]) + return str[i] < that[i] ? -1 : 1; + } + if(len < sz) + return -1; + else if(len == sz) + return 0; + else + return 1; } - if((!str && !that) || (len == sz)) + else if(len == sz) { C4_XASSERT(len == 0 && sz == 0); return 0; @@ -5233,31 +5363,31 @@ struct C4CORE_EXPORT basic_substring return len < sz ? 
-1 : 1; } - C4_ALWAYS_INLINE int compare(ro_substr const that) const { return this->compare(that.str, that.len); } + C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); } - C4_ALWAYS_INLINE bool operator== (std::nullptr_t) const { return str == nullptr || len == 0; } - C4_ALWAYS_INLINE bool operator!= (std::nullptr_t) const { return str != nullptr || len == 0; } + C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; } - C4_ALWAYS_INLINE bool operator== (C const c) const { return this->compare(c) == 0; } - C4_ALWAYS_INLINE bool operator!= (C const c) const { return this->compare(c) != 0; } - C4_ALWAYS_INLINE bool operator< (C const c) const { return this->compare(c) < 0; } - C4_ALWAYS_INLINE bool operator> (C const c) const { return this->compare(c) > 0; } - C4_ALWAYS_INLINE bool operator<= (C const c) const { return this->compare(c) <= 0; } - C4_ALWAYS_INLINE bool operator>= (C const c) const { return this->compare(c) >= 0; } + C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; } + C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; } + C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; } + C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; } + C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; } - template C4_ALWAYS_INLINE bool operator== (basic_substring const that) const { return this->compare(that) == 0; } - template C4_ALWAYS_INLINE bool operator!= (basic_substring const that) const { return this->compare(that) != 
0; } - template C4_ALWAYS_INLINE bool operator< (basic_substring const that) const { return this->compare(that) < 0; } - template C4_ALWAYS_INLINE bool operator> (basic_substring const that) const { return this->compare(that) > 0; } - template C4_ALWAYS_INLINE bool operator<= (basic_substring const that) const { return this->compare(that) <= 0; } - template C4_ALWAYS_INLINE bool operator>= (basic_substring const that) const { return this->compare(that) >= 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring const that) const noexcept { return this->compare(that) == 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring const that) const noexcept { return this->compare(that) != 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring const that) const noexcept { return this->compare(that) < 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring const that) const noexcept { return this->compare(that) > 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring const that) const noexcept { return this->compare(that) <= 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring const that) const noexcept { return this->compare(that) >= 0; } - template C4_ALWAYS_INLINE bool operator== (const char (&that)[N]) const { return this->compare(that, N-1) == 0; } - template C4_ALWAYS_INLINE bool operator!= (const char (&that)[N]) const { return this->compare(that, N-1) != 0; } - template C4_ALWAYS_INLINE bool operator< (const char (&that)[N]) const { return this->compare(that, N-1) < 0; } - template C4_ALWAYS_INLINE bool operator> (const char (&that)[N]) const { return this->compare(that, N-1) > 0; } - template C4_ALWAYS_INLINE bool operator<= (const char (&that)[N]) const { return this->compare(that, N-1) <= 0; } - template C4_ALWAYS_INLINE bool operator>= (const char (&that)[N]) const { return this->compare(that, N-1) >= 0; } + template C4_ALWAYS_INLINE C4_PURE bool 
operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; } /** @} */ @@ -5267,39 +5397,38 @@ struct C4CORE_EXPORT basic_substring /** @{ */ /** true if *this is a substring of that (ie, from the same buffer) */ - inline bool is_sub(ro_substr const that) const + C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept { return that.is_super(*this); } /** true if that is a substring of *this (ie, from the same buffer) */ - inline bool is_super(ro_substr const that) const + C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept { - if(C4_UNLIKELY(len == 0)) - { + if(C4_LIKELY(len > 0)) + return that.str >= str && that.str+that.len <= str+len; + else return that.len == 0 && that.str == str && str != nullptr; - } - return that.begin() >= begin() && that.end() <= end(); } /** true if there is overlap of at least one element between that and *this */ - inline bool overlaps(ro_substr const that) const + C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept { // thanks @timwynants - return (that.end() > begin() && that.begin() < end()); + return that.str+that.len > str && that.str < str+len; } public: /** return [first,len[ */ - basic_substring sub(size_t first) const + C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const 
noexcept { C4_ASSERT(first >= 0 && first <= len); return basic_substring(str + first, len - first); } /** return [first,first+num[. If num==npos, return [first,len[ */ - basic_substring sub(size_t first, size_t num) const + C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept { C4_ASSERT(first >= 0 && first <= len); C4_ASSERT((num >= 0 && num <= len) || (num == npos)); @@ -5309,7 +5438,7 @@ struct C4CORE_EXPORT basic_substring } /** return [first,last[. If last==npos, return [first,len[ */ - basic_substring range(size_t first, size_t last=npos) const + C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept { C4_ASSERT(first >= 0 && first <= len); last = last != npos ? last : len; @@ -5318,24 +5447,26 @@ struct C4CORE_EXPORT basic_substring return basic_substring(str + first, last - first); } - /** return [0,num[*/ - basic_substring first(size_t num) const + /** return the first @p num elements: [0,num[*/ + C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept { - return sub(0, num); + C4_ASSERT(num <= len || num == npos); + return basic_substring(str, num != npos ? num : len); } - /** return [len-num,len[*/ - basic_substring last(size_t num) const + /** return the last @num elements: [len-num,len[*/ + C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept { - if(num == npos) - return *this; - return sub(len - num); + C4_ASSERT(num <= len || num == npos); + return num != npos ? + basic_substring(str + len - num, num) : + *this; } /** offset from the ends: return [left,len-right[ ; ie, trim a number of characters from the left and right. This is equivalent to python's negative list indices. 
*/ - basic_substring offs(size_t left, size_t right) const + C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept { C4_ASSERT(left >= 0 && left <= len); C4_ASSERT(right >= 0 && right <= len); @@ -5343,27 +5474,47 @@ struct C4CORE_EXPORT basic_substring return basic_substring(str + left, len - right - left); } - /** return [0, pos+include_pos[ */ - basic_substring left_of(size_t pos, bool include_pos=false) const + /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */ + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept { - if(pos == npos) - return *this; - return first(pos + include_pos); + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str, pos) : + *this; + } + + /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */ + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str, pos+include_pos) : + *this; + } + + /** return [pos+1, len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str + (pos + 1), len - (pos + 1)) : + basic_substring(str + len, size_t(0)); } /** return [pos+!include_pos, len[ */ - basic_substring right_of(size_t pos, bool include_pos=false) const + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept { - if(pos == npos) - return sub(len, 0); - return sub(pos + !include_pos); + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? 
+ basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) : + basic_substring(str + len, size_t(0)); } public: /** given @p subs a substring of the current string, get the * portion of the current string to the left of it */ - basic_substring left_of(ro_substr const subs) const + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept { C4_ASSERT(is_super(subs) || subs.empty()); auto ssb = subs.begin(); @@ -5377,7 +5528,7 @@ struct C4CORE_EXPORT basic_substring /** given @p subs a substring of the current string, get the * portion of the current string to the right of it */ - basic_substring right_of(ro_substr const subs) const + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept { C4_ASSERT(is_super(subs) || subs.empty()); auto sse = subs.end(); @@ -6039,55 +6190,64 @@ struct C4CORE_EXPORT basic_substring basic_substring _first_integral_span(size_t skip_start) const { C4_ASSERT(!empty()); - if(skip_start == len) { + if(skip_start == len) return first(0); - } C4_ASSERT(skip_start < len); - if(first_of_any("0x", "0X")) // hexadecimal - { - skip_start += 2; - if(len == skip_start) - return first(0); - for(size_t i = skip_start; i < len; ++i) - { - if( ! _is_hex_char(str[i])) - return _is_delim_char(str[i]) ? first(i) : first(0); - } - } - else if(first_of_any("0o", "0O")) // octal + if(len >= skip_start + 3) { - skip_start += 2; - if(len == skip_start) - return first(0); - for(size_t i = skip_start; i < len; ++i) + if(str[skip_start] != '0') { - char c = str[i]; - if(c < '0' || c > '7') - return _is_delim_char(str[i]) ? first(i) : first(0); + for(size_t i = skip_start; i < len; ++i) + { + char c = str[i]; + if(c < '0' || c > '9') + return i > skip_start && _is_delim_char(c) ? 
first(i) : first(0); + } } - } - else if(first_of_any("0b", "0B")) // binary - { - skip_start += 2; - if(len == skip_start) - return first(0); - for(size_t i = skip_start; i < len; ++i) + else { - char c = str[i]; - if(c != '0' && c != '1') - return _is_delim_char(c) ? first(i) : first(0); + char next = str[skip_start + 1]; + if(next == 'x' || next == 'X') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if( ! _is_hex_char(c)) + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } + else if(next == 'b' || next == 'B') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if(c != '0' && c != '1') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } + else if(next == 'o' || next == 'O') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if(c < '0' || c > '7') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } } } - else // otherwise, decimal + // must be a decimal, or it is not a an number + for(size_t i = skip_start; i < len; ++i) { - if(len == skip_start) - return first(0); - for(size_t i = skip_start; i < len; ++i) - { - char c = str[i]; - if(c < '0' || c > '9') - return _is_delim_char(c) ? first(i) : first(0); - } + const char c = str[i]; + if(c < '0' || c > '9') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); } return *this; } @@ -6098,226 +6258,537 @@ struct C4CORE_EXPORT basic_substring basic_substring ne = first_non_empty_span(); if(ne.empty()) return ne; - size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 
1 : 0; - if(ne.first_of_any("0x", "0X")) // hexadecimal - { - skip_start += 2; - if(ne.len == skip_start) - return ne.first(0); - for(size_t i = skip_start; i < ne.len; ++i) + size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-'); + C4_ASSERT(skip_start == 0 || skip_start == 1); + // if we have at least three digits after the leading sign, it + // can be decimal, or hex, or bin or oct. Ex: + // non-decimal: 0x0, 0b0, 0o0 + // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity + if(ne.len >= skip_start+3) + { + // if it does not have leading 0, it must be decimal, or it is not a real + if(ne.str[skip_start] != '0') { - char c = ne.str[i]; - if(( ! _is_hex_char(c)) && c != '.' && c != 'p' && c != 'P') + if(ne.str[skip_start] == 'i') // is it infinity or inf? { - if(c == '-' || c == '+') - { - // we can also have a sign for the exponent - if(i > 1 && (ne[i-1] == 'p' || ne[i-1] == 'P')) - { - continue; - } - } - return _is_delim_char(c) ? ne.first(i) : ne.first(0); + basic_substring word = ne._word_follows(skip_start + 1, "nfinity"); + if(word.len) + return word; + return ne._word_follows(skip_start + 1, "nf"); } - } - } - else if(ne.first_of_any("0b", "0B")) // binary - { - skip_start += 2; - if(ne.len == skip_start) - return ne.first(0); - for(size_t i = skip_start; i < ne.len; ++i) - { - char c = ne.str[i]; - if(c != '0' && c != '1' && c != '.') + else if(ne.str[skip_start] == 'n') // is it nan? { - return _is_delim_char(c) ? ne.first(i) : ne.first(0); + return ne._word_follows(skip_start + 1, "an"); } - } - } - else if(ne.first_of_any("0o", "0O")) // octal - { - skip_start += 2; - if(ne.len == skip_start) - return ne.first(0); - for(size_t i = skip_start; i < ne.len; ++i) - { - char c = ne.str[i]; - if((c < '0' || c > '7') && c != '.') + else // must be a decimal, or it is not a real { - return _is_delim_char(c) ? 
ne.first(i) : ne.first(0); + return ne._first_real_span_dec(skip_start); } } - } - else // assume decimal - { - if(ne.len == skip_start) - return ne.first(0); - for(size_t i = skip_start; i < ne.len; ++i) + else // starts with 0. is it 0x, 0b or 0o? { - char c = ne.str[i]; - if((c < '0' || c > '9') && (c != '.' && c != 'e' && c != 'E')) - { - if(c == '-' || c == '+') - { - // we can also have a sign for the exponent - if(i > 1 && (ne[i-1] == 'e' || ne[i-1] == 'E')) - { - continue; - } - } - else if(i == skip_start) - { - if(c == 'i') - { - if(ne.len >= skip_start + 8 && ne.sub(skip_start, 8) == "infinity") - return _is_delim_char(ne.str[skip_start + 8]) ? ne.first(skip_start + 8) : ne.first(0); - else if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "inf") - return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0); - else - return ne.first(0); - } - else if(c == 'n') - { - if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "nan") - return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0); - else - return ne.first(0); - } - else - { - return ne.first(0); - } - } - else - { - return _is_delim_char(c) ? ne.first(i) : ne.first(0); - } - } + const char next = ne.str[skip_start + 1]; + // hexadecimal + if(next == 'x' || next == 'X') + return ne._first_real_span_hex(skip_start + 2); + // binary + else if(next == 'b' || next == 'B') + return ne._first_real_span_bin(skip_start + 2); + // octal + else if(next == 'o' || next == 'O') + return ne._first_real_span_oct(skip_start + 2); + // none of the above. may still be a decimal. + else + return ne._first_real_span_dec(skip_start); // do not skip the 0. } } - return ne; + // less than 3 chars after the leading sign. It is either a + // decimal or it is not a real. (cannot be any of 0x0, etc). 
+ return ne._first_real_span_dec(skip_start); } /** true if the character is a delimiter character *at the end* */ - static constexpr C4_ALWAYS_INLINE bool _is_delim_char(char c) noexcept + static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept { - return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\0' + return c == ' ' || c == '\n' || c == ']' || c == ')' || c == '}' - || c == ',' || c == ';'; + || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0'; } /** true if the character is in [0-9a-fA-F] */ - static constexpr C4_ALWAYS_INLINE bool _is_hex_char(char c) noexcept + static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } - /** true if the character is in [0-9a-fA-F] */ - static constexpr C4_ALWAYS_INLINE bool _is_oct_char(char c) noexcept + C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept { - return (c >= '0' && c <= '7'); + size_t posend = pos + word.len; + if(len >= posend && sub(pos, word.len) == word) + if(len == posend || _is_delim_char(str[posend])) + return first(posend); + return first(0); } - /** @} */ - -public: - - /** @name Splitting methods */ - /** @{ */ - - /** returns true if the string has not been exhausted yet, meaning - * it's ok to call next_split() again. When no instance of sep - * exists in the string, returns the full string. When the input - * is an empty string, the output string is the empty string. 
*/ - bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept { - if(C4_LIKELY(*start_pos < len)) + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) { - for(size_t i = *start_pos, e = len; i < e; i++) + const char c = str[pos]; + if(c >= '0' && c <= '9') { - if(str[i] == sep) - { - out->assign(str + *start_pos, i - *start_pos); - *start_pos = i+1; - return true; - } + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_dec; + } + else if(c == 'e' || c == 'E') + { + ++pos; + goto power_part_dec; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); } - out->assign(str + *start_pos, len - *start_pos); - *start_pos = len + 1; - return true; } - else - { - bool valid = len > 0 && (*start_pos == len); - if(valid && !empty() && str[len-1] == sep) + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_dec: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') { - out->assign(str + len, (size_t)0); // the cast is needed to prevent overload ambiguity + fracchars = true; + } + else if(c == 'e' || c == 'E') + { + ++pos; + goto power_part_dec; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); } else { - out->assign(str + len + 1, (size_t)0); // the cast is needed to prevent overload ambiguity + return first(0); } - *start_pos = len + 1; - return valid; } + return intchars || fracchars ? 
+ *this : + first(0); + power_part_dec: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; } -private: - - struct split_proxy_impl + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept { - struct split_iterator_impl + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) { - split_proxy_impl const* m_proxy; - basic_substring m_str; - size_t m_pos; - NCC_ m_sep; - - split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep) - : m_proxy(proxy), m_pos(pos), m_sep(sep) + const char c = str[pos]; + if(_is_hex_char(c)) { - _tick(); + intchars = true; } - - void _tick() + else if(c == '.') { - m_proxy->m_str.next_split(m_sep, &m_pos, &m_str); + ++pos; + goto fractional_part_hex; } - - split_iterator_impl& operator++ () { _tick(); return *this; } - split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } - - basic_substring& operator* () { return m_str; } - basic_substring* operator-> () { return &m_str; } - - bool operator!= (split_iterator_impl const& that) const + else if(c == 'p' || c == 'P') { - return !(this->operator==(that)); + ++pos; + goto power_part_hex; } - bool operator== (split_iterator_impl 
const& that) const + else if(_is_delim_char(c)) { - C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators"); - if(m_str.size() != that.m_str.size()) - return false; - if(m_str.data() != that.m_str.data()) - return false; - return m_pos == that.m_pos; + return intchars ? first(pos) : first(0); } - }; - - basic_substring m_str; - size_t m_start_pos; - C m_sep; - - split_proxy_impl(basic_substring str_, size_t start_pos, C sep) - : m_str(str_), m_start_pos(start_pos), m_sep(sep) - { + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_hex: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(_is_hex_char(c)) + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_hex; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_hex: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... 
so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); } + return *this; + } - split_iterator_impl begin() const + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) { - auto it = split_iterator_impl(this, m_start_pos, m_sep); + const char c = str[pos]; + if(c == '0' || c == '1') + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_bin; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_bin; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_bin: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c == '0' || c == '1') + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_bin; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_bin: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. 
+ if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '7') + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_oct; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_oct; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_oct: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '7') + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_oct; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_oct: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. 
+ if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + /** @} */ + +public: + + /** @name Splitting methods */ + /** @{ */ + + /** returns true if the string has not been exhausted yet, meaning + * it's ok to call next_split() again. When no instance of sep + * exists in the string, returns the full string. When the input + * is an empty string, the output string is the empty string. */ + bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const + { + if(C4_LIKELY(*start_pos < len)) + { + for(size_t i = *start_pos, e = len; i < e; i++) + { + if(str[i] == sep) + { + out->assign(str + *start_pos, i - *start_pos); + *start_pos = i+1; + return true; + } + } + out->assign(str + *start_pos, len - *start_pos); + *start_pos = len + 1; + return true; + } + else + { + bool valid = len > 0 && (*start_pos == len); + if(valid && !empty() && str[len-1] == sep) + { + out->assign(str + len, (size_t)0); // the cast is needed to prevent overload ambiguity + } + else + { + out->assign(str + len + 1, (size_t)0); // the cast is needed to prevent overload ambiguity + } + *start_pos = len + 1; + return valid; + } + } + +private: + + struct split_proxy_impl + { + struct split_iterator_impl + { + split_proxy_impl const* m_proxy; + basic_substring m_str; + size_t m_pos; + NCC_ m_sep; + + split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep) + : m_proxy(proxy), m_pos(pos), m_sep(sep) + { + _tick(); + } + + void _tick() + { + m_proxy->m_str.next_split(m_sep, &m_pos, &m_str); + } + + split_iterator_impl& 
operator++ () { _tick(); return *this; } + split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } + + basic_substring& operator* () { return m_str; } + basic_substring* operator-> () { return &m_str; } + + bool operator!= (split_iterator_impl const& that) const + { + return !(this->operator==(that)); + } + bool operator== (split_iterator_impl const& that) const + { + C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators"); + if(m_str.size() != that.m_str.size()) + return false; + if(m_str.data() != that.m_str.data()) + return false; + return m_pos == that.m_pos; + } + }; + + basic_substring m_str; + size_t m_start_pos; + C m_sep; + + split_proxy_impl(basic_substring str_, size_t start_pos, C sep) + : m_str(str_), m_start_pos(start_pos), m_sep(sep) + { + } + + split_iterator_impl begin() const + { + auto it = split_iterator_impl(this, m_start_pos, m_sep); return it; } split_iterator_impl end() const @@ -6587,7 +7058,11 @@ struct C4CORE_EXPORT basic_substring num = num != npos ? num : len - ifirst; num = num < that.len ? num : that.len; C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len); - memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(num) + memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num); } public: @@ -6705,7 +7180,7 @@ struct C4CORE_EXPORT basic_substring { \ C4_ASSERT((last) >= (first)); \ size_t num = static_cast((last) - (first)); \ - if(sz + num <= dst.len) \ + if(num > 0 && sz + num <= dst.len) \ { \ memcpy(dst.str + sz, first, num * sizeof(C)); \ } \ @@ -6908,44 +7383,40 @@ inline OStream& operator<< (OStream& os, basic_substring s) // fast_float by Daniel Lemire // fast_float by João Paulo Magalhaes -// + + // with contributions from Eugene Golushkov // with contributions from Maksim Kita // with contributions from Marcin Wojdyr // with contributions from Neal Richardson // with contributions from Tim Paine // with contributions from Fabio Pellacini + + +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: // -// MIT License Notice -// -// MIT License -// -// Copyright (c) 2021 The fast_float authors -// -// Permission is hereby granted, free of charge, to any -// person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the -// Software without restriction, including without -// limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of -// the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice -// shall be included in all copies or substantial portions 
-// of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. // +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+ #ifndef FASTFLOAT_FAST_FLOAT_H #define FASTFLOAT_FAST_FLOAT_H @@ -7010,6 +7481,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, } #endif // FASTFLOAT_FAST_FLOAT_H + #ifndef FASTFLOAT_FLOAT_COMMON_H #define FASTFLOAT_FLOAT_COMMON_H @@ -7019,8 +7491,6 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #include //included above: //#include -//included above: -//#include #if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \ || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ @@ -7057,9 +7527,7 @@ from_chars_result from_chars_advanced(const char *first, const char *last, #define FASTFLOAT_VISUAL_STUDIO 1 #endif -#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ -#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#elif defined _WIN32 +#ifdef _WIN32 #define FASTFLOAT_IS_BIG_ENDIAN 0 #else #if defined(__APPLE__) || defined(__FreeBSD__) @@ -7239,8 +7707,6 @@ constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}; template struct binary_format { - using equiv_uint = typename std::conditional::type; - static inline constexpr int mantissa_explicit_bits(); static inline constexpr int minimum_exponent(); static inline constexpr int infinite_power(); @@ -7254,9 +7720,6 @@ template struct binary_format { static inline constexpr int smallest_power_of_ten(); static inline constexpr T exact_power_of_ten(int64_t power); static inline constexpr size_t max_digits(); - static inline constexpr equiv_uint exponent_mask(); - static inline constexpr equiv_uint mantissa_mask(); - static inline constexpr equiv_uint hidden_bit_mask(); }; template <> inline constexpr int binary_format::mantissa_explicit_bits() { @@ -7364,33 +7827,6 @@ template <> inline constexpr size_t binary_format::max_digits() { return 114; } -template <> inline constexpr binary_format::equiv_uint - binary_format::exponent_mask() { - return 0x7F800000; -} 
-template <> inline constexpr binary_format::equiv_uint - binary_format::exponent_mask() { - return 0x7FF0000000000000; -} - -template <> inline constexpr binary_format::equiv_uint - binary_format::mantissa_mask() { - return 0x007FFFFF; -} -template <> inline constexpr binary_format::equiv_uint - binary_format::mantissa_mask() { - return 0x000FFFFFFFFFFFFF; -} - -template <> inline constexpr binary_format::equiv_uint - binary_format::hidden_bit_mask() { - return 0x00800000; -} -template <> inline constexpr binary_format::equiv_uint - binary_format::hidden_bit_mask() { - return 0x0010000000000000; -} - template fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) { uint64_t word = am.mantissa; @@ -7413,6 +7849,7 @@ fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &va #endif + #ifndef FASTFLOAT_ASCII_NUMBER_H #define FASTFLOAT_ASCII_NUMBER_H @@ -7647,6 +8084,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ #endif + #ifndef FASTFLOAT_FAST_TABLE_H #define FASTFLOAT_FAST_TABLE_H @@ -8348,6 +8786,7 @@ using powers = powers_template<>; #endif + #ifndef FASTFLOAT_DECIMAL_TO_BINARY_H #define FASTFLOAT_DECIMAL_TO_BINARY_H @@ -8399,9 +8838,9 @@ namespace detail { * where * p = log(5**q)/log(2) = q * log(5)/log(2) * - * For negative values of q in (-400,0), we have that + * For negative values of q in (-400,0), we have that * f = (((152170 + 65536) * q ) >> 16); - * is equal to + * is equal to * -ceil(p) + q * where * p = log(5**-q)/log(2) = -q * log(5)/log(2) @@ -8472,7 +8911,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { // In some very rare cases, this could happen, in which case we might need a more accurate // computation that what we can provide cheaply. This is very, very unlikely. 
// - const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, + const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation. if(!inside_safe_exponent) { return compute_error_scaled(q, product.high, lz); @@ -8544,6 +8983,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { #endif + #ifndef FASTFLOAT_BIGINT_H #define FASTFLOAT_BIGINT_H @@ -9137,6 +9577,7 @@ struct bigint { #endif + #ifndef FASTFLOAT_ASCII_NUMBER_H #define FASTFLOAT_ASCII_NUMBER_H @@ -9372,6 +9813,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_ #endif + #ifndef FASTFLOAT_DIGIT_COMPARISON_H #define FASTFLOAT_DIGIT_COMPARISON_H @@ -9419,24 +9861,40 @@ fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) n // this converts a native floating-point number to an extended-precision float. 
template fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept { - using equiv_uint = typename binary_format::equiv_uint; - constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); - constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); - constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); - adjusted_mantissa am; int32_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); - equiv_uint bits; - ::memcpy(&bits, &value, sizeof(T)); - if ((bits & exponent_mask) == 0) { - // denormal - am.power2 = 1 - bias; - am.mantissa = bits & mantissa_mask; + if (std::is_same::value) { + constexpr uint32_t exponent_mask = 0x7F800000; + constexpr uint32_t mantissa_mask = 0x007FFFFF; + constexpr uint64_t hidden_bit_mask = 0x00800000; + uint32_t bits; + ::memcpy(&bits, &value, sizeof(T)); + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } } else { - // normal - am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); - am.power2 -= bias; - am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + constexpr uint64_t exponent_mask = 0x7FF0000000000000; + constexpr uint64_t mantissa_mask = 0x000FFFFFFFFFFFFF; + constexpr uint64_t hidden_bit_mask = 0x0010000000000000; + uint64_t bits; + ::memcpy(&bits, &value, sizeof(T)); + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } } return am; @@ -9461,7 +9919,7 @@ fastfloat_really_inline void 
round(adjusted_mantissa& am, callback cb) noexcept if (-am.power2 >= mantissa_shift) { // have a denormal float int32_t shift = -am.power2 + 1; - cb(am, std::min(shift, 64)); + cb(am, std::min(shift, 64)); // check for round-up: if rounding-nearest carried us to the hidden bit. am.power2 = (am.mantissa < (uint64_t(1) << binary_format::mantissa_explicit_bits())) ? 0 : 1; return; @@ -9781,6 +10239,7 @@ inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa #endif + #ifndef FASTFLOAT_PARSE_NUMBER_H #define FASTFLOAT_PARSE_NUMBER_H @@ -9928,10 +10387,17 @@ from_chars_result from_chars_advanced(const char *first, const char *last, // forward declarations for std::vector #if defined(__GLIBCXX__) || defined(__GLIBCPP__) || defined(_MSC_VER) +#if defined(_MSC_VER) +__pragma(warning(push)) +__pragma(warning(disable : 4643)) +#endif namespace std { template class allocator; template class vector; } // namespace std +#if defined(_MSC_VER) +__pragma(warning(pop)) +#endif #elif defined(_LIBCPP_ABI_NAMESPACE) namespace std { inline namespace _LIBCPP_ABI_NAMESPACE { @@ -10030,8 +10496,8 @@ using string = basic_string, allocator>; namespace c4 { -c4::substr to_substr(std::string &s); -c4::csubstr to_csubstr(std::string const& s); +C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept; +C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept; bool operator== (c4::csubstr ss, std::string const& s); bool operator!= (c4::csubstr ss, std::string const& s); @@ -10128,6 +10594,10 @@ bool from_chars(c4::csubstr buf, std::string * s); * // Read a value from the string, which must be * // trimmed to the value (ie, no leading/trailing whitespace). * // return true if the conversion succeeded. + * // There is no check for overflow; the value wraps around in a way similar + * // to the standard C/C++ overflow behavior. For example, + * // from_chars("128", &val) returns true and val will be + * // set tot 0. 
* template bool c4::from_chars(csubstr buf, T * C4_RESTRICT val); * * @@ -10193,44 +10663,61 @@ bool from_chars(c4::csubstr buf, std::string * s); #ifndef C4CORE_NO_FAST_FLOAT - C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion") - C4_SUPPRESS_WARNING_GCC("-Warray-bounds") -#if __GNUC__ >= 5 - C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow") -#endif -// amalgamate: removed include of -// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp -//# include "c4/ext/fast_float.hpp" -#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_) -#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point" -#endif /* C4_EXT_FAST_FLOAT_HPP_ */ - - C4_SUPPRESS_WARNING_GCC_POP -# define C4CORE_HAVE_FAST_FLOAT 1 -# define C4CORE_HAVE_STD_FROMCHARS 0 # if (C4_CPP >= 17) # if defined(_MSC_VER) -# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) +# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros # include # define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC +# define C4CORE_HAVE_FAST_FLOAT 1 # else # define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 # endif -# else // VS2017 and lower do not have these macros -# if __has_include() && __cpp_lib_to_chars -# define C4CORE_HAVE_STD_TOCHARS 1 +# else +# if __has_include() //included above: //# include +# if defined(__cpp_lib_to_chars) +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 0 // glibc uses fast_float internally +# define C4CORE_HAVE_FAST_FLOAT 1 +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# endif # else # define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 # endif # endif # else # define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# 
endif +# if C4CORE_HAVE_FAST_FLOAT + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion") + C4_SUPPRESS_WARNING_GCC("-Warray-bounds") +# if __GNUC__ >= 5 + C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow") +# endif +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp +//# include "c4/ext/fast_float.hpp" +#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_) +#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point" +#endif /* C4_EXT_FAST_FLOAT_HPP_ */ + + C4_SUPPRESS_WARNING_GCC_POP # endif #elif (C4_CPP >= 17) +# define C4CORE_HAVE_FAST_FLOAT 0 # if defined(_MSC_VER) -# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) +# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros //included above: //# include # define C4CORE_HAVE_STD_TOCHARS 1 @@ -10239,12 +10726,17 @@ bool from_chars(c4::csubstr buf, std::string * s); # define C4CORE_HAVE_STD_TOCHARS 0 # define C4CORE_HAVE_STD_FROMCHARS 0 # endif -# else // VS2017 and lower do not have these macros -# if __has_include() && __cpp_lib_to_chars -# define C4CORE_HAVE_STD_TOCHARS 1 -# define C4CORE_HAVE_STD_FROMCHARS 1 +# else +# if __has_include() //included above: //# include +# if defined(__cpp_lib_to_chars) +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 1 // glibc uses fast_float internally +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# endif # else # define C4CORE_HAVE_STD_TOCHARS 0 # define C4CORE_HAVE_STD_FROMCHARS 0 @@ -10253,10 +10745,11 @@ bool from_chars(c4::csubstr buf, std::string * s); #else # define C4CORE_HAVE_STD_TOCHARS 0 # define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 0 #endif -#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT) +#if !C4CORE_HAVE_STD_FROMCHARS #include #endif @@ -10282,52 +10775,33 @@ bool from_chars(c4::csubstr buf, std::string * s); namespace c4 { -typedef enum : 
uint8_t { +#if C4CORE_HAVE_STD_TOCHARS +/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */ +typedef enum : std::underlying_type::type { /** print the real number in floating point format (like %f) */ - FTOA_FLOAT = 0, + FTOA_FLOAT = static_cast::type>(std::chars_format::fixed), /** print the real number in scientific format (like %e) */ - FTOA_SCIENT = 1, + FTOA_SCIENT = static_cast::type>(std::chars_format::scientific), /** print the real number in flexible format (like %g) */ - FTOA_FLEX = 2, + FTOA_FLEX = static_cast::type>(std::chars_format::general), /** print the real number in hexadecimal format (like %a) */ - FTOA_HEXA = 3, - _FTOA_COUNT + FTOA_HEXA = static_cast::type>(std::chars_format::hex), } RealFormat_e; +#else +/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */ +typedef enum : char { + /** print the real number in floating point format (like %f) */ + FTOA_FLOAT = 'f', + /** print the real number in scientific format (like %e) */ + FTOA_SCIENT = 'e', + /** print the real number in flexible format (like %g) */ + FTOA_FLEX = 'g', + /** print the real number in hexadecimal format (like %a) */ + FTOA_HEXA = 'a', +} RealFormat_e; +#endif -inline C4_CONSTEXPR14 char to_c_fmt(RealFormat_e f) -{ - constexpr const char fmt[] = { - 'f', // FTOA_FLOAT - 'e', // FTOA_SCIENT - 'g', // FTOA_FLEX - 'a', // FTOA_HEXA - }; - C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT); - #if C4_CPP > 14 - C4_ASSERT(f < _FTOA_COUNT); - #endif - return fmt[f]; -} - - -#if C4CORE_HAVE_STD_TOCHARS -inline C4_CONSTEXPR14 std::chars_format to_std_fmt(RealFormat_e f) -{ - constexpr const std::chars_format fmt[] = { - std::chars_format::fixed, // FTOA_FLOAT - std::chars_format::scientific, // FTOA_SCIENT - std::chars_format::general, // FTOA_FLEX - std::chars_format::hex, // FTOA_HEXA - }; - C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT); - #if C4_CPP >= 14 - C4_ASSERT(f < _FTOA_COUNT); - #endif - return 
fmt[f]; -} -#endif // C4CORE_HAVE_STD_TOCHARS - /** in some platforms, int,unsigned int * are not any of int8_t...int64_t and * long,unsigned long are not any of uint8_t...uint64_t */ @@ -10661,7 +11135,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept const unsigned __b3 = __b2 * 8u; const unsigned long __b4 = __b3 * 8u; while(true) - { + { if(v < 8u) return __n; if(v < __b2) @@ -10672,7 +11146,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept return __n + 3; v /= (U) __b4; __n += 4; - } + } } @@ -10690,6 +11164,11 @@ C4_INLINE_CONSTEXPR const char digits0099[] = "8081828384858687888990919293949596979899"; } // namespace detail +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc has false positives here +#if (defined(__GNUC__) && (__GNUC__ >= 7)) +C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has false positives here +#endif template C4_HOT C4_ALWAYS_INLINE @@ -10774,19 +11253,17 @@ void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept /** write an integer to a string in decimal format. This is the * lowest level (and the fastest) function to do this task. * @note does not accept negative numbers - * - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. 
*/ template C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); C4_ASSERT(v >= 0); unsigned digits = digits_dec(v); - if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first + if(C4_LIKELY(buf.len >= digits)) write_dec_unchecked(buf, v, digits); return digits; } @@ -10794,18 +11271,18 @@ C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept /** write an integer to a string in hexadecimal format. This is the * lowest level (and the fastest) function to do this task. * @note does not accept negative numbers - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note does not prefix with 0x + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); C4_ASSERT(v >= 0); unsigned digits = digits_hex(v); - if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first + if(C4_LIKELY(buf.len >= digits)) write_hex_unchecked(buf, v, digits); return digits; } @@ -10814,18 +11291,17 @@ C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept * lowest level (and the fastest) function to do this task. * @note does not accept negative numbers * @note does not prefix with 0o - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. 
This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept { C4_STATIC_ASSERT(std::is_integral::value); C4_ASSERT(v >= 0); unsigned digits = digits_oct(v); - if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first + if(C4_LIKELY(buf.len >= digits)) write_oct_unchecked(buf, v, digits); return digits; } @@ -10834,11 +11310,10 @@ C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept * lowest level (and the fastest) function to do this task. * @note does not accept negative numbers * @note does not prefix with 0b - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept { @@ -10874,7 +11349,7 @@ size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept /** same as c4::write_dec(), but pad with zeroes on the left * such that the resulting string is @p num_digits wide. - * If the given number is wider than num_digits, then the number prevails. */ + * If the given number is requires more than num_digits, then the number prevails. 
*/ template C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept { @@ -10883,7 +11358,7 @@ C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept /** same as c4::write_hex(), but pad with zeroes on the left * such that the resulting string is @p num_digits wide. - * If the given number is wider than num_digits, then the number prevails. */ + * If the given number is requires more than num_digits, then the number prevails. */ template C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept { @@ -10892,7 +11367,7 @@ C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept /** same as c4::write_bin(), but pad with zeroes on the left * such that the resulting string is @p num_digits wide. - * If the given number is wider than num_digits, then the number prevails. */ + * If the given number is requires more than num_digits, then the number prevails. */ template C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept { @@ -10901,13 +11376,15 @@ C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept /** same as c4::write_oct(), but pad with zeroes on the left * such that the resulting string is @p num_digits wide. - * If the given number is wider than num_digits, then the number prevails. */ + * If the given number is requires more than num_digits, then the number prevails. */ template C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept { return detail::write_num_digits>(buf, val, num_digits); } +C4_SUPPRESS_WARNING_GCC_POP + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -10918,7 +11395,12 @@ C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept * @note does not accept negative numbers * @note The string must be trimmed. Whitespace is not accepted. 
* @note the string must not be empty - * @return true if the conversion was successful */ + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_dec("128", &val)` returns true + * and val will be set to 0 because 127 is the max i8 value. + * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ template C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept { @@ -10940,7 +11422,12 @@ C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept * @note does not accept leading 0x or 0X * @note the string must not be empty * @note the string must be trimmed. Whitespace is not accepted. - * @return true if the conversion was successful */ + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_hex("80", &val)` returns true + * and val will be set to 0 because 7f is the max i8 value. + * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ template C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept { @@ -10969,7 +11456,12 @@ C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept * @note does not accept leading 0b or 0B * @note the string must not be empty * @note the string must be trimmed. Whitespace is not accepted. - * @return true if the conversion was successful */ + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_bin("10000000", &val)` returns true + * and val will be set to 0 because 1111111 is the max i8 value. 
+ * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ template C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept { @@ -10993,7 +11485,12 @@ C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept * @note does not accept leading 0o or 0O * @note the string must not be empty * @note the string must be trimmed. Whitespace is not accepted. - * @return true if the conversion was successful */ + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_oct("200", &val)` returns true + * and val will be set to 0 because 177 is the max i8 value. + * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ template C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept { @@ -11129,13 +11626,10 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept /** convert an integral signed decimal to a string. - * The resulting string is NOT zero-terminated. - * Writing stops at the buffer's end. - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. 
*/ template C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept { @@ -11164,13 +11658,10 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept /** convert an integral signed integer to a string, using a specific * radix. The radix must be 2, 8, 10 or 16. * - * The resulting string is NOT zero-terminated. - * Writing stops at the buffer's end. - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept { @@ -11241,16 +11732,13 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept /** same as c4::itoa(), but pad with zeroes on the left such that the - * resulting string is @p num_digits wide, not account for radix - * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16. The - * resulting string is NOT zero-terminated. Writing stops at the - * buffer's end. + * resulting string is @p num_digits wide, not accounting for radix + * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16. * - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * @note the resulting string is NOT zero-terminated. 
+ * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexcept { @@ -11326,13 +11814,11 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexce //----------------------------------------------------------------------------- /** convert an integral unsigned decimal to a string. - * The resulting string is NOT zero-terminated. - * Writing stops at the buffer's end. - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept { @@ -11341,14 +11827,13 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept return write_dec(buf, v); } -/** convert an integral unsigned integer to a string, using a specific radix. The radix must be 2, 8, 10 or 16. - * The resulting string is NOT zero-terminated. - * Writing stops at the buffer's end. - * @return the number of characters required for the string, if the - * buffer is large enough to accomodate the largest number of this - * type. Otherwise it returns the latter. This allows reporting the - * size of a successful write, or the size needed for any number of - * this type. */ +/** convert an integral unsigned integer to a string, using a specific + * radix. The radix must be 2, 8, 10 or 16. 
+ * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept { @@ -11398,11 +11883,12 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept /** same as c4::utoa(), but pad with zeroes on the left such that the * resulting string is @p num_digits wide. The @p radix must be 2, - * 8, 10 or 16. The resulting string is NOT zero-terminated. Writing - * occurs only if the buffer is large enough to contain the largest - * value of the type or @p num_digits if it is larger. + * 8, 10 or 16. * - * @return the number of characters required for the string */ + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ template C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexcept { @@ -11471,6 +11957,9 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexce * which case the result will wrap around the type's range. * This is similar to native behavior. * + * @note a positive sign is not accepted. ie, the string must not + * start with '+' + * * @see atoi_first() if the string is not trimmed to the value to read. 
*/ template C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept @@ -11481,13 +11970,14 @@ C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept if(C4_UNLIKELY(str.len == 0)) return false; + C4_ASSERT(str.str[0] != '+'); + T sign = 1; size_t start = 0; if(str.str[0] == '-') { - if(C4_UNLIKELY(str.len == 1)) + if(C4_UNLIKELY(str.len == ++start)) return false; - ++start; sign = -1; } @@ -11646,7 +12136,7 @@ inline bool check_overflow(csubstr str, csubstr limit) noexcept */ template auto overflows(csubstr str) noexcept - -> typename std::enable_if::value, bool>::type + -> typename std::enable_if::value, bool>::type { C4_STATIC_ASSERT(std::is_integral::value); @@ -11710,7 +12200,7 @@ auto overflows(csubstr str) noexcept */ template auto overflows(csubstr str) - -> typename std::enable_if::value, bool>::type + -> typename std::enable_if::value, bool>::type { C4_STATIC_ASSERT(std::is_integral::value); if(C4_UNLIKELY(str.len == 0)) @@ -11811,17 +12301,18 @@ auto overflows(csubstr str) namespace detail { +#if (!C4CORE_HAVE_STD_FROMCHARS) /** @see http://www.exploringbinary.com/ for many good examples on float-str conversion */ template void get_real_format_str(char (& C4_RESTRICT fmt)[N], int precision, RealFormat_e formatting, const char* length_modifier="") { int iret; if(precision == -1) - iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, to_c_fmt(formatting)); + iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, formatting); else if(precision == 0) - iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, to_c_fmt(formatting)); + iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, formatting); else - iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, to_c_fmt(formatting)); + iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, formatting); C4_ASSERT(iret >= 2 && size_t(iret) < sizeof(fmt)); C4_UNUSED(iret); } @@ -11866,8 +12357,10 @@ size_t print_one(substr 
str, const char* full_fmt, T v) return ret; #endif } +#endif // (!C4CORE_HAVE_STD_FROMCHARS) + -#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT) +#if (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT) /** scans a string using the given type format, while at the same time * allowing non-null-terminated strings AND guaranteeing that the given * string length is strictly respected, so that no buffer overflows @@ -11904,24 +12397,28 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) C4_ASSERT(num_chars >= 0); return (size_t)(num_chars); } -#endif +#endif // (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT) #if C4CORE_HAVE_STD_TOCHARS template -size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) +C4_ALWAYS_INLINE size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept { std::to_chars_result result; size_t pos = 0; if(formatting == FTOA_HEXA) { - _c4append('0'); - _c4append('x'); + if(buf.len > size_t(2)) + { + buf.str[0] = '0'; + buf.str[1] = 'x'; + } + pos += size_t(2); } if(precision == -1) - result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting)); + result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting); else - result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting), precision); + result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting, precision); if(result.ec == std::errc()) { // all good, no errors. 
@@ -11945,6 +12442,85 @@ size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX } #endif // C4CORE_HAVE_STD_TOCHARS + +#if C4CORE_HAVE_FAST_FLOAT +template +C4_ALWAYS_INLINE bool scan_rhex(csubstr s, T *C4_RESTRICT val) noexcept +{ + C4_ASSERT(s.len > 0); + C4_ASSERT(s.str[0] != '-'); + C4_ASSERT(s.str[0] != '+'); + C4_ASSERT(!s.begins_with("0x")); + C4_ASSERT(!s.begins_with("0X")); + size_t pos = 0; + // integer part + for( ; pos < s.len; ++pos) + { + const char c = s.str[pos]; + if(c >= '0' && c <= '9') + *val = *val * T(16) + T(c - '0'); + else if(c >= 'a' && c <= 'f') + *val = *val * T(16) + T(c - 'a'); + else if(c >= 'A' && c <= 'F') + *val = *val * T(16) + T(c - 'A'); + else if(c == '.') + { + ++pos; + break; // follow on to mantissa + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power; // no mantissa given, jump to power + } + else + { + return false; + } + } + // mantissa + { + // 0.0625 == 1/16 == value of first digit after the comma + for(T digit = T(0.0625); pos < s.len; ++pos, digit /= T(16)) + { + const char c = s.str[pos]; + if(c >= '0' && c <= '9') + *val += digit * T(c - '0'); + else if(c >= 'a' && c <= 'f') + *val += digit * T(c - 'a'); + else if(c >= 'A' && c <= 'F') + *val += digit * T(c - 'A'); + else if(c == 'p' || c == 'P') + { + ++pos; + goto power; // mantissa finished, jump to power + } + else + { + return false; + } + } + } + return true; +power: + if(C4_LIKELY(pos < s.len)) + { + if(s.str[pos] == '+') // atoi() cannot handle a leading '+' + ++pos; + if(C4_LIKELY(pos < s.len)) + { + int16_t powval = {}; + if(C4_LIKELY(atoi(s.sub(pos), &powval))) + { + *val *= ipow(powval); + return true; + } + } + } + return false; +} +#endif + } // namespace detail @@ -11952,11 +12528,15 @@ size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX #undef _c4append -/** Convert a single-precision real number to string. - * The string will in general be NOT null-terminated. 
- * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise - * \p precision is the number of decimals. */ -inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) +/** Convert a single-precision real number to string. The string will + * in general be NOT null-terminated. For FTOA_FLEX, \p precision is + * the number of significand digits. Otherwise \p precision is the + * number of decimals. It is safe to call this function with an empty + * or too-small buffer. + * + * @return the size of the buffer needed to write the number + */ +C4_ALWAYS_INLINE size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept { #if C4CORE_HAVE_STD_TOCHARS return detail::rtoa(str, v, precision, formatting); @@ -11968,14 +12548,15 @@ inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formattin } -/** Convert a double-precision real number to string. - * The string will in general be NOT null-terminated. - * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise - * \p precision is the number of decimals. +/** Convert a double-precision real number to string. The string will + * in general be NOT null-terminated. For FTOA_FLEX, \p precision is + * the number of significand digits. Otherwise \p precision is the + * number of decimals. It is safe to call this function with an empty + * or too-small buffer. * - * @return the number of characters written. 
+ * @return the size of the buffer needed to write the number */ -inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) +C4_ALWAYS_INLINE size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept { #if C4CORE_HAVE_STD_TOCHARS return detail::rtoa(str, v, precision, formatting); @@ -11993,20 +12574,36 @@ inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatti * @return true iff the conversion succeeded * @see atof_first() if the string is not trimmed */ -inline bool atof(csubstr str, float * C4_RESTRICT v) noexcept +C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept { + C4_ASSERT(str.len > 0); C4_ASSERT(str.triml(" \r\t\n").len == str.len); #if C4CORE_HAVE_FAST_FLOAT - fast_float::from_chars_result result; - result = fast_float::from_chars(str.str, str.str + str.len, *v); - return result.ec == std::errc(); + // fastfloat cannot parse hexadecimal floats + bool isneg = (str.str[0] == '-'); + csubstr rem = str.sub(isneg || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + { + fast_float::from_chars_result result; + result = fast_float::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); + } + else if(detail::scan_rhex(rem.sub(2), v)) + { + *v *= isneg ? 
-1.f : 1.f; + return true; + } + return false; #elif C4CORE_HAVE_STD_FROMCHARS std::from_chars_result result; result = std::from_chars(str.str, str.str + str.len, *v); return result.ec == std::errc(); #else - size_t ret = detail::scan_one(str, "f", v); - return ret != csubstr::npos; + csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + return detail::scan_one(str, "f", v) != csubstr::npos; + else + return detail::scan_one(str, "a", v) != csubstr::npos; #endif } @@ -12017,20 +12614,35 @@ inline bool atof(csubstr str, float * C4_RESTRICT v) noexcept * @return true iff the conversion succeeded * @see atod_first() if the string is not trimmed */ -inline bool atod(csubstr str, double * C4_RESTRICT v) noexcept +C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept { C4_ASSERT(str.triml(" \r\t\n").len == str.len); #if C4CORE_HAVE_FAST_FLOAT - fast_float::from_chars_result result; - result = fast_float::from_chars(str.str, str.str + str.len, *v); - return result.ec == std::errc(); + // fastfloat cannot parse hexadecimal floats + bool isneg = (str.str[0] == '-'); + csubstr rem = str.sub(isneg || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + { + fast_float::from_chars_result result; + result = fast_float::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); + } + else if(detail::scan_rhex(rem.sub(2), v)) + { + *v *= isneg ? -1. 
: 1.; + return true; + } + return false; #elif C4CORE_HAVE_STD_FROMCHARS std::from_chars_result result; result = std::from_chars(str.str, str.str + str.len, *v); return result.ec == std::errc(); #else - size_t ret = detail::scan_one(str, "lf", v); - return ret != csubstr::npos; + csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + return detail::scan_one(str, "lf", v) != csubstr::npos; + else + return detail::scan_one(str, "la", v) != csubstr::npos; #endif } @@ -12099,6 +12711,9 @@ C4_ALWAYS_INLINE size_t xtoa(substr s, int16_t v, int16_t radix, size_t num_di C4_ALWAYS_INLINE size_t xtoa(substr s, int32_t v, int32_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); } C4_ALWAYS_INLINE size_t xtoa(substr s, int64_t v, int64_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, float v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return ftoa(s, v, precision, formatting); } +C4_ALWAYS_INLINE size_t xtoa(substr s, double v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return dtoa(s, v, precision, formatting); } + C4_ALWAYS_INLINE bool atox(csubstr s, uint8_t *C4_RESTRICT v) noexcept { return atou(s, v); } C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) noexcept { return atou(s, v); } C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) noexcept { return atou(s, v); } @@ -12295,7 +12910,15 @@ inline size_t to_chars(substr buf, csubstr v) noexcept { C4_ASSERT(!buf.overlaps(v)); size_t len = buf.len < v.len ? buf.len : v.len; - memcpy(buf.str, v.str, len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v.str != nullptr); + memcpy(buf.str, v.str, len); + } return v.len; } @@ -12322,20 +12945,36 @@ inline size_t to_chars(substr buf, substr v) noexcept { C4_ASSERT(!buf.overlaps(v)); size_t len = buf.len < v.len ? buf.len : v.len; - memcpy(buf.str, v.str, len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v.str != nullptr); + memcpy(buf.str, v.str, len); + } return v.len; } inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept { C4_ASSERT(!buf.overlaps(*v)); - if(buf.len <= v->len) + // is the destination buffer wide enough? + if(v->len >= buf.len) { - memcpy(v->str, buf.str, buf.len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v->str != nullptr); + memcpy(v->str, buf.str, buf.len); + } v->len = buf.len; return true; } - memcpy(v->str, buf.str, v->len); return false; } @@ -12346,7 +12985,15 @@ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept if(C4_UNLIKELY(trimmed.len == 0)) return csubstr::npos; size_t len = trimmed.len > v->len ? v->len : trimmed.len; - memcpy(v->str, trimmed.str, len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v->str != nullptr); + memcpy(v->str, trimmed.str, len); + } if(C4_UNLIKELY(trimmed.len > v->len)) return csubstr::npos; return static_cast(trimmed.end() - buf.begin()); @@ -12772,13 +13419,13 @@ using raw_wrapper = raw_wrapper_; /** mark a variable to be written in raw binary format, using memcpy * @see blob_ */ -inline const_raw_wrapper craw(cblob data, size_t alignment=alignof(max_align_t)) +inline const_raw_wrapper craw(cblob data, size_t alignment=alignof(std::max_align_t)) { return const_raw_wrapper(data, alignment); } /** mark a variable to be written in raw binary format, using memcpy * @see blob_ */ -inline const_raw_wrapper raw(cblob data, size_t alignment=alignof(max_align_t)) +inline const_raw_wrapper raw(cblob data, size_t alignment=alignof(std::max_align_t)) { return const_raw_wrapper(data, alignment); } @@ -12798,7 +13445,7 @@ inline const_raw_wrapper raw(T const& C4_RESTRICT data, size_t alignment=alignof } /** mark a variable to be read in raw binary format, using memcpy */ -inline raw_wrapper raw(blob data, size_t alignment=alignof(max_align_t)) +inline raw_wrapper raw(blob data, size_t alignment=alignof(std::max_align_t)) { return raw_wrapper(data, alignment); } @@ -15448,18 +16095,32 @@ namespace c4 { //----------------------------------------------------------------------------- -/** get a writeable view to an existing std::string */ -inline c4::substr to_substr(std::string &s) +/** get a writeable view to an existing std::string. + * When the string is empty, the returned view will be pointing + * at the character with value '\0', but the size will be zero. + * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at + */ +C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept { - char* data = ! s.empty() ? 
&s[0] : nullptr; - return c4::substr(data, s.size()); + #if C4_CPP < 11 + #error this function will do undefined behavior + #endif + // since c++11 it is legal to call s[s.size()]. + return c4::substr(&s[0], s.size()); } -/** get a readonly view to an existing std::string */ -inline c4::csubstr to_csubstr(std::string const& s) +/** get a readonly view to an existing std::string. + * When the string is empty, the returned view will be pointing + * at the character with value '\0', but the size will be zero. + * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at + */ +C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept { - const char* data = ! s.empty() ? &s[0] : nullptr; - return c4::csubstr(data, s.size()); + #if C4_CPP < 11 + #error this function will do undefined behavior + #endif + // since c++11 it is legal to call s[s.size()]. + return c4::csubstr(&s[0], s.size()); } //----------------------------------------------------------------------------- @@ -15485,7 +16146,15 @@ inline size_t to_chars(c4::substr buf, std::string const& s) { C4_ASSERT(!buf.overlaps(to_csubstr(s))); size_t len = buf.len < s.size() ? buf.len : s.size(); - memcpy(buf.str, s.data(), len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(s.data() != nullptr); + C4_ASSERT(buf.str != nullptr); + memcpy(buf.str, s.data(), len); + } return s.size(); // return the number of needed chars } @@ -15494,7 +16163,14 @@ inline bool from_chars(c4::csubstr buf, std::string * s) { s->resize(buf.len); C4_ASSERT(!buf.overlaps(to_csubstr(*s))); - memcpy(&(*s)[0], buf.str, buf.len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len) + { + C4_ASSERT(buf.str != nullptr); + memcpy(&(*s)[0], buf.str, buf.len); + } return true; } @@ -15579,7 +16255,13 @@ inline size_t to_chars(c4::substr buf, std::vector const& s) { C4_ASSERT(!buf.overlaps(to_csubstr(s))); size_t len = buf.len < s.size() ? buf.len : s.size(); - memcpy(buf.str, s.data(), len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len > 0) + { + memcpy(buf.str, s.data(), len); + } return s.size(); // return the number of needed chars } @@ -15589,7 +16271,13 @@ inline bool from_chars(c4::csubstr buf, std::vector * s) { s->resize(buf.len); C4_ASSERT(!buf.overlaps(to_csubstr(*s))); - memcpy(&(*s)[0], buf.str, buf.len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len > 0) + { + memcpy(&(*s)[0], buf.str, buf.len); + } return true; } @@ -16515,27 +17203,6 @@ bool from_chars(csubstr buf, fmt::raw_wrapper *r) namespace c4 { -/** returns true if the memory overlaps */ -bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb) -{ - if(a < b) - { - if(size_t(a) + sza > size_t(b)) - return true; - } - else if(a > b) - { - if(size_t(b) + szb > size_t(a)) - return true; - } - else if(a == b) - { - if(sza != 0 && szb != 0) - return true; - } - return false; -} - /** Fills 'dest' with the first 'pattern_size' bytes at 'pattern', 'num_times'. 
*/ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times) { @@ -16802,7 +17469,7 @@ void detail::_MemoryResourceSingleChunk::acquire(size_t sz) { clear(); m_owner = true; - m_mem = (char*) impl_type::allocate(sz, alignof(max_align_t)); + m_mem = (char*) impl_type::allocate(sz, alignof(std::max_align_t)); m_size = sz; m_pos = 0; } @@ -17841,11 +18508,24 @@ bool is_debugger_attached() #endif +#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) +# define RYML_DEBUG_BREAK() +#else +# define RYML_DEBUG_BREAK() \ + { \ + if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ + { \ + C4_DEBUG_BREAK(); \ + } \ + } +#endif + + #define RYML_CHECK(cond) \ do { \ if(!(cond)) \ { \ - C4_DEBUG_BREAK(); \ + RYML_DEBUG_BREAK() \ c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ } \ } while(0) @@ -17855,7 +18535,7 @@ bool is_debugger_attached() { \ if(!(cond)) \ { \ - C4_DEBUG_BREAK(); \ + RYML_DEBUG_BREAK() \ c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ } \ } while(0) @@ -17865,9 +18545,9 @@ bool is_debugger_attached() # define RYML_DEPRECATED(msg) [[deprecated(msg)]] #else # if defined(_MSC_VER) -# define RYML_DEPRECATED(msg) __declspec(deprecated) +# define RYML_DEPRECATED(msg) __declspec(deprecated(msg)) # else // defined(__GNUC__) || defined(__clang__) -# define RYML_DEPRECATED(msg) __attribute__((deprecated)) +# define RYML_DEPRECATED(msg) __attribute__((deprecated(msg))) # endif #endif @@ -18003,7 +18683,7 @@ RYML_EXPORT void reset_callbacks(); do \ { \ const char msg[] = msg_literal; \ - C4_DEBUG_BREAK(); \ + RYML_DEBUG_BREAK() \ (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ } while(0) #define _RYML_CB_CHECK(cb, cond) \ @@ -18012,7 +18692,7 @@ do \ if(!(cond)) \ { \ const char msg[] = "check failed: " #cond; \ - C4_DEBUG_BREAK(); \ + RYML_DEBUG_BREAK() \ (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, 
__LINE__, 0), (cb).m_user_data); \ } \ } while(0) @@ -18152,6 +18832,7 @@ struct NodeScalar; struct NodeInit; struct NodeData; class NodeRef; +class ConstNodeRef; class Tree; @@ -18291,6 +18972,8 @@ typedef enum : type_bits { DOCMAP = DOC|MAP, DOCSEQ = DOC|SEQ, DOCVAL = DOC|VAL, + _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, + _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, // these flags are from a work in progress and should not be used yet _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}') _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}') @@ -18330,9 +19013,6 @@ struct NodeType public: - C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } - C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } - C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} @@ -18351,6 +19031,14 @@ struct NodeType C4_ALWAYS_INLINE void clear() { type = NOTYPE; } +public: + + C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } + C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } + + C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } + C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } + public: #if defined(__clang__) @@ -18363,6 +19051,7 @@ struct NodeType # endif #endif + C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } @@ -18591,14 +19280,10 @@ class RYML_EXPORT Tree inline bool empty() const { 
return m_size == 0; } - inline size_t size () const { return m_size; } + inline size_t size() const { return m_size; } inline size_t capacity() const { return m_cap; } inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } - inline size_t arena_size() const { return m_arena_pos; } - inline size_t arena_capacity() const { return m_arena.len; } - inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } - Callbacks const& callbacks() const { return m_callbacks; } void callbacks(Callbacks const& cb) { m_callbacks = cb; } @@ -18653,35 +19338,43 @@ class RYML_EXPORT Tree size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } //! Get a NodeRef of a node by id - NodeRef ref(size_t id); + NodeRef ref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef ref(size_t id) const; //! Get a NodeRef of a node by id - NodeRef const ref(size_t id) const; + ConstNodeRef cref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef cref(size_t id) const; //! Get the root as a NodeRef - NodeRef rootref(); + NodeRef rootref(); + //! Get the root as a NodeRef + ConstNodeRef rootref() const; //! Get the root as a NodeRef - NodeRef const rootref() const; + ConstNodeRef crootref(); + //! Get the root as a NodeRef + ConstNodeRef crootref() const; //! find a root child by name, return it as a NodeRef //! @note requires the root to be a map. - NodeRef operator[] (csubstr key); + NodeRef operator[] (csubstr key); //! find a root child by name, return it as a NodeRef //! @note requires the root to be a map. - NodeRef const operator[] (csubstr key) const; + ConstNodeRef operator[] (csubstr key) const; //! find a root child by index: return the root node's @p i-th child as a NodeRef //! @note @i is NOT the node id, but the child's position - NodeRef operator[] (size_t i); + NodeRef operator[] (size_t i); //! 
find a root child by index: return the root node's @p i-th child as a NodeRef //! @note @i is NOT the node id, but the child's position - NodeRef const operator[] (size_t i) const; + ConstNodeRef operator[] (size_t i) const; //! get the i-th document of the stream //! @note @i is NOT the node id, but the doc position within the stream - NodeRef docref(size_t i); + NodeRef docref(size_t i); //! get the i-th document of the stream //! @note @i is NOT the node id, but the doc position within the stream - NodeRef const docref(size_t i) const; + ConstNodeRef docref(size_t i) const; /** @} */ @@ -18705,14 +19398,11 @@ class RYML_EXPORT Tree csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } - bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); if(is_key_quoted(node)) return false; csubstr s = _p(node)->m_key.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; } - bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); if(is_val_quoted(node)) return false; csubstr s = _p(node)->m_val.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; } - /** @} */ public: - /** @name node type predicates */ + /** @name node predicates */ /** @{ */ C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } @@ -18744,9 +19434,20 @@ class RYML_EXPORT Tree C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } /** true when key and val are empty, and has no children */ - bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } + C4_ALWAYS_INLINE bool empty(size_t node) const { return ! 
has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } /** true when the node has an anchor named a */ - bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); } + C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); } + static bool _is_null(csubstr s) noexcept + { + return s.str == nullptr || + s == "~" || + s == "null" || + s == "Null" || + s == "NULL"; + } /** @} */ @@ -18759,16 +19460,30 @@ class RYML_EXPORT Tree bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } + /** true if @p node has a child with id @p ch */ + bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } + /** true if @p node has a child with key @p key */ bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } - bool has_child(size_t node, size_t ch) const { return child_pos(node, ch) != npos; } + /** true if @p node has any children key */ bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } - bool has_sibling(size_t node, size_t sib) const { return is_root(node) ? 
sib==node : child_pos(_p(node)->m_parent, sib) != npos; } + /** true if @p node has a sibling with id @p sib */ + bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } + /** true if one of the node's siblings has the given key */ bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } - /** counts with *this */ - bool has_siblings(size_t /*node*/) const { return true; } - /** does not count with *this */ - bool has_other_siblings(size_t node) const { return is_root(node) ? false : (_p(_p(node)->m_parent)->m_first_child != _p(_p(node)->m_parent)->m_last_child); } + /** true if node is not a single child */ + bool has_other_siblings(size_t node) const + { + NodeData const *n = _p(node); + if(C4_LIKELY(n->m_parent != NONE)) + { + n = _p(n->m_parent); + return n->m_first_child != n->m_last_child; + } + return false; + } + + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } /** @} */ @@ -18909,20 +19624,22 @@ class RYML_EXPORT Tree /** @name modifying hierarchy */ /** @{ */ - /** create and insert a new child of "parent". insert after the (to-be) - * sibling "after", which must be a child of "parent". To insert as the + /** create and insert a new child of @p parent. insert after the (to-be) + * sibling @p after, which must be a child of @p parent. 
To insert as the * first child, set after to NONE */ - inline size_t insert_child(size_t parent, size_t after) + C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) { RYML_ASSERT(parent != NONE); RYML_ASSERT(is_container(parent) || is_root(parent)); - RYML_ASSERT(after == NONE || has_child(parent, after)); + RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent)); size_t child = _claim(); _set_hierarchy(child, parent, after); return child; } - inline size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } - inline size_t append_child(size_t parent) { return insert_child(parent, last_child(parent)); } + /** create and insert a node as the first child of @p parent */ + C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } + /** create and insert a node as the last child of @p parent */ + C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } public: @@ -18937,17 +19654,13 @@ class RYML_EXPORT Tree #endif //! create and insert a new sibling of n. insert after "after" - inline size_t insert_sibling(size_t node, size_t after) + C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) { - RYML_ASSERT(node != NONE); - RYML_ASSERT( ! 
is_root(node)); - RYML_ASSERT(parent(node) != NONE); - RYML_ASSERT(after == NONE || (has_sibling(node, after) && has_sibling(after, node))); - RYML_ASSERT(get(node) != nullptr); - return insert_child(get(node)->m_parent, after); + return insert_child(_p(node)->m_parent, after); } - inline size_t prepend_sibling(size_t node) { return insert_sibling(node, NONE); } - inline size_t append_sibling(size_t node) { return insert_sibling(node, last_sibling(node)); } + /** create and insert a node as the first node of @p parent */ + C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } public: @@ -19060,7 +19773,13 @@ class RYML_EXPORT Tree /** @{ */ /** get the current size of the tree's internal arena */ - size_t arena_pos() const { return m_arena_pos; } + RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; } + /** get the current size of the tree's internal arena */ + inline size_t arena_size() const { return m_arena_pos; } + /** get the current capacity of the tree's internal arena */ + inline size_t arena_capacity() const { return m_arena.len; } + /** get the current slack of the tree's internal arena */ + inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } /** get the current arena */ substr arena() const { return m_arena.first(m_arena_pos); } @@ -19071,51 +19790,117 @@ class RYML_EXPORT Tree return m_arena.is_super(s); } - /** serialize the given non-floating-point variable to the tree's arena, growing it as - * needed to accomodate the serialization. + /** serialize the given floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * * @note Growing the arena may cause relocation of the entire - * existing arena, and thus change the contents of individual nodes. 
+ * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * * @see alloc_arena() */ template - typename std::enable_if::value, csubstr>::type + typename std::enable_if::value, csubstr>::type to_arena(T const& C4_RESTRICT a) { substr rem(m_arena.sub(m_arena_pos)); - size_t num = to_chars(rem, a); + size_t num = to_chars_float(rem, a); if(num > rem.len) { rem = _grow_arena(num); - num = to_chars(rem, a); + num = to_chars_float(rem, a); RYML_ASSERT(num <= rem.len); } rem = _request_span(num); return rem; } - /** serialize the given floating-point variable to the tree's arena, growing it as - * needed to accomodate the serialization. + /** serialize the given non-floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * * @note Growing the arena may cause relocation of the entire - * existing arena, and thus change the contents of individual nodes. + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * * @see alloc_arena() */ template - typename std::enable_if::value, csubstr>::type + typename std::enable_if::value, csubstr>::type to_arena(T const& C4_RESTRICT a) { substr rem(m_arena.sub(m_arena_pos)); - size_t num = to_chars_float(rem, a); + size_t num = to_chars(rem, a); if(num > rem.len) { rem = _grow_arena(num); - num = to_chars_float(rem, a); + num = to_chars(rem, a); RYML_ASSERT(num <= rem.len); } rem = _request_span(num); return rem; } - /** copy the given substr to the tree's arena, growing it by the required size + /** serialize the given csubstr to the tree's arena, growing the + * arena as needed to accomodate the serialization. 
+ * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + csubstr to_arena(csubstr a) + { + if(a.len > 0) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + return _request_span(num); + } + else + { + if(a.str == nullptr) + { + return csubstr{}; + } + else if(m_arena.str == nullptr) + { + // Arena is empty and we want to store a non-null + // zero-length string. + // Even though the string has zero length, we need + // some "memory" to store a non-nullptr string + _grow_arena(1); + } + return _request_span(0); + } + } + C4_ALWAYS_INLINE csubstr to_arena(const char *s) + { + return to_arena(to_csubstr(s)); + } + C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t) + { + return csubstr{}; + } + + /** copy the given substr to the tree's arena, growing it by the + * required size + * * @note Growing the arena may cause relocation of the entire - * existing arena, and thus change the contents of individual nodes. + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). 
To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * * @see alloc_arena() */ substr copy_to_arena(csubstr s) { @@ -19127,7 +19912,8 @@ class RYML_EXPORT Tree C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior #endif - memcpy(cp.str, s.str, s.len); + if(s.len) + memcpy(cp.str, s.str, s.len); #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) C4_SUPPRESS_WARNING_GCC_POP #endif @@ -19136,8 +19922,14 @@ class RYML_EXPORT Tree /** grow the tree's string arena by the given size and return a substr * of the added portion + * * @note Growing the arena may cause relocation of the entire - * existing arena, and thus change the contents of individual nodes. */ + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena(). + * + * @see reserve_arena() */ substr alloc_arena(size_t sz) { if(sz > arena_slack()) @@ -19147,7 +19939,8 @@ class RYML_EXPORT Tree } /** ensure the tree's internal string arena is at least the given capacity - * @note Growing the arena may cause relocation of the entire + * @note This operation has a potential complexity of O(numNodes)+O(arenasize). + * Growing the arena may cause relocation of the entire * existing arena, and thus change the contents of individual nodes. */ void reserve_arena(size_t arena_cap) { @@ -19172,7 +19965,7 @@ class RYML_EXPORT Tree substr _grow_arena(size_t more) { - size_t cap = m_arena_pos + more; + size_t cap = m_arena.len + more; cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap; cap = cap < 64 ? 
64 : cap; reserve_arena(cap); @@ -19407,21 +20200,14 @@ class RYML_EXPORT Tree void _swap_hierarchy(size_t n_, size_t m_); void _copy_hierarchy(size_t dst_, size_t src_); - void _copy_props(size_t dst_, size_t src_) + inline void _copy_props(size_t dst_, size_t src_) { - auto & C4_RESTRICT dst = *_p(dst_); - auto const& C4_RESTRICT src = *_p(src_); - dst.m_type = src.m_type; - dst.m_key = src.m_key; - dst.m_val = src.m_val; + _copy_props(dst_, this, src_); } - void _copy_props_wo_key(size_t dst_, size_t src_) + inline void _copy_props_wo_key(size_t dst_, size_t src_) { - auto & C4_RESTRICT dst = *_p(dst_); - auto const& C4_RESTRICT src = *_p(src_); - dst.m_type = src.m_type; - dst.m_val = src.m_val; + _copy_props_wo_key(dst_, this, src_); } void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) @@ -19437,7 +20223,7 @@ class RYML_EXPORT Tree { auto & C4_RESTRICT dst = *_p(dst_); auto const& C4_RESTRICT src = *that_tree->_p(src_); - dst.m_type = src.m_type; + dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK); dst.m_val = src.m_val; } @@ -19465,7 +20251,7 @@ class RYML_EXPORT Tree inline void _clear_val(size_t node) { - _p(node)->m_key.clear(); + _p(node)->m_val.clear(); _rem_flags(node, VAL); } @@ -19586,110 +20372,150 @@ read(NodeRef const& n, T *v); //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -/** a reference to a node in an existing yaml tree, offering a more - * convenient API than the index-based API used in the tree. */ -class RYML_EXPORT NodeRef -{ -private: - - // require valid: a helper macro, undefined at the end - #define _C4RV() RYML_ASSERT(valid() && !is_seed()) - - Tree *C4_RESTRICT m_tree; - size_t m_id; - - /** This member is used to enable lazy operator[] writing. 
When a child - * with a key or index is not found, m_id is set to the id of the parent - * and the asked-for key or index are stored in this member until a write - * does happen. Then it is given as key or index for creating the child. - * When a key is used, the csubstr stores it (so the csubstr's string is - * non-null and the csubstr's size is different from NONE). When an index is - * used instead, the csubstr's string is set to null, and only the csubstr's - * size is set to a value different from NONE. Otherwise, when operator[] - * does find the child then this member is empty: the string is null and - * the size is NONE. */ - csubstr m_seed; +// forward decls +class NodeRef; +class ConstNodeRef; -public: +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- - /** @name node construction */ - /** @{ */ +namespace detail { - NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } - NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } - NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } - NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} - NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} +template +struct child_iterator +{ + using value_type = NodeRefType; + using tree_type = typename NodeRefType::tree_type; - NodeRef(NodeRef const&) = default; - NodeRef(NodeRef &&) = default; + tree_type * C4_RESTRICT m_tree; + size_t m_child_id; - NodeRef& operator= (NodeRef const&) = default; - NodeRef& operator= (NodeRef &&) = default; + 
child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {} - /** @} */ + child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } + child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } -public: + NodeRefType operator* () const { return NodeRefType(m_tree, m_child_id); } + NodeRefType operator-> () const { return NodeRefType(m_tree, m_child_id); } - inline Tree * tree() { return m_tree; } - inline Tree const* tree() const { return m_tree; } + bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } + bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } +}; - inline size_t id() const { return m_id; } +template +struct children_view_ +{ + using n_iterator = child_iterator; - inline NodeData * get() { return m_tree->get(m_id); } - inline NodeData const* get() const { return m_tree->get(m_id); } + n_iterator b, e; - inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } - inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); } + inline children_view_(n_iterator const& C4_RESTRICT b_, + n_iterator const& C4_RESTRICT e_) : b(b_), e(e_) {} - inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } - inline bool operator!= (std::nullptr_t) const { return ! 
this->operator== (nullptr); } + inline n_iterator begin() const { return b; } + inline n_iterator end () const { return e; } +}; - inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } - inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } +template +bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! (node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + return true; + ++increment; + } + if(node.has_children()) + { + for(auto ch : node.children()) + { + if(_visit(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + return true; + } + } + } + return false; +} - //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } +template +bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! 
(node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + { + return true; + } + ++increment; + } + if(node.has_children()) + { + fn.push(node, indentation_level); + for(auto ch : node.children()) + { + if(_visit_stacked(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + fn.pop(node, indentation_level); + return true; + } + } + fn.pop(node, indentation_level); + } + return false; +} -public: - inline bool valid() const { return m_tree != nullptr && m_id != NONE; } - inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } +//----------------------------------------------------------------------------- - inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } +/** a CRTP base for read-only node methods */ +template +struct RoNodeMethods +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align") + // helper CRTP macros, undefined at the end + #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree + #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id + #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree + #define id__ ((Impl const* C4_RESTRICT)this)->m_id + // require valid + #define _C4RV() \ + RYML_ASSERT(tree_ != nullptr); \ + _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE) + #define _C4_IF_MUTABLE(ty) typename std::enable_if::value, ty>::type public: /** @name node property getters */ /** @{ */ - inline NodeType type() const { _C4RV(); return m_tree->type(m_id); } - inline const char* type_str() const { _C4RV(); RYML_ASSERT(valid() && ! 
is_seed()); return m_tree->type_str(m_id); } + /** returns the data or null when the id is NONE */ + C4_ALWAYS_INLINE C4_PURE NodeData const* get() const noexcept { RYML_ASSERT(tree_ != nullptr); return tree_->get(id_); } + /** returns the data or null when the id is NONE */ + template + C4_ALWAYS_INLINE C4_PURE auto get() noexcept -> _C4_IF_MUTABLE(NodeData*) { RYML_ASSERT(tree_ != nullptr); return tree__->get(id__); } - inline csubstr key() const { _C4RV(); return m_tree->key(m_id); } - inline csubstr key_tag() const { _C4RV(); return m_tree->key_tag(m_id); } - inline csubstr key_ref() const { _C4RV(); return m_tree->key_ref(m_id); } - inline csubstr key_anchor() const { _C4RV(); return m_tree->key_anchor(m_id); } - inline NodeScalar keysc() const { _C4RV(); return m_tree->keysc(m_id); } + C4_ALWAYS_INLINE C4_PURE NodeType type() const noexcept { _C4RV(); return tree_->type(id_); } + C4_ALWAYS_INLINE C4_PURE const char* type_str() const noexcept { return tree_->type_str(id_); } - inline csubstr val() const { _C4RV(); return m_tree->val(m_id); } - inline csubstr val_tag() const { _C4RV(); return m_tree->val_tag(m_id); } - inline csubstr val_ref() const { _C4RV(); return m_tree->val_ref(m_id); } - inline csubstr val_anchor() const { _C4RV(); return m_tree->val_anchor(m_id); } - inline NodeScalar valsc() const { _C4RV(); return m_tree->valsc(m_id); } + C4_ALWAYS_INLINE C4_PURE csubstr key() const noexcept { _C4RV(); return tree_->key(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_tag() const noexcept { _C4RV(); return tree_->key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_ref() const noexcept { _C4RV(); return tree_->key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_anchor() const noexcept { _C4RV(); return tree_->key_anchor(id_); } - inline bool key_is_null() const { _C4RV(); return m_tree->key_is_null(m_id); } - inline bool val_is_null() const { _C4RV(); return m_tree->val_is_null(m_id); } + C4_ALWAYS_INLINE C4_PURE csubstr val() const noexcept { 
_C4RV(); return tree_->val(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_tag() const noexcept { _C4RV(); return tree_->val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_ref() const noexcept { _C4RV(); return tree_->val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_anchor() const noexcept { _C4RV(); return tree_->val_anchor(id_); } - /** decode the base64-encoded key deserialize and assign the - * decoded blob to the given buffer/ - * @return the size of base64-decoded blob */ - size_t deserialize_key(fmt::base64_wrapper v) const; - /** decode the base64-encoded key deserialize and assign the - * decoded blob to the given buffer/ - * @return the size of base64-decoded blob */ - size_t deserialize_val(fmt::base64_wrapper v) const; + C4_ALWAYS_INLINE C4_PURE NodeScalar const& keysc() const noexcept { _C4RV(); return tree_->keysc(id_); } + C4_ALWAYS_INLINE C4_PURE NodeScalar const& valsc() const noexcept { _C4RV(); return tree_->valsc(id_); } + + C4_ALWAYS_INLINE C4_PURE bool key_is_null() const noexcept { _C4RV(); return tree_->key_is_null(id_); } + C4_ALWAYS_INLINE C4_PURE bool val_is_null() const noexcept { _C4RV(); return tree_->val_is_null(id_); } /** @} */ @@ -19698,36 +20524,33 @@ class RYML_EXPORT NodeRef /** @name node property predicates */ /** @{ */ - C4_ALWAYS_INLINE bool is_stream() const { _C4RV(); return m_tree->is_stream(m_id); } - C4_ALWAYS_INLINE bool is_doc() const { _C4RV(); return m_tree->is_doc(m_id); } - C4_ALWAYS_INLINE bool is_container() const { _C4RV(); return m_tree->is_container(m_id); } - C4_ALWAYS_INLINE bool is_map() const { _C4RV(); return m_tree->is_map(m_id); } - C4_ALWAYS_INLINE bool is_seq() const { _C4RV(); return m_tree->is_seq(m_id); } - C4_ALWAYS_INLINE bool has_val() const { _C4RV(); return m_tree->has_val(m_id); } - C4_ALWAYS_INLINE bool has_key() const { _C4RV(); return m_tree->has_key(m_id); } - C4_ALWAYS_INLINE bool is_val() const { _C4RV(); return m_tree->is_val(m_id); } - C4_ALWAYS_INLINE bool is_keyval() const 
{ _C4RV(); return m_tree->is_keyval(m_id); } - C4_ALWAYS_INLINE bool has_key_tag() const { _C4RV(); return m_tree->has_key_tag(m_id); } - C4_ALWAYS_INLINE bool has_val_tag() const { _C4RV(); return m_tree->has_val_tag(m_id); } - C4_ALWAYS_INLINE bool has_key_anchor() const { _C4RV(); return m_tree->has_key_anchor(m_id); } - C4_ALWAYS_INLINE bool is_key_anchor() const { _C4RV(); return m_tree->is_key_anchor(m_id); } - C4_ALWAYS_INLINE bool has_val_anchor() const { _C4RV(); return m_tree->has_val_anchor(m_id); } - C4_ALWAYS_INLINE bool is_val_anchor() const { _C4RV(); return m_tree->is_val_anchor(m_id); } - C4_ALWAYS_INLINE bool has_anchor() const { _C4RV(); return m_tree->has_anchor(m_id); } - C4_ALWAYS_INLINE bool is_anchor() const { _C4RV(); return m_tree->is_anchor(m_id); } - C4_ALWAYS_INLINE bool is_key_ref() const { _C4RV(); return m_tree->is_key_ref(m_id); } - C4_ALWAYS_INLINE bool is_val_ref() const { _C4RV(); return m_tree->is_val_ref(m_id); } - C4_ALWAYS_INLINE bool is_ref() const { _C4RV(); return m_tree->is_ref(m_id); } - C4_ALWAYS_INLINE bool is_anchor_or_ref() const { _C4RV(); return m_tree->is_anchor_or_ref(m_id); } - C4_ALWAYS_INLINE bool is_key_quoted() const { _C4RV(); return m_tree->is_key_quoted(m_id); } - C4_ALWAYS_INLINE bool is_val_quoted() const { _C4RV(); return m_tree->is_val_quoted(m_id); } - C4_ALWAYS_INLINE bool is_quoted() const { _C4RV(); return m_tree->is_quoted(m_id); } - - C4_ALWAYS_INLINE bool parent_is_seq() const { _C4RV(); return m_tree->parent_is_seq(m_id); } - C4_ALWAYS_INLINE bool parent_is_map() const { _C4RV(); return m_tree->parent_is_map(m_id); } - - /** true when name and value are empty, and has no children */ - C4_ALWAYS_INLINE bool empty() const { _C4RV(); return m_tree->empty(m_id); } + C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { _C4RV(); return tree_->empty(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_stream() const noexcept { _C4RV(); return tree_->is_stream(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_doc() 
const noexcept { _C4RV(); return tree_->is_doc(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_container() const noexcept { _C4RV(); return tree_->is_container(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_map() const noexcept { _C4RV(); return tree_->is_map(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_seq() const noexcept { _C4RV(); return tree_->is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val() const noexcept { _C4RV(); return tree_->has_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key() const noexcept { _C4RV(); return tree_->has_key(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val() const noexcept { _C4RV(); return tree_->is_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_keyval() const noexcept { _C4RV(); return tree_->is_keyval(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_tag() const noexcept { _C4RV(); return tree_->has_key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_tag() const noexcept { _C4RV(); return tree_->has_val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_anchor() const noexcept { _C4RV(); return tree_->has_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_anchor() const noexcept { _C4RV(); return tree_->is_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_anchor() const noexcept { _C4RV(); return tree_->has_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_anchor() const noexcept { _C4RV(); return tree_->is_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_anchor() const noexcept { _C4RV(); return tree_->has_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor() const noexcept { _C4RV(); return tree_->is_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_ref() const noexcept { _C4RV(); return tree_->is_key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_ref() const noexcept { _C4RV(); return tree_->is_val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_ref() const noexcept { _C4RV(); return tree_->is_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor_or_ref() const noexcept { _C4RV(); return 
tree_->is_anchor_or_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_quoted() const noexcept { _C4RV(); return tree_->is_key_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_quoted() const noexcept { _C4RV(); return tree_->is_val_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_quoted() const noexcept { _C4RV(); return tree_->is_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_seq() const noexcept { _C4RV(); return tree_->parent_is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_map() const noexcept { _C4RV(); return tree_->parent_is_map(id_); } /** @} */ @@ -19736,19 +20559,19 @@ class RYML_EXPORT NodeRef /** @name hierarchy predicates */ /** @{ */ - inline bool is_root() const { _C4RV(); return m_tree->is_root(m_id); } - inline bool has_parent() const { _C4RV(); return m_tree->has_parent(m_id); } + C4_ALWAYS_INLINE C4_PURE bool is_root() const noexcept { _C4RV(); return tree_->is_root(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_parent() const noexcept { _C4RV(); return tree_->has_parent(id_); } - inline bool has_child(NodeRef const& ch) const { _C4RV(); return m_tree->has_child(m_id, ch.m_id); } - inline bool has_child(csubstr name) const { _C4RV(); return m_tree->has_child(m_id, name); } - inline bool has_children() const { _C4RV(); return m_tree->has_children(m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_child(ConstImpl const& ch) const noexcept { _C4RV(); return tree_->has_child(id_, ch.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_child(csubstr name) const noexcept { _C4RV(); return tree_->has_child(id_, name); } + C4_ALWAYS_INLINE C4_PURE bool has_children() const noexcept { _C4RV(); return tree_->has_children(id_); } - inline bool has_sibling(NodeRef const& n) const { _C4RV(); return m_tree->has_sibling(m_id, n.m_id); } - inline bool has_sibling(csubstr name) const { _C4RV(); return m_tree->has_sibling(m_id, name); } + C4_ALWAYS_INLINE C4_PURE bool has_sibling(ConstImpl const& n) const noexcept { _C4RV(); return tree_->has_sibling(id_, 
n.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_sibling(csubstr name) const noexcept { _C4RV(); return tree_->has_sibling(id_, name); } /** counts with this */ - inline bool has_siblings() const { _C4RV(); return m_tree->has_siblings(m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_siblings() const noexcept { _C4RV(); return tree_->has_siblings(id_); } /** does not count with this */ - inline bool has_other_siblings() const { _C4RV(); return m_tree->has_other_siblings(m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_other_siblings() const noexcept { _C4RV(); return tree_->has_other_siblings(id_); } /** @} */ @@ -19757,42 +20580,497 @@ class RYML_EXPORT NodeRef /** @name hierarchy getters */ /** @{ */ - NodeRef parent() { _C4RV(); return {m_tree, m_tree->parent(m_id)}; } - NodeRef const parent() const { _C4RV(); return {m_tree, m_tree->parent(m_id)}; } - NodeRef prev_sibling() { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; } - NodeRef const prev_sibling() const { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; } + template + C4_ALWAYS_INLINE C4_PURE auto doc(size_t num) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->doc(num)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl doc(size_t num) const noexcept { _C4RV(); return {tree_, tree_->doc(num)}; } + + + template + C4_ALWAYS_INLINE C4_PURE auto parent() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->parent(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl parent() const noexcept { _C4RV(); return {tree_, tree_->parent(id_)}; } - NodeRef next_sibling() { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; } - NodeRef const next_sibling() const { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; } /** O(#num_children) */ - size_t num_children() const { _C4RV(); return m_tree->num_children(m_id); } - size_t child_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_id, n.m_id); } - NodeRef first_child() { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; } - 
NodeRef const first_child() const { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; } - NodeRef last_child () { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; } - NodeRef const last_child () const { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; } - NodeRef child(size_t pos) { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; } - NodeRef const child(size_t pos) const { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; } - NodeRef find_child(csubstr name) { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; } - NodeRef const find_child(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; } + C4_ALWAYS_INLINE C4_PURE size_t child_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(id_, n.m_id); } + C4_ALWAYS_INLINE C4_PURE size_t num_children() const noexcept { _C4RV(); return tree_->num_children(id_); } + + template + C4_ALWAYS_INLINE C4_PURE auto first_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl first_child() const noexcept { _C4RV(); return {tree_, tree_->first_child(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto last_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_child () const noexcept { _C4RV(); return {tree_, tree_->last_child (id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto child(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->child(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl child(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->child(id_, pos)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto find_child(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_child(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_child(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_child(id_, 
name)}; } + /** O(#num_siblings) */ - size_t num_siblings() const { _C4RV(); return m_tree->num_siblings(m_id); } - size_t num_other_siblings() const { _C4RV(); return m_tree->num_other_siblings(m_id); } - size_t sibling_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_tree->parent(m_id), n.m_id); } - NodeRef first_sibling() { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; } - NodeRef const first_sibling() const { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; } - NodeRef last_sibling () { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; } - NodeRef const last_sibling () const { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; } - NodeRef sibling(size_t pos) { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; } - NodeRef const sibling(size_t pos) const { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; } - NodeRef find_sibling(csubstr name) { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; } - NodeRef const find_sibling(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; } - - NodeRef doc(size_t num) { _C4RV(); return {m_tree, m_tree->doc(num)}; } - NodeRef const doc(size_t num) const { _C4RV(); return {m_tree, m_tree->doc(num)}; } + C4_ALWAYS_INLINE C4_PURE size_t num_siblings() const noexcept { _C4RV(); return tree_->num_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t num_other_siblings() const noexcept { _C4RV(); return tree_->num_other_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t sibling_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(tree_->parent(id_), n.m_id); } + + template + C4_ALWAYS_INLINE C4_PURE auto prev_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->prev_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl prev_sibling() const noexcept { _C4RV(); return {tree_, tree_->prev_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto next_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); 
return {tree__, tree__->next_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl next_sibling() const noexcept { _C4RV(); return {tree_, tree_->next_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto first_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl first_sibling() const noexcept { _C4RV(); return {tree_, tree_->first_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto last_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_sibling () const noexcept { _C4RV(); return {tree_, tree_->last_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto sibling(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->sibling(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl sibling(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->sibling(id_, pos)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto find_sibling(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_sibling(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_sibling(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_sibling(id_, name)}; } + + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (csubstr k) const noexcept + { + _C4RV(); + size_t ch = tree_->find_child(id_, k); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + /** Find child by key. O(num_children). returns a seed node if no such child is found. */ + template + C4_ALWAYS_INLINE C4_PURE auto operator[] (csubstr k) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->find_child(id__, k); + return ch != NONE ? 
Impl(tree__, ch) : NodeRef(tree__, id__, k); + } + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (size_t pos) const noexcept + { + _C4RV(); + size_t ch = tree_->child(id_, pos); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + + /** Find child by position. O(pos). returns a seed node if no such child is found. */ + template + C4_ALWAYS_INLINE C4_PURE auto operator[] (size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->child(id__, pos); + return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, pos); + } + + /** @} */ + +public: + + /** deserialization */ + /** @{ */ + + template + ConstImpl const& operator>> (T &v) const + { + _C4RV(); + if( ! read((ConstImpl const&)*this, &v)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key to the given variable */ + template + ConstImpl const& operator>> (Key v) const + { + _C4RV(); + if( ! 
from_chars(key(), &v.k)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key as base64 */ + ConstImpl const& operator>> (Key w) const + { + deserialize_key(w.wrapper); + return *((ConstImpl const*)this); + } + + /** deserialize the node's val as base64 */ + ConstImpl const& operator>> (fmt::base64_wrapper w) const + { + deserialize_val(w); + return *((ConstImpl const*)this); + } + + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_key(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(key(), &v); + } + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_val(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(val(), &v); + }; + + template + bool get_if(csubstr name, T *var) const + { + auto ch = find_child(name); + if(!ch.valid()) + return false; + ch >> *var; + return true; + } + + template + bool get_if(csubstr name, T *var, T const& fallback) const + { + auto ch = find_child(name); + if(ch.valid()) + { + ch >> *var; + return true; + } + else + { + *var = fallback; + return false; + } + } + + /** @} */ + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + /** @name iteration */ + /** @{ */ + + using iterator = detail::child_iterator; + using const_iterator = detail::child_iterator; + using children_view = detail::children_view_; + using const_children_view = detail::children_view_; + + template + C4_ALWAYS_INLINE C4_PURE auto begin() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, 
tree__->first_child(id__)); } + C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + C4_ALWAYS_INLINE C4_PURE const_iterator cbegin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + template + C4_ALWAYS_INLINE C4_PURE auto end() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator end() const noexcept { _C4RV(); return const_iterator(tree_, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator cend() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + /** get an iterable view over children */ + template + C4_ALWAYS_INLINE C4_PURE auto children() noexcept -> _C4_IF_MUTABLE(children_view) { _C4RV(); return children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view children() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view cchildren() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + + /** get an iterable view over all siblings (including the calling node) */ + template + C4_ALWAYS_INLINE C4_PURE auto siblings() noexcept -> _C4_IF_MUTABLE(children_view) + { + _C4RV(); + NodeData const *nd = tree__->get(id__); + return (nd->m_parent != NONE) ? // does it have a parent? + children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE)) + : + children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view siblings() const noexcept + { + _C4RV(); + NodeData const *nd = tree_->get(id_); + return (nd->m_parent != NONE) ? // does it have a parent? 
+ const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE)) + : + const_children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view csiblings() const noexcept { return siblings(); } + + /** visit every child node calling fn(node) */ + template + C4_ALWAYS_INLINE C4_PURE bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node) */ + template + auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** visit every child node calling fn(node, level) */ + template + C4_ALWAYS_INLINE C4_PURE bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit_stacked(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node, level) */ + template + auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** @} */ + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + + #undef _C4_IF_MUTABLE + #undef _C4RV + #undef tree_ + #undef tree__ + #undef id_ + #undef id__ + + C4_SUPPRESS_WARNING_GCC_CLANG_POP +}; + +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT ConstNodeRef : public 
detail::RoNodeMethods +{ +public: + + using tree_type = Tree const; + +public: + + Tree const* C4_RESTRICT m_tree; + size_t m_id; + + friend NodeRef; + friend struct detail::RoNodeMethods; + +public: + + /** @name construction */ + /** @{ */ + + ConstNodeRef() : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef(Tree const &t) : m_tree(&t), m_id(t .root_id()) {} + ConstNodeRef(Tree const *t) : m_tree(t ), m_id(t->root_id()) {} + ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {} + ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {} + + ConstNodeRef(ConstNodeRef const&) = default; + ConstNodeRef(ConstNodeRef &&) = default; + + ConstNodeRef(NodeRef const&); + ConstNodeRef(NodeRef &&); + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + ConstNodeRef& operator= (std::nullptr_t) { m_tree = nullptr; m_id = NONE; return *this; } + + ConstNodeRef& operator= (ConstNodeRef const&) = default; + ConstNodeRef& operator= (ConstNodeRef &&) = default; + + ConstNodeRef& operator= (NodeRef const&); + ConstNodeRef& operator= (NodeRef &&); + + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool valid() const noexcept { return m_tree != nullptr && m_id != NONE; } + + /** @} */ + +public: + + /** @name member getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool operator== (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return ! 
this->operator==(that); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return ! this->operator== (nullptr); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + /** @} */ + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a reference to a node in an existing yaml tree, offering a more + * convenient API than the index-based API used in the tree. */ +class RYML_EXPORT NodeRef : public detail::RoNodeMethods +{ +public: + + using tree_type = Tree; + using base_type = detail::RoNodeMethods; + +private: + + Tree *C4_RESTRICT m_tree; + size_t m_id; + + /** This member is used to enable lazy operator[] writing. When a child + * with a key or index is not found, m_id is set to the id of the parent + * and the asked-for key or index are stored in this member until a write + * does happen. Then it is given as key or index for creating the child. + * When a key is used, the csubstr stores it (so the csubstr's string is + * non-null and the csubstr's size is different from NONE). When an index is + * used instead, the csubstr's string is set to null, and only the csubstr's + * size is set to a value different from NONE. Otherwise, when operator[] + * does find the child then this member is empty: the string is null and + * the size is NONE. 
*/ + csubstr m_seed; + + friend ConstNodeRef; + friend struct detail::RoNodeMethods; + + // require valid: a helper macro, undefined at the end + #define _C4RV() \ + RYML_ASSERT(m_tree != nullptr); \ + _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed()) + +public: + + /** @name construction */ + /** @{ */ + + NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } + NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + NodeRef(NodeRef const&) = default; + NodeRef(NodeRef &&) = default; + + NodeRef& operator= (NodeRef const&) = default; + NodeRef& operator= (NodeRef &&) = default; + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + inline bool valid() const { return m_tree != nullptr && m_id != NONE; } + inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } + + inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (NodeRef const& that) const { return ! 
this->operator==(that); } + + inline bool operator== (ConstNodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (ConstNodeRef const& that) const { return ! this->operator==(that); } + + inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } + inline bool operator!= (std::nullptr_t) const { return m_tree != nullptr && m_id != NONE && !is_seed(); } + + inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } /** @} */ @@ -19802,6 +21080,7 @@ class RYML_EXPORT NodeRef /** @{ */ void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); } void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); } void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); } @@ -19828,6 +21107,12 @@ class RYML_EXPORT NodeRef m_tree->_set_val(m_id, s); return s.len; } + size_t set_val_serialized(std::nullptr_t) + { + _C4RV(); + m_tree->_set_val(m_id, csubstr{}); + return 0; + } /** encode a blob as base64, then assign the result to the node's key * @return the size of base64-encoded blob */ @@ -19867,62 +21152,6 @@ class RYML_EXPORT NodeRef m_tree->remove_children(m_id); } - /** @} */ - -public: - - /** hierarchy getters */ - /** @{ */ - - /** O(num_children) */ - NodeRef operator[] 
(csubstr k) - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - size_t ch = m_tree->find_child(m_id, k); - NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, k); - return r; - } - - /** O(num_children) */ - NodeRef const operator[] (csubstr k) const - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - size_t ch = m_tree->find_child(m_id, k); - RYML_ASSERT(ch != NONE); - NodeRef const r(m_tree, ch); - return r; - } - - /** O(num_children) */ - NodeRef operator[] (size_t pos) - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - size_t ch = m_tree->child(m_id, pos); - NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, pos); - return r; - } - - /** O(num_children) */ - NodeRef const operator[] (size_t pos) const - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - size_t ch = m_tree->child(m_id, pos); - RYML_ASSERT(ch != NONE); - NodeRef const r(m_tree, ch); - return r; - } - - /** @} */ - -public: - - /** node modification */ - /** @{ */ - void create() { _apply_seed(); } inline void operator= (NodeType_e t) @@ -19949,6 +21178,12 @@ class RYML_EXPORT NodeRef _apply(v); } + inline void operator= (std::nullptr_t) + { + _apply_seed(); + _apply(csubstr{}); + } + inline void operator= (csubstr v) { _apply_seed(); @@ -19968,9 +21203,12 @@ class RYML_EXPORT NodeRef public: + /** @name serialization */ + /** @{ */ + /** serialize a variable to the arena */ template - inline csubstr to_arena(T const& C4_RESTRICT s) const + inline csubstr to_arena(T const& C4_RESTRICT s) { _C4RV(); return m_tree->to_arena(s); @@ -19991,25 +21229,10 @@ class RYML_EXPORT NodeRef inline NodeRef& operator<< (T const& C4_RESTRICT v) { _apply_seed(); - write(this, v); - return *this; - } - - template - inline NodeRef const& operator>> (T &v) const - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - RYML_ASSERT(get() != nullptr); - if( ! 
read(*this, &v)) - { - c4::yml::error("could not deserialize value"); - } + write(this, v); return *this; } -public: - /** serialize a variable, then assign the result to the node's key */ template inline NodeRef& operator<< (Key const& C4_RESTRICT v) @@ -20028,19 +21251,6 @@ class RYML_EXPORT NodeRef return *this; } - /** deserialize the node's key to the given variable */ - template - inline NodeRef const& operator>> (Key v) const - { - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - RYML_ASSERT(get() != nullptr); - from_chars(key(), &v.k); - return *this; - } - -public: - NodeRef& operator<< (Key w) { set_key_serialized(w.wrapper); @@ -20053,43 +21263,7 @@ class RYML_EXPORT NodeRef return *this; } - NodeRef const& operator>> (Key w) const - { - deserialize_key(w.wrapper); - return *this; - } - - NodeRef const& operator>> (fmt::base64_wrapper w) const - { - deserialize_val(w); - return *this; - } - -public: - - template - void get_if(csubstr name, T *var) const - { - auto ch = find_child(name); - if(ch.valid()) - { - ch >> *var; - } - } - - template - void get_if(csubstr name, T *var, T fallback) const - { - auto ch = find_child(name); - if(ch.valid()) - { - ch >> *var; - } - else - { - *var = fallback; - } - } + /** @} */ private: @@ -20133,6 +21307,9 @@ class RYML_EXPORT NodeRef public: + /** @name modification of hierarchy */ + /** @{ */ + inline NodeRef insert_child(NodeRef after) { _C4RV(); @@ -20182,7 +21359,7 @@ class RYML_EXPORT NodeRef public: - inline NodeRef insert_sibling(NodeRef const after) + inline NodeRef insert_sibling(ConstNodeRef const& after) { _C4RV(); RYML_ASSERT(after.m_tree == m_tree); @@ -20190,7 +21367,7 @@ class RYML_EXPORT NodeRef return r; } - inline NodeRef insert_sibling(NodeInit const& i, NodeRef const after) + inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after) { _C4RV(); RYML_ASSERT(after.m_tree == m_tree); @@ -20261,20 +21438,23 @@ class RYML_EXPORT NodeRef public: - /** change the node's position 
within its parent */ - inline void move(NodeRef const after) + /** change the node's position within its parent, placing it after + * @p after. To move to the first position in the parent, simply + * pass an empty or default-constructed reference like this: + * `n.move({})`. */ + inline void move(ConstNodeRef const& after) { _C4RV(); m_tree->move(m_id, after.m_id); } - /** move the node to a different parent, which may belong to a different - * tree. When this is the case, then this node's tree pointer is reset to - * the tree of the parent node. */ - inline void move(NodeRef const parent, NodeRef const after) + /** move the node to a different @p parent (which may belong to a + * different tree), placing it after @p after. When the + * destination parent is in a new tree, then this node's tree + * pointer is reset to the tree of the parent node. */ + inline void move(NodeRef const& parent, ConstNodeRef const& after) { _C4RV(); - RYML_ASSERT(parent.m_tree == after.m_tree); if(parent.m_tree == m_tree) { m_tree->move(m_id, parent.m_id, after.m_id); @@ -20286,10 +21466,28 @@ class RYML_EXPORT NodeRef } } - inline NodeRef duplicate(NodeRef const parent, NodeRef const after) const + /** duplicate the current node somewhere within its parent, and + * place it after the node @p after. To place into the first + * position of the parent, simply pass an empty or + * default-constructed reference like this: `n.move({})`. */ + inline NodeRef duplicate(ConstNodeRef const& after) const { _C4RV(); - RYML_ASSERT(parent.m_tree == after.m_tree); + RYML_ASSERT(m_tree == after.m_tree || after.m_id == NONE); + size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); + NodeRef r(m_tree, dup); + return r; + } + + /** duplicate the current node somewhere into a different @p parent + * (possibly from a different tree), and place it after the node + * @p after. 
To place into the first position of the parent, + * simply pass an empty or default-constructed reference like + * this: `n.move({})`. */ + inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree || after.m_id == NONE); if(parent.m_tree == m_tree) { size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); @@ -20304,7 +21502,7 @@ class RYML_EXPORT NodeRef } } - inline void duplicate_children(NodeRef const parent, NodeRef const after) const + inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const { _C4RV(); RYML_ASSERT(parent.m_tree == after.m_tree); @@ -20318,97 +21516,44 @@ class RYML_EXPORT NodeRef } } -private: - - template - struct child_iterator - { - Tree * m_tree; - size_t m_child_id; - - using value_type = NodeRef; - - child_iterator(Tree * t, size_t id) : m_tree(t), m_child_id(id) {} - - child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } - child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } - - Nd operator* () const { return Nd(m_tree, m_child_id); } - Nd operator-> () const { return Nd(m_tree, m_child_id); } - - bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } - bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } - }; - -public: - - using iterator = child_iterator< NodeRef>; - using const_iterator = child_iterator; - - inline iterator begin() { return iterator(m_tree, m_tree->first_child(m_id)); } - inline iterator end () { return iterator(m_tree, NONE); } - - inline const_iterator begin() const { return const_iterator(m_tree, m_tree->first_child(m_id)); } - inline const_iterator end () const { return const_iterator(m_tree, NONE); } - -private: - 
- template - struct children_view_ - { - using n_iterator = child_iterator; - - n_iterator b, e; - - inline children_view_(n_iterator const& b_, n_iterator const& e_) : b(b_), e(e_) {} - - inline n_iterator begin() const { return b; } - inline n_iterator end () const { return e; } - }; - -public: + /** @} */ - using children_view = children_view_< NodeRef>; - using const_children_view = children_view_; +#undef _C4RV +}; - children_view children() { return children_view(begin(), end()); } - const_children_view children() const { return const_children_view(begin(), end()); } - #if defined(__clang__) - # pragma clang diagnostic push - # pragma clang diagnostic ignored "-Wnull-dereference" - #elif defined(__GNUC__) - # pragma GCC diagnostic push - # if __GNUC__ >= 6 - # pragma GCC diagnostic ignored "-Wnull-dereference" - # endif - #endif +//----------------------------------------------------------------------------- - children_view siblings() { if(is_root()) { return children_view(end(), end()); } else { size_t p = get()->m_parent; return children_view(iterator(m_tree, m_tree->get(p)->m_first_child), iterator(m_tree, NONE)); } } - const_children_view siblings() const { if(is_root()) { return const_children_view(end(), end()); } else { size_t p = get()->m_parent; return const_children_view(const_iterator(m_tree, m_tree->get(p)->m_first_child), const_iterator(m_tree, NONE)); } } +inline ConstNodeRef::ConstNodeRef(NodeRef const& that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? that.id() : NONE) +{ +} - #if defined(__clang__) - # pragma clang diagnostic pop - #elif defined(__GNUC__) - # pragma GCC diagnostic pop - #endif +inline ConstNodeRef::ConstNodeRef(NodeRef && that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? 
that.id() : NONE) +{ +} -public: - /** visit every child node calling fn(node) */ - template bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true); - /** visit every child node calling fn(node) */ - template bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const; +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} - /** visit every child node calling fn(node, level) */ - template bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true); - /** visit every child node calling fn(node, level) */ - template bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const; +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} -#undef _C4RV -}; //----------------------------------------------------------------------------- + template inline void write(NodeRef *n, T const& v) { @@ -20421,81 +21566,26 @@ inline read(NodeRef const& n, T *v) { return from_chars(n.val(), v); } - template -typename std::enable_if< std::is_floating_point::value, bool>::type -inline read(NodeRef const& n, T *v) -{ - return from_chars_float(n.val(), v); -} - - -//----------------------------------------------------------------------------- -template -bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) +typename std::enable_if< ! std::is_floating_point::value, bool>::type +inline read(ConstNodeRef const& n, T *v) { - return const_cast(this)->visit(fn, indentation_level, skip_root); + return from_chars(n.val(), v); } -template -bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) const +template +typename std::enable_if::value, bool>::type +inline read(NodeRef const& n, T *v) { - size_t increment = 0; - if( ! 
(is_root() && skip_root)) - { - if(fn(this, indentation_level)) - { - return true; - } - ++increment; - } - if(has_children()) - { - for(auto ch : children()) - { - if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root - { - return true; - } - } - } - return false; + return from_chars_float(n.val(), v); } - - -template -bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) +template +typename std::enable_if::value, bool>::type +inline read(ConstNodeRef const& n, T *v) { - return const_cast< NodeRef const* >(this)->visit_stacked(fn, indentation_level, skip_root); + return from_chars_float(n.val(), v); } -template -bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) const -{ - size_t increment = 0; - if( ! (is_root() && skip_root)) - { - if(fn(this, indentation_level)) - { - return true; - } - ++increment; - } - if(has_children()) - { - fn.push(this, indentation_level); - for(auto ch : children()) - { - if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root - { - fn.pop(this, indentation_level); - return true; - } - } - fn.pop(this, indentation_level); - } - return false; -} } // namespace yml } // namespace c4 @@ -20947,6 +22037,20 @@ inline void __c4presc(const char *s, size_t len) #include "./node.hpp" #endif + +#define RYML_DEPRECATE_EMIT \ + RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") +#ifdef emit +#error "emit is defined, likely from a Qt include. This will cause a compilation error. See https://github.com/biojppm/rapidyaml/issues/120" +#endif +#define RYML_DEPRECATE_EMITRS \ + RYML_DEPRECATED("use emitrs_yaml() instead. 
See https://github.com/biojppm/rapidyaml/issues/120") + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + namespace c4 { namespace yml { @@ -20970,7 +22074,7 @@ struct as_json size_t node; as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {} as_json(Tree const& t, size_t id) : tree(&t), node(id) {} - as_json(NodeRef const& n) : tree(n.tree()), node(n.id()) {} + as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {} }; @@ -20994,11 +22098,11 @@ class Emitter : public Writer * * When writing to a file, the returned substr will be null, but its * length will be set to the number of bytes written. */ - substr emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); /** emit starting at the root node */ - substr emit(EmitType_e type, Tree const& t, bool error_on_excess=true); + substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true); /** emit the given node */ - substr emit(EmitType_e type, NodeRef const& n, bool error_on_excess=true); + substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true); private: @@ -21054,27 +22158,36 @@ class Emitter : public Writer /** emit YAML to the given file. A null file defaults to stdout. * Return the number of bytes written. */ -inline size_t emit(Tree const& t, size_t id, FILE *f) +inline size_t emit_yaml(Tree const& t, size_t id, FILE *f) { EmitterFile em(f); - return em.emit(EMIT_YAML, t, id, /*error_on_excess*/true).len; + return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f) +{ + return emit_yaml(t, id, f); } + /** emit JSON to the given file. A null file defaults to stdout. 
* Return the number of bytes written. */ inline size_t emit_json(Tree const& t, size_t id, FILE *f) { EmitterFile em(f); - return em.emit(EMIT_JSON, t, id, /*error_on_excess*/true).len; + return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; } /** emit YAML to the given file. A null file defaults to stdout. * Return the number of bytes written. * @overload */ -inline size_t emit(Tree const& t, FILE *f=nullptr) +inline size_t emit_yaml(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); - return em.emit(EMIT_YAML, t, /*error_on_excess*/true).len; + return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) +{ + return emit_yaml(t, f); } /** emit JSON to the given file. A null file defaults to stdout. @@ -21083,26 +22196,30 @@ inline size_t emit(Tree const& t, FILE *f=nullptr) inline size_t emit_json(Tree const& t, FILE *f=nullptr) { EmitterFile em(f); - return em.emit(EMIT_JSON, t, /*error_on_excess*/true).len; + return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len; } /** emit YAML to the given file. A null file defaults to stdout. * Return the number of bytes written. * @overload */ -inline size_t emit(NodeRef const& r, FILE *f=nullptr) +inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr) { EmitterFile em(f); - return em.emit(EMIT_YAML, r, /*error_on_excess*/true).len; + return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) +{ + return emit_yaml(r, f); } /** emit JSON to the given file. A null file defaults to stdout. * Return the number of bytes written. 
* @overload */ -inline size_t emit_json(NodeRef const& r, FILE *f=nullptr) +inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) { EmitterFile em(f); - return em.emit(EMIT_JSON, r, /*error_on_excess*/true).len; + return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; } @@ -21113,17 +22230,17 @@ template inline OStream& operator<< (OStream& s, Tree const& t) { EmitterOStream em(s); - em.emit(EMIT_YAML, t); + em.emit_as(EMIT_YAML, t); return s; } /** emit YAML to an STL-like ostream * @overload */ template -inline OStream& operator<< (OStream& s, NodeRef const& n) +inline OStream& operator<< (OStream& s, ConstNodeRef const& n) { EmitterOStream em(s); - em.emit(EMIT_YAML, n); + em.emit_as(EMIT_YAML, n); return s; } @@ -21132,7 +22249,7 @@ template inline OStream& operator<< (OStream& s, as_json const& j) { EmitterOStream em(s); - em.emit(EMIT_JSON, *j.tree, j.node, true); + em.emit_as(EMIT_JSON, *j.tree, j.node, true); return s; } @@ -21143,10 +22260,14 @@ inline OStream& operator<< (OStream& s, as_json const& j) /** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. * @param error_on_excess Raise an error if the space in the buffer is insufficient. * @overload */ -inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_YAML, t, id, error_on_excess); + return em.emit_as(EMIT_YAML, t, id, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, id, buf, error_on_excess); } /** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. 
@@ -21155,17 +22276,21 @@ inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=tr inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_JSON, t, id, error_on_excess); + return em.emit_as(EMIT_JSON, t, id, error_on_excess); } /** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. * @param error_on_excess Raise an error if the space in the buffer is insufficient. * @overload */ -inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_YAML, t, error_on_excess); + return em.emit_as(EMIT_YAML, t, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, buf, error_on_excess); } /** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. @@ -21174,7 +22299,7 @@ inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_JSON, t, error_on_excess); + return em.emit_as(EMIT_JSON, t, error_on_excess); } @@ -21182,20 +22307,24 @@ inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) * @param error_on_excess Raise an error if the space in the buffer is insufficient. * @overload */ -inline substr emit(NodeRef const& r, substr buf, bool error_on_excess=true) +inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_YAML, r, error_on_excess); + return em.emit_as(EMIT_YAML, r, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + return emit_yaml(r, buf, error_on_excess); } /** emit JSON to the given buffer. 
Return a substr trimmed to the emitted JSON. * @param error_on_excess Raise an error if the space in the buffer is insufficient. * @overload */ -inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true) +inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true) { EmitterBuf em(buf); - return em.emit(EMIT_JSON, r, error_on_excess); + return em.emit_as(EMIT_JSON, r, error_on_excess); } @@ -21204,18 +22333,23 @@ inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true) /** emit+resize: emit YAML to the given std::string/std::vector-like * container, resizing it as needed to fit the emitted YAML. */ template -substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont) { substr buf = to_substr(*cont); - substr ret = emit(t, id, buf, /*error_on_excess*/false); + substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false); if(ret.str == nullptr && ret.len > 0) { cont->resize(ret.len); buf = to_substr(*cont); - ret = emit(t, id, buf, /*error_on_excess*/true); + ret = emit_yaml(t, id, buf, /*error_on_excess*/true); } return ret; } +template +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +{ + return emitrs_yaml(t, id, cont); +} /** emit+resize: emit JSON to the given std::string/std::vector-like * container, resizing it as needed to fit the emitted JSON. */ @@ -21237,15 +22371,22 @@ substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) /** emit+resize: emit YAML to the given std::string/std::vector-like * container, resizing it as needed to fit the emitted YAML. 
*/ template -CharOwningContainer emitrs(Tree const& t, size_t id) +CharOwningContainer emitrs_yaml(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_yaml(t, id, &c); + return c; +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id) { CharOwningContainer c; - emitrs(t, id, &c); + emitrs_yaml(t, id, &c); return c; } -/** emit+resize: emit JSON to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted JSON. */ +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ template CharOwningContainer emitrs_json(Tree const& t, size_t id) { @@ -21255,18 +22396,23 @@ CharOwningContainer emitrs_json(Tree const& t, size_t id) } -/** emit+resize: YAML to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted YAML. */ +/** emit+resize: YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ template -substr emitrs(Tree const& t, CharOwningContainer * cont) +substr emitrs_yaml(Tree const& t, CharOwningContainer * cont) { if(t.empty()) return {}; - return emitrs(t, t.root_id(), cont); + return emitrs_yaml(t, t.root_id(), cont); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont) +{ + return emitrs_yaml(t, cont); } -/** emit+resize: JSON to the given std::string/std::vector-like container, - * resizing it as needed to fit the emitted JSON. */ +/** emit+resize: JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ template substr emitrs_json(Tree const& t, CharOwningContainer * cont) { @@ -21279,14 +22425,19 @@ substr emitrs_json(Tree const& t, CharOwningContainer * cont) /** emit+resize: YAML to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted YAML. 
*/ template -CharOwningContainer emitrs(Tree const& t) +CharOwningContainer emitrs_yaml(Tree const& t) { CharOwningContainer c; if(t.empty()) return c; - emitrs(t, t.root_id(), &c); + emitrs_yaml(t, t.root_id(), &c); return c; } +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t) +{ + return emitrs_yaml(t); +} /** emit+resize: JSON to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted JSON. */ @@ -21304,16 +22455,21 @@ CharOwningContainer emitrs_json(Tree const& t) /** emit+resize: YAML to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted YAML. */ template -substr emitrs(NodeRef const& n, CharOwningContainer * cont) +substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont) { _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - return emitrs(*n.tree(), n.id(), cont); + return emitrs_yaml(*n.tree(), n.id(), cont); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont) +{ + return emitrs_yaml(n, cont); } /** emit+resize: JSON to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted JSON. */ template -substr emitrs_json(NodeRef const& n, CharOwningContainer * cont) +substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont) { _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); return emitrs_json(*n.tree(), n.id(), cont); @@ -21323,18 +22479,23 @@ substr emitrs_json(NodeRef const& n, CharOwningContainer * cont) /** emit+resize: YAML to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted YAML. 
*/ template -CharOwningContainer emitrs(NodeRef const& n) +CharOwningContainer emitrs_yaml(ConstNodeRef const& n) { _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); CharOwningContainer c; - emitrs(*n.tree(), n.id(), &c); + emitrs_yaml(*n.tree(), n.id(), &c); return c; } +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) +{ + return emitrs_yaml(n); +} /** emit+resize: JSON to the given std::string/std::vector-like container, * resizing it as needed to fit the emitted JSON. */ template -CharOwningContainer emitrs_json(NodeRef const& n) +CharOwningContainer emitrs_json(ConstNodeRef const& n) { _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); CharOwningContainer c; @@ -21345,6 +22506,9 @@ CharOwningContainer emitrs_json(NodeRef const& n) } // namespace yml } // namespace c4 +#undef RYML_DEPRECATE_EMIT +#undef RYML_DEPRECATE_EMITRS + // amalgamate: removed include of // https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp //#include "c4/yml/emit.def.hpp" @@ -21384,7 +22548,7 @@ namespace c4 { namespace yml { template -substr Emitter::emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +substr Emitter::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) { if(t.empty()) { @@ -21403,18 +22567,18 @@ substr Emitter::emit(EmitType_e type, Tree const& t, size_t id, bool err } template -substr Emitter::emit(EmitType_e type, Tree const& t, bool error_on_excess) +substr Emitter::emit_as(EmitType_e type, Tree const& t, bool error_on_excess) { if(t.empty()) return {}; - return emit(type, t, t.root_id(), error_on_excess); + return this->emit_as(type, t, t.root_id(), error_on_excess); } template -substr Emitter::emit(EmitType_e type, NodeRef const& n, bool error_on_excess) +substr Emitter::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess) { _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); - return emit(type, *n.tree(), n.id(), error_on_excess); + return this->emit_as(type, *n.tree(), 
n.id(), error_on_excess); } @@ -22194,7 +23358,7 @@ void Emitter::_write_scalar(csubstr s, bool was_quoted) // was evaluated as true even if s.str was actually a nullptr (!!!) if(s.len == size_t(0)) { - if(was_quoted) + if(was_quoted || s.str != nullptr) this->Writer::_do_write("''"); return; } @@ -22207,10 +23371,10 @@ void Emitter::_write_scalar(csubstr s, bool was_quoted) && ( // has leading whitespace - s.begins_with_any(" \n\t\r") - || - // looks like reference or anchor or would be treated as a directive - s.begins_with_any("*&%") + // looks like reference or anchor + // would be treated as a directive + // see https://www.yaml.info/learn/quote.html#noplain + s.begins_with_any(" \n\t\r*&%@`") || s.begins_with("<<") || @@ -22251,16 +23415,27 @@ void Emitter::_write_scalar(csubstr s, bool was_quoted) } } template -void Emitter::_write_scalar_json(csubstr s, bool as_key, bool was_quoted) -{ - if(was_quoted) - { - this->Writer::_do_write('"'); - this->Writer::_do_write(s); - this->Writer::_do_write('"'); - } - // json only allows strings as keys - else if(!as_key && (s.is_number() || s == "true" || s == "null" || s == "false")) +void Emitter::_write_scalar_json(csubstr s, bool as_key, bool use_quotes) +{ + if((!use_quotes) + // json keys require quotes + && (!as_key) + && ( + // do not quote special cases + (s == "true" || s == "false" || s == "null") + || ( + // do not quote numbers + (s.is_number() + && ( + // quote integral numbers if they have a leading 0 + // https://github.com/biojppm/rapidyaml/issues/291 + (!(s.len > 1 && s.begins_with('0'))) + // do not quote reals with leading 0 + // https://github.com/biojppm/rapidyaml/issues/313 + || (s.find('.') != csubstr::npos) )) + ) + ) + ) { this->Writer::_do_write(s); } @@ -22270,15 +23445,43 @@ void Emitter::_write_scalar_json(csubstr s, bool as_key, bool was_quoted this->Writer::_do_write('"'); for(size_t i = 0; i < s.len; ++i) { - if(s[i] == '"') + switch(s.str[i]) { - if(i > 0) - { - csubstr sub = 
s.range(pos, i); - this->Writer::_do_write(sub); - } - pos = i + 1; - this->Writer::_do_write("\\\""); + case '"': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\""); + pos = i + 1; + break; + case '\n': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\n"); + pos = i + 1; + break; + case '\t': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\t"); + pos = i + 1; + break; + case '\\': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\\"); + pos = i + 1; + break; + case '\r': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\r"); + pos = i + 1; + break; + case '\b': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\b"); + pos = i + 1; + break; + case '\f': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\f"); + pos = i + 1; + break; } } if(pos < s.len) @@ -22641,6 +23844,36 @@ void stack::_cb(Callbacks const& cb) namespace c4 { namespace yml { +struct RYML_EXPORT ParserOptions +{ +private: + + typedef enum : uint32_t { + LOCATIONS = (1 << 0), + DEFAULTS = 0, + } Flags_e; + + uint32_t flags = DEFAULTS; +public: + ParserOptions() = default; + + /** @name source location tracking */ + /** @{ */ + + /** enable/disable source location tracking */ + ParserOptions& locations(bool enabled) + { + if(enabled) + flags |= LOCATIONS; + else + flags &= ~LOCATIONS; + return *this; + } + bool locations() const { return (flags & LOCATIONS) != 0u; } + + /** @} */ +}; + //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- @@ -22652,8 +23885,8 @@ class RYML_EXPORT Parser /** @name construction and assignment */ /** @{ */ - Parser() : Parser(get_callbacks()) {} - Parser(Callbacks const& cb); + Parser(Callbacks const& cb, ParserOptions opts={}); + Parser(ParserOptions opts={}) : Parser(get_callbacks(), 
opts) {} ~Parser(); Parser(Parser &&); @@ -22723,6 +23956,8 @@ class RYML_EXPORT Parser size_t locations_capacity() const { return m_newline_offsets_capacity; } size_t filter_arena_capacity() const { return m_filter_arena.len; } + ParserOptions const& options() const { return m_options; } + /** @} */ public: @@ -22786,7 +24021,7 @@ class RYML_EXPORT Parser /** @{ */ // READ THE NOTE ABOVE! - #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a compiler error." + #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error." RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); @@ -22854,7 +24089,7 @@ class RYML_EXPORT Parser /** Get the location of a node of the last tree to be parsed by this parser. */ Location location(Tree const& tree, size_t node_id) const; /** Get the location of a node of the last tree to be parsed by this parser. */ - Location location(NodeRef node) const; + Location location(ConstNodeRef node) const; /** Get the string starting at a particular location, to the end * of the parsed source buffer. */ csubstr location_contents(Location const& loc) const; @@ -22900,7 +24135,11 @@ class RYML_EXPORT Parser * Will only be written to if this method returns true. 
* Will be set to true if the scanned scalar was quoted, by '', "", > or |. */ - bool _scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); csubstr _scan_comment(); csubstr _scan_squot_scalar(); @@ -22970,9 +24209,9 @@ class RYML_EXPORT Parser csubstr _consume_scalar(); void _move_scalar_from_top(); - inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({str, size_t(0)}); } - inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({str, size_t(0)}); } - inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({str, size_t(0)}, false); } + inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({nullptr, size_t(0)}); } + inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({nullptr, size_t(0)}); } + inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({nullptr, size_t(0)}, false); } void _set_indentation(size_t behind); void _save_indentation(size_t behind=0); @@ -23130,11 +24369,13 @@ class RYML_EXPORT Parser void _grow_filter_arena(size_t 
num_characters); substr _finish_filter_arena(substr dst, size_t pos); - void _prepare_locations() const; // only changes mutable members - void _resize_locations(size_t sz) const; // only changes mutable members - void _mark_locations_dirty(); + void _prepare_locations(); + void _resize_locations(size_t sz); bool _locations_dirty() const; + bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const; + bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const; + private: void _free(); @@ -23151,6 +24392,8 @@ class RYML_EXPORT Parser private: + ParserOptions m_options; + csubstr m_file; substr m_buf; @@ -23175,10 +24418,10 @@ class RYML_EXPORT Parser substr m_filter_arena; - mutable size_t *m_newline_offsets; - mutable size_t m_newline_offsets_size; - mutable size_t m_newline_offsets_capacity; - mutable csubstr m_newline_offsets_buf; + size_t *m_newline_offsets; + size_t m_newline_offsets_size; + size_t m_newline_offsets_capacity; + csubstr m_newline_offsets_buf; }; @@ -23329,7 +24572,7 @@ void write(c4::yml::NodeRef *n, std::map const& m) } template -bool read(c4::yml::NodeRef const& n, std::map * m) +bool read(c4::yml::ConstNodeRef const& n, std::map * m) { K k{}; V v{}; @@ -23414,24 +24657,37 @@ namespace yml { // in the data tree hierarchy (a SEQ node in ryml parlance). // So it should be serialized via write()/read(). + template void write(c4::yml::NodeRef *n, std::vector const& vec) { *n |= c4::yml::SEQ; for(auto const& v : vec) - { n->append_child() << v; - } } template -bool read(c4::yml::NodeRef const& n, std::vector *vec) +bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) +{ + vec->resize(n.num_children()); + size_t pos = 0; + for(auto const ch : n) + ch >> (*vec)[pos++]; + return true; +} + +/** specialization: std::vector uses std::vector::reference as + * the return value of its operator[]. 
*/ +template +bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) { vec->resize(n.num_children()); size_t pos = 0; + bool tmp; for(auto const ch : n) { - ch >> (*vec)[pos++]; + ch >> tmp; + (*vec)[pos++] = tmp; } return true; } @@ -23900,9 +25156,18 @@ NodeRef Tree::rootref() { return NodeRef(this, root_id()); } -NodeRef const Tree::rootref() const +ConstNodeRef Tree::rootref() const +{ + return ConstNodeRef(this, root_id()); +} + +ConstNodeRef Tree::crootref() +{ + return ConstNodeRef(this, root_id()); +} +ConstNodeRef Tree::crootref() const { - return NodeRef(const_cast(this), root_id()); + return ConstNodeRef(this, root_id()); } NodeRef Tree::ref(size_t id) @@ -23910,17 +25175,28 @@ NodeRef Tree::ref(size_t id) _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); return NodeRef(this, id); } -NodeRef const Tree::ref(size_t id) const +ConstNodeRef Tree::ref(size_t id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} + +ConstNodeRef Tree::cref(size_t id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} +ConstNodeRef Tree::cref(size_t id) const { _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); - return NodeRef(const_cast(this), id); + return ConstNodeRef(this, id); } NodeRef Tree::operator[] (csubstr key) { return rootref()[key]; } -NodeRef const Tree::operator[] (csubstr key) const +ConstNodeRef Tree::operator[] (csubstr key) const { return rootref()[key]; } @@ -23929,7 +25205,7 @@ NodeRef Tree::operator[] (size_t i) { return rootref()[i]; } -NodeRef const Tree::operator[] (size_t i) const +ConstNodeRef Tree::operator[] (size_t i) const { return rootref()[i]; } @@ -23938,9 +25214,9 @@ NodeRef Tree::docref(size_t i) { return ref(doc(i)); } -NodeRef const Tree::docref(size_t i) const +ConstNodeRef Tree::docref(size_t i) const { - return ref(doc(i)); + return cref(doc(i)); } @@ -24625,8 +25901,9 @@ 
void Tree::_swap_props(size_t n_, size_t m_) void Tree::move(size_t node, size_t after) { _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); - _RYML_CB_ASSERT(m_callbacks, has_sibling(node, after) && has_sibling(after, node)); + _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node))); _rem_hierarchy(node); _set_hierarchy(node, parent(node), after); @@ -24637,7 +25914,10 @@ void Tree::move(size_t node, size_t after) void Tree::move(size_t node, size_t new_parent, size_t after) { _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != node); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); _rem_hierarchy(node); @@ -24646,8 +25926,10 @@ void Tree::move(size_t node, size_t new_parent, size_t after) size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) { + _RYML_CB_ASSERT(m_callbacks, src != nullptr); _RYML_CB_ASSERT(m_callbacks, node != NONE); _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); size_t dup = duplicate(src, node, new_parent, after); src->remove(node); @@ -24848,15 +26130,17 @@ size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t pare remove(rep); prev = duplicate(src, i, parent, prev); } - else if(after_pos == NONE || rep_pos >= after_pos) + else if(prev == NONE) + { + // first iteration with prev = after = NONE and repetition + prev = rep; + } + else if(rep != prev) { // rep is located after the node which will be inserted // and overrides it. So move the rep into this node's place. 
- if(rep != prev) - { - move(rep, prev); - prev = rep; - } + move(rep, prev); + prev = rep; } } // there's a repetition } @@ -25197,9 +26481,7 @@ size_t Tree::num_children(size_t node) const { size_t count = 0; for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) - { ++count; - } return count; } @@ -25495,7 +26777,7 @@ void Tree::resolve_tags() return; size_t needed_size = _count_resolved_tags_size(this, root_id()); if(needed_size) - reserve_arena(arena_pos() + needed_size); + reserve_arena(arena_size() + needed_size); _resolve_tags(this, root_id()); } @@ -26010,8 +27292,9 @@ Parser::~Parser() _clr(); } -Parser::Parser(Callbacks const& cb) - : m_file() +Parser::Parser(Callbacks const& cb, ParserOptions opts) + : m_options(opts) + , m_file() , m_buf() , m_root_id(NONE) , m_tree() @@ -26039,7 +27322,8 @@ Parser::Parser(Callbacks const& cb) } Parser::Parser(Parser &&that) - : m_file(that.m_file) + : m_options(that.m_options) + , m_file(that.m_file) , m_buf(that.m_buf) , m_root_id(that.m_root_id) , m_tree(that.m_tree) @@ -26066,7 +27350,8 @@ Parser::Parser(Parser &&that) } Parser::Parser(Parser const& that) - : m_file(that.m_file) + : m_options(that.m_options) + , m_file(that.m_file) , m_buf(that.m_buf) , m_root_id(that.m_root_id) , m_tree(that.m_tree) @@ -26105,6 +27390,7 @@ Parser::Parser(Parser const& that) Parser& Parser::operator=(Parser &&that) { _free(); + m_options = (that.m_options); m_file = (that.m_file); m_buf = (that.m_buf); m_root_id = (that.m_root_id); @@ -26134,6 +27420,7 @@ Parser& Parser::operator=(Parser &&that) Parser& Parser::operator=(Parser const& that) { _free(); + m_options = (that.m_options); m_file = (that.m_file); m_buf = (that.m_buf); m_root_id = (that.m_root_id); @@ -26165,6 +27452,7 @@ Parser& Parser::operator=(Parser const& that) void Parser::_clr() { + m_options = {}; m_file = {}; m_buf = {}; m_root_id = {}; @@ -26229,7 +27517,10 @@ void Parser::_reset() m_val_anchor_indentation = 0; m_val_anchor.clear(); - 
_mark_locations_dirty(); + if(m_options.locations()) + { + _prepare_locations(); + } } //----------------------------------------------------------------------------- @@ -26535,7 +27826,7 @@ bool Parser::_handle_unk() csubstr saved_scalar; bool is_quoted; - if(_scan_scalar(&saved_scalar, &is_quoted)) + if(_scan_scalar_unk(&saved_scalar, &is_quoted)) { rem = m_state->line_contents.rem; _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar); @@ -26652,7 +27943,7 @@ bool Parser::_handle_unk() csubstr scalar; size_t indentation = m_state->line_contents.indentation; // save bool is_quoted; - if(_scan_scalar(&scalar, &is_quoted)) + if(_scan_scalar_unk(&scalar, &is_quoted)) { _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : ""); rem = m_state->line_contents.rem; @@ -26776,7 +28067,7 @@ bool Parser::_handle_seq_flow() { _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); bool is_quoted; - if(_scan_scalar(&rem, &is_quoted)) + if(_scan_scalar_seq_flow(&rem, &is_quoted)) { _c4dbgp("it's a scalar"); addrem_flags(RNXT, RVAL); @@ -26920,7 +28211,6 @@ bool Parser::_handle_seq_blck() rem = _scan_comment(); return true; } - if(has_any(RNXT)) { _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); @@ -26974,7 +28264,7 @@ bool Parser::_handle_seq_blck() csubstr s; bool is_quoted; - if(_scan_scalar(&s, &is_quoted)) // this also progresses the line + if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line { _c4dbgpf("it's a{} scalar", is_quoted ? 
" quoted" : ""); @@ -27233,7 +28523,7 @@ bool Parser::_handle_map_flow() _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); bool is_quoted; - if(has_none(SSCL) && _scan_scalar(&rem, &is_quoted)) + if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted)) { _c4dbgp("it's a scalar"); _store_scalar(rem, is_quoted); @@ -27353,7 +28643,7 @@ bool Parser::_handle_map_flow() _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); bool is_quoted; - if(_scan_scalar(&rem, &is_quoted)) + if(_scan_scalar_map_flow(&rem, &is_quoted)) { _c4dbgp("it's a scalar"); addrem_flags(RNXT, RVAL|RKEY); @@ -27437,7 +28727,7 @@ bool Parser::_handle_map_flow() //----------------------------------------------------------------------------- bool Parser::_handle_map_blck() { - _c4dbgpf("handle_map_impl: node_id={} level={}", m_state->node_id, m_state->level); + _c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level); csubstr rem = m_state->line_contents.rem; _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); @@ -27459,16 +28749,19 @@ bool Parser::_handle_map_blck() } if(_handle_indentation()) + { + _c4dbgp("indentation token"); return true; + } if(has_any(RKEY)) { _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); - _c4dbgp("read scalar?"); + _c4dbgp("RMAP|RKEY read scalar?"); bool is_quoted; - if(_scan_scalar(&rem, &is_quoted)) // this also progresses the line + if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line { _c4dbgpf("it's a{} scalar", is_quoted ? 
" quoted" : ""); if(has_all(QMRK|SSCL)) @@ -27577,9 +28870,10 @@ bool Parser::_handle_map_blck() _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _c4dbgp("RMAP|RVAL read scalar?"); csubstr s; bool is_quoted; - if(_scan_scalar(&s, &is_quoted)) // this also progresses the line + if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line { _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); @@ -27687,6 +28981,13 @@ bool Parser::_handle_map_blck() _start_new_doc(rem); return true; } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } else { _c4err("parse error"); @@ -28141,15 +29442,315 @@ csubstr Parser::_slurp_doc_scalar() return _scan_block(); } - _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); + _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); + + _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + if(_at_line_end()) + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _c4dbgp("_scan_scalar_map_blck"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + + csubstr s = m_state->line_contents.rem; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + if(s.len == 0) + return false; + #endif + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + if( ! _is_scalar_next__rmap(s)) + return false; + + size_t colon_token = s.find(": "); + if(colon_token == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_token = s.find(":\t"); + if(colon_token == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; + } + , + // without tab tokens + colon_token = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_token != s.len-1) + colon_token = npos; + ) + } + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + if(s.begins_with("? ") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _c4dbgp("RMAP|RKEY"); + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + if(s.begins_with("? 
") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + { + return false; + } + else if(s.begins_with("...")) + { + return false; + } + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS( + else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RMAP|RVAL: scalar"); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + return false; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + else if(s.begins_with("...")) + return false; + #endif + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(RKEY)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + + if(has_all(RVAL)) + { + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RSEQ|RVAL|FLOW"); + s = s.left_of(s.first_of(",]")); + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + + if(s.empty()) + return false; m_state->scalar_col = m_state->line_contents.current_col(s); - _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); - _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); - - _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); - if(_at_line_end()) + if(_at_line_end() && s != '~') { _c4dbgpf("at line end. 
curr='{}'", s); s = _extend_scanned_scalar(s); @@ -28157,12 +29758,17 @@ csubstr Parser::_slurp_doc_scalar() _c4dbgpf("scalar was '{}'", s); - return s; + *scalar = s; + *quoted = false; + return true; } -//----------------------------------------------------------------------------- -bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + csubstr s = m_state->line_contents.rem; if(s.len == 0) return false; @@ -28186,139 +29792,44 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) *quoted = true; return true; } - else if(s.begins_with('|') || s.begins_with('>')) - { - *scalar = _scan_block(); - *quoted = true; - return true; - } - else if(has_any(RTOP) && _is_doc_sep(s)) - { + + if( ! _is_scalar_next__rmap(s)) return false; - } - else if(has_any(RSEQ)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(RKEY)); - if(has_all(RVAL)) - { - _c4dbgp("RSEQ|RVAL"); - if( ! _is_scalar_next__rseq_rval(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - if(s.ends_with(':')) - { - --s.len; - } - else - { - auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); - if(first) - s.len = first.pos; - } - if(has_all(FLOW)) - { - _c4dbgp("RSEQ|RVAL|EXPL"); - s = s.left_of(s.first_of(",]")); - } - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - } - else - { - _c4err("internal error"); - } - } - else if(has_any(RMAP)) + + if(has_all(RKEY)) { - if( ! 
_is_scalar_next__rmap(s)) - return false; - size_t colon_space = s.find(": "); - if(colon_space == npos) + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + size_t colon_token = s.find(": "); + if(colon_token == npos) { _RYML_WITH_OR_WITHOUT_TAB_TOKENS( // with tab tokens - colon_space = s.find(":\t"); - if(colon_space == npos) + colon_token = s.find(":\t"); + if(colon_token == npos) { _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - colon_space = s.find(':'); - if(colon_space != s.len-1) - colon_space = npos; + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; } , // without tab tokens - colon_space = s.find(':'); + colon_token = s.find(':'); _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); - if(colon_space != s.len-1) - colon_space = npos; + if(colon_token != s.len-1) + colon_token = npos; ) } - - if(has_all(RKEY)) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); - if(has_any(QMRK)) - { - _c4dbgp("RMAP|RKEY|CPLX"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); - if(s.begins_with("? ") || s == '?') - return false; - s = s.left_of(colon_space); - s = s.left_of(s.first_of("#")); - if(has_any(FLOW)) - s = s.left_of(s.first_of(':')); - s = s.trimr(" \t"); - if(s.begins_with("---")) - return false; - else if(s.begins_with("...")) - return false; - } - else - { - _c4dbgp("RMAP|RKEY"); - _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); - if(s.begins_with("? ") || s == '?') - return false; - s = s.left_of(colon_space); - s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); - if(has_any(FLOW)) - { - _c4dbgpf("RMAP|RKEY|EXPL: '{}'", s); - s = s.left_of(s.first_of(",}")); - if(s.ends_with(':')) - s = s.offs(0, 1); - } - else if(s.begins_with("---")) - { - return false; - } - else if(s.begins_with("...")) - { - return false; - } - } - } - else if(has_all(RVAL)) - { - _c4dbgp("RMAP|RVAL"); - _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); - if( ! 
_is_scalar_next__rmap_val(s)) - return false; - _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) - return false; - ) - s = s.left_of(s.find(" #")); // is there a comment? - s = s.left_of(s.find("\t#")); // is there a comment? - if(has_any(FLOW)) - { - _c4dbgp("RMAP|RVAL|EXPL"); - if(has_none(RSEQIMAP)) - s = s.left_of(s.first_of(",}")); - else - s = s.left_of(s.first_of(",]")); - } - s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("? ") || s == '?') + return false; + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.left_of(s.first_of(':')); + s = s.trimr(" \t"); if(s.begins_with("---")) return false; else if(s.begins_with("...")) @@ -28326,38 +29837,115 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) } else { - _c4err("parse error"); + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + _c4dbgp("RMAP|RKEY"); + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s); + s = s.left_of(s.first_of(",}")); + if(s.ends_with(':')) + --s.len; } } - else if(has_all(RUNK)) + else if(has_all(RVAL)) { - _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); - if( ! _is_scalar_next__runk(s)) - { - _c4dbgp("RUNK: no scalar next"); + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! 
_is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) return false; - } - size_t pos = s.find(" #"); - if(pos != npos) - s = s.left_of(pos); - pos = s.find(": "); - if(pos != npos) - s = s.left_of(pos); - else if(s.ends_with(':')) - s = s.left_of(s.len-1); - _RYML_WITH_TAB_TOKENS( - else if((pos = s.find(":\t")) != npos) // TABS - s = s.left_of(pos); ) + _c4dbgp("RMAP|RVAL|FLOW"); + if(has_none(RSEQIMAP)) + s = s.left_of(s.first_of(",}")); else - s = s.left_of(s.first_of(',')); - s = s.trim(" \t"); - _c4dbgpf("RUNK: scalar='{}'", s); + s = s.left_of(s.first_of(",]")); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); } - else + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. 
curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); + if( ! 
_is_scalar_next__runk(s)) { - _c4err("not implemented"); + _c4dbgp("RUNK: no scalar next"); + return false; } + size_t pos = s.find(" #"); + if(pos != npos) + s = s.left_of(pos); + pos = s.find(": "); + if(pos != npos) + s = s.left_of(pos); + else if(s.ends_with(':')) + s = s.left_of(s.len-1); + _RYML_WITH_TAB_TOKENS( + else if((pos = s.find(":\t")) != npos) // TABS + s = s.left_of(pos); + ) + else + s = s.left_of(s.first_of(',')); + s = s.trim(" \t"); + _c4dbgpf("RUNK: scalar='{}'", s); if(s.empty()) return false; @@ -28379,6 +29967,7 @@ bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) return true; } + //----------------------------------------------------------------------------- csubstr Parser::_extend_scanned_scalar(csubstr s) @@ -28445,7 +30034,7 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) csubstr tpkl = peeked_line.triml(' ').trimr("\r\n"); if(tpkl.begins_with(": ") || tpkl == ':') { - _c4dbgpf("rscalar[EXPL]: map value starts on the peeked line: '{}'", peeked_line); + _c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line); peeked_line = peeked_line.first(0); break; } @@ -28455,7 +30044,7 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) if(colon_pos && colon_pos.pos < pos) { peeked_line = peeked_line.first(colon_pos.pos); - _c4dbgpf("rscalar[EXPL]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line); + _c4dbgpf("rscalar[FLOW]: found colon at {}. 
peeked='{}'", colon_pos.pos, peeked_line); _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); break; @@ -28464,13 +30053,13 @@ substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) } if(pos != npos) { - _c4dbgpf("rscalar[EXPL]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); + _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); peeked_line = peeked_line.left_of(pos); _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); break; } - _c4dbgpf("rscalar[EXPL]: append another line, full: '{}'", peeked_line.trimr("\r\n")); + _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n")); if(!first) { RYML_CHECK(_advance_to_peeked()); @@ -28792,12 +30381,17 @@ void Parser::_line_ended_undo() _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u); _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u); _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len); - _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - (m_state->line_contents.full.len - m_state->line_contents.stripped.len)); - m_state->pos.offset -= m_state->line_contents.full.len - m_state->line_contents.stripped.len; + size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len; + _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta); + m_state->pos.offset -= delta; --m_state->pos.line; m_state->pos.col = m_state->line_contents.stripped.len + 1u; + // don't forget to undo also the changes to the remainder of the line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r'); + m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0); } + //----------------------------------------------------------------------------- void Parser::_set_indentation(size_t indentation) { @@ -29456,7 +31050,8 @@ void Parser::_move_scalar_from_top() } //----------------------------------------------------------------------------- -/** @todo this function is a monster and needs love. */ +/** @todo this function is a monster and needs love. Likely, it needs + * to be split like _scan_scalar_*() */ bool Parser::_handle_indentation() { _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); @@ -29477,38 +31072,40 @@ bool Parser::_handle_indentation() _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref); if(ind == m_state->indref) { - if(has_all(SSCL|RVAL) && ! rem.sub(ind).begins_with('-')) + _c4dbgpf("same indentation: {}", ind); + if(!rem.sub(ind).begins_with('-')) { - if(has_all(RMAP)) - { - _append_key_val_null(rem.str + ind - 1); - addrem_flags(RKEY, RVAL); - } - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED - else if(has_all(RSEQ)) + _c4dbgp("does not begin with -"); + if(has_any(RMAP)) { - _append_val(_consume_scalar()); - addrem_flags(RNXT, RVAL); - } - else - { - _c4err("internal error"); + if(has_all(SSCL|RVAL)) + { + _c4dbgp("add with null val"); + _append_key_val_null(rem.str + ind - 1); + addrem_flags(RKEY, RVAL); + } } - #endif - } - else if(has_all(RSEQ|RNXT) && ! 
rem.sub(ind).begins_with('-')) - { - if(m_stack.size() > 2) // do not pop to root level + else if(has_any(RSEQ)) { - _c4dbgp("end the indentless seq"); - _pop_level(); - return true; + if(m_stack.size() > 2) // do not pop to root level + { + if(has_any(RNXT)) + { + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + else if(has_any(RVAL)) + { + _c4dbgp("add with null val"); + _append_val_null(rem.str); + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + } } } - else - { - _c4dbgpf("same indentation ({}) -- nothing to see here", ind); - } _line_progressed(ind); return ind > 0; } @@ -29696,10 +31293,9 @@ csubstr Parser::_scan_squot_scalar() // leading whitespace also needs filtering needs_filter = needs_filter - || numlines > 1 + || (numlines > 1) || line_is_blank - || (_at_line_begin() && line.begins_with(' ')) - || (m_state->line_contents.full.last_of('\r') != csubstr::npos); + || (_at_line_begin() && line.begins_with(' ')); if(pos == npos) { @@ -29798,10 +31394,9 @@ csubstr Parser::_scan_dquot_scalar() // leading whitespace also needs filtering needs_filter = needs_filter - || numlines > 1 + || (numlines > 1) || line_is_blank - || (_at_line_begin() && line.begins_with(' ')) - || (m_state->line_contents.full.last_of('\r') != csubstr::npos); + || (_at_line_begin() && line.begins_with(' ')); if(pos == npos) { @@ -29903,8 +31498,7 @@ csubstr Parser::_scan_block() _line_ended(); _scan_line(); - _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", - chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); + _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? 
"strip" : "keep"), indentation); // start with a zero-length block, already pointing at the right place substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); @@ -29951,15 +31545,17 @@ csubstr Parser::_scan_block() _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); if(provisional_indentation == npos) { - #ifdef RYML_NO_COVERAGE__TO_BE_DELETED if(lc.indentation < m_state->indref) { _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); + if(raw_block.len == 0) + { + _c4dbgp("scanning block: was empty, undo next line"); + _line_ended_undo(); + } break; } - else - #endif - if(lc.indentation == m_state->indref) + else if(lc.indentation == m_state->indref) { if(has_any(RSEQ|RMAP)) { @@ -30023,7 +31619,7 @@ csubstr Parser::_scan_block() _line_ended(); ++num_lines; } - _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0)); C4_UNUSED(num_lines); C4_UNUSED(first); @@ -30549,7 +32145,7 @@ csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); - if(chomp != CHOMP_KEEP && s.trim(" \n\r\t").len == 0u) + if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u) { _c4dbgp("filt_block: empty scalar"); return s.first(0); @@ -31058,7 +32654,7 @@ csubstr Parser::location_contents(Location const& loc) const return m_buf.sub(loc.offset); } -Location Parser::location(NodeRef node) const +Location Parser::location(ConstNodeRef node) const { _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid()); return location(*node.tree(), node.id()); @@ -31066,90 +32662,158 @@ Location Parser::location(NodeRef node) const Location Parser::location(Tree const& tree, size_t node) const { - 
_RYML_CB_CHECK(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); - _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + // try hard to avoid getting the location from a null string. + Location loc; + if(_location_from_node(tree, node, &loc, 0)) + return loc; + return val_location(m_buf.str); +} + +bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const +{ if(tree.has_key(node)) { - _RYML_CB_ASSERT(m_stack.m_callbacks, tree.key(node).is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.key(node))); - return val_location(tree.key(node).str); + csubstr k = tree.key(node); + if(C4_LIKELY(k.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k)); + *loc = val_location(k.str); + return true; + } + } + + if(tree.has_val(node)) + { + csubstr v = tree.val(node); + if(C4_LIKELY(v.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v)); + *loc = val_location(v.str); + return true; + } } - else if(tree.has_val(node)) + + if(tree.is_container(node)) { - _RYML_CB_ASSERT(m_stack.m_callbacks, tree.val(node).is_sub(m_buf)); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.val(node))); - return val_location(tree.val(node).str); + if(_location_from_cont(tree, node, loc)) + return true; } - else if(tree.is_container(node)) + + if(tree.type(node) != NOTYPE && level == 0) { - _RYML_CB_ASSERT(m_stack.m_callbacks, !tree.has_key(node)); - if(!tree.is_stream(node)) + // try the prev sibling { - const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container - if(tree.has_children(node)) + const size_t prev = tree.prev_sibling(node); + if(prev != NONE) { - size_t child = tree.first_child(node); - if(tree.has_key(child)) - { - // when a map starts, the container was set after the key 
- csubstr k = tree.key(child); - if(node_start > k.str) - node_start = k.str; - } + if(_location_from_node(tree, prev, loc, level+1)) + return true; + } + } + // try the next sibling + { + const size_t next = tree.next_sibling(node); + if(next != NONE) + { + if(_location_from_node(tree, next, loc, level+1)) + return true; } - return val_location(node_start); } - else // it's a stream + // try the parent { - return val_location(m_buf.str); // just return the front of the buffer + const size_t parent = tree.parent(node); + if(parent != NONE) + { + if(_location_from_node(tree, parent, loc, level+1)) + return true; + } } } - _RYML_CB_ASSERT(m_stack.m_callbacks, tree.type(node) == NOTYPE); - return val_location(m_buf.str); + + return false; +} + +bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node)); + if(!tree.is_stream(node)) + { + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) + { + size_t child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the container was set after the key + csubstr k = tree.key(child); + if(k.str && node_start > k.str) + node_start = k.str; + } + } + *loc = val_location(node_start); + return true; + } + else // it's a stream + { + *loc = val_location(m_buf.str); // just return the front of the buffer + } + return true; } + Location Parser::val_location(const char *val) const { - if(_locations_dirty()) - _prepare_locations(); - csubstr src = m_buf; - _RYML_CB_CHECK(m_stack.m_callbacks, src.str == m_newline_offsets_buf.str); - _RYML_CB_CHECK(m_stack.m_callbacks, src.len == m_newline_offsets_buf.len); - _RYML_CB_CHECK(m_stack.m_callbacks, val >= src.begin() && val <= src.end()); + if(C4_UNLIKELY(val == nullptr)) + return {m_file, 0, 0, 0}; + + _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations()); + // NOTE: if any of these 
checks fails, the parser needs to be + // instantiated with locations enabled. + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations()); + _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty()); _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); - using linetype = size_t const* C4_RESTRICT; - linetype line = nullptr; + // NOTE: the pointer needs to belong to the buffer that was used to parse. + csubstr src = m_buf; + _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr); + _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); + // ok. search the first stored newline after the given ptr + using lineptr_type = size_t const* C4_RESTRICT; + lineptr_type lineptr = nullptr; size_t offset = (size_t)(val - src.begin()); - if(m_newline_offsets_size < 30) + if(m_newline_offsets_size < 30) // TODO magic number { - // do a linear search if the size is small. - for(linetype curr = m_newline_offsets; curr < m_newline_offsets + m_newline_offsets_size; ++curr) + // just do a linear search if the size is small. + for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) { if(*curr > offset) { - line = curr; + lineptr = curr; break; } } } else { - // Do a bisection search if the size is not small. + // do a bisection search if the size is not small. // // We could use std::lower_bound but this is simple enough and // spares the include of . 
size_t count = m_newline_offsets_size; size_t step; - linetype it; - line = m_newline_offsets; + lineptr_type it; + lineptr = m_newline_offsets; while(count) { step = count >> 1; - it = line + step; + it = lineptr + step; if(*it < offset) { - line = ++it; + lineptr = ++it; count -= step + 1; } else @@ -31158,31 +32822,23 @@ Location Parser::val_location(const char *val) const } } } - if(line) - { - _RYML_CB_ASSERT(m_stack.m_callbacks, *line > offset); - } - else - { - _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.empty()); - _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == 1); - line = m_newline_offsets; - } - _RYML_CB_ASSERT(m_stack.m_callbacks, line >= m_newline_offsets && line < m_newline_offsets + m_newline_offsets_size);; - Location loc = {}; + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets); + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); + _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset); + Location loc; loc.name = m_file; loc.offset = offset; - loc.line = (size_t)(line - m_newline_offsets); - if(line > m_newline_offsets) - loc.col = (offset - *(line-1) - 1u); + loc.line = (size_t)(lineptr - m_newline_offsets); + if(lineptr > m_newline_offsets) + loc.col = (offset - *(lineptr-1) - 1u); else loc.col = offset; return loc; } -void Parser::_prepare_locations() const +void Parser::_prepare_locations() { - _RYML_CB_ASSERT(m_stack.m_callbacks, !m_file.empty()); + m_newline_offsets_buf = m_buf; size_t numnewlines = 1u + m_buf.count('\n'); _resize_locations(numnewlines); m_newline_offsets_size = 0; @@ -31193,7 +32849,7 @@ void Parser::_prepare_locations() const _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); } -void Parser::_resize_locations(size_t numnewlines) const +void Parser::_resize_locations(size_t numnewlines) { if(numnewlines > m_newline_offsets_capacity) { @@ -31204,12 +32860,6 @@ void Parser::_resize_locations(size_t numnewlines) const } } -void 
Parser::_mark_locations_dirty() -{ - m_newline_offsets_size = 0u; - m_newline_offsets_buf = m_buf; -} - bool Parser::_locations_dirty() const { return !m_newline_offsets_size; @@ -31253,6 +32903,13 @@ bool Parser::_locations_dirty() const namespace c4 { namespace yml { + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) { _apply_seed(); @@ -31269,22 +32926,6 @@ size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) return encoded.len; } -size_t NodeRef::deserialize_key(c4::fmt::base64_wrapper w) const -{ - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - RYML_ASSERT(get() != nullptr); - return from_chars(key(), &w); -} - -size_t NodeRef::deserialize_val(c4::fmt::base64_wrapper w) const -{ - RYML_ASSERT( ! is_seed()); - RYML_ASSERT(valid()); - RYML_ASSERT(get() != nullptr); - return from_chars(val(), &w); -} - } // namespace yml } // namespace c4 @@ -31887,7 +33528,7 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- -inline void print_node(NodeRef const& p, int level=0) +inline void print_node(ConstNodeRef const& p, int level=0) { print_node(*p.tree(), p.id(), level, 0, true); } diff --git a/FireDogEditor/FireDogEditor.h b/FireDogEditor/FireDogEditor.h index 807e328..d23ec77 100644 --- a/FireDogEditor/FireDogEditor.h +++ b/FireDogEditor/FireDogEditor.h @@ -17,7 +17,7 @@ #include "Qss/Qss.h" -#define FIREDOG_EDITOR_VERSION "v2.1" +#define FIREDOG_EDITOR_VERSION "v2.2" #define EXIT_CODE_REBOOT -123456789 //ƥܼ diff --git a/README.md b/README.md index f4f0ace..362f483 100644 --- a/README.md +++ 
b/README.md @@ -78,6 +78,20 @@ $ge: *comparison rule*, greater than or equal to comparison, use two *number rul # 更新日志 Update log +# v1.3.3 + +``` +FireDog Version: v1.3.3 +FireDog Editor Version: v2.2 +Feature Library Version: v1.2.1 + +1:修复检测结果为空时报错问题。 +2:优化yaml组件,让yaml组件支持gcc 4.8版本(centos7默认),增强引擎的跨平台编译能力。 + +1: Fix the error raised when the detection result is empty. +2: Optimize the yaml component so that it supports gcc 4.8 (the CentOS 7 default), improving the engine's cross-platform compilation ability. +``` + # v1.3.1 ```