(original) (raw)

//---------------------------------------------------------------------------------------------------------------------- // SaneCppStrings.h - Sane C++ Strings Library (single file build) //---------------------------------------------------------------------------------------------------------------------- // Dependencies: SaneCppFoundation.h // Version: release/2025/11 (cf7313e5) // LOC header: 918 (code) + 1041 (comments) // LOC implementation: 2181 (code) + 330 (comments) // Documentation: https://pagghiu.github.io/SaneCppLibraries // Source Code: https://github.com/pagghiu/SaneCppLibraries //---------------------------------------------------------------------------------------------------------------------- // All copyrights and SPDX information for this library (each amalgamated section has its own copyright attributions): // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- #include "SaneCppFoundation.h" #if !defined(SANE_CPP_STRINGS_HEADER) #define SANE_CPP_STRINGS_HEADER 1 //---------------------------------------------------------------------------------------------------------------------- // Strings/Console.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- // Strings/StringFormat.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- // Strings/StringView.h 
//---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- // Strings/StringIterator.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { //! @addtogroup group_strings //! @{ /// @brief UTF code point (32 bit) using StringCodePoint = uint32_t; /// @brief Checks if two encodings have the same utf unit size /// @param encoding1 First encoding /// @param encoding2 Second encoding /// @return `true` if the two encodings have the same unit size constexpr bool StringEncodingAreBinaryCompatible(StringEncoding encoding1, StringEncoding encoding2) { return (encoding1 == encoding2) or (encoding2 == StringEncoding::Ascii and encoding1 == StringEncoding::Utf8) or (encoding2 == StringEncoding::Utf8 and encoding1 == StringEncoding::Ascii); } /// @brief A position inside a fixed range `[start, end)` of UTF code points. /// /// It's a range of bytes (start and end pointers) with a *current* pointer pointing at a specific code point of the /// range. There are three classes derived from it (SC::StringIteratorASCII, SC::StringIteratorUTF8 and /// SC::StringIteratorUTF16) and they allow doing operations along the string view in UTF code points. /// @note Code points are not the same as perceived characters (that would be grapheme clusters). /// Invariants: start <= end and it >= start and it <= end. 
/// @tparam CharIterator StringIteratorASCII, StringIteratorUTF8 or StringIteratorUTF16 template struct SC_COMPILER_EXPORT StringIterator { static constexpr StringEncoding getEncoding() { return CharIterator::getEncoding(); } using CodeUnit = char; using CodePoint = StringCodePoint; /// @brief Rewind current position to start of iterator range constexpr void setToStart() { it = start; } /// @brief Set current position to end of iterator range constexpr void setToEnd() { it = end; } /// @brief Check if current position is at end of iterator range /// @return `true` if position is at end of iterator range [[nodiscard]] constexpr bool isAtEnd() const { return it >= end; } /// @brief Check if current position is at start of iterator range /// @return `true` if position is at start of iterator range [[nodiscard]] constexpr bool isAtStart() const { return it <= start; } /// @brief Advances position towards `end` until it matches CodePoint `c` or `position == end` /// @param c The CodePoint to be searched /// @return `true` if c was found, `false` if end was reached [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c); /// @brief Moves position towards start until CodePoint `c` is found or `position == end` /// @param c The CodePoint to be searched /// @return `true` if `c` was found, `false` if `start` was reached [[nodiscard]] bool reverseAdvanceUntilMatches(CodePoint c); /// @brief Advances position towards `end` until a matching range of character equal to `other[it, end)` is found. /// Position pointer is advanced additional after the matching range. 
/// @param other The range of character to be found `[it, end)` /// @return `true` if other was found, `false` if end was reached template [[nodiscard]] bool advanceAfterFinding(StringIterator other) { return advanceBeforeOrAfterFinding<otheriterator, true="">(other); } [[nodiscard]] bool advanceAfterFinding(StringIterator other) { return advanceAfterFindingSameIterator(other); } /// @brief Advances position towards `end` until a matching range of character equal to `other[it, end)` is found. /// Position pointer is stopped before the first matching character of the range. /// @param other The range of character to be found `[it, end)` /// @return `true` if other was found, `false` if end was reached template [[nodiscard]] bool advanceBeforeFinding(StringIterator other) { return advanceBeforeOrAfterFinding<otheriterator, false="">(other); } [[nodiscard]] bool advanceBeforeFinding(StringIterator other) { return advanceBeforeFindingSameIterator(other); } /// @brief Advances position by the same number of code points as other /// @param other The other range of character /// @return `true` if advance succeeded, `false` if `end` was reached [[nodiscard]] bool advanceByLengthOf(StringIterator other) { return advanceOfBytes(other.end - other.it); } /// @brief Advances position until any CodePoint in the given Span is found /// @param items A contiguous span of CodePoint to match /// @param matched The matched CodePoint in the Span /// @return `true` if one CodePoint was matched, `false` if `end` was reached [[nodiscard]] bool advanceUntilMatchesAny(Span items, CodePoint& matched); /// @brief Moves position towards start until any CodePoint in the given Span is found /// @param items A contiguous span of CodePoint to match /// @param matched The matched CodePoint in the Span /// @return `true` if one CodePoint was matched, `false` if `start` was reached [[nodiscard]] bool reverseAdvanceUntilMatchesAny(Span items, CodePoint& matched); /// @brief Advances position until a 
code point different from `c` is found or `end` is reached /// @param c The CodePoint to be compared /// @param optionalReadChar The CodePoint that was found, different from `c` /// @return `true` if it finds at least one code point different from c, `false` if iterator ends before finding it [[nodiscard]] bool advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar = nullptr); /// @brief Advance position only if next code point matches `c`. /// @param c The CodePoint being searched /// @return `true` if next code point matches `c`, `false` otherwise or if position is already at end [[nodiscard]] constexpr bool advanceIfMatches(CodePoint c); /// @brief Move position by one code point towards start if previous code point matches `c` /// @param c The CodePoint being searched /// @return `true` if next code point matches `c`, `false` otherwise or if position is already at end [[nodiscard]] bool advanceBackwardIfMatches(CodePoint c); /// @brief Advance position only if any of the code points in given Span is matched /// @param items Span of points to be checked /// @return `true` if next code point was successfully matched [[nodiscard]] bool advanceIfMatchesAny(Span items); /// @brief Advance position if any code point in the range [first, last] is matched /// @param first The initial CodePoint defining the range to be checked /// @param last The final CodePoint defining the range to be checked /// @return `true` if a code point in the given range was matched [[nodiscard]] bool advanceIfMatchesRange(CodePoint first, CodePoint last); /// @brief Check if code unit at current position matches CodePoint `c` /// @param c code point to match /// @return `true` if code unit at current position matches `c`, `false` if there is no match or position is at /// `end` [[nodiscard]] bool match(CodePoint c) { return it < end and CharIterator::decode(it, end) == c; } /// @brief Decode code unit at current position and advance /// @param c output code point read /// @return 
`true` if position is at `end` before decoding code unit [[nodiscard]] constexpr bool advanceRead(CodePoint& c); /// @brief Read code unit at current position /// @param c output code point read /// @return `true` if position is at `end` before decoding code unit [[nodiscard]] bool read(CodePoint& c); /// @brief Move to previous position and read code unit /// @param c output code point read /// @return `true` if position is at `start` before decoding code unit [[nodiscard]] bool advanceBackwardRead(CodePoint& c); /// @brief Move position to next code point /// @return `true` if position is not at `end` before trying to move forward [[nodiscard]] constexpr bool stepForward(); /// @brief Move position to previous code point /// @return `true` if position is not at `start` before trying to move backwards [[nodiscard]] constexpr bool stepBackward(); /// @brief Move position forward (towards `end`) by variable number of code points /// @param numCodePoints number of code points to move forward /// @return `true` if it's possible advancing `numCodePoints` before reaching `end` [[nodiscard]] constexpr bool advanceCodePoints(size_t numCodePoints); /// @brief Move position backwards (towards `start`) by variable number of code pints /// @param numCodePoints number of code points to move backwards /// @return `true` if it's possible moving `numCodePoints` backwards before reaching `start` [[nodiscard]] bool reverseAdvanceCodePoints(size_t numCodePoints); /// @brief Check if next code point is `c` /// @param c the code point /// @return `true` if next code point is `c` [[nodiscard]] constexpr bool isFollowedBy(CodePoint c); /// @brief Check if previous code point is `c` /// @param c the code point /// @return `true` if previous code point is `c` [[nodiscard]] constexpr bool isPrecededBy(CodePoint c); /// @brief Returns another StringIterator range, starting from `start` to `otherPoint` position /// @param otherPoint The StringIterator containing the ending position to slice 
to /// @return The new StringIterator `[start, otherPoint.position]` [[nodiscard]] constexpr StringIterator sliceFromStartUntil(StringIterator otherPoint) const; /// @brief Get distance in bytes from current position to another StringIterator current position /// @param other The StringIterator from which to compute distance /// @return (signed) number of bytes between the two StringIterator [[nodiscard]] constexpr ssize_t bytesDistanceFrom(StringIterator other) const; /// @brief Check if this Iterator ends with any code point in the given span /// @param codePoints A span of code points to check for /// @return `true` if at least one code point of `codepoints` exists at the end of the range [[nodiscard]] bool endsWithAnyOf(Span codePoints) const; /// @brief Check if this Iterator starts with any code point in the given span /// @param codePoints A span of code points to check for /// @return `true` if at least one code point of `codepoints` exists at the start of the range [[nodiscard]] bool startsWithAnyOf(Span codePoints) const; /// @brief Check if this Iterator at its end matches entirely another Iterator's range /// @param other The other iterator to match /// @return `true` if this Iterator matches entire `other` Iterator at its `end` template [[nodiscard]] bool endsWith(IteratorType other) const; /// @brief Check if this Iterator at its start matches entirely another Iterator's range /// @param other The other iterator to match /// @return `true` if this Iterator matches entire `other` at its `start` template [[nodiscard]] bool startsWith(IteratorType other) const; protected: template [[nodiscard]] bool advanceBeforeOrAfterFinding(StringIterator other); [[nodiscard]] bool advanceAfterFindingSameIterator(StringIterator other); [[nodiscard]] bool advanceBeforeFindingSameIterator(StringIterator other); [[nodiscard]] bool advanceOfBytes(ssize_t bytesLength); friend struct StringView; static constexpr const CodeUnit* getNextOf(const CodeUnit* src, const char* 
end) { return CharIterator::getNextOf(src, end); } static constexpr const CodeUnit* getPreviousOf(const CodeUnit* src, const char* start) { return CharIterator::getPreviousOf(src, start); } constexpr StringIterator(const CodeUnit* it, const CodeUnit* end) : it(it), start(it), end(end) {} const CodeUnit* it; const CodeUnit* start; const CodeUnit* end; }; /// @brief A string iterator for ASCII strings struct SC_COMPILER_EXPORT StringIteratorASCII : public StringIterator{ [[nodiscard]] constexpr bool advanceUntilMatches(CodePoint c); private: [[nodiscard]] bool advanceUntilMatchesNonConstexpr(CodePoint c); using StringIterator::StringIterator; constexpr StringIteratorASCII(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {} using Parent = StringIterator; friend Parent; friend struct StringView; [[nodiscard]] static constexpr StringEncoding getEncoding() { return StringEncoding::Ascii; } [[nodiscard]] static constexpr const char* getNextOf(const char* src, const char*) { return src + 1; } [[nodiscard]] static constexpr const char* getPreviousOf(const char* src, const char*) { return src - 1; } [[nodiscard]] static constexpr CodePoint decode(const char* src, const char*) { return static_cast(*src); } }; /// @brief A string iterator for UTF16 strings struct SC_COMPILER_EXPORT StringIteratorUTF16 : public StringIterator{ private: using StringIterator::StringIterator; constexpr StringIteratorUTF16(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {} using Parent = StringIterator; friend Parent; friend struct StringView; [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf16; } [[nodiscard]] static const char* getNextOf(const char* bytes, const char* end); [[nodiscard]] static const char* getPreviousOf(const char* bytes, const char* start); [[nodiscard]] static uint32_t decode(const char* bytes, const char* end); }; /// @brief A string iterator for UTF8 strings struct SC_COMPILER_EXPORT StringIteratorUTF8 : public 
StringIterator{ private: using Parent = StringIterator; friend Parent; friend struct StringView; using StringIterator::StringIterator; constexpr StringIteratorUTF8(const CodeUnit* it, const CodeUnit* end) : StringIterator(it, end) {} [[nodiscard]] static StringEncoding getEncoding() { return StringEncoding::Utf8; } [[nodiscard]] static const char* getNextOf(const char* src, const char* end); [[nodiscard]] static const char* getPreviousOf(const char* src, const char* start); [[nodiscard]] static uint32_t decode(const char* src, const char* end); }; /// @brief Builds a constexpr bool skip table of 256 entries used in some parsers struct StringIteratorSkipTable { bool matches[256] = {false}; constexpr StringIteratorSkipTable(Span chars) { for (auto c : chars) { matches[static_cast(c)] = true; } } }; //! @} //----------------------------------------------------------------------------------------------------------------------- // Implementations Details //----------------------------------------------------------------------------------------------------------------------- template constexpr bool StringIterator::advanceUntilMatches(CodePoint c) { while (it < end) { if (CharIterator::decode(it, end) == c) return true; it = getNextOf(it, end); } return false; } template constexpr bool StringIterator::advanceIfMatches(CodePoint c) { if (it < end and CharIterator::decode(it, end) == c) { it = getNextOf(it, end); return true; } return false; } template constexpr bool StringIterator::advanceRead(CodePoint& c) { if (it < end) { c = CharIterator::decode(it, end); it = getNextOf(it, end); return true; } return false; } template constexpr bool StringIterator::stepForward() { if (it < end) { it = getNextOf(it, end); return true; } return false; } template constexpr bool StringIterator::stepBackward() { if (it > start) { it = getPreviousOf(it, start); return true; } return false; } template constexpr bool StringIterator::advanceCodePoints(size_t numCodePoints) { while 
(numCodePoints > 0) { numCodePoints -= 1; if (it >= end) { return false; } it = getNextOf(it, end); } return true; } template constexpr bool StringIterator::isFollowedBy(CodePoint c) { return it < end ? CharIterator::decode(getNextOf(it, end), end) == c : false; } template constexpr bool StringIterator::isPrecededBy(CodePoint c) { return it > start ? CharIterator::decode(getPreviousOf(it, start), it) == c : false; } template constexpr StringIterator StringIterator::sliceFromStartUntil( StringIterator otherPoint) const { SC_ASSERT_RELEASE(it <= otherPoint.it); return StringIterator(it, otherPoint.it); } template constexpr ssize_t StringIterator::bytesDistanceFrom(StringIterator other) const { return (it - other.it) * static_cast(sizeof(CodeUnit)); } // StringIteratorASCII [[nodiscard]] constexpr bool StringIteratorASCII::advanceUntilMatches(CodePoint c) { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunreachable-code" #endif return __builtin_is_constant_evaluated() ? StringIterator::advanceUntilMatches(c) : advanceUntilMatchesNonConstexpr(c); #if defined(__clang__) #pragma clang diagnostic pop #endif } } // namespace SC namespace SC { struct SC_COMPILER_EXPORT StringView; struct SC_COMPILER_EXPORT StringViewTokenizer; struct SC_COMPILER_EXPORT StringAlgorithms; } // namespace SC //! @defgroup group_strings Strings //! @copybrief library_strings (see @ref library_strings for more details) //! @addtogroup group_strings //! @{ /// @brief Non-owning view over a range of characters with UTF Encoding. /// /// It additional also holds the SC::StringEncoding information (`ASCII`, `UTF8` or `UTF16`). /// During construction the encoding information and the null-termination state must be specified. /// All methods are const because it's not possible to modify a string with it. 
/// @n /// Example (Construct) /// @code{.cpp} /// StringView s("asd"); /// SC_ASSERT_RELEASE(s.sizeInBytes() == 3); /// SC_ASSERT_RELEASE(s.isNullTerminated()); /// @endcode /// Example (Construct from null terminated string) /// @code{.cpp} /// const char* someString = "asdf"; /// // construct only "asd", not null terminated (as there is 'f' after 'd') /// StringView s({someString, strlen(asd) - 1}, false, StringEncoding::Ascii); /// SC_ASSERT_RELEASE(s.sizeInBytes() == 3); /// SC_ASSERT_RELEASE(not s.isNullTerminated()); /// // /// // ... or /// StringView s2 = StringView::fromNullTerminated(s, StringEncoding::Ascii); // s2 == "asdf" /// @endcode struct SC::StringView : public StringSpan { StringView() : StringSpan() {} using StringSpan::StringSpan; constexpr StringView(StringSpan ssv) : StringSpan(ssv) {} static constexpr StringView fromNullTerminated(const char* text, StringEncoding encoding) { return StringSpan::fromNullTerminated(text, encoding); } #if SC_PLATFORM_WINDOWS static constexpr StringView fromNullTerminated(const wchar_t* text, StringEncoding encoding) { return StringSpan::fromNullTerminated(text, encoding); } #endif /// @brief Obtain a `const uint8_t` Span from this StringView /// @return Span representing this StringView Span toBytesSpan() const SC_LANGUAGE_LIFETIME_BOUND { return Span::reinterpret_bytes(text, textSizeInBytes); } /// @brief Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending on /// encoding. /// @tparam Func A function/lambda with `auto operator()({StringIteratorASCII | StringIteratorUTF8 | /// StringIteratorUTF16})` /// @param func lambda / functor to be called /// @return whatever value is returned by invoking func template [[nodiscard]] constexpr auto withIterator(Func&& func) const; /// @brief Call given lambda with one of StringIteratorASCII, StringIteratorUTF8, StringIteratorUTF16 depending on /// encoding. 
/// @tparam Func Func A function/lambda with `auto operator()({StringIteratorASCII | StringIteratorUTF8 | /// StringIteratorUTF16}, {StringIteratorASCII | StringIteratorUTF8 | StringIteratorUTF16})` /// @param s1 StringView that will be passed as first iterator /// @param s2 StringView that will be passed as second iterator /// @param func the lambda / to be invoked /// @return whatever value is returned by invoking func template [[nodiscard]] static constexpr auto withIterators(StringView s1, StringView s2, Func&& func); /// @brief Returns a StringIterator from current StringView /// @tparam StringIterator can be StringIteratorASCII, StringIteratorUTF8 or StringIteratorUTF16 /// @return StringIterator representing the StringView template constexpr StringIterator getIterator() const; /// @brief Compare this StringView with another StringView for inequality /// @param other StringView to be compared with current one /// @return 'true' if the two StringView are different [[nodiscard]] constexpr bool operator!=(StringView other) const { return not operator==(other); } /// @brief Compare this StringView with another StringView for equality /// @param other StringView to be compared with current one /// @return 'true' if the two StringView are the same [[nodiscard]] constexpr bool operator==(StringView other) const; /// @brief Check if this StringView is equal to other StringView (operates on code points, not on utf graphemes). /// Returns the number of code points that are the same in both StringView-s. 
/// @param other The StringView to be compared to /// @param commonOverlappingPoints number of equal code points in both StringView /// @return `true` if the two StringViews are equal /// /// Example: /// @code{.cpp} /// StringView asd = "123 456"_a8; /// size_t overlapPoints = 0; /// SC_TEST_EXPECT(not asd.fullyOverlaps("123___", overlapPoints) and overlapPoints == 3); /// @endcode [[nodiscard]] constexpr bool fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const; /// @brief Check if StringView is empty /// @return `true` if string is empty [[nodiscard]] constexpr bool isEmpty() const { return text == nullptr or textSizeInBytes == 0; } /// @brief Check if StringView is immediately followed by a null termination character /// @return `true` if StringView is immediately followed by a null termination character [[nodiscard]] constexpr bool isNullTerminated() const { return hasNullTerm; } /// @brief Get size of the StringView in bytes /// @return Size in bytes of StringView [[nodiscard]] constexpr size_t sizeInBytes() const { return textSizeInBytes; } /// @brief Check if StringView ends with any utf code point in the given span /// @param codePoints The utf code points to check against /// @return Returns `true` if this StringView ends with any code point inside codePoints /// /// Example: /// @code{.cpp} /// SC_TEST_EXPECT("123 456".endsWithAnyOf({'a', '6'})); // '6' will match /// @endcode [[nodiscard]] bool endsWithAnyOf(Span codePoints) const; /// @brief Check if StringView starts with any utf code point in the given span /// @param codePoints The utf code points to check against /// @return Returns `true` if this StringView starts with any code point inside codePoints /// /// Example: /// @code{.cpp} /// SC_TEST_EXPECT("123 456".startsWithAnyOf({'1', '8'})); // '1' will match /// @endcode [[nodiscard]] bool startsWithAnyOf(Span codePoints) const; /// @brief Check if StringView starts with another StringView /// @param str The other StringView to 
check with current /// @return Returns `true` if this StringView starts with str /// /// Example: /// @code{.cpp} /// SC_TEST_EXPECT("123 456".startsWith("123")); /// @endcode [[nodiscard]] bool startsWith(const StringView str) const; /// @brief Check if StringView ends with another StringView /// @param str The other StringView to check with current /// @return Returns `true` if this StringView ends with str /// /// Example: /// @code{.cpp} /// SC_TEST_EXPECT("123 456".endsWith("456")); /// @endcode [[nodiscard]] bool endsWith(const StringView str) const; /// @brief Check if StringView contains another StringView. /// @param str The other StringView to check with current /// @return Returns `true` if this StringView contains str /// /// Example: /// @code{.cpp} /// StringView asd = "123 456"; /// SC_TRY(asd.containsString("123")); /// SC_TRY(asd.containsString("456")); /// SC_TRY(not asd.containsString("124")); /// SC_TRY(not asd.containsString("4567")); /// @endcode [[nodiscard]] bool containsString(const StringView str) const; /// @brief Returns the remaining part of the string after matching stringToMatch /// @param stringToMatch String to match inside the source string /// @param remainingAfterSplit Portion of this StringView AFTER first match of stringToMatch (excluding the match) /// @return Returns `true` if stringToMatch has been found and split has been written to remainingAfterSplit /// /// Example: /// @code{.cpp} /// StringView str("KEY = VALUE"); /// StringView split; /// SC_TEST_EXPECT(str.splitAfter(" = ", split)); /// SC_TEST_EXPECT(split == "VALUE"); /// @endcode [[nodiscard]] bool splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const; /// @brief Returns the part of the string before matching stringToMatch /// @param stringToMatch String to match inside the source string /// @param stringBeforeSplit Portion of this StringView BEFORE first match of stringToMatch (excluding the match) /// @return Returns `true` if 
stringToMatch has been found and split has been written to remainingAfterSplit /// /// Example: /// @code{.cpp} /// StringView str("KEY = VALUE"); /// StringView split; /// SC_TEST_EXPECT(str.splitBefore(" = ", split)); /// SC_TEST_EXPECT(split == "KEY"); /// @endcode [[nodiscard]] bool splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const; /// @brief Check if StringView contains given utf code point /// @param c The utf code point to check against /// @return Returns `true` if this StringView contains code point c [[nodiscard]] bool containsCodePoint(StringCodePoint c) const; /// @brief Check if current StringView has compatible encoding with str. /// This means checking if two encodings have the same utf unit size. /// @param str The other StringView to check with current /// @return `true` if this StringView has compatible encoding with str [[nodiscard]] constexpr bool hasCompatibleEncoding(StringView str) const; /// @brief Returns a StringView starting at `from` and ending at `to`. /// @tparam StringIterator One among StringIteratorASCII, StringIteratorUTF8 and StringIteratorUTF16 /// @param from Indicates where the StringView will start. /// The from iterator will be shortened until the start of to. /// @param to Indicates where the StringView will end. /// @param encoding What encoding should be assigned to the resulting StringView /// @return A StringView starting at `from` and ending at `to`. /// @note If from is `>` to an empty StringView will be returned template static StringView fromIterators(StringIterator from, StringIterator to, StringEncoding encoding = StringIterator::getEncoding()); /// @brief Returns a section of a string, from `it` to end of StringView. /// @tparam StringIterator One among StringIteratorASCII, StringIteratorUTF8 and StringIteratorUTF16 /// @param it The iterator pointing at the start of the specified portion of StringView. 
/// @param encoding What encoding should be assigned to the resulting StringView /// @return Another StringView pointing at characters from `it` up to StringView end template static StringView fromIteratorUntilEnd(StringIterator it, StringEncoding encoding = StringIterator::getEncoding()); /// @brief Returns a section of a string, from start of StringView to `it`. /// @tparam StringIterator One among StringIteratorASCII, StringIteratorUTF8 and StringIteratorUTF16 /// @param it The iterator pointing at the start of the specified portion of StringView. /// @param encoding What encoding should be assigned to the resulting StringView /// @return Another StringView pointing at characters from start of StringView until `it` template static constexpr StringView fromIteratorFromStart(StringIterator it, StringEncoding encoding = StringIterator::getEncoding()); /// @brief Get slice `[start, end)` starting at offset `start` and ending at `end` (measured in utf code points) /// @param start The initial code point where the slice starts /// @param end One after the final code point where the slice ends /// @return The `[start, end)` StringView slice /// /// Example: /// @code{.cpp} /// StringView str = "123_567"; /// SC_TEST_EXPECT(str.sliceStartEnd(0, 3) == "123"); /// SC_TEST_EXPECT(str.sliceStartEnd(4, 7) == "567"); /// @endcode [[nodiscard]] StringView sliceStartEnd(size_t start, size_t end) const; /// @brief Get slice `[start, start+length]` starting at offset `start` and of `length` code points /// @param start The initial code point where the slice starts /// @param length One after the final code point where the slice ends /// @return The `[start, start+length]` StringView slice /// /// Example: /// @code{.cpp} /// StringView str = "123_567"; /// SC_TEST_EXPECT(str.sliceStartLength(7, 0) == ""); /// SC_TEST_EXPECT(str.sliceStartLength(0, 3) == "123"); /// @endcode [[nodiscard]] StringView sliceStartLength(size_t start, size_t length) const; /// @brief Get slice 
/// `[offset, end]` measured in utf code points
    /// @param offset The initial code point where the slice starts
    /// @return The sliced StringView `[offset, end]`
    ///
    /// Example:
    /// @code{.cpp}
    /// StringView str = "123_567";
    /// SC_TEST_EXPECT(str.sliceStart(4) == "567");
    /// @endcode
    [[nodiscard]] StringView sliceStart(size_t offset) const;

    /// @brief Get slice `[start, end-offset]` measured in utf code points
    /// @param offset The number of code points to leave out at the end of the view
    /// @return The sliced StringView `[start, end-offset]`
    /// @note NOTE(review): range taken from the example below, where `sliceEnd(4)` on a 7 code point view keeps
    ///       the first 3. The previous text said `[end-offset, end]` - confirm against the implementation.
    ///
    /// Example:
    /// @code{.cpp}
    /// StringView str = "123_567";
    /// SC_TEST_EXPECT(str.sliceEnd(4) == "123");
    /// @endcode
    [[nodiscard]] StringView sliceEnd(size_t offset) const;

    // NOTE(review): `Span` appears without its element type in the three declarations below, presumably lost
    // when this text was extracted - restore it from the upstream header.

    /// @brief Returns a shortened StringView removing ending utf code points matching the `codePoints` span
    /// @param codePoints The span of utf code points to look for
    /// @return The trimmed StringView
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT("myTest_\n__"_a8.trimEndAnyOf({'_', '\n'}) == "myTest");
    /// SC_TEST_EXPECT("myTest"_a8.trimEndAnyOf({'_'}) == "myTest");
    /// @endcode
    [[nodiscard]] StringView trimEndAnyOf(Span codePoints) const;

    /// @brief Returns a shortened StringView removing starting utf code points matching the `codePoints` span
    /// @param codePoints The span of utf code points to look for
    /// @return The trimmed StringView
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT("__\n_myTest"_a8.trimStartAnyOf({'_', '\n'}) == "myTest");
    /// SC_TEST_EXPECT("_myTest"_a8.trimStartAnyOf({'_'}) == "myTest");
    /// @endcode
    [[nodiscard]] StringView trimStartAnyOf(Span codePoints) const;

    /// @brief Returns a shortened StringView removing starting and ending utf code points inside the `codePoints` span
    /// @param codePoints The span of utf code points to look for
    /// @return The trimmed StringView
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT("__\n_myTest__\n"_a8.trimAnyOf({'_', '\n'}) == "myTest");
    /// SC_TEST_EXPECT("_myTest"_a8.trimAnyOf({'_'}) == "myTest");
    /// @endcode
    [[nodiscard]] StringView trimAnyOf(Span codePoints) const;

    /// @brief Returns a shortened StringView without starting/ending utf code points inside {'\\r', '\\n', '\\t', ' '}
    /// @return The trimmed StringView
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT(" \n_myTest__\n\t"_a8.trimWhiteSpaces() == "_myTest__");
    /// SC_TEST_EXPECT("\nmyTest \r"_a8.trimWhiteSpaces() == "myTest");
    /// @endcode
    [[nodiscard]] StringView trimWhiteSpaces() const;

    /// @brief Check if StringView can be parsed as an integer number.
    /// @return `true` if StringView is an integer number.
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT("-34"_a8.isIntegerNumber());
    /// SC_TEST_EXPECT("+12"_a8.isIntegerNumber());
    /// SC_TEST_EXPECT(not "+12$"_a8.isIntegerNumber());
    /// @endcode
    [[nodiscard]] bool isIntegerNumber() const;

    /// @brief Check if StringView can be parsed as a floating point number.
    /// @return `true` if StringView is a floating point number.
    ///
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT("-34."_a8.isFloatingNumber());
    /// SC_TEST_EXPECT("-34.0"_a8.isFloatingNumber());
    /// SC_TEST_EXPECT("0.34"_a8.isFloatingNumber());
    /// SC_TEST_EXPECT(not "+12$"_a8.isFloatingNumber());
    /// SC_TEST_EXPECT(not "$+12"_a8.isFloatingNumber());
    /// SC_TEST_EXPECT(not "+$12"_a8.isFloatingNumber());
    /// @endcode
    [[nodiscard]] bool isFloatingNumber() const;

    /// @brief Try parsing current StringView as a 32 bit integer.
    /// @param value Will receive the parsed 32 bit integer, if function returns `true`.
    /// @return `true` if the StringView has been successfully parsed as a 32 bit integer.
    ///
    /// Example:
    /// @code{.cpp}
    /// StringView other("123");
    /// int32_t value;
    /// if(other.parseInt32(value))
    /// {
    ///     // ... do something with value
    /// }
    /// @endcode
    [[nodiscard]] bool parseInt32(int32_t& value) const;

    /// @brief Try parsing current StringView as a floating point number.
    /// @param value Will receive the parsed floating point number, if function returns `true`.
    /// @return `true` if the StringView has been successfully parsed as a floating point number.
    ///
    /// Example:
    /// @code{.cpp}
    /// StringView other("12.34");
    /// float value;
    /// if(other.parseFloat(value))
    /// {
    ///     // ... do something with value
    /// }
    /// @endcode
    [[nodiscard]] bool parseFloat(float& value) const;

    /// @brief Try parsing current StringView as a double precision floating point number.
    /// @param value Will receive the parsed double precision floating point number, if function returns `true`.
    /// @return `true` if the StringView has been successfully parsed as a double precision floating point number.
    ///
    /// Example:
    /// @code{.cpp}
    /// StringView other("12.342321");
    /// double value;
    /// if(other.parseDouble(value))
    /// {
    ///     // ... do something with value
    /// }
    /// @endcode
    [[nodiscard]] bool parseDouble(double& value) const;

  private:
    // NOTE(review): the template parameter lists (after `template`) and the arguments of `identity` are missing
    // in the declarations below, presumably stripped when this text was extracted - restore from upstream.

    // Tag type used to select a getIterator overload by iterator type
    template struct identity { };
    template constexpr StringIteratorASCII getIterator(identity) const;
    constexpr StringIteratorUTF8 getIterator(identity) const;
    constexpr StringIteratorUTF16 getIterator(identity) const;
    // Compares two iterators code point by code point, counting the matching ones in `points`
    template static constexpr bool equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points);
    // Compares this view with `other`, dispatching on the encoding of `other`
    template constexpr bool equalsIterator(StringView other, size_t& points) const;
};

/// @brief Splits a StringView in tokens according to separators
struct SC::StringViewTokenizer
{
    StringCodePoint splittingCharacter = 0; ///< The last splitting character matched in current tokenization

    size_t numSplitsNonEmpty = 0; ///< How many non-empty splits have occurred in current tokenization
    size_t numSplitsTotal = 0; ///< How many total splits have occurred in current tokenization

    StringView component; ///< Current component that has been tokenized by tokenizeNext
    StringView processed; ///< Substring of original string passed in constructor processed so far
    StringView remaining; ///< Substring from current position until the end of original text

    enum Options
    {
        IncludeEmpty, ///< tokenizeNext also returns empty tokens
        SkipEmpty ///< tokenizeNext does NOT return empty tokens
    };

    /// @brief Build a tokenizer operating on the given text string view
    StringViewTokenizer(StringView text) : remaining(text), originalText(text) {}

    /// @brief Splits the string along a list of separators
    /// @param separators List of separators
    /// @param options If to skip empty tokens or not
    /// @return `true` if there are additional tokens to parse
    /// @n
    /// Example:
    /// @code{.cpp}
    /// StringViewTokenizer tokenizer("bring,me,the,horizon");
    /// while (tokenizer.tokenizeNext(',', StringViewTokenizer::SkipEmpty))
    /// {
    ///     console.printLine(tokenizer.component);
    /// }
    /// @endcode
    // NOTE(review): `Span` has lost its element type here as well (see countTokens below too).
    [[nodiscard]] bool tokenizeNext(Span separators, Options options = Options::SkipEmpty);

    /// @brief Tokenizes from current position to first newline.
    ///
    /// @return `true` if a new line has been found
    /// @n
    /// Example:
    /// @code{.cpp}
    /// StringViewTokenizer lines("Line1\nLine2\nLine3\n");
    /// SC_TEST_EXPECT(lines.tokenizeNextLine());
    /// SC_TEST_EXPECT(lines.component == "Line1");
    /// SC_TEST_EXPECT(lines.tokenizeNextLine());
    /// SC_TEST_EXPECT(lines.component == "Line2");
    /// SC_TEST_EXPECT(lines.tokenizeNextLine());
    /// SC_TEST_EXPECT(lines.component == "Line3");
    /// SC_TEST_EXPECT(not lines.tokenizeNextLine());
    /// @endcode
    [[nodiscard]] bool tokenizeNextLine() { return tokenizeNext({'\n'}); }

    /// @brief Count the number of tokens that exist in the string view passed in constructor, when split along
    /// the given separators
    /// @param separators Separators to split the original string with
    /// @return Current StringViewTokenizer to inspect SC::StringViewTokenizer::numSplitsNonEmpty or
    /// SC::StringViewTokenizer::numSplitsTotal.
    /// @n
    /// Example:
    /// @code{.cpp}
    /// SC_TEST_EXPECT(StringViewTokenizer("___").countTokens('_').numSplitsNonEmpty == 0);
    /// SC_TEST_EXPECT(StringViewTokenizer("___").countTokens('_').numSplitsTotal == 3);
    /// @endcode
    StringViewTokenizer& countTokens(Span separators);

    /// @brief Check if the tokenizer has processed the entire string view passed in the constructor
    [[nodiscard]] bool isFinished() const;

  private:
    StringView originalText; // Original text as passed in the constructor
};

/// @brief Algorithms operating on strings (glob / wildcard).
/// @n
/// Example
/// @code{.cpp}
/// SC_ASSERT(StringAlgorithms::matchWildcard("", ""));
/// SC_ASSERT(StringAlgorithms::matchWildcard("1?3", "123"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("1*3", "12223"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("*2", "12"));
/// SC_ASSERT(not StringAlgorithms::matchWildcard("*1", "12"));
/// SC_ASSERT(not StringAlgorithms::matchWildcard("*1", "112"));
/// SC_ASSERT(not StringAlgorithms::matchWildcard("**1", "112"));
/// SC_ASSERT(not StringAlgorithms::matchWildcard("*?1", "112"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("1*", "12123"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("*/myString", "myString/myString/myString"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("**/myString", "myString/myString/myString"));
/// SC_ASSERT(not StringAlgorithms::matchWildcard("*/String", "myString/myString/myString"));
/// SC_ASSERT(StringAlgorithms::matchWildcard("*/Directory/File.cpp", "/Root/Directory/File.cpp"));
/// @endcode
struct SC::StringAlgorithms
{
    /// @brief Matches a string against a wildcard pattern
    /// @note In the examples above the first argument is the pattern and the second the text to match
    [[nodiscard]] static bool matchWildcard(StringView s1, StringView s2);

  private:
    // NOTE(review): template parameter list missing (presumably stripped in extraction) - restore from upstream.
    template [[nodiscard]] static bool matchWildcardIterator(StringIterator1 pattern, StringIterator2 text);
};
//!
@} //----------------------------------------------------------------------------------------------------------------------- // Implementations Details //----------------------------------------------------------------------------------------------------------------------- namespace SC { constexpr SC::StringView operator""_a8(const char* txt, size_t sz) { return StringView({txt, sz}, true, StringEncoding::Ascii); } constexpr StringView operator""_u8(const char* txt, size_t sz) { return StringView({txt, sz}, true, StringEncoding::Utf8); } constexpr StringView operator""_u16(const char* txt, size_t sz) { const bool isNullTerminated = sz > 0 and sz % 2 == 1 and txt[sz - 1] == 0; return StringView({txt, isNullTerminated ? sz - 1 : sz}, isNullTerminated, StringEncoding::Utf16); } } // namespace SC template constexpr StringIterator SC::StringView::getIterator() const { // For GCC complaining about specialization in non-namespace scope return getIterator(identity()); } template constexpr SC::StringIteratorASCII SC::StringView::getIterator(identity) const { return StringIteratorASCII(text, text + textSizeInBytes); } constexpr SC::StringIteratorUTF8 SC::StringView::getIterator(identity) const { return StringIteratorUTF8(text, text + textSizeInBytes); } constexpr SC::StringIteratorUTF16 SC::StringView::getIterator(identity) const { return StringIteratorUTF16(text, text + textSizeInBytes); } template constexpr bool SC::StringView::equalsIterator(StringIterator1 t1, StringIterator2 t2, size_t& points) { StringCodePoint c1 = 0; StringCodePoint c2 = 0; while (t1.advanceRead(c1) and t2.advanceRead(c2)) { if (c1 != c2) { return false; } points++; } return t1.isAtEnd() and t2.isAtEnd(); } template constexpr bool SC::StringView::equalsIterator(StringView other, size_t& points) const { auto it = getIterator(); switch (other.getEncoding()) { case StringEncoding::Ascii: return equalsIterator(it, other.getIterator(), points); case StringEncoding::Utf8: return equalsIterator(it, 
other.getIterator(), points); case StringEncoding::Utf16: return equalsIterator(it, other.getIterator(), points); } Assert::unreachable(); } [[nodiscard]] constexpr bool SC::StringView::operator==(StringView other) const { #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunreachable-code" #endif if (__builtin_is_constant_evaluated()) { if (not hasCompatibleEncoding(other)) return false; auto it1 = text; auto it2 = other.text; auto sz = textSizeInBytes; for (size_t idx = 0; idx < sz; ++idx) if (it1[idx] != it2[idx]) return false; return true; } else { return StringSpan::operator==(other); } #if defined(__clang__) #pragma clang diagnostic pop #endif } constexpr bool SC::StringView::fullyOverlaps(StringView other, size_t& commonOverlappingPoints) const { commonOverlappingPoints = 0; switch (getEncoding()) { case StringEncoding::Ascii: return equalsIterator(other, commonOverlappingPoints); case StringEncoding::Utf8: return equalsIterator(other, commonOverlappingPoints); case StringEncoding::Utf16: return equalsIterator(other, commonOverlappingPoints); } Assert::unreachable(); } template constexpr auto SC::StringView::withIterator(Func&& func) const { switch (getEncoding()) { case StringEncoding::Ascii: return func(getIterator()); case StringEncoding::Utf8: return func(getIterator()); case StringEncoding::Utf16: return func(getIterator()); } Assert::unreachable(); } template constexpr auto SC::StringView::withIterators(StringView s1, StringView s2, Func&& func) { return s1.withIterator([&s2, &func](auto it1) { return s2.withIterator([&it1, &func](auto it2) { return func(it1, it2); }); }); } constexpr bool SC::StringView::hasCompatibleEncoding(StringView str) const { return StringEncodingAreBinaryCompatible(getEncoding(), str.getEncoding()); } template inline SC::StringView SC::StringView::fromIterators(StringIterator from, StringIterator to, StringEncoding encoding) { const ssize_t numBytes = to.bytesDistanceFrom(from); if 
(numBytes >= 0) { StringIterator fromEnd = from; fromEnd.setToEnd(); if (fromEnd.bytesDistanceFrom(to) >= 0) // If current iterator of to is inside from range return StringView({from.it, static_cast(numBytes)}, false, encoding); } return StringView(encoding); // TODO: Make StringView::fromIterators return bool to make it fallible } template inline SC::StringView SC::StringView::fromIteratorUntilEnd(StringIterator it, StringEncoding encoding) { StringIterator endIt = it; endIt.setToEnd(); const size_t numBytes = static_cast(endIt.bytesDistanceFrom(it)); return StringView({it.it, numBytes}, false, encoding); } template constexpr SC::StringView SC::StringView::fromIteratorFromStart(StringIterator it, StringEncoding encoding) { StringIterator start = it; start.setToStart(); const size_t numBytes = static_cast(it.bytesDistanceFrom(start)); return StringView({start.it, numBytes}, false, encoding); } namespace SC { struct Console; struct StringFormatOutput; template struct StringFormatterFor { static bool format(StringFormatOutput& data, const StringSpan specifier, const T& value) { return StringFormatterFor<decltype(value.view())>::format(data, specifier, value.view()); }; }; //! @addtogroup group_strings //! 
@{ /// @brief Allows pushing results of StringFormat to a buffer or to the console struct SC_COMPILER_EXPORT StringFormatOutput { StringFormatOutput(StringEncoding encoding, IGrowableBuffer& buffer) : encoding(encoding), growableBuffer(&buffer) {} /// @brief Constructs a StringFormatOutput object pushing to a console /// @param encoding The given encoding /// @param destination The destination console /// @param useStdOut If `true` output is sent to stdout, otherwise to stderr StringFormatOutput(StringEncoding encoding, Console& destination, bool useStdOut); /// @brief Appends the StringView (eventually converting it) to destination buffer /// @param text The StringView to be appended to buffer or console /// @return `true` if conversion succeeded [[nodiscard]] bool append(StringView text); /// @brief Method to be called when format begins, so that it can be rolled back on failure void onFormatBegin(); /// @brief Method to be called when format fails (will rollback buffer to length before `onFormatBegin`) void onFormatFailed(); /// @brief Method to be called when format succeeds /// @return `true` if IGrowableBuffer is still valid [[nodiscard]] bool onFormatSucceeded(); private: IGrowableBuffer* growableBuffer = nullptr; StringEncoding encoding; bool useStdOut = true; Console* console = nullptr; size_t backupSize = 0; }; /// @brief Formats String with a simple DSL embedded in the format string /// /// This is a small implementation to format using a minimal string based DSL, but good enough for simple usages. /// It uses the same `{}` syntax and supports positional arguments. @n /// `StringFormat::format(output, "{1} {0}", "World", "Hello")` is formatted as `"Hello World"`. @n /// Inside the `{}` after a colon (`:`) a specification string can be used to indicate how to format /// the given value. As the backend for actual number to string formatting is `snprintf`, such specification strings are /// the same as what would be given to snprintf. 
For example passing `"{:02}"` is transformed to `"%.02f"` when passed /// to snprintf. @n /// `{` is escaped if found near to another `{`. In other words `format("{{")` will print a single `{`. /// /// Example: /// @code{.cpp} /// String buffer(StringEncoding::Ascii); /// SC_TEST_EXPECT(StringBuilder::format(buffer, "{1}_{0}_{1}", 1, 0)); /// SC_TEST_EXPECT(buffer == "0_1_0"); /// SC_TEST_EXPECT(StringBuilder::format(buffer, "{0:.2}_{1}_{0:.4}", 1.2222, "salve")); /// SC_TEST_EXPECT(buffer == "1.22_salve_1.2222"); /// @endcode /// @note It's not convenient to use SC::StringFormat directly, as you should probably use SC::StringBuilder /// @tparam RangeIterator Type of the specific StringIterator used template struct StringFormat { /// @brief Formats fmt StringView using simple DSL where `{}` are replaced with args /// @tparam Types Types of the arguments being formatted /// @param data Destination abstraction (buffer or console) /// @param fmt The format string to be used /// @param args Actual arguments being formatted /// @return `true` if format succeeded template [[nodiscard]] static bool format(StringFormatOutput& data, StringView fmt, Types&&... args); private: struct Implementation; }; //! @} } // namespace SC //----------------------------------------------------------------------------------------------------------------------- // Implementations Details //----------------------------------------------------------------------------------------------------------------------- template struct SC::StringFormat::Implementation { template static bool formatArgument(StringFormatOutput& data, StringView specifier, int position, T&& arg, Rest&&... 
rest) { if (position == Total - N) { using First = typename TypeTraits::RemoveConst::type>::type; return StringFormatterFor::format(data, specifier, arg); } else { return formatArgument<total, 1="" n="" -="">(data, specifier, position, forward(rest)...); } } template static typename SC::TypeTraits::EnableIf<sizeof...(args) =="0," bool="">::type formatArgument(StringFormatOutput&, StringView, int, Args...) { return false; } template static bool parsePosition(StringFormatOutput& data, RangeIterator& it, int32_t& parsedPosition, Types&&... args) { const auto startOfSpecifier = it; if (it.advanceUntilMatches('}')) // We have an already matched '{' when arriving here { auto specifier = startOfSpecifier.sliceFromStartUntil(it); auto specifierPosition = specifier; if (specifier.advanceUntilMatches(':')) { specifierPosition = startOfSpecifier.sliceFromStartUntil(specifier); (void)specifier.stepForward(); // eat '{' } (void)specifierPosition.stepForward(); // eat '{' (void)it.stepForward(); // eat '}' const StringView positionString = StringView::fromIteratorUntilEnd(specifierPosition); const StringView specifierString = StringView::fromIteratorUntilEnd(specifier); if (not positionString.isEmpty()) { if (not positionString.parseInt32(parsedPosition)) { return false; } } constexpr auto maxArgs = sizeof...(args); return formatArgument<maxargs, maxargs="">(data, specifierString, parsedPosition, forward(args)...); } return false; } template static bool executeFormat(StringFormatOutput& data, RangeIterator it, Types&&... 
args) { StringCodePoint matchedChar; auto start = it; int32_t position = 0; int32_t maxPosition = 0; while (true) { if (it.advanceUntilMatchesAny({'{', '}'}, matchedChar)) // match start or end of specifier { if (it.isFollowedBy(matchedChar)) SC_LANGUAGE_UNLIKELY // if it's the same matched, let's escape it { (void)it.stepForward(); // we want to make sure we insert the escaped '{' or '}' if (not data.append(StringView::fromIterators(start, it))) return false; (void)it.stepForward(); // we don't want to insert the additional '{' or '}' needed for escaping start = it; } else if (matchedChar == '{') // it's a '{' not followed by itself, so let's parse specifier { if (not data.append(StringView::fromIterators(start, it))) // write everything before '{ return false; // try parse '}' and eventually format int32_t parsedPosition = position; if (not parsePosition(data, it, parsedPosition, forward(args)...)) return false; start = it; position += 1; maxPosition = max(maxPosition, parsedPosition + 1); } else { return false; // arriving here means end of string with as single, unescaped '}' } } else { if (not data.append(StringView::fromIterators(start, it))) // write everything before '{ return false; return maxPosition == static_cast(sizeof...(args)); // check right number of args } } } }; template template bool SC::StringFormat::format(StringFormatOutput& data, StringView fmt, Types&&... 
args) { SC_TRY(fmt.getEncoding() != StringEncoding::Utf16); // UTF16 format strings are not supported data.onFormatBegin(); if (Implementation::executeFormat(data, fmt.getIterator(), forward(args)...)) SC_LANGUAGE_LIKELY { return data.onFormatSucceeded(); } else { data.onFormatFailed(); return false; } } namespace SC { // clang-format off template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const float);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const double);}; #if SC_COMPILER_MSVC || SC_COMPILER_CLANG_CL #if SC_PLATFORM_64_BIT == 0 template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::ssize_t);}; #endif #else #if !SC_PLATFORM_LINUX template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::size_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::ssize_t);}; #endif #endif template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::int64_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::uint64_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::int32_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::uint32_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::int16_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::uint16_t);}; template <> struct 
SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::int8_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const SC::uint8_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const char);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const bool);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const StringView);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const char*);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const void*);}; #if SC_PLATFORM_WINDOWS template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const wchar_t);}; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const wchar_t*);}; #endif template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const StringSpan);}; struct StringPath; template <> struct SC_COMPILER_EXPORT StringFormatterFor {static bool format(StringFormatOutput&, const StringSpan, const StringPath&);}; // clang-format on template struct StringFormatterFor<char[n]> { static bool format(StringFormatOutput& data, const StringView specifier, const char* str) { const StringView sv({str, N - 1}, true, StringEncoding::Ascii); return StringFormatterFor::format(data, specifier, sv); } }; } // namespace SC namespace SC { struct String; //! @addtogroup group_strings //! @{ /// @brief Writes to console using SC::StringFormat. 
/// /// Example: /// @code{.cpp} /// // Use a custom buffer UTF conversions on windows (optional) /// char optionalConversionBuffer[512]; /// Console console(optionalConversionBuffer); /// String str = StringView("Test Test\n"); /// // Have fun printing /// console.print(str.view()); /// @endcode struct SC_COMPILER_EXPORT Console { /// @brief Constructs a console with an OPTIONAL conversion buffer used for UTF encoding conversions on Windows /// @param conversionBuffer The optional buffer used for UTF conversions Console(Span conversionBuffer = {}); /// @brief Prints a formatted string using SC::StringFormat to stdout /// @return `true` if message has been printed successfully to Console template bool print(StringSpan fmt, Types&&... args) { return printInternal(true, fmt, forward(args)...); } /// @brief Prints a formatted string using SC::StringFormat to stderr /// @return `true` if message has been printed successfully to Console template bool printError(StringSpan fmt, Types&&... args) { return printInternal(false, fmt, forward(args)...); } /// @brief Prints a string to console void print(const StringSpan str); /// @brief Prints a string to stderr void printError(const StringSpan str); /// @brief Flushes stdout void flush(); /// @brief Flushes stderr void flushStdErr(); /// @brief Prints a string to stdout and adds a newline at the end of it void printLine(const StringSpan str); /// @brief Prints a string to stderr and adds a newline at the end of it void printErrorLine(const StringSpan str); /// @brief Tries attaching current process to parent console (Windows only, has no effect elsewhere) /// @returns `true` if the parent console has been attached (Windows only, returns true elsewhere) static bool tryAttachingToParentConsole(); private: template bool printInternal(bool useStdOut, StringSpan fmt, Types&&... 
args) { StringFormatOutput output(fmt.getEncoding(), *this, useStdOut); if (fmt.getEncoding() == StringEncoding::Ascii || fmt.getEncoding() == StringEncoding::Utf8) { // It's ok parsing format string '{' and '}' both for utf8 and ascii with StringIteratorASCII // because on a valid UTF8 string, these chars are unambiguously recognizable return StringFormat::format(output, fmt, forward(args)...); } return false; // UTF16/32 format strings are not supported } Span conversionBuffer; #if SC_PLATFORM_WINDOWS void printWindows(const StringSpan str, void* handle); void* hStdOut; void* hStdErr; bool isConsole = true; bool isDebugger = true; #endif }; //! @} } // namespace SC extern SC::Console* globalConsole; #if !defined(SC_LOG_MESSAGE) #define SC_LOG_MESSAGE(fmt, ...) \ if (globalConsole) \ globalConsole->print(fmt, ##__VA_ARGS__) #endif //---------------------------------------------------------------------------------------------------------------------- // Strings/Path.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { struct String; //! @addtogroup group_file_system //! @{ /// @brief Parse and compose filesystem paths for windows and posix struct SC_COMPILER_EXPORT Path { /// @brief Path type (windows or posix) enum Type { AsPosix, AsWindows, #if SC_PLATFORM_WINDOWS AsNative = AsWindows #else AsNative = AsPosix #endif }; /// @brief Holds the various parsed components of a path struct ParsedView { bool endsWithSeparator = false; // TODO: ParsedView::directory and base are not defined consistently Type type = AsPosix; ///< Indicates if this is a windows or posix path StringView root; ///< Ex. `"C:\\"` on windows - `"/"` on posix StringView directory; ///< Ex. `"C:\\dir"` on windows - `"/dir"` on posix StringView base; ///< Ex. 
///< `"base"` for `"C:\\dir\\base"` on windows or `"/dir/base"` on posix
        StringView name; ///< Ex. `"name"` for `"C:\\dir\\name.ext"` on windows or `"/dir/name.ext"` on posix
        StringView ext; ///< Ex. `"ext"` for `"C:\\dir\\name.ext"` on windows or `"/dir/name.ext"` on posix

        /// @brief Parses all components on windows input path
        ///
        /// For example:
        /// @code{.cpp}
        /// Path::ParsedView path;
        /// path.parseWindows("C:\\ASD\\bbb\\name.ext");
        /// path.root == "C:\\";
        /// path.directory == "C:\\ASD\\bbb";
        /// path.base == "name.ext";
        /// path.name == "name";
        /// path.ext == "ext";
        /// path.endsWithSeparator == false;
        /// @endcode
        /// @param input A path in windows form (ex "C:\\directory\\name.ext")
        /// @returns false if both name and extension will be empty after parsing or if parsing name/extension fails
        [[nodiscard]] bool parseWindows(StringView input);

        /// @brief Parses all components on posix input path
        ///
        /// For example:
        ///
        /// @code{.cpp}
        /// Path::ParsedView path;
        /// path.parsePosix("/123/456");
        /// path.root == "/";
        /// path.directory == "/123";
        /// path.base == "456";
        /// path.endsWithSeparator == false;
        /// @endcode
        /// @param input A path in posix form (ex "/directory/name.ext")
        /// @returns false if both name and extension will be empty after parsing or if parsing name/extension fails
        [[nodiscard]] bool parsePosix(StringView input);
    };

    /// @brief Joins multiple StringView with a Separator into an output String
    /// @param[out] output The output string receiving the path
    /// @param[in] inputs The input paths to join
    /// @param[in] separator The separator to use. By default `/` on Posix and `\` on Windows
    /// @param[in] skipEmpty If true will skip empty entries in `inputs` Span
    /// @return true if the Path was successfully joined
    // NOTE(review): the template parameter list, the element type of `Span` and the template arguments of
    // `GrowableBuffer` are missing in the two declarations below, presumably stripped when this text was
    // extracted - restore them from the upstream header.
    template [[nodiscard]] static bool join(T& output, Span inputs, StringView separator = SeparatorStringView(), bool skipEmpty = false)
    {
        // Wraps `output` in a growable buffer and forwards to the non-template overload
        return join(GrowableBuffer{output}, GrowableBuffer::getEncodingFor(output), inputs, separator, skipEmpty);
    }

    /// @brief Joins multiple StringView with a Separator into a growable buffer using the given encoding
    [[nodiscard]] static bool join(IGrowableBuffer&& output, StringEncoding encoding, Span inputs, StringView separator = SeparatorStringView(), bool skipEmpty = false);

    /// @brief Splits a StringView of type "name.ext" into "name" and "ext"
    /// @param[in] input An input filename (ex. "name.ext")
    /// @param[out] name Output string holding name ("name" in "name.ext")
    /// @param[out] extension Output string holding extension ("ext" in "name.ext")
    /// @returns `false` if both name and extension will be empty after trying to parse them
    ///
    /// Example:
    /// \snippet Tests/Libraries/FileSystem/PathTest.cpp parseNameExtensionSnippet
    [[nodiscard]] static bool parseNameExtension(const StringView input, StringView& name, StringView& extension);

    /// @brief Splits a Posix or Windows path into a ParsedView.
    /// @param[in] input The StringView with path to be parsed
    /// @param[out] pathView The output parsed ParsedView with all components of path
    /// @param[in] type Specify to parse as Windows or Posix path
    /// @return `true` if path was parsed successfully
    [[nodiscard]] static bool parse(StringView input, Path::ParsedView& pathView, Type type);

    /// @brief Returns the directory name of a path. Trailing separators are ignored.
    ///
    /// For example:
    ///
    /// @code{.cpp}
    /// Path::dirname("/dirname/basename", Path::AsPosix) == "/dirname";
    /// Path::dirname("/dirname/basename//", Path::AsPosix) == "/dirname";
    /// Path::dirname("C:\\dirname\\basename", Path::AsWindows) == "C:\\dirname";
    /// Path::dirname("\\dirname\\basename\\\\", Path::AsWindows) == "\\dirname";
    /// @endcode
    /// @param[in] input The StringView with path to be parsed. Trailing separators are ignored.
    /// @param[in] type Specify to parse as Windows or Posix path
    /// @param[in] repeat How many additional directory levels should be removed, for example
    ///                   `dirname("/1/2/3/4", Path::AsPosix, 1) == "/1/2"`
    /// @return Substring of `input` holding the directory name
    [[nodiscard]] static StringView dirname(StringView input, Type type, int repeat = 0);

    /// @brief Returns the base name of a path. Trailing separators are ignored.
    ///
    /// For example:
    ///
    /// @code{.cpp}
    /// Path::basename("/a/basename", Path::AsPosix) == "basename";
    /// Path::basename("/a/basename//", Path::AsPosix) == "basename";
    /// @endcode
    /// @param[in] input The StringView with path to be parsed. Trailing separators are ignored.
    /// @param[in] type Specify to parse as Windows or Posix path
    /// @return Substring of `input` holding the base name
    [[nodiscard]] static StringView basename(StringView input, Type type);

    /// @brief Returns the base name of a path. Suffix is stripped if existing. Trailing separators are ignored.
    ///
    /// For example:
    ///
    /// @code{.cpp}
    /// Path::basename("/a/basename.html", ".html") == "basename";
    /// @endcode
    /// @param[in] input The StringView with path to be parsed. Trailing separators are ignored.
    /// @param[in] suffix The StringView extension (or suffix in general) to strip if existing.
    /// @return Substring of `input` holding the base name
    [[nodiscard]] static StringView basename(StringView input, StringView suffix);

    /// @brief Checks if a path is absolute.
    ///
    /// For example:
    ///
    /// @code{.cpp}
    /// Path::isAbsolute("/dirname/basename", Path::AsPosix) == true; // Posix Absolute
    /// Path::isAbsolute("./dirname/basename", Path::AsPosix) == false; // Posix Relative
    /// Path::isAbsolute("C:\\dirname\\basename", Path::AsWindows) == true; // Windows with Drive
    /// Path::isAbsolute("\\\\server\\dir", Path::AsWindows) == true; // Windows with Network
    /// Path::isAbsolute("\\\\?\\C:\\server\\dir", Path::AsWindows) == true; // Windows with Long
    /// Path::isAbsolute("..\\dirname\\basename", Path::AsWindows) == false; // Windows relative
    /// @endcode
    /// @param[in] input The StringView with path to be parsed. Trailing separators are ignored.
    /// @param[in] type Specify to parse as Windows or Posix path
    /// @return `true` if `input` is absolute
    [[nodiscard]] static bool isAbsolute(StringView input, Type type);

    /// @brief Separator constants for Windows paths
    struct Windows
    {
        static const char Separator = '\\';
        [[nodiscard]] static constexpr StringView SeparatorStringView() { return "\\"_a8; };
    };

    /// @brief Separator constants for Posix paths
    struct Posix
    {
        static const char Separator = '/';
        [[nodiscard]] static constexpr StringView SeparatorStringView() { return "/"_a8; }
    };

    /// Path separator char for current platform
#if SC_PLATFORM_WINDOWS
    static constexpr char Separator = '\\';
#else
    static constexpr char Separator = '/';
#endif

    /// Path separator StringView for current platform
#if SC_PLATFORM_WINDOWS
    [[nodiscard]] static constexpr StringView SeparatorStringView() { return "\\"_a8; };
#else
    [[nodiscard]] static constexpr StringView SeparatorStringView() { return "/"_a8; }
#endif

    /// @brief Resolves all `..` to output a normalized path String
    ///
    /// For example:
    ///
    /// @code{.cpp}
    /// Path::normalize(path, "/Users/SC/../Documents/", Path::AsPosix);
    /// SC_RELEASE_ASSERT(path == "/Users/Documents");
    /// @endcode
    /// @param[out] output Reference to String that will receive the normalized Path
    /// @param[in] view The path to be normalized (but it should not be a view() of the output String)
    /// @param[in] type Specify 
to parse as Windows or Posix path /// @return `true` if the Path was successfully parsed and normalized template [[nodiscard]] static bool normalize(T& output, StringView view, Type type) { StringView components[numComponents]; return normalize(GrowableBuffer{output}, output.getEncoding(), view, type, components); } [[nodiscard]] static bool normalize(IGrowableBuffer&& output, StringEncoding encoding, StringView view, Type type, Span components); /// @brief Get relative path that appended to `source` resolves to `destination`. /// /// For example: /// /// @code{.cpp} /// Path::relativeFromTo(path, "/a/b/1/2/3", "/a/b/d/e", Path::AsPosix, Path::AsPosix); /// SC_TEST_ASSERT(path == "../../../d/e"); /// @endcode /// @param[out] output The output relative path computed that transforms source into destination /// @param[in] source The source Path /// @param[in] destination The destination Path /// @param[in] type Specify to parse as Windows or Posix path /// @param[in] outputType Specify if the output relative path should be formatted as a Posix or Windows path /// @return `true` if source and destination paths can be properly parsed as absolute paths template [[nodiscard]] static bool relativeFromTo(T& output, StringView source, StringView destination, Type type, Type outputType = AsNative) { return relativeFromTo(GrowableBuffer{output}, output.getEncoding(), source, destination, type, outputType); } [[nodiscard]] static bool relativeFromTo(IGrowableBuffer&& output, StringEncoding encoding, StringView source, StringView destination, Type type, Type outputType = AsNative); /// @brief Append to an existing path a series of StringView with a separator /// @param[out] output The destination string containing the existing path, that will be extended /// @param[in] paths The path components to join, appended to `output` /// @param[in] inputType Specify to append as Windows or Posix path components /// @return `true` if the `output` path can joined properly template 
[[nodiscard]] static bool append(T& output, Span paths, Type inputType) { return append(GrowableBuffer{output}, GrowableBuffer::getEncodingFor(output), paths, inputType); } [[nodiscard]] static bool append(IGrowableBuffer&& output, StringEncoding encoding, Span paths, Type inputType); /// @brief Check if the path ends with a Windows or Posix separator /// @param[in] path The path to check /// @return `true` if path ends with a separator [[nodiscard]] static bool endsWithSeparator(StringView path); /// @brief Return a path without its (potential) starting separator /// @param[in] path The path to use /// @return A StringView without its initial separator [[nodiscard]] static StringView removeStartingSeparator(StringView path); /// @brief An extended Path::normalize handling a bug with incorrect __FILE__ backslash escape on Windows when /// using UNC Paths, and also removing quote characters '"' added when passing such paths to compiler command line. template [[nodiscard]] static bool normalizeUNCAndTrimQuotes(T& outputPath, StringView fileLocation, Type type, Span components) { return normalizeUNCAndTrimQuotes(GrowableBuffer{outputPath}, outputPath.getEncoding(), fileLocation, type, components); } [[nodiscard]] static bool normalizeUNCAndTrimQuotes(IGrowableBuffer&& outputPath, StringEncoding encoding, StringView fileLocation, Type type, Span components); private: [[nodiscard]] static StringView removeTrailingSeparator(StringView path); struct Internal; }; //! @} } // namespace SC //---------------------------------------------------------------------------------------------------------------------- // Strings/StringBuilder.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { template struct StringBuilderFor; //! @addtogroup group_strings //! 
@{ /// @brief Builds String out of a sequence of StringView or formatting through StringFormat /// /// The output can be a SC::Buffer or a SC::SmallBuffer (see [Foundation](@ref library_foundation)) /// One can do a: /// - One-shot StringBuilder::format /// - Multiple appends using StringBuilder::create or StringBuilder::createForAppendingTo followed by /// StringBuilder::finalize /// /// StringBuilder::format example: /// @snippet Tests/Libraries/Strings/StringBuilderTest.cpp stringBuilderFormatSnippet /// /// StringBuilder::create example: /// @snippet Tests/Libraries/Strings/StringBuilderTest.cpp stringBuilderTestAppendSnippet struct SC_COMPILER_EXPORT StringBuilder { // clang-format off /// @brief Creates a StringBuilder for the given string or buffer, replacing its current contents template static StringBuilderFor create(T& stringOrBuffer) noexcept { return {stringOrBuffer, StringBuilder::Clear}; } /// @brief Creates a StringBuilder for the given string or buffer, appending to its current contents template static StringBuilderFor createForAppendingTo(T& stringOrBuffer) noexcept { return {stringOrBuffer, StringBuilder::Append}; } /// @brief Helper to format a StringView against args, replacing destination contents, in a single function call template [[nodiscard]] static bool format(T& buffer, StringView fmt, Types&&... args) { return StringBuilder::create(buffer).append(fmt, forward(args)...); } // clang-format on /// @brief Formats the given StringView against args, appending to destination contents template [[nodiscard]] bool append(StringView fmt, Types&&... 
args) { // It's ok parsing format string '{' and '}' both for utf8 and ascii with StringIteratorASCII // because on a valid UTF8 string, these chars are unambiguously recognizable StringFormatOutput sfo(encoding, *buffer); return StringFormat::format(sfo, fmt, forward(args)...); } /// @brief Appends StringView to destination buffer /// @param str StringView to append to destination buffer [[nodiscard]] bool append(StringView str); /// @brief Appends source to destination buffer, replacing `occurrencesOf` StringView with StringView `with` /// @param source The StringView to be appended /// @param occurrencesOf The StringView to be searched inside `source` /// @param with The replacement StringView to be written in destination buffer /// /// Example: /// @snippet Tests/Libraries/Strings/StringBuilderTest.cpp stringBuilderTestAppendReplaceAllSnippet [[nodiscard]] bool appendReplaceAll(StringView source, StringView occurrencesOf, StringView with); /// @brief Option for StringBuilder::appendHex enum class AppendHexCase { UpperCase, LowerCase, }; /// @brief Appends given binary data escaping it as hexadecimal ASCII characters /// @param data Binary data to append to destination buffer /// @param casing Specifies if it should be appended using upper case or lower case /// @return `true` if append succeeded /// /// Example: /// @snippet Tests/Libraries/Strings/StringBuilderTest.cpp stringBuilderTestAppendHexSnippet [[nodiscard]] bool appendHex(Span data, AppendHexCase casing); protected: enum Flags { Clear, ///< Destination buffer will be cleared before pushing to it Append ///< Destination buffer will not be cleared before pushing to it }; friend struct Path; StringBuilder() = default; StringBuilder(IGrowableBuffer& ibuffer, StringEncoding encoding, Flags flags) noexcept; void initWithEncoding(IGrowableBuffer& bufferT, StringEncoding stringEncoding, Flags flags) noexcept; IGrowableBuffer* buffer = nullptr; StringEncoding encoding; }; /// @brief StringBuilder tied to a 
specific type, created through StringBuilder::create or /// StringBuilder::createForAppendingTo template struct StringBuilderFor : public StringBuilder { GrowableBuffer growableBuffer; StringView finalizedView; StringBuilderFor(T& stringOrBuffer, Flags flags) noexcept : growableBuffer(stringOrBuffer) { initWithEncoding(growableBuffer, GrowableBuffer::getEncodingFor(stringOrBuffer), flags); } ~StringBuilderFor() noexcept { finalize(); } /// @brief Finalizes the StringBuilder, returning the resulting StringView StringView finalize() noexcept { if (buffer) { growableBuffer.finalize(); finalizedView = {{buffer->data(), buffer->size()}, true, encoding}; buffer = nullptr; } return view(); } /// @brief Returns the resulting StringView after finalize /// @warning This method can be called only after finalize has been called [[nodiscard]] StringView view() noexcept { SC_ASSERT_RELEASE(buffer == nullptr); return finalizedView; } }; //! @} } // namespace SC //---------------------------------------------------------------------------------------------------------------------- // Strings/StringConverter.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { //! @addtogroup group_strings //! @{ /// @brief Converts String to a different encoding (UTF8, UTF16). /// /// SC::StringConverter converts strings between different UTF encodings and can add null-terminator if requested. /// When the SC::StringSpan is already null-terminated, the class just forwards the original SC::StringSpan. 
/// /// Example: /// \snippet Tests/Libraries/Strings/StringConverterTest.cpp stringConverterTestSnippet struct SC_COMPILER_EXPORT StringConverter { /// @brief Specifies if to add a null terminator enum StringTermination { NullTerminate, ///< A null terminator will be added at the end of the String DoNotTerminate ///< A null terminator will NOT be added at the end of the String }; /// @brief Appends to buffer text with requested encoding, optionally null-terminating it too. /// @param encoding The requested destination encoding to convert to /// @param text The StringSpan to be converted /// @param buffer Encoded text will be appended to buffer /// @param nullTerminate Specifies if the StringSpan will need to be null terminated or not /// @return `true` if the conversion succeeds template [[nodiscard]] static bool appendEncodingTo(StringEncoding encoding, StringSpan text, T& buffer, StringTermination nullTerminate) { GrowableBuffer growableBuffer{buffer}; return appendEncodingTo(encoding, text, static_cast<igrowablebuffer&>(growableBuffer), nullTerminate); } [[nodiscard]] static bool appendEncodingTo(StringEncoding encoding, StringSpan text, IGrowableBuffer& buffer, StringTermination nullTerminate); struct Internal; }; //! 
@} } // namespace SC #endif // SANE_CPP_STRINGS_HEADER #if defined(SANE_CPP_IMPLEMENTATION) && !defined(SANE_CPP_STRINGS_IMPLEMENTATION) #define SANE_CPP_STRINGS_IMPLEMENTATION 1 //---------------------------------------------------------------------------------------------------------------------- // Strings/Strings.cpp //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/Console.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #if SC_PLATFORM_WINDOWS #define WIN32_LEAN_AND_MEAN #include #endif #include // stdout SC::Console::Console(Span conversionBuffer) : conversionBuffer(conversionBuffer) { #if SC_PLATFORM_WINDOWS hStdOut = ::GetStdHandle(STD_OUTPUT_HANDLE); hStdErr = ::GetStdHandle(STD_OUTPUT_HANDLE); isConsole = ::GetFileType(hStdOut) == FILE_TYPE_CHAR; isDebugger = ::IsDebuggerPresent() == TRUE; #endif } bool SC::Console::tryAttachingToParentConsole() { #if SC_PLATFORM_WINDOWS return ::AttachConsole(ATTACH_PARENT_PROCESS) == TRUE; #else return true; #endif } void SC::Console::printLine(const StringSpan str) { print(str); print("\n"_a8); } void SC::Console::printErrorLine(const StringSpan str) { printError(str); printError("\n"_a8); } void SC::Console::flush() { #if SC_PLATFORM_WINDOWS ::FlushFileBuffers(hStdOut); #else ::fflush(stdout); #endif } void SC::Console::flushStdErr() { #if SC_PLATFORM_WINDOWS ::FlushFileBuffers(hStdErr); #else ::fflush(stderr); #endif } void SC::Console::print(const StringSpan str) { if (str.isEmpty()) return; #if SC_PLATFORM_WINDOWS printWindows(str, hStdOut); #else ::fwrite(str.bytesWithoutTerminator(), sizeof(char), 
str.sizeInBytes(), stdout); #endif } void SC::Console::printError(const StringSpan str) { if (str.isEmpty()) return; #if SC_PLATFORM_WINDOWS printWindows(str, hStdErr); #else ::fwrite(str.bytesWithoutTerminator(), sizeof(char), str.sizeInBytes(), stderr); #endif } #if SC_PLATFORM_WINDOWS void SC::Console::printWindows(const StringSpan str, void* handle) { char fallbackBuffer[256]; if (conversionBuffer.sizeInBytes() < 256) { conversionBuffer = fallbackBuffer; } if (!isConsole) { if (str.getEncoding() == StringEncoding::Utf16) { // copy chunks of wide chars from str to conversion buffer, convert to utf8, looping until all are written const size_t totalNumWChars = str.sizeInBytes() / sizeof(wchar_t); size_t numWrittenWChars = 0; const size_t maxInputWChars = conversionBuffer.sizeInBytes() / 2; while (numWrittenWChars < totalNumWChars) { const size_t remainingWChars = totalNumWChars - numWrittenWChars; size_t numToConvertWChars = remainingWChars < maxInputWChars ? remainingWChars : maxInputWChars; int numChars = 0; // reduce number of wide chars to convert until conversion succeeds while (numToConvertWChars > 0) { numChars = ::WideCharToMultiByte( CP_UTF8, 0, reinterpret_cast(str.bytesWithoutTerminator()) + numWrittenWChars, static_cast(numToConvertWChars), reinterpret_cast<char*>(conversionBuffer.data()), static_cast(conversionBuffer.sizeInBytes() - 1), nullptr, 0); if (numChars > 0) { break; } numToConvertWChars--; } if (numChars > 0) { conversionBuffer.data()[numChars] = 0; // null terminator ::WriteFile(handle, conversionBuffer.data(), static_cast(numChars), nullptr, nullptr); } numWrittenWChars += numToConvertWChars; } } else { ::WriteFile(handle, str.bytesWithoutTerminator(), static_cast(str.sizeInBytes()), nullptr, nullptr); } } if (isConsole or isDebugger) { if (str.getEncoding() == StringEncoding::Ascii) { if (isConsole) { ::WriteConsoleA(handle, str.bytesWithoutTerminator(), static_cast(str.sizeInBytes()), nullptr, nullptr); } #if SC_CONFIGURATION_DEBUG if 
(isDebugger) { if (str.isNullTerminated()) { ::OutputDebugStringA(str.bytesIncludingTerminator()); } else { // copy chunks of bytes from str to conversion buffer looping until all of them are written size_t numWritten = 0; while (numWritten < str.sizeInBytes()) { const size_t numToWrite = (str.sizeInBytes() - numWritten) < (conversionBuffer.sizeInBytes() - 1) ? (str.sizeInBytes() - numWritten) : (conversionBuffer.sizeInBytes() - 1); ::memcpy(conversionBuffer.data(), str.bytesWithoutTerminator() + numWritten, numToWrite); conversionBuffer[numToWrite] = 0; // null terminator ::OutputDebugStringA(conversionBuffer.data()); numWritten += numToWrite; } } } #endif } else if (str.getEncoding() == StringEncoding::Utf16) { if (isConsole) { ::WriteConsoleW(handle, reinterpret_cast(str.bytesWithoutTerminator()), static_cast(str.sizeInBytes() / sizeof(wchar_t)), nullptr, nullptr); } #if SC_CONFIGURATION_DEBUG if (isDebugger) { if (str.isNullTerminated()) { ::OutputDebugStringW(str.getNullTerminatedNative()); } else { // copy chunks of bytes from str to conversion buffer looping until all of them are written wchar_t* buffer = reinterpret_cast<wchar_t*>(conversionBuffer.data()); const size_t bufferSizeInWChars = conversionBuffer.sizeInBytes() / sizeof(wchar_t); size_t numWrittenWChars = 0; const size_t totalNumToWriteWChars = str.sizeInBytes() / sizeof(wchar_t); while (numWrittenWChars < totalNumToWriteWChars) { // copy just a chunk of bytes to buffer and null terminate it, updating numWrittenWChars const size_t remainingWChars = totalNumToWriteWChars - numWrittenWChars; const size_t numToWriteWChars = remainingWChars < (bufferSizeInWChars - 1) ? 
remainingWChars : bufferSizeInWChars - 1; ::memcpy(buffer, reinterpret_cast(str.bytesWithoutTerminator()) + numWrittenWChars, numToWriteWChars * sizeof(wchar_t)); buffer[numToWriteWChars] = 0; // null terminator numWrittenWChars += numToWriteWChars; ::OutputDebugStringW(buffer); } } } #endif } else if (str.getEncoding() == StringEncoding::Utf8) { // convert to wide char and use OutputDebugStringW / WriteConsoleW wchar_t* buffer = reinterpret_cast<wchar_t*>(conversionBuffer.data()); const size_t bufferWChars = conversionBuffer.sizeInBytes() / sizeof(wchar_t); size_t numWrittenBytes = 0; while (numWrittenBytes < str.sizeInBytes()) { // convert just a chunk of bytes to wide char buffer, ensuring not to cut UTF8 characters, and null // terminate it convert the maximum number of bytes that can fit in the wide char buffer const size_t remainingBytes = str.sizeInBytes() - numWrittenBytes; size_t numToWrite = remainingBytes < (bufferWChars - 1) * 2 ? remainingBytes : (bufferWChars - 1) * 2; // Find the largest numToWrite that forms complete UTF8 characters int numWideChars = 0; while (numToWrite > 0) { numWideChars = ::MultiByteToWideChar( CP_UTF8, 0, str.bytesWithoutTerminator() + numWrittenBytes, static_cast(numToWrite), buffer, static_cast(bufferWChars - 1)); // -1 to leave space for null terminator if (numWideChars > 0) { break; } numToWrite--; } if (numWideChars > 0) { buffer[numWideChars] = 0; #if SC_CONFIGURATION_DEBUG if (isDebugger) { ::OutputDebugStringW(buffer); } #endif if (isConsole) { ::WriteConsoleW(handle, buffer, static_cast(numWideChars), nullptr, nullptr); } } numWrittenBytes += numToWrite; } } } if (conversionBuffer.data() == fallbackBuffer) { conversionBuffer = {}; } } #endif //---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/Path.inl //---------------------------------------------------------------------------------------------------------------------- // 
Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #if SC_COMPILER_MSVC #endif struct SC::Path::Internal { // Parses a windows drive (Example C:\\) static StringView parseWindowsRoot(StringView input) { auto func = [](auto it) { const auto itBackup = it; StringCodePoint letter; if (it.advanceRead(letter)) { if ((letter >= 'a' and letter <= 'z') or (letter >= 'A' and letter <= 'Z')) { if (it.advanceIfMatches(':') and it.advanceIfMatchesAny({'\\', '/'})) { return StringView::fromIterators(itBackup, it); } } // Try parsing UNC path it = itBackup; if (it.advanceIfMatches('\\') and it.advanceIfMatches('\\')) { auto itCheckpoint = it; // Try parsing long path form that includes ? and another backslash if (it.advanceIfMatches('?') and it.advanceIfMatches('\\')) { return StringView::fromIterators(itBackup, it); } return StringView::fromIterators(itBackup, itCheckpoint); } else if (it.advanceIfMatches('/') and it.advanceIfMatches('/')) { auto itCheckpoint = it; // Try parsing long path form that includes ? 
and another backslash if (it.advanceIfMatches('?') and it.advanceIfMatches('/')) { return StringView::fromIterators(itBackup, it); } return StringView::fromIterators(itBackup, itCheckpoint); } } return StringView(); }; return input.withIterator(func); } // Parses a posix root (Example /) static StringView parsePosixRoot(StringView input) { auto func = [](auto it) { if (it.advanceIfMatches('/')) { return StringView::fromIteratorFromStart(it); } return StringView(); }; return input.withIterator(func); } template static StringView parseBase(StringView input) { auto func = [](auto it) { it.setToEnd(); StringCodePoint matched; (void)it.reverseAdvanceUntilMatchesAny({separator1, separator2}, matched); if (it.isAtStart()) it.setToEnd(); else (void)it.stepForward(); return StringView::fromIteratorUntilEnd(it); }; return input.withIterator(func); } template static bool rootIsFollowedByOnlySeparators(const StringView input, const StringView root) { if (not input.startsWith(root)) return false; StringView remaining; if (not input.splitAfter(root, remaining)) return false; auto func = [](auto it) { auto itBackup = it; if (not it.advanceUntilDifferentFrom(separator1)) { it = itBackup; (void)it.advanceUntilDifferentFrom(separator2); } return it.isAtEnd(); }; return remaining.withIterator(func); } template static StringView parseDirectory(const StringView input, const StringView root) { auto func = [&](auto it) { const auto itBackup = it; it.setToEnd(); StringCodePoint matched; if (it.reverseAdvanceUntilMatchesAny({separator1, separator2}, matched)) { const StringView directory = StringView::fromIterators(itBackup, it); if (directory.isEmpty()) { return root; } if (rootIsFollowedByOnlySeparators<separator1, separator2="">(input, root)) { return input; } return directory; } return StringView(); }; return input.withIterator(func); } template static SC::StringView dirname(StringView input, int repeat) { do { StringView dirnameOut; StringView base = basename<separator1, 
separator2="">(input, &dirnameOut); if (dirnameOut.isEmpty()) { return "."_a8; } else { input = dirnameOut; } } while (repeat-- > 0); return input; } template static SC::StringView basename(StringView input, StringView* dir = nullptr) { auto func = [&](auto it) { it.setToEnd(); while (it.stepBackward() and it.match(separator1)) {} auto itEnd = it; (void)itEnd.stepForward(); StringCodePoint matched; if (it.reverseAdvanceUntilMatchesAny({separator1, separator2}, matched)) { if (dir) { *dir = StringView::fromIteratorFromStart(it); } (void)it.stepForward(); return StringView::fromIterators(it, itEnd); } return input; }; return input.withIterator(func); } template static SC::StringView basename(StringView input, StringView suffix) { const StringView name = basename<separator1, separator2="">(input); auto func = [&](auto inputIt, auto suffixIt) -> StringView { inputIt.setToEnd(); suffixIt.setToEnd(); StringCodePoint c; while (suffixIt.advanceBackwardRead(c)) { if (not inputIt.advanceBackwardIfMatches(c)) return name; // Suffix not found } if (suffixIt.isAtStart()) { return StringView::fromIteratorFromStart(inputIt); // Suffix found entirely } return name; // Suffix not found entirely }; return StringView::withIterators(name, suffix, func); } /// Splits a Windows path of type "C:\\directory\\base" into root=C:\\ - directory=C:\\directory\\ - base=base /// @param input Windows path of the form "C:\directory\base" /// @param root if input == "C:\\directory\\base" it would return "C:\\" /// @param directory if input == "C:\\directory\\base" it would return "C:\\directory\\" /// @param base if input == "C:\\directory\\base" it would return "base" /// @param endsWithSeparator true if the path ends with a backslash [[nodiscard]] static bool parseWindows(StringView input, StringView& root, StringView& directory, StringView& base, bool& endsWithSeparator) { // Parse the drive, then look for rightmost backslash separator to get directory // Everything after it will be the base. 
root = Internal::parseWindowsRoot(input); directory = Internal::parseDirectory<'\\', '/'>(input, root); if (root.startsWith(directory)) { if (root.endsWithAnyOf({'\\', '/'})) directory = root; } base = Internal::parseBase<'\\', '/'>(input); endsWithSeparator = input.endsWithAnyOf({'\\', '/'}); return !(root.isEmpty() && directory.isEmpty()); } /// Splits a Posix path of type "/usr/dir/base" into root=/ - directory=/usr/dir - base=base /// @param input Posix path of the form "/usr/dir/base" /// @param root if input == "/usr/dir/base" it would return "/" /// @param directory if input == "/usr/dir/base" it would return "/usr/dir'" /// @param base if input == "/usr/dir/base" it would return "base" /// @param endsWithSeparator true if the path ends with a backslash [[nodiscard]] static bool parsePosix(StringView input, StringView& root, StringView& directory, StringView& base, bool& endsWithSeparator) { root = Internal::parsePosixRoot(input); directory = Internal::parseDirectory<'/', '/'>(input, root); base = Internal::parseBase<'/', '/'>(input); endsWithSeparator = input.endsWithAnyOf({'/'}); return !(root.isEmpty() && directory.isEmpty()); } }; bool SC::Path::parseNameExtension(const StringView input, StringView& name, StringView& extension) { return input.withIterator( [&](auto it) { auto itBackup = it; // Try searching for a '.' but if it's not found then just set the entire content // to be the name. it.setToEnd(); if (it.reverseAdvanceUntilMatches('.')) { name = StringView::fromIterators(itBackup, it); // from 'name.ext' keep 'name' (void)it.stepForward(); // skip the . 
extension = StringView::fromIteratorUntilEnd(it); // from 'name.ext' keep 'ext' } else { name = input; extension = StringView(); } return !(name.isEmpty() && extension.isEmpty()); }); } bool SC::Path::parse(StringView input, Path::ParsedView& pathView, Type type) { switch (type) { case AsWindows: return pathView.parseWindows(input); case AsPosix: return pathView.parsePosix(input); } Assert::unreachable(); } bool SC::Path::ParsedView::parseWindows(StringView input) { if (!Path::Internal::parseWindows(input, root, directory, base, endsWithSeparator)) { return false; } if (!base.isEmpty()) { if (!Path::parseNameExtension(base, name, ext)) return false; } type = AsWindows; return true; } bool SC::Path::ParsedView::parsePosix(StringView input) { if (!Path::Internal::parsePosix(input, root, directory, base, endsWithSeparator)) { return false; } if (!base.isEmpty()) { if (!Path::parseNameExtension(base, name, ext)) return false; } type = AsPosix; return true; } SC::StringView SC::Path::dirname(StringView input, Type type, int repeat) { switch (type) { case AsWindows: return Internal::dirname<windows::separator, posix::separator="">(input, repeat); case AsPosix: return Internal::dirname<posix::separator, posix::separator="">(input, repeat); } Assert::unreachable(); } SC::StringView SC::Path::basename(StringView input, Type type) { switch (type) { case AsWindows: return Internal::basename<windows::separator, posix::separator="">(input); case AsPosix: return Internal::basename<posix::separator, posix::separator="">(input); } Assert::unreachable(); } SC::StringView SC::Path::basename(StringView input, StringView suffix) { return Internal::basename<windows::separator, posix::separator="">(input, suffix); } bool SC::Path::isAbsolute(StringView input, Type type) { switch (type) { case AsPosix: return input.startsWithAnyOf({'/'}); case AsWindows: return not Internal::parseWindowsRoot(input).isEmpty(); } Assert::unreachable(); } bool SC::Path::join(IGrowableBuffer&& output, 
StringEncoding encoding, Span inputs, StringView separator, bool skipEmpty) { StringBuilder sb(output, encoding, StringBuilder::Clear); const size_t numElements = inputs.sizeInElements(); for (size_t idx = 0; idx < numElements; ++idx) { const StringView element = inputs.data()[idx]; if (skipEmpty and element.isEmpty()) continue; SC_TRY(sb.append(element)); if (idx + 1 != numElements) { SC_TRY(sb.append(separator)); } } return true; } bool SC::Path::normalizeUNCAndTrimQuotes(IGrowableBuffer&& outputPath, StringEncoding encoding, StringView fileLocation, Type type, Span components) { // The macro escaping Library Path from defines adds escaped double quotes fileLocation = fileLocation.trimAnyOf({'"'}); #if SC_COMPILER_MSVC StringPath fixUncPathsOnMSVC; if (fileLocation.startsWithAnyOf({'\\'}) and not fileLocation.startsWith("\\\\")) { // On MSVC __FILE__ when building on UNC paths reports a single starting backslash... SC_TRY(fixUncPathsOnMSVC.append("\\")); SC_TRY(fixUncPathsOnMSVC.append(fileLocation)); fileLocation = fixUncPathsOnMSVC.view(); } #endif SC_TRY(Path::normalize(move(outputPath), encoding, fileLocation, type, components)); return true; } bool SC::Path::normalize(IGrowableBuffer&& output, StringEncoding encoding, StringView view, Type type, Span components) { if (view.isEmpty()) return false; auto newView = removeTrailingSeparator(view); if (not newView.isEmpty()) { view = newView; } else { switch (type) { case AsWindows: view = "\\\\"; break; case AsPosix: view = "/"; break; } StringBuilder sb(output, encoding, StringBuilder::Clear); return sb.append(view); } bool normalizationHappened = false; StringViewTokenizer tokenizer(view); auto isDoubleDot = [](StringView it) -> bool { constexpr StringView utf8dot = ".."; #if SC_PLATFORM_WINDOWS constexpr StringView utf16dot = L".."; return it == (it.getEncoding() == StringEncoding::Utf16 ? 
utf16dot : utf8dot); #else return it == utf8dot; #endif }; auto isDot = [&](StringView it) -> bool { constexpr StringView utf8dot = "."; #if SC_PLATFORM_WINDOWS constexpr StringView utf16dot = L"."; return it == (it.getEncoding() == StringEncoding::Utf16 ? utf16dot : utf8dot); #else return it == utf8dot; #endif }; // Need to IncludeEmpty in order to preserve starting / size_t numComponents = 0; StringView remaining = tokenizer.remaining; while (tokenizer.tokenizeNext({'/', '\\'}, StringViewTokenizer::IncludeEmpty)) { const auto component = tokenizer.component; if (not component.isEmpty()) { tokenizer = remaining; break; } remaining = tokenizer.remaining; numComponents++; } while (tokenizer.tokenizeNext({'/', '\\'})) { const auto component = tokenizer.component; if (tokenizer.splittingCharacter == '\\' and type == Type::AsPosix) { normalizationHappened = true; } else if (tokenizer.splittingCharacter == '/' and type == Type::AsWindows) { normalizationHappened = true; } if (isDoubleDot(component)) { if (numComponents == 0 or isDoubleDot(components[numComponents - 1])) { if (numComponents >= components.sizeInElements()) { return false; } components[numComponents] = component; numComponents += 1; } else { numComponents -= 1; } normalizationHappened = true; } else if (isDot(component)) { normalizationHappened = true; } else { if (numComponents >= components.sizeInElements()) { return false; } components[numComponents] = component; numComponents += 1; } } normalizationHappened = normalizationHappened or (tokenizer.numSplitsTotal != tokenizer.numSplitsNonEmpty); if (normalizationHappened) { StringBuilder sb(output, encoding, StringBuilder::Clear); Span inputs = {components.data(), numComponents}; // Preserve UNC start '\\' even when normalizing as posix paths if (view.startsWith("\\\\")) { SC_TRY(sb.append("\\\\")) inputs = {inputs.data() + 2, numComponents - 2}; // first two components are empty } auto separator = type == AsPosix ? 
Posix::SeparatorStringView() : Windows::SeparatorStringView(); const size_t numElements = inputs.sizeInElements(); for (size_t idx = 0; idx < numElements; ++idx) { const StringView element = inputs.data()[idx]; SC_TRY(sb.append(element)); if (idx + 1 != numElements) { SC_TRY(sb.append(separator)); } } return true; } else { return StringBuilder(output, encoding, StringBuilder::Clear).append(view); } } bool SC::Path::relativeFromTo(IGrowableBuffer&& output, StringEncoding encoding, StringView source, StringView destination, Type inputType, Type outputType) { bool skipRelativeCheck = false; if (inputType == AsPosix) { if (source.startsWith("\\\\")) { source = source.sliceStart(2); skipRelativeCheck = true; } if (destination.startsWith("\\\\")) { destination = destination.sliceStart(2); skipRelativeCheck = true; } } if (!skipRelativeCheck) { Path::ParsedView pathViewSource, pathViewDestination; SC_TRY(Path::parse(source, pathViewSource, inputType)); SC_TRY(Path::parse(destination, pathViewDestination, inputType)); if (pathViewSource.root.isEmpty() or pathViewDestination.root.isEmpty()) return false; // Relative paths are not supported } if (source == destination) { return StringBuilder(output, encoding, StringBuilder::Clear).append("."); } const StringView separator = outputType == AsWindows ? 
Windows::SeparatorStringView() : Posix::SeparatorStringView(); StringViewTokenizer srcTokenizer(source); StringViewTokenizer dstTokenizer(destination); size_t numMatches = 0; size_t numSeparators = 0; StringBuilder builder(output, encoding, StringBuilder::Clear); StringView destRemaining = destination; while (srcTokenizer.tokenizeNext({'/', '\\'}, StringViewTokenizer::IncludeEmpty)) { if (not dstTokenizer.tokenizeNext({'/', '\\'}, StringViewTokenizer::IncludeEmpty) or (srcTokenizer.component != dstTokenizer.component)) { numSeparators++; SC_TRY(builder.append("..")); break; } destRemaining = dstTokenizer.remaining; numMatches++; } if (numMatches == 0) { return false; // no common part } while (srcTokenizer.tokenizeNext({'/', '\\'}, StringViewTokenizer::SkipEmpty)) { if (numSeparators > 0) { SC_TRY(builder.append(separator)); } numSeparators++; SC_TRY(builder.append("..")); } StringView destToAppend = Path::removeTrailingSeparator(destRemaining); if (not destToAppend.isEmpty()) { if (numSeparators > 0) { SC_TRY(builder.append(separator)); } SC_TRY(builder.append(Path::removeTrailingSeparator(destRemaining))); } return true; } SC::StringView SC::Path::removeTrailingSeparator(StringView path) { return path.trimEndAnyOf({'/', '\\'}); } SC::StringView SC::Path::removeStartingSeparator(StringView path) { return path.trimStartAnyOf({'/', '\\'}); } bool SC::Path::endsWithSeparator(StringView path) { return path.endsWithAnyOf({'/', '\\'}); } bool SC::Path::append(IGrowableBuffer&& output, StringEncoding encoding, Span paths, Type type) { StringBuilder builder(output, encoding, StringBuilder::Append); for (auto& path : paths) { if (isAbsolute(path, type)) { return false; } switch (type) { case AsWindows: SC_TRY(builder.append(Windows::SeparatorStringView())); break; case AsPosix: SC_TRY(builder.append(Posix::SeparatorStringView())); break; } SC_TRY(builder.append(removeTrailingSeparator(path))); } return true; } 
//---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/StringBuilder.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { StringBuilder::StringBuilder(IGrowableBuffer& ibuffer, StringEncoding encoding, Flags flags) noexcept { initWithEncoding(ibuffer, encoding, flags); } void StringBuilder::initWithEncoding(IGrowableBuffer& ibuffer, StringEncoding stringEncoding, Flags flags) noexcept { encoding = stringEncoding; buffer = &ibuffer; if (flags == Clear) { if (buffer) buffer->clear(); } } bool StringBuilder::append(StringView str) { if (buffer == nullptr) return false; if (str.isEmpty()) return true; return StringConverter::appendEncodingTo(encoding, str, *buffer, StringConverter::DoNotTerminate); } bool StringBuilder::appendReplaceAll(StringView source, StringView occurrencesOf, StringView with) { if (buffer == nullptr) return false; SC_ASSERT_RELEASE(occurrencesOf.hasCompatibleEncoding(with)); if (source.isEmpty()) { return true; } if (occurrencesOf.isEmpty()) { return append(source); } StringView current = source; auto func = [&](auto sourceIt, auto occurrencesIt) -> bool { bool matches; do { sourceIt = current.getIterator<decltype(sourceit)>(); matches = sourceIt.advanceBeforeFinding(occurrencesIt); SC_TRY(append(StringView::fromIteratorFromStart(sourceIt))); if (matches) { SC_TRY(append(with)); sourceIt = current.getIterator<decltype(sourceit)>(); matches = sourceIt.advanceAfterFinding(occurrencesIt); // TODO: advanceBeforeAndAfterFinding? 
current = StringView::fromIteratorUntilEnd(sourceIt); } } while (matches); return true; }; SC_TRY(StringView::withIterators(current, occurrencesOf, func)); return append(current); } bool StringBuilder::appendHex(Span data, AppendHexCase casing) { if (buffer == nullptr) return false; if (encoding == StringEncoding::Utf16) return false; // TODO: Support appendHex for UTF16 const size_t previousSize = buffer->size(); SC_TRY(buffer->resizeWithoutInitializing(buffer->size() + data.sizeInBytes() * 2)); const size_t sizeInBytes = data.sizeInBytes(); const uint8_t* sourceBytes = data.data(); char* destination = buffer->data(); for (size_t idx = 0; idx < sizeInBytes; idx++) { constexpr char bytesUpper[] = "0123456789ABCDEF"; constexpr char bytesLower[] = "0123456789abcdef"; switch (casing) { case AppendHexCase::UpperCase: destination[previousSize + idx * 2 + 0] = bytesUpper[sourceBytes[idx] >> 4]; destination[previousSize + idx * 2 + 1] = bytesUpper[sourceBytes[idx] & 0x0F]; break; case AppendHexCase::LowerCase: destination[previousSize + idx * 2 + 0] = bytesLower[sourceBytes[idx] >> 4]; destination[previousSize + idx * 2 + 1] = bytesLower[sourceBytes[idx] & 0x0F]; break; } } return true; } } // namespace SC //---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/StringConverter.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #if SC_PLATFORM_WINDOWS #define WIN32_LEAN_AND_MEAN #include #elif SC_PLATFORM_APPLE #include #endif struct SC::StringConverter::Internal { // Fallbacks for platforms without an API to do the conversion out of the box (Linux) [[nodiscard]] static bool convertUTF16LE_to_UTF8(const StringSpan sourceUtf16, IGrowableBuffer& destination, int& writtenCodeUnits); [[nodiscard]] static bool convertUTF8_to_UTF16LE(const StringSpan 
sourceUtf8, IGrowableBuffer& destination, int& writtenCodeUnits); [[nodiscard]] static bool convertSameEncoding(StringSpan text, IGrowableBuffer& buffer, StringTermination terminate); [[nodiscard]] static bool convertEncodingToUTF8(StringSpan text, IGrowableBuffer& ibuffer, StringTermination terminate); [[nodiscard]] static bool convertEncodingToUTF16(StringSpan text, IGrowableBuffer& ibuffer, StringTermination terminate); static void eventuallyNullTerminate(IGrowableBuffer& buffer, StringEncoding destinationEncoding, StringTermination terminate); struct ConverterBuffer : public IGrowableBuffer { size_t size() const { return directAccess.sizeInBytes; } char* data() const { return static_cast<char*>(directAccess.data); } [[nodiscard]] bool append(Span span, size_t extraZeroes = 0) { const size_t oldSize = size(); const size_t newSize = oldSize + span.sizeInBytes() + extraZeroes; SC_TRY(resizeWithoutInitializing(newSize)); if (not span.empty()) { ::memcpy(data() + oldSize, span.data(), span.sizeInBytes()); } if (extraZeroes > 0) { ::memset(data() + oldSize + span.sizeInBytes(), 0, extraZeroes); } return true; } }; }; bool SC::StringConverter::Internal::convertSameEncoding(StringSpan text, IGrowableBuffer& ibuffer, StringTermination terminate) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); const size_t nullBytes = terminate == NullTerminate ? 
StringEncodingGetSize(text.getEncoding()) : 0; SC_TRY(buffer.append(text.toCharSpan(), nullBytes)); return true; } void SC::StringConverter::Internal::eventuallyNullTerminate(IGrowableBuffer& ibuffer, StringEncoding destinationEncoding, StringConverter::StringTermination terminate) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); const auto destinationCharSize = StringEncodingGetSize(destinationEncoding); if (terminate == StringConverter::NullTerminate) { char* bufferData = buffer.data(); size_t size = buffer.size(); for (uint32_t idx = 0; idx < destinationCharSize; ++idx) { bufferData[size - idx - 1] = 0; // null terminator } } } bool SC::StringConverter::Internal::convertEncodingToUTF8(StringSpan text, IGrowableBuffer& ibuffer, StringTermination terminate) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); if (text.getEncoding() == StringEncoding::Utf8 || text.getEncoding() == StringEncoding::Ascii) { return Internal::convertSameEncoding(text, buffer, terminate); } else if (text.getEncoding() == StringEncoding::Utf16) { const bool nullTerminate = terminate == NullTerminate; const auto oldSize = buffer.size(); #if SC_PLATFORM_WINDOWS const int sourceSizeInBytes = static_cast(text.sizeInBytes()); const wchar_t* source = reinterpret_cast(text.bytesWithoutTerminator()); const int numChars = WideCharToMultiByte(CP_UTF8, 0, source, sourceSizeInBytes / sizeof(uint16_t), nullptr, 0, nullptr, 0); #elif SC_PLATFORM_APPLE const int sourceSizeInBytes = static_cast(text.sizeInBytes()); CFIndex numChars = 0; const UInt8* source = reinterpret_cast(text.bytesWithoutTerminator()); CFStringRef tmpStr = CFStringCreateWithBytes(kCFAllocatorDefault, source, sourceSizeInBytes, kCFStringEncodingUTF16, false); auto deferDeleteTempString = MakeDeferred([&] { CFRelease(tmpStr); }); CFRange charRange = {0, CFStringGetLength(tmpStr)}; CFStringGetBytes(tmpStr, charRange, kCFStringEncodingUTF8, 0, false, NULL, 0, 
&numChars); #else int numChars = -1; SC_TRY(Internal::convertUTF16LE_to_UTF8(text, buffer, numChars)); #endif if (numChars <= 0) { return false; } const size_t newSize = oldSize + (static_cast(numChars) + (nullTerminate ? 1 : 0)); SC_TRY(buffer.resizeWithoutInitializing(newSize)); #if SC_PLATFORM_WINDOWS WideCharToMultiByte(CP_UTF8, 0, source, sourceSizeInBytes / sizeof(uint16_t), reinterpret_cast<char*>(buffer.data() + oldSize), numChars, nullptr, 0); #elif SC_PLATFORM_APPLE CFStringGetBytes(tmpStr, charRange, kCFStringEncodingUTF8, 0, false, reinterpret_cast<uint8*>(buffer.data() + oldSize), numChars, NULL); #endif Internal::eventuallyNullTerminate(buffer, StringEncoding::Utf8, terminate); return true; } return false; } bool SC::StringConverter::Internal::convertEncodingToUTF16(StringSpan text, IGrowableBuffer& ibuffer, StringTermination terminate) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); if (text.getEncoding() == StringEncoding::Utf16) { return Internal::convertSameEncoding(text, buffer, terminate); } else if (text.getEncoding() == StringEncoding::Utf8 || text.getEncoding() == StringEncoding::Ascii) { const StringEncoding destinationEncoding = StringEncoding::Utf16; const bool nullTerminate = terminate == NullTerminate; const auto destinationCharSize = StringEncodingGetSize(destinationEncoding); const auto oldSize = buffer.size(); #if SC_PLATFORM_WINDOWS const int sourceSizeInBytes = static_cast(text.sizeInBytes()); const int writtenCodeUnits = MultiByteToWideChar(CP_UTF8, 0, text.bytesWithoutTerminator(), sourceSizeInBytes, nullptr, 0); #elif SC_PLATFORM_APPLE const int sourceSizeInBytes = static_cast(text.sizeInBytes()); CFIndex numChars = 0; const UInt8* source = reinterpret_cast(text.bytesWithoutTerminator()); CFStringRef tmpStr = CFStringCreateWithBytes(kCFAllocatorDefault, source, sourceSizeInBytes, kCFStringEncodingUTF8, false); auto deferDeleteTempString = MakeDeferred([&] { CFRelease(tmpStr); }); CFRange 
charRange = {0, CFStringGetLength(tmpStr)}; CFStringGetBytes(tmpStr, charRange, kCFStringEncodingUTF16, 0, false, NULL, 0, &numChars); CFIndex writtenCodeUnits = numChars / static_cast(destinationCharSize); #else int writtenCodeUnits = -1; SC_TRY(Internal::convertUTF8_to_UTF16LE(text, buffer, writtenCodeUnits)); #endif if (writtenCodeUnits <= 0) { return false; } const size_t newSize = oldSize + (static_cast(writtenCodeUnits) + (nullTerminate ? 1 : 0)) * destinationCharSize; SC_TRY(buffer.resizeWithoutInitializing(newSize)); #if SC_PLATFORM_WINDOWS MultiByteToWideChar(CP_UTF8, 0, text.bytesWithoutTerminator(), static_cast(text.sizeInBytes()), reinterpret_cast<wchar_t*>(buffer.data() + oldSize), writtenCodeUnits); #elif SC_PLATFORM_APPLE CFStringGetBytes(tmpStr, charRange, kCFStringEncodingUTF16, 0, false, reinterpret_cast<uint8*>(buffer.data() + oldSize), numChars, NULL); #endif Internal::eventuallyNullTerminate(buffer, destinationEncoding, terminate); return true; } return false; } bool SC::StringConverter::appendEncodingTo(StringEncoding encoding, StringSpan text, IGrowableBuffer& buffer, StringTermination terminate) { if (text.isEmpty()) { if (terminate == NullTerminate) { return static_castinternal::converterbuffer&(buffer).append({}, StringEncodingGetSize(encoding)); } return true; } switch (encoding) { case StringEncoding::Ascii: return Internal::convertEncodingToUTF8(text, buffer, terminate); case StringEncoding::Utf8: return Internal::convertEncodingToUTF8(text, buffer, terminate); case StringEncoding::Utf16: return Internal::convertEncodingToUTF16(text, buffer, terminate); } return false; } #if !SC_PLATFORM_WINDOWS && !SC_PLATFORM_APPLE bool SC::StringConverter::Internal::convertUTF8_to_UTF16LE(const StringSpan sourceUtf8, IGrowableBuffer& ibuffer, int& writtenCodeUnits) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); const char* utf8 = sourceUtf8.bytesWithoutTerminator(); const size_t utf8Len = 
sourceUtf8.sizeInBytes(); // Calculate the maximum possible size for the UTF-16 string // Each UTF-8 character can be represented by at most one UTF-16 code unit SC_TRY(buffer.resizeWithoutInitializing(utf8Len + 1)); uint16_t* utf16 = reinterpret_cast<uint16_t*>(buffer.data()); size_t srcIndex = 0; size_t dstIndex = 0; // Assuming little-endian byte order for UTF-16 while (srcIndex < utf8Len) { uint32_t codePoint; // Decode the next UTF-8 character if ((utf8[srcIndex] & 0x80) == 0x00) { // Single-byte character codePoint = static_cast(utf8[srcIndex++]); } else if ((utf8[srcIndex] & 0xE0) == 0xC0) { // Two-byte character codePoint = static_cast(((utf8[srcIndex] & 0x1F) << 6) | (utf8[srcIndex + 1] & 0x3F)); srcIndex += 2; } else if ((utf8[srcIndex] & 0xF0) == 0xE0) { // Three-byte character codePoint = static_cast(((utf8[srcIndex] & 0x0F) << 12) | ((utf8[srcIndex + 1] & 0x3F) << 6) | (utf8[srcIndex + 2] & 0x3F)); srcIndex += 3; } else if ((utf8[srcIndex] & 0xF8) == 0xF0) { // Four-byte character codePoint = static_cast(((utf8[srcIndex] & 0x07) << 18) | ((utf8[srcIndex + 1] & 0x3F) << 12) | ((utf8[srcIndex + 2] & 0x3F) << 6) | (utf8[srcIndex + 3] & 0x3F)); srcIndex += 4; } else { return false; } // Encode the code point in UTF-16 if (codePoint <= 0xFFFF) { // Single 16-bit code unit utf16[dstIndex++] = (uint16_t)codePoint; } else if (codePoint <= 0x10FFFF) { // Surrogate pair utf16[dstIndex++] = (uint16_t)(((codePoint - 0x10000) >> 10) | 0xD800); utf16[dstIndex++] = (uint16_t)(((codePoint - 0x10000) & 0x3FF) | 0xDC00); } else { return false; } } writtenCodeUnits = static_cast(dstIndex); return true; } bool SC::StringConverter::Internal::convertUTF16LE_to_UTF8(const StringSpan sourceUtf16, IGrowableBuffer& ibuffer, int& writtenCodeUnits) { Internal::ConverterBuffer& buffer = static_castinternal::converterbuffer&(ibuffer); const uint16_t* utf16 = reinterpret_cast(sourceUtf16.bytesWithoutTerminator()); const size_t utf16Len = sourceUtf16.sizeInBytes() / sizeof(uint16_t); 
// Calculate the maximum possible size for the UTF-8 string // UTF-8 uses at most 4 bytes per character SC_TRY(buffer.resizeWithoutInitializing(4 * utf16Len + 1)); char* utf8 = buffer.data(); size_t srcIndex = 0; size_t dstIndex = 0; // Assuming little-endian byte order for UTF-16 while (srcIndex < utf16Len) { uint32_t codePoint; if ((utf16[srcIndex] & 0xFC00) == 0xD800 && (srcIndex + 1) < utf16Len && (utf16[srcIndex + 1] & 0xFC00) == 0xDC00) { // Surrogate pair codePoint = 0x10000U + ((utf16[srcIndex] & 0x3FFU) << 10U) + (utf16[srcIndex + 1] & 0x3FF); srcIndex += 2; } else { // Single 16-bit code unit codePoint = utf16[srcIndex]; srcIndex += 1; } // Encode the code point in UTF-8 if (codePoint <= 0x7F) { // Single-byte character utf8[dstIndex++] = static_cast(codePoint); } else if (codePoint <= 0x7FF) { // Two-byte character utf8[dstIndex++] = static_cast(0xC0 | ((codePoint >> 6) & 0x1F)); utf8[dstIndex++] = static_cast(0x80 | (codePoint & 0x3F)); } else if (codePoint <= 0xFFFF) { // Three-byte character utf8[dstIndex++] = static_cast(0xE0 | ((codePoint >> 12) & 0x0F)); utf8[dstIndex++] = static_cast(0x80 | ((codePoint >> 6) & 0x3F)); utf8[dstIndex++] = static_cast(0x80 | (codePoint & 0x3F)); } else if (codePoint <= 0x10FFFF) { // Four-byte character utf8[dstIndex++] = static_cast(0xF0 | ((codePoint >> 18) & 0x07)); utf8[dstIndex++] = static_cast(0x80 | ((codePoint >> 12) & 0x3F)); utf8[dstIndex++] = static_cast(0x80 | ((codePoint >> 6) & 0x3F)); utf8[dstIndex++] = static_cast(0x80 | (codePoint & 0x3F)); } else { return false; // Invalid sequence } } writtenCodeUnits = static_cast(dstIndex); return true; } #endif //---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/StringFormat.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #include // 
PRIu64 / PRIi64 #include // snprintf #include // wcslen namespace SC { template static bool formatSprintf(StringFormatOutput& data, const char (&formatSpecifier)[FORMAT_LENGTH], StringView specifier, const Value value) { const size_t SPECIFIER_SIZE = 50; char compoundSpecifier[SPECIFIER_SIZE]; compoundSpecifier[0] = '%'; const size_t specifierLength = specifier.sizeInBytes(); if (specifierLength > sizeof(compoundSpecifier) - FORMAT_LENGTH - 2) return false; // if someone things it's a good idea doing a specifier > 50 chars... if (specifierLength > 0) { memcpy(compoundSpecifier + 1, specifier.bytesWithoutTerminator(), specifierLength); } memcpy(compoundSpecifier + 1 + specifierLength, formatSpecifier, FORMAT_LENGTH); compoundSpecifier[1 + specifierLength + FORMAT_LENGTH] = 0; char buffer[BUFFER_SIZE]; const int numCharsExcludingTerminator = snprintf(buffer, sizeof(buffer), compoundSpecifier, value); const bool validResult = (numCharsExcludingTerminator >= 0) and (static_cast(numCharsExcludingTerminator + 1) < BUFFER_SIZE); return validResult && data.append(StringView({buffer, static_cast(numCharsExcludingTerminator)}, true, StringEncoding::Ascii)); } #if SC_COMPILER_MSVC || SC_COMPILER_CLANG_CL #if SC_PLATFORM_64_BIT == 0 bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const long value) { constexpr char formatSpecifier[] = "d"; return formatSprintf(data, formatSpecifier, specifier, value); } #endif #else #if !SC_PLATFORM_LINUX bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::size_t value) { constexpr char formatSpecifier[] = "zu"; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::ssize_t value) { constexpr char formatSpecifier[] = "zd"; return formatSprintf(data, formatSpecifier, specifier, value); } #endif #endif bool StringFormatterFor::format(StringFormatOutput& data, 
const StringSpan specifier, const SC::int64_t value) { constexpr char formatSpecifier[] = PRIi64; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::uint64_t value) { constexpr char formatSpecifier[] = PRIu64; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::int32_t value) { constexpr char formatSpecifier[] = "d"; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::uint32_t value) { constexpr char formatSpecifier[] = "d"; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::int16_t value) { return StringFormatterFor::format(data, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::uint16_t value) { return StringFormatterFor::format(data, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::int8_t value) { return StringFormatterFor::format(data, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const SC::uint8_t value) { return StringFormatterFor::format(data, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const bool value) { SC_COMPILER_UNUSED(specifier); return data.append(value ? 
"true"_a8 : "false"_a8); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const float value) { constexpr char formatSpecifier[] = "f"; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const double value) { constexpr char formatSpecifier[] = "f"; return formatSprintf(data, formatSpecifier, specifier, value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const char value) { SC_COMPILER_UNUSED(specifier); return data.append(StringView({&value, sizeof(value)}, false, StringEncoding::Ascii)); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const char* value) { SC_COMPILER_UNUSED(specifier); return data.append(StringView::fromNullTerminated(value, StringEncoding::Ascii)); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const void* value) { constexpr char formatSpecifier[] = "p"; return formatSprintf(data, formatSpecifier, specifier, value); } #if SC_PLATFORM_WINDOWS bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const wchar_t value) { SC_COMPILER_UNUSED(specifier); return data.append(StringView({&value, 1}, false)); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const wchar_t* value) { SC_COMPILER_UNUSED(specifier); return data.append(StringView({value, wcslen(value)}, true)); } #endif bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const StringView value) { SC_COMPILER_UNUSED(specifier); return data.append(value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, StringSpan value) { SC_COMPILER_UNUSED(specifier); return data.append(value); } bool StringFormatterFor::format(StringFormatOutput& data, const StringSpan specifier, const StringPath& str) { return 
StringFormatterFor::format(data, specifier, str.view()); }
//-----------------------------------------------------------------------------------------------------------------------
// StringFormatOutput
//-----------------------------------------------------------------------------------------------------------------------

/// @brief Sends `text` to the console (when one is set) or appends it to the growable buffer.
/// @return `true` for an empty `text` and after printing to the console; the result of the conversion when
///         appending to the buffer; `false` (after a debug assert) when neither console nor buffer is set
bool StringFormatOutput::append(StringView text)
{
    if (text.isEmpty())
    {
        return true;
    }
    if (console != nullptr)
    {
        // The console takes precedence over the buffer; `useStdOut` picks between print and printError
        if (useStdOut)
        {
            console->print(text);
        }
        else
        {
            console->printError(text);
        }
        return true;
    }
    else if (growableBuffer != nullptr)
    {
        return StringConverter::appendEncodingTo(encoding, text, *growableBuffer, StringConverter::DoNotTerminate);
    }
    else
    {
        SC_ASSERT_DEBUG("StringFormatOutput::write - Forgot to set buffer or console" && 0);
        return false;
    }
}

/// @brief Creates an output that prints to `newConsole`, through print or printError depending on `useStdOut`.
StringFormatOutput::StringFormatOutput(StringEncoding encoding, Console& newConsole, bool useStdOut) : encoding(encoding), useStdOut(useStdOut)
{
    console = &newConsole;
}

/// @brief Remembers the current size of the growable buffer so that a failed format can be rolled back.
void StringFormatOutput::onFormatBegin()
{
    if (growableBuffer != nullptr)
    {
        backupSize = growableBuffer->getDirectAccess().sizeInBytes;
    }
}

/// @brief Completes a successful format and detaches the growable buffer from this output.
bool StringFormatOutput::onFormatSucceeded()
{
    if (growableBuffer != nullptr)
    {
        // Appends an empty string without terminator, then forgets the buffer
        SC_TRY(StringConverter::appendEncodingTo(encoding, {}, *growableBuffer, StringConverter::DoNotTerminate));
        growableBuffer = nullptr;
        return true;
    }
    return true;
}

/// @brief Rolls the growable buffer back to the size saved by onFormatBegin and detaches it.
void StringFormatOutput::onFormatFailed()
{
    if (growableBuffer != nullptr)
    {
        SC_ASSERT_RELEASE(growableBuffer->resizeWithoutInitializing(backupSize));
        growableBuffer = nullptr;
    }
}
} // namespace SC
//----------------------------------------------------------------------------------------------------------------------
// Strings/Internal/StringIterator.inl
//----------------------------------------------------------------------------------------------------------------------
// Copyright (c) Stefano Cristiano
// SPDX-License-Identifier: MIT
namespace SC
{
// NOTE(review): the template parameter lists of the definitions in this section are missing in this copy -
// presumably stripped during extraction; confirm against upstream.

/// @brief Moves the iterator backward until the code point it points at equals `c`.
/// @return `true` with the iterator on the matching code point, `false` if the start is reached without a match
template bool StringIterator::reverseAdvanceUntilMatches(CodePoint c) { while (it > start) {
auto prevIt = it;
        it = getPreviousOf(it, start); // step back by one code point
        if (CharIterator::decode(it, prevIt) == c)
            return true;
    }
    return false;
}

// NOTE(review): the template parameter lists / arguments of the definitions and explicit instantiations in this
// section are missing or garbled in this copy - presumably stripped during extraction (which is why several
// instantiation lines read the same); confirm against upstream.
template bool StringIterator::reverseAdvanceUntilMatches(CodePoint c);
template bool StringIterator::reverseAdvanceUntilMatches(CodePoint c);
template bool StringIterator::reverseAdvanceUntilMatches(CodePoint c);

/// @brief Searches the content of `other` (from its current position to its end) inside this iterator (from
/// its current position to its end) and, when found, moves this iterator just past the first occurrence.
/// @return `true` if the content was found, `false` otherwise (the iterator is then left unchanged)
template bool StringIterator::advanceAfterFindingSameIterator(StringIterator other)
{
    const size_t thisLength  = static_cast(end - it);
    const size_t otherLength = static_cast(other.end - other.it);

    if (otherLength > thisLength)
    {
        return false; // the searched text is longer than what is left
    }

    if (otherLength == thisLength)
    {
        // Same length: a single comparison decides
        // NOTE(review): the lengths are pointer differences while the size passed to memcmp is scaled by
        // sizeof(CodeUnit) - consistent only if both use the same unit; confirm.
        if (memcmp(it, other.it, otherLength * sizeof(CodeUnit)) == 0)
        {
            setToEnd();
            return true;
        }
        return false;
    }

    // Try every offset at which the searched text still fits
    const size_t difference = thisLength - otherLength;
    for (size_t index = 0; index <= difference; index++)
    {
        size_t subIndex = 0;
        for (subIndex = 0; subIndex < otherLength; subIndex++)
        {
            if (it[index + subIndex] != other.it[subIndex])
                break;
        }
        if (subIndex == otherLength)
        {
            it += index + subIndex;
            return true;
        }
    }
    return false;
}

/// @brief Searches the code points of `other` and moves this iterator right before (`after == false`) or
/// right after (`after == true`) the first occurrence.
/// @return `true` if the occurrence was found, `false` otherwise (the iterator is then left unchanged)
template template bool StringIterator::advanceBeforeOrAfterFinding(StringIterator other)
{
    // Binary compatible encodings are compared unit by unit, without decoding
    if (StringEncodingAreBinaryCompatible(StringIterator::getEncoding(), StringIterator::getEncoding()))
    {
        if (after)
            return advanceAfterFindingSameIterator(reinterpret_cast<stringiterator&>(other));
        else
            return advanceBeforeFindingSameIterator(reinterpret_cast<stringiterator&>(other));
    }
    // Otherwise compare decoded code points starting from every position of this iterator
    auto outerIterator = *this;
    while (not outerIterator.isAtEnd())
    {
        auto innerIterator = outerIterator;
        bool matchFound    = true;

        CodePoint otherCodePoint;
        CodePoint thisCodePoint;

        auto otherIterator = other;
        while (otherIterator.advanceRead(otherCodePoint))
        {
            if (not innerIterator.advanceRead(thisCodePoint))
            {
                matchFound = false; // this iterator ended before `other`
                break;
            }
            if (thisCodePoint != otherCodePoint)
            {
                matchFound = false;
                break;
            }
        }
        if (matchFound and otherIterator.isAtEnd())
        {
            if (after)
            {
                it = innerIterator.it;
            }
            else
            {
                it = outerIterator.it;
            }
            return true;
        }
        outerIterator.it = getNextOf(outerIterator.it, outerIterator.end);
    }
    return false;
}

// clang-format off
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf8, false="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf8, true="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf16, false="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf16, true="">(StringIterator);

template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorascii, false="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorascii, true="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf16, true="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf16, false="">(StringIterator);

template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf8, true="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorutf8, false="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorascii, false="">(StringIterator);
template bool StringIterator::advanceBeforeOrAfterFinding<stringiteratorascii, true="">(StringIterator);
// clang-format on

/// @brief Same search as advanceAfterFindingSameIterator, leaving the iterator at the start of the occurrence.
template bool StringIterator::advanceBeforeFindingSameIterator(StringIterator other)
{
    if (advanceAfterFindingSameIterator(other))
    {
        // Step back by the length of the searched text (the offset passed here is negative or zero)
        return advanceOfBytes(other.it - other.end);
    }
    return false;
}

// clang-format off
template bool StringIterator::advanceBeforeFindingSameIterator(StringIterator);
template bool StringIterator::advanceAfterFindingSameIterator(StringIterator);
template bool StringIterator::advanceBeforeFindingSameIterator(StringIterator);
template bool StringIterator::advanceAfterFindingSameIterator(StringIterator);
template bool StringIterator::advanceBeforeFindingSameIterator(StringIterator);
template bool
StringIterator::advanceAfterFindingSameIterator(StringIterator);
// clang-format on

// NOTE(review): the template parameter lists / arguments of the definitions and explicit instantiations in this
// section are missing in this copy - presumably stripped during extraction (which is why the instantiation
// lines of each function read the same); confirm against upstream.

/// @brief Moves the iterator by `bytesLength` (possibly negative), only if the result stays in [start, end].
/// @return `true` if the iterator was moved, `false` if the destination is out of range
template bool StringIterator::advanceOfBytes(ssize_t bytesLength)
{
    auto newIt = it + bytesLength;
    if (newIt >= start and newIt <= end)
    {
        it = newIt;
        return true;
    }
    return false;
}

template bool StringIterator::advanceOfBytes(ssize_t bytesLength);
template bool StringIterator::advanceOfBytes(ssize_t bytesLength);
template bool StringIterator::advanceOfBytes(ssize_t bytesLength);

/// @brief Advances until the current code point equals one of `items`.
/// @param[out] matched Receives the matching item
/// @return `true` with the iterator on the matching code point, `false` with the iterator at the end
template bool StringIterator::advanceUntilMatchesAny(Span items, CodePoint& matched)
{
    while (it < end)
    {
        const auto decoded = CharIterator::decode(it, end);
        for (auto c : items)
        {
            if (decoded == c)
            {
                matched = c;
                return true;
            }
        }
        it = getNextOf(it, end);
    }
    return false;
}

// clang-format off
template bool StringIterator::advanceUntilMatchesAny(Span items,CodePoint& matched);
template bool StringIterator::advanceUntilMatchesAny(Span items,CodePoint& matched);
template bool StringIterator::advanceUntilMatchesAny(Span items,CodePoint& matched);
// clang-format on

/// @brief Moves backward until the code point the iterator lands on equals one of `items`.
/// @param[out] matched Receives the matching item
/// @return `true` with the iterator on the matching code point, `false` if the start is reached without a match
template bool StringIterator::reverseAdvanceUntilMatchesAny(Span items, CodePoint& matched)
{
    while (it > start)
    {
        auto prevIt = it;
        it = getPreviousOf(it, start);
        const auto decoded = CharIterator::decode(it, prevIt);
        for (auto c : items)
        {
            if (decoded == c)
            {
                matched = c;
                return true;
            }
        }
    }
    return false;
}

// clang-format off
template bool StringIterator::reverseAdvanceUntilMatchesAny(Span items, CodePoint& matched);
template bool StringIterator::reverseAdvanceUntilMatchesAny(Span items, CodePoint& matched);
template bool StringIterator::reverseAdvanceUntilMatchesAny(Span items,CodePoint& matched);
// clang-format on

/// @brief Advances past every consecutive code point equal to `c`.
/// @param optionalReadChar When not null, receives the first code point different from `c`
/// @return `true` with the iterator on the first different code point, `false` if the end is reached
template bool StringIterator::advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar)
{
    while (it < end)
    {
        auto readChar = CharIterator::decode(it, end);
        if (readChar != c)
        {
            if (optionalReadChar)
            {
                *optionalReadChar = readChar;
            }
            return true;
        }
        it = getNextOf(it, end);
    }
    return false;
}

template bool StringIterator::advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar);
template bool StringIterator::advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar);
template bool StringIterator::advanceUntilDifferentFrom(CodePoint c, CodePoint* optionalReadChar);

/// @brief Steps back by one code point only if the code point preceding the iterator equals `c`.
/// @return `true` if the iterator was moved
template bool StringIterator::advanceBackwardIfMatches(CodePoint c)
{
    if (it > start)
    {
        auto otherIt = getPreviousOf(it, start);
        if (CharIterator::decode(otherIt, it) == c)
        {
            it = otherIt;
            return true;
        }
    }
    return false;
}

template bool StringIterator::advanceBackwardIfMatches(CodePoint c);
template bool StringIterator::advanceBackwardIfMatches(CodePoint c);
template bool StringIterator::advanceBackwardIfMatches(CodePoint c);

/// @brief Advances by one code point only if the current code point equals one of `items`.
/// @return `true` if the iterator was moved
template bool StringIterator::advanceIfMatchesAny(Span items)
{
    if (it < end)
    {
        const auto decoded = CharIterator::decode(it, end);
        for (auto c : items)
        {
            if (decoded == c)
            {
                it = getNextOf(it, end);
                return true;
            }
        }
    }
    return false;
}

template bool StringIterator::advanceIfMatchesAny(Span items);
template bool StringIterator::advanceIfMatchesAny(Span items);
template bool StringIterator::advanceIfMatchesAny(Span items);

/// @brief Advances by one code point only if the current code point lies in [`first`, `last`].
/// Asserts (release) that `first <= last`.
/// @return `true` if the iterator was moved
template bool StringIterator::advanceIfMatchesRange(CodePoint first, CodePoint last)
{
    SC_ASSERT_RELEASE(first <= last);
    if (it < end)
    {
        const auto decoded = CharIterator::decode(it, end);
        if (decoded >= first and decoded <= last)
        {
            it = getNextOf(it, end);
            return true;
        }
    }
    return false;
}

template bool StringIterator::advanceIfMatchesRange(CodePoint first, CodePoint last);
template bool StringIterator::advanceIfMatchesRange(CodePoint first, CodePoint last);
template bool StringIterator::advanceIfMatchesRange(CodePoint first, CodePoint last);

/// @brief Reads the current code point into `c` without moving the iterator.
/// @return `false` if the iterator is at the end
template bool StringIterator::read(CodePoint& c)
{
    if (it < end)
    {
        c = CharIterator::decode(it, end);
        return true;
    }
    return false;
}

template bool StringIterator::read(CodePoint& c);
template bool StringIterator::read(CodePoint& c);
template bool StringIterator::read(CodePoint& c);
template bool
StringIterator::advanceBackwardRead(CodePoint& c)
{
    // Steps back by one code point and stores that code point into `c`; returns false at the start
    if (it > start)
    {
        auto prevIt = it;
        it = getPreviousOf(it, start);
        c = CharIterator::decode(it, prevIt);
        return true;
    }
    return false;
}

// NOTE(review): the template parameter lists / arguments of the definitions and explicit instantiations in this
// section are missing in this copy - presumably stripped during extraction (which is why the instantiation
// lines of each function read the same); confirm against upstream.
template bool StringIterator::advanceBackwardRead(CodePoint& c);
template bool StringIterator::advanceBackwardRead(CodePoint& c);
template bool StringIterator::advanceBackwardRead(CodePoint& c);

/// @brief Moves the iterator backward by `numCodePoints` code points.
/// @return `false` if the start is reached first (the iterator keeps the position reached so far)
template bool StringIterator::reverseAdvanceCodePoints(size_t numCodePoints)
{
    while (numCodePoints-- > 0)
    {
        if (it <= start)
        {
            return false;
        }
        it = getPreviousOf(it, start);
    }
    return true;
}

template bool StringIterator::reverseAdvanceCodePoints(size_t numCodePoints);
template bool StringIterator::reverseAdvanceCodePoints(size_t numCodePoints);
template bool StringIterator::reverseAdvanceCodePoints(size_t numCodePoints);

/// @brief Tells whether the last code point of the range equals one of `codePoints`.
/// @return `false` for an empty range or when nothing matches
template bool StringIterator::endsWithAnyOf(Span codePoints) const
{
    if (start != end)
    {
        auto pointerToLast = CharIterator::getPreviousOf(end, start);
        auto decodedCodePoint = CharIterator::decode(pointerToLast, end);
        for (CodePoint codePoint : codePoints)
        {
            if (codePoint == decodedCodePoint)
            {
                return true;
            }
        }
    }
    return false;
}

template bool StringIterator::endsWithAnyOf(Span codePoints) const;
template bool StringIterator::endsWithAnyOf(Span codePoints) const;
template bool StringIterator::endsWithAnyOf(Span codePoints) const;

/// @brief Tells whether the first code point of the range equals one of `codePoints`.
/// @return `false` for an empty range or when nothing matches
template bool StringIterator::startsWithAnyOf(Span codePoints) const
{
    if (start != end)
    {
        auto decodedCodePoint = CharIterator::decode(start, end);
        for (CodePoint codePoint : codePoints)
        {
            if (codePoint == decodedCodePoint)
            {
                return true;
            }
        }
    }
    return false;
}

template bool StringIterator::startsWithAnyOf(Span codePoints) const;
template bool StringIterator::startsWithAnyOf(Span codePoints) const;
template bool StringIterator::startsWithAnyOf(Span codePoints) const;

/// @brief Tells whether the whole range ends with all the code points of `other`.
/// Both ranges are compared backward from their end, on copies (this iterator is not moved).
template template bool StringIterator::endsWith(IteratorType other) const
{
    StringIterator copy = *this;
    copy.setToEnd();
    other.setToEnd();
    typename IteratorType::CodePoint c;
    while (other.advanceBackwardRead(c))
    {
        if (not copy.advanceBackwardIfMatches(c))
            return false;
    }
    return other.isAtStart();
}

template bool StringIterator::endsWith(StringIteratorASCII other) const;
template bool StringIterator::endsWith(StringIteratorUTF8 other) const;
template bool StringIterator::endsWith(StringIteratorUTF16 other) const;
template bool StringIterator::endsWith(StringIteratorUTF8 other) const;
template bool StringIterator::endsWith(StringIteratorASCII other) const;
template bool StringIterator::endsWith(StringIteratorUTF16 other) const;
template bool StringIterator::endsWith(StringIteratorUTF16 other) const;
template bool StringIterator::endsWith(StringIteratorASCII other) const;
template bool StringIterator::endsWith(StringIteratorUTF8 other) const;

/// @brief Tells whether the whole range starts with all the code points of `other`.
/// Both ranges are compared forward from their start, on copies (this iterator is not moved).
template template bool StringIterator::startsWith(IteratorType other) const
{
    StringIterator copy = *this;
    copy.setToStart();
    other.setToStart();
    typename IteratorType::CodePoint c;
    while (other.advanceRead(c))
    {
        if (not copy.advanceIfMatches(c))
            return false;
    }
    return other.isAtEnd();
}

template bool StringIterator::startsWith(StringIteratorASCII other) const;
template bool StringIterator::startsWith(StringIteratorUTF8 other) const;
template bool StringIterator::startsWith(StringIteratorUTF16 other) const;
template bool StringIterator::startsWith(StringIteratorUTF8 other) const;
template bool StringIterator::startsWith(StringIteratorASCII other) const;
template bool StringIterator::startsWith(StringIteratorUTF16 other) const;
template bool StringIterator::startsWith(StringIteratorUTF16 other) const;
template bool StringIterator::startsWith(StringIteratorASCII other) const;
template bool StringIterator::startsWith(StringIteratorUTF8 other) const;

// StringIteratorASCII

/// @brief Advances to the first byte equal to `c`, searching with memchr.
/// @return `true` with the iterator on the match; `false` with the iterator at the end when there is no match,
///         when the iterator is null or when `c` is above 127
bool StringIteratorASCII::advanceUntilMatchesNonConstexpr(CodePoint c)
{
    // clang-format off
    if (it == nullptr or c > 127) SC_LANGUAGE_UNLIKELY { it = end; return false; }
    // clang-format on
    char charC = static_cast(c);
    auto res =
::memchr(it, charC, static_cast(end - it)); if (res != nullptr) it = static_cast(res); else it = end; return it < end; } // StringIteratorUTF16 const char* StringIteratorUTF16::getNextOf(const char* src, const char* end) { const char* it = src; StringSpan::advanceUTF16(it, end); return it; } const char* StringIteratorUTF16::getPreviousOf(const char* bytes, const char* start) { if (bytes - 2 < start) return nullptr; uint16_t value; ::memcpy(&value, bytes - 2, sizeof(uint16_t)); // Avoid potential unaligned read if (value >= 0xD800 and value <= 0xDFFF) // TODO: This assumes Little endian { return bytes - 4 < start ? nullptr : bytes - 4; // Multi-byte character } else { return bytes - 2; // Single-byte character } } SC::uint32_t StringIteratorUTF16::decode(const char* bytes, const char* end) { const char* srcCopy = bytes; return StringSpan::advanceUTF16(srcCopy, end); } // StringIteratorUTF8 const char* StringIteratorUTF8::getNextOf(const char* src, const char* end) { const char* it = src; StringSpan::advanceUTF8(it, end); return it; } const char* StringIteratorUTF8::getPreviousOf(const char* src, const char* start) { do { --src; } while (src >= start and (*src & 0xC0) == 0x80); return src; } SC::uint32_t StringIteratorUTF8::decode(const char* src, const char* end) { const char* srcCopy = src; return StringSpan::advanceUTF8(srcCopy, end); } } // namespace SC //---------------------------------------------------------------------------------------------------------------------- // Strings/Internal/StringView.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #include // errno #include // INT32_MIN/MAX #include // atoi #include // strlen bool SC::StringView::parseInt32(int32_t& value) const { if (text == nullptr) return false; char buffer[12]; // 10 digits + sign + nullTerm switch (getEncoding()) { case StringEncoding::Utf16: { 
StringIteratorUTF16 it(getIterator()); StringCodePoint codePoint; if (not it.advanceRead(codePoint)) return false; if ((codePoint < '0' or codePoint > '9') and (codePoint != '-' and codePoint != '+')) return false; int index = 0; buffer[index++] = static_cast(codePoint); while (it.advanceRead(codePoint)) { if (codePoint < '0' or codePoint > '9') return false; buffer[index++] = static_cast(codePoint); } buffer[index] = 0; return StringView({buffer, sizeof(buffer) - 1}, true, StringEncoding::Ascii).parseInt32(value); } break; default: { const auto* parseText = text; if (!hasNullTerm) { if (textSizeInBytes >= sizeof(buffer)) return false; memcpy(buffer, text, textSizeInBytes); buffer[textSizeInBytes] = 0; parseText = buffer; } errno = 0; char* endText; auto parsed = ::strtol(parseText, &endText, 10); if (errno == 0 && parseText < endText) { if (parsed >= INT32_MIN and parsed <= INT32_MAX) { value = static_cast(parsed); return true; } } } break; } return false; } bool SC::StringView::parseFloat(float& value) const { double dValue; if (parseDouble(dValue)) { value = static_cast(dValue); return true; } return false; } bool SC::StringView::parseDouble(double& value) const { if (text == nullptr) return false; if (hasNullTerm) { value = atof(text); } else { char buffer[255]; const size_t bufferSize = min(textSizeInBytes, static_cast<decltype(textsizeinbytes)>(sizeof(buffer) - 1)); if (text != nullptr) memcpy(buffer, text, bufferSize); buffer[bufferSize] = 0; value = atof(buffer); } if (value == 0.0f) { // atof returns 0 on failed parsing... 
// TODO: Handle float scientific notation StringIteratorASCII it = getIterator(); (void)it.advanceIfMatchesAny({'-', '+'}); // optional if (it.isAtEnd()) // we now require something { return false; } (void)it.advanceUntilDifferentFrom('0'); // any number of 0s if (it.advanceIfMatches('.')) // optional { if (it.isAtEnd()) // but if it exists now we need at least a number { return false; } } (void)it.advanceUntilDifferentFrom('0'); // any number of 0s return it.isAtEnd(); // if they where all zeroes } return true; } bool SC::StringView::startsWith(const StringView str) const { if (hasCompatibleEncoding(str)) { if (str.textSizeInBytes <= textSizeInBytes) { const StringView ours({text, str.textSizeInBytes}, false, getEncoding()); return str == ours; } return false; } return withIterator([str](auto it1) { return str.withIterator([it1](auto it2) { return it1.startsWith(it2); }); }); } bool SC::StringView::endsWith(const StringView str) const { if (hasCompatibleEncoding(str)) { if (str.sizeInBytes() <= sizeInBytes()) { const StringView ours({text + textSizeInBytes - str.textSizeInBytes, str.textSizeInBytes}, false, getEncoding()); return str == ours; } return false; } return withIterator([str](auto it1) { return str.withIterator([it1](auto it2) { return it1.endsWith(it2); }); }); } bool SC::StringView::containsString(const StringView str) const { return withIterators(*this, str, [](auto it1, auto it2) { return it1.advanceAfterFinding(it2); }); } bool SC::StringView::splitAfter(const StringView stringToMatch, StringView& remainingAfterSplit) const { return withIterators(*this, stringToMatch, [&](auto it1, auto it2) { if (it1.advanceAfterFinding(it2)) { remainingAfterSplit = StringView::fromIteratorUntilEnd(it1); return true; } return false; }); } bool SC::StringView::splitBefore(const StringView stringToMatch, StringView& stringBeforeSplit) const { return withIterators(*this, stringToMatch, [&](auto it1, auto it2) { if (it1.advanceBeforeFinding(it2)) { stringBeforeSplit = 
StringView::fromIteratorFromStart(it1); return true; } return false; }); } bool SC::StringView::containsCodePoint(StringCodePoint c) const { return withIterator([c](auto it) { return it.advanceUntilMatches(c); }); } bool SC::StringView::endsWithAnyOf(Span codePoints) const { return withIterator([codePoints](auto it) { return it.endsWithAnyOf(codePoints); }); } bool SC::StringView::startsWithAnyOf(Span codePoints) const { return withIterator([codePoints](auto it) { return it.startsWithAnyOf(codePoints); }); } SC::StringView SC::StringView::sliceStartEnd(size_t start, size_t end) const { return withIterator( [&](auto it) { SC_ASSERT_RELEASE(it.advanceCodePoints(start)); auto startIt = it; SC_ASSERT_RELEASE(start <= end && it.advanceCodePoints(end - start)); const size_t distance = static_cast(it.bytesDistanceFrom(startIt)); return StringView({startIt.it, distance}, hasNullTerm and (start + distance == sizeInBytesIncludingTerminator()), getEncoding()); }); } SC::StringView SC::StringView::sliceStart(size_t offset) const { return withIterator( [&](auto it) { SC_ASSERT_RELEASE(it.advanceCodePoints(offset)); auto startIt = it; it.setToEnd(); const size_t distance = static_cast(it.bytesDistanceFrom(startIt)); return StringView({startIt.it, distance}, hasNullTerm and (offset + distance == sizeInBytesIncludingTerminator()), getEncoding()); }); } SC::StringView SC::StringView::sliceEnd(size_t offset) const { return withIterator( [&](auto it) { auto startIt = it; it.setToEnd(); SC_ASSERT_RELEASE(it.reverseAdvanceCodePoints(offset)); const size_t distance = static_cast(it.bytesDistanceFrom(startIt)); return StringView({startIt.it, distance}, hasNullTerm and (offset + distance == sizeInBytesIncludingTerminator()), getEncoding()); }); } SC::StringView SC::StringView::trimEndAnyOf(Span codePoints) const { auto sv = *this; while (sv.endsWithAnyOf(codePoints)) { sv = sv.sliceEnd(1); } return sv; } SC::StringView SC::StringView::trimStartAnyOf(Span codePoints) const { auto sv = 
*this; while (sv.startsWithAnyOf(codePoints)) { sv = sv.sliceStart(1); } return sv; } SC::StringView SC::StringView::trimAnyOf(Span codePoints) const { return trimEndAnyOf(codePoints).trimStartAnyOf(codePoints); } SC::StringView SC::StringView::trimWhiteSpaces() const { return trimAnyOf({'\r', '\n', '\t', ' '}); } SC::StringView SC::StringView::sliceStartLength(size_t start, size_t length) const { return sliceStartEnd(start, start + length); } bool SC::StringView::isIntegerNumber() const { return withIterator( [&](auto it) { (void)it.advanceIfMatchesAny({'-', '+'}); // optional bool matchedAtLeastOneDigit = false; while (it.advanceIfMatchesRange('0', '9')) matchedAtLeastOneDigit = true; return matchedAtLeastOneDigit and it.isAtEnd(); }); } bool SC::StringView::isFloatingNumber() const { // TODO: Floating point exponential notation return withIterator( [&](auto it) { (void)it.advanceIfMatchesAny({'-', '+'}); // optional bool matchedAtLeastOneDigit = false; while (it.advanceIfMatchesRange('0', '9')) matchedAtLeastOneDigit = true; if (it.advanceIfMatches('.')) // optional while (it.advanceIfMatchesRange('0', '9')) matchedAtLeastOneDigit = true; return matchedAtLeastOneDigit and it.isAtEnd(); }); } //----------------------------------------------------------------------------------------------------------------------- // StringViewTokenizer //----------------------------------------------------------------------------------------------------------------------- [[nodiscard]] bool SC::StringViewTokenizer::isFinished() const { return remaining.isEmpty(); } bool SC::StringViewTokenizer::tokenizeNext(Span separators, Options options) { if (isFinished()) { return false; } auto oldNonEmpty = numSplitsNonEmpty; remaining.withIterator( [&](auto iterator) { auto originalTextStart = originalText.getIterator<decltype(iterator)>(); do { auto componentStart = iterator; (void)iterator.advanceUntilMatchesAny(separators, splittingCharacter); component = 
StringView::fromIterators(componentStart, iterator); processed = StringView::fromIterators(originalTextStart, iterator); (void)iterator.stepForward(); remaining = StringView::fromIteratorUntilEnd(iterator); numSplitsTotal++; if (not component.isEmpty()) { numSplitsNonEmpty++; break; } else if (options == IncludeEmpty) { break; } } while (not remaining.isEmpty()); }); return options == IncludeEmpty ? true : numSplitsNonEmpty > oldNonEmpty; } SC::StringViewTokenizer& SC::StringViewTokenizer::countTokens(Span separators) { while (tokenizeNext(separators, SkipEmpty)) {} return *this; } //----------------------------------------------------------------------------------------------------------------------- // StringViewAlgorithms //----------------------------------------------------------------------------------------------------------------------- bool SC::StringAlgorithms::matchWildcard(StringView s1, StringView s2) { return StringView::withIterators(s1, s2, [](auto it1, auto it2) { return matchWildcardIterator(it1, it2); }); } template bool SC::StringAlgorithms::matchWildcardIterator(StringIterator1 pattern, StringIterator2 text) { typename decltype(pattern)::CodePoint patternChar = 0; typename decltype(text)::CodePoint textChar = 0; StringIterator1 lastPattern = pattern; StringIterator2 lastText = text; if (not pattern.read(patternChar)) { // If pattern is empty, only match with an empty text return text.isAtEnd(); } while (text.advanceRead(textChar)) { if (patternChar == '*') { // Skip consecutive asterisks if (not pattern.advanceUntilDifferentFrom('*', &patternChar)) { // but if pattern ends with asterisk, it matches everything return true; } lastPattern = pattern; lastText = text; (void)lastText.stepForward(); } else if (patternChar == '?' 
or patternChar == textChar) { (void)pattern.stepForward(); (void)pattern.read(patternChar); } else if (not lastPattern.isAtStart()) { pattern = lastPattern; text = lastText; (void)pattern.read(patternChar); (void)lastText.stepForward(); } else { return false; } } // Discard any trailing * and if pattern is at end if (pattern.advanceUntilDifferentFrom('*')) { // there are some more chars in the pattern that are unmatched return false; } else { // pattern is now at end and fully matched return true; } }; #endif // SANE_CPP_STRINGS_IMPLEMENTATION</decltype(iterator)></decltype(textsizeinbytes)></stringiteratorascii,></stringiteratorascii,></stringiteratorutf8,></stringiteratorutf8,></stringiteratorutf16,></stringiteratorutf16,></stringiteratorascii,></stringiteratorascii,></stringiteratorutf16,></stringiteratorutf16,></stringiteratorutf8,></stringiteratorutf8,></stringiterator</stringiterator</internal::converterbuffer&></uint16_t*></internal::converterbuffer&></internal::converterbuffer&></uint8*></wchar_t*></internal::converterbuffer&></uint8*></char*></internal::converterbuffer&></internal::converterbuffer&></internal::converterbuffer&></char*></decltype(sourceit)></decltype(sourceit)></windows::separator,></posix::separator,></windows::separator,></posix::separator,></windows::separator,></separator1,></separator1,></separator1,></wchar_t*></wchar_t*></char*></igrowablebuffer&></char[n]></maxargs,></sizeof...(args)></total,></decltype(value.view())></otheriterator,></otheriterator,>