// (original) (raw)

//---------------------------------------------------------------------------------------------------------------------- // SaneCppFileSystemIterator.h - Sane C++ FileSystemIterator Library (single file build) //---------------------------------------------------------------------------------------------------------------------- // Dependencies: SaneCppFoundation.h // Version: release/2025/11 (cf7313e5) // LOC header: 78 (code) + 72 (comments) // LOC implementation: 352 (code) + 68 (comments) // Documentation: https://pagghiu.github.io/SaneCppLibraries // Source Code: https://github.com/pagghiu/SaneCppLibraries //---------------------------------------------------------------------------------------------------------------------- // All copyrights and SPDX information for this library (each amalgamated section has its own copyright attributions): // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT //---------------------------------------------------------------------------------------------------------------------- #include "SaneCppFoundation.h" #if !defined(SANE_CPP_FILESYSTEMITERATOR_HEADER) #define SANE_CPP_FILESYSTEMITERATOR_HEADER 1 //---------------------------------------------------------------------------------------------------------------------- // FileSystemIterator/FileSystemIterator.h //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT namespace SC { //! @defgroup group_file_system_iterator FileSystem Iterator //! @copybrief library_file_system_iterator (see @ref library_file_system_iterator for more details) //! @addtogroup group_file_system_iterator //! @{ /// @brief Iterates files and directories inside a given path without allocating any memory. /// FileSystemIterator uses an iterator pattern to enumerate files instead of a callback. 
/// This allows avoiding blocking on enumeration of very large directories and also the allocation of a huge number of /// strings to hold all filenames. /// When configuring an iteration, the caller can ask for a fully recursive enumeration or manually call /// SC::FileSystemIterator::recurseSubdirectory when the current SC::FileSystemIterator::Entry item /// (obtained with SC::FileSystemIterator::get) matches a directory of interest. /// The maximum number of nested recursion levels that will be allowed depends on the size of the /// FileSystemIterator::FolderState span (can be a static array) passed in during init by the caller. /// /// @note This class doesn't allocate any dynamic memory. /// /// Example of recursive iteration of a directory: /// \snippet Tests/Libraries/FileSystemIterator/FileSystemIteratorTest.cpp walkRecursiveSnippet /// /// If only some directories should be recursed, manual recursion can help speeding up directory iteration: /// \snippet Tests/Libraries/FileSystemIterator/FileSystemIteratorTest.cpp walkRecursiveManualSnippet struct FileSystemIterator { /// Entry type (File or Directory) enum class Type { Directory, File }; /// @brief Holds state of a folder when recursing into it to list its files struct FolderState { #if SC_PLATFORM_WINDOWS void* fileDescriptor = (void*)(long long)-1; #else int fileDescriptor = -1; void* dirEnumerator = nullptr; #endif size_t textLengthInBytes = 0; bool gotDot1 = false; bool gotDot2 = false; }; /// @brief Contains information on a file or directory struct Entry { StringSpan name; ///< Name of current entry (file with extension or directory) StringSpan path; ///< Absolute path of the current entry uint32_t level = 0; ///< Current level of nesting from start of iteration Type type = Type::File; ///< Tells if it's a file or a directory /// @brief Check if current entry is a directory bool isDirectory() const { return type == Type::Directory; } #if SC_PLATFORM_WINDOWS void* parentFileDescriptor = nullptr; 
#else int parentFileDescriptor = 0; #endif }; /// @brief Options when iterating (recursive and other options) struct Options { bool recursive = false; ///< `true` will recurse automatically into subdirectories bool forwardSlashes = false; ///< `true` will return paths forward slash `/` even on Windows }; Options options; ///< Options to control recursive behaviour and other options /// @brief Destroys the FileSystemIterator object ~FileSystemIterator(); /// @brief Get current Entry being iterated /// @return Current entry const Entry& get() const { return currentEntry; } /// @brief Check if any error happened during iteration /// @return A valid Result if no errors have happened during file system iteration Result checkErrors() { errorsChecked = true; return errorResult; } /// @brief Initializes the iterator on a given directory /// @param directory Directory to iterate /// @param recursiveEntries User supplied buffer for the stack used during folder recursion (must be >= 1 elements) /// @return Valid result if directory exists and is accessible Result init(StringSpan directory, Span recursiveEntries); /// Returned string is only valid until next enumerateNext call and/or another init call /// @brief Moves iterator to next file /// @return Valid result if there are more files to iterate Result enumerateNext(); /// @brief Recurse into current item (assuming Entry::isDirectory == `true`) /// @return Valid result if current item is a directory and it can be accessed successfully Result recurseSubdirectory(); private: static constexpr auto MaxPath = StringPath::MaxPath; struct Internal; struct RecurseStack { Span recursiveEntries; int currentEntry = -1; FolderState& back(); void pop_back(); Result push_back(const FolderState& other); size_t size() const { return size_t(currentEntry + 1); } bool isEmpty() const { return currentEntry == -1; } }; RecurseStack recurseStack; Entry currentEntry; Result errorResult = Result(true); bool errorsChecked = false; #if 
SC_PLATFORM_WINDOWS bool expectDotDirectories = true; StringPath currentPath; uint64_t dirEnumeratorBuffer[592 / sizeof(uint64_t)]; #else StringPath currentPath; #endif Result enumerateNextInternal(Entry& entry); Result recurseSubdirectoryInternal(Entry& entry); }; //! @} } // namespace SC #endif // SANE_CPP_FILESYSTEMITERATOR_HEADER #if defined(SANE_CPP_IMPLEMENTATION) && !defined(SANE_CPP_FILESYSTEMITERATOR_IMPLEMENTATION) #define SANE_CPP_FILESYSTEMITERATOR_IMPLEMENTATION 1 //---------------------------------------------------------------------------------------------------------------------- // FileSystemIterator/FileSystemIterator.cpp //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #if SC_PLATFORM_WINDOWS //---------------------------------------------------------------------------------------------------------------------- // FileSystemIterator/Internal/FileSystemIteratorWindows.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #define WIN32_LEAN_AND_MEAN #include struct SC::FileSystemIterator::Internal { static Result initFolderState(FolderState& entry, const wchar_t* path, WIN32_FIND_DATAW& dirEnumerator) { entry.fileDescriptor = ::FindFirstFileW(path, &dirEnumerator); if (INVALID_HANDLE_VALUE == entry.fileDescriptor) { return Result::Error("FindFirstFileW failed"); } return Result(true); } static void closeFolderState(FolderState& entry) { if (entry.fileDescriptor != INVALID_HANDLE_VALUE) { ::FindClose(entry.fileDescriptor); } } static void destroy(RecurseStack& recurseStack) { while (not recurseStack.isEmpty()) { closeFolderState(recurseStack.back()); recurseStack.pop_back(); } } }; SC::Result SC::FileSystemIterator::init(StringSpan directory, Span recursiveEntries) { 
Internal::destroy(recurseStack); recurseStack.recursiveEntries = recursiveEntries; recurseStack.currentEntry = -1; SC_TRY_MSG(currentPath.assign(directory), "Directory path is too long"); const size_t dirLen = currentPath.view().sizeInBytes() / sizeof(wchar_t); SC_TRY_MSG(currentPath.append(L"\\*.*"), "Directory path is too long"); { FolderState entry; entry.textLengthInBytes = dirLen * sizeof(wchar_t); SC_TRY(recurseStack.push_back(entry)); } FolderState& currentFolder = recurseStack.back(); WIN32_FIND_DATAW& dirEnumerator = reinterpret_cast<win32_find_dataw&>(dirEnumeratorBuffer); currentFolder.fileDescriptor = ::FindFirstFileW(currentPath.view().getNullTerminatedNative(), &dirEnumerator); // Set currentPathString back to just the directory (no pattern) currentPath.writableSpan().data()[dirLen] = L'\0'; if (INVALID_HANDLE_VALUE == currentFolder.fileDescriptor) { return Result::Error("FindFirstFileW failed"); } expectDotDirectories = true; return Result(true); } SC::Result SC::FileSystemIterator::enumerateNextInternal(Entry& entry) { FolderState& parent = recurseStack.back(); static_assert(sizeof(dirEnumeratorBuffer) >= sizeof(WIN32_FIND_DATAW), "WIN32_FIND_DATAW"); WIN32_FIND_DATAW& dirEnumerator = reinterpret_cast<win32_find_dataw&>(dirEnumeratorBuffer); size_t dirLen = parent.textLengthInBytes / sizeof(wchar_t); for (;;) { if (!expectDotDirectories) { if (!::FindNextFileW(parent.fileDescriptor, &dirEnumerator)) { Internal::closeFolderState(recurseStack.back()); recurseStack.pop_back(); if (recurseStack.isEmpty()) return Result::Error("Iteration Finished"); parent = recurseStack.back(); dirLen = parent.textLengthInBytes / sizeof(wchar_t); continue; } } expectDotDirectories = false; if (!(parent.gotDot1 && parent.gotDot2)) { const bool isDot1 = ::wcsncmp(dirEnumerator.cFileName, L".", 2) == 0; const bool isDot2 = ::wcsncmp(dirEnumerator.cFileName, L"..", 3) == 0; if (isDot1) parent.gotDot1 = true; if (isDot2) parent.gotDot2 = true; if (isDot1 || isDot2) continue; 
} break; } entry.name = StringSpan({dirEnumerator.cFileName, ::wcsnlen(dirEnumerator.cFileName, MAX_PATH)}, true); (void)currentPath.resize(dirLen); SC_TRY_MSG(currentPath.append(L"\\"), "Path too long"); SC_TRY_MSG(currentPath.append(entry.name), "Path too long"); if (options.forwardSlashes) { // Convert backslashes to forward slashes wchar_t* pathData = currentPath.writableSpan().data(); const size_t pathLength = currentPath.view().sizeInBytes() / sizeof(wchar_t); for (size_t i = dirLen; i < pathLength; ++i) { if (pathData[i] == L'\\') pathData[i] = L'/'; } } entry.path = currentPath.view(); entry.level = static_cast<decltype(entry.level)>(recurseStack.size() - 1); entry.parentFileDescriptor = parent.fileDescriptor; if (dirEnumerator.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { entry.type = Type::Directory; if (options.recursive) { SC_TRY(recurseSubdirectoryInternal(entry)); } } else { entry.type = Type::File; } return Result(true); } SC::Result SC::FileSystemIterator::recurseSubdirectoryInternal(Entry& entry) { StringPath recursePath; // Build subdirectory path recursePath = currentPath; (void)recursePath.resize(recurseStack.back().textLengthInBytes / sizeof(wchar_t)); SC_TRY_MSG(recursePath.append(L"\\"), "Directory path is too long"); SC_TRY_MSG(recursePath.append(entry.name), "Directory path is too long"); { // Store the length of the sub directory without the trailing \*.* added later FolderState newParent; newParent.textLengthInBytes = recursePath.view().sizeInBytes(); SC_TRY(recurseStack.push_back(newParent)); } SC_TRY_MSG(recursePath.append(L"\\*.*"), "Directory path is too long"); FolderState& currentFolder = recurseStack.back(); WIN32_FIND_DATAW& dirEnumerator = reinterpret_cast<win32_find_dataw&>(dirEnumeratorBuffer); currentFolder.fileDescriptor = ::FindFirstFileW(recursePath.view().getNullTerminatedNative(), &dirEnumerator); if (INVALID_HANDLE_VALUE == currentFolder.fileDescriptor) { return Result::Error("FindFirstFileW failed"); } 
expectDotDirectories = true; return Result(true); } #else //---------------------------------------------------------------------------------------------------------------------- // FileSystemIterator/Internal/FileSystemIteratorPosix.inl //---------------------------------------------------------------------------------------------------------------------- // Copyright (c) Stefano Cristiano // SPDX-License-Identifier: MIT #include #include #include #include #include #include #include #if SC_PLATFORM_LINUX #include // For PATH_MAX on Linux #else #include // For PATH_MAX on Apple and other POSIX systems #endif struct SC::FileSystemIterator::Internal { static Result translateErrorCode(int errorCode) { switch (errorCode) { case EACCES: return Result::Error("EACCES"); case EDQUOT: return Result::Error("EDQUOT"); case EEXIST: return Result::Error("EEXIST"); case EFAULT: return Result::Error("EFAULT"); case EIO: return Result::Error("EIO"); case ELOOP: return Result::Error("ELOOP"); case EMLINK: return Result::Error("EMLINK"); case ENAMETOOLONG: return Result::Error("ENAMETOOLONG"); case ENOENT: return Result::Error("ENOENT"); case ENOSPC: return Result::Error("ENOSPC"); case ENOTDIR: return Result::Error("ENOTDIR"); case EROFS: return Result::Error("EROFS"); case EBADF: return Result::Error("EBADF"); case EPERM: return Result::Error("EPERM"); case ENOMEM: return Result::Error("ENOMEM"); case ENOTSUP: return Result::Error("ENOTSUP"); case EINVAL: return Result::Error("EINVAL"); } return Result::Error("Unknown"); } static Result initFolderState(FolderState& entry, int fd) { entry.fileDescriptor = fd; if (entry.fileDescriptor == -1) { return translateErrorCode(errno); } entry.dirEnumerator = ::fdopendir(entry.fileDescriptor); if (entry.dirEnumerator == nullptr) { ::close(entry.fileDescriptor); entry.fileDescriptor = -1; // Reset file descriptor on error return translateErrorCode(errno); } return Result(true); } static void closeFolderState(FolderState& entry) { if 
(entry.dirEnumerator != nullptr) { ::closedir(static_cast<dir*>(entry.dirEnumerator)); } if (entry.fileDescriptor != -1) { ::close(entry.fileDescriptor); } } static void destroy(RecurseStack& recurseStack) { while (not recurseStack.isEmpty()) { closeFolderState(recurseStack.back()); recurseStack.pop_back(); } } }; SC::Result SC::FileSystemIterator::init(StringSpan directory, Span recursiveEntries) { Internal::destroy(recurseStack); recurseStack.recursiveEntries = recursiveEntries; recurseStack.currentEntry = -1; FolderState entry; if (directory.getEncoding() == StringEncoding::Utf16) { return Result::Error("FileSystemIterator on Posix does not support UTF16 encoded paths"); } SC_TRY_MSG(currentPath.assign(directory), "Directory path is too long"); entry.textLengthInBytes = directory.sizeInBytes(); SC_TRY_MSG(recurseStack.push_back(entry), "Exceeding maximum number of recursive entries"); const int fd = ::open(currentPath.view().bytesIncludingTerminator(), O_DIRECTORY); SC_TRY(Internal::initFolderState(recurseStack.back(), fd)); return Result(true); } SC::Result SC::FileSystemIterator::enumerateNextInternal(Entry& entry) { if (recurseStack.isEmpty()) return Result::Error("Forgot to call init"); FolderState& parent = recurseStack.back(); struct dirent* item; for (;;) { item = ::readdir(static_cast<dir*>(parent.dirEnumerator)); if (item == nullptr) { Internal::closeFolderState(recurseStack.back()); recurseStack.pop_back(); if (recurseStack.isEmpty()) { return Result::Error("Iteration Finished"); } parent = recurseStack.back(); (void)currentPath.resize(parent.textLengthInBytes); continue; } if (not(parent.gotDot1 and parent.gotDot2)) { if (::strcmp(item->d_name, "..") == 0) { parent.gotDot2 = true; continue; } else if (::strcmp(item->d_name, ".") == 0) { parent.gotDot1 = true; continue; } } break; } #if SC_PLATFORM_APPLE entry.name = StringSpan({item->d_name, item->d_namlen}, true, StringEncoding::Utf8); #else entry.name = StringSpan({item->d_name, 
strlen(item->d_name)}, true, StringEncoding::Utf8); #endif (void)currentPath.resize(recurseStack.back().textLengthInBytes); SC_TRY_MSG(currentPath.append("/"), "Insufficient space on current path string"); SC_TRY_MSG(currentPath.append(entry.name), "Insufficient space on current path string"); entry.path = currentPath.view(); entry.level = static_cast<decltype(entry.level)>(recurseStack.size() - 1); entry.parentFileDescriptor = parent.fileDescriptor; if (item->d_type == DT_DIR) { entry.type = Type::Directory; if (options.recursive) { SC_TRY(recurseSubdirectoryInternal(entry)); } } else { entry.type = Type::File; } return Result(true); } SC::Result SC::FileSystemIterator::recurseSubdirectoryInternal(Entry& entry) { FolderState newParent; (void)currentPath.resize(recurseStack.back().textLengthInBytes); SC_TRY_MSG(currentPath.append("/"), "Directory path is too long"); SC_TRY_MSG(currentPath.append(entry.name), "Directory path is too long"); newParent.textLengthInBytes = currentPath.view().sizeInBytes(); SC_TRY(entry.name.isNullTerminated()); SC_TRY_MSG(recurseStack.push_back(newParent), "Exceeding maximum number of recursive entries"); const int fd = ::openat(entry.parentFileDescriptor, entry.name.getNullTerminatedNative(), O_DIRECTORY); SC_TRY(Internal::initFolderState(recurseStack.back(), fd)); return Result(true); } #endif SC::FileSystemIterator::~FileSystemIterator() { Internal::destroy(recurseStack); } SC::Result SC::FileSystemIterator::enumerateNext() { Result res = enumerateNextInternal(currentEntry); if (not res) { if (::strcmp(res.message, "Iteration Finished") != 0) { errorResult = res; errorsChecked = false; } } return res; } SC::Result SC::FileSystemIterator::recurseSubdirectory() { if (options.recursive) { errorResult = Result::Error("Cannot recurseSubdirectory() with recursive==true"); errorsChecked = false; return errorResult; } return recurseSubdirectoryInternal(currentEntry); } SC::FileSystemIterator::FolderState& 
SC::FileSystemIterator::RecurseStack::back() { SC_ASSERT_RELEASE(currentEntry >= 0); return recursiveEntries[size_t(currentEntry)]; } void SC::FileSystemIterator::RecurseStack::pop_back() { SC_ASSERT_RELEASE(currentEntry >= 0); currentEntry--; } SC::Result SC::FileSystemIterator::RecurseStack::push_back(const FolderState& other) { if (size_t(currentEntry + 1) >= recursiveEntries.sizeInElements()) return Result::Error("FileSystemIterator - Not enough space in recurse stack"); currentEntry += 1; recursiveEntries.data()[currentEntry] = other; return Result(true); } #endif // SANE_CPP_FILESYSTEMITERATOR_IMPLEMENTATION </decltype(entry.level)></dir*></dir*></win32_find_dataw&></decltype(entry.level)></win32_find_dataw&></win32_find_dataw&>