Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove mutex locks from Zip/7z backed archives #1916

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions rts/System/FileSystem/ArchiveScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,8 +977,10 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
// just a file, can MT
numParallelFileReads = isOnSpinningDisk ? NUM_PARALLEL_FILE_READS_SD : ThreadPool::GetNumThreads();
} break;
case ARCHIVE_TYPE_SDZ: [[fallthrough]]; // mutex locked, not thread safe, makes no sense to throw more threads on it
case ARCHIVE_TYPE_SD7: [[fallthrough]]; // mutex locked, not thread safe, makes no sense to throw more threads on it
case ARCHIVE_TYPE_SDZ: [[fallthrough]];
case ARCHIVE_TYPE_SD7:
numParallelFileReads = ThreadPool::GetNumThreads(); // will open NumThreads parallel archives, this way GetFile() is no longer mutex locked
break;
default: // just default to 1 thread
numParallelFileReads = 1;
break;
Expand Down Expand Up @@ -1034,7 +1036,7 @@ bool CArchiveScanner::GetArchiveChecksum(const std::string& archiveName, Archive
tasks.emplace_back(ThreadPool::Enqueue(ComputeHashesTask, i));
}

const auto erasePredicate = [](decltype(tasks)::value_type item) {
const auto erasePredicate = [](decltype(tasks)::value_type& item) {
using namespace std::chrono_literals;
return item.wait_for(0us) == std::future_status::ready;
};
Expand Down
68 changes: 38 additions & 30 deletions rts/System/FileSystem/Archives/BufferedArchive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,24 @@

CBufferedArchive::~CBufferedArchive()
{
// filter archives for which only {map,mod}info.lua was accessed
if (cacheSize <= 1 || fileCount <= 1)
return;
uint32_t cachedSize = 0;
uint32_t uncachedSize = 0;
uint32_t fileCount = 0;

LOG_L(L_INFO, "[%s][name=%s] %u bytes cached in %u files", __func__, archiveFile.c_str(), cacheSize, fileCount);
for (const auto& [numAccessed, gotBuffered, fileData] : fileCache) {
if (gotBuffered) {
cachedSize += fileData.size();
fileCount++;
} else {
uncachedSize += fileData.size();
}
}

LOG_L(L_INFO, "[%s][name=%s] %u bytes cached in %u files, %u bytes cached in %u files",
__func__, archiveFile.c_str(),
cachedSize, fileCount,
uncachedSize, static_cast<uint32_t>(fileCache.size() - fileCount)
);
}

bool CBufferedArchive::GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer)
Expand All @@ -24,41 +37,36 @@ bool CBufferedArchive::GetFile(unsigned int fid, std::vector<std::uint8_t>& buff

if (!globalConfig.vfsCacheArchiveFiles || noCache) {
if ((ret = GetFileImpl(fid, buffer)) != 1)
LOG_L(L_WARNING, "[BufferedArchive::%s(fid=%u)][noCache=%d,vfsCache=%d] name=%s ret=%d size=" _STPF_, __func__, fid, static_cast<int>(noCache), static_cast<int>(globalConfig.vfsCacheArchiveFiles), archiveFile.c_str(), ret, buffer.size());
LOG_L(L_ERROR, "[BufferedArchive::%s(fid=%u)][noCache=%d,vfsCache=%d] name=%s ret=%d size=" _STPF_, __func__, fid, static_cast<int>(noCache), static_cast<int>(globalConfig.vfsCacheArchiveFiles), archiveFile.c_str(), ret, buffer.size());

return (ret == 1);
}

// NumFiles is virtual, can't do this in ctor
if (fileCache.empty())
fileCache.resize(NumFiles());
{
std::scoped_lock lck(mutex);
if (fileCache.empty())
fileCache.resize(NumFiles());
}

FileBuffer& fb = fileCache.at(fid);
// numAccessed/gotBuffered are not atomic, and simultaneous access to the same fid will cause issues
// however, the access pattern is such that each thread accesses a different fid, so this should be fine
auto& [numAccessed, gotBuffered, fileData] = fileCache[fid];

fb.numAccessed++;
if (!fb.populated) {
if (fb.numAccessed > 1) {
fb.exists = ((ret = GetFileImpl(fid, fb.data)) == 1);
fb.populated = true;
numAccessed++;

cacheSize += fb.data.size();
fileCount += fb.exists;
}
else { // most files are only accessed once, don't bother with those
ret = GetFileImpl(fid, buffer);
return (ret == 1);
}
}

if (!fb.exists) {
LOG_L(L_WARNING, "[BufferedArchive::%s(fid=%u)][!fb.exists] name=%s ret=%d size=" _STPF_, __func__, fid, archiveFile.c_str(), ret, fb.data.size());
return false;
if (gotBuffered) {
buffer.assign(fileData.begin(), fileData.end());
return true;
}

if (buffer.size() != fb.data.size())
buffer.resize(fb.data.size());
if ((ret = GetFileImpl(fid, buffer)) != 1)
LOG_L(L_ERROR, "[BufferedArchive::%s(fid=%u)][noCache=%d,vfsCache=%d] name=%s ret=%d size=" _STPF_, __func__, fid, static_cast<int>(noCache), static_cast<int>(globalConfig.vfsCacheArchiveFiles), archiveFile.c_str(), ret, buffer.size());

// TODO: zero-copy access
std::copy(fb.data.begin(), fb.data.end(), buffer.begin());
return true;
if (numAccessed == 2 && (ret == 1)) {
fileData.assign(buffer.begin(), buffer.end());
gotBuffered = true;
}

return (ret == 1);
}
31 changes: 8 additions & 23 deletions rts/System/FileSystem/Archives/BufferedArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#ifndef _BUFFERED_ARCHIVE_H
#define _BUFFERED_ARCHIVE_H

#include <tuple>

#include "IArchive.h"
#include "System/Threading/SpringThreading.h"

Expand All @@ -17,36 +19,19 @@ class CBufferedArchive : public IArchive
noCache = !cached;
}

virtual ~CBufferedArchive();
~CBufferedArchive() override;

virtual int GetType() const override { return ARCHIVE_TYPE_BUF; }
int GetType() const override { return ARCHIVE_TYPE_BUF; }

bool GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer) override;
bool GetFile(uint32_t fid, std::vector<std::uint8_t>& buffer) override;

protected:
virtual int GetFileImpl(unsigned int fid, std::vector<std::uint8_t>& buffer) = 0;

struct FileBuffer {
FileBuffer() = default;
FileBuffer(const FileBuffer& fb) = delete;
FileBuffer(FileBuffer&& fb) { *this = std::move(fb); }

FileBuffer& operator = (const FileBuffer& fb) = delete;
FileBuffer& operator = (FileBuffer&& fb) = default;

uint32_t numAccessed = 0;
bool populated = false; // files may be empty (0 bytes)
bool exists = false;

std::vector<std::uint8_t> data;
};
virtual int GetFileImpl(uint32_t fid, std::vector<std::uint8_t>& buffer) = 0;

// indexed by file-id
std::vector<FileBuffer> fileCache;
std::vector<std::tuple<uint32_t, bool, std::vector<uint8_t>>> fileCache = {};
private:
uint32_t cacheSize = 0;
uint32_t fileCount = 0;

spring::spinlock mutex;
bool noCache = false;
};

Expand Down
45 changes: 35 additions & 10 deletions rts/System/FileSystem/Archives/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
set(SOURCE_ROOT ../../..)

include_directories(${SOURCE_ROOT})
include_directories(${SPRING_MINIZIP_INCLUDE_DIR})

add_definitions(${PIC_FLAG})
add_library(archives STATIC
set(archives_sources
BufferedArchive.cpp
DirArchive.cpp
IArchive.cpp
Expand All @@ -13,10 +9,39 @@ add_library(archives STATIC
VirtualArchive.cpp
ZipArchive.cpp
${sources_engine_System_Log}
${sources_engine_System_Log_sinkConsole}
)
${sources_engine_System_Log_sinkConsole})

# Can't remove definitions per target
remove_definitions(-DTHREADPOOL)

# This double library below is needed because engine build is cursed:
# - It wants to within a single CMake invocation build 4 different variants
# of the same code: dedicated, unitsync, headless, legacy. All libraries
# that need different properties need to then be create in multiple variants
# - There is extremely poor separation of different engine pieces into well
# defined libraries (because of the previous problem) and main targets are
# just created by copying massive lists of source files which allows for
# a spaghetti of inter dependencies with everything linking against
# everything.
#
# And that implies:
# - We need to define 2 separate targets with different compilation options
# because it will be linked against targets with different requirements.
# - Both of those static libraries are *not* self contained, but require the
# targets it will be linked against to export compatible symbols.

add_library(archives STATIC ${archives_sources})
if (THREADPOOL)
target_compile_definitions(archives PRIVATE -DTHREADPOOL)
endif()
target_include_directories(archives
PRIVATE ${SOURCE_ROOT} ${SPRING_MINIZIP_INCLUDE_DIR})
target_link_libraries(archives
7zip
${SPRING_MINIZIP_LIBRARY}
)
PRIVATE 7zip ${SPRING_MINIZIP_LIBRARY} Tracy::TracyClient)

add_library(archives_nothreadpool STATIC ${archives_sources})
target_compile_options(archives_nothreadpool PRIVATE ${PIC_FLAG})
target_include_directories(archives_nothreadpool
PRIVATE ${SOURCE_ROOT} ${SPRING_MINIZIP_INCLUDE_DIR})
target_link_libraries(archives_nothreadpool
PRIVATE 7zip ${SPRING_MINIZIP_LIBRARY})
28 changes: 14 additions & 14 deletions rts/System/FileSystem/Archives/IArchive.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* @brief Abstraction of different archive types
*
* Loosely resembles STL container:
* for (unsigned int fid = 0; fid < NumFiles(); ++fid) {
* for (uint32_t fid = 0; fid < NumFiles(); ++fid) {
* //stuff
* }
*/
Expand All @@ -37,12 +37,12 @@ class IArchive
* @return The amount of files in the archive, does not change during
* lifetime
*/
virtual unsigned int NumFiles() const = 0;
virtual uint32_t NumFiles() const = 0;
/**
* Returns whether the supplied fileId is valid and available in this
* archive.
*/
inline bool IsFileId(unsigned int fileId) const {
inline bool IsFileId(uint32_t fileId) const {
return (fileId < NumFiles());
}
/**
Expand All @@ -60,39 +60,39 @@ class IArchive
* @param filePath VFS path to the file, for example "maps/myMap.smf"
* @return fileID of the file, NumFiles() if not found
*/
unsigned int FindFile(const std::string& filePath) const;
uint32_t FindFile(const std::string& filePath) const;
/**
* Fetches the content of a file by its ID.
* @param fid file ID in [0, NumFiles())
* @param buffer on success, this will be filled with the contents
* of the file
* @return true if the file was found, and its contents have been
* successfully read into buffer
* @see GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer)
* @see GetFile(uint32_t fid, std::vector<std::uint8_t>& buffer)
*/
virtual bool GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer) = 0;
virtual bool GetFile(uint32_t fid, std::vector<std::uint8_t>& buffer) = 0;
/**
* Fetches the content of a file by its name.
* @param name VFS path to the file, for example "maps/myMap.smf"
* @param buffer on success, this will be filled with the contents
* of the file
* @return true if the file was found, and its contents have been
* successfully read into buffer
* @see GetFile(unsigned int fid, std::vector<std::uint8_t>& buffer)
* @see GetFile(uint32_t fid, std::vector<std::uint8_t>& buffer)
*/
bool GetFile(const std::string& name, std::vector<std::uint8_t>& buffer);

std::pair<std::string, int> FileInfo(unsigned int fid) const {
std::pair<std::string, int> FileInfo(uint32_t fid) const {
std::pair<std::string, int> info;
FileInfo(fid, info.first, info.second);
return info;
}

unsigned int ExtractedSize() const {
unsigned int size = 0;
uint32_t ExtractedSize() const {
uint32_t size = 0;

// no archive should be larger than 4GB when extracted
for (unsigned int fid = 0; fid < NumFiles(); fid++) {
for (uint32_t fid = 0; fid < NumFiles(); fid++) {
size += (FileInfo(fid).second);
}

Expand All @@ -102,7 +102,7 @@ class IArchive
/**
* Fetches the name and size in bytes of a file by its ID.
*/
virtual void FileInfo(unsigned int fid, std::string& name, int& size) const = 0;
virtual void FileInfo(uint32_t fid, std::string& name, int& size) const = 0;

/**
* Returns true if the cost of reading the file is qualitatively relative
Expand All @@ -115,7 +115,7 @@ class IArchive
* Most implementations may always return true.
* @return true if cost is ~ relative to its file-size
*/
virtual bool HasLowReadingCost(unsigned int fid) const { return true; }
virtual bool HasLowReadingCost(uint32_t fid) const { return true; }

/**
* @return true if archive type can be packed solid (which is VERY slow when reading)
Expand All @@ -131,7 +131,7 @@ class IArchive
// Spring expects the contents of archives to be case-independent
// this map (which must be populated by subclass archives) is kept
// to allow converting back from lowercase to original case
spring::unordered_map<std::string, unsigned int> lcNameIndex;
spring::unordered_map<std::string, uint32_t> lcNameIndex;

protected:
/// "ExampleArchive.sdd"
Expand Down
Loading