From 2f1795640c14bfc885044166fc9ff32b4341d89a Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 22 Mar 2024 11:21:31 +0100 Subject: [PATCH 1/5] Correctly handle failing mmap on FileReader. If we try to mmap a region to high (>4GB), `makeMmappedBuffer` will throw a `MMapException`. In this case, we want to do a read in the file instead of mmap. This case is handled in `MultiPartFileReader` but was missing in `FileReader`. Fix #866 --- src/file_reader.cpp | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/file_reader.cpp b/src/file_reader.cpp index d32895b50..f456bcd95 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -199,7 +199,10 @@ const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) cons } catch(MMapException& e) #endif { - // The range is several part, or we are on Windows. + // We cannot do the mmap, for several possible reasons: + // - Mmap offset is too big (>4GB on 32 bits) + // - The range is several part + // - We are on Windows. // We will have to do some memory copies :/ // [TODO] Use Windows equivalent for mmap. auto ret_buffer = Buffer::makeBuffer(size); @@ -276,14 +279,22 @@ const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const { ASSERT(size, <=, _size); #ifdef ENABLE_USE_MMAP - offset += _offset; - int fd = _fhandle->getNativeHandle(); - return Buffer::makeBuffer(makeMmappedBuffer(fd, offset, size), size); -#else // We are on Windows. [TODO] Use Windows equivalent for mmap. - auto ret_buffer = Buffer::makeBuffer(size); - read(const_cast(ret_buffer.data()), offset, size); - return ret_buffer; + try { + auto local_offset = offset + _offset; + int fd = _fhandle->getNativeHandle(); + return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size); + } catch(MMapException& e) #endif + { + // We cannot do the mmap, for several possible reasons: + // - Mmap offset is too big (>4GB on 32 bits) + // - We are on Windows. + // We will have to do some memory copies :/ + // [TODO] Use Windows equivalent for mmap. + auto ret_buffer = Buffer::makeBuffer(size); + read(const_cast(ret_buffer.data()), offset, size); + return ret_buffer; + } } std::unique_ptr From 5484fcbeca2d4b667ba7078f25138f60cfa9c91d Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 29 Mar 2024 11:27:25 +0100 Subject: [PATCH 2/5] Fix explicit constructors. --- src/file_reader.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/file_reader.h b/src/file_reader.h index 36c3a7417..f5234e41c 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -33,7 +33,7 @@ class FileReader : public Reader { typedef std::shared_ptr FileHandle; public: // functions - explicit FileReader(FileHandle fh, offset_t offset, zsize_t size); + FileReader(FileHandle fh, offset_t offset, zsize_t size); ~FileReader() = default; zsize_t size() const { return _size; }; @@ -56,7 +56,7 @@ class FileReader : public Reader { class MultiPartFileReader : public Reader { public: - MultiPartFileReader(std::shared_ptr source); + explicit MultiPartFileReader(std::shared_ptr source); ~MultiPartFileReader() {}; zsize_t size() const { return _size; }; From a77ab8b507c1fd4b771b26d755953c9d90eeb7a2 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 29 Mar 2024 11:51:29 +0100 Subject: [PATCH 3/5] Introduce `BaseFileReader`. This commit move only the _size and _offset (and accessors) in BaseFileReader. --- src/file_reader.cpp | 10 ++++------ src/file_reader.h | 27 +++++++++++++++------------ 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/file_reader.cpp b/src/file_reader.cpp index f456bcd95..3ec575917 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -53,9 +53,8 @@ MultiPartFileReader::MultiPartFileReader(std::shared_ptr sou : MultiPartFileReader(source, offset_t(0), source->fsize()) {} MultiPartFileReader::MultiPartFileReader(std::shared_ptr source, offset_t offset, zsize_t size) - : source(source), - _offset(offset), - _size(size) + : BaseFileReader(offset, size), + source(source) { ASSERT(offset.v, <=, source->fsize().v); ASSERT(offset.v+size.v, <=, source->fsize().v); @@ -229,9 +228,8 @@ std::unique_ptr MultiPartFileReader::sub_reader(offset_t offset, z //////////////////////////////////////////////////////////////////////////////// FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size) - : _fhandle(fh) - , _offset(offset) - , _size(size) + : BaseFileReader(offset, size) + , _fhandle(fh) { } diff --git a/src/file_reader.h b/src/file_reader.h index f5234e41c..a6650c3e0 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -28,7 +28,20 @@ namespace zim { class FileCompound; -class FileReader : public Reader { +class BaseFileReader : public Reader { + public: // functions + BaseFileReader(offset_t offset, zsize_t size) + : _offset(offset), _size(size) {} + ~BaseFileReader() = default; + zsize_t size() const { return _size; }; + offset_t offset() const { return _offset; }; + + protected: // data + offset_t _offset; + zsize_t _size; +}; + +class FileReader : public BaseFileReader { public: // types typedef std::shared_ptr FileHandle; @@ -36,9 +49,6 @@ class FileReader : public Reader { FileReader(FileHandle fh, offset_t offset, zsize_t size); ~FileReader() = default; - zsize_t size() const { return _size; }; - offset_t offset() const { return _offset; }; - char read(offset_t offset) const; void read(char* dest, offset_t offset, zsize_t size) const; const Buffer get_buffer(offset_t offset, zsize_t size) const; @@ -50,18 +60,13 @@ class FileReader : public Reader { // by a sub_reader (otherwise the file handle would be invalidated by // FD destructor when the sub-reader is destroyed). FileHandle _fhandle; - offset_t _offset; - zsize_t _size; }; -class MultiPartFileReader : public Reader { +class MultiPartFileReader : public BaseFileReader { public: explicit MultiPartFileReader(std::shared_ptr source); ~MultiPartFileReader() {}; - zsize_t size() const { return _size; }; - offset_t offset() const { return _offset; }; - char read(offset_t offset) const; void read(char* dest, offset_t offset, zsize_t size) const; const Buffer get_buffer(offset_t offset, zsize_t size) const; @@ -72,8 +77,6 @@ class MultiPartFileReader : public Reader { MultiPartFileReader(std::shared_ptr source, offset_t offset, zsize_t size); std::shared_ptr source; - offset_t _offset; - zsize_t _size; }; }; From e333968431fdcb75a73e7571a373a9775f69caf6 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 29 Mar 2024 11:49:58 +0100 Subject: [PATCH 4/5] Introduce `BaseFileReader::get_mmap_buffer`. BaseFileReader is now responsible for trying to get a mmap and then fallback to reading in file. --- src/file_reader.cpp | 62 +++++++++++++++++++++------------------------ src/file_reader.h | 12 ++++++--- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/file_reader.cpp b/src/file_reader.cpp index 3ec575917..7a1d06b0b 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -177,24 +177,11 @@ makeMmappedBuffer(int fd, offset_t offset, zsize_t size) } // unnamed namespace #endif // ENABLE_USE_MMAP -const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) const { +const Buffer BaseFileReader::get_buffer(offset_t offset, zsize_t size) const { ASSERT(size, <=, _size); #ifdef ENABLE_USE_MMAP try { - auto found_range = source->locate(_offset+offset, size); - auto first_part_containing_it = found_range.first; - if (++first_part_containing_it != found_range.second) { - throw MMapException(); - } - - // The range is in only one part - auto range = found_range.first->first; - auto part = found_range.first->second; - auto logical_local_offset = offset + _offset - range.min; - ASSERT(size, <=, part->size()); - int fd = part->fhandle().getNativeHandle(); - auto physical_local_offset = logical_local_offset + part->offset(); - return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size); + return get_mmap_buffer(offset, size); } catch(MMapException& e) #endif { @@ -210,6 +197,27 @@ const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) cons } } +const Buffer MultiPartFileReader::get_mmap_buffer(offset_t offset, zsize_t size) const { +#ifdef ENABLE_USE_MMAP + auto found_range = source->locate(_offset + offset, size); + auto first_part_containing_it = found_range.first; + if (++first_part_containing_it != found_range.second) { + throw MMapException(); + } + + // The range is in only one part + auto range = found_range.first->first; + auto part = found_range.first->second; + auto logical_local_offset = offset + _offset - range.min; + ASSERT(size, <=, part->size()); + int fd = part->fhandle().getNativeHandle(); + auto physical_local_offset = logical_local_offset + part->offset(); + return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size); +#else + return Buffer::makeBuffer(size); // unreachable +#endif +} + bool Reader::can_read(offset_t offset, zsize_t size) const { return (offset.v <= this->size().v && (offset.v+size.v) <= this->size().v); @@ -273,26 +281,14 @@ void FileReader::read(char* dest, offset_t offset, zsize_t size) const }; } -const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const -{ - ASSERT(size, <=, _size); +const Buffer FileReader::get_mmap_buffer(offset_t offset, zsize_t size) const { #ifdef ENABLE_USE_MMAP - try { - auto local_offset = offset + _offset; - int fd = _fhandle->getNativeHandle(); - return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size); - } catch(MMapException& e) + auto local_offset = offset + _offset; + int fd = _fhandle->getNativeHandle(); + return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size); +#else + return Buffer::makeBuffer(size); // unreachable #endif - { - // We cannot do the mmap, for several possible reasons: - // - Mmap offset is too big (>4GB on 32 bits) - // - We are on Windows. - // We will have to do some memory copies :/ - // [TODO] Use Windows equivalent for mmap. - auto ret_buffer = Buffer::makeBuffer(size); - read(const_cast(ret_buffer.data()), offset, size); - return ret_buffer; - } } std::unique_ptr diff --git a/src/file_reader.h b/src/file_reader.h index a6650c3e0..ae366c764 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -36,6 +36,10 @@ class BaseFileReader : public Reader { zsize_t size() const { return _size; }; offset_t offset() const { return _offset; }; + virtual const Buffer get_mmap_buffer(offset_t offset, + zsize_t size) const = 0; + const Buffer get_buffer(offset_t offset, zsize_t size) const; + protected: // data offset_t _offset; zsize_t _size; @@ -50,9 +54,9 @@ class FileReader : public BaseFileReader { ~FileReader() = default; char read(offset_t offset) const; - void read(char* dest, offset_t offset, zsize_t size) const; - const Buffer get_buffer(offset_t offset, zsize_t size) const; + void read(char *dest, offset_t offset, zsize_t size) const; + const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const; std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; private: // data @@ -68,9 +72,9 @@ class MultiPartFileReader : public BaseFileReader { ~MultiPartFileReader() {}; char read(offset_t offset) const; - void read(char* dest, offset_t offset, zsize_t size) const; - const Buffer get_buffer(offset_t offset, zsize_t size) const; + void read(char *dest, offset_t offset, zsize_t size) const; + const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const; std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; private: From 916afdf25e45c67888ca79ebad76ac2c414e2797 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 29 Mar 2024 12:33:16 +0100 Subject: [PATCH 5/5] Throw MMapException if mmap fail. --- src/file_reader.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/file_reader.cpp b/src/file_reader.cpp index 7a1d06b0b..6dae460c8 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -146,11 +146,13 @@ mmapReadOnly(int fd, offset_type offset, size_type size) #endif const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset); - if (p == MAP_FAILED) - throw std::runtime_error(Formatter() - << "Cannot mmap size " << size << " at off " - << offset << " : " << strerror(errno)); - + if (p == MAP_FAILED) { + // mmap may fails for a lot of reason. + // Most of them (io error, too big size...) may not recoverable but some of + // them may be relative to mmap only and a "simple" read from the file would work. + // Let's throw a MMapException to fallback to read (and potentially fail again there). + throw MMapException(); + } return p; } @@ -189,7 +191,8 @@ const Buffer BaseFileReader::get_buffer(offset_t offset, zsize_t size) const { // - Mmap offset is too big (>4GB on 32 bits) // - The range is several part // - We are on Windows. - // We will have to do some memory copies :/ + // - Mmap itself has failed + // We will have to do some memory copies (or fail trying to) :/ // [TODO] Use Windows equivalent for mmap. auto ret_buffer = Buffer::makeBuffer(size); read(const_cast(ret_buffer.data()), offset, size);