Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly handle failing mmap on FileReader. #867

Merged
merged 5 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 45 additions & 37 deletions src/file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@
: MultiPartFileReader(source, offset_t(0), source->fsize()) {}

MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size)
: source(source),
_offset(offset),
_size(size)
: BaseFileReader(offset, size),
source(source)
{
ASSERT(offset.v, <=, source->fsize().v);
ASSERT(offset.v+size.v, <=, source->fsize().v);
Expand Down Expand Up @@ -147,11 +146,13 @@
#endif

const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset);
if (p == MAP_FAILED)
throw std::runtime_error(Formatter()
<< "Cannot mmap size " << size << " at off "
<< offset << " : " << strerror(errno));

if (p == MAP_FAILED) {
// mmap may fails for a lot of reason.
// Most of them (io error, too big size...) may not recoverable but some of
// them may be relative to mmap only and a "simple" read from the file would work.
// Let's throw a MMapException to fallback to read (and potentially fail again there).
throw MMapException();

Check warning on line 154 in src/file_reader.cpp

View check run for this annotation

Codecov / codecov/patch

src/file_reader.cpp#L154

Added line #L154 was not covered by tests
}
return p;
}

Expand All @@ -178,36 +179,48 @@
} // unnamed namespace
#endif // ENABLE_USE_MMAP

const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) const {
const Buffer BaseFileReader::get_buffer(offset_t offset, zsize_t size) const {
ASSERT(size, <=, _size);
#ifdef ENABLE_USE_MMAP
try {
auto found_range = source->locate(_offset+offset, size);
auto first_part_containing_it = found_range.first;
if (++first_part_containing_it != found_range.second) {
throw MMapException();
}

// The range is in only one part
auto range = found_range.first->first;
auto part = found_range.first->second;
auto logical_local_offset = offset + _offset - range.min;
ASSERT(size, <=, part->size());
int fd = part->fhandle().getNativeHandle();
auto physical_local_offset = logical_local_offset + part->offset();
return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size);
return get_mmap_buffer(offset, size);
} catch(MMapException& e)
#endif
{
// The range is several part, or we are on Windows.
// We will have to do some memory copies :/
// We cannot do the mmap, for several possible reasons:
// - Mmap offset is too big (>4GB on 32 bits)
// - The range is several part
// - We are on Windows.
// - Mmap itself has failed
// We will have to do some memory copies (or fail trying to) :/
// [TODO] Use Windows equivalent for mmap.
auto ret_buffer = Buffer::makeBuffer(size);
read(const_cast<char*>(ret_buffer.data()), offset, size);
return ret_buffer;
}
}

const Buffer MultiPartFileReader::get_mmap_buffer(offset_t offset, zsize_t size) const {
#ifdef ENABLE_USE_MMAP
auto found_range = source->locate(_offset + offset, size);
auto first_part_containing_it = found_range.first;
if (++first_part_containing_it != found_range.second) {
throw MMapException();
}

// The range is in only one part
auto range = found_range.first->first;
auto part = found_range.first->second;
auto logical_local_offset = offset + _offset - range.min;
ASSERT(size, <=, part->size());
int fd = part->fhandle().getNativeHandle();
auto physical_local_offset = logical_local_offset + part->offset();
return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size);
#else
return Buffer::makeBuffer(size); // unreachable
#endif
}

bool Reader::can_read(offset_t offset, zsize_t size) const
{
return (offset.v <= this->size().v && (offset.v+size.v) <= this->size().v);
Expand All @@ -226,9 +239,8 @@
////////////////////////////////////////////////////////////////////////////////

FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size)
: _fhandle(fh)
, _offset(offset)
, _size(size)
: BaseFileReader(offset, size)
, _fhandle(fh)
{
}

Expand Down Expand Up @@ -272,17 +284,13 @@
};
}

const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const
{
ASSERT(size, <=, _size);
const Buffer FileReader::get_mmap_buffer(offset_t offset, zsize_t size) const {
#ifdef ENABLE_USE_MMAP
offset += _offset;
auto local_offset = offset + _offset;
int fd = _fhandle->getNativeHandle();
return Buffer::makeBuffer(makeMmappedBuffer(fd, offset, size), size);
#else // We are on Windows. [TODO] Use Windows equivalent for mmap.
auto ret_buffer = Buffer::makeBuffer(size);
read(const_cast<char*>(ret_buffer.data()), offset, size);
return ret_buffer;
return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size);
#else
return Buffer::makeBuffer(size); // unreachable
#endif
}

Expand Down
43 changes: 25 additions & 18 deletions src/file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,52 +28,59 @@ namespace zim {

class FileCompound;

class FileReader : public Reader {
class BaseFileReader : public Reader {
public: // functions
BaseFileReader(offset_t offset, zsize_t size)
: _offset(offset), _size(size) {}
~BaseFileReader() = default;
zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

virtual const Buffer get_mmap_buffer(offset_t offset,
zsize_t size) const = 0;
const Buffer get_buffer(offset_t offset, zsize_t size) const;

protected: // data
offset_t _offset;
zsize_t _size;
};

class FileReader : public BaseFileReader {
public: // types
typedef std::shared_ptr<const DEFAULTFS::FD> FileHandle;

public: // functions
explicit FileReader(FileHandle fh, offset_t offset, zsize_t size);
FileReader(FileHandle fh, offset_t offset, zsize_t size);
~FileReader() = default;

zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

char read(offset_t offset) const;
void read(char* dest, offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
void read(char *dest, offset_t offset, zsize_t size) const;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;

private: // data
// The file handle is stored via a shared pointer so that it can be shared
// by a sub_reader (otherwise the file handle would be invalidated by
// FD destructor when the sub-reader is destroyed).
FileHandle _fhandle;
offset_t _offset;
zsize_t _size;
};

class MultiPartFileReader : public Reader {
class MultiPartFileReader : public BaseFileReader {
public:
MultiPartFileReader(std::shared_ptr<const FileCompound> source);
explicit MultiPartFileReader(std::shared_ptr<const FileCompound> source);
~MultiPartFileReader() {};

zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

char read(offset_t offset) const;
void read(char* dest, offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
void read(char *dest, offset_t offset, zsize_t size) const;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;

private:
MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size);

std::shared_ptr<const FileCompound> source;
offset_t _offset;
zsize_t _size;
};

};
Expand Down
Loading