From 92fe831c900bdffcd5517cf6af61aabb962bef3f Mon Sep 17 00:00:00 2001 From: Miles Date: Thu, 30 Nov 2023 18:01:20 +0000 Subject: [PATCH] GH-38857: [Python] Add append mode for pyarrow.OsFile (#38820) ### Rationale for this change Seems reasonable. :) ### What changes are included in this PR? Exposes the C++ append parameter from FileOutputStream to PyArrow's OSFile. ### Are these changes tested? Yes. ### Are there any user-facing changes? Users can now pass `a` or `ab` as the `mode` parameter of `pyarrow.OSFile`. * Closes: #38857 Authored-by: Miles Granger Signed-off-by: AlenkaF --- python/pyarrow/includes/libarrow.pxd | 3 +++ python/pyarrow/io.pxi | 24 ++++++++++++++++++++++-- python/pyarrow/lib.pxd | 1 + python/pyarrow/tests/test_io.py | 19 +++++++++++++++++++ 4 files changed, 45 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index fda9d4449763e..59b63b5fb7912 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1385,6 +1385,9 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil: @staticmethod CResult[shared_ptr[COutputStream]] Open(const c_string& path) + @staticmethod + CResult[shared_ptr[COutputStream]] Open(const c_string& path, c_bool append) + int file_descriptor() cdef cppclass ReadableFile(CRandomAccessFile): diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 1908dcdfa2270..24b4e003a2cf7 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -111,6 +111,7 @@ cdef class NativeFile(_Weakrefable): self.is_readable = False self.is_writable = False self.is_seekable = False + self._is_appending = False def __dealloc__(self): if self.own_file: @@ -139,12 +140,15 @@ cdef class NativeFile(_Weakrefable): * rb: binary read * wb: binary write * rb+: binary read and write + * ab: binary append """ # Emulate built-in file modes if self.is_readable and self.is_writable: return 'rb+' elif self.is_readable: return 'rb' + elif 
self.is_writable and self._is_appending: + return 'ab' elif self.is_writable: return 'wb' else: @@ -1113,6 +1117,19 @@ cdef class OSFile(NativeFile): 'rb' b'OSFile' + Open the file to append: + + >>> with pa.OSFile('example_osfile.arrow', mode='ab') as f: + ... f.mode + ... f.write(b' is super!') + ... + 'ab' + 10 + >>> with pa.OSFile('example_osfile.arrow') as f: + ... f.read() + ... + b'OSFile is super!' + Inspect created OSFile: >>> pa.OSFile('example_osfile.arrow') @@ -1134,6 +1151,8 @@ cdef class OSFile(NativeFile): self._open_readable(c_path, maybe_unbox_memory_pool(memory_pool)) elif mode in ('w', 'wb'): self._open_writable(c_path) + elif mode in ('a', 'ab'): + self._open_writable(c_path, append=True) else: raise ValueError('Invalid file mode: {0}'.format(mode)) @@ -1146,10 +1165,11 @@ cdef class OSFile(NativeFile): self.is_readable = True self.set_random_access_file( handle) - cdef _open_writable(self, c_string path): + cdef _open_writable(self, c_string path, c_bool append=False): with nogil: - self.output_stream = GetResultValue(FileOutputStream.Open(path)) + self.output_stream = GetResultValue(FileOutputStream.Open(path, append)) self.is_writable = True + self._is_appending = append def fileno(self): self._assert_open() diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index ae197eca1ca6b..1440ba0750094 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -519,6 +519,7 @@ cdef class NativeFile(_Weakrefable): bint is_readable bint is_writable bint is_seekable + bint _is_appending bint own_file # By implementing these "virtual" functions (all functions in Cython diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 0c9e591ccd466..9609e4066ab4f 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -1114,6 +1114,13 @@ def test_os_file_writer(tmpdir): with pytest.raises(IOError): f2.read(5) + f2.close() + + # Append + with pa.OSFile(path, mode='ab') as f4: + 
f4.write(b'bar') + with pa.OSFile(path) as f5: + assert f5.size() == 6 # foo + bar def test_native_file_write_reject_unicode(): @@ -1152,6 +1159,18 @@ def test_native_file_modes(tmpdir): assert f.writable() assert not f.seekable() + with pa.OSFile(path, mode='ab') as f: + assert f.mode == 'ab' + assert not f.readable() + assert f.writable() + assert not f.seekable() + + with pa.OSFile(path, mode='a') as f: + assert f.mode == 'ab' + assert not f.readable() + assert f.writable() + assert not f.seekable() + with open(path, 'wb') as f: f.write(b'foooo')