Skip to content

Commit

Permalink
apacheGH-38857: [Python] Add append mode for pyarrow.OsFile (apache#3…
Browse files Browse the repository at this point in the history
…8820)

### Rationale for this change

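The C++ `FileOutputStream` already supports opening a file in append mode, so exposing that option through PyArrow seems reasonable. :)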

### What changes are included in this PR?

Exposes the C++ `append` parameter of `FileOutputStream::Open` through PyArrow's `OSFile`.

### Are these changes tested?

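Yes. Tests in `python/pyarrow/tests/test_io.py` cover appending to an existing file and the `mode` reported for files opened with `a` and `ab`.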

### Are there any user-facing changes?

Yes: users can now pass `a` or `ab` as the `mode` parameter of `pyarrow.OSFile` to open a file for appending.

* Closes: apache#38857

Authored-by: Miles Granger <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
  • Loading branch information
milesgranger authored Nov 30, 2023
1 parent d555890 commit 92fe831
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 2 deletions.
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,9 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil:
@staticmethod
CResult[shared_ptr[COutputStream]] Open(const c_string& path)

@staticmethod
CResult[shared_ptr[COutputStream]] Open(const c_string& path, c_bool append)

int file_descriptor()

cdef cppclass ReadableFile(CRandomAccessFile):
Expand Down
24 changes: 22 additions & 2 deletions python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ cdef class NativeFile(_Weakrefable):
self.is_readable = False
self.is_writable = False
self.is_seekable = False
self._is_appending = False

def __dealloc__(self):
if self.own_file:
Expand Down Expand Up @@ -139,12 +140,15 @@ cdef class NativeFile(_Weakrefable):
* rb: binary read
* wb: binary write
* rb+: binary read and write
* ab: binary append
"""
# Emulate built-in file modes
if self.is_readable and self.is_writable:
return 'rb+'
elif self.is_readable:
return 'rb'
elif self.is_writable and self._is_appending:
return 'ab'
elif self.is_writable:
return 'wb'
else:
Expand Down Expand Up @@ -1113,6 +1117,19 @@ cdef class OSFile(NativeFile):
'rb'
b'OSFile'
Open the file to append:
>>> with pa.OSFile('example_osfile.arrow', mode='ab') as f:
... f.mode
... f.write(b' is super!')
...
'ab'
10
>>> with pa.OSFile('example_osfile.arrow') as f:
... f.read()
...
b'OSFile is super!'
Inspect created OSFile:
>>> pa.OSFile('example_osfile.arrow')
Expand All @@ -1134,6 +1151,8 @@ cdef class OSFile(NativeFile):
self._open_readable(c_path, maybe_unbox_memory_pool(memory_pool))
elif mode in ('w', 'wb'):
self._open_writable(c_path)
elif mode in ('a', 'ab'):
self._open_writable(c_path, append=True)
else:
raise ValueError('Invalid file mode: {0}'.format(mode))

Expand All @@ -1146,10 +1165,11 @@ cdef class OSFile(NativeFile):
self.is_readable = True
self.set_random_access_file(<shared_ptr[CRandomAccessFile]> handle)

cdef _open_writable(self, c_string path):
cdef _open_writable(self, c_string path, c_bool append=False):
with nogil:
self.output_stream = GetResultValue(FileOutputStream.Open(path))
self.output_stream = GetResultValue(FileOutputStream.Open(path, append))
self.is_writable = True
self._is_appending = append

def fileno(self):
self._assert_open()
Expand Down
1 change: 1 addition & 0 deletions python/pyarrow/lib.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ cdef class NativeFile(_Weakrefable):
bint is_readable
bint is_writable
bint is_seekable
bint _is_appending
bint own_file

# By implementing these "virtual" functions (all functions in Cython
Expand Down
19 changes: 19 additions & 0 deletions python/pyarrow/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,13 @@ def test_os_file_writer(tmpdir):

with pytest.raises(IOError):
f2.read(5)
f2.close()

# Append
with pa.OSFile(path, mode='ab') as f4:
f4.write(b'bar')
with pa.OSFile(path) as f5:
assert f5.size() == 6 # foo + bar


def test_native_file_write_reject_unicode():
Expand Down Expand Up @@ -1152,6 +1159,18 @@ def test_native_file_modes(tmpdir):
assert f.writable()
assert not f.seekable()

with pa.OSFile(path, mode='ab') as f:
assert f.mode == 'ab'
assert not f.readable()
assert f.writable()
assert not f.seekable()

with pa.OSFile(path, mode='a') as f:
assert f.mode == 'ab'
assert not f.readable()
assert f.writable()
assert not f.seekable()

with open(path, 'wb') as f:
f.write(b'foooo')

Expand Down

0 comments on commit 92fe831

Please sign in to comment.