From 136ed1496fe69033045c414afd2631e7babb82fe Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Fri, 1 Nov 2024 15:20:32 -0400 Subject: [PATCH] Re-expose `range` in `GetOptions` --- docs/api/get.md | 2 + obstore/python/obstore/_get.pyi | 30 ++++++++++---- obstore/python/obstore/_obstore.pyi | 2 + obstore/src/get.rs | 64 +++++++++++++++++++---------- tests/test_get.py | 37 ++++++++++++++++- 5 files changed, 103 insertions(+), 32 deletions(-) diff --git a/docs/api/get.md b/docs/api/get.md index ccf6d0f..eb201e0 100644 --- a/docs/api/get.md +++ b/docs/api/get.md @@ -9,3 +9,5 @@ ::: obstore.GetOptions ::: obstore.GetResult ::: obstore.Buffer +::: obstore.OffsetRange +::: obstore.SuffixRange diff --git a/obstore/python/obstore/_get.pyi b/obstore/python/obstore/_get.pyi index c099512..1ba835c 100644 --- a/obstore/python/obstore/_get.pyi +++ b/obstore/python/obstore/_get.pyi @@ -6,6 +6,18 @@ from ._buffer import Buffer from ._list import ObjectMeta from .store import ObjectStore +class OffsetRange(TypedDict): + """Request all bytes starting from a given byte offset""" + + offset: int + """The byte offset for the offset range request.""" + +class SuffixRange(TypedDict): + """Request up to the last `n` bytes""" + + suffix: int + """The number of bytes from the suffix to request.""" + class GetOptions(TypedDict, total=False): """Options for a get request. @@ -15,7 +27,7 @@ class GetOptions(TypedDict, total=False): if_match: str | None """ Request will succeed if the `ObjectMeta::e_tag` matches - otherwise returning [`Error::Precondition`] + otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError]. See @@ -31,7 +43,7 @@ class GetOptions(TypedDict, total=False): if_none_match: str | None """ Request will succeed if the `ObjectMeta::e_tag` does not match - otherwise returning [`Error::NotModified`] + otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError]. See @@ -54,7 +66,7 @@ class GetOptions(TypedDict, total=False): if_modified_since: datetime | None """ Request will succeed if the object has not been modified since - otherwise returning [`Error::Precondition`] + otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError]. Some stores, such as S3, will only return `NotModified` for exact timestamp matches, instead of for any timestamp greater than or equal. @@ -62,10 +74,10 @@ class GetOptions(TypedDict, total=False): """ - # range: Tuple[int | None, int | None] + range: Tuple[int, int] | List[int] | OffsetRange | SuffixRange """ Request transfer of only the specified range of bytes - otherwise returning [`Error::NotModified`] + otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError]. The semantics of this tuple are: @@ -78,13 +90,13 @@ class GetOptions(TypedDict, total=False): The `end` offset is _exclusive_. - - `(int, None)`: Request all bytes starting from a given byte offset. + - `{"offset": int}`: Request all bytes starting from a given byte offset. This is equivalent to `bytes={int}-` as an HTTP header. - - `(None, int)`: Request the last `int` bytes. Note that here, `int` is _this size - of the request_, not the byte offset. This is equivalent to `bytes=-{int}` as an - HTTP header. + - `{"suffix": int}`: Request the last `int` bytes. Note that here, `int` is _the + size of the request_, not the byte offset. This is equivalent to `bytes=-{int}` + as an HTTP header. """ diff --git a/obstore/python/obstore/_obstore.pyi b/obstore/python/obstore/_obstore.pyi index a5afa7b..1d04cc7 100644 --- a/obstore/python/obstore/_obstore.pyi +++ b/obstore/python/obstore/_obstore.pyi @@ -10,6 +10,8 @@ from ._delete import delete_async as delete_async from ._get import Buffer as Buffer from ._get import GetOptions as GetOptions from ._get import GetResult as GetResult +from ._get import OffsetRange as OffsetRange +from ._get import SuffixRange as SuffixRange from ._get import get as get from ._get import get_async as get_async from ._get import get_range as get_range diff --git a/obstore/src/get.rs b/obstore/src/get.rs index 20d8b8b..65b240b 100644 --- a/obstore/src/get.rs +++ b/obstore/src/get.rs @@ -27,8 +27,7 @@ pub(crate) struct PyGetOptions { if_none_match: Option, if_modified_since: Option>, if_unmodified_since: Option>, - // Taking out of public API until we can decide the right way to expose this - // range: Option, + range: Option, version: Option, head: bool, } @@ -49,7 +48,7 @@ impl<'py> FromPyObject<'py> for PyGetOptions { .get("if_unmodified_since") .map(|x| x.extract()) .transpose()?, - // range: dict.get("range").map(|x| x.extract()).transpose()?, + range: dict.get("range").map(|x| x.extract()).transpose()?, version: dict.get("version").map(|x| x.extract()).transpose()?, head: dict .get("head") @@ -67,37 +66,58 @@ impl From for GetOptions { if_none_match: value.if_none_match, if_modified_since: value.if_modified_since, if_unmodified_since: value.if_unmodified_since, - range: Default::default(), + range: value.range.map(|inner| inner.0), version: value.version, head: value.head, } } } -#[allow(dead_code)] +#[derive(FromPyObject)] +pub(crate) struct PyOffsetRange { + #[pyo3(item)] + offset: usize, +} + +impl From for GetRange { + fn from(value: PyOffsetRange) -> Self { + GetRange::Offset(value.offset) + } +} + +#[derive(FromPyObject)] +pub(crate) struct PySuffixRange { + #[pyo3(item)] + suffix: usize, +} + +impl From for GetRange { + fn from(value: PySuffixRange) -> Self { + GetRange::Suffix(value.suffix) + } +} + pub(crate) struct PyGetRange(GetRange); // TODO: think of a better API here so that the distinction between each of these is easy to // understand. +// Allowed input: +// - [usize, usize] to refer to a bounded range from start to end (exclusive) +// - {"offset": usize} to request all bytes starting from a given byte offset +// - {"suffix": usize} to request the last `n` bytes impl<'py> FromPyObject<'py> for PyGetRange { fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { - let range = ob.extract::<[Option; 2]>()?; - match (range[0], range[1]) { - (Some(start), Some(end)) => { - if start >= end { - return Err(PyValueError::new_err( - format!("End range must be strictly greater than start range. Got start: {}, end: {}", start, end ), - )); - } - - Ok(Self(GetRange::Bounded(start..end))) - } - (Some(start), None) => Ok(Self(GetRange::Offset(start))), - // Note: in this case `end` means `suffix bytes` - (None, Some(end)) => Ok(Self(GetRange::Suffix(end))), - (None, None) => Err(PyValueError::new_err( - "Cannot provide (None, None) for range.", - )), + if let Ok(bounded) = ob.extract::<[usize; 2]>() { + Ok(Self(GetRange::Bounded(bounded[0]..bounded[1]))) + } else if let Ok(offset_range) = ob.extract::() { + Ok(Self(offset_range.into())) + } else if let Ok(suffix_range) = ob.extract::() { + Ok(Self(suffix_range.into())) + } else { + // dbg!(ob); + // let x = ob.extract::()?; + // dbg!(x.offset); + Err(PyValueError::new_err("Unexpected input for byte range.\nExpected two-integer tuple or list, or dict with 'offset' or 'suffix' key." )) } } } diff --git a/tests/test_get.py b/tests/test_get.py index d1d08b9..0a06552 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -47,7 +47,6 @@ async def test_stream_async(): assert pos == len(data) -@pytest.mark.skip("Skip until we restore range in get_options") def test_get_with_options(): store = MemoryStore() @@ -61,6 +60,42 @@ def test_get_with_options(): buf = result.bytes() assert buf == data[5:10] + # Test list input + result = obs.get(store, path, options={"range": [5, 10]}) + assert result.range == (5, 10) + buf = result.bytes() + assert buf == data[5:10] + + +def test_get_with_options_offset(): + store = MemoryStore() + + data = b"the quick brown fox jumps over the lazy dog," * 100 + path = "big-data.txt" + + obs.put(store, path, data) + + result = obs.get(store, path, options={"range": {"offset": 100}}) + result_range = result.range + assert result_range == (100, 4400) + buf = result.bytes() + assert buf == data[result_range[0] : result_range[1]] + + +def test_get_with_options_suffix(): + store = MemoryStore() + + data = b"the quick brown fox jumps over the lazy dog," * 100 + path = "big-data.txt" + + obs.put(store, path, data) + + result = obs.get(store, path, options={"range": {"suffix": 100}}) + result_range = result.range + assert result_range == (4300, 4400) + buf = result.bytes() + assert buf == data[result_range[0] : result_range[1]] + def test_get_range(): store = MemoryStore()