Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-expose range in GetOptions #67

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/api/get.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
::: obstore.GetOptions
::: obstore.GetResult
::: obstore.Buffer
::: obstore.OffsetRange
::: obstore.SuffixRange
30 changes: 21 additions & 9 deletions obstore/python/obstore/_get.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ from ._buffer import Buffer
from ._list import ObjectMeta
from .store import ObjectStore

class OffsetRange(TypedDict):
"""Request all bytes starting from a given byte offset"""

offset: int
"""The byte offset for the offset range request."""

class SuffixRange(TypedDict):
"""Request up to the last `n` bytes"""

suffix: int
"""The number of bytes from the suffix to request."""

class GetOptions(TypedDict, total=False):
"""Options for a get request.

Expand All @@ -15,7 +27,7 @@ class GetOptions(TypedDict, total=False):
if_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` matches
otherwise returning [`Error::Precondition`]
otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError].

See <https://datatracker.ietf.org/doc/html/rfc9110#name-if-match>

Expand All @@ -31,7 +43,7 @@ class GetOptions(TypedDict, total=False):
if_none_match: str | None
"""
Request will succeed if the `ObjectMeta::e_tag` does not match
otherwise returning [`Error::NotModified`]
otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError].

See <https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2>

Expand All @@ -54,18 +66,18 @@ class GetOptions(TypedDict, total=False):
if_modified_since: datetime | None
"""
Request will succeed if the object has not been modified since
otherwise returning [`Error::Precondition`]
otherwise returning [`PreconditionError`][obstore.exceptions.PreconditionError].

Some stores, such as S3, will only return `NotModified` for exact
timestamp matches, instead of for any timestamp greater than or equal.

<https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.4>
"""

# range: Tuple[int | None, int | None]
range: Tuple[int, int] | List[int] | OffsetRange | SuffixRange
"""
Request transfer of only the specified range of bytes
otherwise returning [`Error::NotModified`]
otherwise returning [`NotModifiedError`][obstore.exceptions.NotModifiedError].

The semantics of this tuple are:

Expand All @@ -78,13 +90,13 @@ class GetOptions(TypedDict, total=False):

The `end` offset is _exclusive_.

- `(int, None)`: Request all bytes starting from a given byte offset.
- `{"offset": int}`: Request all bytes starting from a given byte offset.

This is equivalent to `bytes={int}-` as an HTTP header.

- `(None, int)`: Request the last `int` bytes. Note that here, `int` is _this size
of the request_, not the byte offset. This is equivalent to `bytes=-{int}` as an
HTTP header.
- `{"suffix": int}`: Request the last `int` bytes. Note that here, `int` is _the
size of the request_, not the byte offset. This is equivalent to `bytes=-{int}`
as an HTTP header.

<https://datatracker.ietf.org/doc/html/rfc9110#name-range>
"""
Expand Down
2 changes: 2 additions & 0 deletions obstore/python/obstore/_obstore.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ from ._delete import delete_async as delete_async
from ._get import Buffer as Buffer
from ._get import GetOptions as GetOptions
from ._get import GetResult as GetResult
from ._get import OffsetRange as OffsetRange
from ._get import SuffixRange as SuffixRange
from ._get import get as get
from ._get import get_async as get_async
from ._get import get_range as get_range
Expand Down
64 changes: 42 additions & 22 deletions obstore/src/get.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ pub(crate) struct PyGetOptions {
if_none_match: Option<String>,
if_modified_since: Option<DateTime<Utc>>,
if_unmodified_since: Option<DateTime<Utc>>,
// Taking out of public API until we can decide the right way to expose this
// range: Option<PyGetRange>,
range: Option<PyGetRange>,
version: Option<String>,
head: bool,
}
Expand All @@ -49,7 +48,7 @@ impl<'py> FromPyObject<'py> for PyGetOptions {
.get("if_unmodified_since")
.map(|x| x.extract())
.transpose()?,
// range: dict.get("range").map(|x| x.extract()).transpose()?,
range: dict.get("range").map(|x| x.extract()).transpose()?,
version: dict.get("version").map(|x| x.extract()).transpose()?,
head: dict
.get("head")
Expand All @@ -67,37 +66,58 @@ impl From<PyGetOptions> for GetOptions {
if_none_match: value.if_none_match,
if_modified_since: value.if_modified_since,
if_unmodified_since: value.if_unmodified_since,
range: Default::default(),
range: value.range.map(|inner| inner.0),
version: value.version,
head: value.head,
}
}
}

#[allow(dead_code)]
#[derive(FromPyObject)]
pub(crate) struct PyOffsetRange {
#[pyo3(item)]
offset: usize,
}

impl From<PyOffsetRange> for GetRange {
fn from(value: PyOffsetRange) -> Self {
GetRange::Offset(value.offset)
}
}

#[derive(FromPyObject)]
pub(crate) struct PySuffixRange {
#[pyo3(item)]
suffix: usize,
}

impl From<PySuffixRange> for GetRange {
fn from(value: PySuffixRange) -> Self {
GetRange::Suffix(value.suffix)
}
}

pub(crate) struct PyGetRange(GetRange);

// TODO: think of a better API here so that the distinction between each of these is easy to
// understand.
// Allowed input:
// - [usize, usize] to refer to a bounded range from start to end (exclusive)
// - {"offset": usize} to request all bytes starting from a given byte offset
// - {"suffix": usize} to request the last `n` bytes
impl<'py> FromPyObject<'py> for PyGetRange {
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
let range = ob.extract::<[Option<usize>; 2]>()?;
match (range[0], range[1]) {
(Some(start), Some(end)) => {
if start >= end {
return Err(PyValueError::new_err(
format!("End range must be strictly greater than start range. Got start: {}, end: {}", start, end ),
));
}

Ok(Self(GetRange::Bounded(start..end)))
}
(Some(start), None) => Ok(Self(GetRange::Offset(start))),
// Note: in this case `end` means `suffix bytes`
(None, Some(end)) => Ok(Self(GetRange::Suffix(end))),
(None, None) => Err(PyValueError::new_err(
"Cannot provide (None, None) for range.",
)),
if let Ok(bounded) = ob.extract::<[usize; 2]>() {
Ok(Self(GetRange::Bounded(bounded[0]..bounded[1])))
} else if let Ok(offset_range) = ob.extract::<PyOffsetRange>() {
Ok(Self(offset_range.into()))
} else if let Ok(suffix_range) = ob.extract::<PySuffixRange>() {
Ok(Self(suffix_range.into()))
} else {
// dbg!(ob);
// let x = ob.extract::<PyOffsetRange>()?;
// dbg!(x.offset);
Err(PyValueError::new_err("Unexpected input for byte range.\nExpected two-integer tuple or list, or dict with 'offset' or 'suffix' key." ))
}
}
}
Expand Down
37 changes: 36 additions & 1 deletion tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ async def test_stream_async():
assert pos == len(data)


@pytest.mark.skip("Skip until we restore range in get_options")
def test_get_with_options():
store = MemoryStore()

Expand All @@ -61,6 +60,42 @@ def test_get_with_options():
buf = result.bytes()
assert buf == data[5:10]

# Test list input
result = obs.get(store, path, options={"range": [5, 10]})
assert result.range == (5, 10)
buf = result.bytes()
assert buf == data[5:10]


def test_get_with_options_offset():
store = MemoryStore()

data = b"the quick brown fox jumps over the lazy dog," * 100
path = "big-data.txt"

obs.put(store, path, data)

result = obs.get(store, path, options={"range": {"offset": 100}})
result_range = result.range
assert result_range == (100, 4400)
buf = result.bytes()
assert buf == data[result_range[0] : result_range[1]]


def test_get_with_options_suffix():
store = MemoryStore()

data = b"the quick brown fox jumps over the lazy dog," * 100
path = "big-data.txt"

obs.put(store, path, data)

result = obs.get(store, path, options={"range": {"suffix": 100}})
result_range = result.range
assert result_range == (4300, 4400)
buf = result.bytes()
assert buf == data[result_range[0] : result_range[1]]


def test_get_range():
store = MemoryStore()
Expand Down