Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add missing stat() methods to DBFSPath and WorkspacePath #144

Merged
merged 3 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion src/databricks/labs/blueprint/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import posixpath
import re
import shutil
import stat
from abc import abstractmethod
from collections.abc import Generator, Iterable, Sequence
from io import BytesIO, StringIO
Expand Down Expand Up @@ -121,7 +122,6 @@ class _DatabricksPath(Path, abc.ABC): # pylint: disable=too-many-public-methods
# Public APIs that we don't support.
as_uri = _na("as_uri")
cwd = _na("cwd")
stat = _na("stat")
chmod = _na("chmod")
lchmod = _na("lchmod")
lstat = _na("lstat")
Expand All @@ -138,6 +138,7 @@ def __new__(cls, *args, **kwargs):
# Force all initialisation to go via __init__() irrespective of the (Python-specific) base version.
return object.__new__(cls)

# pylint: disable=super-init-not-called
def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None:
# We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the
# implementation of the public API.
Expand Down Expand Up @@ -385,6 +386,7 @@ def with_suffix(self: P, suffix: str) -> P:
raise ValueError(msg)
return self.with_name(stem + suffix)

# pylint: disable=arguments-differ
def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P:
normalized = self.with_segments(*other)
if self.anchor != normalized.anchor:
Expand Down Expand Up @@ -691,6 +693,14 @@ def _file_info(self) -> FileInfo:
self._cached_file_info = self._ws.dbfs.get_status(self.as_posix())
return self._cached_file_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return stat information for this DBFS path.

    Only ``st_size`` (index 6) and ``st_mtime`` (index 8) are populated,
    taken from the DBFS :class:`FileInfo`; every other field is -1.
    *follow_symlinks* is accepted for pathlib API compatibility and ignored
    (DBFS has no symlinks).
    """
    info = self._file_info
    seq: list[float] = [-1.0] * 10
    # Explicit "is not None" checks: a 0-byte file or an epoch-0 timestamp is
    # valid data and must not be collapsed into the -1 "unknown" sentinel,
    # which the previous truthiness tests (`or -1`) would have done.
    if info.file_size is not None:
        seq[6] = info.file_size  # st_size (bytes)
    if info.modification_time is not None:
        seq[8] = float(info.modification_time) / 1000.0  # st_mtime, ms -> s
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a DBFS directory."""
try:
Expand Down Expand Up @@ -841,6 +851,15 @@ def _object_info(self) -> ObjectInfo:
self._cached_object_info = self._ws.workspace.get_status(self.as_posix())
return self._object_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return stat information for this Workspace path.

    Only ``st_size`` (index 6), ``st_mtime`` (index 8) and ``st_ctime``
    (index 9) are populated, taken from the workspace :class:`ObjectInfo`;
    every other field is -1. *follow_symlinks* is accepted for pathlib API
    compatibility and ignored (the Workspace has no symlinks).
    """
    info = self._object_info
    seq: list[float] = [-1.0] * 10
    # Explicit "is not None" checks: a size of 0 or an epoch-0 timestamp is
    # valid data and must not be collapsed into the -1 "unknown" sentinel,
    # which the previous truthiness tests (`or -1`) would have done.
    if info.size is not None:
        seq[6] = info.size  # st_size (bytes)
    if info.modified_at is not None:
        seq[8] = float(info.modified_at) / 1000.0  # st_mtime, ms -> s
    if info.created_at is not None:
        seq[9] = float(info.created_at) / 1000.0  # st_ctime, ms -> s
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a directory in Databricks Workspace."""
try:
Expand Down
16 changes: 16 additions & 0 deletions tests/integration/test_paths.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import codecs
from datetime import datetime
from pathlib import Path

import pytest
Expand Down Expand Up @@ -67,6 +68,21 @@ def test_open_text_io(ws, make_random, cls):
assert not hello_txt.exists()


@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
def test_stat(ws, make_random, cls):
    """stat() reports modification (and, for workspace paths, creation) times no earlier than test start."""
    started = datetime.now().timestamp()
    name = make_random()
    base = cls(ws, f"~/{name}/a/b/c").expanduser()
    base.mkdir(parents=True)

    hello_txt = base / "hello.txt"
    hello_txt.write_text("Hello, World!")
    st = hello_txt.stat()
    if cls is WorkspacePath:  # DBFSPath has no st_ctime
        assert st.st_ctime >= started
    assert st.st_mtime >= started


@pytest.mark.parametrize("cls", DATABRICKS_PATHLIKE)
def test_unlink(ws, make_random, cls):
name = make_random()
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
from databricks.sdk.mixins.workspace import WorkspaceExt
from databricks.sdk.service.files import FileInfo
from databricks.sdk.service.workspace import (
ImportFormat,
Language,
ObjectInfo,
ObjectType,
)

from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.paths import DBFSPath, WorkspacePath


def test_empty_init() -> None:
Expand Down Expand Up @@ -1007,3 +1008,24 @@ def test_rglob() -> None:
WorkspacePath(ws, "/test/path/dir1/file1.json"),
WorkspacePath(ws, "/test/path/dir2/file2.json"),
}


def test_workspace_path_stat_has_fields():
    """WorkspacePath.stat() surfaces size, mtime and ctime from ObjectInfo (milliseconds -> seconds)."""
    ws = create_autospec(WorkspaceClient)
    ws.workspace.get_status.return_value = ObjectInfo(created_at=1234, modified_at=2345, size=3456)
    result = WorkspacePath(ws, "/test/path").stat()
    assert result.st_size == 3456
    assert result.st_mtime == 2345 / 1000.0
    assert result.st_ctime == 1234 / 1000.0


def test_dbfs_path_stat_has_fields():
    """DBFSPath.stat() surfaces size and mtime from FileInfo (milliseconds -> seconds)."""
    ws = create_autospec(WorkspaceClient)
    ws.dbfs.get_status.return_value = FileInfo(modification_time=2345, file_size=3456)
    result = DBFSPath(ws, "/test/path").stat()
    assert result.st_size == 3456
    assert result.st_mtime == 2345 / 1000.0
Loading