Skip to content

Commit

Permalink
Speed up git backend (fsspec#1712)
Browse files Browse the repository at this point in the history
  • Loading branch information
mxmlnkn authored Oct 15, 2024
1 parent 03e89cc commit 952cd98
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 40 deletions.
66 changes: 27 additions & 39 deletions fsspec/implementations/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def _path_to_object(self, path, ref):
tree = comm.tree
for part in parts:
if part and isinstance(tree, pygit2.Tree):
if part not in tree:
raise FileNotFoundError(path)
tree = tree[part]
return tree

Expand All @@ -69,46 +71,32 @@ def _get_kwargs_from_urls(path):
out["ref"], path = path.split("@", 1)
return out

@staticmethod
def _object_to_info(obj, path=None):
    """Describe a pygit2 object as an info dictionary.

    Parameters
    ----------
    obj : pygit2 object
        Object to describe. Anything that is not a ``pygit2.Tree`` is
        reported as a file and must provide ``size``.
    path : str, optional
        Prefix placed in front of ``obj.name``, separated by "/". When
        empty or None, the bare ``obj.name`` is used as the name.

    Returns
    -------
    dict
        Dictionary with the keys "type", "name", "hex", "mode" and "size".
    """
    directory = isinstance(obj, pygit2.Tree)

    # The root tree has neither a name nor a filemode (both are None).
    if path:
        name = "/".join([path, obj.name or ""]).lstrip("/")
    else:
        name = obj.name

    if obj.filemode is None:
        mode = "100644"
    else:
        mode = f"{obj.filemode:o}"

    info = {"type": "directory" if directory else "file"}
    info["name"] = name
    info["hex"] = str(obj.id)
    info["mode"] = mode
    # Trees are always reported with size 0; only non-trees are asked for size.
    info["size"] = 0 if directory else obj.size
    return info

def ls(self, path, detail=True, ref=None, **kwargs):
    """List what is stored at ``path`` in the repository.

    Parameters
    ----------
    path : str
        Location to list; it is passed through ``_strip_protocol`` first.
    detail : bool
        If True, return one info dictionary per entry, otherwise return
        only the entry names.
    ref : str, optional
        Forwarded to ``_path_to_object`` together with the stripped path.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    list of dict or list of str
        One entry per child when ``path`` resolves to a ``pygit2.Tree``,
        otherwise a single entry describing the object at ``path``.
    """
    path = self._strip_protocol(path)
    obj = self._path_to_object(path, ref)
    if isinstance(obj, pygit2.Tree):
        # Children are named by joining the stripped directory path with
        # each child's own name.
        out = [GitFileSystem._object_to_info(child, path) for child in obj]
    else:
        # ``path`` already ends with the object's own name, so joining it
        # with ``obj.name`` again would yield e.g. "inner/file1/file1".
        entry = GitFileSystem._object_to_info(obj)
        entry["name"] = path.lstrip("/")
        out = [entry]
    if detail:
        return out
    return [item["name"] for item in out]

def info(self, path, ref=None, **kwargs):
    """Return the info dictionary for the single object at ``path``.

    Parameters
    ----------
    path : str
        Location to describe; it is passed through ``_strip_protocol``
        first.
    ref : str, optional
        Forwarded to ``_path_to_object`` together with the stripped path.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    dict
        Dictionary built by ``_object_to_info`` whose "name" is the
        stripped ``path`` itself.
    """
    path = self._strip_protocol(path)
    obj = self._path_to_object(path, ref)
    out = GitFileSystem._object_to_info(obj)
    if path:
        # ``path`` already names the object; using it as a prefix for
        # ``obj.name`` would repeat the last component ("inner/inner").
        out["name"] = path.lstrip("/")
    return out

def ukey(self, path, ref=None):
    """Return the "hex" entry of ``info(path, ref=ref)`` as the unique key."""
    details = self.info(path, ref=ref)
    return details["hex"]
Expand Down
33 changes: 33 additions & 0 deletions fsspec/implementations/tests/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,39 @@ def test_refs(repo):
assert f.read() == b"data3"


def _check_FileNotFoundError(f, *args, **kwargs):
    """Assert that calling ``f(*args, **kwargs)`` raises FileNotFoundError."""
    expectation = pytest.raises(FileNotFoundError)
    with expectation:
        f(*args, **kwargs)


def test_file_existence_checks(repo):
    """Check existence queries for a directory, a file and a missing path."""
    repo_dir, _sha = repo

    fs, _ = fsspec.url_to_fs(f"git://{repo_dir}:abranch@")

    # An existing directory answers truthily to every query.
    for check in (fs.lexists, fs.exists, fs.isdir, fs.info, fs.ls):
        assert check("inner")

    # So does an existing file.
    for check in (fs.lexists, fs.exists, fs.info, fs.ls):
        assert check("inner/file1")

    missing = "non-existing-file"

    # Boolean queries report a missing path as False instead of raising.
    for check in (fs.lexists, fs.exists, fs.isfile, fs.isdir):
        assert not check(missing)

    # Everything else must raise FileNotFoundError.
    for method in (fs.info, fs.size, fs.ls, fs.open):
        _check_FileNotFoundError(method, missing)


def test_url(repo):
d, sha = repo
fs, _, paths = fsspec.core.get_fs_token_paths(f"git://file1::file://{d}")
Expand Down
2 changes: 1 addition & 1 deletion fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ def info(self, path, **kwargs):
Returns a single dictionary, with exactly the same information as ``ls``
would with ``detail=True``.
The default implementation should calls ls and could be overridden by a
The default implementation calls ls and could be overridden by a
shortcut. kwargs are passed on to ``ls()``.
Some file systems might not be able to measure the file's size, in
Expand Down

0 comments on commit 952cd98

Please sign in to comment.