From 2aa7589d746bc090d23f738490144864bd5afb43 Mon Sep 17 00:00:00 2001 From: Srini Kadamati Date: Fri, 4 Aug 2023 13:19:45 -0400 Subject: [PATCH 01/10] Correct capitalization of XetHub (#1328) Sorry -- a nitpicky PR that fixes the capitalization of XetHub --- docs/source/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index af85b6b7a..eecfa202b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -52,7 +52,7 @@ The following libraries use ``fsspec`` internally for path and file handling: #. `Kedro`_, a Python framework for reproducible, maintainable and modular data science code #. `pyxet`_, a Python library for mounting and - accessing very large datasets from Xethub + accessing very large datasets from XetHub ``fsspec`` filesystems are also supported by: From 45a6aec7da1407243f9767c6ab0cff40efee72eb Mon Sep 17 00:00:00 2001 From: Nico Kemnitz Date: Thu, 10 Aug 2023 17:45:00 +0200 Subject: [PATCH 02/10] Verify Content-Encoding when querying Content-Length (#1320) --- fsspec/implementations/http.py | 6 ++++-- fsspec/implementations/tests/test_http.py | 13 +++++++++++++ fsspec/tests/conftest.py | 14 +++++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index afd0c2664..5d118dcbd 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -195,7 +195,6 @@ async def _ls_real(self, url, detail=True, **kwargs): return list(sorted(out)) async def _ls(self, url, detail=True, **kwargs): - if self.use_listings_cache and url in self.dircache: out = self.dircache[url] else: @@ -841,7 +840,10 @@ async def _file_info(url, session, size_policy="head", **kwargs): # or 'Accept-Ranges': 'none' (not 'bytes') # to mean streaming only, no random access => return None if "Content-Length" in r.headers: - info["size"] = int(r.headers["Content-Length"]) + # Some servers may choose to ignore Accept-Encoding and return + # compressed content, in which case the returned size is unreliable. 
+ if r.headers.get("Content-Encoding", "identity") == "identity": + info["size"] = int(r.headers["Content-Length"]) elif "Content-Range" in r.headers: info["size"] = int(r.headers["Content-Range"].split("/")[1]) diff --git a/fsspec/implementations/tests/test_http.py b/fsspec/implementations/tests/test_http.py index d8bd64524..fb3a55cad 100644 --- a/fsspec/implementations/tests/test_http.py +++ b/fsspec/implementations/tests/test_http.py @@ -10,6 +10,7 @@ import fsspec.asyn import fsspec.utils +from fsspec.implementations.http import HTTPStreamFile from fsspec.tests.conftest import data, reset_files, server, win # noqa: F401 @@ -280,6 +281,18 @@ def test_content_length_zero(server): assert f.read() == data +def test_content_encoding_gzip(server): + h = fsspec.filesystem( + "http", headers={"give_length": "true", "gzip_encoding": "true"} + ) + url = server + "/index/realfile" + + with h.open(url, "rb") as f: + assert isinstance(f, HTTPStreamFile) + assert f.size is None + assert f.read() == data + + def test_download(server, tmpdir): h = fsspec.filesystem("http", headers={"give_length": "true", "head_ok": "true "}) url = server + "/index/realfile" diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index fdaf03335..9fdf25b7a 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -1,4 +1,5 @@ import contextlib +import gzip import json import os import threading @@ -76,7 +77,14 @@ def do_GET(self): if "use_206" in self.headers: status = 206 if "give_length" in self.headers: - response_headers = {"Content-Length": len(file_data)} + if "gzip_encoding" in self.headers: + file_data = gzip.compress(file_data) + response_headers = { + "Content-Length": len(file_data), + "Content-Encoding": "gzip", + } + else: + response_headers = {"Content-Length": len(file_data)} self._respond(status, response_headers, file_data) elif "give_range" in self.headers: self._respond(status, {"Content-Range": content_range}, file_data) @@ -123,6 +131,10 @@ def do_HEAD(self): response_headers = {"Content-Length": len(file_data)} if "zero_length" in self.headers: response_headers["Content-Length"] = 0 + elif "gzip_encoding" in self.headers: + file_data = gzip.compress(file_data) + response_headers["Content-Encoding"] = "gzip" + response_headers["Content-Length"] = len(file_data) self._respond(200, response_headers) elif "give_range" in self.headers: From a988ce5c956571d9b14eafd193e81300603031b9 Mon Sep 17 00:00:00 2001 From: Genevieve Buckley <30920819+GenevieveBuckley@users.noreply.github.com> Date: Wed, 16 Aug 2023 11:32:24 +1000 Subject: [PATCH 03/10] Allow file mode="x" with get_fs_token_paths (#1333) * Also expand paths in exclusive file creation mode=x * Add test for filepath expandsion with file mode=x --- fsspec/core.py | 2 ++ fsspec/tests/test_core.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/fsspec/core.py b/fsspec/core.py index c777a339c..797d401d9 100644 --- a/fsspec/core.py +++ b/fsspec/core.py @@ -635,6 +635,8 @@ def get_fs_token_paths( else: if "w" in mode and expand: paths = _expand_paths(paths, name_function, num) + elif "x" in mode and expand: + paths = _expand_paths(paths, name_function, num) elif "*" in paths: paths = [f for f in sorted(fs.glob(paths)) if not fs.isdir(f)] else: diff --git a/fsspec/tests/test_core.py b/fsspec/tests/test_core.py index 7271d9875..c78664354 100644 --- a/fsspec/tests/test_core.py +++ b/fsspec/tests/test_core.py @@ -13,6 +13,7 @@ _expand_paths, expand_paths_if_needed, get_compression, + get_fs_token_paths, open_files, open_local, ) 
@@ -75,6 +76,11 @@ def test_expand_error(): _expand_paths("*.*", None, 1) +@pytest.mark.parametrize("mode", ["w", "w+", "x", "x+"]) +def test_expand_fs_token_paths(mode): + assert len(get_fs_token_paths("path", mode, num=2, expand=True)[-1]) == 2 + + def test_openfile_api(m): m.open("somepath", "wb").write(b"data") of = OpenFile(m, "somepath") From 2fbe8deff1c8d79f3125def9d930d19c213f03f6 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Thu, 17 Aug 2023 14:15:32 +0100 Subject: [PATCH 04/10] Support cache mapper that is basename plus fixed number of parent directories (#1318) --- fsspec/implementations/cache_mapper.py | 33 ++++++-- fsspec/implementations/cached.py | 36 ++++++-- fsspec/implementations/tests/test_cached.py | 91 +++++++++++++++++++-- 3 files changed, 141 insertions(+), 19 deletions(-) diff --git a/fsspec/implementations/cache_mapper.py b/fsspec/implementations/cache_mapper.py index f9ee29ac2..000ccebc8 100644 --- a/fsspec/implementations/cache_mapper.py +++ b/fsspec/implementations/cache_mapper.py @@ -2,9 +2,10 @@ import abc import hashlib -import os from typing import TYPE_CHECKING +from fsspec.implementations.local import make_path_posix + if TYPE_CHECKING: from typing import Any @@ -30,14 +31,36 @@ def __hash__(self) -> int: class BasenameCacheMapper(AbstractCacheMapper): - """Cache mapper that uses the basename of the remote URL. + """Cache mapper that uses the basename of the remote URL and a fixed number + of directory levels above this. - Different paths with the same basename will therefore have the same cached - basename. + The default is zero directory levels, meaning different paths with the same + basename will have the same cached basename. """ + def __init__(self, directory_levels: int = 0): + if directory_levels < 0: + raise ValueError( + "BasenameCacheMapper requires zero or positive directory_levels" + ) + self.directory_levels = directory_levels + + # Separator for directories when encoded as strings. + self._separator = "_@_" + def __call__(self, path: str) -> str: - return os.path.basename(path) + path = make_path_posix(path) + prefix, *bits = path.rsplit("/", self.directory_levels + 1) + if bits: + return self._separator.join(bits) + else: + return prefix # No separator found, simple filename + + def __eq__(self, other: Any) -> bool: + return super().__eq__(other) and self.directory_levels == other.directory_levels + + def __hash__(self) -> int: + return super().__hash__() ^ hash(self.directory_levels) class HashCacheMapper(AbstractCacheMapper): diff --git a/fsspec/implementations/cached.py b/fsspec/implementations/cached.py index c47c3b290..30aeb119d 100644 --- a/fsspec/implementations/cached.py +++ b/fsspec/implementations/cached.py @@ -8,7 +8,7 @@ import tempfile import time from shutil import rmtree -from typing import Any, ClassVar +from typing import TYPE_CHECKING, Any, Callable, ClassVar from fsspec import AbstractFileSystem, filesystem from fsspec.callbacks import _DEFAULT_CALLBACK @@ -19,6 +19,9 @@ from fsspec.spec import AbstractBufferedFile from fsspec.utils import infer_compression +if TYPE_CHECKING: + from fsspec.implementations.cache_mapper import AbstractCacheMapper + logger = logging.getLogger("fsspec.cached") @@ -53,8 +56,9 @@ def __init__( expiry_time=604800, target_options=None, fs=None, - same_names=False, + same_names: bool | None = None, compression=None, + cache_mapper: AbstractCacheMapper | None = None, **kwargs, ): """ @@ -84,13 +88,19 @@ def __init__( fs: filesystem instance The target filesystem to run against. 
Provide this or ``protocol``. same_names: bool (optional) - By default, target URLs are hashed, so that files from different backends - with the same basename do not conflict. If this is true, the original - basename is used. + By default, target URLs are hashed using a ``HashCacheMapper`` so + that files from different backends with the same basename do not + conflict. If this argument is ``true``, a ``BasenameCacheMapper`` + is used instead. Other cache mapper options are available by using + the ``cache_mapper`` keyword argument. Only one of this and + ``cache_mapper`` should be specified. compression: str (optional) To decompress on download. Can be 'infer' (guess from the URL name), one of the entries in ``fsspec.compression.compr``, or None for no decompression. + cache_mapper: AbstractCacheMapper (optional) + The object use to map from original filenames to cached filenames. + Only one of this and ``same_names`` should be specified. """ super().__init__(**kwargs) if fs is None and target_protocol is None: @@ -115,7 +125,19 @@ def __init__( self.check_files = check_files self.expiry = expiry_time self.compression = compression - self._mapper = create_cache_mapper(same_names) + + if same_names is not None and cache_mapper is not None: + raise ValueError( + "Cannot specify both same_names and cache_mapper in " + "CachingFileSystem.__init__" + ) + if cache_mapper is not None: + self._mapper = cache_mapper + else: + self._mapper = create_cache_mapper( + same_names if same_names is not None else False + ) + self.target_protocol = ( target_protocol if isinstance(target_protocol, str) @@ -128,7 +150,7 @@ def _strip_protocol(path): # acts as a method, since each instance has a difference target return self.fs._strip_protocol(type(self)._strip_protocol(path)) - self._strip_protocol = _strip_protocol + self._strip_protocol: Callable = _strip_protocol def _mkcache(self): os.makedirs(self.storage[-1], exist_ok=True) diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index d8295e778..19ac0975a 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -8,7 +8,11 @@ import fsspec from fsspec.compression import compr from fsspec.exceptions import BlocksizeMismatchError -from fsspec.implementations.cache_mapper import create_cache_mapper +from fsspec.implementations.cache_mapper import ( + BasenameCacheMapper, + HashCacheMapper, + create_cache_mapper, +) from fsspec.implementations.cached import CachingFileSystem, LocalTempFile from fsspec.implementations.local import make_path_posix @@ -36,10 +40,20 @@ def local_filecache(): def test_mapper(): mapper0 = create_cache_mapper(True) + assert mapper0("somefile") == "somefile" + assert mapper0("/somefile") == "somefile" assert mapper0("/somedir/somefile") == "somefile" assert mapper0("/otherdir/somefile") == "somefile" mapper1 = create_cache_mapper(False) + assert ( + mapper1("somefile") + == "dd00b9487898b02555b6a2d90a070586d63f93e80c70aaa60c992fa9e81a72fe" + ) + assert ( + mapper1("/somefile") + == "884c07bc2efe65c60fb9d280a620e7f180488718fb5d97736521b7f9cf5c8b37" + ) assert ( mapper1("/somedir/somefile") == "67a6956e5a5f95231263f03758c1fd9254fdb1c564d311674cec56b0372d2056" @@ -57,9 +71,47 @@ def test_mapper(): assert hash(create_cache_mapper(True)) == hash(mapper0) assert hash(create_cache_mapper(False)) == hash(mapper1) - -@pytest.mark.parametrize("same_names", [False, True]) -def test_metadata(tmpdir, same_names): + with pytest.raises( + 
ValueError, + match="BasenameCacheMapper requires zero or positive directory_levels", + ): + BasenameCacheMapper(-1) + + mapper2 = BasenameCacheMapper(1) + assert mapper2("/somefile") == "somefile" + assert mapper2("/somedir/somefile") == "somedir_@_somefile" + assert mapper2("/otherdir/somefile") == "otherdir_@_somefile" + assert mapper2("/dir1/dir2/dir3/somefile") == "dir3_@_somefile" + + assert mapper2 != mapper0 + assert mapper2 != mapper1 + assert BasenameCacheMapper(1) == mapper2 + + assert hash(mapper2) != hash(mapper0) + assert hash(mapper2) != hash(mapper1) + assert hash(BasenameCacheMapper(1)) == hash(mapper2) + + mapper3 = BasenameCacheMapper(2) + assert mapper3("/somefile") == "somefile" + assert mapper3("/somedir/somefile") == "somedir_@_somefile" + assert mapper3("/otherdir/somefile") == "otherdir_@_somefile" + assert mapper3("/dir1/dir2/dir3/somefile") == "dir2_@_dir3_@_somefile" + + assert mapper3 != mapper0 + assert mapper3 != mapper1 + assert mapper3 != mapper2 + assert BasenameCacheMapper(2) == mapper3 + + assert hash(mapper3) != hash(mapper0) + assert hash(mapper3) != hash(mapper1) + assert hash(mapper3) != hash(mapper2) + assert hash(BasenameCacheMapper(2)) == hash(mapper3) + + +@pytest.mark.parametrize( + "cache_mapper", [BasenameCacheMapper(), BasenameCacheMapper(1), HashCacheMapper()] +) +def test_metadata(tmpdir, cache_mapper): source = os.path.join(tmpdir, "source") afile = os.path.join(source, "afile") os.mkdir(source) @@ -69,7 +121,7 @@ def test_metadata(tmpdir, same_names): "filecache", target_protocol="file", cache_storage=os.path.join(tmpdir, "cache"), - same_names=same_names, + cache_mapper=cache_mapper, ) with fs.open(afile, "rb") as f: @@ -85,8 +137,33 @@ def test_metadata(tmpdir, same_names): assert detail["original"] == afile_posix assert detail["fn"] == fs._mapper(afile_posix) - if same_names: - assert detail["fn"] == "afile" + + if isinstance(cache_mapper, BasenameCacheMapper): + if cache_mapper.directory_levels == 0: + assert detail["fn"] == "afile" + else: + assert detail["fn"] == "source_@_afile" + + +def test_constructor_kwargs(tmpdir): + fs = fsspec.filesystem("filecache", target_protocol="file", same_names=True) + assert isinstance(fs._mapper, BasenameCacheMapper) + + fs = fsspec.filesystem("filecache", target_protocol="file", same_names=False) + assert isinstance(fs._mapper, HashCacheMapper) + + fs = fsspec.filesystem("filecache", target_protocol="file") + assert isinstance(fs._mapper, HashCacheMapper) + + with pytest.raises( + ValueError, match="Cannot specify both same_names and cache_mapper" + ): + fs = fsspec.filesystem( + "filecache", + target_protocol="file", + cache_mapper=HashCacheMapper(), + same_names=True, + ) def test_idempotent(): From 1f12ee61e001e17c81af2a882a89d2eb7f44c885 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 21 Aug 2023 20:33:12 -0400 Subject: [PATCH 05/10] Add counter (#1337) --- docs/source/api.rst | 5 +++++ docs/source/async.rst | 5 +++++ docs/source/changelog.rst | 5 +++++ docs/source/copying.rst | 5 +++++ docs/source/developer.rst | 5 +++++ docs/source/features.rst | 5 +++++ docs/source/index.rst | 5 +++++ docs/source/intro.rst | 5 +++++ docs/source/usage.rst | 5 +++++ 9 files changed, 45 insertions(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index a334ebdf1..b863789ec 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -268,3 +268,8 @@ Utilities fsspec.utils.read_block .. autofunction:: fsspec.utils.read_block + +.. 
raw:: html + + diff --git a/docs/source/async.rst b/docs/source/async.rst index 22202b76d..58af6f752 100644 --- a/docs/source/async.rst +++ b/docs/source/async.rst @@ -147,3 +147,8 @@ available as the attribute ``.loop``. .. autofunction:: fsspec.asyn.sync .. autofunction:: fsspec.asyn.sync_wrapper + +.. raw:: html + + diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index eccbb9e73..42adb556e 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -760,3 +760,8 @@ Version 0.6.0 HTTP server responds with an (incorrect) content-length of 0 (:pr:`163`) * Added a ``detail=True`` parameter to :meth:`fsspec.spec.AbstractFileSystem.ls` (:pr:`168`) * Fixed handling of UNC/DFS paths (:issue:`154`) + +.. raw:: html + + diff --git a/docs/source/copying.rst b/docs/source/copying.rst index d091f35fd..a4e9df40a 100644 --- a/docs/source/copying.rst +++ b/docs/source/copying.rst @@ -341,3 +341,8 @@ Forward slashes are used for directory separators throughout. The trailing slash is required on the new directory otherwise it is interpreted as a filename rather than a directory. + +.. raw:: html + + diff --git a/docs/source/developer.rst b/docs/source/developer.rst index ecaa768b7..c2b3b028e 100644 --- a/docs/source/developer.rst +++ b/docs/source/developer.rst @@ -119,3 +119,8 @@ commit hooks if you intend to make PRs, as linting is done as part of the CI. Docs use sphinx and the numpy docstring style. Please add an entry to the changelog along with any PR. + +.. raw:: html + + diff --git a/docs/source/features.rst b/docs/source/features.rst index db613e9f5..823993aa1 100644 --- a/docs/source/features.rst +++ b/docs/source/features.rst @@ -403,3 +403,8 @@ backends. See the docstrings in the callbacks module for further details. ``fsspec.callbacks.TqdmCallback`` can be used to display a progress bar using tqdm. + +.. raw:: html + + diff --git a/docs/source/index.rst b/docs/source/index.rst index eecfa202b..d89ad1e18 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -115,3 +115,8 @@ The current list of known implementations can be found as follows async.rst api.rst changelog.rst + +.. raw:: html + + diff --git a/docs/source/intro.rst b/docs/source/intro.rst index 24d4ac0e2..58bb184fd 100644 --- a/docs/source/intro.rst +++ b/docs/source/intro.rst @@ -92,3 +92,8 @@ develop new file-system implementations. ``fsspec/spec.py`` contains the main ab to derive from, ``AbstractFileSystem``. .. _zarr: https://zarr.readthedocs.io + +.. raw:: html + + diff --git a/docs/source/usage.rst b/docs/source/usage.rst index cb9e1400f..5634a0fc3 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -117,3 +117,8 @@ or write mode (create names). Critically, the file on the backend system is not print(line) if "KENYA" in line: break + +.. 
raw:: html + + From 1812a6cd7578f03924b5b0b368a9a0c4cbf8bc41 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois Date: Tue, 22 Aug 2023 14:28:46 +0900 Subject: [PATCH 06/10] feat: test to cp/get/put a directory with files with same name prefix --- fsspec/tests/abstract/__init__.py | 47 +++++++++++++++++++++++++++++++ fsspec/tests/abstract/copy.py | 15 ++++++++++ fsspec/tests/abstract/get.py | 17 +++++++++++ fsspec/tests/abstract/put.py | 16 +++++++++++ 4 files changed, 95 insertions(+) diff --git a/fsspec/tests/abstract/__init__.py b/fsspec/tests/abstract/__init__.py index fc63e9ca9..059439d04 100644 --- a/fsspec/tests/abstract/__init__.py +++ b/fsspec/tests/abstract/__init__.py @@ -37,6 +37,18 @@ def fs_glob_edge_cases_files(self, fs, fs_join, fs_path): yield source fs.rm(source, recursive=True) + @pytest.fixture + def fs_dir_and_file_with_same_name_prefix(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used to check cp/get/put on directory + and file with the same name prefixes. + + Cleans up at the end of each test it which it is used. + """ + source = self._dir_and_file_with_same_name_prefix(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + @pytest.fixture def fs_target(self, fs, fs_join, fs_path): """ @@ -71,6 +83,22 @@ def local_glob_edge_cases_files(self, local_fs, local_join, local_path): yield source local_fs.rm(source, recursive=True) + @pytest.fixture + def local_dir_and_file_with_same_name_prefix( + self, local_fs, local_join, local_path + ): + """ + Scenario on local filesystem that is used to check cp/get/put on directory + and file with the same name prefixes. + + Cleans up at the end of each test it which it is used. + """ + source = self._dir_and_file_with_same_name_prefix( + local_fs, local_join, local_path + ) + yield source + local_fs.rm(source, recursive=True) + @pytest.fixture def local_target(self, local_fs, local_join, local_path): """ @@ -141,6 +169,25 @@ def _bulk_operations_scenario_0(self, some_fs, some_join, some_path): some_fs.touch(some_join(nesteddir, "nestedfile")) return source + def _dir_and_file_with_same_name_prefix(self, some_fs, some_join, some_path): + """ + Scenario that is used to check cp/get/put on directory and file with + the same name prefixes. 
Creates the following directory and file structure: + + 📁 source + ├── 📄 subdir.txt + └── 📁 subdir + └── 📄 subfile.txt + """ + source = some_join(some_path, "source") + subdir = some_join(source, "subdir") + file = some_join(source, "subdir.txt") + subfile = some_join(subdir, "subfile.txt") + some_fs.makedirs(subdir) + some_fs.touch(file) + some_fs.touch(subfile) + return source + class AbstractFixtures(BaseAbstractFixtures): """ diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index a5eb19038..477dc894e 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -487,3 +487,18 @@ def test_copy_two_files_new_directory( assert fs.isdir(target) assert fs.isfile(fs_join(target, "file1")) assert fs.isfile(fs_join(target, "file2")) + + def test_copy_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + fs_target, + fs_dir_and_file_with_same_name_prefix, + ): + # Create the test dirs + source = fs_dir_and_file_with_same_name_prefix + + fs.cp(fs_join(source, "subdir"), fs_target, recursive=True) + + assert fs.isfile(fs_join(fs_target, "subfile.txt")) + assert not fs.isfile(fs_join(fs_target, "subdir.txt")) diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 08c04e909..166915287 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -525,3 +525,20 @@ def test_get_directory_recursive( assert local_fs.isdir(target) assert local_fs.isfile(local_join(target, "file")) assert not local_fs.exists(local_join(target, "src")) + + def test_get_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + local_fs, + local_join, + local_target, + fs_dir_and_file_with_same_name_prefix, + ): + # Create the test dirs + source = fs_dir_and_file_with_same_name_prefix + + fs.get(fs_join(source, "subdir"), local_target, recursive=True) + + assert local_fs.isfile(local_join(local_target, "subfile.txt")) + assert not local_fs.isfile(local_join(local_target, "subdir.txt")) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index a92bc4a13..327514a04 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -520,3 +520,19 @@ def test_put_directory_recursive( assert fs.isdir(target) assert fs.isfile(fs_join(target, "file")) assert not fs.exists(fs_join(target, "src")) + + def test_put_directory_without_files_with_same_name_prefix( + self, + fs, + fs_join, + fs_target, + local_join, + local_dir_and_file_with_same_name_prefix, + ): + # Create the test dirs + source = local_dir_and_file_with_same_name_prefix + + fs.put(local_join(source, "subdir"), fs_target, recursive=True) + + assert fs.isfile(fs_join(fs_target, "subfile.txt")) + assert not fs.isfile(fs_join(fs_target, "subdir.txt")) From 0070ddd4f34b34b1af61907c10b0bff79bdd8955 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois Date: Tue, 22 Aug 2023 14:50:46 +0900 Subject: [PATCH 07/10] feat: add a glob test --- fsspec/tests/abstract/copy.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 477dc894e..11a7a1fbc 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -502,3 +502,10 @@ def test_copy_directory_without_files_with_same_name_prefix( assert fs.isfile(fs_join(fs_target, "subfile.txt")) assert not fs.isfile(fs_join(fs_target, "subdir.txt")) + + # Test if glob still works though + # (maybe not the best place for this test) + test = fs.glob(fs_join(source, "subdir*")) + assert sorted(test) == 
sorted( + [fs_join(source, "subdir"), fs_join(source, "subdir.txt")] + ) From a3c84f17b0d0dd0f033e1de186c62a70523f1a16 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois Date: Tue, 22 Aug 2023 15:21:14 +0900 Subject: [PATCH 08/10] feat: use a better test for globs --- fsspec/tests/abstract/copy.py | 27 ++++++++++++++++++--------- fsspec/tests/abstract/get.py | 18 +++++++++++++++--- fsspec/tests/abstract/put.py | 16 ++++++++++++++++ 3 files changed, 49 insertions(+), 12 deletions(-) diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 11a7a1fbc..8f0d6d0ba 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -494,18 +494,27 @@ def test_copy_directory_without_files_with_same_name_prefix( fs_join, fs_target, fs_dir_and_file_with_same_name_prefix, + supports_empty_directories, ): # Create the test dirs source = fs_dir_and_file_with_same_name_prefix + target = fs_target - fs.cp(fs_join(source, "subdir"), fs_target, recursive=True) + # Test without glob + fs.cp(fs_join(source, "subdir"), target, recursive=True) - assert fs.isfile(fs_join(fs_target, "subfile.txt")) - assert not fs.isfile(fs_join(fs_target, "subdir.txt")) + assert fs.isfile(fs_join(target, "subfile.txt")) + assert not fs.isfile(fs_join(target, "subdir.txt")) - # Test if glob still works though - # (maybe not the best place for this test) - test = fs.glob(fs_join(source, "subdir*")) - assert sorted(test) == sorted( - [fs_join(source, "subdir"), fs_join(source, "subdir.txt")] - ) + fs.rm([fs_join(target, "subfile.txt")]) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + assert not fs.exists(target) + + # Test with glob + fs.cp(fs_join(source, "subdir*"), target, recursive=True) + + assert fs.isdir(fs_join(target, "subdir")) + assert fs.isfile(fs_join(target, "subdir", "subfile.txt")) + assert fs.isfile(fs_join(target, "subdir.txt")) diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 166915287..3c2711467 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -537,8 +537,20 @@ def test_get_directory_without_files_with_same_name_prefix( ): # Create the test dirs source = fs_dir_and_file_with_same_name_prefix + target = local_target + + # Test without glob + fs.get(fs_join(source, "subdir"), target, recursive=True) + + assert local_fs.isfile(local_join(target, "subfile.txt")) + assert not local_fs.isfile(local_join(target, "subdir.txt")) + + local_fs.rm([local_join(target, "subfile.txt")]) + assert local_fs.ls(target) == [] - fs.get(fs_join(source, "subdir"), local_target, recursive=True) + # Test with glob + fs.get(fs_join(source, "subdir*"), target, recursive=True) - assert local_fs.isfile(local_join(local_target, "subfile.txt")) - assert not local_fs.isfile(local_join(local_target, "subdir.txt")) + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile.txt")) + assert local_fs.isfile(local_join(target, "subdir.txt")) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index 327514a04..ad30ecbef 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -528,11 +528,27 @@ def test_put_directory_without_files_with_same_name_prefix( fs_target, local_join, local_dir_and_file_with_same_name_prefix, + supports_empty_directories, ): # Create the test dirs source = local_dir_and_file_with_same_name_prefix + target = fs_target + # Test without glob fs.put(local_join(source, "subdir"), fs_target, recursive=True) 
assert fs.isfile(fs_join(fs_target, "subfile.txt")) assert not fs.isfile(fs_join(fs_target, "subdir.txt")) + + fs.rm([fs_join(target, "subfile.txt")]) + if supports_empty_directories: + assert fs.ls(target) == [] + else: + assert not fs.exists(target) + + # Test with glob + fs.put(local_join(source, "subdir*"), fs_target, recursive=True) + + assert fs.isdir(fs_join(fs_target, "subdir")) + assert fs.isfile(fs_join(fs_target, "subdir", "subfile.txt")) + assert fs.isfile(fs_join(fs_target, "subdir.txt")) From c3b4bc36a8c2fa1b98429f5cd963ea0b4d04ad11 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois <37172224+john-jam@users.noreply.github.com> Date: Tue, 22 Aug 2023 22:47:43 +0900 Subject: [PATCH 09/10] Better double asterisks `**` support (#1329) --- fsspec/asyn.py | 104 ++- fsspec/implementations/http.py | 47 +- fsspec/implementations/local.py | 19 - .../tests/local/local_fixtures.py | 6 +- fsspec/implementations/tests/test_local.py | 8 +- fsspec/implementations/tests/test_memory.py | 4 +- fsspec/spec.py | 106 ++- fsspec/tests/abstract/__init__.py | 60 ++ fsspec/tests/abstract/copy.py | 252 ++++-- fsspec/tests/abstract/get.py | 226 ++++- fsspec/tests/abstract/put.py | 257 ++++-- fsspec/tests/conftest.py | 178 +++- fsspec/tests/test_generic.py | 2 + fsspec/tests/test_spec.py | 855 ++++++++++++++++-- fsspec/tests/test_utils.py | 33 +- fsspec/utils.py | 7 +- setup.cfg | 2 +- 17 files changed, 1797 insertions(+), 369 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index b8f8642a0..03b3fcc39 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -13,12 +13,7 @@ from .callbacks import _DEFAULT_CALLBACK from .exceptions import FSTimeoutError -from .implementations.local import ( - LocalFileSystem, - make_path_posix, - trailing_sep, - trailing_sep_maybe_asterisk, -) +from .implementations.local import LocalFileSystem, make_path_posix, trailing_sep from .spec import AbstractBufferedFile, AbstractFileSystem from .utils import is_exception, other_paths @@ -357,14 +352,19 @@ async def _copy( if not paths: return - isdir = isinstance(path2, str) and ( + source_is_file = len(paths) == 1 + dest_is_dir = isinstance(path2, str) and ( trailing_sep(path2) or await self._isdir(path2) ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) path2 = other_paths( paths, path2, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(path1), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) batch_size = batch_size or self.batch_size @@ -514,15 +514,20 @@ async def _put( if not lpaths: return - isdir = isinstance(rpath, str) and ( + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( trailing_sep(rpath) or await self._isdir(rpath) ) + rpath = self._strip_protocol(rpath) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) rpaths = other_paths( lpaths, rpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(lpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -571,11 +576,9 @@ async def _get( """ source_is_str = isinstance(rpath, str) # First check for rpath trailing slash as _strip_protocol removes it. 
- source_not_trailing_sep = source_is_str and not trailing_sep_maybe_asterisk( - rpath - ) + source_not_trailing_sep = source_is_str and not trailing_sep(rpath) rpath = self._strip_protocol(rpath) - rpaths = await self._expand_path(rpath, recursive=recursive) + rpaths = await self._expand_path(rpath, recursive=recursive, maxdepth=maxdepth) if source_is_str and (not recursive or maxdepth is not None): # Non-recursive glob does not copy directories rpaths = [ @@ -585,14 +588,19 @@ async def _get( return lpath = make_path_posix(lpath) - isdir = isinstance(lpath, str) and ( + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( trailing_sep(lpath) or LocalFileSystem().isdir(lpath) ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and source_not_trailing_sep) + ) lpaths = other_paths( rpaths, lpath, - exists=isdir and source_not_trailing_sep, - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) [os.makedirs(os.path.dirname(lp), exist_ok=True) for lp in lpaths] @@ -695,25 +703,24 @@ async def _walk(self, path, maxdepth=None, on_error="omit", **kwargs): ): yield _ - async def _glob(self, path, **kwargs): + async def _glob(self, path, maxdepth=None, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indques = path.find("?") if path.find("?") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indques, indbrace) + min_idx = min(idx_star, idx_qmark, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif await self._exists(path): + if await self._exists(path): if not detail: return [path] else: @@ -723,13 +730,21 @@ async def _glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = await self._find( root, maxdepth=depth, withdirs=True, detail=True, **kwargs @@ -757,14 +772,23 @@ async def _glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) out = { p: allpaths[p] for p in sorted(allpaths) if 
pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: @@ -785,6 +809,12 @@ async def _find(self, path, maxdepth=None, withdirs=False, **kwargs): path = self._strip_protocol(path) out = dict() detail = kwargs.pop("detail", False) + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and await self._isdir(path): + out[path] = await self._info(path) + # async for? async for _, dirs, files in self._walk(path, maxdepth, detail=True, **kwargs): if withdirs: @@ -811,7 +841,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None): path = [self._strip_protocol(p) for p in path] for p in path: # can gather here if has_magic(p): - bit = set(await self._glob(p)) + bit = set(await self._glob(p, maxdepth=maxdepth)) out |= bit if recursive: # glob call above expanded one depth so if maxdepth is defined diff --git a/fsspec/implementations/http.py b/fsspec/implementations/http.py index 5d118dcbd..e37b0001e 100644 --- a/fsspec/implementations/http.py +++ b/fsspec/implementations/http.py @@ -431,7 +431,7 @@ async def _info(self, url, **kwargs): return {"name": url, "size": None, **info, "type": "file"} - async def _glob(self, path, **kwargs): + async def _glob(self, path, maxdepth=None, **kwargs): """ Find files by glob-matching. @@ -439,23 +439,21 @@ async def _glob(self, path, **kwargs): but "?" is not considered as a character for globbing, because it is so common in URLs, often identifying the "query" part. """ + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indbrace) + min_idx = min(idx_star, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif await self._exists(path): + if await self._exists(path): if not detail: return [path] else: @@ -465,13 +463,21 @@ async def _glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = await self._find( root, maxdepth=depth, withdirs=True, detail=True, **kwargs @@ -498,14 +504,23 @@ async def _glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", 
"[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 1a8ffc29f..971074e95 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -65,10 +65,6 @@ def ls(self, path, detail=False, **kwargs): else: return [posixpath.join(path, f) for f in os.listdir(path)] - def glob(self, path, **kwargs): - path = self._strip_protocol(path) - return super().glob(path, **kwargs) - def info(self, path, **kwargs): if isinstance(path, os.DirEntry): # scandir DirEntry @@ -287,21 +283,6 @@ def trailing_sep(path): return path.endswith(os.sep) or (os.altsep is not None and path.endswith(os.altsep)) -def trailing_sep_maybe_asterisk(path): - """Return True if the path ends with a path separator and optionally an - asterisk. - - A forward slash is always considered a path separator, even on Operating - Systems that normally use a backslash. - """ - # TODO: if all incoming paths were posix-compliant then separator would - # always be a forward slash, simplifying this function. - # See https://github.com/fsspec/filesystem_spec/pull/1250 - return path.endswith((os.sep, os.sep + "*")) or ( - os.altsep is not None and path.endswith((os.altsep, os.altsep + "*")) - ) - - class LocalFileOpener(io.IOBase): def __init__( self, path, mode, autocommit=True, fs=None, compression=None, **kwargs diff --git a/fsspec/implementations/tests/local/local_fixtures.py b/fsspec/implementations/tests/local/local_fixtures.py index d850fcf5f..bafff60d9 100644 --- a/fsspec/implementations/tests/local/local_fixtures.py +++ b/fsspec/implementations/tests/local/local_fixtures.py @@ -1,6 +1,6 @@ import pytest -from fsspec.implementations.local import LocalFileSystem +from fsspec.implementations.local import LocalFileSystem, make_path_posix from fsspec.tests.abstract import AbstractFixtures @@ -12,3 +12,7 @@ def fs(self): @pytest.fixture def fs_path(self, tmpdir): return str(tmpdir) + + @pytest.fixture + def fs_sanitize_path(self): + return make_path_posix diff --git a/fsspec/implementations/tests/test_local.py b/fsspec/implementations/tests/test_local.py index 20e54311e..1cfd14c0a 100644 --- a/fsspec/implementations/tests/test_local.py +++ b/fsspec/implementations/tests/test_local.py @@ -323,7 +323,9 @@ def test_globfind_dirs(tmpdir): fs.glob(tmpdir + "/dir/*", detail=True)[tmpdir + "/dir/afile"]["type"] == "file" ) assert [tmpdir + "/dir/afile"] == fs.find(tmpdir) - assert [tmpdir + "/dir", tmpdir + "/dir/afile"] == fs.find(tmpdir, withdirs=True) + assert [tmpdir, tmpdir + "/dir", tmpdir + "/dir/afile"] == fs.find( + tmpdir, withdirs=True + ) def test_touch(tmpdir): @@ -952,12 +954,12 @@ def test_cp_get_put_empty_directory(tmpdir, funcname): # cp/get/put without slash, target directory exists assert fs.isdir(target) func(empty, target) - assert fs.find(target, withdirs=True) == [] + assert fs.find(target, withdirs=True) == [make_path_posix(target)] # cp/get/put with slash, target directory exists assert fs.isdir(target) func(empty + "/", target) - assert 
fs.find(target, withdirs=True) == [] + assert fs.find(target, withdirs=True) == [make_path_posix(target)] fs.rmdir(target) diff --git a/fsspec/implementations/tests/test_memory.py b/fsspec/implementations/tests/test_memory.py index 5bf1131c9..05a40b287 100644 --- a/fsspec/implementations/tests/test_memory.py +++ b/fsspec/implementations/tests/test_memory.py @@ -316,12 +316,12 @@ def test_cp_empty_directory(m): # cp without slash, target directory exists assert m.isdir(target) m.cp(empty, target) - assert m.find(target, withdirs=True) == [] + assert m.find(target, withdirs=True) == [target] # cp with slash, target directory exists assert m.isdir(target) m.cp(empty + "/", target) - assert m.find(target, withdirs=True) == [] + assert m.find(target, withdirs=True) == [target] m.rmdir(target) diff --git a/fsspec/spec.py b/fsspec/spec.py index 457c082e2..ea6949c26 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -486,6 +486,12 @@ def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): # TODO: allow equivalent of -name parameter path = self._strip_protocol(path) out = dict() + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and self.isdir(path): + out[path] = self.info(path) + for _, dirs, files in self.walk(path, maxdepth, detail=True, **kwargs): if withdirs: files.update(dirs) @@ -534,40 +540,40 @@ def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): else: return sizes - def glob(self, path, **kwargs): + def glob(self, path, maxdepth=None, **kwargs): """ Find files by glob-matching. - If the path ends with '/' and does not contain "*", it is essentially - the same as ``ls(path)``, returning only files. + If the path ends with '/', only folders are returned. We support ``"**"``, ``"?"`` and ``"[..]"``. We do not support ^ for pattern negation. + The `maxdepth` option is applied on the first `**` found in the path. + Search path names that contain embedded characters special to this implementation of glob may not produce expected results; e.g., 'foo/bar/*starredfilename*'. kwargs are passed to ``ls``. 
""" + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + import re ends = path.endswith("/") path = self._strip_protocol(path) - indstar = path.find("*") if path.find("*") >= 0 else len(path) - indques = path.find("?") if path.find("?") >= 0 else len(path) - indbrace = path.find("[") if path.find("[") >= 0 else len(path) + idx_star = path.find("*") if path.find("*") >= 0 else len(path) + idx_qmark = path.find("?") if path.find("?") >= 0 else len(path) + idx_brace = path.find("[") if path.find("[") >= 0 else len(path) - ind = min(indstar, indques, indbrace) + min_idx = min(idx_star, idx_qmark, idx_brace) detail = kwargs.pop("detail", False) if not has_magic(path): - root = path - depth = 1 - if ends: - path += "/*" - elif self.exists(path): + if self.exists(path): if not detail: return [path] else: @@ -577,13 +583,21 @@ def glob(self, path, **kwargs): return [] # glob of non-existent returns empty else: return {} - elif "/" in path[:ind]: - ind2 = path[:ind].rindex("/") - root = path[: ind2 + 1] - depth = None if "**" in path else path[ind2 + 1 :].count("/") + 1 + elif "/" in path[:min_idx]: + min_idx = path[:min_idx].rindex("/") + root = path[: min_idx + 1] + depth = path[min_idx + 1 :].count("/") + 1 else: root = "" - depth = None if "**" in path else path[ind + 1 :].count("/") + 1 + depth = path[min_idx + 1 :].count("/") + 1 + + if "**" in path: + if maxdepth is not None: + idx_double_stars = path.find("**") + depth_double_stars = path[idx_double_stars:].count("/") + 1 + depth = depth - depth_double_stars + maxdepth + else: + depth = None allpaths = self.find(root, maxdepth=depth, withdirs=True, detail=True, **kwargs) # Escape characters special to python regex, leaving our supported @@ -609,14 +623,24 @@ def glob(self, path, **kwargs): ) + "$" ) - pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern) + pattern = re.sub("/[*]{2}", "=SLASH_DOUBLE_STARS=", pattern) + pattern = re.sub("[*]{2}/?", "=DOUBLE_STARS=", pattern) pattern = re.sub("[*]", "[^/]*", pattern) - pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*")) + pattern = re.sub("=SLASH_DOUBLE_STARS=", "(|/.*)", pattern) + pattern = re.sub("=DOUBLE_STARS=", ".*", pattern) + pattern = re.compile(pattern) + out = { p: allpaths[p] for p in sorted(allpaths) if pattern.match(p.replace("//", "/").rstrip("/")) } + + # Return directories only when the glob end by a slash + # This is needed for posix glob compliance + if ends: + out = {k: v for k, v in out.items() if v["type"] == "directory"} + if detail: return out else: @@ -918,7 +942,6 @@ def get( LocalFileSystem, make_path_posix, trailing_sep, - trailing_sep_maybe_asterisk, ) source_is_str = isinstance(rpath, str) @@ -931,14 +954,20 @@ def get( if isinstance(lpath, str): lpath = make_path_posix(lpath) - isdir = isinstance(lpath, str) and ( + + source_is_file = len(rpaths) == 1 + dest_is_dir = isinstance(lpath, str) and ( trailing_sep(lpath) or LocalFileSystem().isdir(lpath) ) + + exists = source_is_str and ( + (has_magic(rpath) and source_is_file) + or (not has_magic(rpath) and dest_is_dir and not trailing_sep(rpath)) + ) lpaths = other_paths( rpaths, lpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(rpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -988,7 +1017,6 @@ def put( LocalFileSystem, make_path_posix, trailing_sep, - trailing_sep_maybe_asterisk, ) source_is_str = isinstance(lpath, str) @@ -1002,17 +1030,24 @@ def put( if not lpaths: return - isdir = isinstance(rpath, str) and 
(trailing_sep(rpath) or self.isdir(rpath)) + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or self.isdir(rpath) + ) + rpath = ( self._strip_protocol(rpath) if isinstance(rpath, str) else [self._strip_protocol(p) for p in rpath] ) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) rpaths = other_paths( lpaths, rpath, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(lpath), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -1045,7 +1080,7 @@ def copy( not-found exceptions will cause the path to be skipped; defaults to raise unless recursive is true, where the default is ignore """ - from .implementations.local import trailing_sep, trailing_sep_maybe_asterisk + from .implementations.local import trailing_sep if on_error is None and recursive: on_error = "ignore" @@ -1060,12 +1095,19 @@ def copy( if not paths: return - isdir = isinstance(path2, str) and (trailing_sep(path2) or self.isdir(path2)) + source_is_file = len(paths) == 1 + dest_is_dir = isinstance(path2, str) and ( + trailing_sep(path2) or self.isdir(path2) + ) + + exists = source_is_str and ( + (has_magic(path1) and source_is_file) + or (not has_magic(path1) and dest_is_dir and not trailing_sep(path1)) + ) path2 = other_paths( paths, path2, - exists=isdir and source_is_str and not trailing_sep_maybe_asterisk(path1), - is_dir=isdir, + exists=exists, flatten=not source_is_str, ) @@ -1093,7 +1135,7 @@ def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): path = [self._strip_protocol(p) for p in path] for p in path: if has_magic(p): - bit = set(self.glob(p, **kwargs)) + bit = set(self.glob(p, maxdepth=maxdepth, **kwargs)) out |= bit if recursive: # glob call above expanded one depth so if maxdepth is defined diff --git a/fsspec/tests/abstract/__init__.py b/fsspec/tests/abstract/__init__.py index d2bc1627d..fc63e9ca9 100644 --- a/fsspec/tests/abstract/__init__.py +++ b/fsspec/tests/abstract/__init__.py @@ -26,6 +26,17 @@ def fs_bulk_operations_scenario_0(self, fs, fs_join, fs_path): yield source fs.rm(source, recursive=True) + @pytest.fixture + def fs_glob_edge_cases_files(self, fs, fs_join, fs_path): + """ + Scenario on remote filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(fs, fs_join, fs_path) + yield source + fs.rm(source, recursive=True) + @pytest.fixture def fs_target(self, fs, fs_join, fs_path): """ @@ -49,6 +60,17 @@ def local_bulk_operations_scenario_0(self, local_fs, local_join, local_path): yield source local_fs.rm(source, recursive=True) + @pytest.fixture + def local_glob_edge_cases_files(self, local_fs, local_join, local_path): + """ + Scenario on local filesystem that is used for glob edge cases cp/get/put tests. + + Cleans up at the end of each test it which it is used. + """ + source = self._glob_edge_cases_files(local_fs, local_join, local_path) + yield source + local_fs.rm(source, recursive=True) + @pytest.fixture def local_target(self, local_fs, local_join, local_path): """ @@ -61,6 +83,39 @@ def local_target(self, local_fs, local_join, local_path): if local_fs.exists(target): local_fs.rm(target, recursive=True) + def _glob_edge_cases_files(self, some_fs, some_join, some_path): + """ + Scenario that is used for glob edge cases cp/get/put tests. 
+ Creates the following directory and file structure: + + 📁 source + ├── 📄 file1 + ├── 📄 file2 + ├── 📁 subdir0 + │ ├── 📄 subfile1 + │ ├── 📄 subfile2 + │ └── 📁 nesteddir + │ └── 📄 nestedfile + └── 📁 subdir1 + ├── 📄 subfile1 + ├── 📄 subfile2 + └── 📁 nesteddir + └── 📄 nestedfile + """ + source = some_join(some_path, "source") + some_fs.touch(some_join(source, "file1")) + some_fs.touch(some_join(source, "file2")) + + for subdir_idx in range(2): + subdir = some_join(source, f"subdir{subdir_idx}") + nesteddir = some_join(subdir, "nesteddir") + some_fs.makedirs(nesteddir) + some_fs.touch(some_join(subdir, "subfile1")) + some_fs.touch(some_join(subdir, "subfile2")) + some_fs.touch(some_join(nesteddir, "nestedfile")) + + return source + def _bulk_operations_scenario_0(self, some_fs, some_join, some_path): """ Scenario that is used for many cp/get/put tests. Creates the following @@ -133,8 +188,13 @@ def local_join(self): def local_path(self, tmpdir): return tmpdir + @pytest.fixture def supports_empty_directories(self): """ Return whether this implementation supports empty directories. """ return True + + @pytest.fixture + def fs_sanitize_path(self): + return lambda x: x diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index 6498fd215..a5eb19038 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -1,13 +1,25 @@ +from itertools import product + +import pytest + +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractCopyTests: def test_copy_file_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1a source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file fs.touch(fs_join(target, "dummy")) assert fs.isdir(target) @@ -53,13 +65,22 @@ def test_copy_file_to_new_directory( assert fs.isfile(fs_join(target, "newdir", "subfile1")) def test_copy_file_to_file_in_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1c source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) fs.cp(fs_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) assert fs.isfile(fs_join(target, "newfile")) @@ -80,14 +101,19 @@ def test_copy_file_to_file_in_new_directory( assert fs.isfile(fs_join(target, "newdir", "newfile")) def test_copy_directory_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1e source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -101,7 +127,7 @@ def test_copy_directory_to_existing_directory( # Without recursive does nothing fs.cp(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if 
supports_empty_directories else [dummy]) # With recursive fs.cp(s, t, recursive=True) @@ -112,7 +138,14 @@ def test_copy_directory_to_existing_directory( assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -121,7 +154,7 @@ def test_copy_directory_to_existing_directory( assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # Limit recursive by maxdepth fs.cp(s, t, recursive=True, maxdepth=1) @@ -131,7 +164,13 @@ def test_copy_directory_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -139,10 +178,15 @@ def test_copy_directory_to_existing_directory( assert not fs.exists(fs_join(target, "subdir", "nesteddir")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_directory_to_new_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1f source = fs_bulk_operations_scenario_0 @@ -160,7 +204,11 @@ def test_copy_directory_to_new_directory( # Without recursive does nothing fs.cp(s, t) - assert fs.ls(target) == [] + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) # With recursive fs.cp(s, t, recursive=True) @@ -186,13 +234,23 @@ def test_copy_directory_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) def test_copy_glob_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 1g source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + dummy = fs_join(target, "dummy") + fs.touch(dummy) + assert fs.isdir(target) for target_slash in [False, True]: t = target + "/" if target_slash else target @@ -205,29 +263,51 @@ def test_copy_glob_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True) - assert fs.isfile(fs_join(target, "subfile1")) - assert 
fs.isfile(fs_join(target, "subfile2")) - assert fs.isdir(fs_join(target, "nesteddir")) - assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) - # Limit recursive by maxdepth - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert not fs.exists(fs_join(target, "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_glob_to_new_directory( self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target @@ -257,39 +337,92 @@ def test_copy_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert fs.isdir(fs_join(target, "newdir", "nesteddir")) - assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.cp(fs_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.cp( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), 
recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_copy_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + fs_target, + fs_sanitize_path, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + target = fs_target - # Limit recursive by maxdepth - fs.cp(fs_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert not fs.exists(fs_join(target, "newdir", "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.copy(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_copy_list_of_files_to_existing_directory( - self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target + self, + fs, + fs_join, + fs_bulk_operations_scenario_0, + fs_target, + supports_empty_directories, ): # Copy scenario 2a source = fs_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -309,8 +442,15 @@ def test_copy_list_of_files_to_existing_directory( assert fs.isfile(fs_join(target, "file2")) assert fs.isfile(fs_join(target, "subfile1")) - fs.rm(fs.find(target)) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_copy_list_of_files_to_new_directory( self, fs, fs_join, fs_bulk_operations_scenario_0, fs_target diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index baa9aa4a9..08c04e909 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -1,3 +1,11 @@ +from itertools import product + +import pytest + +from fsspec.implementations.local import make_path_posix +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractGetTests: def test_get_file_to_existing_directory( self, @@ -66,7 +74,6 @@ def test_get_file_to_file_in_existing_directory( self, fs, fs_join, - fs_path, fs_bulk_operations_scenario_0, local_fs, local_join, @@ -117,6 +124,7 @@ def test_get_directory_to_existing_directory( target = local_target local_fs.mkdir(target) + assert local_fs.isdir(target) for source_slash, target_slash in zip([False, True], [False, True]): s = 
fs_join(source, "subdir") @@ -125,9 +133,8 @@ def test_get_directory_to_existing_directory( t = target + "/" if target_slash else target # Without recursive does nothing - # ERROR: erroneously creates new directory - # fs.get(s, t) - # assert fs.ls(target) == [] + fs.get(s, t) + assert local_fs.ls(target) == [] # With recursive fs.get(s, t, recursive=True) @@ -136,6 +143,7 @@ def test_get_directory_to_existing_directory( assert local_fs.isfile(local_join(target, "subfile2")) assert local_fs.isdir(local_join(target, "nesteddir")) assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) local_fs.rm( [ @@ -157,8 +165,29 @@ def test_get_directory_to_existing_directory( local_fs.rm(local_join(target, "subdir"), recursive=True) assert local_fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + if source_slash: + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + else: + assert local_fs.isdir(local_join(target, "subdir")) + assert local_fs.isfile(local_join(target, "subdir", "subfile1")) + assert local_fs.isfile(local_join(target, "subdir", "subfile2")) + assert not local_fs.exists(local_join(target, "subdir", "nesteddir")) + + local_fs.rm(local_join(target, "subdir"), recursive=True) + assert local_fs.ls(target) == [] def test_get_directory_to_new_directory( self, @@ -184,9 +213,8 @@ def test_get_directory_to_new_directory( t += "/" # Without recursive does nothing - # ERROR: erroneously creates new directory - # fs.get(s, t) - # assert fs.ls(target) == [] + fs.get(s, t) + assert local_fs.ls(target) == [] # With recursive fs.get(s, t, recursive=True) @@ -197,12 +225,21 @@ def test_get_directory_to_new_directory( assert local_fs.isfile( local_join(target, "newdir", "nesteddir", "nestedfile") ) + assert not local_fs.exists(local_join(target, "subdir")) local_fs.rm(local_join(target, "newdir"), recursive=True) assert local_fs.ls(target) == [] - # Limit by maxdepth - # ERROR: maxdepth ignored here + # Limit recursive by maxdepth + fs.get(s, t, recursive=True, maxdepth=1) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) def test_get_glob_to_existing_directory( self, @@ -219,20 +256,62 @@ def test_get_glob_to_existing_directory( target = local_target local_fs.mkdir(target) - # for target_slash in [False, True]: - for target_slash in [False]: + for target_slash in [False, True]: t = target + "/" if target_slash else target # Without recursive fs.get(fs_join(source, "subdir", "*"), t) assert local_fs.isfile(local_join(target, "subfile1")) assert local_fs.isfile(local_join(target, "subfile2")) - # assert not local_fs.isdir(local_join(target, "nesteddir")) # ERROR - assert not local_fs.isdir(local_join(target, "subdir")) + assert not 
local_fs.isdir(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] # With recursive + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert local_fs.isdir(local_join(target, "nesteddir")) + assert local_fs.isfile(local_join(target, "nesteddir", "nestedfile")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + local_join(target, "nesteddir"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] - # Limit by maxdepth + # Limit recursive by maxdepth + fs.get( + fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isfile(local_join(target, "subfile1")) + assert local_fs.isfile(local_join(target, "subfile2")) + assert not local_fs.exists(local_join(target, "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + + local_fs.rm( + [ + local_join(target, "subfile1"), + local_join(target, "subfile2"), + ], + recursive=True, + ) + assert local_fs.ls(target) == [] def test_get_glob_to_new_directory( self, @@ -259,27 +338,91 @@ def test_get_glob_to_new_directory( assert local_fs.isdir(local_join(target, "newdir")) assert local_fs.isfile(local_join(target, "newdir", "subfile1")) assert local_fs.isfile(local_join(target, "newdir", "subfile2")) - # ERROR - do not copy empty directory - # assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) local_fs.rm(local_join(target, "newdir"), recursive=True) assert local_fs.ls(target) == [] # With recursive - fs.get(fs_join(source, "subdir", "*"), t, recursive=True) - assert local_fs.isdir(local_join(target, "newdir")) - assert local_fs.isfile(local_join(target, "newdir", "subfile1")) - assert local_fs.isfile(local_join(target, "newdir", "subfile2")) - assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) - assert local_fs.isfile( - local_join(target, "newdir", "nesteddir", "nestedfile") - ) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.get(fs_join(source, "subdir", glob), t, recursive=recursive) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert local_fs.isdir(local_join(target, "newdir", "nesteddir")) + assert local_fs.isfile( + local_join(target, "newdir", "nesteddir", "nestedfile") + ) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) - local_fs.rm(local_join(target, "newdir"), recursive=True) - assert local_fs.ls(target) == [] + local_fs.rm(local_join(target, "newdir"), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.get( + 
fs_join(source, "subdir", glob), t, recursive=recursive, maxdepth=1 + ) + assert local_fs.isdir(local_join(target, "newdir")) + assert local_fs.isfile(local_join(target, "newdir", "subfile1")) + assert local_fs.isfile(local_join(target, "newdir", "subfile2")) + assert not local_fs.exists(local_join(target, "newdir", "nesteddir")) + assert not local_fs.exists(local_join(target, "subdir")) + assert not local_fs.exists(local_join(target, "newdir", "subdir")) + + local_fs.rm(local_fs.ls(target, detail=False), recursive=True) + assert not local_fs.exists(local_join(target, "newdir")) + + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_get_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_glob_edge_cases_files, + local_fs, + local_join, + local_target, + ): + # Copy scenario 1g + source = fs_glob_edge_cases_files - # Limit by maxdepth - # ERROR: this is not correct + target = local_target + + for new_dir, target_slash in product([True, False], [True, False]): + local_fs.mkdir(target) + + t = local_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.get(fs_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = local_fs.find(target) + if new_dir: + prefixed_expected = [ + make_path_posix(local_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + make_path_posix(local_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + local_fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_get_list_of_files_to_existing_directory( self, @@ -310,7 +453,14 @@ def test_get_list_of_files_to_existing_directory( assert local_fs.isfile(local_join(target, "file2")) assert local_fs.isfile(local_join(target, "subfile1")) - local_fs.rm(local_fs.find(target)) + local_fs.rm( + [ + local_join(target, "file1"), + local_join(target, "file2"), + local_join(target, "subfile1"), + ], + recursive=True, + ) assert local_fs.ls(target) == [] def test_get_list_of_files_to_new_directory( @@ -358,13 +508,13 @@ def test_get_directory_recursive( fs.get(src, target, recursive=True) assert local_fs.isdir(target) - if loop == 0: - assert local_fs.isfile(local_join(target, "file")) - assert not local_fs.exists(local_join(target, "src")) - else: - assert local_fs.isfile(local_join(target, "file")) - assert local_fs.isdir(local_join(target, "src")) - assert local_fs.isfile(local_join(target, "src", "file")) + if loop == 0: + assert local_fs.isfile(local_join(target, "file")) + assert not local_fs.exists(local_join(target, "src")) + else: + assert local_fs.isfile(local_join(target, "file")) + assert local_fs.isdir(local_join(target, "src")) + assert local_fs.isfile(local_join(target, "src", "file")) local_fs.rm(target, recursive=True) diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index d06f9d9b5..a92bc4a13 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -1,3 +1,10 @@ +from itertools import product + +import pytest + +from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS + + class AbstractPutTests: def test_put_file_to_existing_directory( self, @@ -6,13 +13,14 @@ def test_put_file_to_existing_directory( fs_target, local_join, local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1a source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not 
self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file fs.touch(fs_join(target, "dummy")) assert fs.isdir(target) @@ -58,13 +66,23 @@ def test_put_file_to_new_directory( assert fs.isfile(fs_join(target, "newdir", "subfile1")) def test_put_file_to_file_in_existing_directory( - self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, ): # Copy scenario 1c source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) + if not supports_empty_directories: + # Force target directory to exist by adding a dummy file + fs.touch(fs_join(target, "dummy")) + assert fs.isdir(target) fs.put(local_join(source, "subdir", "subfile1"), fs_join(target, "newfile")) assert fs.isfile(fs_join(target, "newfile")) @@ -86,14 +104,19 @@ def test_put_file_to_file_in_new_directory( assert fs.isfile(fs_join(target, "newdir", "newfile")) def test_put_directory_to_existing_directory( - self, fs, fs_join, fs_target, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1e source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -107,7 +130,7 @@ def test_put_directory_to_existing_directory( # Without recursive does nothing fs.put(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive fs.put(s, t, recursive=True) @@ -118,7 +141,14 @@ def test_put_directory_to_existing_directory( assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -127,7 +157,7 @@ def test_put_directory_to_existing_directory( assert fs.isfile(fs_join(target, "subdir", "nesteddir", "nestedfile")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # Limit recursive by maxdepth fs.put(s, t, recursive=True, maxdepth=1) @@ -137,7 +167,13 @@ def test_put_directory_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) else: assert fs.isdir(fs_join(target, "subdir")) assert fs.isfile(fs_join(target, "subdir", "subfile1")) @@ -145,21 +181,21 @@ def test_put_directory_to_existing_directory( assert not fs.exists(fs_join(target, "subdir", "nesteddir")) fs.rm(fs_join(target, "subdir"), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_directory_to_new_directory( - self, fs, fs_join, 
fs_target, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_bulk_operations_scenario_0, + supports_empty_directories, ): # Copy scenario 1f source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): - # Force target directory to exist by adding a dummy file - dummy = fs_join(target, "dummy") - fs.touch(dummy) - assert fs.isdir(target) for source_slash, target_slash in zip([False, True], [False, True]): s = fs_join(source, "subdir") @@ -171,7 +207,11 @@ def test_put_directory_to_new_directory( # Without recursive does nothing fs.put(s, t) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + if supports_empty_directories: + assert fs.ls(target) == [] + else: + with pytest.raises(FileNotFoundError): + fs.ls(target) # With recursive fs.put(s, t, recursive=True) @@ -197,14 +237,20 @@ def test_put_directory_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) def test_put_glob_to_existing_directory( - self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 + self, + fs, + fs_join, + fs_target, + local_join, + supports_empty_directories, + local_bulk_operations_scenario_0, ): # Copy scenario 1g source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -221,29 +267,54 @@ def test_put_glob_to_existing_directory( assert not fs.exists(fs_join(target, "nesteddir", "nestedfile")) assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) # With recursive - fs.put(local_join(source, "subdir", "*"), t, recursive=True) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert fs.isdir(fs_join(target, "nesteddir")) - assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isfile(fs_join(target, "subfile1")) + assert fs.isfile(fs_join(target, "subfile2")) + assert fs.isdir(fs_join(target, "nesteddir")) + assert fs.isfile(fs_join(target, "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) - # Limit recursive by maxdepth - fs.put(local_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isfile(fs_join(target, "subfile1")) - assert fs.isfile(fs_join(target, "subfile2")) - assert not fs.exists(fs_join(target, "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + fs_join(target, "nesteddir"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isfile(fs_join(target, "subfile1")) + 
assert fs.isfile(fs_join(target, "subfile2")) + assert not fs.exists(fs_join(target, "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) - fs.rm(fs.ls(target, detail=False), recursive=True) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "subfile1"), + fs_join(target, "subfile2"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_glob_to_new_directory( self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 @@ -253,11 +324,6 @@ def test_put_glob_to_new_directory( target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): - # Force target directory to exist by adding a dummy file - dummy = fs_join(target, "dummy") - fs.touch(dummy) - assert fs.isdir(target) for target_slash in [False, True]: t = fs_join(target, "newdir") @@ -278,29 +344,81 @@ def test_put_glob_to_new_directory( assert not fs.exists(fs_join(target, "newdir")) # With recursive - fs.put(local_join(source, "subdir", "*"), t, recursive=True) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert fs.isdir(fs_join(target, "newdir", "nesteddir")) - assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + for glob, recursive in zip(["*", "**"], [True, False]): + fs.put(local_join(source, "subdir", glob), t, recursive=recursive) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert fs.isdir(fs_join(target, "newdir", "nesteddir")) + assert fs.isfile(fs_join(target, "newdir", "nesteddir", "nestedfile")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) + + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) + + # Limit recursive by maxdepth + fs.put( + local_join(source, "subdir", glob), + t, + recursive=recursive, + maxdepth=1, + ) + assert fs.isdir(fs_join(target, "newdir")) + assert fs.isfile(fs_join(target, "newdir", "subfile1")) + assert fs.isfile(fs_join(target, "newdir", "subfile2")) + assert not fs.exists(fs_join(target, "newdir", "nesteddir")) + assert not fs.exists(fs_join(target, "subdir")) + assert not fs.exists(fs_join(target, "newdir", "subdir")) - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + fs.rm(fs_join(target, "newdir"), recursive=True) + assert not fs.exists(fs_join(target, "newdir")) - # Limit recursive by maxdepth - fs.put(local_join(source, "subdir", "*"), t, recursive=True, maxdepth=1) - assert fs.isdir(fs_join(target, "newdir")) - assert fs.isfile(fs_join(target, "newdir", "subfile1")) - assert fs.isfile(fs_join(target, "newdir", "subfile2")) - assert not fs.exists(fs_join(target, "newdir", "nesteddir")) - assert not fs.exists(fs_join(target, "subdir")) - assert not fs.exists(fs_join(target, "newdir", "subdir")) + @pytest.mark.parametrize( + GLOB_EDGE_CASES_TESTS["argnames"], + GLOB_EDGE_CASES_TESTS["argvalues"], + ) + def test_put_glob_edge_cases( + self, + path, + recursive, + maxdepth, + expected, + fs, + fs_join, + fs_target, + local_glob_edge_cases_files, + local_join, + fs_sanitize_path, + ): + # Copy scenario 1g + source 
= local_glob_edge_cases_files - fs.rm(fs_join(target, "newdir"), recursive=True) - assert not fs.exists(fs_join(target, "newdir")) + target = fs_target + + for new_dir, target_slash in product([True, False], [True, False]): + fs.mkdir(target) + + t = fs_join(target, "newdir") if new_dir else target + t = t + "/" if target_slash else t + + fs.put(local_join(source, path), t, recursive=recursive, maxdepth=maxdepth) + + output = fs.find(target) + if new_dir: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, "newdir", p)) for p in expected + ] + else: + prefixed_expected = [ + fs_sanitize_path(fs_join(target, p)) for p in expected + ] + assert sorted(output) == sorted(prefixed_expected) + + try: + fs.rm(target, recursive=True) + except FileNotFoundError: + pass def test_put_list_of_files_to_existing_directory( self, @@ -309,14 +427,14 @@ def test_put_list_of_files_to_existing_directory( fs_target, local_join, local_bulk_operations_scenario_0, - fs_path, + supports_empty_directories, ): # Copy scenario 2a source = local_bulk_operations_scenario_0 target = fs_target fs.mkdir(target) - if not self.supports_empty_directories(): + if not supports_empty_directories: # Force target directory to exist by adding a dummy file dummy = fs_join(target, "dummy") fs.touch(dummy) @@ -336,8 +454,15 @@ def test_put_list_of_files_to_existing_directory( assert fs.isfile(fs_join(target, "file2")) assert fs.isfile(fs_join(target, "subfile1")) - fs.rm(fs.find(target)) - assert fs.ls(target) == [] if self.supports_empty_directories() else [dummy] + fs.rm( + [ + fs_join(target, "file1"), + fs_join(target, "file2"), + fs_join(target, "subfile1"), + ], + recursive=True, + ) + assert fs.ls(target) == ([] if supports_empty_directories else [dummy]) def test_put_list_of_files_to_new_directory( self, fs, fs_join, fs_target, local_join, local_bulk_operations_scenario_0 diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index 9fdf25b7a..544e8a0e0 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -18,6 +18,182 @@ ).read() win = os.name == "nt" +GLOB_EDGE_CASES_TESTS = { + "argnames": ("path", "recursive", "maxdepth", "expected"), + "argvalues": [ + ("fil?1", False, None, ["file1"]), + ("fil?1", True, None, ["file1"]), + ("file[1-2]", False, None, ["file1", "file2"]), + ("file[1-2]", True, None, ["file1", "file2"]), + ("*", False, None, ["file1", "file2"]), + ( + "*", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*", True, 1, ["file1", "file2"]), + ( + "*", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("*1", False, None, ["file1"]), + ( + "*1", + True, + None, + [ + "file1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]), + ( + "**", + False, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**", True, 1, ["file1", "file2"]), + ( + "**", + True, + 2, + [ 
+ "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + False, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ( + "**1", + True, + None, + [ + "file1", + "subdir0/subfile1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**1", True, 1, ["file1"]), + ( + "**1", + True, + 2, + ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"], + ), + ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ("**/subdir0", False, None, []), + ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("**/subdir0/nested*", False, 2, []), + ("**/subdir0/nested*", True, 2, ["nestedfile"]), + ("subdir[1-2]", False, None, []), + ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]), + ("subdir[0-1]", False, None, []), + ( + "subdir[0-1]", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + False, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ], +} + @pytest.fixture def reset_files(): @@ -72,7 +248,7 @@ def do_GET(self): else: # suffix only l = len(file_data) - content_range = f"bytes {l-int(end)}-{l-1}/{l}" + content_range = f"bytes {l - int(end)}-{l - 1}/{l}" file_data = file_data[-int(end) :] if "use_206" in self.headers: status = 206 diff --git a/fsspec/tests/test_generic.py b/fsspec/tests/test_generic.py index 4eff3f434..aa8d9bfc3 100644 --- a/fsspec/tests/test_generic.py +++ b/fsspec/tests/test_generic.py @@ -64,6 +64,7 @@ def test_rsync(tmpdir, m): assert set(allfiles) == { f"file://{pos_tmpdir}{_}" for _ in [ + "", "/deep", "/deep/path", "/deep/path/afile", @@ -76,6 +77,7 @@ def test_rsync(tmpdir, m): assert set(allfiles2) == { f"file://{pos_tmpdir}{_}" for _ in [ + "", "/deep", "/deep/path", "/deep/path/afile", diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index 68be18c66..f095ebdf1 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -1,6 +1,9 @@ +import glob import json import os import pickle +import subprocess +import sys from collections import defaultdict import numpy as np @@ -9,8 +12,371 @@ import fsspec from fsspec.implementations.ftp import FTPFileSystem from fsspec.implementations.http import HTTPFileSystem +from fsspec.implementations.local import LocalFileSystem from fsspec.spec import AbstractBufferedFile, AbstractFileSystem +PATHS_FOR_GLOB_TESTS = ( + {"name": "test0.json", "type": "file", "size": 100}, + {"name": "test0.yaml", "type": "file", "size": 100}, + {"name": "test0", "type": "directory", "size": 0}, + {"name": "test0/test0.json", "type": "file", "size": 100}, + {"name": "test0/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test1", "type": "directory", "size": 0}, + {"name": "test0/test1/test0.json", "type": "file", "size": 100}, + {"name": "test0/test1/test0.yaml", "type": 
"file", "size": 100}, + {"name": "test0/test1/test2", "type": "directory", "size": 0}, + {"name": "test0/test1/test2/test0.json", "type": "file", "size": 100}, + {"name": "test0/test1/test2/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2", "type": "directory", "size": 0}, + {"name": "test0/test2/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2/test1", "type": "directory", "size": 0}, + {"name": "test0/test2/test1/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test0.yaml", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test3", "type": "directory", "size": 0}, + {"name": "test0/test2/test1/test3/test0.json", "type": "file", "size": 100}, + {"name": "test0/test2/test1/test3/test0.yaml", "type": "file", "size": 100}, + {"name": "test1.json", "type": "file", "size": 100}, + {"name": "test1.yaml", "type": "file", "size": 100}, + {"name": "test1", "type": "directory", "size": 0}, + {"name": "test1/test0.json", "type": "file", "size": 100}, + {"name": "test1/test0.yaml", "type": "file", "size": 100}, + {"name": "test1/test0", "type": "directory", "size": 0}, + {"name": "test1/test0/test0.json", "type": "file", "size": 100}, + {"name": "test1/test0/test0.yaml", "type": "file", "size": 100}, + {"name": "special_chars", "type": "directory", "size": 0}, + {"name": "special_chars/f\\oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f.oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f+oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f(oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f)oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f|oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f^oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f$oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f{oo.txt", "type": "file", "size": 100}, + {"name": "special_chars/f}oo.txt", "type": "file", "size": 100}, +) + +GLOB_POSIX_TESTS = { + "argnames": ("path", "expected"), + "argvalues": [ + ("nonexistent", []), + ("test0.json", ["test0.json"]), + ("test0", ["test0"]), + ("test0/", ["test0"]), + ("test1/test0.yaml", ["test1/test0.yaml"]), + ("test0/test[1-2]", ["test0/test1", "test0/test2"]), + ("test0/test[1-2]/", ["test0/test1", "test0/test2"]), + ( + "test0/test[1-2]/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], + ), + ( + "test0/test[1-2]/*.[j]*", + ["test0/test1/test0.json", "test0/test2/test0.json"], + ), + ("special_chars/f\\oo.*", ["special_chars/f\\oo.txt"]), + ("special_chars/f.oo.*", ["special_chars/f.oo.txt"]), + ("special_chars/f+oo.*", ["special_chars/f+oo.txt"]), + ("special_chars/f(oo.*", ["special_chars/f(oo.txt"]), + ("special_chars/f)oo.*", ["special_chars/f)oo.txt"]), + ("special_chars/f|oo.*", ["special_chars/f|oo.txt"]), + ("special_chars/f^oo.*", ["special_chars/f^oo.txt"]), + ("special_chars/f$oo.*", ["special_chars/f$oo.txt"]), + ("special_chars/f{oo.*", ["special_chars/f{oo.txt"]), + ("special_chars/f}oo.*", ["special_chars/f}oo.txt"]), + ( + "*", + [ + "special_chars", + "test0.json", + "test0.yaml", + "test0", + "test1.json", + "test1.yaml", + "test1", + ], + ), + ("*.yaml", ["test0.yaml", "test1.yaml"]), + ( + "**", + [ + "special_chars", + "special_chars/f$oo.txt", + "special_chars/f(oo.txt", + 
"special_chars/f)oo.txt", + "special_chars/f+oo.txt", + "special_chars/f.oo.txt", + "special_chars/f\\oo.txt", + "special_chars/f^oo.txt", + "special_chars/f{oo.txt", + "special_chars/f|oo.txt", + "special_chars/f}oo.txt", + "test0.json", + "test0.yaml", + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + "test1.json", + "test1.yaml", + "test1", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("*/", ["special_chars", "test0", "test1"]), + ( + "**/", + [ + "special_chars", + "test0", + "test0/test1", + "test0/test1/test2", + "test0/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + "test1", + "test1/test0", + ], + ), + ("*/*.yaml", ["test0/test0.yaml", "test1/test0.yaml"]), + ( + "**/*.yaml", + [ + "test0.yaml", + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + "test1.yaml", + "test1/test0.yaml", + "test1/test0/test0.yaml", + ], + ), + ( + "*/test1/*", + ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"], + ), + ("*/test1/*.yaml", ["test0/test1/test0.yaml"]), + ( + "**/test1/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + ], + ), + ( + "**/test1/*.yaml", + [ + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test1/test0.yaml", + ], + ), + ("*/test1/*/", ["test0/test1/test2"]), + ( + "**/test1/*/", + ["test0/test1/test2", "test0/test2/test1/test3", "test1/test0"], + ), + ( + "*/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + ], + ), + ( + "**/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + "test1", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("*/test1/**/", ["test0/test1", "test0/test1/test2"]), + ( + "**/test1/**/", + [ + "test0/test1", + "test0/test1/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + "test1", + "test1/test0", + ], + ), + ( + "test0/*", + ["test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test2"], + ), + ("test0/*.yaml", ["test0/test0.yaml"]), + ( + "test0/**", + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + 
"test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/*/", ["test0/test1", "test0/test2"]), + ( + "test0/**/", + [ + "test0", + "test0/test1", + "test0/test1/test2", + "test0/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + ], + ), + ("test0/*/*.yaml", ["test0/test1/test0.yaml", "test0/test2/test0.yaml"]), + ( + "test0/**/*.yaml", + [ + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/*/test1/*", + [ + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + ], + ), + ("test0/*/test1/*.yaml", ["test0/test2/test1/test0.yaml"]), + ( + "test0/**/test1/*", + [ + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + ], + ), + ( + "test0/**/test1/*.yaml", + ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"], + ), + ("test0/*/test1/*/", ["test0/test2/test1/test3"]), + ("test0/**/test1/*/", ["test0/test1/test2", "test0/test2/test1/test3"]), + ( + "test0/*/test1/**", + [ + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/**/test1/**", + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.json", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3", + "test0/test2/test1/test3/test0.json", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/*/test1/**/", ["test0/test2/test1", "test0/test2/test1/test3"]), + ( + "test0/**/test1/**/", + [ + "test0/test1", + "test0/test1/test2", + "test0/test2/test1", + "test0/test2/test1/test3", + ], + ), + ], +} + class DummyTestFS(AbstractFileSystem): protocol = "mock" @@ -43,17 +409,13 @@ class DummyTestFS(AbstractFileSystem): }, {"name": "misc", "type": "directory"}, {"name": "misc/foo.txt", "type": "file", "size": 100}, - {"name": "glob_test", "type": "directory", "size": 0}, - {"name": "glob_test/hat", "type": "directory", "size": 0}, - {"name": "glob_test/hat/^foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/dollar", "type": "directory", "size": 0}, - {"name": "glob_test/dollar/$foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/lbrace", "type": "directory", "size": 0}, - {"name": "glob_test/lbrace/{foo.txt", "type": "file", "size": 100}, - {"name": "glob_test/rbrace", "type": "directory", "size": 0}, - {"name": "glob_test/rbrace/}foo.txt", "type": "file", "size": 100}, ) + def __init__(self, fs_content=None, **kwargs): + if fs_content is not None: + self._fs_contents = fs_content + super().__init__(**kwargs) + def __getitem__(self, name): for item in self._fs_contents: if item["name"] == name: @@ -107,69 +469,123 @@ def _open( @pytest.mark.parametrize( - "test_path, expected", + ["test_paths", "recursive", "maxdepth", "expected"], [ ( - "mock://top_level/second_level/date=2019-10-01/a.parquet", - 
["top_level/second_level/date=2019-10-01/a.parquet"], + ( + "top_level/second_level", + "top_level/sec*", + "top_level/sec*vel", + "top_level/*", + ), + True, + None, + [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ( + "top_level/second_level", + "top_level/sec*", + "top_level/sec*vel", + "top_level/*", + ), + False, + None, + [ + "top_level/second_level", + ], + ), + ( + ("top_level/second_level",), + True, + 1, + [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-04", + ], ), ( - "mock://top_level/second_level/date=2019-10-01/*", + ("top_level/second_level",), + True, + 2, [ + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", + "top_level/second_level/date=2019-10-04/a.parquet", ], ), - ("mock://top_level/second_level/date=2019-10", []), ( - "mock://top_level/second_level/date=2019-10-0[1-4]", + ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"), + True, + 1, + ["top_level/second_level"], + ), + ( + ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"), + True, + 2, [ + "top_level/second_level", "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-04", ], ), ( - "mock://top_level/second_level/date=2019-10-0[1-4]/*", + ("top_level/**",), + False, + None, [ + "top_level", + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", "top_level/second_level/date=2019-10-04/a.parquet", ], ), ( - "mock://top_level/second_level/date=2019-10-0[1-4]/[a].*", + ("top_level/**",), + True, + None, [ + "top_level", + "top_level/second_level", + "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-01/b.parquet", + "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04", "top_level/second_level/date=2019-10-04/a.parquet", ], ), - ("mock://glob_test/hat/^foo.*", ["glob_test/hat/^foo.txt"]), - ("mock://glob_test/dollar/$foo.*", ["glob_test/dollar/$foo.txt"]), - ("mock://glob_test/lbrace/{foo.*", ["glob_test/lbrace/{foo.txt"]), - ("mock://glob_test/rbrace/}foo.*", ["glob_test/rbrace/}foo.txt"]), - ], -) -def test_glob(test_path, expected): - test_fs = DummyTestFS() - res = test_fs.glob(test_path) - res = sorted(res) # FIXME: py35 back-compat - assert res == expected - res = test_fs.glob(test_path, detail=True) - assert isinstance(res, dict) - assert sorted(res) == expected # FIXME: py35 back-compat - for name, info in res.items(): - assert info == test_fs[name] - - -@pytest.mark.parametrize( 
- ["test_paths", "expected"], - [ + (("top_level/**",), True, 1, ["top_level", "top_level/second_level"]), ( - ("top_level/second_level", "top_level/sec*", "top_level/*"), + ("top_level/**",), + True, + 2, [ + "top_level", "top_level/second_level", "top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", @@ -180,58 +596,87 @@ def test_glob(test_path, expected): "top_level/second_level/date=2019-10-04/a.parquet", ], ), - (("misc/foo.txt", "misc/*.txt"), ["misc/foo.txt"]), + ( + ("top_level/**/a.*",), + False, + None, + [ + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ("top_level/**/a.*",), + True, + None, + [ + "top_level/second_level/date=2019-10-01/a.parquet", + "top_level/second_level/date=2019-10-02/a.parquet", + "top_level/second_level/date=2019-10-04/a.parquet", + ], + ), + ( + ("top_level/**/second_level/date=2019-10-02",), + False, + 2, + [ + "top_level/second_level/date=2019-10-02", + ], + ), + ( + ("top_level/**/second_level/date=2019-10-02",), + True, + 2, + [ + "top_level/second_level/date=2019-10-02", + "top_level/second_level/date=2019-10-02/a.parquet", + ], + ), + [("misc/foo.txt", "misc/*.txt"), False, None, ["misc/foo.txt"]], + [("misc/foo.txt", "misc/*.txt"), True, None, ["misc/foo.txt"]], ( ("",), + False, + None, + [DummyTestFS.root_marker], + ), + ( + ("",), + True, + None, DummyTestFS.get_test_paths() + [DummyTestFS.root_marker], ), ], - # ids=["all_second_level", "single_file"], ) -def test_expand_path_recursive(test_paths, expected): +def test_expand_path(test_paths, recursive, maxdepth, expected): """Test a number of paths and then their combination which should all yield the same set of expanded paths""" test_fs = DummyTestFS() # test single query for test_path in test_paths: - paths = test_fs.expand_path(test_path, recursive=True) + paths = test_fs.expand_path(test_path, recursive=recursive, maxdepth=maxdepth) assert sorted(paths) == sorted(expected) # test with all queries - paths = test_fs.expand_path(list(test_paths), recursive=True) + paths = test_fs.expand_path( + list(test_paths), recursive=recursive, maxdepth=maxdepth + ) assert sorted(paths) == sorted(expected) - # test with maxdepth - assert test_fs.expand_path("top_level", recursive=True, maxdepth=1) == [ - "top_level", - "top_level/second_level", - ] - - assert test_fs.expand_path("top_level", recursive=True, maxdepth=2) == [ - "top_level", - "top_level/second_level", - "top_level/second_level/date=2019-10-01", - "top_level/second_level/date=2019-10-02", - "top_level/second_level/date=2019-10-04", - ] - - assert test_fs.expand_path("top_level", recursive=True, maxdepth=3) == [ - "top_level", - "top_level/second_level", - "top_level/second_level/date=2019-10-01", - "top_level/second_level/date=2019-10-01/a.parquet", - "top_level/second_level/date=2019-10-01/b.parquet", - "top_level/second_level/date=2019-10-02", - "top_level/second_level/date=2019-10-02/a.parquet", - "top_level/second_level/date=2019-10-04", - "top_level/second_level/date=2019-10-04/a.parquet", - ] + +def test_expand_paths_with_wrong_args(): + test_fs = DummyTestFS() with pytest.raises(ValueError): test_fs.expand_path("top_level", recursive=True, maxdepth=0) with pytest.raises(ValueError): test_fs.expand_path("top_level", maxdepth=0) + with pytest.raises(FileNotFoundError): + test_fs.expand_path("top_level/**/second_level/date=2019-10-02", maxdepth=1) + with 
pytest.raises(FileNotFoundError): + test_fs.expand_path("nonexistent/*") @pytest.mark.xfail @@ -342,7 +787,6 @@ class UploadError(ValueError): ... class DummyBufferedFile(AbstractBufferedFile): - can_initiate = False def _initiate_upload(self): @@ -611,3 +1055,274 @@ def check_events(lpaths, rpaths): fs.get(base, dest, callback=callback) check_events(base, dest) callback.events.clear() + + +def _clean_paths(paths, prefix=""): + """ + Helper to cleanup paths results by doing the following: + - remove the prefix provided from all paths + - remove the trailing slashes from all paths + - remove duplicates paths + - sort all paths + """ + paths_list = paths + if isinstance(paths, dict): + paths_list = list(paths) + paths_list = [p.replace(prefix, "").strip("/") for p in sorted(set(paths_list))] + if isinstance(paths, dict): + return {p: paths[p] for p in paths_list} + return paths_list + + +@pytest.fixture(scope="function") +def glob_fs(): + return DummyTestFS(fs_content=PATHS_FOR_GLOB_TESTS) + + +@pytest.fixture(scope="function") +def glob_files_folder(tmp_path): + local_fs = LocalFileSystem(auto_mkdir=True) + local_fake_dir = str(tmp_path) + for path_info in PATHS_FOR_GLOB_TESTS: + if path_info["type"] == "file": + local_fs.touch(path=f"{str(tmp_path)}/{path_info['name']}") + return local_fake_dir + + +@pytest.mark.skipif( + sys.platform.startswith("win"), + reason="no need to run python glob posix tests on windows", +) +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_posix_tests_python_glob(path, expected, glob_files_folder): + """ + Tests against python glob to check if our posix tests are accurate. + """ + os.chdir(glob_files_folder) + + python_output = glob.glob(pathname=path, recursive=True) + assert _clean_paths(python_output, glob_files_folder) == _clean_paths(expected) + + +@pytest.mark.skipif( + sys.platform.startswith("win"), + reason="no need to run bash stat posix tests on windows", +) +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_posix_tests_bash_stat(path, expected, glob_files_folder): + """ + Tests against bash stat to check if our posix tests are accurate. 
+ """ + try: + subprocess.check_output(["bash", "-c", "shopt -s globstar"]) + except FileNotFoundError: + pytest.skip("bash is not available") + except subprocess.CalledProcessError: + pytest.skip("globstar option is not available") + + bash_path = ( + path.replace("\\", "\\\\") + .replace("$", "\\$") + .replace("(", "\\(") + .replace(")", "\\)") + .replace("|", "\\|") + ) + bash_output = subprocess.run( + [ + "bash", + "-c", + f"cd {glob_files_folder} && shopt -s globstar && stat -c %N {bash_path}", + ], + capture_output=True, + ) + # Remove the last element always empty + bash_output = bash_output.stdout.decode("utf-8").replace("'", "").split("\n")[:-1] + assert _clean_paths(bash_output, glob_files_folder) == _clean_paths(expected) + + +@pytest.mark.parametrize( + GLOB_POSIX_TESTS["argnames"], + GLOB_POSIX_TESTS["argvalues"], +) +def test_glob_posix_rules(path, expected, glob_fs): + output = glob_fs.glob(path=f"mock://{path}") + assert _clean_paths(output) == _clean_paths(expected) + + detailed_output = glob_fs.glob(path=f"mock://{path}", detail=True) + for name, info in _clean_paths(detailed_output).items(): + assert info == glob_fs[name] + + +@pytest.mark.parametrize( + ("path", "maxdepth", "expected"), + [ + ( + "test1**", + None, + [ + "test1", + "test1.json", + "test1.yaml", + "test1/test0", + "test1/test0.json", + "test1/test0.yaml", + "test1/test0/test0.json", + "test1/test0/test0.yaml", + ], + ), + ("test1**/", None, ["test1", "test1/test0"]), + ( + "**.yaml", + None, + [ + "test0.yaml", + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + "test1.yaml", + "test1/test0.yaml", + "test1/test0/test0.yaml", + ], + ), + ("**1/", None, ["test0/test1", "test0/test2/test1", "test1"]), + ( + "**1/*.yaml", + None, + [ + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test1/test0.yaml", + ], + ), + ( + "test0**1**.yaml", + None, + [ + "test0/test1/test2/test0.yaml", + "test0/test1/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ( + "test0/t**.yaml", + None, + [ + "test0/test0.yaml", + "test0/test1/test0.yaml", + "test0/test1/test2/test0.yaml", + "test0/test2/test0.yaml", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test3/test0.yaml", + ], + ), + ("test0/t**1/", None, ["test0/test1", "test0/test2/test1"]), + ( + "test0/t**1/*.yaml", + None, + ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"], + ), + ( + "test0/**", + 1, + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test2", + ], + ), + ( + "test0/**", + 2, + [ + "test0", + "test0/test0.json", + "test0/test0.yaml", + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], + ), + ("test0/**/test1/*", 1, []), + ( + "test0/**/test1/*", + 2, + ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"], + ), + ("test0/**/test1/**", 1, ["test0/test1"]), + ( + "test0/**/test1/**", + 2, + [ + "test0/test1", + "test0/test1/test0.json", + "test0/test1/test0.yaml", + "test0/test1/test2", + "test0/test2/test1", + ], + ), + ( + "test0/test[1-2]/**", + 1, + [ + "test0/test1", + "test0/test1/test0.yaml", + "test0/test1/test0.json", + "test0/test1/test2", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + ], 
+ ), + ( + "test0/test[1-2]/**", + 2, + [ + "test0/test1", + "test0/test1/test0.yaml", + "test0/test1/test0.json", + "test0/test1/test2", + "test0/test1/test2/test0.json", + "test0/test1/test2/test0.yaml", + "test0/test2", + "test0/test2/test0.json", + "test0/test2/test0.yaml", + "test0/test2/test1", + "test0/test2/test1/test0.yaml", + "test0/test2/test1/test0.json", + "test0/test2/test1/test3", + ], + ), + ], +) +def test_glob_non_posix_rules(path, maxdepth, expected, glob_fs): + output = glob_fs.glob(path=f"mock://{path}", maxdepth=maxdepth) + assert _clean_paths(output) == _clean_paths(expected) + + detailed_output = glob_fs.glob( + path=f"mock://{path}", maxdepth=maxdepth, detail=True + ) + for name, info in _clean_paths(detailed_output).items(): + assert info == glob_fs[name] + + +def test_glob_with_wrong_args(glob_fs): + with pytest.raises(ValueError): + _ = glob_fs.glob(path="mock://test0/*", maxdepth=0) diff --git a/fsspec/tests/test_utils.py b/fsspec/tests/test_utils.py index 517d42ba9..c83eeea0c 100644 --- a/fsspec/tests/test_utils.py +++ b/fsspec/tests/test_utils.py @@ -255,20 +255,19 @@ def test_common_prefix(paths, out): @pytest.mark.parametrize( - "paths, other, is_dir, exists, expected", + "paths, other, exists, expected", ( - (["/path1"], "/path2", False, False, ["/path2"]), - (["/path1"], "/path2", True, True, ["/path2/path1"]), - (["/path1"], "/path2", None, False, ["/path2"]), - (["/path1"], "/path2/", True, True, ["/path2/path1"]), - (["/path1"], ["/path2"], True, False, ["/path2"]), - (["/path1"], ["/path2"], True, True, ["/path2"]), - (["/path1", "/path2"], "/path2", True, False, ["/path2/path1", "/path2/path2"]), - (["/path1", "/path2"], "/path2", True, True, ["/path2/path1", "/path2/path2"]), + (["/path1"], "/path2", False, ["/path2"]), + (["/path1"], "/path2", True, ["/path2/path1"]), + (["/path1"], "/path2", False, ["/path2"]), + (["/path1"], "/path2/", True, ["/path2/path1"]), + (["/path1"], ["/path2"], False, ["/path2"]), + (["/path1"], ["/path2"], True, ["/path2"]), + (["/path1", "/path2"], "/path2", False, ["/path2/path1", "/path2/path2"]), + (["/path1", "/path2"], "/path2", True, ["/path2/path1", "/path2/path2"]), ( ["/more/path1", "/more/path2"], "/path2", - True, False, ["/path2/path1", "/path2/path2"], ), @@ -276,63 +275,55 @@ def test_common_prefix(paths, out): ["/more/path1", "/more/path2"], "/path2", True, - True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/more/path2"], "/path2", False, - False, ["/path2/path1", "/path2/path2"], ), ( ["/more/path1", "/more/path2"], "/path2", - False, True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/more/path2"], "/path2/", - None, False, ["/path2/path1", "/path2/path2"], ), ( ["/more/path1", "/more/path2"], "/path2/", - None, True, ["/path2/more/path1", "/path2/more/path2"], ), ( ["/more/path1", "/diff/path2"], "/path2/", - None, False, ["/path2/more/path1", "/path2/diff/path2"], ), ( ["/more/path1", "/diff/path2"], "/path2/", - None, True, ["/path2/more/path1", "/path2/diff/path2"], ), - (["a", "b/", "b/c"], "dest/", True, False, ["dest/a", "dest/b/", "dest/b/c"]), + (["a", "b/", "b/c"], "dest/", False, ["dest/a", "dest/b/", "dest/b/c"]), ( ["/a", "/b/", "/b/c"], "dest/", - True, False, ["dest/a", "dest/b/", "dest/b/c"], ), ), ) -def test_other_paths(paths, other, is_dir, exists, expected): - assert other_paths(paths, other, is_dir, exists) == expected +def test_other_paths(paths, other, exists, expected): + assert other_paths(paths, other, exists) == expected def 
test_log(): diff --git a/fsspec/utils.py b/fsspec/utils.py index 1aa630c01..91bc6ad1a 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -343,7 +343,7 @@ def common_prefix(paths): return "/".join(parts[0][:i]) -def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): +def other_paths(paths, path2, exists=False, flatten=False): """In bulk file operations, construct a new file tree from a list of files Parameters @@ -353,10 +353,6 @@ def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): path2: str or list of str Root to construct the new list in. If this is already a list of str, we just assert it has the right number of elements. - is_dir: bool (optional) - For the special case where the input in one element, whether to regard the value - as the target path, or as a directory to put a file path within. If None, a - directory is inferred if the path ends in '/' exists: bool (optional) For a str destination, it is already exists (and is a dir), files should end up inside. @@ -370,7 +366,6 @@ def other_paths(paths, path2, is_dir=None, exists=False, flatten=False): """ if isinstance(path2, str): - is_dir = is_dir or path2.endswith("/") path2 = path2.rstrip("/") if flatten: diff --git a/setup.cfg b/setup.cfg index d87021a4a..8a8bdee72 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,4 +51,4 @@ warn_unused_ignores = True # don't bother type-checking test_*.py or conftest.py files -exclude = (test_.*|conftest)\.py$ +exclude = (test.*|conftest)\.py$ From 2107d4a46e87a0474d04fba18ce4a4dac8c49457 Mon Sep 17 00:00:00 2001 From: Jonathan Langlois <37172224+john-jam@users.noreply.github.com> Date: Wed, 23 Aug 2023 11:05:16 +0900 Subject: [PATCH 10/10] fix: move glob edge case tests into abstract module (#1339) --- fsspec/tests/abstract/common.py | 175 +++++++++++++++++++++++++++++++ fsspec/tests/abstract/copy.py | 2 +- fsspec/tests/abstract/get.py | 2 +- fsspec/tests/abstract/put.py | 2 +- fsspec/tests/conftest.py | 176 -------------------------------- 5 files changed, 178 insertions(+), 179 deletions(-) create mode 100644 fsspec/tests/abstract/common.py diff --git a/fsspec/tests/abstract/common.py b/fsspec/tests/abstract/common.py new file mode 100644 index 000000000..93896a443 --- /dev/null +++ b/fsspec/tests/abstract/common.py @@ -0,0 +1,175 @@ +GLOB_EDGE_CASES_TESTS = { + "argnames": ("path", "recursive", "maxdepth", "expected"), + "argvalues": [ + ("fil?1", False, None, ["file1"]), + ("fil?1", True, None, ["file1"]), + ("file[1-2]", False, None, ["file1", "file2"]), + ("file[1-2]", True, None, ["file1", "file2"]), + ("*", False, None, ["file1", "file2"]), + ( + "*", + True, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*", True, 1, ["file1", "file2"]), + ( + "*", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("*1", False, None, ["file1"]), + ( + "*1", + True, + None, + [ + "file1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]), + ( + "**", + False, + None, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + True, + None, + [ + "file1", + "file2", + 
"subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**", True, 1, ["file1", "file2"]), + ( + "**", + True, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "**", + False, + 2, + [ + "file1", + "file2", + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ( + "**1", + True, + None, + [ + "file1", + "subdir0/subfile1", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ("**1", True, 1, ["file1"]), + ( + "**1", + True, + 2, + ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"], + ), + ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), + ("**/subdir0", False, None, []), + ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("**/subdir0/nested*", False, 2, []), + ("**/subdir0/nested*", True, 2, ["nestedfile"]), + ("subdir[1-2]", False, None, []), + ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), + ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]), + ("subdir[0-1]", False, None, []), + ( + "subdir[0-1]", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir0/nesteddir/nestedfile", + "subdir1/subfile1", + "subdir1/subfile2", + "subdir1/nesteddir/nestedfile", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + False, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ( + "subdir[0-1]/*fil[e]*", + True, + None, + [ + "subdir0/subfile1", + "subdir0/subfile2", + "subdir1/subfile1", + "subdir1/subfile2", + ], + ), + ], +} diff --git a/fsspec/tests/abstract/copy.py b/fsspec/tests/abstract/copy.py index a5eb19038..32766feab 100644 --- a/fsspec/tests/abstract/copy.py +++ b/fsspec/tests/abstract/copy.py @@ -2,7 +2,7 @@ import pytest -from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS class AbstractCopyTests: diff --git a/fsspec/tests/abstract/get.py b/fsspec/tests/abstract/get.py index 08c04e909..8d8ab2c7e 100644 --- a/fsspec/tests/abstract/get.py +++ b/fsspec/tests/abstract/get.py @@ -3,7 +3,7 @@ import pytest from fsspec.implementations.local import make_path_posix -from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS class AbstractGetTests: diff --git a/fsspec/tests/abstract/put.py b/fsspec/tests/abstract/put.py index a92bc4a13..2d1f2bc34 100644 --- a/fsspec/tests/abstract/put.py +++ b/fsspec/tests/abstract/put.py @@ -2,7 +2,7 @@ import pytest -from fsspec.tests.conftest import GLOB_EDGE_CASES_TESTS +from fsspec.tests.abstract.common import GLOB_EDGE_CASES_TESTS class AbstractPutTests: diff --git a/fsspec/tests/conftest.py b/fsspec/tests/conftest.py index 544e8a0e0..2c7a1742b 100644 --- a/fsspec/tests/conftest.py +++ b/fsspec/tests/conftest.py @@ -18,182 +18,6 @@ ).read() win = os.name == "nt" -GLOB_EDGE_CASES_TESTS = { - "argnames": ("path", "recursive", "maxdepth", "expected"), - "argvalues": [ - ("fil?1", False, None, ["file1"]), - ("fil?1", True, None, ["file1"]), - ("file[1-2]", False, None, ["file1", "file2"]), - ("file[1-2]", True, None, ["file1", "file2"]), - ("*", False, 
None, ["file1", "file2"]), - ( - "*", - True, - None, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir0/nesteddir/nestedfile", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ("*", True, 1, ["file1", "file2"]), - ( - "*", - True, - 2, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir1/subfile1", - "subdir1/subfile2", - ], - ), - ("*1", False, None, ["file1"]), - ( - "*1", - True, - None, - [ - "file1", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ("*1", True, 2, ["file1", "subdir1/subfile1", "subdir1/subfile2"]), - ( - "**", - False, - None, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir0/nesteddir/nestedfile", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ( - "**", - True, - None, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir0/nesteddir/nestedfile", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ("**", True, 1, ["file1", "file2"]), - ( - "**", - True, - 2, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir0/nesteddir/nestedfile", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ( - "**", - False, - 2, - [ - "file1", - "file2", - "subdir0/subfile1", - "subdir0/subfile2", - "subdir1/subfile1", - "subdir1/subfile2", - ], - ), - ("**1", False, None, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), - ( - "**1", - True, - None, - [ - "file1", - "subdir0/subfile1", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ("**1", True, 1, ["file1"]), - ( - "**1", - True, - 2, - ["file1", "subdir0/subfile1", "subdir1/subfile1", "subdir1/subfile2"], - ), - ("**1", False, 2, ["file1", "subdir0/subfile1", "subdir1/subfile1"]), - ("**/subdir0", False, None, []), - ("**/subdir0", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), - ("**/subdir0/nested*", False, 2, []), - ("**/subdir0/nested*", True, 2, ["nestedfile"]), - ("subdir[1-2]", False, None, []), - ("subdir[1-2]", True, None, ["subfile1", "subfile2", "nesteddir/nestedfile"]), - ("subdir[1-2]", True, 2, ["subfile1", "subfile2"]), - ("subdir[0-1]", False, None, []), - ( - "subdir[0-1]", - True, - None, - [ - "subdir0/subfile1", - "subdir0/subfile2", - "subdir0/nesteddir/nestedfile", - "subdir1/subfile1", - "subdir1/subfile2", - "subdir1/nesteddir/nestedfile", - ], - ), - ( - "subdir[0-1]/*fil[e]*", - False, - None, - [ - "subdir0/subfile1", - "subdir0/subfile2", - "subdir1/subfile1", - "subdir1/subfile2", - ], - ), - ( - "subdir[0-1]/*fil[e]*", - True, - None, - [ - "subdir0/subfile1", - "subdir0/subfile2", - "subdir1/subfile1", - "subdir1/subfile2", - ], - ), - ], -} - @pytest.fixture def reset_files():