From 80f71f37fdd9891c7e2bfb496b790eec23742b5c Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 25 Sep 2024 20:42:24 +0800 Subject: [PATCH] Bug: Fix rm api and walk api bug --- poetry.lock | 16 ++++++---------- pyproject.toml | 2 +- tosfs/core.py | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/poetry.lock b/poetry.lock index 4b17890..b779fe6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -291,13 +291,13 @@ test = ["pytest (>=6)"] [[package]] name = "fsspec" -version = "2024.6.1" +version = "2023.5.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, - {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, + {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"}, + {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"}, ] [package.extras] @@ -305,8 +305,7 @@ abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] -dev = ["pre-commit", "ruff"] -doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] +devel = ["pytest", "pytest-cov"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] fuse = ["fusepy"] @@ -316,16 +315,13 @@ github = ["requests"] gs = ["gcsfs"] gui = ["panel"] hdfs = ["pyarrow (>=1)"] -http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] libarchive = ["libarchive-c"] oci = ["ocifs"] s3 = ["s3fs"] 
sftp = ["paramiko"] smb = ["smbprotocol"] ssh = ["paramiko"] -test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] -test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] -test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] [[package]] @@ -737,4 +733,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "2afca8d4ce9d407ec9c308bfb562ae0c046061066ad9c7a115a215e1c7941238" +content-hash = "1bb1712f54089469cbb3c278bad0114a8104a28f988d82ff8d87bf88aa5d0fa5" diff --git a/pyproject.toml b/pyproject.toml index db1a095..3d59f1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.9" -fsspec = ">=2023.5.0" +fsspec = "==2023.5.0" tos = ">=2.7.0" [tool.poetry.group.dev.dependencies] diff --git a/tosfs/core.py b/tosfs/core.py index 58e4a0b..6f10a2e 100644 --- a/tosfs/core.py +++ b/tosfs/core.py @@ -1091,7 +1091,7 @@ def find( "Can not specify 'prefix' option alongside 'maxdepth' options." 
) if maxdepth: - return super().find( + return self._fsspec_find( bucket + "/" + key, maxdepth=maxdepth, withdirs=withdirs, @@ -1106,6 +1106,54 @@ def find( else: return [o["name"] for o in out] + def _fsspec_find( # noqa # + self, + path: str, + maxdepth: Optional[int] = None, + withdirs: bool = False, + detail: bool = False, + **kwargs: Any, # type: ignore + ) -> Any: + """List all files below path. + + Copied from fsspec (2024.9.0) to fix fsspec (2023.5.0). + + Like posix ``find`` command without conditions + + Parameters + ---------- + path : str + maxdepth: int or None + If not None, the maximum number of levels to descend + withdirs: bool + Whether to include directory paths in the output. This is True + when used by glob, but users usually only want files. + kwargs are passed to ``ls``. + + """ + # TODO: allow equivalent of -name parameter + path = self._strip_protocol(path) + out = {} + + # Add the root directory if withdirs is requested + # This is needed for posix glob compliance + if withdirs and path != "" and self.isdir(path): + out[path] = self.info(path) + + for _, dirs, files in self._fsspec_walk(path, maxdepth, detail=True, **kwargs): + if withdirs: + files.update(dirs) + out.update({info["name"]: info for name, info in files.items()}) + if not out and self.isfile(path): + # walk works on directories, but find should also return [path] + # when path happens to be a file + out[path] = {} + names = sorted(out) + if not detail: + return names + else: + return {name: out[name] for name in names} + def expand_path( self, path: Union[str, List[str]],