From 8f29d9548e3dfcd325a5c729093e3bf6657d7b6c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 2 Aug 2024 16:50:13 -0400 Subject: [PATCH 01/11] to support https://github.com/fsspec/kerchunk/pull/488 (#1657) --- fsspec/implementations/reference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index 58acd31a7..c14ee8302 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -502,6 +502,7 @@ def flush(self, base_url=None, storage_options=None): if k != ".zmetadata" and ".z" in k: self.zmetadata[k] = json.loads(self._items.pop(k)) met = {"metadata": self.zmetadata, "record_size": self.record_size} + self._items.clear() self._items[".zmetadata"] = json.dumps(met).encode() self.fs.pipe( "/".join([base_url or self.out_root, ".zmetadata"]), @@ -1085,7 +1086,7 @@ def isdir(self, path): # overwrite auto-sync version if self.dircache: return path in self.dircache elif isinstance(self.references, LazyReferenceMapper): - return path in self.references.listdir("") + return path in self.references.listdir() else: # this may be faster than building dircache for single calls, but # by looping will be slow for many calls; could cache it? From 36cd82e2cf344183ad4f23bcb0f90513385586ae Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 8 Aug 2024 12:52:38 -0400 Subject: [PATCH 02/11] run tests with miniconda (#1658) --- .github/workflows/main.yaml | 19 ++++++------------- ci/environment-downstream.yml | 2 +- ci/environment-friends.yml | 2 +- ci/environment-py38.yml | 1 + ci/environment-win.yml | 4 +++- install_s3fs.sh | 2 +- 6 files changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 353ddfc15..9797e6a9c 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -25,19 +25,15 @@ jobs: fetch-depth: 0 - name: Setup conda - uses: mamba-org/setup-micromamba@v1 + uses: conda-incubator/setup-miniconda@v3 with: environment-file: ci/environment-py38.yml - create-args: >- - python=${{ matrix.PY }} + python-version: ${{ matrix.PY }} - name: Run Tests shell: bash -l {0} run: | - pip install s3fs - pip uninstall s3fs pip install -e .[test_full] - pip install s3fs --no-deps pytest -v win: @@ -54,17 +50,14 @@ jobs: fetch-depth: 0 - name: Setup conda - uses: mamba-org/setup-micromamba@v1 + uses: conda-incubator/setup-miniconda@v3 with: environment-file: ci/environment-win.yml - name: Run Tests shell: bash -l {0} run: | - pip install s3fs - pip uninstall s3fs pip install -e .[test] - pip install s3fs --no-deps pytest -v lint: @@ -84,7 +77,7 @@ jobs: # uses: actions/checkout@v4 # # - name: Setup conda -# uses: mamba-org/setup-micromamba@v1 +# uses: conda-incubator/setup-miniconda@v3 # with: # environment-file: ci/environment-typecheck.yml # @@ -104,7 +97,7 @@ jobs: fetch-depth: 0 - name: Setup conda - uses: mamba-org/setup-micromamba@v1 + uses: conda-incubator/setup-miniconda@v3 with: environment-file: ci/environment-downstream.yml @@ -145,7 +138,7 @@ jobs: uses: actions/checkout@v4 - name: Setup conda - uses: mamba-org/setup-micromamba@v1 + uses: conda-incubator/setup-miniconda@v3 with: environment-file: ci/environment-friends.yml diff --git a/ci/environment-downstream.yml b/ci/environment-downstream.yml index 6f0445413..7b02e61ed 100644 --- a/ci/environment-downstream.yml +++ b/ci/environment-downstream.yml @@ -2,6 +2,6 @@ name: test_env channels: - conda-forge dependencies: - - python=3.9 + - 
python=3.11 - pip: - git+https://github.com/dask/dask diff --git a/ci/environment-friends.yml b/ci/environment-friends.yml index 570ad4817..114c85444 100644 --- a/ci/environment-friends.yml +++ b/ci/environment-friends.yml @@ -2,7 +2,7 @@ name: test_env channels: - conda-forge dependencies: - - python=3.9 + - python=3.12 - pytest - pytest-asyncio !=0.22.0 - pytest-benchmark diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 9c514de98..03dc7eb06 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -5,6 +5,7 @@ dependencies: - pip - git <2.45.0 - py + - s3fs - pip: - hadoop-test-cluster - smbprotocol diff --git a/ci/environment-win.yml b/ci/environment-win.yml index 7f8eee0b5..f446fe258 100644 --- a/ci/environment-win.yml +++ b/ci/environment-win.yml @@ -2,4 +2,6 @@ name: test_env channels: - conda-forge dependencies: - - python=3.9 + - python=3.11 + - s3fs + - pytest diff --git a/install_s3fs.sh b/install_s3fs.sh index 8202d2569..a0a413317 100644 --- a/install_s3fs.sh +++ b/install_s3fs.sh @@ -10,4 +10,4 @@ git clone https://github.com/fsspec/s3fs mv s3fs ./downstream/s3fs # s3fs is pinned to a specific version of fsspec -pip install -e ./downstream/s3fs # installs all deps, including latest released fsspec +pip install --no-input -e ./downstream/s3fs # installs all deps, including latest released fsspec From 32ce723b7633ccd190dd30f94d7ffa5fe9b355b2 Mon Sep 17 00:00:00 2001 From: bartvaneswhiffle <129189801+bartvaneswhiffle@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:11:47 +0200 Subject: [PATCH 03/11] Add tls to ftp (#1581) --- fsspec/implementations/ftp.py | 21 +++++++++---- fsspec/implementations/reference.py | 8 +++-- fsspec/implementations/tests/ftp_tls.py | 38 ++++++++++++++++++++++++ fsspec/implementations/tests/keycert.pem | 24 +++++++++++++++ fsspec/implementations/tests/test_ftp.py | 29 ++++++++++++++++-- 5 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 fsspec/implementations/tests/ftp_tls.py create mode 100644 fsspec/implementations/tests/keycert.pem diff --git a/fsspec/implementations/ftp.py b/fsspec/implementations/ftp.py index 9e245d39f..e56edd2ba 100644 --- a/fsspec/implementations/ftp.py +++ b/fsspec/implementations/ftp.py @@ -2,7 +2,7 @@ import sys import uuid import warnings -from ftplib import FTP, Error, error_perm +from ftplib import FTP, FTP_TLS, Error, error_perm from typing import Any from ..spec import AbstractBufferedFile, AbstractFileSystem @@ -27,6 +27,7 @@ def __init__( tempdir=None, timeout=30, encoding="utf-8", + tls=False, **kwargs, ): """ @@ -56,28 +57,38 @@ def __init__( Timeout of the ftp connection in seconds encoding: str Encoding to use for directories and filenames in FTP connection + tls: bool + Use FTP-TLS, by default False """ super().__init__(**kwargs) self.host = host self.port = port self.tempdir = tempdir or "/tmp" - self.cred = username, password, acct + self.cred = username or "", password or "", acct or "" + print(self.cred) self.timeout = timeout self.encoding = encoding if block_size is not None: self.blocksize = block_size else: self.blocksize = 2**16 + self.tls = tls self._connect() + if self.tls: + self.ftp.prot_p() def _connect(self): + if self.tls: + ftp_cls = FTP_TLS + else: + ftp_cls = FTP if sys.version_info >= (3, 9): - self.ftp = FTP(timeout=self.timeout, encoding=self.encoding) + self.ftp = ftp_cls(timeout=self.timeout, encoding=self.encoding) elif self.encoding: warnings.warn("`encoding` not supported for python<3.9, ignoring") - self.ftp = FTP(timeout=self.timeout) 
+ self.ftp = ftp_cls(timeout=self.timeout) else: - self.ftp = FTP(timeout=self.timeout) + self.ftp = ftp_cls(timeout=self.timeout) self.ftp.connect(self.host, self.port) self.ftp.login(*self.cred) diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index c14ee8302..5340aaf1c 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -997,9 +997,11 @@ def _process_gen(self, gens): out = {} for gen in gens: dimension = { - k: v - if isinstance(v, list) - else range(v.get("start", 0), v["stop"], v.get("step", 1)) + k: ( + v + if isinstance(v, list) + else range(v.get("start", 0), v["stop"], v.get("step", 1)) + ) for k, v in gen["dimensions"].items() } products = ( diff --git a/fsspec/implementations/tests/ftp_tls.py b/fsspec/implementations/tests/ftp_tls.py new file mode 100644 index 000000000..6d1359bfa --- /dev/null +++ b/fsspec/implementations/tests/ftp_tls.py @@ -0,0 +1,38 @@ +import os + +from pyftpdlib.authorizers import DummyAuthorizer +from pyftpdlib.handlers import TLS_FTPHandler +from pyftpdlib.servers import FTPServer + + +def ftp(): + """Script to run FTP server that accepts TLS""" + # Set up FTP server parameters + FTP_HOST = "localhost" + FTP_PORT = 2121 + FTP_DIRECTORY = os.path.dirname(os.path.abspath(__file__)) + + # Instantiate a dummy authorizer + authorizer = DummyAuthorizer() + authorizer.add_user( + "user", + "pass", + FTP_DIRECTORY, + "elradfmwMT", + ) + authorizer.add_anonymous(FTP_DIRECTORY) + + # Instantiate TLS_FTPHandler with required parameters + handler = TLS_FTPHandler + handler.certfile = os.path.join(os.path.dirname(__file__), "keycert.pem") + handler.authorizer = authorizer + + # Instantiate FTP server with TLS handler and authorizer + server = FTPServer((FTP_HOST, FTP_PORT), handler) + server.authorizer = authorizer + + server.serve_forever() + + +if __name__ == "__main__": + ftp() diff --git a/fsspec/implementations/tests/keycert.pem b/fsspec/implementations/tests/keycert.pem new file mode 100644 index 000000000..2093f1d15 --- /dev/null +++ b/fsspec/implementations/tests/keycert.pem @@ -0,0 +1,24 @@ +-----BEGIN EC PARAMETERS----- +BggqhkjOPQMBBw== +-----END EC PARAMETERS----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIBTg1e61mzYYPJ+MDkOWCSevnT1HUaaK9iopgTGyDoIuoAoGCCqGSM49 +AwEHoUQDQgAEDy3E+4WgohcRUlaSZBndEZQBTyoRztCSoaDbhZkqsPFBbeaGJ5zA +E7qX+9LICDezAUsCiq2RYltOqDCsELteiQ== +-----END EC PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICdzCCAh2gAwIBAgIUNN4kmTSxbLOoQXLFiYOs2XeK1jIwCgYIKoZIzj0EAwIw +gY8xCzAJBgNVBAYTAk5MMRUwEwYDVQQIDAxadWlkLUhvbGxhbmQxDjAMBgNVBAcM +BURlbGZ0MRAwDgYDVQQKDAdXaGlmZmxlMQ0wCwYDVQQLDARERVZBMRIwEAYDVQQD +DAlCYXJ0dmFuRXMxJDAiBgkqhkiG9w0BCQEWFWJhcnQudmFuZXNAd2hpZmZsZS5u +bDAgFw0yNDA0MTgxMDI0NDFaGA8yMjk4MDIwMTEwMjQ0MVowgY8xCzAJBgNVBAYT +Ak5MMRUwEwYDVQQIDAxadWlkLUhvbGxhbmQxDjAMBgNVBAcMBURlbGZ0MRAwDgYD +VQQKDAdXaGlmZmxlMQ0wCwYDVQQLDARERVZBMRIwEAYDVQQDDAlCYXJ0dmFuRXMx +JDAiBgkqhkiG9w0BCQEWFWJhcnQudmFuZXNAd2hpZmZsZS5ubDBZMBMGByqGSM49 +AgEGCCqGSM49AwEHA0IABA8txPuFoKIXEVJWkmQZ3RGUAU8qEc7QkqGg24WZKrDx +QW3mhiecwBO6l/vSyAg3swFLAoqtkWJbTqgwrBC7XomjUzBRMB0GA1UdDgQWBBRb +1nPqritk/P2cbDzTw9SQ9vO7JDAfBgNVHSMEGDAWgBRb1nPqritk/P2cbDzTw9SQ +9vO7JDAPBgNVHRMBAf8EBTADAQH/MAoGCCqGSM49BAMCA0gAMEUCIBcvCFS4AD3p +Ix1v8pp3hcMvGFIQLeczh4kXkPfZWvBkAiEAiTCqsdKhZi8k814H6FFkaoQVIjTe +iUtUlW6RfyDNZ9E= +-----END CERTIFICATE----- diff --git a/fsspec/implementations/tests/test_ftp.py b/fsspec/implementations/tests/test_ftp.py index 7bf0c0e5d..e480ecaff 100644 --- 
a/fsspec/implementations/tests/test_ftp.py +++ b/fsspec/implementations/tests/test_ftp.py @@ -2,6 +2,7 @@ import subprocess import sys import time +from ftplib import FTP, FTP_TLS import pytest @@ -17,7 +18,7 @@ def ftp(): pytest.importorskip("pyftpdlib") P = subprocess.Popen( - [sys.executable, "-m", "pyftpdlib", "-d", here], + [sys.executable, os.path.join(here, "ftp_tls.py")], stderr=subprocess.STDOUT, stdout=subprocess.PIPE, ) @@ -29,9 +30,31 @@ def ftp(): P.wait() -def test_basic(ftp): +@pytest.mark.parametrize( + "tls,exp_cls", + ( + (False, FTP), + (True, FTP_TLS), + ), +) +def test_tls(ftp, tls, exp_cls): host, port = ftp - fs = FTPFileSystem(host, port) + fs = FTPFileSystem(host, port, tls=tls) + assert isinstance(fs.ftp, exp_cls) + + +@pytest.mark.parametrize( + "tls,username,password", + ( + (False, "", ""), + (True, "", ""), + (False, "user", "pass"), + (True, "user", "pass"), + ), +) +def test_basic(ftp, tls, username, password): + host, port = ftp + fs = FTPFileSystem(host, port, username, password, tls=tls) assert fs.ls("/", detail=False) == sorted(os.listdir(here)) out = fs.cat(f"/{os.path.basename(__file__)}") assert out == open(__file__, "rb").read() From 4b7965413a170f808221a8407f1a63bb3a36cc65 Mon Sep 17 00:00:00 2001 From: skshetry <18718008+skshetry@users.noreply.github.com> Date: Mon, 12 Aug 2024 21:07:26 +0545 Subject: [PATCH 04/11] localfs: reduce stats calls during info (#1659) --- fsspec/implementations/local.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fsspec/implementations/local.py b/fsspec/implementations/local.py index 9881606f1..f032d8aeb 100644 --- a/fsspec/implementations/local.py +++ b/fsspec/implementations/local.py @@ -79,6 +79,14 @@ def info(self, path, **kwargs): t = "file" else: t = "other" + + size = out.st_size + if link: + try: + out2 = path.stat(follow_symlinks=True) + size = out2.st_size + except OSError: + size = 0 path = self._strip_protocol(path.path) else: # str or path-like @@ -87,6 +95,7 @@ def info(self, path, **kwargs): link = stat.S_ISLNK(out.st_mode) if link: out = os.stat(path, follow_symlinks=True) + size = out.st_size if stat.S_ISDIR(out.st_mode): t = "directory" elif stat.S_ISREG(out.st_mode): @@ -95,20 +104,15 @@ def info(self, path, **kwargs): t = "other" result = { "name": path, - "size": out.st_size, + "size": size, "type": t, "created": out.st_ctime, "islink": link, } for field in ["mode", "uid", "gid", "mtime", "ino", "nlink"]: result[field] = getattr(out, f"st_{field}") - if result["islink"]: + if link: result["destination"] = os.readlink(path) - try: - out2 = os.stat(path, follow_symlinks=True) - result["size"] = out2.st_size - except OSError: - result["size"] = 0 return result def lexists(self, path, **kwargs): From 4f883adf314bc38e8cfeaea4c03b976edb1605dd Mon Sep 17 00:00:00 2001 From: bartvaneswhiffle <129189801+bartvaneswhiffle@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:18:47 +0200 Subject: [PATCH 05/11] Remove print statement (#1661) --- fsspec/implementations/ftp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/fsspec/implementations/ftp.py b/fsspec/implementations/ftp.py index e56edd2ba..f3471b996 100644 --- a/fsspec/implementations/ftp.py +++ b/fsspec/implementations/ftp.py @@ -65,7 +65,6 @@ def __init__( self.port = port self.tempdir = tempdir or "/tmp" self.cred = username or "", password or "", acct or "" - print(self.cred) self.timeout = timeout self.encoding = encoding if block_size is not None: From ee98ae3cc43c68455d4632cb23dd57bb702e5684 Mon Sep 17 
00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:36:50 +0200 Subject: [PATCH 06/11] Apply more ruff rules (#1660) --- fsspec/asyn.py | 2 +- fsspec/implementations/arrow.py | 2 +- fsspec/implementations/dbfs.py | 16 ++++++++-------- fsspec/implementations/reference.py | 2 +- fsspec/implementations/smb.py | 2 +- fsspec/implementations/tests/test_arrow.py | 2 +- fsspec/implementations/tests/test_reference.py | 10 +++++----- fsspec/implementations/tests/test_smb.py | 2 +- fsspec/implementations/webhdfs.py | 4 ++-- fsspec/spec.py | 2 +- fsspec/tests/abstract/__init__.py | 6 +++--- fsspec/tests/test_async.py | 3 --- fsspec/tests/test_downstream.py | 2 +- fsspec/tests/test_fuse.py | 4 ++-- fsspec/tests/test_registry.py | 4 ++-- fsspec/tests/test_spec.py | 2 +- fsspec/tests/test_utils.py | 13 ------------- fsspec/utils.py | 5 +---- pyproject.toml | 3 --- 19 files changed, 32 insertions(+), 54 deletions(-) diff --git a/fsspec/asyn.py b/fsspec/asyn.py index a040efc4b..f203fa0a4 100644 --- a/fsspec/asyn.py +++ b/fsspec/asyn.py @@ -1072,7 +1072,7 @@ async def flush(self, force=False): self.offset = 0 try: await self._initiate_upload() - except: # noqa: E722 + except: self.closed = True raise diff --git a/fsspec/implementations/arrow.py b/fsspec/implementations/arrow.py index f9fea70d2..530df901a 100644 --- a/fsspec/implementations/arrow.py +++ b/fsspec/implementations/arrow.py @@ -128,7 +128,7 @@ def cp_file(self, path1, path2, **kwargs): with self.open(tmp_fname, "wb") as rstream: shutil.copyfileobj(lstream, rstream) self.fs.move(tmp_fname, path2) - except BaseException: # noqa + except BaseException: with suppress(FileNotFoundError): self.fs.delete_file(tmp_fname) raise diff --git a/fsspec/implementations/dbfs.py b/fsspec/implementations/dbfs.py index 19f2ffc19..30c2947b0 100644 --- a/fsspec/implementations/dbfs.py +++ b/fsspec/implementations/dbfs.py @@ -79,7 +79,7 @@ def ls(self, path, detail=True, **kwargs): if e.error_code == "RESOURCE_DOES_NOT_EXIST": raise FileNotFoundError(e.message) from e - raise e + raise files = r["files"] out = [ { @@ -125,7 +125,7 @@ def makedirs(self, path, exist_ok=True): if e.error_code == "RESOURCE_ALREADY_EXISTS": raise FileExistsError(e.message) from e - raise e + raise self.invalidate_cache(self._parent(path)) def mkdir(self, path, create_parents=True, **kwargs): @@ -171,7 +171,7 @@ def rm(self, path, recursive=False, **kwargs): # Using the same exception as the os module would use here raise OSError(e.message) from e - raise e + raise self.invalidate_cache(self._parent(path)) def mv( @@ -216,7 +216,7 @@ def mv( elif e.error_code == "RESOURCE_ALREADY_EXISTS": raise FileExistsError(e.message) from e - raise e + raise self.invalidate_cache(self._parent(source_path)) self.invalidate_cache(self._parent(destination_path)) @@ -299,7 +299,7 @@ def _create_handle(self, path, overwrite=True): if e.error_code == "RESOURCE_ALREADY_EXISTS": raise FileExistsError(e.message) from e - raise e + raise def _close_handle(self, handle): """ @@ -316,7 +316,7 @@ def _close_handle(self, handle): if e.error_code == "RESOURCE_DOES_NOT_EXIST": raise FileNotFoundError(e.message) from e - raise e + raise def _add_data(self, handle, data): """ @@ -346,7 +346,7 @@ def _add_data(self, handle, data): elif e.error_code == "MAX_BLOCK_SIZE_EXCEEDED": raise ValueError(e.message) from e - raise e + raise def _get_data(self, path, start, end): """ @@ -376,7 +376,7 @@ def _get_data(self, path, start, end): elif 
e.error_code in ["INVALID_PARAMETER_VALUE", "MAX_READ_SIZE_EXCEEDED"]: raise ValueError(e.message) from e - raise e + raise def invalidate_cache(self, path=None): if path is None: diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index 5340aaf1c..6904d7b60 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -159,7 +159,7 @@ def open_refs(field, record): path = self.url.format(field=field, record=record) data = io.BytesIO(self.fs.cat_file(path)) df = self.pd.read_parquet(data, engine="fastparquet") - refs = {c: df[c].values for c in df.columns} + refs = {c: df[c].to_numpy() for c in df.columns} return refs self.open_refs = open_refs diff --git a/fsspec/implementations/smb.py b/fsspec/implementations/smb.py index a3c2d1b2d..db6b3f5c3 100644 --- a/fsspec/implementations/smb.py +++ b/fsspec/implementations/smb.py @@ -202,7 +202,7 @@ def _connect(self): else: # All another ValueError exceptions should be raised, as they are not # related to network issues. - raise exc + raise except Exception as exc: # Save the exception and retry to connect. This except might be dropped # in the future, once all exceptions suited for retry are identified. diff --git a/fsspec/implementations/tests/test_arrow.py b/fsspec/implementations/tests/test_arrow.py index af706c530..b9cbb2137 100644 --- a/fsspec/implementations/tests/test_arrow.py +++ b/fsspec/implementations/tests/test_arrow.py @@ -5,7 +5,7 @@ pyarrow_fs = pytest.importorskip("pyarrow.fs") FileSystem = pyarrow_fs.FileSystem -from fsspec.implementations.arrow import ArrowFSWrapper, HadoopFileSystem # noqa +from fsspec.implementations.arrow import ArrowFSWrapper, HadoopFileSystem # noqa: E402 @pytest.fixture(scope="function") diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index d980cd139..09ea3eb33 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -13,7 +13,7 @@ from fsspec.tests.conftest import data, realfile, reset_files, server, win # noqa: F401 -def test_simple(server): # noqa: F811 +def test_simple(server): # The dictionary in refs may be dumped with a different separator # depending on whether json or ujson is imported from fsspec.implementations.reference import json as json_impl @@ -37,7 +37,7 @@ def test_simple(server): # noqa: F811 assert f.read(2) == "he" -def test_simple_ver1(server): # noqa: F811 +def test_simple_ver1(server): # The dictionary in refs may be dumped with a different separator # depending on whether json or ujson is imported from fsspec.implementations.reference import json as json_impl @@ -75,7 +75,7 @@ def test_target_options(m): assert fs.cat("a") == b"hello" -def test_ls(server): # noqa: F811 +def test_ls(server): refs = {"a": b"data", "b": (realfile, 0, 5), "c/d": (realfile, 1, 6)} h = fsspec.filesystem("http") fs = fsspec.filesystem("reference", fo=refs, fs=h) @@ -99,7 +99,7 @@ def test_nested_dirs_ls(): assert {e["name"] for e in fs.ls("B")} == {"B/C", "B/_"} -def test_info(server): # noqa: F811 +def test_info(server): refs = { "a": b"data", "b": (realfile, 0, 5), @@ -173,7 +173,7 @@ def test_put_get_single(tmpdir): assert fs.cat("hi") == b"data" -def test_defaults(server): # noqa: F811 +def test_defaults(server): refs = {"a": b"data", "b": (None, 0, 5)} fs = fsspec.filesystem( "reference", diff --git a/fsspec/implementations/tests/test_smb.py b/fsspec/implementations/tests/test_smb.py index 68b595725..a83e3cc91 100644 
--- a/fsspec/implementations/tests/test_smb.py +++ b/fsspec/implementations/tests/test_smb.py @@ -50,7 +50,7 @@ def smb_params(request): cfg = "-p -u 'testuser;testpass' -s 'home;/share;no;no;no;testuser'" port = request.param if request.param is not None else default_port img = ( - f"docker run --name {container} --detach -p 139:139 -p {port}:445 dperson/samba" # noqa: E231 E501 + f"docker run --name {container} --detach -p 139:139 -p {port}:445 dperson/samba" ) cmd = f"{img} {cfg}" try: diff --git a/fsspec/implementations/webhdfs.py b/fsspec/implementations/webhdfs.py index 4bac5d51a..300bb9cdf 100644 --- a/fsspec/implementations/webhdfs.py +++ b/fsspec/implementations/webhdfs.py @@ -102,7 +102,7 @@ def __init__( if self._cached: return super().__init__(**kwargs) - self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1" # noqa + self.url = f"{'https' if use_https else 'http'}://{host}:{port}/webhdfs/v1" self.kerb = kerberos self.kerb_kwargs = kerb_kwargs or {} self.pars = {} @@ -393,7 +393,7 @@ def cp_file(self, lpath, rpath, **kwargs): with self.open(tmp_fname, "wb") as rstream: shutil.copyfileobj(lstream, rstream) self.mv(tmp_fname, rpath) - except BaseException: # noqa + except BaseException: with suppress(FileNotFoundError): self.rm(tmp_fname) raise diff --git a/fsspec/spec.py b/fsspec/spec.py index 1463a4499..8229170e2 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -1892,7 +1892,7 @@ def flush(self, force=False): self.offset = 0 try: self._initiate_upload() - except: # noqa: E722 + except: self.closed = True raise diff --git a/fsspec/tests/abstract/__init__.py b/fsspec/tests/abstract/__init__.py index 45d081921..44181420f 100644 --- a/fsspec/tests/abstract/__init__.py +++ b/fsspec/tests/abstract/__init__.py @@ -4,9 +4,9 @@ import pytest from fsspec.implementations.local import LocalFileSystem -from fsspec.tests.abstract.copy import AbstractCopyTests # noqa -from fsspec.tests.abstract.get import AbstractGetTests # noqa -from fsspec.tests.abstract.put import AbstractPutTests # noqa +from fsspec.tests.abstract.copy import AbstractCopyTests # noqa: F401 +from fsspec.tests.abstract.get import AbstractGetTests # noqa: F401 +from fsspec.tests.abstract.put import AbstractPutTests # noqa: F401 class BaseAbstractFixtures: diff --git a/fsspec/tests/test_async.py b/fsspec/tests/test_async.py index e1a29420f..aa3c9bd4f 100644 --- a/fsspec/tests/test_async.py +++ b/fsspec/tests/test_async.py @@ -203,9 +203,6 @@ async def _upload_chunk(self, final=False): async def get_data(self): return self.temp_buffer.getbuffer().tobytes() - async def get_data(self): - return self.temp_buffer.getbuffer().tobytes() - @pytest.mark.asyncio async def test_async_streamed_file_write(): diff --git a/fsspec/tests/test_downstream.py b/fsspec/tests/test_downstream.py index 172b2a7a7..1f0a0bc0e 100644 --- a/fsspec/tests/test_downstream.py +++ b/fsspec/tests/test_downstream.py @@ -4,7 +4,7 @@ pytest.importorskip("moto") try: - from s3fs.tests.test_s3fs import ( # noqa: E402,F401 + from s3fs.tests.test_s3fs import ( # noqa: F401 endpoint_uri, s3, s3_base, diff --git a/fsspec/tests/test_fuse.py b/fsspec/tests/test_fuse.py index db627ffc9..5222d2fd9 100644 --- a/fsspec/tests/test_fuse.py +++ b/fsspec/tests/test_fuse.py @@ -6,10 +6,10 @@ import pytest try: - pytest.importorskip("fuse") # noqa: E402 + pytest.importorskip("fuse") except OSError: # can succeed in importing fuse, but fail to load so - pytest.importorskip("nonexistent") # noqa: E402 + pytest.importorskip("nonexistent") from fsspec.fuse 
import main, run from fsspec.implementations.memory import MemoryFileSystem diff --git a/fsspec/tests/test_registry.py b/fsspec/tests/test_registry.py index 0664912a1..fae72368f 100644 --- a/fsspec/tests/test_registry.py +++ b/fsspec/tests/test_registry.py @@ -105,7 +105,7 @@ def test_entry_points_registered_on_import(clear_registry, clean_imports): import_location = "importlib.metadata.entry_points" with patch(import_location, return_value={"fsspec.specs": [mock_ep]}): assert "test" not in registry - import fsspec # noqa + import fsspec # noqa: F401 get_filesystem_class("test") assert "test" in registry @@ -117,7 +117,7 @@ def test_filesystem_warning_arrow_hdfs_deprecated(clear_registry, clean_imports) mock_ep.value = "fsspec.spec.AbstractFileSystem" import_location = "importlib.metadata.entry_points" with patch(import_location, return_value={"fsspec.specs": [mock_ep]}): - import fsspec # noqa + import fsspec # noqa: F401 with pytest.warns(DeprecationWarning): filesystem("arrow_hdfs") diff --git a/fsspec/tests/test_spec.py b/fsspec/tests/test_spec.py index e11b7abdd..3927c6550 100644 --- a/fsspec/tests/test_spec.py +++ b/fsspec/tests/test_spec.py @@ -1276,7 +1276,7 @@ def glob_files_folder(tmp_path): local_fake_dir = str(tmp_path) for path_info in PATHS_FOR_GLOB_TESTS: if path_info["type"] == "file": - local_fs.touch(path=f"{str(tmp_path)}/{path_info['name']}") + local_fs.touch(path=f"{tmp_path}/{path_info['name']}") return local_fake_dir diff --git a/fsspec/tests/test_utils.py b/fsspec/tests/test_utils.py index b9167b5d2..1eeee912b 100644 --- a/fsspec/tests/test_utils.py +++ b/fsspec/tests/test_utils.py @@ -261,7 +261,6 @@ def test_common_prefix(paths, out): ( (["/path1"], "/path2", False, ["/path2"]), (["/path1"], "/path2", True, ["/path2/path1"]), - (["/path1"], "/path2", False, ["/path2"]), (["/path1"], "/path2/", True, ["/path2/path1"]), (["/path1"], ["/path2"], False, ["/path2"]), (["/path1"], ["/path2"], True, ["/path2"]), @@ -279,18 +278,6 @@ def test_common_prefix(paths, out): True, ["/path2/more/path1", "/path2/more/path2"], ), - ( - ["/more/path1", "/more/path2"], - "/path2", - False, - ["/path2/path1", "/path2/path2"], - ), - ( - ["/more/path1", "/more/path2"], - "/path2", - True, - ["/path2/more/path1", "/path2/more/path2"], - ), ( ["/more/path1", "/more/path2"], "/path2/", diff --git a/fsspec/utils.py b/fsspec/utils.py index 703d55f4e..faa63937f 100644 --- a/fsspec/utils.py +++ b/fsspec/utils.py @@ -427,10 +427,7 @@ def is_exception(obj: Any) -> bool: def isfilelike(f: Any) -> TypeGuard[IO[bytes]]: - for attr in ["read", "close", "tell"]: - if not hasattr(f, attr): - return False - return True + return all(hasattr(f, attr) for attr in ["read", "close", "tell"]) def get_protocol(url: str) -> str: diff --git a/pyproject.toml b/pyproject.toml index 945ecdabd..48368711f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -192,9 +192,6 @@ ignore = [ "B026", # No explicit `stacklevel` keyword argument found "B028", - # Within an `except` clause, raise exceptions with `raise ... from err` or - # `raise ... 
from None` to distinguish them from errors in exception handling - "B904", # Assigning lambda expression "E731", # Ambiguous variable names From 7793ab89a56d70814de6d123c257e81f8e5bd52f Mon Sep 17 00:00:00 2001 From: Johan Dahlberg Date: Thu, 22 Aug 2024 21:58:39 +0200 Subject: [PATCH 07/11] Improve performance find zip archive (#1664) Co-authored-by: Martin Durant --- fsspec/implementations/tests/test_zip.py | 340 +++++++++++++++++++++++ fsspec/implementations/zip.py | 42 +++ 2 files changed, 382 insertions(+) diff --git a/fsspec/implementations/tests/test_zip.py b/fsspec/implementations/tests/test_zip.py index ec30c8778..ecd082f3f 100644 --- a/fsspec/implementations/tests/test_zip.py +++ b/fsspec/implementations/tests/test_zip.py @@ -1,10 +1,13 @@ import collections.abc import os.path +from pathlib import Path +from shutil import make_archive import pytest import fsspec from fsspec.implementations.tests.test_archive import archive_data, tempzip +from fsspec.implementations.zip import ZipFileSystem def test_info(): @@ -132,3 +135,340 @@ def test_append(m, tmpdir): fs.close() assert len(fsspec.open_files("zip://*::memory://out.zip")) == 2 + + +@pytest.fixture(name="zip_file") +def zip_file_fixture(tmp_path): + data_dir = tmp_path / "data/" + data_dir.mkdir() + file1 = data_dir / "file1.txt" + file1.write_text("Hello, World!") + file2 = data_dir / "file2.txt" + file2.write_text("Lorem ipsum dolor sit amet") + + empty_dir = data_dir / "dir1" + empty_dir.mkdir() + + dir_with_files = data_dir / "dir2" + dir_with_files.mkdir() + file3 = dir_with_files / "file3.txt" + file3.write_text("Hello!") + + potential_mix_up_path = data_dir / "dir2startwithsamename.txt" + potential_mix_up_path.write_text("Hello again!") + + zip_file = tmp_path / "test" + return Path(make_archive(zip_file, "zip", data_dir)) + + +def _assert_all_except_context_dependent_variables(result, expected_result): + for path in expected_result.keys(): + assert result[path] + result_without_date_time = result[path].copy() + result_without_date_time.pop("date_time") + result_without_date_time.pop("_raw_time") + result_without_date_time.pop("external_attr") + result_without_date_time.pop("create_system") + + expected_result_without_date_time = expected_result[path].copy() + expected_result_without_date_time.pop("date_time") + expected_result_without_date_time.pop("_raw_time") + expected_result_without_date_time.pop("external_attr") + expected_result_without_date_time.pop("create_system") + assert result_without_date_time == expected_result_without_date_time + + +def test_find_returns_expected_result_detail_true(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/", detail=True) + + expected_result = { + "dir2/file3.txt": { + "orig_filename": "dir2/file3.txt", + "filename": "dir2/file3.txt", + "date_time": (2024, 8, 16, 10, 46, 18), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + "header_offset": 260, + "CRC": 2636827734, + "compress_size": 8, + "file_size": 6, + "_raw_time": 21961, + "_end_offset": 312, + "name": "dir2/file3.txt", + "size": 6, + "type": "file", + }, + "file1.txt": { + "orig_filename": "file1.txt", + "filename": "file1.txt", + "date_time": (2024, 8, 16, 10, 46, 18), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + 
"create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + "header_offset": 139, + "CRC": 3964322768, + "compress_size": 15, + "file_size": 13, + "_raw_time": 21961, + "_end_offset": 193, + "name": "file1.txt", + "size": 13, + "type": "file", + }, + "file2.txt": { + "orig_filename": "file2.txt", + "filename": "file2.txt", + "date_time": (2024, 8, 16, 10, 46, 18), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + "header_offset": 193, + "CRC": 1596576865, + "compress_size": 28, + "file_size": 26, + "_raw_time": 21961, + "_end_offset": 260, + "name": "file2.txt", + "size": 26, + "type": "file", + }, + } + + _assert_all_except_context_dependent_variables(result, expected_result) + + +def test_find_returns_expected_result_detail_false(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/", detail=False) + expected_result = [ + "dir2/file3.txt", + "dir2startwithsamename.txt", + "file1.txt", + "file2.txt", + ] + + assert result == expected_result + + +def test_find_returns_expected_result_detail_true_include_dirs(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/", detail=True, withdirs=True) + expected_result = { + "dir1": { + "orig_filename": "dir1/", + "filename": "dir1/", + "date_time": (2024, 8, 16, 10, 54, 24), + "compress_type": 0, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 1106051088, + "header_offset": 0, + "CRC": 0, + "compress_size": 0, + "file_size": 0, + "_raw_time": 22220, + "_end_offset": 35, + "name": "dir1", + "size": 0, + "type": "directory", + }, + "dir2": { + "orig_filename": "dir2/", + "filename": "dir2/", + "date_time": (2024, 8, 16, 10, 54, 24), + "compress_type": 0, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 1106051088, + "header_offset": 35, + "CRC": 0, + "compress_size": 0, + "file_size": 0, + "_raw_time": 22220, + "_end_offset": 70, + "name": "dir2", + "size": 0, + "type": "directory", + }, + "dir2/file3.txt": { + "orig_filename": "dir2/file3.txt", + "filename": "dir2/file3.txt", + "date_time": (2024, 8, 16, 10, 54, 24), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + "header_offset": 260, + "CRC": 2636827734, + "compress_size": 8, + "file_size": 6, + "_raw_time": 22220, + "_end_offset": 312, + "name": "dir2/file3.txt", + "size": 6, + "type": "file", + }, + "file1.txt": { + "orig_filename": "file1.txt", + "filename": "file1.txt", + "date_time": (2024, 8, 16, 10, 54, 24), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + 
"header_offset": 139, + "CRC": 3964322768, + "compress_size": 15, + "file_size": 13, + "_raw_time": 22220, + "_end_offset": 193, + "name": "file1.txt", + "size": 13, + "type": "file", + }, + "file2.txt": { + "orig_filename": "file2.txt", + "filename": "file2.txt", + "date_time": (2024, 8, 16, 10, 54, 24), + "compress_type": 8, + "_compresslevel": None, + "comment": b"", + "extra": b"", + "create_system": 3, + "create_version": 20, + "extract_version": 20, + "reserved": 0, + "flag_bits": 0, + "volume": 0, + "internal_attr": 0, + "external_attr": 2175008768, + "header_offset": 193, + "CRC": 1596576865, + "compress_size": 28, + "file_size": 26, + "_raw_time": 22220, + "_end_offset": 260, + "name": "file2.txt", + "size": 26, + "type": "file", + }, + } + + _assert_all_except_context_dependent_variables(result, expected_result) + + +def test_find_returns_expected_result_detail_false_include_dirs(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/", detail=False, withdirs=True) + expected_result = [ + "dir1", + "dir2", + "dir2/file3.txt", + "dir2startwithsamename.txt", + "file1.txt", + "file2.txt", + ] + + assert result == expected_result + + +def test_find_returns_expected_result_path_set(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/dir2") + expected_result = ["dir2/file3.txt"] + + assert result == expected_result + + +def test_find_with_and_without_slash_should_return_same_result(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + assert zip_file_system.find("/dir2/") == zip_file_system.find("/dir2") + + +def test_find_should_return_file_if_exact_match(zip_file): + zip_file_system = ZipFileSystem(zip_file) + + result = zip_file_system.find("/dir2startwithsamename.txt", detail=False) + expected_result = ["dir2startwithsamename.txt"] + + assert result == expected_result + + +def test_find_returns_expected_result_recursion_depth_set(zip_file): + zip_file_system = ZipFileSystem(zip_file) + result = zip_file_system.find("/", maxdepth=1) + + expected_result = [ + "dir2startwithsamename.txt", + "file1.txt", + "file2.txt", + ] + + assert result == expected_result diff --git a/fsspec/implementations/zip.py b/fsspec/implementations/zip.py index 9d9c046bf..aa6a57842 100644 --- a/fsspec/implementations/zip.py +++ b/fsspec/implementations/zip.py @@ -132,3 +132,45 @@ def _open( out.size = info["size"] out.name = info["name"] return out + + def find(self, path, maxdepth=None, withdirs=False, detail=False, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + # Remove the leading slash, as the zip file paths are always + # given without a leading slash + path = path.lstrip("/") + path_parts = list(filter(lambda s: bool(s), path.split("/"))) + + def _matching_starts(file_path): + file_parts = filter(lambda s: bool(s), file_path.split("/")) + return all(a == b for a, b in zip(path_parts, file_parts)) + + self._get_dirs() + + result = {} + # To match posix find, if an exact file name is given, we should + # return only that file + if path in self.dir_cache and self.dir_cache[path]["type"] == "file": + result[path] = self.dir_cache[path] + return result if detail else [path] + + for file_path, file_info in self.dir_cache.items(): + if not (path == "" or _matching_starts(file_path)): + continue + + if file_info["type"] == "directory": + if withdirs: + if file_path not in result: + result[file_path.strip("/")] = file_info + continue + + if file_path not in result: + 
result[file_path] = file_info if detail else None + + if maxdepth: + path_depth = path.count("/") + result = { + k: v for k, v in result.items() if k.count("/") - path_depth < maxdepth + } + return result if detail else sorted(result) From 76ca4a68885d572880ac6800f079738df562f02c Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 4 Sep 2024 11:05:52 -0400 Subject: [PATCH 08/11] Changelog (#1670) --- docs/source/changelog.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 48b4e3971..b19ee9e3b 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,6 +1,28 @@ Changelog ========= +2024.9.0 +-------- + +Enhancements + +- fewer stat calls in localFS (#1659) +- faster find in ZIP (#1664) + +Fixes + +- paths without "/" in dirFS (#1638) +- paths with "/" in FTS (#1643, 1644) +- ls in parquet-based nested reference sets, and append (#1645, 1657) +- exception handling for SMB (#1650) + + +Other + +- style (#1640, 1641, 1660) +- docs: xrootd (#1646) +- CI back on miniconda (#1658) + 2024.6.1 -------- From dc4f5a97d90238b862fa7974a9b8e93602f44540 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 23 Sep 2024 09:42:21 -0400 Subject: [PATCH 09/11] Don't require absolute offsets in zip tests (#1691) --- fsspec/implementations/tests/test_zip.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fsspec/implementations/tests/test_zip.py b/fsspec/implementations/tests/test_zip.py index ecd082f3f..8bf8155d9 100644 --- a/fsspec/implementations/tests/test_zip.py +++ b/fsspec/implementations/tests/test_zip.py @@ -169,12 +169,16 @@ def _assert_all_except_context_dependent_variables(result, expected_result): result_without_date_time.pop("_raw_time") result_without_date_time.pop("external_attr") result_without_date_time.pop("create_system") + result_without_date_time.pop("_end_offset", None) + result_without_date_time.pop("header_offset", None) expected_result_without_date_time = expected_result[path].copy() expected_result_without_date_time.pop("date_time") expected_result_without_date_time.pop("_raw_time") expected_result_without_date_time.pop("external_attr") expected_result_without_date_time.pop("create_system") + expected_result_without_date_time.pop("_end_offset", None) + expected_result_without_date_time.pop("header_offset", None) assert result_without_date_time == expected_result_without_date_time From 4af4e76f3143f268db876c63c6506edd4d8f7d4f Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Mon, 23 Sep 2024 09:51:21 -0400 Subject: [PATCH 10/11] Skip test_deep_parq without kerchunk installed (#1687) Co-authored-by: Martin Durant --- fsspec/implementations/tests/test_reference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index 09ea3eb33..7bfd7d08e 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -762,6 +762,7 @@ def test_append_parquet(lazy_refs, m): def test_deep_parq(m): + pytest.importorskip("kerchunk") zarr = pytest.importorskip("zarr") lz = fsspec.implementations.reference.LazyReferenceMapper.create( "memory://out.parq", fs=m From 1f6151250e8586ba8af3b7cd910dc4f0e1715596 Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Mon, 23 Sep 2024 09:51:45 -0400 Subject: [PATCH 11/11] Fix handling of Paths in zip implementation (#1689) Fixes #1688 Co-authored-by: Martin Durant --- 
fsspec/implementations/zip.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fsspec/implementations/zip.py b/fsspec/implementations/zip.py index aa6a57842..6db3ae278 100644 --- a/fsspec/implementations/zip.py +++ b/fsspec/implementations/zip.py @@ -1,3 +1,4 @@ +import os import zipfile import fsspec @@ -48,7 +49,7 @@ def __init__( if mode not in set("rwa"): raise ValueError(f"mode '{mode}' no understood") self.mode = mode - if isinstance(fo, str): + if isinstance(fo, (str, os.PathLike)): if mode == "a": m = "r+b" else:
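
The patches above are user-facing in three places: FTPFileSystem gains a tls= flag backed by ftplib.FTP_TLS (PATCH 03), ZipFileSystem gains a faster find() supporting detail, withdirs and maxdepth (PATCH 07), and ZipFileSystem now accepts os.PathLike objects as well as strings (PATCH 11). The snippet below is a minimal usage sketch, not part of the patch series; the FTP host, port and credentials are assumptions (any TLS-capable server, such as the tests/ftp_tls.py helper added in PATCH 03, would do), and the expected find() outputs refer only to the small throwaway archive built inside the snippet.

import os
import tempfile
from pathlib import Path
from shutil import make_archive

from fsspec.implementations.zip import ZipFileSystem

# ZipFileSystem: pathlib.Path input (PATCH 11) and the dedicated find() (PATCH 07).
with tempfile.TemporaryDirectory() as tmp:
    data = Path(tmp) / "data"
    (data / "sub").mkdir(parents=True)
    (data / "a.txt").write_text("hello")
    (data / "sub" / "b.txt").write_text("world")
    archive = Path(make_archive(os.path.join(tmp, "example"), "zip", data))

    zfs = ZipFileSystem(archive)            # a Path is now accepted, not only str
    print(zfs.find("/"))                    # ['a.txt', 'sub/b.txt']
    print(zfs.find("/", withdirs=True))     # directory entries included as well
    print(zfs.find("/", maxdepth=1))        # ['a.txt']

# FTPFileSystem over TLS (PATCH 03). Assumed server details: localhost:2121 with
# user "user" / password "pass", as in the ftp_tls.py test helper above.
# Uncomment to try against such a server:
# import fsspec
# fs = fsspec.filesystem(
#     "ftp", host="localhost", port=2121,
#     username="user", password="pass", tls=True,
# )
# print(fs.ls("/", detail=False))

The zip portion runs standalone; the FTP portion is left commented out because it needs a live TLS-enabled FTP server.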