diff --git a/tosfs/consts.py b/tosfs/consts.py
new file mode 100644
index 0000000..aa1a4da
--- /dev/null
+++ b/tosfs/consts.py
@@ -0,0 +1,18 @@
+# ByteDance Volcengine EMR, Copyright 2024.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The module contains constants for the tosfs package."""
+
+# Tos server response codes
+TOS_SERVER_RESPONSE_CODE_NOT_FOUND = 404
diff --git a/tosfs/core.py b/tosfs/core.py
index 52c0cc3..9fbc1f6 100644
--- a/tosfs/core.py
+++ b/tosfs/core.py
@@ -15,7 +15,7 @@
 """The core module of TOSFS."""
 import logging
 import os
-from typing import List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 
 import tos
 from fsspec import AbstractFileSystem
@@ -23,15 +23,13 @@
 from tos.models import CommonPrefixInfo
 from tos.models2 import ListedObject, ListedObjectVersion
 
+from tosfs.consts import TOS_SERVER_RESPONSE_CODE_NOT_FOUND
 from tosfs.exceptions import TosfsError
 from tosfs.utils import find_bucket_key
 
 # environment variable names
 ENV_NAME_TOSFS_LOGGING_LEVEL = "TOSFS_LOGGING_LEVEL"
 
-# constants
-SERVER_RESPONSE_CODE_NOT_FOUND = 404
-
 logger = logging.getLogger("tosfs")
 
 
@@ -268,7 +266,7 @@ def _bucket_info(self, bucket: str) -> dict:
         except tos.exceptions.TosClientError as e:
             raise e
         except tos.exceptions.TosServerError as e:
-            if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
+            if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
                 raise FileNotFoundError(bucket) from e
             else:
                 raise e
@@ -327,7 +325,7 @@ def _object_info(
         except tos.exceptions.TosClientError as e:
             raise e
         except tos.exceptions.TosServerError as e:
-            if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
+            if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
                 pass
             else:
                 raise e
@@ -365,6 +363,152 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
         except Exception as e:
             raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
 
+    def exists(self, path: str, **kwargs: Any) -> bool:
+        """Check if a path exists in the TOS.
+
+        Parameters
+        ----------
+        path : str
+            The path to check for existence.
+        **kwargs : Any, optional
+            Additional arguments if needed in the future.
+
+        Returns
+        -------
+        bool
+            True if the path exists, False otherwise.
+
+        Raises
+        ------
+        tos.exceptions.TosClientError
+            If there is a client error while checking the path.
+        tos.exceptions.TosServerError
+            If there is a server error while checking the path.
+        TosfsError
+            If there is an unknown error while checking the path.
+
+        Examples
+        --------
+        >>> fs = TosFileSystem()
+        >>> fs.exists("tos://bucket/to/file")
+        True
+        >>> fs.exists("tos://mybucket/nonexistentfile")
+        False
+
+        """
+        if path in ["", "/"]:
+            # the root always exists
+            return True
+
+        path = self._strip_protocol(path)
+        bucket, key, version_id = self._split_path(path)
+        # if the path is a bucket
+        if not key:
+            return self._exists_bucket(bucket)
+        else:
+            object_exists = self._exists_object(bucket, key, path, version_id)
+            if not object_exists:
+                return self._exists_object(
+                    bucket, key.rstrip("/") + "/", path, version_id
+                )
+            return object_exists
+
+    def _exists_bucket(self, bucket: str) -> bool:
+        """Check if a bucket exists in the TOS.
+
+        Parameters
+        ----------
+        bucket : str
+            The name of the bucket to check for existence.
+
+        Returns
+        -------
+        bool
+            True if the bucket exists, False otherwise.
+
+        Raises
+        ------
+        tos.exceptions.TosClientError
+            If there is a client error while checking the bucket.
+        tos.exceptions.TosServerError
+            If there is a server error while checking the bucket.
+        TosfsError
+            If there is an unknown error while checking the bucket.
+
+        Examples
+        --------
+        >>> fs = TosFileSystem()
+        >>> fs._exists_bucket("mybucket")
+        True
+        >>> fs._exists_bucket("nonexistentbucket")
+        False
+
+        """
+        try:
+            self.tos_client.head_bucket(bucket)
+            return True
+        except tos.exceptions.TosClientError as e:
+            raise e
+        except tos.exceptions.TosServerError as e:
+            if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
+                return False
+            else:
+                raise e
+        except Exception as e:
+            raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
+
+    def _exists_object(
+        self, bucket: str, key: str, path: str, version_id: Optional[str] = None
+    ) -> bool:
+        """Check if an object exists in the TOS.
+
+        Parameters
+        ----------
+        bucket : str
+            The name of the bucket.
+        key : str
+            The key of the object.
+        path : str
+            The full path of the object.
+        version_id : str, optional
+            The version ID of the object (default is None).
+
+        Returns
+        -------
+        bool
+            True if the object exists, False otherwise.
+
+        Raises
+        ------
+        tos.exceptions.TosClientError
+            If there is a client error while checking the object.
+        tos.exceptions.TosServerError
+            If there is a server error while checking the object.
+        TosfsError
+            If there is an unknown error while checking the object.
+
+        Examples
+        --------
+        >>> fs = TosFileSystem()
+        >>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
+        True
+        >>> fs._exists_object("mybucket", "nonexistentfile", "tos://mybucket/nonexistentfile")
+        False
+
+        """
+        try:
+            self.tos_client.head_object(bucket, key, version_id=version_id)
+            return True
+        except tos.exceptions.TosClientError as e:
+            raise e
+        except tos.exceptions.TosServerError as e:
+            if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
+                return False
+            else:
+                raise e
+        except Exception as e:
+            raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
+
     def _lsbuckets(self) -> List[dict]:
         """List all buckets in the account.
 
diff --git a/tosfs/tests/conftest.py b/tosfs/tests/conftest.py
index 9833631..b62bb9d 100644
--- a/tosfs/tests/conftest.py
+++ b/tosfs/tests/conftest.py
@@ -56,6 +56,6 @@ def temporary_workspace(
     yield workspace
     try:
         tosfs.rmdir(f"{bucket}/{workspace}/")
-    except Exception:
-        logger.error("Ignore exception.")
+    except Exception as e:
+        logger.error(f"Ignore exception {e}.")
     assert not tosfs.exists(f"{bucket}/{workspace}/")
diff --git a/tosfs/tests/test_tosfs.py b/tosfs/tests/test_tosfs.py
index 60abbc2..6f207f2 100644
--- a/tosfs/tests/test_tosfs.py
+++ b/tosfs/tests/test_tosfs.py
@@ -109,3 +109,26 @@ def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> N
     assert f"{bucket}/{temporary_workspace}" not in tosfs.ls(
         bucket, detail=False, refresh=True
     )
+
+
+def test_exists_bucket(
+    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
+) -> None:
+    assert tosfs.exists("")
+    assert tosfs.exists("/")
+    assert tosfs.exists(bucket)
+    assert not tosfs.exists("nonexistent")
+
+
+def test_exists_object(
+    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
+) -> None:
+    file_name = random_path()
+    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
+    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")
+    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
+    assert not tosfs.exists(f"{bucket}/nonexistent")
+    tosfs.rm_file(f"{bucket}/{temporary_workspace}/{file_name}")
+    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")