Skip to content

Commit

Permalink
Core: Implement exists api
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Aug 26, 2024
1 parent 5d23c6a commit c68746d
Show file tree
Hide file tree
Showing 3 changed files with 193 additions and 7 deletions.
18 changes: 18 additions & 0 deletions tosfs/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ByteDance Volcengine EMR, Copyright 2024.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The module contains constants for the tosfs package."""

# TOS server response codes.
# 404 (not found): compared against ``TosServerError.status_code`` by the
# filesystem code to tell a missing bucket/object apart from other failures.
TOS_SERVER_RESPONSE_CODE_NOT_FOUND = 404
158 changes: 151 additions & 7 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,13 @@
from tos.models import CommonPrefixInfo
from tos.models2 import ListedObject, ListedObjectVersion

from tosfs.consts import TOS_SERVER_RESPONSE_CODE_NOT_FOUND
from tosfs.exceptions import TosfsError
from tosfs.utils import find_bucket_key

# environment variable names
ENV_NAME_TOSFS_LOGGING_LEVEL = "TOSFS_LOGGING_LEVEL"

# constants
SERVER_RESPONSE_CODE_NOT_FOUND = 404

logger = logging.getLogger("tosfs")


Expand Down Expand Up @@ -316,7 +314,7 @@ def isdir(self, path: str) -> bool:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
else:
raise e
Expand Down Expand Up @@ -348,7 +346,7 @@ def isfile(self, path: str) -> bool:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
raise e
except Exception as e:
Expand Down Expand Up @@ -391,7 +389,7 @@ def _bucket_info(self, bucket: str) -> dict:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
raise FileNotFoundError(bucket) from e
else:
raise e
Expand Down Expand Up @@ -450,7 +448,7 @@ def _object_info(
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
pass
else:
raise e
Expand Down Expand Up @@ -488,6 +486,152 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def exists(self, path: str, **kwargs: Any) -> bool:
    """Check if a path exists in the TOS.

    The root, a bucket, an object, and a directory marker object (the key
    with a single trailing "/") are all treated as existing paths.

    Parameters
    ----------
    path : str
        The path to check for existence.
    **kwargs : Any, optional
        Additional arguments if needed in the future (currently unused).

    Returns
    -------
    bool
        True if the path exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the path.
    tos.exceptions.TosServerError
        If there is a server error while checking the path.
    TosfsError
        If there is an unknown error while checking the path.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs.exists("tos://bucket/to/file")
    True
    >>> fs.exists("tos://mybucket/nonexistentfile")
    False

    """
    if path in ["", "/"]:
        # the root always exists
        return True

    path = self._strip_protocol(path)
    if path in ["", "/"]:
        # A protocol-only path leaves nothing after stripping; treat it as
        # the root instead of looking up a bucket with an empty name.
        # NOTE(review): assumes _strip_protocol yields "" or "/" for such
        # input -- confirm against its implementation.
        return True

    bucket, key, version_id = self._split_path(path)
    if not key:
        # the path is a bucket
        return self._exists_bucket(bucket)

    if self._exists_object(bucket, key, path, version_id):
        return True

    # Fall back to the directory marker object for this key.
    dir_key = key.rstrip("/") + "/"
    if dir_key == key:
        # The key already was the directory marker and has just been probed;
        # repeating the identical request cannot change the answer.
        return False
    return self._exists_object(bucket, dir_key, path, version_id)

def _exists_bucket(self, bucket: str) -> bool:
    """Tell whether the given bucket is present in TOS.

    Issues a HEAD request for the bucket; a "not found" server response is
    reported as ``False`` while every other failure is raised.

    Parameters
    ----------
    bucket : str
        The name of the bucket to look up.

    Returns
    -------
    bool
        True when the HEAD request succeeds, False when the server answers
        with the not-found status code.

    Raises
    ------
    tos.exceptions.TosClientError
        If the client fails while checking the bucket.
    tos.exceptions.TosServerError
        If the server fails with any status other than not-found.
    TosfsError
        If any other, unexpected error occurs.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_bucket("mybucket")
    True
    >>> fs._exists_bucket("nonexistentbucket")
    False

    """
    try:
        self.tos_client.head_bucket(bucket)
    except tos.exceptions.TosClientError as client_error:
        # Re-raised explicitly so it is not wrapped by the generic handler.
        raise client_error
    except tos.exceptions.TosServerError as server_error:
        if server_error.status_code != TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
            raise server_error
        return False
    except Exception as unknown_error:
        raise TosfsError(
            f"Tosfs failed with unknown error: {unknown_error}"
        ) from unknown_error
    else:
        return True

def _exists_object(
    self, bucket: str, key: str, path: str, version_id: Optional[str] = None
) -> bool:
    """Check if an object exists in the TOS.

    Parameters
    ----------
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    path : str
        The full path of the object (kept for interface compatibility; it
        is not needed to perform the check).
    version_id : str, optional
        The version ID of the object (default is None). When given, the
        check is performed against that specific version.

    Returns
    -------
    bool
        True if the object exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the object.
    tos.exceptions.TosServerError
        If there is a server error while checking the object.
    TosfsError
        If there is an unknown error while checking the object.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
    True
    >>> fs._exists_object("mybucket", "nonexistentfile", "tos://mybucket/nonexistentfile")
    False

    """
    # Forward the version only when one was requested, so the unversioned
    # call stays exactly as before.
    head_kwargs = {} if version_id is None else {"version_id": version_id}
    try:
        self.tos_client.head_object(bucket, key, **head_kwargs)
        return True
    except tos.exceptions.TosClientError as e:
        raise e
    except tos.exceptions.TosServerError as e:
        if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
            return False
        else:
            raise e
    except Exception as e:
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _lsbuckets(self) -> List[dict]:
"""List all buckets in the account.
Expand Down
24 changes: 24 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,27 @@ def test_isfile(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) ->
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/")

tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}")


def test_exists_bucket(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Exercise ``exists`` for the root, a real bucket and a missing bucket."""
    # Both spellings of the root must be reported as existing.
    for root_path in ("", "/"):
        assert tosfs.exists(root_path)

    bucket_found = tosfs.exists(bucket)
    assert bucket_found

    missing_found = tosfs.exists("nonexistent")
    assert not missing_found


def test_exists_object(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Exercise ``exists`` for a workspace, an object inside it and missing paths.

    An object is uploaded into the temporary workspace, checked, removed,
    and checked again to confirm it is no longer reported.
    """
    file_name = random_path()
    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")

    # The workspace is reachable with and without a trailing slash.
    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")

    # Missing paths, both inside the workspace and directly under the bucket.
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
    assert not tosfs.exists(f"{bucket}/nonexistent")

    # After removal the object must no longer be reported.
    tosfs.rm_file(f"{bucket}/{temporary_workspace}/{file_name}")
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")

0 comments on commit c68746d

Please sign in to comment.