Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core: Implement exists api #19

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions tosfs/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ByteDance Volcengine EMR, Copyright 2024.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The module contains constants for the tosfs package."""

# TOS server response codes.
# Status code carried by a TOS server error when the requested bucket or
# object does not exist; callers compare it against the error's `status_code`.
TOS_SERVER_RESPONSE_CODE_NOT_FOUND = 404
158 changes: 151 additions & 7 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,13 @@
from tos.models import CommonPrefixInfo
from tos.models2 import ListedObject, ListedObjectVersion

from tosfs.consts import TOS_SERVER_RESPONSE_CODE_NOT_FOUND
from tosfs.exceptions import TosfsError
from tosfs.utils import find_bucket_key

# environment variable names
ENV_NAME_TOSFS_LOGGING_LEVEL = "TOSFS_LOGGING_LEVEL"

# constants
SERVER_RESPONSE_CODE_NOT_FOUND = 404

logger = logging.getLogger("tosfs")


Expand Down Expand Up @@ -316,7 +314,7 @@ def isdir(self, path: str) -> bool:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
else:
raise e
Expand Down Expand Up @@ -348,7 +346,7 @@ def isfile(self, path: str) -> bool:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
raise e
except Exception as e:
Expand Down Expand Up @@ -391,7 +389,7 @@ def _bucket_info(self, bucket: str) -> dict:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
raise FileNotFoundError(bucket) from e
else:
raise e
Expand Down Expand Up @@ -450,7 +448,7 @@ def _object_info(
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
pass
else:
raise e
Expand Down Expand Up @@ -488,6 +486,152 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def exists(self, path: str, **kwargs: Any) -> bool:
    """Check if a path exists in the TOS.

    The root always exists. A path without a key is treated as a bucket.
    Any other path is looked up first under its key as given and, if that
    is not found, under the same key with exactly one trailing "/" (the
    directory-marker form).

    Parameters
    ----------
    path : str
        The path to check for existence.
    **kwargs : Any, optional
        Additional arguments if needed in the future. Currently unused.

    Returns
    -------
    bool
        True if the path exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the path.
    tos.exceptions.TosServerError
        If there is a server error while checking the path.
    TosfsError
        If there is an unknown error while checking the path.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs.exists("tos://bucket/to/file")
    True
    >>> fs.exists("tos://mybucket/nonexistentfile")
    False

    """
    if path in ("", "/"):
        # the root always exists
        return True

    path = self._strip_protocol(path)
    if path in ("", "/"):
        # The path may only reduce to the root once the protocol prefix has
        # been removed; answer here instead of probing a bucket with an
        # empty name.
        return True

    bucket, key, version_id = self._split_path(path)
    # if the path is a bucket
    if not key:
        return self._exists_bucket(bucket)

    if self._exists_object(bucket, key, path, version_id):
        return True

    # Not found under the key as given: retry with the directory-marker
    # form of the key (exactly one trailing slash).
    dir_key = key.rstrip("/") + "/"
    if dir_key == key:
        # This exact key was probed above; a second identical HEAD request
        # would only repeat the same answer.
        return False
    return self._exists_object(bucket, dir_key, path, version_id)

def _exists_bucket(self, bucket: str) -> bool:
    """Report whether the given bucket is present in TOS.

    Issues a HEAD request for the bucket through the TOS client. A server
    error carrying the "not found" status code is treated as "the bucket
    does not exist"; every other failure is propagated.

    Parameters
    ----------
    bucket : str
        The name of the bucket to check for existence.

    Returns
    -------
    bool
        True if the bucket exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the bucket.
    tos.exceptions.TosServerError
        If there is a server error other than "not found".
    TosfsError
        If there is an unknown error while checking the bucket.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_bucket("mybucket")
    True
    >>> fs._exists_bucket("nonexistentbucket")
    False

    """
    try:
        self.tos_client.head_bucket(bucket)
    except tos.exceptions.TosClientError as client_error:
        # Client-side failures are surfaced to the caller untouched.
        raise client_error
    except tos.exceptions.TosServerError as server_error:
        if server_error.status_code != TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
            raise server_error
        # "Not found" is the expected answer for a missing bucket.
        return False
    except Exception as e:
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e
    else:
        # The HEAD request succeeded, so the bucket is there.
        return True

def _exists_object(
    self, bucket: str, key: str, path: str, version_id: Optional[str] = None
) -> bool:
    """Check if an object exists in the TOS.

    Issues a HEAD request for the object through the TOS client. A server
    error carrying the "not found" status code is treated as "the object
    does not exist"; every other failure is propagated.

    Parameters
    ----------
    bucket : str
        The name of the bucket.
    key : str
        The key of the object.
    path : str
        The full path of the object. Not used by the lookup itself; kept
        for interface compatibility with callers.
    version_id : str, optional
        The version ID of the object (default is None). When given, the
        existence check targets that specific version.

    Returns
    -------
    bool
        True if the object exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the object.
    tos.exceptions.TosServerError
        If there is a server error other than "not found".
    TosfsError
        If there is an unknown error while checking the object.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
    True
    >>> fs._exists_object("mybucket", "nonexistentfile", "tos://mybucket/nonexistentfile")
    False

    """
    try:
        # Forward the version so that a versioned path is checked against
        # that exact version rather than silently against the latest one.
        self.tos_client.head_object(bucket, key, version_id=version_id)
        return True
    except tos.exceptions.TosClientError as e:
        raise e
    except tos.exceptions.TosServerError as e:
        if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
            # "Not found" is the expected answer for a missing object.
            return False
        else:
            raise e
    except Exception as e:
        raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _lsbuckets(self) -> List[dict]:
"""List all buckets in the account.

Expand Down
24 changes: 24 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,27 @@ def test_isfile(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) ->
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/")

tosfs._rm(f"{bucket}/{temporary_workspace}/{file_name}")


def test_exists_bucket(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Root paths and the test bucket exist; an unknown bucket does not."""
    for present in ("", "/", bucket):
        assert tosfs.exists(present)
    assert not tosfs.exists("nonexistent")


def test_exists_object(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Check `exists` for a workspace directory, a file in it and missing keys."""
    file_name = random_path()
    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")

    # The workspace is reachable with and without a trailing slash.
    assert tosfs.exists(f"{bucket}/{temporary_workspace}")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/")
    assert tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")

    # Keys that were never written must not be reported as existing.
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/nonexistent")
    assert not tosfs.exists(f"{bucket}/nonexistent")

    # Once the file is removed it must no longer be reported as existing.
    tosfs.rm_file(f"{bucket}/{temporary_workspace}/{file_name}")
    assert not tosfs.exists(f"{bucket}/{temporary_workspace}/{file_name}")