Skip to content

Commit

Permalink
Core: Implement exists api
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Aug 24, 2024
1 parent cb00219 commit 1840c03
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 8 deletions.
18 changes: 18 additions & 0 deletions tosfs/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ByteDance Volcengine EMR, Copyright 2024.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The module contains constants for the tosfs package."""

# Tos server response codes
# HTTP status code that callers compare against ``TosServerError.status_code``
# to detect that the requested bucket or object does not exist.
TOS_SERVER_RESPONSE_CODE_NOT_FOUND = 404
156 changes: 150 additions & 6 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,21 @@
"""The core module of TOSFS."""
import logging
import os
from typing import List, Optional, Tuple, Union
from typing import Any, List, Optional, Tuple, Union

import tos
from fsspec import AbstractFileSystem
from fsspec.utils import setup_logging as setup_logger
from tos.models import CommonPrefixInfo
from tos.models2 import ListedObject, ListedObjectVersion

from tosfs.consts import TOS_SERVER_RESPONSE_CODE_NOT_FOUND
from tosfs.exceptions import TosfsError
from tosfs.utils import find_bucket_key

# environment variable names
ENV_NAME_TOSFS_LOGGING_LEVEL = "TOSFS_LOGGING_LEVEL"

# constants
SERVER_RESPONSE_CODE_NOT_FOUND = 404

logger = logging.getLogger("tosfs")


Expand Down Expand Up @@ -268,7 +266,7 @@ def _bucket_info(self, bucket: str) -> dict:
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
raise FileNotFoundError(bucket) from e
else:
raise e
Expand Down Expand Up @@ -327,7 +325,7 @@ def _object_info(
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == SERVER_RESPONSE_CODE_NOT_FOUND:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
pass
else:
raise e
Expand Down Expand Up @@ -365,6 +363,152 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def exists(self, path: str, **kwargs: Any) -> bool:
    """Report whether ``path`` refers to something that exists in TOS.

    The root (``""`` or ``"/"``) is always reported as existing. A path with
    no key part is checked as a bucket. Any other path is first checked as an
    object under its exact key and, if that is not found, checked again with
    a single trailing ``/`` appended to the key.

    Parameters
    ----------
    path : str
        The path to check for existence.
    **kwargs : Any, optional
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    bool
        True if the path exists, False otherwise.

    Raises
    ------
    tos.exceptions.TosClientError
        If there is a client error while checking the path.
    tos.exceptions.TosServerError
        If there is a server error while checking the path.
    TosfsError
        If there is an unknown error while checking the path.

    Examples
    --------
    >>> fs = TosFileSystem()
    >>> fs.exists("tos://bucket/to/file")
    True
    >>> fs.exists("tos://mybucket/nonexistentfile")
    False

    """
    # The root always exists, even for anonymous access.
    if path in ("", "/"):
        return True

    stripped = self._strip_protocol(path)
    bucket, key, version_id = self._split_path(stripped)

    # No key part means the path names a bucket.
    if not key:
        return self._exists_bucket(bucket)

    # Try the exact key first; only when that misses, retry with the key
    # normalized to end in exactly one "/".
    return self._exists_object(
        bucket, key, stripped, version_id
    ) or self._exists_object(bucket, key.rstrip("/") + "/", stripped, version_id)

def _exists_bucket(self, bucket: str) -> bool:
"""Check if a bucket exists in the TOS.
Parameters
----------
bucket : str
The name of the bucket to check for existence.
Returns
-------
bool
True if the bucket exists, False otherwise.
Raises
------
tos.exceptions.TosClientError
If there is a client error while checking the bucket.
tos.exceptions.TosServerError
If there is a server error while checking the bucket.
TosfsError
If there is an unknown error while checking the bucket.
Examples
--------
>>> fs = TosFileSystem()
>>> fs._exists_bucket("mybucket")
True
>>> fs._exists_bucket("nonexistentbucket")
False
"""
try:
self.tos_client.head_bucket(bucket)
return True
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
else:
raise e
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _exists_object(
self, bucket: str, key: str, path: str, version_id: Optional[str] = None
) -> bool:
"""Check if an object exists in the TOS.
Parameters
----------
bucket : str
The name of the bucket.
key : str
The key of the object.
path : str
The full path of the object.
version_id : str, optional
The version ID of the object (default is None).
Returns
-------
bool
True if the object exists, False otherwise.
Raises
------
tos.exceptions.TosClientError
If there is a client error while checking the object.
tos.exceptions.TosServerError
If there is a server error while checking the object.
TosfsError
If there is an unknown error while checking the object.
Examples
--------
>>> fs = TosFileSystem()
>>> fs._exists_object("mybucket", "myfile", "tos://mybucket/myfile")
True
>>> fs._exists_object("mybucket", "nonexistentfile", "tos://mybucket/nonexistentfile")
False
"""
try:
self.tos_client.head_object(bucket, key)
return True
except tos.exceptions.TosClientError as e:
raise e
except tos.exceptions.TosServerError as e:
if e.status_code == TOS_SERVER_RESPONSE_CODE_NOT_FOUND:
return False
else:
raise e
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def _lsbuckets(self) -> List[dict]:
"""List all buckets in the account.
Expand Down
4 changes: 2 additions & 2 deletions tosfs/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,6 @@ def temporary_workspace(
yield workspace
try:
tosfs.rmdir(f"{bucket}/{workspace}/")
except Exception:
logger.error("Ignore exception.")
except Exception as e:
logger.error(f"Ignore exception {e}.")
assert not tosfs.exists(f"{bucket}/{workspace}/")
24 changes: 24 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,27 @@ def test_rmdir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> N
assert f"{bucket}/{temporary_workspace}" not in tosfs.ls(
bucket, detail=False, refresh=True
)


def test_exists_bucket(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Check ``exists`` for the root markers, a real bucket and a missing one."""
    # Both spellings of the root must be reported as existing.
    for root in ("", "/"):
        assert tosfs.exists(root)
    assert tosfs.exists(bucket)
    assert not tosfs.exists("nonexistent")


def test_exists_object(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Check ``exists`` for a workspace directory, an object in it and missing keys."""
    file_name = random_path()
    workspace_path = f"{bucket}/{temporary_workspace}"
    object_path = f"{workspace_path}/{file_name}"
    missing_path = f"{workspace_path}/nonexistent"

    # Create the object directly through the underlying client.
    tosfs.tos_client.put_object(bucket=bucket, key=f"{temporary_workspace}/{file_name}")

    # The workspace is visible with and without a trailing slash.
    assert tosfs.exists(workspace_path)
    assert tosfs.exists(f"{workspace_path}/")
    assert tosfs.exists(object_path)

    assert not tosfs.exists(missing_path)
    assert not tosfs.exists(f"{bucket}/nonexistent")
    # NOTE(review): this repeats the missing-path check above; kept as-is.
    assert not tosfs.exists(missing_path)

    # After removal the object must no longer be reported.
    tosfs.rm_file(object_path)
    assert not tosfs.exists(object_path)

0 comments on commit 1840c03

Please sign in to comment.