diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
index 3c77f06..da1fc14 100644
--- a/.github/workflows/testing.yml
+++ b/.github/workflows/testing.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, "3.10", 3.11, 3.12, 3.13]
+        python-version: [3.9, "3.10", 3.11, 3.12, 3.13]
 
     steps:
     - uses: actions/checkout@v2
diff --git a/.gitignore b/.gitignore
index 6361508..0a750f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,9 +3,6 @@ __pycache__/
 *.py[cod]
 *$py.class
 
-# C extensions
-*.so
-
 # Distribution / packaging
 .Python
 build/
@@ -47,40 +44,15 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
 # Sphinx documentation
 docs/_build/
 
 # PyBuilder
 target/
 
-# Jupyter Notebook
-.ipynb_checkpoints
-
 # pyenv
 .python-version
 
-# celery beat schedule file
-celerybeat-schedule
-
-# SageMath parsed files
-*.sage.py
-
 # Environments
 .env
 .venv
@@ -97,13 +69,11 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
-# mkdocs documentation
-/site
-
 # mypy
 .mypy_cache/
 
 # PyCharm
 .idea/
 
-pyproject.toml
+# Pipfile
+Pipfile.lock
\ No newline at end of file
diff --git a/Pipfile b/Pipfile
index ff0d577..f98efda 100644
--- a/Pipfile
+++ b/Pipfile
@@ -2,6 +2,9 @@
 url = "https://pypi.org/simple"
 verify_ssl = true
 name = "pypi"
 
+[requires]
+python_version = "3.13"
+
 [packages]
 moto = "*"
diff --git a/docs/advance.rst b/docs/advance.rst
index e481b11..a20fb78 100644
--- a/docs/advance.rst
+++ b/docs/advance.rst
@@ -123,6 +123,7 @@ To enable the old (pathlib common) Algorithm you can configure it like this:
 
    >>> path = PureS3Path('/')
    >>> register_configuration_parameter(path, glob_new_algorithm=False)
+**Note: from version 0.6.0 the glob implementation works only with the new algorithm, therefore the glob_new_algorithm argument is in a deprecation cycle**
 
 .. _pathlib : https://docs.python.org/3/library/pathlib.html
 .. _boto3 : https://github.com/boto/boto3
diff --git a/docs/interface.rst b/docs/interface.rst
index a452b1d..699eb61 100644
--- a/docs/interface.rst
+++ b/docs/interface.rst
@@ -159,7 +159,8 @@ In other words, it enables recursive globbing:
     S3Path('/pypi-proxy/boto3/index.html'),
     S3Path('/pypi-proxy/botocore/index.html')]
 
-New in version 0.4.0:
+
+In version 0.4.0:
 New Algorithm that better suited to s3 API.
 Especially for recursive searches.
 
@@ -169,6 +170,9 @@ To enable the old (pathlib common) Algorithm you can configure it like this:
 
    register_configuration_parameter(path, glob_new_algorithm=False)
 
+New in version 0.6.0:
+The glob implementation works only with the new algorithm, therefore the glob_new_algorithm argument is in a deprecation cycle
+
 For more configuration details please see this `Advanced S3Path configuration`_
 
 **NOTE:** Using the "**" pattern in large Buckets may consume an inordinate amount of time in the old algorithm.
@@ -325,10 +329,13 @@ This is like calling S3Path.glob_ with ``"**/"`` added in front of the given rel
     S3Path('/pypi-proxy/index.html'),
     S3Path('/pypi-proxy/botocore/index.html')]
 
-New in version 0.4.0:
+In version 0.4.0:
 New Algorithm that better suited to s3 API.
 Especially for recursive searches.
 
+New in version 0.6.0:
+The glob implementation works only with the new algorithm, therefore the glob_new_algorithm argument is in a deprecation cycle
+
 
 S3Path.rmdir()
 ^^^^^^^^^^^^^^
diff --git a/s3path/__init__.py b/s3path/__init__.py
index 7877e26..9661d94 100644
--- a/s3path/__init__.py
+++ b/s3path/__init__.py
@@ -5,7 +5,7 @@ from pathlib import Path
 
 from . import accessor
 
-__version__ = '0.5.8'
+__version__ = '0.6.0'
 __all__ = (
     'Path',
     'register_configuration_parameter',
diff --git a/s3path/accessor.py b/s3path/accessor.py
index fc7f22d..5b46f66 100644
--- a/s3path/accessor.py
+++ b/s3path/accessor.py
@@ -1,5 +1,6 @@
 import sys
 import importlib.util
+from warnings import warn
 from os import stat_result
 from threading import Lock
 from itertools import chain
@@ -493,6 +494,8 @@ def set_configuration(self, path, *, resource=None, arguments=None, glob_new_alg
         if resource is not None:
             self.resources[path_name] = resource
         if glob_new_algorithm is not None:
+            warn('glob_new_algorithm configuration is deprecated; '
+                 'only the new glob algorithm is used from this version on', category=DeprecationWarning)
             self.general_options[path_name] = {'glob_new_algorithm': glob_new_algorithm}
 
         self.get_configuration.cache_clear()
diff --git a/s3path/current_version.py b/s3path/current_version.py
index aaee77a..5d05634 100644
--- a/s3path/current_version.py
+++ b/s3path/current_version.py
@@ -34,8 +34,6 @@ def register_configuration_parameter(
         raise TypeError(f'parameters argument have to be a dict type. got {type(path)}')
     if parameters is None and resource is None and glob_new_algorithm is None:
         raise ValueError('user have to specify parameters or resource arguments')
-    if glob_new_algorithm is False and sys.version_info >= (3, 13):
-        raise ValueError('old glob algorithm can only be used by python versions below 3.13')
     accessor.configuration_map.set_configuration(
         path,
         resource=resource,
@@ -43,26 +41,21 @@ def register_configuration_parameter(
         glob_new_algorithm=glob_new_algorithm)
 
 
-
-class _S3Flavour:
+class _S3Parser:
     def __getattr__(self, name):
         return getattr(posixpath, name)
 
 
 
-flavour = _S3Flavour()
-
-
 class PureS3Path(PurePath):
     """
     PurePath subclass for AWS S3 service.
 
     S3 is not a file-system but we can look at it like a POSIX system.
     """
-    _flavour = flavour
-    __slots__ = ()
-    if sys.version_info >= (3, 13):
-        parser = _flavour
+    parser = _flavour = _S3Parser()  # pathlib uses parser from Python 3.13 on; _flavour is kept for older versions
+
+    __slots__ = ()
 
     def __init__(self, *args):
         super().__init__(*args)
@@ -103,7 +96,7 @@ def from_bucket_key(cls, bucket: str, key: str):
         >> PureS3Path.from_bucket_key(bucket='<bucket>', key='<key>')
         << PureS3Path('/<bucket>/<key>')
         """
-        bucket = cls(cls._flavour.sep, bucket)
+        bucket = cls(cls.parser.sep, bucket)
         if len(bucket.parts) != 2:
             raise ValueError(f'bucket argument contains more then one path element: {bucket}')
         key = cls(key)
@@ -135,7 +128,7 @@ def key(self) -> str:
         The AWS S3 Key name, or ''
         """
         self._absolute_path_validation()
-        key = self._flavour.sep.join(self.parts[2:])
+        key = self.parser.sep.join(self.parts[2:])
         return key
 
     def as_uri(self) -> str:
@@ -320,7 +313,7 @@ def owner(self) -> str:
             raise KeyError('file not found')
         return accessor.owner(self)
 
-    def rename(self, target):  # todo: Union[str, S3Path]) -> S3Path:
+    def rename(self, target):
         """
         Renames this file or Bucket / key prefix / key to the given target.
         If target exists and is a file, it will be replaced silently if the user has permission.
@@ -334,7 +327,7 @@ def rename(self, target):  # todo: Union[str, S3Path]) -> S3Path:
         accessor.rename(self, target)
         return type(self)(target)
 
-    def replace(self, target):  # todo: Union[str, S3Path]) -> S3Path:
+    def replace(self, target):
         """
         Renames this Bucket / key prefix / key to the given target.
         If target points to an existing Bucket / key prefix / key, it will be unconditionally replaced.
@@ -392,7 +385,7 @@ def mkdir(self, mode: int = 0o777, parents: bool = False, exist_ok: bool = False
                 raise FileNotFoundError(f'No bucket in {type(self)} {self}')
             if self.key and not parents:
                 raise FileNotFoundError(f'Only bucket path can be created, got {self}')
-            if type(self)(self._flavour.sep, self.bucket).exists():
+            if type(self)(self.parser.sep, self.bucket).exists():
                 raise FileExistsError(f'Bucket {self.bucket} already exists')
             accessor.mkdir(self, mode)
         except OSError:
@@ -433,34 +426,13 @@ def exists(self) -> bool:
             return True
         return accessor.exists(self)
 
-    def iterdir(self):  # todo: -> Generator[S3Path, None, None]:
+    def iterdir(self):
         """
         When the path points to a Bucket or a key prefix, yield path objects of the directory contents
         """
         self._absolute_path_validation()
         for name in accessor.listdir(self):
-            yield self._make_child_relpath(name)
-
-    def _make_child_relpath(self, name):
-        # _make_child_relpath was removed from Python 3.13 in
-        # 30f0643e36d2c9a5849c76ca0b27b748448d0567
-        if sys.version_info < (3, 13):
-            return super()._make_child_relpath(name)
-
-        path_str = str(self)
-        tail = self._tail
-        if tail:
-            path_str = f'{path_str}{self._flavour.sep}{name}'
-        elif path_str != '.':
-            path_str = f'{path_str}{name}'
-        else:
-            path_str = name
-        path = self.with_segments(path_str)
-        path._str = path_str
-        path._drv = self.drive
-        path._root = self.root
-        path._tail_cached = tail + [name]
-        return path
+            yield self / name
 
     def open(
             self,
@@ -481,34 +453,49 @@ def open(
             errors=errors,
             newline=newline)
 
-    def glob(self, pattern: str, *, case_sensitive=None, recurse_symlinks=False):  # todo: -> Generator[S3Path, None, None]:
+    def glob(self, pattern: str, *, case_sensitive=None, recurse_symlinks=False):
         """
         Glob the given relative pattern in the Bucket / key prefix represented by this path,
         yielding all matching files (of any kind)
+
+        The glob method uses a new algorithm that is better suited to the S3 API
         """
         self._absolute_path_validation()
-        general_options = accessor.configuration_map.get_general_options(self)
-        glob_new_algorithm = general_options['glob_new_algorithm']
-        if sys.version_info >= (3, 13):
-            glob_new_algorithm = True
-        if not glob_new_algorithm:
-            yield from super().glob(pattern)
-            return
-        yield from self._glob(pattern)
+        if case_sensitive is False or recurse_symlinks is True:
+            raise ValueError('Glob is case-sensitive and no symbolic links are allowed')
+
+        sys.audit("pathlib.Path.glob", self, pattern)
+        if not pattern:
+            raise ValueError(f'Unacceptable pattern: {pattern}')
+        drv, root, pattern_parts = self._parse_path(pattern)
+        if drv or root:
+            raise NotImplementedError("Non-relative patterns are unsupported")
+        for part in pattern_parts:
+            if part != '**' and '**' in part:
+                raise ValueError("Invalid pattern: '**' can only be an entire path component")
+        selector = _Selector(self, pattern=pattern)
+        yield from selector.select()
 
-    def rglob(self, pattern: str, *, case_sensitive=None, recurse_symlinks=False):  # todo: -> Generator[S3Path, None, None]:
+    def rglob(self, pattern: str, *, case_sensitive=None, recurse_symlinks=False):
         """
         This is like calling S3Path.glob with "**/" added in front of the given relative pattern
+
+        The rglob method uses a new algorithm that is better suited to the S3 API
         """
         self._absolute_path_validation()
-        general_options = accessor.configuration_map.get_general_options(self)
-        glob_new_algorithm = general_options['glob_new_algorithm']
-        if sys.version_info >= (3, 13):
-            glob_new_algorithm = True
-        if not glob_new_algorithm:
-            yield from super().rglob(pattern)
-            return
-        yield from self._rglob(pattern)
+
+        sys.audit("pathlib.Path.rglob", self, pattern)
+        if not pattern:
+            raise ValueError(f'Unacceptable pattern: {pattern}')
+        drv, root, pattern_parts = self._parse_path(pattern)
+        if drv or root:
+            raise NotImplementedError("Non-relative patterns are unsupported")
+        for part in pattern_parts:
+            if part != '**' and '**' in part:
+                raise ValueError("Invalid pattern: '**' can only be an entire path component")
+        pattern = f'**{self.parser.sep}{pattern}'
+        selector = _Selector(self, pattern=pattern)
+        yield from selector.select()
 
     def get_presigned_url(self, expire_in: Union[timedelta, int] = 3600) -> str:
         """
@@ -587,35 +574,6 @@ def _scandir(self):
         """
         return accessor.scandir(self)
 
-    def _glob(self, pattern):
-        """ Glob with new Algorithm that better fit S3 API """
-        sys.audit("pathlib.Path.glob", self, pattern)
-        if not pattern:
-            raise ValueError(f'Unacceptable pattern: {pattern}')
-        drv, root, pattern_parts = self._parse_path(pattern)
-        if drv or root:
-            raise NotImplementedError("Non-relative patterns are unsupported")
-        for part in pattern_parts:
-            if part != '**' and '**' in part:
-                raise ValueError("Invalid pattern: '**' can only be an entire path component")
-        selector = _Selector(self, pattern=pattern)
-        yield from selector.select()
-
-    def _rglob(self, pattern):
-        """ RGlob with new Algorithm that better fit S3 API """
-        sys.audit("pathlib.Path.rglob", self, pattern)
-        if not pattern:
-            raise ValueError(f'Unacceptable pattern: {pattern}')
-        drv, root, pattern_parts = self._parse_path(pattern)
-        if drv or root:
-            raise NotImplementedError("Non-relative patterns are unsupported")
-        for part in pattern_parts:
-            if part != '**' and '**' in part:
-                raise ValueError("Invalid pattern: '**' can only be an entire path component")
-        pattern = f'**{self._flavour.sep}{pattern}'
-        selector = _Selector(self, pattern=pattern)
-        yield from selector.select()
-
 
 class PureVersionedS3Path(PureS3Path):
     """
@@ -723,14 +681,14 @@ def __init__(self, path, *, pattern):
 
     def select(self):
         for target in self._deep_cached_dir_scan():
-            target = f'{self._path._flavour.sep}{self._path.bucket}{target}'
+            target = f'{self._path.parser.sep}{self._path.bucket}{target}'
             if self.match(target):
                 yield type(self._path)(target)
 
     def _prefix_splitter(self, pattern):
         if not _is_wildcard_pattern(pattern):
             if self._path.key:
-                return f'{self._path.key}{self._path._flavour.sep}{pattern}', ''
+                return f'{self._path.key}{self._path.parser.sep}{pattern}', ''
             return pattern, ''
 
         *_, pattern_parts = self._path._parse_path(pattern)
@@ -738,21 +696,21 @@ def _prefix_splitter(self, pattern):
         for index, part in enumerate(pattern_parts):
             if _is_wildcard_pattern(part):
                 break
-            prefix += f'{part}{self._path._flavour.sep}'
+            prefix += f'{part}{self._path.parser.sep}'
 
         if pattern.startswith(prefix):
             pattern = pattern.replace(prefix, '', 1)
 
         key_prefix = self._path.key
         if key_prefix:
-            prefix = self._path._flavour.sep.join((key_prefix, prefix))
+            prefix = self._path.parser.sep.join((key_prefix, prefix))
         return prefix, pattern
 
     def _calculate_pattern_level(self, pattern):
         if '**' in pattern:
             return None
         if self._prefix:
-            pattern = f'{self._prefix}{self._path._flavour.sep}{pattern}'
+            pattern = f'{self._prefix}{self._path.parser.sep}{pattern}'
         *_, pattern_parts = self._path._parse_path(pattern)
         return len(pattern_parts)
 
@@ -767,23 +725,23 @@ def _calculate_full_or_just_folder(self, pattern):
 
     def _deep_cached_dir_scan(self):
         cache = set()
-        prefix_sep_count = self._prefix.count(self._path._flavour.sep)
+        prefix_sep_count = self._prefix.count(self._path.parser.sep)
         for key in accessor.iter_keys(self._path, prefix=self._prefix, full_keys=self._full_keys):
-            key_sep_count = key.count(self._path._flavour.sep) + 1
-            key_parts = key.rsplit(self._path._flavour.sep, maxsplit=key_sep_count - prefix_sep_count)
+            key_sep_count = key.count(self._path.parser.sep) + 1
+            key_parts = key.rsplit(self._path.parser.sep, maxsplit=key_sep_count - prefix_sep_count)
             target_path_parts = key_parts[:self._target_level]
             target_path = ''
             for part in target_path_parts:
                 if not part:
                     continue
-                target_path += f'{self._path._flavour.sep}{part}'
+                target_path += f'{self._path.parser.sep}{part}'
                 if target_path in cache:
                     continue
                 yield target_path
                 cache.add(target_path)
 
     def _compile_pattern_parts(self, prefix, pattern, bucket):
-        pattern = self._path._flavour.sep.join((
+        pattern = self._path.parser.sep.join((
             '',
             bucket,
             prefix,
@@ -793,14 +751,14 @@ def _compile_pattern_parts(self, prefix, pattern, bucket):
 
         new_regex_pattern = ''
         for part in pattern_parts:
-            if part == self._path._flavour.sep:
+            if part == self._path.parser.sep:
                 continue
             if '**' in part:
-                new_regex_pattern += f'{self._path._flavour.sep}*(?s:{part.replace("**", ".*")})'
+                new_regex_pattern += f'{self._path.parser.sep}*(?s:{part.replace("**", ".*")})'
                 continue
             if '*' == part:
-                new_regex_pattern += f'{self._path._flavour.sep}(?s:[^/]+)'
+                new_regex_pattern += f'{self._path.parser.sep}(?s:[^/]+)'
                 continue
-            new_regex_pattern += f'{self._path._flavour.sep}{fnmatch.translate(part)[:-2]}'
+            new_regex_pattern += f'{self._path.parser.sep}{fnmatch.translate(part)[:-2]}'
         new_regex_pattern += r'/*\Z'
         return re.compile(new_regex_pattern).fullmatch
diff --git a/setup.py b/setup.py
index ec5dd52..114befb 100644
--- a/setup.py
+++ b/setup.py
@@ -10,14 +10,11 @@
     author='Lior Mizrahi',
    author_email='li.mizr@gmail.com',
     packages=['s3path'],
-    install_requires=[
-        'boto3>=1.16.35',
-        'smart-open>=5.1.0',
-    ],
+    install_requires=['boto3>=1.16.35', 'smart-open>=5.1.0'],
     license='Apache 2.0',
     long_description=long_description,
     long_description_content_type='text/x-rst',
-    python_requires='>=3.8',
+    python_requires='>=3.9',
     include_package_data=True,
     classifiers=[
         'Development Status :: 4 - Beta',
@@ -26,7 +23,6 @@
         'License :: OSI Approved :: Apache Software License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 3.8',
         'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: 3.10',
         'Programming Language :: Python :: 3.11',
diff --git a/tests/test_path_operations.py b/tests/test_path_operations.py
index 5daeea9..68e269d 100644
--- a/tests/test_path_operations.py
+++ b/tests/test_path_operations.py
@@ -164,14 +164,12 @@ def test_glob_nested_folders_issue_no_120(s3_mock):
     assert list(path.glob("further/*")) == [S3Path('/my-bucket/s3path-test/nested/further/test.txt')]
 
 
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
 def test_glob_old_algo(s3_mock, enable_old_glob):
-    test_glob(s3_mock)
-
-
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
-def test_glob_nested_folders_issue_no_115_old_algo(s3_mock, enable_old_glob):
-    test_glob_nested_folders_issue_no_115(s3_mock)
+    if sys.version_info >= (3, 13):
+        with pytest.deprecated_call():
+            test_glob(s3_mock)
+    else:
+        test_glob(s3_mock)
 
 
 def test_glob_issue_160(s3_mock):
@@ -247,21 +245,6 @@ def test_glob_nested_folders_issue_no_179(s3_mock):
                                           S3Path('/my-bucket/s3path/nested/further/andfurther')]
 
 
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
-def test_glob_issue_160_old_algo(s3_mock, enable_old_glob):
-    test_glob_issue_160(s3_mock)
-
-
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
-def test_glob_issue_160_weird_behavior_old_algo(s3_mock, enable_old_glob):
-    test_glob_issue_160_weird_behavior(s3_mock)
-
-
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
-def test_glob_nested_folders_issue_no_179_old_algo(s3_mock, enable_old_glob):
-    test_glob_nested_folders_issue_no_179(s3_mock)
-
-
 def test_rglob(s3_mock):
     s3 = boto3.resource('s3')
     s3.create_bucket(Bucket='test-bucket')
@@ -290,9 +273,12 @@ def test_rglob(s3_mock):
                                         S3Path('/test-bucket/test_pathlib.py')]
 
 
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
 def test_rglob_old_algo(s3_mock, enable_old_glob):
-    test_rglob(s3_mock)
+    if sys.version_info >= (3, 13):
+        with pytest.deprecated_call():
+            test_rglob(s3_mock)
+    else:
+        test_rglob(s3_mock)
 
 
 def test_accessor_scandir(s3_mock):
@@ -319,9 +305,12 @@ def test_accessor_scandir(s3_mock):
                                         S3Path('/test-bucket/test_pathlib.py')]
 
 
-@pytest.mark.skipif(sys.version_info >= (3, 13), reason="requires python3.12 or lower")
 def test_accessor_scandir_old_algo(s3_mock, enable_old_glob):
-    test_accessor_scandir(s3_mock)
+    if sys.version_info >= (3, 13):
+        with pytest.deprecated_call():
+            test_accessor_scandir(s3_mock)
+    else:
+        test_accessor_scandir(s3_mock)
 
 
 def test_is_dir(s3_mock):
@@ -849,6 +838,7 @@ def test_unlink(s3_mock):
     S3Path("/test-bucket/fake_folder").unlink(missing_ok=True)
     S3Path("/fake-bucket/").unlink(missing_ok=True)
 
+
 def test_absolute(s3_mock):
     s3 = boto3.resource('s3')
     s3.create_bucket(Bucket='test-bucket')
diff --git a/tests/test_pure_path_operations.py b/tests/test_pure_path_operations.py
index a96a720..1894b2a 100644
--- a/tests/test_pure_path_operations.py
+++ b/tests/test_pure_path_operations.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import pytest
 from pathlib import Path, PurePosixPath, PureWindowsPath
 from s3path import PureS3Path
diff --git a/tests/test_s3path_configuration.py b/tests/test_s3path_configuration.py
index b69184b..f042862 100644
--- a/tests/test_s3path_configuration.py
+++ b/tests/test_s3path_configuration.py
@@ -142,9 +142,3 @@ def test_issue_123():
     new_resource, _ = accessor.configuration_map.get_configuration(path)
     assert new_resource is s3
     assert new_resource is not old_resource
-
-
-@pytest.mark.skipif(sys.version_info < (3, 13), reason="requires python3.13 or higher")
-def test_register_configuration_parameter_old_algo():
-    with pytest.raises(ValueError):
-        register_configuration_parameter(PureS3Path('/'), glob_new_algorithm=False)
\ No newline at end of file