From d67c1a1b15f4ca371e1e4647897148d66e5ac562 Mon Sep 17 00:00:00 2001 From: Nicola Soranzo Date: Wed, 3 May 2023 19:54:17 +0100 Subject: [PATCH] Set safe default extraction filter for tar archives [PEP 706](https://peps.python.org/pep-0706/), first implemented in Python 3.11.4, mitigates some of the security issues of `TarFile.extract()` and `TarFile.extractall()` by allowing to specify a `filter` keyword-only parameter. Set a safe default (`data_filter`) for the filter if available, reverting to Python 3.11 behavior ('fully_trusted') otherwise, see https://docs.python.org/3/library/tarfile.html#supporting-older-python-versions --- lib/galaxy/tool_util/verify/interactor.py | 3 +++ lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py | 6 +++--- lib/galaxy/util/compression_utils.py | 4 +++- lib/tool_shed/util/repository_content_util.py | 5 ++++- test/unit/tool_shed/test_shed_index.py | 4 +++- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/tool_util/verify/interactor.py b/lib/galaxy/tool_util/verify/interactor.py index 7d66d3a49a6a..e3e1617e77f6 100644 --- a/lib/galaxy/tool_util/verify/interactor.py +++ b/lib/galaxy/tool_util/verify/interactor.py @@ -454,6 +454,9 @@ def test_data_download(self, tool_id, filename, mode="file", is_output=True, too else: # Galaxy < 21.01 with tarfile.open(fileobj=fileobj) as tar_contents: + tar_contents.extraction_filter = getattr( + tarfile, "data_filter", (lambda member, path: member) + ) tar_contents.extractall(path=path) result = path else: diff --git a/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py b/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py index 0b630e34de40..63fa25ed1f61 100644 --- a/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py +++ b/lib/galaxy/tools/imp_exp/unpack_tar_gz_archive.py @@ -60,9 +60,9 @@ def unpack_archive(archive_file, dest_dir): with zipfile.ZipFile(archive_file, "r") as zip_archive: zip_archive.extractall(path=dest_dir) else: - archive_fp = tarfile.open(archive_file, mode="r") - archive_fp.extractall(path=dest_dir) - archive_fp.close() + with tarfile.open(archive_file, mode="r") as archive_fp: + archive_fp.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member)) + archive_fp.extractall(path=dest_dir) def main(options, args): diff --git a/lib/galaxy/util/compression_utils.py b/lib/galaxy/util/compression_utils.py index 93b3457ee953..bc075d44a57e 100644 --- a/lib/galaxy/util/compression_utils.py +++ b/lib/galaxy/util/compression_utils.py @@ -337,7 +337,9 @@ def isfile(self, member: ArchiveMemberType) -> bool: return False def open_tar(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> tarfile.TarFile: - return tarfile.open(filepath, mode, errorlevel=0) + tf = tarfile.open(filepath, mode, errorlevel=0) + tf.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member)) + return tf def open_zip(self, filepath: StrPath, mode: Literal["a", "r", "w", "x"]) -> zipfile.ZipFile: return zipfile.ZipFile(filepath, mode) diff --git a/lib/tool_shed/util/repository_content_util.py b/lib/tool_shed/util/repository_content_util.py index 10b59af6f5be..2356aa786bf3 100644 --- a/lib/tool_shed/util/repository_content_util.py +++ b/lib/tool_shed/util/repository_content_util.py @@ -37,6 +37,7 @@ def tar_open(uploaded_file): tar = tarfile.open(uploaded_file, "r:*") else: tar = tarfile.open(uploaded_file) + tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member)) return tar @@ -49,7 +50,7 @@ def upload_tar( dry_run: bool = False, remove_repo_files_not_in_tar: bool = True, new_repo_alert: bool = False, - tar=None, + tar: Optional[tarfile.TarFile] = None, rdah: Optional[RepositoryDependencyAttributeHandler] = None, tdah: Optional[ToolDependencyAttributeHandler] = None, ) -> ChangeResponseT: @@ -57,6 +58,8 @@ def upload_tar( app = trans.app if tar is None: tar = tar_open(uploaded_file) + else: + tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member)) rdah = rdah or RepositoryDependencyAttributeHandler(trans, unpopulate=False) tdah = tdah or ToolDependencyAttributeHandler(trans, unpopulate=False) # Upload a tar archive of files. diff --git a/test/unit/tool_shed/test_shed_index.py b/test/unit/tool_shed/test_shed_index.py index 880489151e2d..9d70a051812c 100644 --- a/test/unit/tool_shed/test_shed_index.py +++ b/test/unit/tool_shed/test_shed_index.py @@ -29,7 +29,9 @@ def community_file_dir(): response = requests.get(URL) response.raise_for_status() b = BytesIO(response.content) - tarfile.open(fileobj=b, mode="r:gz").extractall(extracted_archive_dir) + with tarfile.open(fileobj=b, mode="r:gz") as tar: + tar.extraction_filter = getattr(tarfile, "data_filter", (lambda member, path: member)) + tar.extractall(extracted_archive_dir) try: yield extracted_archive_dir finally: