From cd2e5906ac378ec01d1a7ebf02b471fa4719ab46 Mon Sep 17 00:00:00 2001 From: John Sirois Date: Sun, 23 Jul 2023 09:00:49 -0600 Subject: [PATCH] Add support for selecting packages and modules. (#2181) This complements the existing `-D` / `--sources-directory` support for adding local sources and resources with finer-grained control over what files are included in the PEX file. Notably, this allows cleanly packaging projects with no `setup.py` / `pyproject.toml` based build when the projects have their Python code at the top level mixed with other files that should not be included in the PEX (e.g.: build scripts, CI configuration, documentation, etc.). Fixes #2134 --- pex/bin/pex.py | 220 ++++++++++++++++++++++----- pex/testing.py | 3 +- tests/integration/test_issue_2134.py | 212 ++++++++++++++++++++++++++ 3 files changed, 393 insertions(+), 42 deletions(-) create mode 100644 tests/integration/test_issue_2134.py diff --git a/pex/bin/pex.py b/pex/bin/pex.py index 281467c6c..d114e984f 100755 --- a/pex/bin/pex.py +++ b/pex/bin/pex.py @@ -23,7 +23,7 @@ global_environment, register_global_arguments, ) -from pex.common import die, safe_mkdtemp +from pex.common import die, filter_pyc_dirs, filter_pyc_files, safe_mkdtemp from pex.enum import Enum from pex.inherit_path import InheritPath from pex.interpreter_constraints import InterpreterConstraints @@ -48,9 +48,14 @@ if TYPE_CHECKING: from argparse import Namespace - from typing import Dict, List, Optional + from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple + + import attr # vendor:skip from pex.resolve.resolver_options import ResolverConfiguration +else: + from pex.third_party import attr + CANNOT_SETUP_INTERPRETER = 102 INVALID_OPTIONS = 103 @@ -461,6 +466,149 @@ def __call__(self, parser, namespace, value, option_str=None): setattr(namespace, self.dest, seed) +@attr.s(frozen=True) +class PythonSource(object): + @classmethod + def parse(cls, name): + # type: (str) -> PythonSource + subdir = None + 
parts = name.split("@", 1) + if len(parts) == 2: + name, subdir = parts + return cls(name=name, subdir=subdir) + + name = attr.ib() # type: str + subdir = attr.ib(default=None) # type: Optional[str] + + def iter_files(self): + # type: () -> Iterator[Tuple[str, str]] + components = self.name.split(".") + parent_package_dirs = components[:-1] + source = components[-1] + + package_path = [self.subdir] if self.subdir else [] # type: List[str] + for package_dir in parent_package_dirs: + package_path.append(package_dir) + package_file_src = os.path.join(*(package_path + ["__init__.py"])) + if os.path.exists(package_file_src): + package_file_dst = ( + os.path.relpath(package_file_src, self.subdir) + if self.subdir + else package_file_src + ) + yield package_file_src, package_file_dst + + for src, dst in self._iter_source_files(package_path, source): + yield src, dst + + def _iter_source_files( + self, + parent_package_path, # type: List[str] + source, # type: str + ): + # type: (...) -> Iterator[Tuple[str, str]] + raise NotImplementedError() + + +class Package(PythonSource): + def _iter_source_files( + self, + parent_package_path, # type: List[str] + source, # type: str + ): + # type: (...) -> Iterator[Tuple[str, str]] + package_dir = os.path.join(*(parent_package_path + [source])) + for root, dirs, files in os.walk(package_dir): + dirs[:] = list(filter_pyc_dirs(dirs)) + for f in filter_pyc_files(files): + src = os.path.join(root, f) + dst = os.path.relpath(src, self.subdir) if self.subdir else src + yield src, dst + + +class Module(PythonSource): + def _iter_source_files( + self, + parent_package_path, # type: List[str] + source, # type: str + ): + # type: (...) 
-> Iterator[Tuple[str, str]] + module_src = os.path.join(*(parent_package_path + ["{module}.py".format(module=source)])) + module_dest = os.path.relpath(module_src, self.subdir) if self.subdir else module_src + yield module_src, module_dest + + +def configure_clp_sources(parser): + # type: (ArgumentParser) -> None + + parser.add_argument( + "-D", + "--sources-directory", + dest="sources_directory", + metavar="DIR", + default=[], + type=str, + action="append", + help=( + "Add a directory containing sources and/or resources to be packaged into the generated " + ".pex file. This option can be used multiple times." + ), + ) + + parser.add_argument( + "-R", + "--resources-directory", + dest="resources_directory", + metavar="DIR", + default=[], + type=str, + action="append", + help=( + "Add resources directory to be packaged into the generated .pex file." + " This option can be used multiple times. DEPRECATED: Use -D/--sources-directory " + "instead." + ), + ) + + parser.add_argument( + "-P", + "--package", + dest="packages", + metavar="PACKAGE_SPEC", + default=[], + type=Package.parse, + action="append", + help=( + "Add a package and all its sub-packages to the generated .pex file. The package is " + "expected to be found relative to the current directory. If the package is housed " + "in a subdirectory, indicate that by appending `@<subdir>`. For example, to add " + "the top-level package `foo` housed in the current directory, use `-P foo`. If the " + "top-level `foo` package is in the `src` subdirectory use `-P foo@src`. If you wish to " + "just use the `foo.bar` package in the `src` subdirectory, use `-P foo.bar@src`. This " + "option can be used multiple times." + ), + ) + + parser.add_argument( + "-M", + "--module", + dest="modules", + metavar="MODULE_SPEC", + default=[], + type=Module.parse, + action="append", + help=( + "Add an individual module to the generated .pex file. The module is expected to be " + "found relative to the current directory. 
If the module is housed in a " + "subdirectory, indicate that by appending `@<subdir>`. For example, to add the " + "top-level module `foo` housed in the current directory, use `-M foo`. If the " + "top-level `foo` module is in the `src` subdirectory use `-M foo@src`. If you wish to " + "just use the `foo.bar` module in the `src` subdirectory, use `-M foo.bar@src`. This " + "option can be used multiple times." + ), + ) + + def configure_clp(): # type: () -> ArgumentParser usage = ( @@ -504,35 +652,7 @@ def configure_clp(): help="The name of a file to be included as the preamble for the generated .pex file", ) - parser.add_argument( - "-D", - "--sources-directory", - dest="sources_directory", - metavar="DIR", - default=[], - type=str, - action="append", - help=( - "Add a directory containing sources and/or resources to be packaged into the generated " - ".pex file. This option can be used multiple times." - ), - ) - - parser.add_argument( - "-R", - "--resources-directory", - dest="resources_directory", - metavar="DIR", - default=[], - type=str, - action="append", - help=( - "Add resources directory to be packaged into the generated .pex file." - " This option can be used multiple times. DEPRECATED: Use -D/--sources-directory " - "instead." 
- ), - ) - + configure_clp_sources(parser) requirement_options.register(parser) parser.add_argument( @@ -580,6 +700,24 @@ def configure_clp(): return parser +def _iter_directory_sources(directories): + # type: (Iterable[str]) -> Iterator[Tuple[str, str]] + for directory in directories: + src_dir = os.path.normpath(directory) + for root, _, files in os.walk(src_dir): + for f in files: + src_file_path = os.path.join(root, f) + dst_path = os.path.relpath(src_file_path, src_dir) + yield src_file_path, dst_path + + +def _iter_python_sources(python_sources): + # type: (Iterable[PythonSource]) -> Iterator[Tuple[str, str]] + for python_source in python_sources: + for src, dst in python_source.iter_files(): + yield src, dst + + def build_pex( requirement_configuration, # type: RequirementConfiguration resolver_configuration, # type: ResolverConfiguration @@ -626,16 +764,16 @@ def build_pex( "dependency cache." ) - directories = OrderedSet( - options.sources_directory + options.resources_directory - ) # type: OrderedSet[str] - for directory in directories: - src_dir = os.path.normpath(directory) - for root, _, files in os.walk(src_dir): - for f in files: - src_file_path = os.path.join(root, f) - dst_path = os.path.relpath(src_file_path, src_dir) - pex_builder.add_source(src_file_path, dst_path) + seen = set() # type: Set[Tuple[str, str]] + for src, dst in itertools.chain( + _iter_directory_sources( + OrderedSet(options.sources_directory + options.resources_directory) + ), + _iter_python_sources(OrderedSet(options.packages + options.modules)), + ): + if (src, dst) not in seen: + pex_builder.add_source(src, dst) + seen.add((src, dst)) pex_info = pex_builder.info pex_info.inject_env = dict(options.inject_env) diff --git a/pex/testing.py b/pex/testing.py index 99f9c0118..4c6bdc178 100644 --- a/pex/testing.py +++ b/pex/testing.py @@ -394,6 +394,7 @@ def run_pex_command( env=None, # type: Optional[Dict[str, str]] python=None, # type: Optional[str] quiet=False, # type: bool + 
cwd=None, # type: Optional[str] ): # type: (...) -> IntegResults """Simulate running pex command for integration testing. @@ -404,7 +405,7 @@ def run_pex_command( """ cmd = create_pex_command(args, python=python, quiet=quiet) process = Executor.open_process( - cmd=cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE + cmd=cmd, env=env, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) output, error = process.communicate() return IntegResults(output.decode("utf-8"), error.decode("utf-8"), process.returncode) diff --git a/tests/integration/test_issue_2134.py b/tests/integration/test_issue_2134.py new file mode 100644 index 000000000..a4a283840 --- /dev/null +++ b/tests/integration/test_issue_2134.py @@ -0,0 +1,212 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +import os.path +import subprocess +import sys +from typing import Iterable, Optional + +import pytest + +from pex import layout +from pex.common import open_zip, touch +from pex.testing import run_pex_command +from pex.typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + import attr # vendor:skip +else: + from pex.third_party import attr + + +@attr.s(frozen=True) +class SourceTree(object): + base_dir = attr.ib() # type: str + + def create(self, offset=None): + # type: (Optional[str]) -> str + + project_dir = os.path.join(self.base_dir, "project") + source_root = os.path.join(project_dir, offset) if offset else project_dir + + touch(os.path.join(source_root, "top_level_module.py")) + + touch(os.path.join(source_root, "top_level_package_classic", "__init__.py")) + touch(os.path.join(source_root, "top_level_package_classic", "module.py")) + touch(os.path.join(source_root, "top_level_package_classic", "sub", "__init__.py")) + touch(os.path.join(source_root, "top_level_package_classic", "sub", "foo.py")) + touch(os.path.join(source_root, "top_level_package_classic", "sub", "bar.py")) + 
+ # N.B.: This test will run against a range of interpreters, some supporting PEP420 and some + # not, so we never import code in the tests that use this fixture and instead just check + # file lists, + touch(os.path.join(source_root, "top_level_package_pep420", "module.py")) + touch(os.path.join(source_root, "top_level_package_pep420", "sub", "module.py")) + + touch(os.path.join(source_root, "top_level_package_mixed", "module.py")) + touch(os.path.join(source_root, "top_level_package_mixed", "sub", "__init__.py")) + touch(os.path.join(source_root, "top_level_package_mixed", "sub", "module.py")) + + subprocess.check_call(args=[sys.executable, "-m", "compileall", source_root]) + + return project_dir + + def assert_sources( + self, + pex_args, # type: Iterable[str] + expected_sources, # type: Iterable[str] + offset=None, # type: Optional[str] + ): + # type: (...) -> None + pex = os.path.join(self.base_dir, "pex") + args = ["-o", pex] + args.extend(pex_args) + run_pex_command(args=args, cwd=self.create(offset=offset)).assert_success() + + with open_zip(pex) as zf: + actual_sources = [ + f + for f in zf.namelist() + if not f.startswith((layout.BOOTSTRAP_DIR, layout.DEPS_DIR, "__pex__")) + and f not in (layout.PEX_INFO_PATH, "__main__.py") + ] + + assert sorted(expected_sources) == sorted(actual_sources) + + +@pytest.fixture +def source_tree(tmpdir): + # type: (Any) -> SourceTree + return SourceTree(str(tmpdir)) + + +def test_add_top_level_package(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-P", "top_level_package_classic"], + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/module.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + "top_level_package_classic/sub/foo.py", + "top_level_package_classic/sub/bar.py", + ], + ) + + +def test_add_sub_package(source_tree): + # type: (SourceTree) -> None + 
source_tree.assert_sources( + pex_args=["-P", "top_level_package_classic.sub"], + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + "top_level_package_classic/sub/foo.py", + "top_level_package_classic/sub/bar.py", + ], + ) + + +def test_add_sub_package_pep_420(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-P", "top_level_package_pep420.sub"], + expected_sources=[ + "top_level_package_pep420/", + "top_level_package_pep420/sub/", + "top_level_package_pep420/sub/module.py", + ], + ) + + +def test_add_sub_package_mixed(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-P", "top_level_package_mixed.sub"], + expected_sources=[ + "top_level_package_mixed/", + "top_level_package_mixed/sub/", + "top_level_package_mixed/sub/__init__.py", + "top_level_package_mixed/sub/module.py", + ], + ) + + +def test_add_package_offset(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-P", "top_level_package_classic.sub@src"], + offset="src", + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + "top_level_package_classic/sub/foo.py", + "top_level_package_classic/sub/bar.py", + ], + ) + + +def test_add_top_level_module(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-M", "top_level_module"], expected_sources=["top_level_module.py"] + ) + + +def test_add_module_in_package(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=["-M", "top_level_package_classic.sub.foo"], + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + 
"top_level_package_classic/sub/foo.py", + ], + ) + + +def test_add_module_offset(source_tree): + # type: (SourceTree) -> None + offset = os.path.join("src", "python") + source_tree.assert_sources( + pex_args=["-M", "top_level_package_classic.sub.bar@{offset}".format(offset=offset)], + offset=offset, + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + "top_level_package_classic/sub/bar.py", + ], + ) + + +def test_overlap(source_tree): + # type: (SourceTree) -> None + source_tree.assert_sources( + pex_args=[ + "-M", + "top_level_package_classic.sub.foo@src", + "-P", + "top_level_package_classic.sub@src", + ], + offset="src", + expected_sources=[ + "top_level_package_classic/", + "top_level_package_classic/__init__.py", + "top_level_package_classic/sub/", + "top_level_package_classic/sub/__init__.py", + "top_level_package_classic/sub/foo.py", + "top_level_package_classic/sub/bar.py", + ], + )