Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support merging output from multiple file keys when writing to stdout #115

Merged
merged 11 commits into from
Oct 22, 2024
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ ENV_NAME="cudf_test"
rapids-dependency-file-generator \
--file-key "test" \
--output "conda" \
--matrix "cuda=11.5;arch=$(arch)" > env.yaml
--matrix "cuda=12.5;arch=$(arch)" > env.yaml
mamba env create --file env.yaml
mamba activate "$ENV_NAME"

Expand All @@ -335,6 +335,16 @@ The `--file-key`, `--output`, and `--matrix` flags must be used together. `--mat

Where multiple values for the same key are passed to `--matrix`, e.g. `cuda_suffixed=true;cuda_suffixed=false`, only the last value will be used.

Where `--file-key` is supplied multiple times in the same invocation, the output printed to `stdout` will contain a union (without duplicates) of all of the corresponding dependencies. For example:

```shell
rapids-dependency-file-generator \
--file-key "test" \
--file-key "test_notebooks" \
--output "conda" \
--matrix "cuda=12.5;arch=$(arch)" > env.yaml
```

The `--prepend-channel` argument accepts additional channels to use, like `rapids-dependency-file-generator --prepend-channel my_channel --prepend-channel my_other_channel`.
If both `--output` and `--prepend-channel` are provided, the output format must be conda.
Prepending channels can be useful for adding local channels with packages to be tested in CI workflows.
Expand Down
8 changes: 6 additions & 2 deletions src/rapids_dependency_file_generator/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ def validate_args(argv):
codependent_args = parser.add_argument_group("optional, but codependent")
codependent_args.add_argument(
"--file-key",
help="The file key from `dependencies.yaml` to generate.",
action="append",
help=(
"The file key from `dependencies.yaml` to generate. "
"If supplied multiple times, dependency lists from all requested file keys will be merged."
),
)
codependent_args.add_argument(
"--output",
Expand Down Expand Up @@ -109,7 +113,7 @@ def main(argv=None) -> None:
to_stdout = all([args.file_key, args.output, args.matrix is not None])

if to_stdout:
file_keys = [args.file_key]
file_keys = args.file_key
output = {Output(args.output)}
else:
file_keys = list(parsed_config.files.keys())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import textwrap
import typing
from collections.abc import Generator
from dataclasses import dataclass

import tomlkit
import yaml
Expand Down Expand Up @@ -95,27 +96,32 @@ def grid(gridspec: dict[str, list[str]]) -> Generator[dict[str, str], None, None
def make_dependency_file(
*,
file_type: _config.Output,
name: os.PathLike,
conda_env_name: str,
file_name: str,
config_file: os.PathLike,
output_dir: os.PathLike,
conda_channels: list[str],
dependencies: typing.Sequence[typing.Union[str, dict[str, list[str]]]],
extras: typing.Union[_config.FileExtras, None],
):
) -> str:
"""Generate the contents of the dependency file.

Parameters
----------
file_type : Output
An Output value used to determine the file type.
name : PathLike
The name of the file to write.
conda_env_name : str
Name to put in the 'name: ' field when generating conda environment YAML files.
Only used when ``file_type`` is CONDA.
file_name : str
Name of a file in ``output_dir`` to read in.
Only used when ``file_type`` is PYPROJECT.
config_file : PathLike
The full path to the dependencies.yaml file.
output_dir : PathLike
The path to the directory where the dependency files will be written.
conda_channels : list[str]
The channels to include in the file. Only used when `file_type` is
The channels to include in the file. Only used when ``file_type`` is
CONDA.
dependencies : Sequence[str | dict[str, list[str]]]
The dependencies to include in the file.
Expand All @@ -137,7 +143,7 @@ def make_dependency_file(
if file_type == _config.Output.CONDA:
file_contents += yaml.dump(
{
"name": os.path.splitext(name)[0],
"name": conda_env_name,
"channels": conda_channels,
"dependencies": dependencies,
}
Expand Down Expand Up @@ -173,7 +179,7 @@ def make_dependency_file(
key = extras.key

# This file type needs to be modified in place instead of built from scratch.
with open(os.path.join(output_dir, name)) as f:
with open(os.path.join(output_dir, file_name)) as f:
file_contents_toml = tomlkit.load(f)

toml_deps = tomlkit.array()
Expand Down Expand Up @@ -320,6 +326,32 @@ def should_use_specific_entry(matrix_combo: dict[str, str], specific_entry_matri
)


@dataclass
class _DependencyCollection:
str_deps: set[str]
# e.g. {"pip": ["dgl", "pyg"]}, used in conda envs
dict_deps: dict[str, list[str]]

def update(self, deps: typing.Sequence[typing.Union[str, dict[str, list[str]]]]) -> None:
for dep in deps:
if isinstance(dep, dict):
for k, v in dep.items():
if k in self.dict_deps:
self.dict_deps[k].extend(v)
self.dict_deps[k] = sorted(set(self.dict_deps[k]))
else:
self.dict_deps[k] = v
else:
self.str_deps.add(dep)

@property
def deps_list(self) -> typing.Sequence[typing.Union[str, dict[str, list[str]]]]:
if self.dict_deps:
return [*sorted(self.str_deps), self.dict_deps]

return [*sorted(self.str_deps)]


def make_dependency_files(
*,
parsed_config: _config.Config,
Expand Down Expand Up @@ -360,6 +392,19 @@ def make_dependency_files(
If the file is malformed. There are numerous different error cases
which are described by the error messages.
"""
if to_stdout and len(file_keys) > 1 and output is not None and _config.Output.PYPROJECT in output:
raise ValueError(
f"Using --file-key multiple times together with '--output {_config.Output.PYPROJECT.value}' "
"when writing to stdout is not supported."
)

# the list of conda channels does not depend on individual file keys
conda_channels = prepend_channels + parsed_config.channels

# initialize a container for "all dependencies found across all files", to support
# passing multiple files keys and writing a merged result to stdout
all_dependencies = _DependencyCollection(str_deps=set(), dict_deps={})

for file_key in file_keys:
file_config = parsed_config.files[file_key]
file_types_to_generate = file_config.output if output is None else output
Expand Down Expand Up @@ -438,18 +483,50 @@ def make_dependency_files(
)
contents = make_dependency_file(
file_type=file_type,
name=full_file_name,
conda_env_name=os.path.splitext(full_file_name)[0],
file_name=full_file_name,
config_file=parsed_config.path,
output_dir=output_dir,
conda_channels=prepend_channels + parsed_config.channels,
conda_channels=conda_channels,
dependencies=deduped_deps,
extras=file_config.extras,
)

if to_stdout:
print(contents)
if len(file_keys) == 1:
print(contents)
else:
all_dependencies.update(deduped_deps)
else:
os.makedirs(output_dir, exist_ok=True)
file_path = os.path.join(output_dir, full_file_name)
with open(file_path, "w") as f:
f.write(contents)

# create one unified output from all the file_keys, and print it to stdout
if to_stdout and len(file_keys) > 1:
# convince mypy that 'output' is not None here
#
# 'output' is technically a set because of https://github.com/rapidsai/dependency-file-generator/pull/74,
# but since https://github.com/rapidsai/dependency-file-generator/pull/79 it's only ever one of the following:
#
# - an exactly-1-item set (stdout=True, or when used by rapids-build-backend)
# - 'None' (stdout=False)
#
err_msg = (
"Exactly 1 output type should be provided when asking rapids-dependency-file-generator to write to stdout. "
"If you see this, you've found a bug. Please report it at https://github.com/rapidsai/dependency-file-generator/issues."
jameslamb marked this conversation as resolved.
Show resolved Hide resolved
)
assert output is not None, err_msg

contents = make_dependency_file(
file_type=output.pop(),
conda_env_name="rapids-dfg-combined",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name: field in these generated conda environments is ignored in every workflow I'm aware of in RAPIDS... it's always overwritten by -n / --name passed through like this:

rapids-dependency-file-generator \
  --output conda \
  --file-key test_cpp \
  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee "${ENV_YAML_DIR}/env.yaml"

mamba env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test

That's why I'm proposing a hard-coded mildly-informative name instead of taking on the complexity of other alternatives, like:

  • allowing conda_env_name=None to be passed to make_dependency_file()
  • trying to come up with some other representative environment name based on which --file-keys were passed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’d weakly prefer allowing None here. I don’t think conda environment files require a name? Maybe we can just omit it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I just pushed df3848d changing this to allow None.

I don't think conda environment files require a name?

Correct.

cat > env.yaml <<EOF
channels:
- conda-forge
dependencies:
  - pandas
  - pip
  - pip:
    - scikit-learn
EOF

conda env create \
  --name delete-me \
  --file ./env.yaml

Works without issue

...
done
#
# To activate this environment, use
#
#     $ conda activate delete-me
#
# To deactivate an active environment, use
#
#     $ conda deactivate

file_name="ignored-because-multiple-pyproject-files-are-not-supported",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make_dependency_file() is already really 3 distinct codepaths inside one function. The codepath for generating pyproject output rightly expects to be given a path to an existing pyproject.toml file to read in.

I'm proposing just not allowing multiple --file-key together with --output pyproject and passing through this hard-coded nonsense string instead of taking on the complexity of other options like:

  • breaking make_dependency_file() up into e.g. make_conda_env_file(), make_requirements_file(), and make_pyproject_file()
  • allowing file_name=None to be passed to make_dependency_file() and generating TOML content from scratch if it is

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to read this 3 times but I think I understand all of the information in this comment? I don't have any feedback, I think this is maybe-fine-enough and if not we can fix it later.

Copy link
Member Author

@jameslamb jameslamb Oct 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me try a different way with links.

After processing dependencies.yaml, filtering stuff by output type and matrices, deduplicating, etc., DFG wants the literal text that it'll either echo to stdout or write to a file.

For that, it calls make_dependency_file():

Despite the name, that function actually just creates and returns that text... it never writes anything to the filesystem.

The body of the function, in pseudocode, is like:

def make_dependency_file(...):
    relative_path_to_config_file = os.path.relpath(config_file, output_dir)
    file_contents = textwrap.dedent(
        f"""\
        {HEADER}
        # To make changes, edit {relative_path_to_config_file} and run `{cli_name}`.
        """
    )
    if conda:
        # {code specific to conda env YAML files}
    elif requirements:
        # {code specific to requirements.txt}
    elif pyproject:
        # {code specific to pyproject.toml}

    return file_contents

So there's very little shared code in there. The list of arguments for the function contains a mix of things that are only used for some but not all of the output types.

Like this:

conda_channels : list[str]
The channels to include in the file. Only used when `file_type` is
CONDA.

That's complex but has been kind of "fine" as long as this was always being called with the data from a single --file-key.

For this PR, we now want to use that function to generate a single type of output from multiple --file-key entries. For that, there's no single representative output_dir, for example. Here, I've just kind of awkwardly provided values with the right types so mypy will be happy, with the knowledge that they don't really matter (since most of the arguments are pyproject-specific, and this PR isn't supporting pyproject.toml).

It'd be clearer if make_dependency_file() was decomposed into make_conda_env_file(), make_requirements_file() and make_pyproject_file(). But I really really did not want to take that on in the scope of this PR.

config_file=parsed_config.path,
output_dir=parsed_config.path,
conda_channels=conda_channels,
dependencies=all_dependencies.deps_list,
extras=None,
)
print(contents)
82 changes: 82 additions & 0 deletions tests/examples/overlapping-deps/dependencies.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
files:
build_deps:
output: [pyproject]
pyproject_dir: output/actual
extras:
table: build-system
includes:
- rapids_build_skbuild
- depends_on_numpy
even_more_build_deps:
output: [pyproject]
pyproject_dir: output/actual
extras:
table: tool.rapids-build-backend
key: requires
includes:
- depends_on_numpy
- depends_on_pandas
test_deps:
output: none
includes:
- depends_on_numpy
- depends_on_pandas
even_more_test_deps:
output: none
includes:
- depends_on_numpy
- test_python
test_with_sklearn:
output: none
includes:
- depends_on_scikit_learn
channels:
- rapidsai
- conda-forge
dependencies:
depends_on_numpy:
common:
- output_types: [requirements, pyproject]
packages:
- numpy>=2.0
# using 'pip' intentionally to test handling of that nested list
- output_types: [conda]
packages:
- pip
- pip:
- numpy >=2.0
depends_on_pandas:
common:
- output_types: [conda, requirements, pyproject]
packages:
- pandas<3.0
depends_on_scikit_learn:
common:
- output_types: [conda, requirements, pyproject]
packages:
- scikit-learn>=1.5
test_python:
common:
- output_types: [conda, requirements, pyproject]
packages:
- matplotlib
- output_types: [conda]
packages:
- pip
# intentional overlap (numpy) with depends_on_numpy's pip list, to
# test that pip dependencies don't have duplicates
- pip:
# intentionally not in alphabetical order
- numpy >=2.0
- folium
rapids_build_skbuild:
common:
- output_types: [conda, requirements, pyproject]
packages:
- rapids-build-backend>=0.3.1
- output_types: [requirements, pyproject]
packages:
- scikit-build-core[pyproject]>=0.9.0
- output_types: [conda]
packages:
- scikit-build-core>=0.9.0
20 changes: 20 additions & 0 deletions tests/examples/overlapping-deps/output/expected/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
build-backend = "rapids_build_backend.build_meta"
requires = [
"numpy>=2.0",
"rapids-build-backend>=0.3.1",
"scikit-build-core[pyproject]>=0.9.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.

[project]
name = "libbeepboop"
version = "0.1.2"
dependencies = [
"scipy",
]

[tool.rapids-build-backend]
requires = [
"numpy>=2.0",
"pandas<3.0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
44 changes: 44 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,47 @@ def test_validate_args():
"all",
]
)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are wondering "why isn't there a test case here checking that passing multiple --file-key together with --output pyproject is rejected?"... it's because I put the exception-raising code for that down further, in make_dependency_file(), so that the exception would also be raised if rapids-build-backend (which does not use the CLI) tried to pass inputs like that.

I did add a unit test in test_rapids_dependency_file_generator checking that that exception is raised as expected.

# Valid, with 2 files for --output requirements
validate_args(
[
"--output",
"requirements",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
]
)

# Valid, with 2 files for --output conda
validate_args(
[
"--output",
"conda",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
]
)

# Valid, with 3 files
validate_args(
[
"--output",
"requirements",
"--matrix",
"cuda=12.5",
"--file-key",
"all",
"--file-key",
"test_python",
"--file-key",
"build_python",
]
)
Loading
Loading