Skip to content

Commit

Permalink
Merge branch 'main' into consolidate-gitignores
Browse files Browse the repository at this point in the history
  • Loading branch information
grst authored Jan 8, 2025
2 parents 50a69c9 + 2ab70b3 commit 196544a
Show file tree
Hide file tree
Showing 30 changed files with 668 additions and 505 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ and this project adheres to [Semantic Versioning][].
do not contain a separate `.gitignore` anymore. This means empty folders won't be tracked by git, but
this solves issues with dvc refusing to track the output folder because it is already partly tracked by git.

### New Features

- Python API that mirrors `dso-r` functionality (e.g. to be used from Jupyter notebooks) ([#30](https://github.com/Boehringer-Ingelheim/dso/pull/30))

### Chore

- Refactor CLI into separate module ([#30](https://github.com/Boehringer-Ingelheim/dso/pull/30))
- Defer imports in CLI until they are actually needed to speed up CLI ([#30](https://github.com/Boehringer-Ingelheim/dso/pull/30))
- Make all modules explicitly private that are not part of the public API ([#30](https://github.com/Boehringer-Ingelheim/dso/pull/30))

## v0.10.1

### Fixes
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ optional-dependencies.test = [
urls.Documentation = "https://github.com/Boehringer-Ingelheim/dso"
urls.Home-page = "https://github.com/Boehringer-Ingelheim/dso"
urls.Source = "https://github.com/Boehringer-Ingelheim/dso"
scripts.dso = "dso:cli"
scripts.dso = "dso.cli:cli"

[tool.hatch.version]
source = "vcs"
Expand Down
64 changes: 3 additions & 61 deletions src/dso/__init__.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,4 @@
import logging
import os
from ._metadata import __version__ # noqa
from .api import here, read_params, set_stage, stage_here

import rich_click as click

from ._logging import log
from ._metadata import __version__
from .compile_config import cli as compile_config_cli
from .create import cli as create_cli
from .exec import cli as exec_cli
from .get_config import cli as get_config_cli
from .init import cli as init_cli
from .lint import cli as lint_cli
from .repro import cli as repro_cli
from .watermark import cli as watermark_cli

click.rich_click.USE_MARKDOWN = True


@click.group()
@click.option(
    "-q",
    "--quiet",
    count=True,
    help=(
        "Reduce verbosity. `-q` disables info messages, `-qq` disables warnings. Errors messages cannot be disabled. "
        "The same can be achieved by setting the env var `DSO_QUIET=1` or `DSO_QUIET=2`, respectively."
    ),
    default=int(os.environ.get("DSO_QUIET", 0)),
)
@click.option(
    "-v",
    "--verbose",
    help=(
        "Increase logging verbosity to include debug messages. "
        "The same can be achieved by setting the env var `DSO_VERBOSE=1`."
    ),
    default=bool(int(os.environ.get("DSO_VERBOSE", 0))),
    is_flag=True,
)
@click.version_option(version=__version__, prog_name="dso")
def cli(quiet: int, verbose: bool):
    """Root command"""
    # Pick the log level together with the env var that records the choice.
    # `quiet` is checked first, so it wins over `verbose` when both are given.
    if quiet >= 2:
        level, env_name, env_value = logging.ERROR, "DSO_QUIET", "2"
    elif quiet == 1:
        level, env_name, env_value = logging.WARNING, "DSO_QUIET", "1"
    elif verbose:
        level, env_name, env_value = logging.DEBUG, "DSO_VERBOSE", "1"
    else:
        # Neither flag is active: leave logger and environment untouched.
        return

    log.setLevel(level)
    # Writing the setting into the process environment, as the original does
    os.environ[env_name] = env_value


# Register each subcommand (imported above as `<name>_cli`) on the root `cli` group.
cli.add_command(create_cli)
cli.add_command(init_cli)
cli.add_command(compile_config_cli)
cli.add_command(repro_cli)
cli.add_command(exec_cli)
cli.add_command(lint_cli)
cli.add_command(get_config_cli)
cli.add_command(watermark_cli)
__all__ = ["read_params", "here", "stage_here", "set_stage"]
21 changes: 2 additions & 19 deletions src/dso/compile_config.py → src/dso/_compile_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
from textwrap import dedent

import hiyapyco
import rich_click as click
from ruamel.yaml import YAML, yaml_object

from ._logging import log
from ._util import _find_in_parent, check_project_roots, get_project_root
from ._util import check_project_roots, find_in_parent, get_project_root

PARAMS_YAML_DISCLAIMER = dedent(
"""\
Expand Down Expand Up @@ -117,7 +116,7 @@ def _get_list_of_configs_to_compile(paths: Sequence[Path], project_root: Path):
# Check each parent directory if it contains a "params.in.yaml" - If yes, add it to the list of all configs.
# We don't need to re-check the parents of added items, because their parent is per definition also a parent
# of a config that was already part of the list.
while (tmp_path := _find_in_parent(tmp_path.parent, "params.in.yaml", project_root)) is not None:
while (tmp_path := find_in_parent(tmp_path.parent, "params.in.yaml", project_root)) is not None:
all_configs.add(tmp_path)
# we don't want to find the current config again, therefore .parent
tmp_path = tmp_path.parent
Expand Down Expand Up @@ -198,19 +197,3 @@ def compile_all_configs(paths: Sequence[Path]):
log.debug(f"./{config.relative_to(project_root)} [green]is already up-to-date!")

log.info("[green]Configuration compiled successfully.")


@click.command(name="compile-config")
@click.argument("args", nargs=-1)
def cli(args):
    """Compile params.in.yaml into params.yaml using Jinja2 templating and resolving recursive templates.
    If passing no arguments, configs will be resolved for the current working directory (i.e. all parent configs,
    and all configs in child directories). Alternatively a list of paths can be specified. In that case, all configs
    related to these paths will be compiled (useful for using with pre-commit).
    """
    # Fall back to the current working directory when no paths were given.
    paths = [Path(x) for x in args] if args else [Path.cwd()]
    compile_all_configs(paths)
48 changes: 6 additions & 42 deletions src/dso/get_config.py → src/dso/_get_config.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import os
"""Get configuration for a stage based on params.in.yaml and dvc.yaml"""

import re
import sys
from collections.abc import Collection
from itertools import groupby
from pathlib import Path

import rich_click as click
from ruamel.yaml import YAML

from dso._logging import log
from dso._util import get_project_root
from dso.compile_config import compile_all_configs


def _filter_nested_dict(data: dict, keys: Collection[str]) -> dict:
Expand Down Expand Up @@ -45,8 +44,11 @@ def get_config(stage: str, *, all: bool = False, skip_compile: bool = False) ->
all
If true, the config is not filtered based on the `dvc.yaml` file.
skip_compile
If true, do not compile the config before loading it
If `True`, do not compile the config before loading it.
If `False`, always compile.
"""
from dso._compile_config import compile_all_configs

proj_root = get_project_root(Path.cwd())
log.info(f"Retrieving config for stage ./{stage}")
if ":" in stage:
Expand Down Expand Up @@ -117,41 +119,3 @@ def get_config(stage: str, *, all: bool = False, skip_compile: bool = False) ->
keep_params = {p for p in keep_params if not (p.startswith("item.") or p == "item")}

return _filter_nested_dict(config, keep_params)


@click.command(name="get-config")
@click.option(
    "--all",
    is_flag=True,
    type=bool,
    default=False,
    help="Include all parameters, not only those mentioned in `dvc.yaml`",
)
@click.option(
    "--skip-compile",
    is_flag=True,
    type=bool,
    default=bool(int(os.environ.get("DSO_SKIP_COMPILE", 0))),
    help="Do not compile configs before loading it. The same can be achieved by setting the `DSO_SKIP_COMPILE=1` env var.",
)
@click.argument(
    "stage",
)
def cli(stage, all, skip_compile):
    """Get the configuration for a given stage and print it to STDOUT in yaml format.
    The path to the stage must be relative to the root dir of the project.
    By default, the configuration is filtered to include only the keys that are mentioned in `dvc.yaml` to force
    declaring all dependencies.
    If multiple stages are defined in a single `dvc.yaml`, the stage name MUST be specified using
    `path/to/stage:stage_name` unless `--all` is given.
    """
    try:
        stage_config = get_config(stage, all=all, skip_compile=skip_compile)
        # Serialize the (possibly filtered) config straight to STDOUT.
        YAML().dump(stage_config, sys.stdout)
    except KeyError as missing_key:
        # A KeyError from either step is reported with this message and exit code 1.
        log.error(f"dvc.yaml defines parameter {missing_key} that is not in params.yaml")
        sys.exit(1)
43 changes: 7 additions & 36 deletions src/dso/lint.py → src/dso/_lint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
"""Linting functions for DSO projects"""

import re
import sys
from abc import ABC, abstractmethod
Expand All @@ -7,12 +8,10 @@
from os import chdir
from pathlib import Path

import rich_click as click
from ruamel.yaml import YAML

from dso._logging import log
from dso._util import _find_in_parent, _git_list_files, check_project_roots, get_project_root
from dso.compile_config import compile_all_configs
from dso._util import check_project_roots, find_in_parent, get_project_root, git_list_files


class LintError(Exception):
Expand Down Expand Up @@ -67,7 +66,7 @@ def is_applicable(cls: type["QuartoRule"], file: Path) -> bool:
Return true, if "dso exec quarto" is found in the dvc.yaml associated with this stage AND
the file matches the pattern
"""
dvc_yaml = _find_in_parent(file, "dvc.yaml", get_project_root(file))
dvc_yaml = find_in_parent(file, "dvc.yaml", get_project_root(file))
assert dvc_yaml is not None, "No dvc.yaml found in project"
is_quarto_stage = "dso exec quarto ." in dvc_yaml.read_text()
return is_quarto_stage and Rule._match_filename_pattern(cls.PATTERN, file)
Expand All @@ -82,7 +81,7 @@ class DSO001(QuartoRule):
def check(cls, file):
"""Check that the file passes the linting step."""
root_path = get_project_root(file)
stage_path_expected = _find_in_parent(file, "dvc.yaml", root_path)
stage_path_expected = find_in_parent(file, "dvc.yaml", root_path)
assert stage_path_expected is not None, "No dvc.yaml found in project"
# .parent to remove the dvc.yaml filename
stage_path_expected = str(stage_path_expected.parent.relative_to(root_path))
Expand Down Expand Up @@ -204,7 +203,7 @@ def lint(self, file: Path):
if not file.is_file():
raise ValueError("Only existing files (not directories) may be passed to linter")

config_path = _find_in_parent(file, "params.yaml", get_project_root(file))
config_path = find_in_parent(file, "params.yaml", get_project_root(file))
assert config_path is not None, "No params.yaml found in project"
config = DSOLinter._get_linting_config(config_path)
rules = [r for r in self.rules if r.__name__ not in config.get("exclude", [])]
Expand Down Expand Up @@ -240,7 +239,7 @@ def lint(paths: Sequence[Path]):
if p.is_file():
files.add(p)
else:
files.update(_git_list_files(p))
files.update(git_list_files(p))

log.info(f"Compiled a list of {len(files)} to be linted")

Expand All @@ -258,31 +257,3 @@ def lint(paths: Sequence[Path]):
log.warning(f"Linting completed with {warn} warnings and {error} errors")
if error:
sys.exit(1)


@click.command(name="lint")
@click.option(
    "--skip-compile",
    help="Do not compile configs before linting. The same can be achieved by setting the `DSO_SKIP_COMPILE=1` env var.",
    type=bool,
    default=bool(int(os.environ.get("DSO_SKIP_COMPILE", 0))),
    is_flag=True,
)
@click.argument("args", nargs=-1)
def cli(args, skip_compile: bool = False):
    """Lint a dso project
    Performs consistency checks according to a set of rules.
    If passing no arguments, linting will be performed for the current working directory. Alternatively a list of paths
    can be specified. In that case, all stages related to any of the files are linted (useful for using with pre-commit).
    Configurations are compiled before linting.
    """
    # Fall back to the current working directory when no paths were given.
    paths = [Path(x) for x in args] if args else [Path.cwd()]
    # Compile the configs for the selected paths first, unless this was disabled.
    if not skip_compile:
        compile_all_configs(paths)
    lint(paths)
96 changes: 96 additions & 0 deletions src/dso/_quarto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Helper functions for rendering quarto documents"""

import os
import stat
import subprocess
import sys
import tempfile
from contextlib import contextmanager
from pathlib import Path
from textwrap import dedent, indent

from ruamel.yaml import YAML


def render_quarto(quarto_dir: Path, report_dir: Path, before_script: str, cwd: Path, with_pandocfilter: bool = False):
    """
    Render a quarto project

    Parameters
    ----------
    quarto_dir
        Path that contains the _quarto.yml document
    report_dir
        Output directory of the rendered document. It is created if it does not exist yet
        (its parent directory must already exist).
    before_script
        Bash snippet to execute before running quarto (e.g. to setup the environment)
    cwd
        Working directory in which the generated bash script is executed
    with_pandocfilter
        If `True`, a temporary wrapper script that runs the `dso.pandocfilter` module with the
        current python interpreter is passed to quarto via `--filter`.

    Raises
    ------
    SystemExit
        With the return code of the bash script, if that return code is non-zero.
    """
    import shlex

    # The script template below is indented by 8 spaces and passed through `dedent`.
    # Indenting the snippet by the same amount keeps multi-line snippets aligned with it.
    before_script = indent(before_script, " " * 8)
    report_dir = report_dir.absolute()
    report_dir.mkdir(exist_ok=True)

    # clean up existing `.rmarkdown` files that may interfere with rendering
    # these are leftovers from a previous, failed `quarto render` attempt. If they still exist, the next attempt
    # fails. We remove them *before* the run instead of cleaning them up *after* the run, because they
    # may be useful for debugging failures.
    # see https://github.com/Boehringer-Ingelheim/dso/issues/54
    for f in quarto_dir.glob("*.rmarkdown"):
        if f.is_file():
            f.unlink()

    # propagate quiet setting to quarto
    # (evaluated before the temporary filter script is created, so that a malformed
    # `DSO_QUIET` value cannot leave a stray temporary file behind)
    quiet = "--quiet" if bool(int(os.environ.get("DSO_QUIET", 0))) else ""

    # Enable pandocfilter if requested.
    # We create a temporary script that then calls the current python binary with the dso.pandocfilter module
    # This may seem cumbersome, but we do it this way because
    #  * pandoc only supports a single binary for `--filter`, referring to subcommands or `-m` is not possible here
    #  * we want to ensure that exactly the same python/dso version is used for the pandocfilter as for the
    #    parent command (important when running through dso-mgr)
    filter_script = None
    pandocfilter = ""
    if with_pandocfilter:
        with tempfile.NamedTemporaryFile(delete=False, mode="w") as f:
            f.write("#!/bin/bash\n")
            # quote the interpreter path: it may contain spaces or other shell metacharacters
            f.write(f'{shlex.quote(sys.executable)} -m dso.pandocfilter "$@"\n')
        filter_script = Path(f.name)
        # the script path ends up in shell text as well, therefore it is quoted, too
        pandocfilter = f"--filter {shlex.quote(str(filter_script))}"

    script = dedent(
        f"""\
        #!/bin/bash
        set -euo pipefail
        # this flags enables building larger reports with embedded resources
        export QUARTO_DENO_V8_OPTIONS=--max-old-space-size=8192
        {before_script}
        quarto render "{quarto_dir}" --output-dir "{report_dir}" {quiet} {pandocfilter}
        """
    )

    try:
        if filter_script is not None:
            filter_script.chmod(filter_script.stat().st_mode | stat.S_IEXEC)
        res = subprocess.run(script, shell=True, executable="/bin/bash", cwd=cwd)
    finally:
        # clean up the temporary filter script, also if `chmod` or `subprocess.run` raised
        if filter_script is not None:
            filter_script.unlink(missing_ok=True)

    if res.returncode:
        sys.exit(res.returncode)


@contextmanager
def quarto_config_yml(quarto_config: dict | None, quarto_dir: Path):
    """
    Provide a temporary `_quarto.yml` inside `quarto_dir` for the duration of the `with` block.

    The given configuration (an empty mapping if `None` is passed) is written to the file on
    entering the context; the file is deleted again on leaving it, also if the body raised.
    """
    target = quarto_dir / "_quarto.yml"
    YAML(typ="safe").dump({} if quarto_config is None else quarto_config, target)
    try:
        yield
    finally:
        target.unlink()
Loading

0 comments on commit 196544a

Please sign in to comment.