Skip to content

Commit

Permalink
run config diff check now considers number of images
Browse files Browse the repository at this point in the history
New input files are now detected when the config uses glob expressions. Fixes #645.
  • Loading branch information
marxide committed Mar 16, 2022
1 parent beb490c commit e84eb7a
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 15 deletions.
15 changes: 14 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ django-stubs = "^1.9.0"
lightgallery = "^0"
mkdocs-git-revision-date-localized-plugin = "^0"
mike = "^1.1.2"
pyfakefs = "^4.5.5"

[tool.poetry.extras]
prod = ["gevent", "gunicorn"]
Expand Down
4 changes: 2 additions & 2 deletions vast_pipeline/management/commands/runpipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ def run_pipe(
# Before parquets are started to be copied and backed up, a
# check is run to see if anything has actually changed in
# the config
config_diff = pipeline.config.check_prev_config_diff()
if config_diff:
config_diff = pipeline.config.check_prev_config_diff(p_run.n_images)
if not config_diff:
logger.info(
"The config file has either not changed since the"
" previous run or other settings have changed such"
Expand Down
51 changes: 40 additions & 11 deletions vast_pipeline/pipeline/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from glob import glob
import logging
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

from django.contrib.auth.models import User
from django.conf import settings
Expand Down Expand Up @@ -507,25 +507,54 @@ def validate(self, user: User = None):
if not os.path.exists(file):
raise PipelineConfigError(f"{file} does not exist.")

def check_prev_config_diff(self) -> bool:
def count_images(self) -> int:
"""Count the number of input images in the config. Assumes the config has been
validated and glob expressions have been resolved.
Returns:
int: The number of input images in the config.
"""
n_images = 0
for _, images in self._yaml.data["inputs"]["image"].items():
n_images += len(images)
return n_images

def check_prev_config_diff(
self, prev_n_images: int, prev_config: Optional["PipelineConfig"] = None
) -> bool:
"""
Checks if the previous config file differs from the current config file. Used in
add mode. Only returns true if the images are different and the other general
settings are the same (the requirement for add mode). Otherwise False is returned.
Args:
prev_n_images (int): The number of input images in the previous run. If the
number of input images after resolving glob expressions is greater than
this value, the config is considered different.
Returns:
`True` if images are different but general settings are the same,
otherwise `False` is returned.
"""
prev_config = PipelineConfig.from_file(
os.path.join(self["run"]["path"], "config_prev.yaml"),
label="previous run config",
)
if self._yaml == prev_config._yaml:
return True
if prev_config is None:
prev_config = PipelineConfig.from_file(
os.path.join(self["run"]["path"], "config_prev.yaml"),
label="previous run config",
)

# count the input images in case the config uses glob expressions and new images
# have appeared on disk
n_images = self.count_images()

# is the config unchanged, including the image count?
if self._yaml == prev_config._yaml and n_images == prev_n_images:
return False

# are the input image files different?
images_changed = self["inputs"]["image"] != prev_config["inputs"]["image"]
images_changed = (
self["inputs"]["image"] != prev_config["inputs"]["image"]
or n_images > prev_n_images
)

# are all the non-input file configs the same?
config_dict = self._yaml.data
Expand All @@ -535,8 +564,8 @@ def check_prev_config_diff(self) -> bool:
settings_check = config_dict == prev_config_dict

if images_changed and settings_check:
return False
return True
return True
return False

def image_opts(self) -> Dict[str, Any]:
"""
Expand Down
67 changes: 66 additions & 1 deletion vast_pipeline/tests/test_pipelineconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from django.conf import settings
from django.contrib.auth.models import AnonymousUser
from django.test import SimpleTestCase, override_settings
from pyfakefs.fake_filesystem_unittest import TestCase as FakeFsTestCase
import strictyaml as yaml

from vast_pipeline.management.commands.initpiperun import make_config_template
Expand All @@ -20,8 +21,16 @@
PIPELINE_WORKING_DIR=os.path.join(TEST_ROOT, "pipeline-runs"),
MAX_PIPERUN_IMAGES=10,
)
class CheckRunConfigValidationTest(SimpleTestCase):
class CheckRunConfigValidationTest(SimpleTestCase, FakeFsTestCase):
def setUp(self):
# set up fake filesystem
self.setUpPyfakefs()
self.fs.add_real_directory(
os.path.join(settings.PIPELINE_WORKING_DIR, "basic-association")
)
self.fs.add_real_file(PipelineConfig.TEMPLATE_PATH)
self.fs.add_real_directory("vast_pipeline/tests/data")

# load a base run configuration file
self.config_path = os.path.join(
settings.PIPELINE_WORKING_DIR, "basic-association", "config.yaml"
Expand Down Expand Up @@ -185,6 +194,62 @@ def test_input_glob(self):
pipeline_config_original._yaml.data, pipeline_config_globs._yaml.data
)

def test_diff_glob(self):
"""Test that changes to the input files on disk trigger a config change for runs
that use glob expressions."""
def gen_config(config_dict) -> PipelineConfig:
config_yaml = yaml.as_document(config_dict)
config = PipelineConfig(config_yaml)
config.validate()
return config

# replace the inputs with glob expressions
self.config_dict["inputs"]["image"] = {
"glob": "vast_pipeline/tests/data/epoch??.fits"
}
self.config_dict["inputs"]["selavy"] = {
"glob": "vast_pipeline/tests/data/epoch??.selavy.components.txt"
}
self.config_dict["inputs"]["noise"] = {
"glob": "vast_pipeline/tests/data/epoch??.noiseMap.fits"
}
self.config_dict["inputs"]["background"] = {
"glob": "vast_pipeline/tests/data/epoch??.meanMap.fits"
}
pipeline_config_globs = gen_config(self.config_dict)
n_images = pipeline_config_globs.count_images()

# Generate another pipeline config object using the same glob expressions as
# above. Contents will be the same as pipeline_config_globs.
pipeline_config_globs_nochange = gen_config(self.config_dict)

# no change to config nor filesystem, diff check should return False
self.assertFalse(
pipeline_config_globs_nochange.check_prev_config_diff(
n_images, prev_config=pipeline_config_globs
)
)

# add a new input files to the fake filesystem
for f in (
"epoch00.fits",
"epoch00.meanMap.fits",
"epoch00.noiseMap.fits",
"epoch00.selavy.components.txt",
):
self.fs.create_file(f"vast_pipeline/tests/data/{f}")

# Generate another pipeline config object using the same glob expressions as
# above. Contents will be different as new inputs were added to the filesystem.
pipeline_config_globs_newfiles = gen_config(self.config_dict)

# the config inputs should now be different
self.assertTrue(
pipeline_config_globs_newfiles.check_prev_config_diff(
n_images, prev_config=pipeline_config_globs
)
)

def test_input_multiple_globs(self):
"""Test multiple consecutive glob expressions"""
config_yaml_original = yaml.as_document(self.config_dict)
Expand Down

0 comments on commit e84eb7a

Please sign in to comment.