Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Try to load config_name_mapping from disk just once #1013

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tron/config/config_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import itertools
import logging
import os
from copy import deepcopy
from typing import Dict
from typing import List
from typing import Optional
Expand Down Expand Up @@ -1068,11 +1069,14 @@ def validate_config_mapping(config_mapping):
msg = "A config mapping requires a %s namespace"
raise ConfigError(msg % MASTER_NAMESPACE)

master = valid_config(config_mapping.pop(MASTER_NAMESPACE))
# we mutate this mapping - so let's make sure that we're making a copy
# in case the passed-in mapping is used elsewhere
config_mapping_to_validate = deepcopy(config_mapping)
master = valid_config(config_mapping_to_validate.pop(MASTER_NAMESPACE))
nodes = get_nodes_from_master_namespace(master)
yield MASTER_NAMESPACE, master

for name, content in config_mapping.items():
for name, content in config_mapping_to_validate.items():
context = ConfigContext(
name,
nodes,
Expand Down
52 changes: 45 additions & 7 deletions tron/config/manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import hashlib
import logging
import os
from copy import deepcopy
from typing import Union

from tron import yaml
from tron.config import config_parse
Expand Down Expand Up @@ -42,7 +44,7 @@ def read_raw(path) -> str:
return fh.read()


def hash_digest(content):
def hash_digest(content: Union[str, bytes]) -> str:
return hashlib.sha1(
maybe_encode(content)
).hexdigest() # TODO: TRON-2293 maybe_encode is a relic of Python2->Python3 migration. Remove it.
Expand Down Expand Up @@ -97,6 +99,7 @@ class ConfigManager:
def __init__(self, config_path, manifest=None):
self.config_path = config_path
self.manifest = manifest or ManifestFile(config_path)
self.name_mapping = None

def build_file_path(self, name):
name = name.replace(".", "_").replace(os.path.sep, "_")
Expand All @@ -107,23 +110,32 @@ def read_raw_config(self, name=schema.MASTER_NAMESPACE) -> str:
filename = self.manifest.get_file_name(name)
return read_raw(filename)

def write_config(self, name, content):
def write_config(self, name: str, content: str) -> None:
loaded_content = from_string(content)
self.validate_with_fragment(
name,
from_string(content),
content=loaded_content,
# TODO: remove this constraint after tron triggers across clusters are supported.
should_validate_missing_dependency=False,
)
# validate_with_fragment throws if the updated content is invalid - so if we get here
# we know it's safe to reflect the update in our config store
self.get_config_name_mapping()[name] = loaded_content

# ...and then let's also persist the update to disk since memory is temporary, but disk is forever™
filename = self.get_filename_from_manifest(name)
write_raw(filename, content)

def delete_config(self, name):
def delete_config(self, name: str) -> None:
filename = self.manifest.get_file_name(name)
if not filename:
msg = "Namespace %s does not exist in manifest, cannot delete."
log.info(msg % name)
return

# to avoid needing to reload from disk on every config load - we need to ensure that
# we also persist config deletions into our cache
self.get_config_name_mapping().pop(name, None)
self.manifest.delete(name)
os.remove(filename)

Expand All @@ -141,7 +153,11 @@ def validate_with_fragment(
content,
should_validate_missing_dependency=True,
):
name_mapping = self.get_config_name_mapping()
# NOTE: we deepcopy rather than swap values to keep this a pure function
# get_config_name_mapping() returns a shared dict, so this would otherwise
# actually update the mapping - which would be unwanted/need to be rolled-back
# should validation fail.
name_mapping = deepcopy(self.get_config_name_mapping())
name_mapping[name] = content
try:
JobGraph(
Expand All @@ -152,8 +168,11 @@ def validate_with_fragment(
raise ConfigError(str(e))

def get_config_name_mapping(self):
seq = self.manifest.get_file_mapping().items()
return {name: read(filename) for name, filename in seq}
if self.name_mapping is None:
log.info("Creating config mapping cache...")
seq = self.manifest.get_file_mapping().items()
self.name_mapping = {name: read(filename) for name, filename in seq}
return self.name_mapping

def load(self):
"""Return the fully constructed configuration."""
Expand All @@ -165,6 +184,25 @@ def get_hash(self, name) -> str:
"""Return a hash of the configuration contents for name."""
if name not in self:
return self.DEFAULT_HASH

if name in self.get_config_name_mapping():
# unfortunately, we have the parsed dict in memory.
# rather than hit the disk to get the raw string - let's convert
# the in-memory dict to a yaml string and hash that to save a couple
# ms (in testing, ~3ms over loading from disk and ~1ms over dumping to json :p)
# TODO: consider storing the hash alongside the config so that we only calculate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea! ++

# hashes once?
return hash_digest(
yaml.dump(
self.get_config_name_mapping()[name],
# ensure that the keys are always in a stable order
sort_keys=True,
),
)

# the config for any name should always be in our name mapping
# ...but just in case, let's fallback to reading from disk.
log.warning("%s not found in name mapping - falling back to hashing contents on disk!")
return hash_digest(self.read_raw_config(name))

def __contains__(self, name):
Expand Down