From e30a28c03ff8ab09335789301abe313142d03631 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Fri, 20 Dec 2024 13:22:17 -0800 Subject: [PATCH 1/3] Try to load config_name_mapping from disk just once There's enough files (and enough YAML in these files) that the IO/YAML parsing takes a significant amount of time. While it's nice to always load from the source-of-truth (i.e., the files on-disk) - it's not worth paying the performance penalty (especially at the scale we're seeing internally) for a negligible benefit. Locally (with a large test config), this results in a ~5x improvement in timings. More concretely, my test configs were taking ~30ish seconds *without* this diff and ~6ish seconds *with* this diff. --- tron/config/config_parse.py | 8 ++++++-- tron/config/manager.py | 30 ++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tron/config/config_parse.py b/tron/config/config_parse.py index 04e66a599..f07afd27f 100644 --- a/tron/config/config_parse.py +++ b/tron/config/config_parse.py @@ -11,6 +11,7 @@ import itertools import logging import os +from copy import deepcopy from typing import Dict from typing import List from typing import Optional @@ -1066,11 +1067,14 @@ def validate_config_mapping(config_mapping): msg = "A config mapping requires a %s namespace" raise ConfigError(msg % MASTER_NAMESPACE) - master = valid_config(config_mapping.pop(MASTER_NAMESPACE)) + # we mutate this mapping - so let's make sure that we're making a copy + # in case the passed-in mapping is used elsewhere + config_mapping_to_validate = deepcopy(config_mapping) + master = valid_config(config_mapping_to_validate.pop(MASTER_NAMESPACE)) nodes = get_nodes_from_master_namespace(master) yield MASTER_NAMESPACE, master - for name, content in config_mapping.items(): + for name, content in config_mapping_to_validate.items(): context = ConfigContext( name, nodes, diff --git a/tron/config/manager.py b/tron/config/manager.py index 7717f039a..7da307061 100644 --- a/tron/config/manager.py +++ b/tron/config/manager.py @@ -1,6 +1,7 @@ import hashlib import logging import os +from copy import deepcopy from tron import yaml from tron.config import config_parse @@ -97,6 +98,7 @@ class ConfigManager: def __init__(self, config_path, manifest=None): self.config_path = config_path self.manifest = manifest or ManifestFile(config_path) + self.name_mapping = None def build_file_path(self, name): name = name.replace(".", "_").replace(os.path.sep, "_") @@ -107,23 +109,32 @@ def read_raw_config(self, name=schema.MASTER_NAMESPACE) -> str: filename = self.manifest.get_file_name(name) return read_raw(filename) - def write_config(self, name, content): + def write_config(self, name: str, content: str) -> None: + loaded_content = from_string(content) self.validate_with_fragment( name, - from_string(content), + content=loaded_content, # TODO: remove this constraint after tron triggers across clusters are supported. should_validate_missing_dependency=False, ) + # validate_with_fragment throws if the updated content is invalid - so if we get here + # we know it's safe to reflect the update in our config store + self.get_config_name_mapping()[name] = loaded_content + + # ...and then let's also persist the update to disk since memory is temporary, but disk is foreverâ„¢ filename = self.get_filename_from_manifest(name) write_raw(filename, content) - def delete_config(self, name): + def delete_config(self, name: str) -> None: filename = self.manifest.get_file_name(name) if not filename: msg = "Namespace %s does not exist in manifest, cannot delete." log.info(msg % name) return + # to avoid needing to reload from disk on every config load - we need to ensure that + # we also persist config deletions into our cache + self.get_config_name_mapping().pop(name, None) self.manifest.delete(name) os.remove(filename) @@ -141,7 +152,11 @@ def validate_with_fragment( content, should_validate_missing_dependency=True, ): - name_mapping = self.get_config_name_mapping() + # NOTE: we deepcopy rather than swap values to keep this a pure function + # get_config_name_mapping() returns a shared dict, so this would otherwise + # actually update the mapping - which would be unwanted/need to be rolled-back + # should validation fail. + name_mapping = deepcopy(self.get_config_name_mapping()) name_mapping[name] = content try: JobGraph( @@ -152,8 +167,11 @@ def validate_with_fragment( raise ConfigError(str(e)) def get_config_name_mapping(self): - seq = self.manifest.get_file_mapping().items() - return {name: read(filename) for name, filename in seq} + if self.name_mapping is None: + log.info("Creating config mapping cache...") + seq = self.manifest.get_file_mapping().items() + self.name_mapping = {name: read(filename) for name, filename in seq} + return self.name_mapping def load(self): """Return the fully constructed configuration.""" From ea486612e8155896ac723194cfeb2bc1c100e438 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Thu, 9 Jan 2025 11:48:42 -0800 Subject: [PATCH 2/3] Also use cache in get_hash --- tron/config/manager.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tron/config/manager.py b/tron/config/manager.py index 7da307061..072dbdfe6 100644 --- a/tron/config/manager.py +++ b/tron/config/manager.py @@ -2,6 +2,7 @@ import logging import os from copy import deepcopy +from typing import Union from tron import yaml from tron.config import config_parse @@ -43,7 +44,7 @@ def read_raw(path) -> str: return fh.read() -def hash_digest(content): +def hash_digest(content: Union[str, bytes]) -> str: return hashlib.sha1( maybe_encode(content) ).hexdigest() # TODO: TRON-2293 maybe_encode is a relic of Python2->Python3 migration. Remove it. @@ -183,6 +184,25 @@ def get_hash(self, name) -> str: """Return a hash of the configuration contents for name.""" if name not in self: return self.DEFAULT_HASH + + if name in self.get_config_name_mapping(): + # unfortunately, we have the parsed dict in memory. + # rather than hit the disk to get the raw string - let's convert + # the in-memory dict to a yaml string and hash that to save a couple + # ms (in testing, ~3ms over loading from disk and ~1ms over dumping to json :p) + # TODO: consider storing the hash alongside the config so that we only calculate + # hashes once? + return hash_digest( + yaml.dumps( + self.get_config_name_mapping()[name], + # ensure that the keys are always in a stable order + sort_keys=True, + ), + ) + + # the config for any name should always be in our name mapping + # ...but just in case, let's fallback to reading from disk. + log.warning("%s not found in name mapping - falling back to hashing contents on disk!") return hash_digest(self.read_raw_config(name)) def __contains__(self, name): From 6da166821db7cbd2efcb6fa6ae450daf15a40de1 Mon Sep 17 00:00:00 2001 From: Luis Perez Date: Thu, 9 Jan 2025 14:52:56 -0800 Subject: [PATCH 3/3] fix typo --- tron/config/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tron/config/manager.py b/tron/config/manager.py index 072dbdfe6..494f06bcd 100644 --- a/tron/config/manager.py +++ b/tron/config/manager.py @@ -193,7 +193,7 @@ def get_hash(self, name) -> str: # TODO: consider storing the hash alongside the config so that we only calculate # hashes once? return hash_digest( - yaml.dumps( + yaml.dump( self.get_config_name_mapping()[name], # ensure that the keys are always in a stable order sort_keys=True,