diff --git a/CHANGELOG.md b/CHANGELOG.md index 136844eb2..61997c4d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ This release limits the maximum python version to `3.12.3` because of the issue * fixes a bug where it could happen that a config value could be overwritten by a default in a later configuration in a multi source config scenario * fixes a bug in the `field_manager` where extending a non list target leads to a processing failure +* fixes a bug in `pseudonymizer` where a missing regex_mapping from an existing config_file causes logprep to crash continuously ## 12.0.0 diff --git a/logprep/processor/pseudonymizer/processor.py b/logprep/processor/pseudonymizer/processor.py index 9b2ce3a98..e084af194 100644 --- a/logprep/processor/pseudonymizer/processor.py +++ b/logprep/processor/pseudonymizer/processor.py @@ -58,6 +58,7 @@ from urlextract import URLExtract from logprep.abc.processor import Processor +from logprep.factory_error import InvalidConfigurationError from logprep.metrics.metrics import CounterMetric, GaugeMetric from logprep.processor.field_manager.processor import FieldManager from logprep.processor.pseudonymizer.rule import PseudonymizerRule @@ -241,6 +242,10 @@ def _replace_regex_keywords_by_regex_expression(self): for dotted_field, regex_keyword in rule.pseudonyms.items(): if regex_keyword in self._regex_mapping: rule.pseudonyms[dotted_field] = re.compile(self._regex_mapping[regex_keyword]) + elif isinstance(regex_keyword, str): # after the first run, the regex is compiled + raise InvalidConfigurationError( + f"Regex keyword '{regex_keyword}' not found in regex_mapping '{self._config.regex_mapping}'" + ) def _apply_rules(self, event: dict, rule: PseudonymizerRule): source_dict = {} diff --git a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py index b73f1ae75..3ca0562c8 100644 --- a/tests/unit/processor/pseudonymizer/test_pseudonymizer.py +++ b/tests/unit/processor/pseudonymizer/test_pseudonymizer.py @@ -10,6 +10,7 @@ import pytest from logprep.factory import Factory +from logprep.factory_error import InvalidConfigurationError from logprep.util.pseudo.encrypter import ( DualPKCS1HybridCTREncrypter, DualPKCS1HybridGCMEncrypter, @@ -1065,3 +1066,20 @@ def test_uses_encrypter(self, mode, encrypter_class): config["mode"] = mode object_with_encrypter = Factory.create({"pseudonymizer": config}) assert isinstance(object_with_encrypter._encrypter, encrypter_class) + + def test_setup_raises_invalid_configuration_on_missing_regex_mapping(self): + rule_dict = { + "filter": "winlog.event_id: 1234 AND winlog.provider_name: Test456", + "pseudonymizer": { + "mapping": { + "winlog.event_data.param2": "RE_WHOLE_FIELD", + } + }, + } + self._load_specific_rule(rule_dict) + self.object._specific_rules[0].mapping["winlog.event_data.param2"] = "RE_DOES_NOT_EXIST" + error_message = ( + r"Regex keyword 'RE_DOES_NOT_EXIST' not found in regex_mapping '.*\/regex_mapping.yml'" + ) + with pytest.raises(InvalidConfigurationError, match=error_message): + self.object.setup()