From 575a19d50648ee58753af88ec924f5c23d54d95a Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 09:32:42 +0200 Subject: [PATCH 01/13] Refactor rule tree --- .../filter/expression/filter_expression.py | 102 ++-- .../framework/rule_tree/demorgan_resolver.py | 102 ++++ logprep/framework/rule_tree/node.py | 31 +- logprep/framework/rule_tree/rule_parser.py | 569 ++---------------- logprep/framework/rule_tree/rule_segmenter.py | 243 ++++++++ logprep/framework/rule_tree/rule_sorter.py | 88 +++ logprep/framework/rule_tree/rule_tagger.py | 125 ++++ logprep/framework/rule_tree/rule_tree.py | 60 +- .../framework/rule_tree/shared_constants.py | 11 + .../rule_tree/test_demorgan_resolver.py | 56 ++ tests/unit/framework/rule_tree/test_node.py | 19 +- .../framework/rule_tree/test_rule_parser.py | 325 ++-------- .../rule_tree/test_rule_segment_sorter.py | 33 + .../rule_tree/test_rule_segmenter.py | 80 +++ .../framework/rule_tree/test_rule_tagger.py | 70 +++ .../framework/rule_tree/test_rule_tree.py | 559 +++++------------ 16 files changed, 1148 insertions(+), 1325 deletions(-) create mode 100644 logprep/framework/rule_tree/demorgan_resolver.py create mode 100644 logprep/framework/rule_tree/rule_segmenter.py create mode 100644 logprep/framework/rule_tree/rule_sorter.py create mode 100644 logprep/framework/rule_tree/rule_tagger.py create mode 100644 tests/unit/framework/rule_tree/shared_constants.py create mode 100644 tests/unit/framework/rule_tree/test_demorgan_resolver.py create mode 100644 tests/unit/framework/rule_tree/test_rule_segment_sorter.py create mode 100644 tests/unit/framework/rule_tree/test_rule_segmenter.py create mode 100644 tests/unit/framework/rule_tree/test_rule_tagger.py diff --git a/logprep/filter/expression/filter_expression.py b/logprep/filter/expression/filter_expression.py index 673c00d04..8bb68c9ae 100644 --- a/logprep/filter/expression/filter_expression.py +++ b/logprep/filter/expression/filter_expression.py @@ -62,10 +62,9 @@ def 
does_match(self, document: dict) -> bool: """ - # Return the value for the given key from - # the document. @staticmethod def _get_value(key: List[str], document: dict) -> Any: + """Return the value for the given key from the document.""" if not key: raise KeyDoesNotExistError @@ -83,23 +82,6 @@ def __eq__(self, other): return False return True - @staticmethod - def as_dotted_string(key_list: List[str]) -> str: - """Converts list of keys to dotted string. - - Parameters - ---------- - key_list : List[str] - List of keys. - - Returns - ------- - str - Returns dotted string. - - """ - return ".".join([str(i) for i in key_list]) - class Always(FilterExpression): """Filter expression that can be set to match always or never.""" @@ -120,20 +102,20 @@ class Not(FilterExpression): """Filter expression that negates a match.""" def __init__(self, expression: FilterExpression): - self.expression = expression + self.child = expression def __repr__(self) -> str: - return f"NOT ({str(self.expression)})" + return f"NOT ({str(self.child)})" def does_match(self, document: dict) -> bool: - return not self.expression.matches(document) + return not self.child.matches(document) class CompoundFilterExpression(FilterExpression): """Base class of filter expressions that combine other filter expressions.""" def __init__(self, *args: FilterExpression): - self.expressions = args + self.children = args def does_match(self, document: dict): raise NotImplementedError @@ -143,31 +125,57 @@ class And(CompoundFilterExpression): """Compound filter expression that is a logical conjunction.""" def __repr__(self) -> str: - return f'({" AND ".join([str(exp) for exp in self.expressions])})' + return f'({" AND ".join([str(exp) for exp in self.children])})' def does_match(self, document: dict) -> bool: - return all((expression.matches(document) for expression in self.expressions)) + return all((expression.matches(document) for expression in self.children)) class Or(CompoundFilterExpression): """Compound 
filter expression that is a logical disjunction.""" def __repr__(self) -> str: - return f'({" OR ".join([str(exp) for exp in self.expressions])})' + return f'({" OR ".join([str(exp) for exp in self.children])})' def does_match(self, document: dict) -> bool: - return any((expression.matches(document) for expression in self.expressions)) + return any((expression.matches(document) for expression in self.children)) -class KeyValueBasedFilterExpression(FilterExpression): +class KeyBasedFilterExpression(FilterExpression): """Base class of filter expressions that match a certain value on a given key.""" - def __init__(self, key: List[str], expected_value: Any): + def __init__(self, key: List[str]): self.key = key + self._key_as_dotted_string = ".".join([str(i) for i in self.key]) + + def __repr__(self) -> str: + return f"{self.key_as_dotted_string}" + + def does_match(self, document): + raise NotImplementedError + + @property + def key_as_dotted_string(self) -> str: + """Converts key of expression to dotted string. + + Returns + ------- + str + Returns dotted string. 
+ + """ + return self._key_as_dotted_string + + +class KeyValueBasedFilterExpression(KeyBasedFilterExpression): + """Base class of filter expressions that match a certain value on a given key.""" + + def __init__(self, key: List[str], expected_value: Any): + super().__init__(key) self._expected_value = expected_value def __repr__(self) -> str: - return f"{self.as_dotted_string(self.key)}:{str(self._expected_value)}" + return f"{self.key_as_dotted_string}:{str(self._expected_value)}" def does_match(self, document): raise NotImplementedError @@ -184,7 +192,7 @@ def does_match(self, document: dict) -> bool: return str(value) == self._expected_value def __repr__(self) -> str: - return f'{self.as_dotted_string(self.key)}:"{str(self._expected_value)}"' + return f'{self.key_as_dotted_string}:"{str(self._expected_value)}"' class WildcardStringFilterExpression(KeyValueBasedFilterExpression): @@ -236,7 +244,7 @@ def _replace_wildcard(expected, matches, symbol, wildcard): return "".join([x for x in chain.from_iterable(zip_longest(split, matches)) if x]) def __repr__(self) -> str: - return f'{self.as_dotted_string(self.key)}:"{self._expected_value}"' + return f'{self.key_as_dotted_string}:"{self._expected_value}"' class SigmaFilterExpression(WildcardStringFilterExpression): @@ -263,16 +271,16 @@ def does_match(self, document: dict) -> bool: return value == self._expected_value -class RangeBasedFilterExpression(FilterExpression): +class RangeBasedFilterExpression(KeyBasedFilterExpression): """Base class of filter expressions that match for a range of values.""" def __init__(self, key: List[str], lower_bound: float, upper_bound: float): - self.key = key + super().__init__(key) self._lower_bound = lower_bound self._upper_bound = upper_bound def __repr__(self) -> str: - return f"{self.as_dotted_string(self.key)}:[{self._lower_bound} TO {self._upper_bound}]" + return f"{self.key_as_dotted_string}:[{self._lower_bound} TO {self._upper_bound}]" def does_match(self, document: dict): 
raise NotImplementedError @@ -296,19 +304,19 @@ def does_match(self, document: dict) -> bool: return self._lower_bound <= value <= self._upper_bound -class RegExFilterExpression(FilterExpression): +class RegExFilterExpression(KeyBasedFilterExpression): """Filter expression that matches a value using regex.""" match_escaping_pattern = re.compile(r".*?(?P\\*)\$$") match_parts_pattern = re.compile(r"^(?P\(\?\w\))?(?P\^)?(?P.*)") def __init__(self, key: List[str], regex: str): - self.key = key + super().__init__(key) self._regex = self._normalize_regex(regex) self._matcher = re.compile(self._regex) def __repr__(self) -> str: - return f"{self.as_dotted_string(self.key)}:/{self._regex.strip('^$')}/" + return f"{self.key_as_dotted_string}:/{self._regex.strip('^$')}/" @staticmethod def _normalize_regex(regex: str) -> str: @@ -331,22 +339,19 @@ def does_match(self, document: dict) -> bool: return self._matcher.match(str(value)) is not None -class Exists(FilterExpression): +class Exists(KeyBasedFilterExpression): """Filter expression that returns true if a given field exists.""" - def __init__(self, value: list): - self.split_field = value - def __repr__(self) -> str: - return f"{self.as_dotted_string(self.split_field)}: *" + return f"{self.key_as_dotted_string}: *" def does_match(self, document: dict) -> bool: - if not self.split_field: + if not self.key: return False try: current = document - for sub_field in self.split_field: + for sub_field in self.key: if ( sub_field not in current.keys() ): # .keys() is important as it is used to "check" for dict @@ -361,14 +366,11 @@ def does_match(self, document: dict) -> bool: return True -class Null(FilterExpression): +class Null(KeyBasedFilterExpression): """Filter expression that returns true if a given field is set to null.""" - def __init__(self, key: List[str]): - self.key = key - def __repr__(self) -> str: - return f"{self.as_dotted_string(self.key)}:{None}" + return f"{self.key_as_dotted_string}:{None}" def does_match(self, 
document: dict) -> bool: value = self._get_value(self.key, document) diff --git a/logprep/framework/rule_tree/demorgan_resolver.py b/logprep/framework/rule_tree/demorgan_resolver.py new file mode 100644 index 000000000..5b4afb49c --- /dev/null +++ b/logprep/framework/rule_tree/demorgan_resolver.py @@ -0,0 +1,102 @@ +"""Module implements functionality to apply De Morgan's law on rule filter expressions""" +from logprep.filter.expression.filter_expression import ( + Not, + And, + Or, + FilterExpression, + CompoundFilterExpression, +) + + +class DeMorganResolverException(Exception): + """Raise if demorgan resolver encounters a problem.""" + + +class DeMorganResolver: + """Used to apply De Morgan's law on rule filter expressions""" + + def resolve(self, expression: FilterExpression) -> FilterExpression: + """Parse NOT-expressions in given filter expression. + + This function resolves NOT-expressions found in the given filter expression according to + De Morgan's law. + + Parameters + ---------- + expression: FilterExpression + Given filter expression to be parsed. + + Returns + ------- + result: FilterExpression + Resulting filter expression created by resolving NOT-expressions in the given filter + expression. + + """ + if not self._has_unresolved_expression(expression): + return expression + + if isinstance(expression, Not): + return self._resolve_not_expression(expression) + if isinstance(expression, CompoundFilterExpression): + return self._resolve_compound_expression(expression) + + raise DeMorganResolverException(f"Could not resolve expression {expression}") + + @staticmethod + def _has_unresolved_expression(expression: FilterExpression) -> bool: + """Check if given filter expression contains NOT-expressions. + + This function checks if the given filter expression contains any unresolved NOT-expressions. + Simple NOT(field: value) expressions do not count as unresolved expression since it cannot + be resolved. 
+ + This is achieved by iterating over the input expression and all of its sub expressions. + The input expression needs to be resolved if a negated compound expression is found. + Otherwise, no resolving is required. + + Parameters + ---------- + expression: FilterExpression + Filter expression to be checked for NOT-expressions. + + Returns + ------- + has_unresolved_not_expression: bool + Decision if given filter expression contains any unresolved NOT-expressions. + + """ + expressions_stack = [expression] + while expressions_stack: + current_expression = expressions_stack.pop() + if isinstance(current_expression, Not): + if isinstance(current_expression.child, CompoundFilterExpression): + return True + if isinstance(current_expression, CompoundFilterExpression): + for sub_expression in current_expression.children: + expressions_stack.append(sub_expression) + return False + + def _resolve_not_expression(self, not_expression: Not) -> FilterExpression: + if not isinstance(not_expression.child, CompoundFilterExpression): + return not_expression + + compound_expression = not_expression.child + negated_children = (Not(expression) for expression in compound_expression.children) + + if isinstance(compound_expression, Or): + expression = And(*negated_children) + elif isinstance(compound_expression, And): + expression = Or(*negated_children) + else: + raise DeMorganResolverException(f"Could not resolve expression {not_expression}") + + return self._resolve_compound_expression(expression) + + def _resolve_compound_expression( + self, compound_expression: CompoundFilterExpression + ) -> CompoundFilterExpression: + compound_expression.children = tuple( + self.resolve(expression) for expression in compound_expression.children + ) + return compound_expression diff --git a/logprep/framework/rule_tree/node.py b/logprep/framework/rule_tree/node.py index 835367649..75267ec41 100644 --- a/logprep/framework/rule_tree/node.py +++ b/logprep/framework/rule_tree/node.py @@ -1,6 +1,6 @@ 
"""This module implements the tree node functionality for the tree model.""" -from typing import Optional, List +from typing import Optional, List, Union from logprep.filter.expression.filter_expression import FilterExpression from logprep.filter.expression.filter_expression import KeyDoesNotExistError @@ -9,7 +9,13 @@ class Node: """Tree node for rule tree model.""" - def __init__(self, expression: FilterExpression): + __slots__ = ("_expression", "_children", "matching_rules") + + _expression: Union[FilterExpression, str] + _children: list + matching_rules: list + + def __init__(self, expression: Optional[Union[FilterExpression, str]]): """Node initialization function. Initializes a new node with a given expression and empty lists of children and matching @@ -63,25 +69,6 @@ def add_child(self, node: "Node"): """ self._children.append(node) - def has_child_with_expression(self, expression: FilterExpression) -> Optional["Node"]: - """Check if node has child with given expression. - - This function checks if a node has a child with the given filter expression. - It is used to iterate through a tree in the process of adding a new rule to a tree. - - Parameters - ---------- - expression: FilterExpression - Filter expression to check for. - - Returns - ------- - has_child: bool - Decision if the node has a child with the given expression. - - """ - return self.get_child_with_expression(expression) - def get_child_with_expression(self, expression: FilterExpression) -> Optional["Node"]: """Get child of node with given expression. 
@@ -107,8 +94,10 @@ def get_child_with_expression(self, expression: FilterExpression) -> Optional["N @property def expression(self) -> FilterExpression: + """Filter expression of the node.""" return self._expression @property def children(self) -> List["Node"]: + """Children of the node.""" return self._children diff --git a/logprep/framework/rule_tree/rule_parser.py b/logprep/framework/rule_tree/rule_parser.py index f76093cfa..470d0d2a1 100644 --- a/logprep/framework/rule_tree/rule_parser.py +++ b/logprep/framework/rule_tree/rule_parser.py @@ -5,19 +5,21 @@ """ -from typing import Union +from typing import TYPE_CHECKING from logprep.filter.expression.filter_expression import ( Always, - And, - CompoundFilterExpression, Exists, - FilterExpression, Not, - Or, - StringFilterExpression, ) -from logprep.util.helper import get_dotted_field_list + +from logprep.framework.rule_tree.demorgan_resolver import DeMorganResolver +from logprep.framework.rule_tree.rule_sorter import RuleSorter +from logprep.framework.rule_tree.rule_tagger import RuleTagger +from logprep.framework.rule_tree.rule_segmenter import RuleSegmenter + +if TYPE_CHECKING: + from logprep.processor.base.rule import Rule class RuleParserException(Exception): @@ -27,8 +29,27 @@ class RuleParserException(Exception): class RuleParser: """Parse rule into list of less complex rules.""" - @staticmethod - def parse_rule(rule, priority_dict: dict, tag_map: dict) -> list: + __slots__ = ("_demorgan_resolver", "_rule_segmenter", "_rule_tagger") + + _demorgan_resolver: DeMorganResolver + _rule_segmenter: RuleSegmenter + _rule_tagger: RuleTagger + + def __init__(self, tag_map: dict): + """Initializes objects used for the rule parsing. + + Parameters + ---------- + tag_map: dict + Dictionary containing field names as keys and tags as values that is used to add special + tags to the rule. 
+ + """ + self._demorgan_resolver = DeMorganResolver() + self._rule_segmenter = RuleSegmenter() + self._rule_tagger = RuleTagger(tag_map) + + def parse_rule(self, rule: "Rule", priority_dict: dict) -> list: """Main parsing function to parse rule into list of less complex rules. This function aims to parse a rule into a list of less complex rules that shows the same @@ -43,14 +64,13 @@ def parse_rule(rule, priority_dict: dict, tag_map: dict) -> list: priority_dict: dict Dictionary containing priority values for field names that are used to sort filter expression in a rule. - tag_map: dict - Dictionary containing field names as keys and tags as values that is used to add special - tags to the rule. Returns ------- - parsed_rule_filter_list: list + list List of parsed rules. Each parsed rule is a list of filter expressions itself. + The first list represents a disjunction and the sub-lists represent conjunctions, + like in the disjunctive normal form. Raises ------ @@ -58,501 +78,13 @@ def parse_rule(rule, priority_dict: dict, tag_map: dict) -> list: Throws RuleParserException when parser encounters a problem during the parsing process. 
""" - rule_filter = rule.filter - rule_filter_parsed_not = RuleParser._parse_not_expression(rule_filter) - - if RuleParser._has_or_expression(rule_filter_parsed_not): - parsed_rule_filter_list = RuleParser._parse_or_expression(rule_filter_parsed_not) - elif isinstance(rule_filter_parsed_not, And): - parsed_rule_filter_list = [RuleParser._parse_and_expression(rule_filter_parsed_not)] - else: - parsed_rule_filter_list = [[rule_filter_parsed_not]] - - if not parsed_rule_filter_list: - raise RuleParserException("Rule probably not parsed correctly:", rule_filter) - - RuleParser._sort_rule_segments(parsed_rule_filter_list, priority_dict) - RuleParser._add_exists_filter(parsed_rule_filter_list) - RuleParser._add_special_tags(parsed_rule_filter_list, tag_map) - - return parsed_rule_filter_list - - @staticmethod - def _parse_not_expression( - rule: Union[Not, And, Or, StringFilterExpression] - ) -> Union[Not, And, Or, StringFilterExpression]: - """Parse NOT-expressions in given filter expression. - - This function resolves NOT-expressions found in the given filter expression according to - De Morgan's Law. - - Parameters - ---------- - rule: Union[Not, And, Or, StringFilterExpression] - Given filter expression to be parsed. - - Returns - ------- - result: Union[Not, And, Or, StringFilterExpression] - Resulting filter expression created by resolving NOT-expressions in the given filter - expression. 
- - """ - if RuleParser._has_unresolved_not_expression(rule): - if isinstance(rule, Not): - exp = rule.expression - - if isinstance(exp, StringFilterExpression): - return rule - if isinstance(exp, Or): - result_segments = () - - for or_segment in exp.expressions: - result_segments = result_segments + (Not(or_segment),) - - result = And(*result_segments) - - if RuleParser._has_unresolved_not_expression(result): - result = RuleParser._parse_not_expression(result) - - return result - if isinstance(exp, And): - result_segments = () - - for and_segment in exp.expressions: - result_segments = result_segments + (Not(and_segment),) - - result = Or(*result_segments) - - if RuleParser._has_unresolved_not_expression(result): - result = RuleParser._parse_not_expression(result) - - return result - elif isinstance(rule, And): - result_segments = () - - for and_segment in rule.expressions: - result_segments = result_segments + ( - RuleParser._parse_not_expression(and_segment), - ) - - result = And(*result_segments) - return result - elif isinstance(rule, Or): - result_segments = () - - for or_segment in rule.expressions: - result_segments = result_segments + ( - RuleParser._parse_not_expression(or_segment), - ) - - result = Or(*result_segments) - return result - else: - return rule - - @staticmethod - def _has_unresolved_not_expression(rule: FilterExpression) -> bool: - """Check if given filter expression contains NOT-expressions. - - This function checks if the given filter expression contains any unresolved NOT-expressions. - Simple NOT(field: value) expressions do not count as unresolved expression since it cannot - be resolved. - - Parameters - ---------- - rule: FilterExpression - Filter expression to be checked for NOT-expressions. - - Returns - ------- - has_unresolved_not_expression: bool - Decision if given filter expression contains any unresolved NOT-expressions. 
- - """ - if isinstance(rule, Not): - if isinstance(rule.expression, CompoundFilterExpression): - return True - elif isinstance(rule, CompoundFilterExpression): - for expression in rule.expressions: - if RuleParser._has_unresolved_not_expression(expression): - return True - return False - - @staticmethod - def _parse_or_expression(rule: FilterExpression) -> Union[list, tuple, FilterExpression, None]: - """Parse filters with OR-expressions. - - This function parses filter expressions with OR-expressions recursively by splitting them - into separate filter expressions using the distributive property of the logical operators - AND and OR. During the recursive parsing process, different types are returned. - Hence, different cases have to be handled when constructing the results. - - Parameters - ---------- - rule: FilterExpression - Filter expression with OR-expressions to be parsed. - - Returns - ------- - result: Union[list, tuple, FilterExpression, None] - Resulting filter expression created by resolving OR- and AND-expressions in the given - filter expression. The return type may differ depending on the level of recursion. 
- - """ - if RuleParser._has_or_expression(rule): - # If expression is OR-expression, parse it - if isinstance(rule, Or): - result_list = [] - - for exp in rule.expressions: - # Recursively parse subexpressions of current expressions - loop_result = RuleParser._parse_or_expression(exp) - - # Differentiate between different loop_result types - # and construct result_list accordingly - if not isinstance(loop_result, list): - if isinstance(loop_result, tuple): - loop_result = list(loop_result) - else: - loop_result = [loop_result] - if isinstance(loop_result, list) and isinstance(loop_result[0], list): - for element in loop_result: - result_list.append(element) - else: - result_list.append(loop_result) - - return result_list - # Else, if expression is AND-expression, - # parse it and continue to parse OR-expression afterwards - if isinstance(rule, And): - loop_results = [] - - for exp in rule.expressions: - # Recursively parse subexpressions of current expressions - loop_results.append(RuleParser._parse_or_expression(exp)) - - # Iterate through loop_results and resolve tuples - for loop_result in loop_results: - if isinstance(loop_result, tuple): - tuple_segment = loop_result - loop_results.remove(tuple_segment) # pylint: disable=W4701 - - for tuple_element in tuple_segment: - loop_results.insert(0, tuple_element) - - # Continue to parse OR-expressions in already parsed subexpressions - return RuleParser._parse_or(loop_results) - else: - # Handle cases that may occur in recursive parsing process - if isinstance(rule, And): - return tuple(RuleParser._parse_and_expression(rule)) - return rule - return None - - @staticmethod - def _parse_or(loop_results: list): - """Parse OR-expressions. - - This function handles the parsing of OR-subexpressions in AND-expression filters in a - recursive manner. - - Parameters - ---------- - loop_results: list - List of filter expressions constructed during the parsing of AND-expressions that - contain OR-expressions. 
- - Returns - ------- - result_list: list - Given input list with resolved OR-subexpressions. - - """ - result_list = [] - - or_segment = RuleParser._pop_or_loop_result(loop_results) - - # Resolve OR expressions using distributive property - for or_element in or_segment: - result_list.append(loop_results + or_element) - - # Recursively resolve elements in result_list - for parsed_rule in result_list.copy(): - for segment in parsed_rule: - if isinstance(segment, list): - if parsed_rule in result_list: - result_list.remove(parsed_rule) - rule_list = RuleParser._parse_or(parsed_rule) - - for rule in rule_list: - result_list.append(rule) - - return result_list - - @staticmethod - def _pop_or_loop_result(loop_results: list) -> list: - """Pop element from list. - - This function iterates through the given list until it finds a list element that is a list - itself, i.e. an OR-expression. The found element is then removed from the list and returned. - - Parameters - ---------- - loop_results: list - List of filter expressions to pop list from. - - Returns - ------- - or_segment: list - First element of given list that is a list itself, i.e. an OR-expression. - - """ - for loop_result in loop_results: - if isinstance(loop_result, list): - or_segment = loop_result - loop_results.remove(or_segment) - return or_segment - return [] - - @staticmethod - def _has_or_expression(expression: FilterExpression) -> bool: - """Check if given expression has OR-(sub)expression. - - This function checks if the given expression is an OR-expression or if any subexpression of - the given expression is an OR-expression. Needed during recursive parsing processes. - - Parameters - ---------- - expression: FilterExpression - Given expression to check for OR-expression. - - Returns - ------- - has_or_expression: bool - Decision if given expression has OR-expression. 
- - """ - if isinstance(expression, Or): - return True - if isinstance(expression, CompoundFilterExpression): - for exp in expression.expressions: - if RuleParser._has_or_expression(exp): - return True - - if isinstance(expression, Not): - return RuleParser._has_or_expression(expression.expression) - - return False - - @staticmethod - def _sort_rule_segments(parsed_rule_list: list, priority_dict: dict): - """Sort filter expressions in rule. - - This function sorts the filter expressions in all parsed rules without changing the rule's - decision behavior. - The expressions are sorted alphabetically or according to a defined priority dictionary. - - Goal of the sorting process is to achieve better processing times using the defined - priorities and to minimize the resulting tree by ensuring an order in which fields to be - checked occur in all rules. - - Parameters - ---------- - parsed_rule_list: list - List of parsed rules where every rule is a list of filter expressions. - priority_dict: dict - Dictionary with sorting priority information (key -> field name; value -> priority). - - """ - for parsed_rule in parsed_rule_list: - parsed_rule.sort(key=lambda r: RuleParser._sort(r, priority_dict)) - - @staticmethod - def _sort(expr: StringFilterExpression, priority_dict: dict) -> Union[dict, str, None]: - """Helper function for _sort_rule_segments. - - This function is used by the _sort_rule_segments() function in the sorting key. - It includes various cases to cover all the different expression classes. For every class it - tries to get a priority value from the priority dict. If the field name used in the - expression does not exist in the priority dict, the field name itself is returned to use an - alphabetical sort. - - Parameters - ---------- - expr: StringFilterExpression - Filter expression to get comparison value for. - priority_dict: dict - Dictionary with sorting priority information (key -> field name; value -> priority). 
- - Returns - ------- - comparison_value: Union[dict, str, None] - Comparison value to use for sorting. - - """ - if isinstance(expr, Always): - return None - if isinstance(expr, Not): - try: - if isinstance(expr.expression, Exists): - return priority_dict[ - expr.expression.as_dotted_string(expr.expression.split_field) - ] - if isinstance(expr.expression, Not): - return priority_dict[expr.expression.expression.split_field[0]] - return priority_dict[expr.as_dotted_string(expr.expression.key)] - except KeyError: - return RuleParser._sort(expr.expression, priority_dict) - elif isinstance(expr, Exists): - try: - return priority_dict[expr.as_dotted_string(expr.split_field)] - except KeyError: - return repr(expr) - else: - try: - return priority_dict[expr.as_dotted_string(expr.key)] - except KeyError: - return repr(expr) - - @staticmethod - def _parse_and_expression(expression: FilterExpression) -> list: - """Parse AND-expression. - - This function parses AND-(sub)expressions in the given filter expression to a list of - filter expressions. - - Parameters - ---------- - expression: FilterExpression - Filter expression to be parsed recursively. - - Returns - ------- - rule_list: list - List of filter expressions parsed from given filter expression. - - """ - rule_list = [] - - if isinstance(expression, And): - for segment in expression.expressions: - if not isinstance(segment, And): - rule_list.append(segment) - else: - looped_result = RuleParser._parse_and_expression(segment) - - for looped_segment in looped_result: - rule_list.append(looped_segment) - - return rule_list - - @staticmethod - def _add_special_tags(parsed_rules: list, tag_map: dict): - """Add tags to rule filter. - - This function adds tags to the parsed rule filter. Tags are added according to a defined - tag_map dictionary where the keys are field names and the values are filter expressions. 
- - If a field name defined in tag_map.keys() is found in a rule segment's filter expressions, - the corresponding filter expression tag is created and added to the rule's segments as - first segment. - - The idea behind tags is to improve a rule's processing time by checking the - tag before processing the actual rule. - - Parameters - ---------- - parsed_rules: list - List containing parsed rules in a format where each rule consists of a list of filter - expressions. - tag_map: dict - Dictionary containing field names as keys and tags as values that is used to add special - tags to the rule. - - """ - - if tag_map: - for rule in parsed_rules: - temp_rule = rule.copy() + filter_expression = self._demorgan_resolver.resolve(rule.filter) + dnf_rule_segments = self._rule_segmenter.segment_into_dnf(rule, filter_expression) + RuleSorter.sort_rule_segments(dnf_rule_segments, priority_dict) + self._add_exists_filter(dnf_rule_segments) + self._rule_tagger.add(dnf_rule_segments) - # Iterate through all segments and handle different cases - for segment in temp_rule: - if isinstance(segment, Exists): - if segment.split_field[0] in tag_map.keys(): - RuleParser._add_tag(rule, tag_map[segment.split_field[0]]) - elif isinstance(segment, Not): - expression = segment.expression - if isinstance(expression, Exists): - if expression.split_field[0] in tag_map.keys(): - RuleParser._add_tag(rule, tag_map[expression.split_field[0]]) - elif expression.key[0] in tag_map.keys(): - RuleParser._add_tag(rule, tag_map[expression.key[0]]) - # Always Expressions do not need tags - elif isinstance(segment, Always): - continue - else: - if ( - segment.key[0] in tag_map.keys() - and Exists([tag_map[segment.key[0]]]) not in rule - ): - RuleParser._add_tag(rule, tag_map[segment.key[0]]) - - @staticmethod - def _add_tag(rule, tag_map_value: str): - """Add tag helper function. - - This function implements the functionality to add a tag for _add_special_tags(). 
- - If the tag to add already exists, the function skips the new tag. - Furthermore, it is distinguished between tags that create a simple Exists-filter expression - and StringFilter-expressions (containing a ":"). - - Parameters - ---------- - rule: list - List containing filter expressions representing the parsed rule. - tag_map_value: str - Value that is used to create the tag. If it contains a ":", a StringFilterExpression - will be created. - Else, an Exists-expression will be created. - - """ - if RuleParser._tag_exists(rule[0], tag_map_value): - return - - if ":" in tag_map_value: - key, value = tag_map_value.split(":") - rule.insert(0, StringFilterExpression(get_dotted_field_list(key), value)) - else: - rule.insert(0, Exists(tag_map_value.split("."))) - - @staticmethod - def _tag_exists(segment: Union[Exists, StringFilterExpression], tag: str) -> bool: - """Helper function for _add_tag. - - Checks if the given segment is equal to the given tag. - - Parameters - ---------- - segment: Union[Exists, StringFilterExpression] - Segment to check if equal to tag. - tag: str - Tag to check for. - - Returns - ------- - tag_exists: bool - Decision if the given tag already exists as the given segment. 
- - """ - if isinstance(segment, Exists): - if repr(segment)[1:-1] == tag: - return True - elif isinstance(segment, StringFilterExpression): - if repr(segment).replace('"', "") == tag: - return True - return False + return dnf_rule_segments @staticmethod def _add_exists_filter(parsed_rules: list): @@ -575,20 +107,13 @@ def _add_exists_filter(parsed_rules: list): skipped_counter = 0 for segment_index, segment in enumerate(temp_parsed_rule): - # Skip Always()-, Exists()- and Not()-expressions when adding Exists()-filter - # Not()-expressions need to be skipped for cases where the field does not exist - if ( - not isinstance(segment, Exists) - and not isinstance(segment, Not) - and not isinstance(segment, Always) - ): - exists_filter = Exists(segment.key) + if isinstance(segment, (Exists, Not, Always)): + skipped_counter += 1 + continue - # Skip if Exists()-filter already exists in Rule. No need to add it twice - if exists_filter in parsed_rule: - skipped_counter += 1 - else: - # Insert Exists filter at the right place in the rule - parsed_rule.insert(segment_index * 2 - skipped_counter, exists_filter) - else: + exists_filter = Exists(segment.key) + if exists_filter in parsed_rule: skipped_counter += 1 + continue + + parsed_rule.insert(segment_index * 2 - skipped_counter, exists_filter) diff --git a/logprep/framework/rule_tree/rule_segmenter.py b/logprep/framework/rule_tree/rule_segmenter.py new file mode 100644 index 000000000..5b4867530 --- /dev/null +++ b/logprep/framework/rule_tree/rule_segmenter.py @@ -0,0 +1,243 @@ +"""This module implements functionality to segment expressions into simplified list expression.""" + +from typing import Union + +from logprep.filter.expression.filter_expression import ( + And, + CompoundFilterExpression, + FilterExpression, + Not, + Or, +) + + +class RuleSegmenterException(Exception): + """Raise if rule segmenter encounters a problem.""" + + +class RuleSegmenter: + """Segments filter expression into list of less complex 
expressions.""" + + def segment_into_dnf(self, rule, expression): + """Segment expression into list of less complex expressions.""" + if self._has_disjunction(expression): + rule_segments = self._segment_expression(expression) + elif isinstance(expression, And): + rule_segments = [self._segment_conjunctive_expression(expression)] + else: + rule_segments = [[expression]] + if not rule_segments: + raise RuleSegmenterException("Rule probably not parsed correctly:", rule.filter) + return rule_segments + + @staticmethod + def _has_disjunction(expression: FilterExpression) -> bool: + """Check if given expression has OR-(sub)expression. + + This function checks if the given expression is an OR-expression or if any subexpression of + the given expression is an OR-expression. Needed during recursive parsing processes. + + Parameters + ---------- + expression: FilterExpression + Given expression to check for OR-expression. + + Returns + ------- + has_or_expression: bool + Decision if given expression has OR-expression. + + """ + if isinstance(expression, Or): + return True + if isinstance(expression, CompoundFilterExpression): + for exp in expression.children: + if RuleSegmenter._has_disjunction(exp): + return True + + if isinstance(expression, Not): + return RuleSegmenter._has_disjunction(expression.child) + + return False + + @staticmethod + def _segment_expression( + filter_expression: FilterExpression, + ) -> Union[list, tuple, FilterExpression]: + """Parse filters with OR-expressions. + + This function parses filter expressions with OR-expressions recursively by splitting them + into separate filter expressions using the distributive property of the logical operators + AND and OR. During the recursive parsing process, different types are returned. + Hence, different cases have to be handled when constructing the results. + + Parameters + ---------- + filter_expression: FilterExpression + Filter expression with OR-expressions to be parsed. 
+ + Returns + ------- + result: Union[list, tuple, FilterExpression] + Resulting filter expression created by resolving OR- and AND-expressions in the given + filter expression. The return type may differ depending on the level of recursion. + + """ + if not RuleSegmenter._has_disjunction(filter_expression): + # Handle cases that may occur in recursive parsing process + if isinstance(filter_expression, And): + return tuple(RuleSegmenter._segment_conjunctive_expression(filter_expression)) + return filter_expression + + if isinstance(filter_expression, Or): + return RuleSegmenter._segment_disjunctive_expression(filter_expression) + + if isinstance(filter_expression, And): + segmented_sub_expressions = RuleSegmenter._segment_sub_expressions(filter_expression) + RuleSegmenter._flatten_tuples_in_list(segmented_sub_expressions) + return CnfToDnfConverter.convert_cnf_to_dnf(segmented_sub_expressions) + raise RuleSegmenterException(f"Could not segment {filter_expression}") + + @staticmethod + def _segment_disjunctive_expression(filter_expression): + result_list = [] + segmented_expression = RuleSegmenter._segment_sub_expressions(filter_expression) + for expression in segmented_expression: + expression_as_list = RuleSegmenter._convert_expression_to_list(expression) + if all(isinstance(sub_expression, list) for sub_expression in expression_as_list): + for sub_expression in expression_as_list: + result_list.append(sub_expression) + else: + result_list.append(expression_as_list) + return result_list + + @staticmethod + def _convert_expression_to_list(expression): + if isinstance(expression, tuple): + return list(expression) + if not isinstance(expression, list): + return [expression] + return expression + + @staticmethod + def _segment_sub_expressions(filter_expression: CompoundFilterExpression) -> list: + """Recursively segment subexpressions of current expressions""" + return [ + RuleSegmenter._segment_expression(expression) + for expression in filter_expression.children + ] 
+ + @staticmethod + def _flatten_tuples_in_list(expressions: list): + """Iterate through sub_expressions and resolve tuples""" + for expression in expressions: + if isinstance(expression, tuple): + expressions.remove(expression) # pylint: disable=W4701 + for tuple_element in expression: + expressions.insert(0, tuple_element) + + @staticmethod + def _segment_conjunctive_expression(expression: FilterExpression) -> list: + """Parse AND-expression. + + This function parses AND-(sub)expressions in the given filter expression to a list of + filter expressions. + + Parameters + ---------- + expression: FilterExpression + Filter expression to be parsed recursively. + + Returns + ------- + rule_list: list + List of filter expressions parsed from given filter expression. + + """ + rule_list = [] + + if isinstance(expression, And): + for segment in expression.children: + if not isinstance(segment, And): + rule_list.append(segment) + else: + for looped_segment in RuleSegmenter._segment_conjunctive_expression(segment): + rule_list.append(looped_segment) + + return rule_list + + +class CnfToDnfConverter: + """Converts simplified rules from the conjunctive normal form to the disjunctive normal form""" + + @staticmethod + def convert_cnf_to_dnf(cnf: list): + """Convert rule from conjunctive normal form into disjunctive normal form. + + + This function handles the parsing of OR-subexpressions in AND-expression filters in a + recursive manner. + It converts an input in conjunctive normal form into the disjunctive normal form. + + For the input the list represents a conjunction and the sub-lists represent disjunctions. + For the output the list represents a disjunction and the sub-lists represent conjunctions. + + Parameters + ---------- + cnf: list + List of filter expressions constructed during the parsing of AND-expressions that + contain OR-expressions. + + Returns + ------- + result_list: list + Given input list with resolved OR-subexpressions. 
+ + """ + dnf = [] + + or_segment = CnfToDnfConverter._pop_disjunctive_segment(cnf) + + CnfToDnfConverter._resolve_disjunctive_segment(or_segment, cnf, dnf) + + for parsed_expression in dnf.copy(): + for segment in parsed_expression: + if isinstance(segment, list): + if parsed_expression in dnf: + dnf.remove(parsed_expression) + resolved_expressions = CnfToDnfConverter.convert_cnf_to_dnf(parsed_expression) + + for resolved_expression in resolved_expressions: + dnf.append(resolved_expression) + return dnf + + @staticmethod + def _pop_disjunctive_segment(expressions: list) -> list: + """Pop OR-expression from list of expressions. + + This function iterates through the given list until it finds an OR-expression. + That OR-expression is then removed from the list and returned. + OR-expressions are represented as elements of the type list in the expressions list. + + Parameters + ---------- + expressions: list + List of filter expressions to pop list from. + + Returns + ------- + or_segment: list + First element of given list that is a list itself, i.e. an OR-expression. 
+ + """ + for expression in expressions: + if isinstance(expression, list): + expressions.remove(expression) + return expression + return [] + + @staticmethod + def _resolve_disjunctive_segment(or_segment, expressions_in_cnf, expressions_in_dnf): + """Resolve OR expressions using distributive property.""" + for or_element in or_segment: + expressions_in_dnf.append(expressions_in_cnf + or_element) diff --git a/logprep/framework/rule_tree/rule_sorter.py b/logprep/framework/rule_tree/rule_sorter.py new file mode 100644 index 000000000..9b8566f57 --- /dev/null +++ b/logprep/framework/rule_tree/rule_sorter.py @@ -0,0 +1,88 @@ +"""This module implements functionality to sort rule filter segments.""" + +from typing import Union + +from logprep.filter.expression.filter_expression import ( + Always, + Not, + KeyBasedFilterExpression, + FilterExpression, +) + + +class RuleSorterException(Exception): + """Raise if rule sorter encounters a problem.""" + + +class RuleSorter: + """Sorts rule filter segments.""" + + @staticmethod + def sort_rule_segments(parsed_rule_list: list, priority_dict: dict): + """Sort filter expressions in rule. + + This function sorts the filter expressions in all parsed rules without changing the rule's + decision behavior. + The expressions are sorted alphabetically or according to a defined priority dictionary. + + Goal of the sorting process is to achieve better processing times using the defined + priorities and to minimize the resulting tree by ensuring an order in which fields to be + checked occur in all rules. + + Parameters + ---------- + parsed_rule_list: list + List of parsed rules where every rule is a list of filter expressions. + priority_dict: dict + Dictionary with sorting priority information (key -> field name; value -> priority). 
+ + """ + for parsed_rule in parsed_rule_list: + parsed_rule.sort(key=lambda r: RuleSorter._sort(r, priority_dict)) + + @staticmethod + def _sort(expression: FilterExpression, priority_dict: dict) -> Union[dict, str, None]: + """Helper function for _sort_rule_segments. + + This function is used by the _sort_rule_segments() function in the sorting key. + It includes various cases to cover all the different expression classes. For every class it + tries to get a priority value from the priority dict. If the field name used in the + expression does not exist in the priority dict, the field name itself is returned to use an + alphabetical sort. + + Parameters + ---------- + expression: FilterExpression + Filter expression to get comparison value for. + priority_dict: dict + Dictionary with sorting priority information (key -> field name; value -> priority). + + Returns + ------- + comparison_value: Union[dict, str, None] + Comparison value to use for sorting. + + """ + if isinstance(expression, Always): + return None + + if isinstance(expression, Not): + return RuleSorter._sort_not_expression(expression, priority_dict) + + if isinstance(expression, KeyBasedFilterExpression): + return priority_dict.get(expression.key_as_dotted_string, repr(expression)) + + raise RuleSorterException(f'Could not sort "{expression}"') + + @staticmethod + def _sort_not_expression(expression, priority_dict): + try: + if isinstance(expression.child, Not): + if isinstance(expression.child.child, KeyBasedFilterExpression): + return priority_dict[expression.child.child.key[0]] + + if isinstance(expression.child, KeyBasedFilterExpression): + return priority_dict[expression.child.key_as_dotted_string] + except KeyError: + pass + return RuleSorter._sort(expression.child, priority_dict) diff --git a/logprep/framework/rule_tree/rule_tagger.py b/logprep/framework/rule_tree/rule_tagger.py new file mode 100644 index 000000000..784c0c093 --- /dev/null +++ b/logprep/framework/rule_tree/rule_tagger.py @@ 
-0,0 +1,125 @@ +""" This module implements functionality to add tags to filter expressions. """ +from typing import Union, List + +from logprep.filter.expression.filter_expression import ( + StringFilterExpression, + Exists, + Not, + KeyBasedFilterExpression, +) +from logprep.util.helper import get_dotted_field_list + + +class RuleTagger: + """Adds tags to filter expressions.""" + + __slots__ = ["_tag_map"] + + _tag_map: dict + + def __init__(self, tag_map: dict): + """Used to add tags to rule filters. + + The idea behind tags is to improve a rule's processing time by checking the + tag before processing the actual rule. + + Parameters + ---------- + tag_map: dict + Dictionary containing field names as keys and tags as values that is used to add special + tags to the rule. + + """ + self._tag_map = tag_map + + def add(self, list_of_rule_expressions: List[List[Union[Exists, StringFilterExpression]]]): + """Add tags to rule filter. + + This function adds tags to the parsed rule filter. Tags are added according to a defined + tag_map dictionary where the keys are field names and the values are filter expressions. + + If a field name defined in tag_map.keys() is found in a rule segment's filter expressions, + the corresponding filter expression tag is created and added to the rule's segments as + first segment. + + Parameters + ---------- + list_of_rule_expressions: list + List containing parsed rules in a format where each rule consists of a list of filter + expressions. 
+
+        """
+
+        if not self._tag_map:
+            return
+
+        for rule_expressions in list_of_rule_expressions:
+            self._add_tags_to_rule_expressions(rule_expressions)
+
+    def _add_tags_to_rule_expressions(self, rule_expressions):
+        """Iterate through all expressions and handle different cases"""
+        for expression in rule_expressions.copy():
+            next_expression = expression.child if isinstance(expression, Not) else expression
+            if self._expression_in_tag_map(next_expression):
+                if Exists([self._tag_map[next_expression.key[0]]]) not in rule_expressions:
+                    self._add_tag(rule_expressions, self._tag_map[next_expression.key[0]])
+
+    def _expression_in_tag_map(self, expression):
+        return (
+            isinstance(expression, KeyBasedFilterExpression)
+            and expression.key[0] in self._tag_map.keys()
+        )
+
+    @staticmethod
+    def _add_tag(expressions: List[KeyBasedFilterExpression], tag_map_value: str):
+        """Add tag helper function.
+
+        This function implements the functionality to add a tag for _add_tags_to_rule_expressions().
+
+        If the tag to add already exists, the function skips the new tag.
+        Furthermore, a distinction is made between tags that create a simple Exists-filter expression
+        and StringFilter-expressions (containing a ":").
+
+        Parameters
+        ----------
+        expressions: list
+            List containing filter expressions representing the parsed rule.
+        tag_map_value: str
+            Value that is used to create the tag. If it contains a ":", a StringFilterExpression
+            will be created.
+            Else, an Exists-expression will be created.
+
+        """
+        if RuleTagger._tag_exists(expressions[0], tag_map_value):
+            return
+
+        if ":" in tag_map_value:
+            key, value = tag_map_value.split(":")
+            expressions.insert(0, StringFilterExpression(get_dotted_field_list(key), value))
+        else:
+            expressions.insert(0, Exists(tag_map_value.split(".")))
+
+    @staticmethod
+    def _tag_exists(expression: KeyBasedFilterExpression, tag: str) -> bool:
+        """Check if the given segment is equal to the given tag.
+ + Parameters + ---------- + expression: Union[Exists, StringFilterExpression] + Expression to check if equal to tag. + tag: str + Tag to check for. + + Returns + ------- + tag_exists: bool + Decision if the given tag already exists as the given segment. + + """ + if isinstance(expression, Exists): + if repr(expression)[1:-1] == tag: + return True + elif isinstance(expression, StringFilterExpression): + if repr(expression).replace('"', "") == tag: + return True + return False diff --git a/logprep/framework/rule_tree/rule_tree.py b/logprep/framework/rule_tree/rule_tree.py index 96e00de08..b0f5971d1 100644 --- a/logprep/framework/rule_tree/rule_tree.py +++ b/logprep/framework/rule_tree/rule_tree.py @@ -1,7 +1,7 @@ """This module contains the rule tree functionality.""" from logging import Logger -from typing import List, TYPE_CHECKING +from typing import List, TYPE_CHECKING, Optional import numpy as np from attr import define, Factory @@ -45,6 +45,22 @@ def mean_processing_time(self): # pylint: enable=not-an-iterable # pylint: enable=protected-access + __slots__ = ( + "rule_parser", + "metrics", + "priority_dict", + "_rule_mapping", + "_config_path", + "_root", + ) + + rule_parser: Optional[RuleParser] + metrics: RuleTreeMetrics + priority_dict: dict + _rule_mapping: dict + _config_path: str + _root: Node + def __init__(self, root: Node = None, config_path: str = None, metric_labels: dict = None): """Rule tree initialization function. @@ -60,6 +76,7 @@ def __init__(self, root: Node = None, config_path: str = None, metric_labels: di Path to the optional configuration file that contains the new rule tree's configuration. 
""" + self.rule_parser = None self._rule_mapping = {} self._config_path = config_path self._setup() @@ -79,12 +96,13 @@ def _setup(self): """ self.priority_dict = {} - self.tag_map = {} + tag_map = {} if self._config_path: config_data = getter.GetterFactory.from_string(self._config_path).get_json() self.priority_dict = config_data["priority_dict"] - self.tag_map = config_data["tag_map"] + tag_map = config_data["tag_map"] + self.rule_parser = RuleParser(tag_map) def add_rule(self, rule: "Rule", logger: Logger = None): """Add rule to rule tree. @@ -105,18 +123,18 @@ def add_rule(self, rule: "Rule", logger: Logger = None): """ try: - parsed_rule_list = RuleParser.parse_rule(rule, self.priority_dict, self.tag_map) - except Exception as ex: + parsed_rule = self.rule_parser.parse_rule(rule, self.priority_dict) + except Exception as error: # pylint: disable=broad-except logger.warning( - f'Error parsing rule "{rule.filter}": {type(ex).__name__}: {ex}.' + f'Error parsing rule "{rule.filter}": {type(error).__name__}: {error}.' f"\nIgnore and continue with next rule." ) return self.metrics.number_of_rules += 1 - for parsed_rule in parsed_rule_list: - end_node = self._add_parsed_rule(parsed_rule) + for rule_segment in parsed_rule: + end_node = self._add_parsed_rule(rule_segment) if rule not in end_node.matching_rules: end_node.matching_rules.append(rule) @@ -126,7 +144,7 @@ def add_rule(self, rule: "Rule", logger: Logger = None): def _add_parsed_rule(self, parsed_rule: list): """Add parsed rule to rule tree. - This function adds a parsed subrule of a given rule to the rule tree by iterating through + This function adds a parsed sub-rule of a given rule to the rule tree by iterating through the current tree. 
For every filter expression in the parsed rule, the children of the current node are @@ -148,12 +166,13 @@ def _add_parsed_rule(self, parsed_rule: list): current_node = self.root for expression in parsed_rule: - if current_node.has_child_with_expression(expression): - current_node = current_node.get_child_with_expression(expression) - continue - new_node = Node(expression) - current_node.add_child(new_node) - current_node = new_node + child_with_expression = current_node.get_child_with_expression(expression) + if child_with_expression: + current_node = child_with_expression + else: + new_node = Node(expression) + current_node.add_child(new_node) + current_node = new_node return current_node @@ -234,14 +253,12 @@ def print(self, current_node: Node = None, depth: int = 1): current_node = self._root for child in current_node.children: + indentations = "\t" * (depth - 1) + arrow_length = "-" * depth print( - "\t" * (depth - 1) + str(current_node.expression), - "\t", - "-" * depth + ">", - child.expression, - child.matching_rules, + f"{indentations}{current_node.expression} \t {arrow_length}> " + f"{child.expression} {child.matching_rules}" ) - self.print(child, depth + 1) def get_size(self, current_node: Node = None) -> int: @@ -276,6 +293,7 @@ def _get_rules_as_list(self) -> List["Rule"]: Returns ------- rules: List[Rule] + """ return list(self._rule_mapping) diff --git a/tests/unit/framework/rule_tree/shared_constants.py b/tests/unit/framework/rule_tree/shared_constants.py new file mode 100644 index 000000000..5287f5126 --- /dev/null +++ b/tests/unit/framework/rule_tree/shared_constants.py @@ -0,0 +1,11 @@ +# pylint: disable=missing-docstring +from logprep.filter.expression.filter_expression import StringFilterExpression, Exists + +sfe_1 = StringFilterExpression(["key1"], "value1") +sfe_2 = StringFilterExpression(["key2"], "value2") +sfe_3 = StringFilterExpression(["key3"], "value3") +sfe_4 = StringFilterExpression(["key4"], "value4") +sfe_5 = 
StringFilterExpression(["key5", "subkey5"], "value5") + +ex_1 = Exists(["ABC.def"]) +ex_2 = Exists(["xyz"]) diff --git a/tests/unit/framework/rule_tree/test_demorgan_resolver.py b/tests/unit/framework/rule_tree/test_demorgan_resolver.py new file mode 100644 index 000000000..3937a4adb --- /dev/null +++ b/tests/unit/framework/rule_tree/test_demorgan_resolver.py @@ -0,0 +1,56 @@ +# pylint: disable=protected-access +# pylint: disable=missing-docstring +# pylint: disable=line-too-long +# pylint: disable=too-many-statements + +import pytest + +from logprep.filter.expression.filter_expression import And, Or, Not +from logprep.framework.rule_tree.demorgan_resolver import DeMorganResolver + +from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4 + + +@pytest.fixture(name="demorgan_resolver") +def fixture_demorgan_resolver(): + return DeMorganResolver() + + +class TestDeMorganResolver: + @pytest.mark.parametrize( + "expression, resolve_state", + [ + (sfe_1, False), + (Not(sfe_1), False), + (And(sfe_1, sfe_2), False), + (And(Not(sfe_1), sfe_2), False), + (Or(And(sfe_1, Not(sfe_2))), False), + (Or(And(sfe_1, sfe_2)), False), + (Not(And(sfe_1, sfe_2)), True), + (Or(Not(And(sfe_1, sfe_2)), sfe_3), True), + ], + ) + def test_has_unresolved_not_expression(self, expression, resolve_state, demorgan_resolver): + assert demorgan_resolver._has_unresolved_expression(expression) == resolve_state + + @pytest.mark.parametrize( + "expression, resolved_expr", + [ + (sfe_1, sfe_1), + (Not(sfe_1), Not(sfe_1)), + (Not(And(Not(sfe_1))), Or(Not(Not(sfe_1)))), + (Not(Or(sfe_1, sfe_2)), And(Not(sfe_1), Not(sfe_2))), + (Not(And(sfe_1, sfe_2)), Or(Not(sfe_1), Not(sfe_2))), + (And(Not(Or(sfe_1, sfe_2)), sfe_3), And(And(Not(sfe_1), Not(sfe_2)), sfe_3)), + (Or(Not(Or(sfe_1, sfe_2)), sfe_3), Or(And(Not(sfe_1), Not(sfe_2)), sfe_3)), + (Not(Or(And(sfe_1, sfe_2), sfe_3)), And(Or(Not(sfe_1), Not(sfe_2)), Not(sfe_3))), + (Not(And(Or(sfe_1, sfe_2), sfe_3)), Or(And(Not(sfe_1), 
Not(sfe_2)), Not(sfe_3))), + (And(Not(And(sfe_1, sfe_2)), sfe_3), And(Or(Not(sfe_1), Not(sfe_2)), sfe_3)), + ( + And(Not(Or(sfe_1, sfe_2)), Not(And(sfe_3, sfe_4))), + And(And(Not(sfe_1), Not(sfe_2)), Or(Not(sfe_3), Not(sfe_4))), + ), + ], + ) + def test_resolve_not_expression(self, expression, resolved_expr, demorgan_resolver): + assert demorgan_resolver.resolve(expression) == resolved_expr diff --git a/tests/unit/framework/rule_tree/test_node.py b/tests/unit/framework/rule_tree/test_node.py index a0ef85a0d..a448a4d34 100644 --- a/tests/unit/framework/rule_tree/test_node.py +++ b/tests/unit/framework/rule_tree/test_node.py @@ -1,15 +1,16 @@ +# pylint: disable=missing-docstring from logprep.filter.expression.filter_expression import StringFilterExpression from logprep.framework.rule_tree.node import Node class TestNode: def test_init(self): - expression = StringFilterExpression("foo", "bar") + expression = StringFilterExpression(["foo"], "bar") node = Node(expression) assert isinstance(node.expression, StringFilterExpression) assert node.expression == expression - assert node.children == [] + assert not node.children def test_does_match_returns_true_as_expected(self): expression = StringFilterExpression(["foo"], "bar") @@ -38,23 +39,11 @@ def test_add_child(self): assert node_start.children == [node_end] assert node_start.children[0].expression == expression_end - def test_has_child_with_expression(self): - expression_end = StringFilterExpression("foo", "bar") - - node_start = Node(None) - node_end = Node(expression_end) - - node_start.add_child(node_end) - - assert node_start.has_child_with_expression(StringFilterExpression("foo", "bar")) - assert not node_start.has_child_with_expression(StringFilterExpression("foooo", "baaar")) - def test_get_child_with_expression(self): - expression_end = StringFilterExpression("foo", "bar") + expression_end = StringFilterExpression(["foo"], "bar") node_start = Node(None) node_end = Node(expression_end) 
node_start.add_child(node_end) - assert node_start.get_child_with_expression(expression_end) == node_end diff --git a/tests/unit/framework/rule_tree/test_rule_parser.py b/tests/unit/framework/rule_tree/test_rule_parser.py index dcb874f2a..aabf39cd9 100644 --- a/tests/unit/framework/rule_tree/test_rule_parser.py +++ b/tests/unit/framework/rule_tree/test_rule_parser.py @@ -5,20 +5,13 @@ import pytest -from logprep.filter.expression.filter_expression import And, Or, StringFilterExpression, Not, Exists -from logprep.framework.rule_tree.rule_parser import RuleParser as RP +from logprep.filter.expression.filter_expression import StringFilterExpression, Not, Exists +from logprep.framework.rule_tree.rule_parser import RuleParser from logprep.processor.pre_detector.rule import PreDetectorRule -pytest.importorskip("logprep.processor.pre_detector") - -str1 = StringFilterExpression(["key1"], "value1") -str2 = StringFilterExpression(["key2"], "value2") -str3 = StringFilterExpression(["key3"], "value3") -str4 = StringFilterExpression(["key4"], "value4") -str5 = StringFilterExpression(["key5", "subkey5"], "value5") +from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, sfe_5 -ex1 = Exists(["ABC.def"]) -ex2 = Exists(["xyz"]) +pytest.importorskip("logprep.processor.pre_detector") class TestRuleParser: @@ -545,268 +538,54 @@ class TestRuleParser: ], ) def test_parse_rule_param(self, rule, priority_dict, tag_map, expected_expressions): + rule_parser = RuleParser(tag_map) if expected_expressions is not None: - assert RP.parse_rule(rule, priority_dict, tag_map) == expected_expressions + assert rule_parser.parse_rule(rule, priority_dict) == expected_expressions else: - assert RP.parse_rule(rule, priority_dict, tag_map) - - def test_has_unresolved_not_expression(self): - exp = And(str1, str2) - assert not RP._has_unresolved_not_expression(exp) - - exp = Not(str1) - assert not RP._has_unresolved_not_expression(exp) - - exp = And(Not(str1), str2) - assert 
not RP._has_unresolved_not_expression(exp) - - exp = Or(And(str1, Not(str2))) - assert not RP._has_unresolved_not_expression(exp) - - exp = Or(And(str1, str2)) - assert not RP._has_unresolved_not_expression(exp) - - exp = Not(And(str1, str2)) - assert RP._has_unresolved_not_expression(exp) - - exp = Or(Not(And(str1, str2)), str3) - assert RP._has_unresolved_not_expression(exp) - - # pylint: disable=invalid-name - def test_parse_NOT(self): - exp = Not(str1) - assert RP._parse_not_expression(exp) == exp - - exp = Not(Or(str1, str2)) - assert RP._parse_not_expression(exp) == And(Not(str1), Not(str2)) - - exp = Not(And(str1, str2)) - assert RP._parse_not_expression(exp) == Or(Not(str1), Not(str2)) - - exp = And(Not(Or(str1, str2)), str3) - assert RP._parse_not_expression(exp) == And(And(Not(str1), Not(str2)), str3) - - exp = Or(Not(Or(str1, str2)), str3) - assert RP._parse_not_expression(exp) == Or(And(Not(str1), Not(str2)), str3) - - exp = Not(Or(And(str1, str2), str3)) - assert RP._parse_not_expression(exp) == And(Or(Not(str1), Not(str2)), Not(str3)) - - exp = Not(And(Or(str1, str2), str3)) - assert RP._parse_not_expression(exp) == Or(And(Not(str1), Not(str2)), Not(str3)) - - exp = And(Not(And(str1, str2)), str3) - assert RP._parse_not_expression(exp) == And(Or(Not(str1), Not(str2)), str3) - - exp = And(Not(Or(str1, str2)), Not(And(str3, str4))) - assert RP._parse_not_expression(exp) == And( - And(Not(str1), Not(str2)), Or(Not(str3), Not(str4)) - ) - - def test_parse_AND(self): - exp = And(str1, str2) - assert RP._parse_and_expression(exp) == [str1, str2] - - exp = And(str1, str2, str3) - assert RP._parse_and_expression(exp) == [str1, str2, str3] - - exp = And(str1, Not(str2)) - assert RP._parse_and_expression(exp) == [str1, Not(str2)] - - exp = And(str1, And(Not(str2), str3)) - assert RP._parse_and_expression(exp) == [str1, Not(str2), str3] - - def test_parse_OR(self): - exp = Or(str1, str2) - assert RP._parse_or_expression(exp) == [[str1], [str2]] - - exp = 
And(str1, Or(str2, str3)) - assert RP._parse_or_expression(exp) == [[str1, str2], [str1, str3]] - - exp = And(str1, Or(str2, str3), str4) - assert RP._parse_or_expression(exp) == [[str1, str4, str2], [str1, str4, str3]] - - exp = Or(And(Not(str1), Not(str2)), str3) - assert RP._parse_or_expression(exp) == [[Not(str1), Not(str2)], [str3]] - - exp = And(Or(Not(str1), Not(str2)), Not(str3)) - assert RP._parse_or_expression(exp) == [[Not(str3), Not(str1)], [Not(str3), Not(str2)]] - - exp = And(Not(str1), Not(str2), Or(Not(str3), Not(str4))) - assert RP._parse_or_expression(exp) == [ - [Not(str1), Not(str2), Not(str3)], - [Not(str1), Not(str2), Not(str4)], - ] - - exp = And(And(Not(str1), Not(str2)), Or(Not(str3), Not(str4))) - assert RP._parse_or_expression(exp) == [ - [Not(str2), Not(str1), Not(str3)], - [Not(str2), Not(str1), Not(str4)], - ] - - exp = And(Or(str1, str2), Or(str3, str4)) - assert RP._parse_or_expression(exp) == [ - [str1, str3], - [str1, str4], - [str2, str3], - [str2, str4], - ] - - exp = Or(And(str1, Or(str2, str3)), str4) - assert RP._parse_or_expression(exp) == [[str1, str2], [str1, str3], [str4]] - - # pylint: enable=invalid-name - - def test_has_or_expression(self): - exp = And(str1, str2) - assert not RP._has_or_expression(exp) - - exp = Or(str1, str2) - assert RP._has_or_expression(exp) - - exp = Not(str1) - assert not RP._has_or_expression(exp) - - exp = Not(And(str1, str2)) - assert not RP._has_or_expression(exp) - - exp = Not(Or(str1, str2)) - assert RP._has_or_expression(exp) - - exp = And(Not(Or(str1, str2))) - assert RP._has_or_expression(exp) - - exp = And(Not(And(str1, str2))) - assert not RP._has_or_expression(exp) - - def test_sort_rule_segments(self): - rule_list = [[str1, str4, str3, str2]] - - RP._sort_rule_segments(rule_list, {}) - - assert rule_list == [[str1, str2, str3, str4]] - - rule_list = [[str1, str4, str3, str2]] - priority_dict = {"key2": "1"} - - RP._sort_rule_segments(rule_list, priority_dict) - - assert rule_list == 
[[str2, str1, str3, str4]] - - rule_list = [[str1, str3, ex1, str2, ex2]] - RP._sort_rule_segments(rule_list, {}) - - assert rule_list == [[ex1, str1, str2, str3, ex2]] + assert rule_parser.parse_rule(rule, priority_dict) - rule_list = [[str1, str3, ex1, str2, ex2]] - priority_dict = {"xyz": "1"} - RP._sort_rule_segments(rule_list, priority_dict) - - assert rule_list == [[ex2, ex1, str1, str2, str3]] - - rule_list = [[str2, Not(str1)]] - priority_dict = {"key1": "1"} - RP._sort_rule_segments(rule_list, priority_dict) - - assert rule_list == [[Not(str1), str2]] - - def test_add_special_tags(self): - rule_list = [[str1, str2]] - tag_map = {"key2": "TAG"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [[Exists(["TAG"]), str1, str2]] - - rule_list = [[str1, str2], [str1, str3]] - tag_map = {"key2": "TAG2", "key3": "TAG3"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [[Exists(["TAG2"]), str1, str2], [Exists(["TAG3"]), str1, str3]] - - rule_list = [[str1, str4, str2], [str2, str3], [str2], [str4, str3]] - tag_map = {"key1": "TAG1", "key2": "TAG2"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [ - [Exists(["TAG2"]), Exists(["TAG1"]), str1, str4, str2], - [Exists(["TAG2"]), str2, str3], - [Exists(["TAG2"]), str2], - [str4, str3], - ] - - rule_list = [[str1, str3], [str2, str4]] - tag_map = {"key1": "TAG1", "key2": "TAG2.SUBTAG2"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [ - [Exists(["TAG1"]), str1, str3], - [Exists(["TAG2", "SUBTAG2"]), str2, str4], - ] - - rule_list = [[str1, str3], [str2, str4]] - tag_map = {"key1": "TAG1:Value1", "key2": "TAG2.SUBTAG2"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [ - [StringFilterExpression(["TAG1"], "Value1"), str1, str3], - [Exists(["TAG2", "SUBTAG2"]), str2, str4], - ] - - rule_list = [[str1, str3], [str2, str4]] - tag_map = {"key1": "TAG1.SUBTAG1:Value1", "key2": "TAG2.SUBTAG2"} - - RP._add_special_tags(rule_list, tag_map) 
- assert rule_list == [ - [StringFilterExpression(["TAG1", "SUBTAG1"], "Value1"), str1, str3], - [Exists(["TAG2", "SUBTAG2"]), str2, str4], - ] - - rule_list = [[str1, ex2]] - tag_map = {"xyz": "TAG:VALUE"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [[StringFilterExpression(["TAG"], "VALUE"), str1, ex2]] - - rule_list = [[Not(str1)]] - tag_map = {"key1": "TAG"} - - RP._add_special_tags(rule_list, tag_map) - assert rule_list == [[Exists(["TAG"]), Not(str1)]] - - def test_add_exists_filter(self): - rule_list = [[str1, str2, str3, str4]] - RP._add_exists_filter(rule_list) - - assert rule_list == [ - [ - Exists(["key1"]), - str1, - Exists(["key2"]), - str2, - Exists(["key3"]), - str3, - Exists(["key4"]), - str4, - ] - ] - - rule_list = [[str1, str3, str5]] - RP._add_exists_filter(rule_list) - - assert rule_list == [ - [Exists(["key1"]), str1, Exists(["key3"]), str3, Exists(["key5", "subkey5"]), str5] - ] - - rule_list = [[str1], [str2], [str3]] - RP._add_exists_filter(rule_list) - - assert rule_list == [ - [Exists(["key1"]), str1], - [Exists(["key2"]), str2], - [Exists(["key3"]), str3], - ] - - rule_list = [[Not(str1)]] - RP._add_exists_filter(rule_list) - - assert rule_list == [[Not(str1)]] + @pytest.mark.parametrize( + "rule_list, expected", + [ + ( + [[sfe_1, sfe_2, sfe_3, sfe_4]], + [ + [ + Exists(["key1"]), + sfe_1, + Exists(["key2"]), + sfe_2, + Exists(["key3"]), + sfe_3, + Exists(["key4"]), + sfe_4, + ] + ], + ), + ( + [[sfe_1, sfe_3, sfe_5]], + [ + [ + Exists(["key1"]), + sfe_1, + Exists(["key3"]), + sfe_3, + Exists(["key5", "subkey5"]), + sfe_5, + ] + ], + ), + ( + [[sfe_1], [sfe_2], [sfe_3]], + [ + [Exists(["key1"]), sfe_1], + [Exists(["key2"]), sfe_2], + [Exists(["key3"]), sfe_3], + ], + ), + ([[Not(sfe_1)]], [[Not(sfe_1)]]), + ], + ) + def test_add_exists_filter(self, rule_list, expected): + RuleParser._add_exists_filter(rule_list) + assert rule_list == expected diff --git a/tests/unit/framework/rule_tree/test_rule_segment_sorter.py 
b/tests/unit/framework/rule_tree/test_rule_segment_sorter.py new file mode 100644 index 000000000..0ae9c0989 --- /dev/null +++ b/tests/unit/framework/rule_tree/test_rule_segment_sorter.py @@ -0,0 +1,33 @@ +# pylint: disable=protected-access +# pylint: disable=missing-docstring +# pylint: disable=line-too-long +# pylint: disable=too-many-statements + +import pytest + +from logprep.filter.expression.filter_expression import Not + +from logprep.framework.rule_tree.rule_sorter import RuleSorter +from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, ex_1, ex_2 + +pytest.importorskip("logprep.processor.pre_detector") + + +class TestRuleSegmentSorter: + @pytest.mark.parametrize( + "rule_list, priority_dict, expected", + [ + ([[sfe_1, sfe_4, sfe_3, sfe_2]], {}, [[sfe_1, sfe_2, sfe_3, sfe_4]]), + ([[sfe_1, sfe_4, sfe_3, sfe_2]], {"key2": "1"}, [[sfe_2, sfe_1, sfe_3, sfe_4]]), + ([[sfe_1, sfe_3, ex_1, sfe_2, ex_2]], {}, [[ex_1, sfe_1, sfe_2, sfe_3, ex_2]]), + ( + [[sfe_1, sfe_3, ex_1, sfe_2, ex_2]], + {"xyz": "1"}, + [[ex_2, ex_1, sfe_1, sfe_2, sfe_3]], + ), + ([[sfe_2, Not(sfe_1)]], {"key1": "1"}, [[Not(sfe_1), sfe_2]]), + ], + ) + def test_sort_rule_segments(self, rule_list, priority_dict, expected): + RuleSorter.sort_rule_segments(rule_list, priority_dict) + assert rule_list == expected diff --git a/tests/unit/framework/rule_tree/test_rule_segmenter.py b/tests/unit/framework/rule_tree/test_rule_segmenter.py new file mode 100644 index 000000000..9c7200a8d --- /dev/null +++ b/tests/unit/framework/rule_tree/test_rule_segmenter.py @@ -0,0 +1,80 @@ +# pylint: disable=protected-access +# pylint: disable=missing-docstring +# pylint: disable=line-too-long +# pylint: disable=too-many-statements + +import pytest + +from logprep.filter.expression.filter_expression import And, Or, Not +from logprep.framework.rule_tree.rule_segmenter import RuleSegmenter, CnfToDnfConverter +from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, 
sfe_3, sfe_4 + + +class TestRuleSegmenter: + @pytest.mark.parametrize( + "expression, expected", + [ + (And(sfe_1, sfe_2), [sfe_1, sfe_2]), + (And(sfe_1, sfe_2, sfe_3), [sfe_1, sfe_2, sfe_3]), + (And(sfe_1, Not(sfe_2)), [sfe_1, Not(sfe_2)]), + (And(sfe_1, And(Not(sfe_2), sfe_3)), [sfe_1, Not(sfe_2), sfe_3]), + ], + ) + def test_parse_and_expression(self, expression, expected): + assert RuleSegmenter._segment_conjunctive_expression(expression) == expected + + @pytest.mark.parametrize( + "expression, expected", + [ + (Or(sfe_1, sfe_2), [[sfe_1], [sfe_2]]), + (And(sfe_1, Or(sfe_2, sfe_3)), [[sfe_1, sfe_2], [sfe_1, sfe_3]]), + (And(sfe_1, Or(sfe_2, sfe_3), sfe_4), [[sfe_1, sfe_4, sfe_2], [sfe_1, sfe_4, sfe_3]]), + (Or(And(Not(sfe_1), Not(sfe_2)), sfe_3), [[Not(sfe_1), Not(sfe_2)], [sfe_3]]), + ( + And(Or(Not(sfe_1), Not(sfe_2)), Not(sfe_3)), + [[Not(sfe_3), Not(sfe_1)], [Not(sfe_3), Not(sfe_2)]], + ), + ( + And(Not(sfe_1), Not(sfe_2), Or(Not(sfe_3), Not(sfe_4))), + [[Not(sfe_1), Not(sfe_2), Not(sfe_3)], [Not(sfe_1), Not(sfe_2), Not(sfe_4)]], + ), + ( + And(And(Not(sfe_1), Not(sfe_2)), Or(Not(sfe_3), Not(sfe_4))), + [[Not(sfe_2), Not(sfe_1), Not(sfe_3)], [Not(sfe_2), Not(sfe_1), Not(sfe_4)]], + ), + ( + And(Or(sfe_1, sfe_2), Or(sfe_3, sfe_4)), + [[sfe_1, sfe_3], [sfe_1, sfe_4], [sfe_2, sfe_3], [sfe_2, sfe_4]], + ), + (Or(And(sfe_1, Or(sfe_2, sfe_3)), sfe_4), [[sfe_1, sfe_2], [sfe_1, sfe_3], [sfe_4]]), + ], + ) + def test_parse_or_expression(self, expression, expected): + assert RuleSegmenter._segment_expression(expression) == expected + + @pytest.mark.parametrize( + "expression, expected", + [ + (And(sfe_1, sfe_2), False), + (Or(sfe_1, sfe_2), True), + (Not(sfe_1), False), + (Not(And(sfe_1, sfe_2)), False), + (Not(Or(sfe_1, sfe_2)), True), + (And(Not(Or(sfe_1, sfe_2))), True), + (And(Not(And(sfe_1, sfe_2))), False), + ], + ) + def test_has_or_expression(self, expression, expected): + assert RuleSegmenter._has_disjunction(expression) is expected + + 
@pytest.mark.parametrize( + "expression_cnf, expected_dnf", + [ + ([sfe_1, [[sfe_2]]], [[sfe_1, sfe_2]]), + ([[[sfe_1]], [[sfe_2]]], [[sfe_1, sfe_2]]), + ([sfe_1, [[sfe_2]], sfe_3], [[sfe_1, sfe_3, sfe_2]]), + ([sfe_1, [[sfe_2], [sfe_3]]], [[sfe_1, sfe_2], [sfe_1, sfe_3]]), + ], + ) + def test_convert_cnf_to_dnf(self, expression_cnf, expected_dnf): + assert CnfToDnfConverter.convert_cnf_to_dnf(expression_cnf) == expected_dnf diff --git a/tests/unit/framework/rule_tree/test_rule_tagger.py b/tests/unit/framework/rule_tree/test_rule_tagger.py new file mode 100644 index 000000000..a260b4eab --- /dev/null +++ b/tests/unit/framework/rule_tree/test_rule_tagger.py @@ -0,0 +1,70 @@ +# pylint: disable=protected-access +# pylint: disable=missing-docstring +# pylint: disable=line-too-long +# pylint: disable=too-many-statements + +import pytest + +from logprep.filter.expression.filter_expression import StringFilterExpression, Not, Exists +from logprep.framework.rule_tree.rule_tagger import RuleTagger +from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, ex_2 + +pytest.importorskip("logprep.processor.pre_detector") + + +class TestRuleTagger: + @pytest.mark.parametrize( + "rule_list, tag_map, expected", + [ + ([[sfe_1, sfe_2]], {"key2": "TAG"}, [[Exists(["TAG"]), sfe_1, sfe_2]]), + ( + [[sfe_1, sfe_2], [sfe_1, sfe_3]], + {"key2": "TAG2", "key3": "TAG3"}, + [[Exists(["TAG2"]), sfe_1, sfe_2], [Exists(["TAG3"]), sfe_1, sfe_3]], + ), + ( + [[sfe_1, sfe_4, sfe_2], [sfe_2, sfe_3], [sfe_2], [sfe_4, sfe_3]], + {"key1": "TAG1", "key2": "TAG2"}, + [ + [Exists(["TAG2"]), Exists(["TAG1"]), sfe_1, sfe_4, sfe_2], + [Exists(["TAG2"]), sfe_2, sfe_3], + [Exists(["TAG2"]), sfe_2], + [sfe_4, sfe_3], + ], + ), + ( + [[sfe_1, sfe_3], [sfe_2, sfe_4]], + {"key1": "TAG1", "key2": "TAG2.SUBTAG2"}, + [ + [Exists(["TAG1"]), sfe_1, sfe_3], + [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + ], + ), + ( + [[sfe_1, sfe_3], [sfe_2, sfe_4]], + {"key1": "TAG1:Value1", "key2": 
"TAG2.SUBTAG2"}, + [ + [StringFilterExpression(["TAG1"], "Value1"), sfe_1, sfe_3], + [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + ], + ), + ( + [[sfe_1, sfe_3], [sfe_2, sfe_4]], + {"key1": "TAG1.SUBTAG1:Value1", "key2": "TAG2.SUBTAG2"}, + [ + [StringFilterExpression(["TAG1", "SUBTAG1"], "Value1"), sfe_1, sfe_3], + [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + ], + ), + ( + [[sfe_1, ex_2]], + {"xyz": "TAG:VALUE"}, + [[StringFilterExpression(["TAG"], "VALUE"), sfe_1, ex_2]], + ), + ([[Not(sfe_1)]], {"key1": "TAG"}, [[Exists(["TAG"]), Not(sfe_1)]]), + ], + ) + def test_add_special_tags(self, rule_list, tag_map, expected): + rule_tagger = RuleTagger(tag_map) + rule_tagger.add(rule_list) + assert rule_list == expected diff --git a/tests/unit/framework/rule_tree/test_rule_tree.py b/tests/unit/framework/rule_tree/test_rule_tree.py index 5bc6d20ad..08683c159 100644 --- a/tests/unit/framework/rule_tree/test_rule_tree.py +++ b/tests/unit/framework/rule_tree/test_rule_tree.py @@ -1,13 +1,43 @@ # pylint: disable=protected-access # pylint: disable=missing-docstring -# pylint: disable=no-self-use # pylint: disable=line-too-long +from copy import deepcopy + +import pytest from logprep.filter.expression.filter_expression import Exists, StringFilterExpression from logprep.framework.rule_tree.node import Node +from logprep.framework.rule_tree.rule_parser import RuleParser from logprep.framework.rule_tree.rule_tree import RuleTree from logprep.processor.pre_detector.rule import PreDetectorRule +RULE_DICT = { + "filter": "winlog: 123", + "pre_detector": { + "id": 1, + "title": "1", + "severity": "0", + "case_condition": "directly", + "mitre": [], + }, +} + + +@pytest.fixture(name="rule_dict") +def rule_dict_fixture(): + rule_dict = { + "filter": "winlog: 123", + "pre_detector": { + "id": 1, + "title": "1", + "severity": "0", + "case_condition": "directly", + "mitre": [], + }, + } + + return rule_dict + class TestRuleTree: def test_init(self): @@ -16,20 +46,9 @@ def test_init(self): 
assert isinstance(rule_tree.root, Node) assert rule_tree.root.expression == "root" - def test_add_rule(self): + def test_add_rule(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.root.children[0].expression == Exists(["winlog"]) @@ -38,18 +57,8 @@ def test_add_rule(self): ) assert rule_tree.root.children[0].children[0].matching_rules == [rule] - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND xfoo: bar" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.root.children[0].children[0].children[0].expression == Exists(["xfoo"]) @@ -60,295 +69,135 @@ def test_add_rule(self): rule ] - def test_get_rule_id(self): + def test_get_rule_id(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_rule_id(rule) == 0 - rule2 = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND xfoo: bar" + rule2 = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule2) assert rule_tree.get_rule_id(rule) == 0 assert rule_tree.get_rule_id(rule2) == 1 - def test_match_simple(self): 
+ def test_match_simple(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"winlog": "123"}) == [rule] - def test_match_complex_case(self): + @pytest.mark.parametrize( + "document", + [ + {"winlog": "123", "test": "Good"}, + {"winlog": "123", "test": "Okay"}, + {"winlog": "123", "test": "Bad"}, + {"foo": "bar"}, + ], + ) + def test_match_complex_case(self, rule_dict, document): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND test: (Good OR Okay OR Bad) OR foo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND test: (Good OR Okay OR Bad) OR foo: bar" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) - assert rule_tree.get_matching_rules({"winlog": "123", "test": "Good"}) == [rule] - assert rule_tree.get_matching_rules({"winlog": "123", "test": "Okay"}) == [rule] - assert rule_tree.get_matching_rules({"winlog": "123", "test": "Bad"}) == [rule] - assert rule_tree.get_matching_rules({"foo": "bar"}) == [rule] + assert rule_tree.get_matching_rules(document) == [rule] - def test_match_event_matches_multiple_rules(self): + def test_match_event_matches_multiple_rules(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND test: (Good OR Okay OR Bad)", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND test: (Good OR Okay OR Bad)" + rule = PreDetectorRule._create_from_dict(rule_dict) 
rule_tree.add_rule(rule) - rule2 = PreDetectorRule._create_from_dict( - { - "filter": "foo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "foo: bar" + rule2 = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule2) + matchings_rules = rule_tree.get_matching_rules( {"winlog": "123", "test": "Good", "foo": "bar"} ) assert matchings_rules == [rule, rule2] - def test_match_rule_once_with_conjunction_like_sub_rule(self): + def test_match_rule_once_with_conjunction_like_sub_rule(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog OR winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog OR winlog: 123" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"winlog": "123"}) == [rule] - def test_match_rule_once_with_conjunction_same(self): + def test_match_rule_once_with_conjunction_same(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 OR winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 OR winlog: 123" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"winlog": "123"}) == [rule] - def test_match_rule_once_with_conjunction_both_match(self): + def test_match_rule_once_with_conjunction_both_match(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "foo: 123 OR bar: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = 
"foo: 123 OR bar: 123" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"foo": "123", "bar": "123"}) == [rule] - def test_match_rule_with_conjunction_for_different_events(self): + def test_match_rule_with_conjunction_for_different_events(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 OR winlog: 456", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 OR winlog: 456" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"winlog": "123"}) == [rule] assert rule_tree.get_matching_rules({"winlog": "456"}) == [rule] - def test_match_two_identical_rules(self): + def test_match_two_identical_rules(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"winlog": "123"}) == [rule] - def test_get_matching_rules_has_deterministic_order(self): + def test_get_matching_rules_has_deterministic_order(self, rule_dict): rule_tree = RuleTree() test_rules = 5 + rule_dict["filter"] = "foo: 123 OR bar: 123" for i in range(test_rules): - rule = PreDetectorRule._create_from_dict( - { - "filter": "foo: 123 OR bar: 123", - "pre_detector": { - "id": i, - "title": f"{i}", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["pre_detector"]["id"] = i + rule_dict["pre_detector"]["title"] = f"{i}" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) matching_rules = rule_tree.get_matching_rules({"foo": "123", 
"bar": "123"}) rule_ids = [rule.detection_data["id"] for rule in matching_rules] assert rule_ids == list(range(test_rules)) - def test_match_exists_filter_is_subfield(self): + @pytest.mark.parametrize( + "rule_filter, document", + [ + ("foo.bar: 123", {"foo": {"bar": "123"}}), + ("foo.bar.test: 123", {"foo": {"bar": {"test": "123"}}}), + ("abc: DEF AND foo.bar.test: 567", {"abc": "DEF", "foo": {"bar": {"test": "567"}}}), + ], + ) + def test_match_exists_filter_is_subfield(self, rule_filter, document, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "foo.bar: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = rule_filter + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) - assert rule_tree.get_matching_rules({"foo": {"bar": "123"}}) == [rule] - - rule = PreDetectorRule._create_from_dict( - { - "filter": "foo.bar.test: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) - rule_tree.add_rule(rule) - assert rule_tree.get_matching_rules({"foo": {"bar": {"test": "123"}}}) == [rule] - - rule = PreDetectorRule._create_from_dict( - { - "filter": "abc: DEF AND foo.bar.test: 567", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) - rule_tree.add_rule(rule) - - matching_rules = rule_tree.get_matching_rules( - {"abc": "DEF", "foo": {"bar": {"test": "567"}}} - ) - assert matching_rules == [rule] + assert rule_tree.get_matching_rules(document) == [rule] - def test_match_including_tags(self): + def test_match_including_tags(self, rule_dict): tag_map = {"winlog": "WINDOWS"} rule_tree = RuleTree() - rule_tree.tag_map = tag_map - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND test: (Good OR Okay OR Bad) OR foo: bar", - 
"pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_tree.rule_parser = RuleParser(tag_map) + rule_dict["filter"] = "winlog: 123 AND test: (Good OR Okay OR Bad) OR foo: bar" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_matching_rules({"foo": "bar"}) @@ -358,19 +207,9 @@ def test_match_including_tags(self): tag_map = {"winlog": "source.windows"} rule_tree = RuleTree() - rule_tree.tag_map = tag_map - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND test: (Good OR Okay OR Bad) OR foo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_tree.rule_parser = RuleParser(tag_map) + + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) @@ -381,124 +220,47 @@ def test_match_including_tags(self): {"winlog": "123", "test": "Okay", "source": {"windows": "foo"}} ) - def test_match_with_subrules(self): + def test_match_with_subrules(self, rule_dict): rule_tree = RuleTree() - rule = PreDetectorRule._create_from_dict( - { - "filter": "EventID: 1 AND winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "EventID: 1 AND winlog: 123" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) - subrule = PreDetectorRule._create_from_dict( - { - "filter": "EventID: 1", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "EventID: 1" + subrule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(subrule) assert rule_tree.get_matching_rules({"EventID": "1", "winlog": "123"}) == [subrule, rule] - def test_get_size(self): + def test_get_size(self, rule_dict): rule_tree = RuleTree() - rule 
= PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_size() == 2 - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND xfoo: bar" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_size() == 4 - rule = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: foo", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_dict["filter"] = "winlog: 123 AND xfoo: foo" + rule = PreDetectorRule._create_from_dict(rule_dict) rule_tree.add_rule(rule) assert rule_tree.get_size() == 5 - def test_get_rules_as_list(self): + def test_get_rules_as_list(self, rule_dict): rule_tree = RuleTree() + + rule_dict_2 = deepcopy(rule_dict) + rule_dict_3 = deepcopy(rule_dict) + + rule_dict_2["filter"] = "winlog: 123 AND xfoo: bar" + rule_dict_3["filter"] = "winlog: 123 AND xfoo: foo" + rules = [ - PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ), - PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ), - PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: foo", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ), + 
PreDetectorRule._create_from_dict(rule_dict), + PreDetectorRule._create_from_dict(rule_dict_2), + PreDetectorRule._create_from_dict(rule_dict_3), ] _ = [rule_tree.add_rule(rule) for rule in rules] rules_from_rule_tree = rule_tree._get_rules_as_list() @@ -506,87 +268,38 @@ def test_get_rules_as_list(self): for rule in rules: assert rule in rules_from_rule_tree - def test_rule_tree_metrics_counts_number_of_rules(self): + def test_rule_tree_metrics_counts_number_of_rules(self, rule_dict): rule_tree = RuleTree() assert rule_tree.metrics.number_of_rules == 0 - rule_tree.add_rule( - PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) - ) + rule_tree.add_rule(PreDetectorRule._create_from_dict(rule_dict)) assert rule_tree.metrics.number_of_rules == 1 - def test_rule_tree_metrics_number_of_matches_returns_number_of_all_rule_matches(self): + def test_rule_tree_metrics_number_of_matches_returns_number_of_all_rule_matches( + self, rule_dict + ): rule_tree = RuleTree() - rule_one = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_one = PreDetectorRule._create_from_dict(rule_dict) rule_one.metrics._number_of_matches = 1 - rule_two = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + + rule_dict["filter"] = "winlog: 123 AND xfoo: bar" + rule_two = PreDetectorRule._create_from_dict(rule_dict) rule_two.metrics._number_of_matches = 2 + rule_tree.add_rule(rule_one) rule_tree.add_rule(rule_two) assert rule_tree.metrics.number_of_matches == 1 + 2 def test_rule_tree_metrics_mean_processing_time_returns_mean_of_all_rule_mean_processing_times( - 
self, + self, rule_dict ): rule_tree = RuleTree() - rule_one = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + rule_one = PreDetectorRule._create_from_dict(rule_dict) rule_one.metrics.update_mean_processing_time(1) - rule_two = PreDetectorRule._create_from_dict( - { - "filter": "winlog: 123 AND xfoo: bar", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, - } - ) + + rule_dict["filter"] = "winlog: 123 AND xfoo: bar" + rule_two = PreDetectorRule._create_from_dict(rule_dict) rule_two.metrics.update_mean_processing_time(2) + rule_tree.add_rule(rule_one) rule_tree.add_rule(rule_two) assert rule_tree.metrics.mean_processing_time == 1.5 From 358e672303186e8f42a2df8e46faa8ba0c0da61e Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 11:54:56 +0200 Subject: [PATCH 02/13] Fix problem with rule tagger existence check and new lucene representation --- logprep/framework/rule_tree/rule_tagger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logprep/framework/rule_tree/rule_tagger.py b/logprep/framework/rule_tree/rule_tagger.py index 784c0c093..e7622ded8 100644 --- a/logprep/framework/rule_tree/rule_tagger.py +++ b/logprep/framework/rule_tree/rule_tagger.py @@ -117,7 +117,7 @@ def _tag_exists(expression: KeyBasedFilterExpression, tag: str) -> bool: """ if isinstance(expression, Exists): - if repr(expression)[1:-1] == tag: + if repr(expression).rstrip(": *") == tag: return True elif isinstance(expression, StringFilterExpression): if repr(expression).replace('"', "") == tag: From 8ee590a86678f6b712ce6fbfc5d81ce0bcf06b2f Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 11:55:38 +0200 Subject: [PATCH 03/13] Improve test coverage for rule tagger --- .../framework/rule_tree/test_rule_tagger.py | 21 
+++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/unit/framework/rule_tree/test_rule_tagger.py b/tests/unit/framework/rule_tree/test_rule_tagger.py index a260b4eab..c6d1fb3be 100644 --- a/tests/unit/framework/rule_tree/test_rule_tagger.py +++ b/tests/unit/framework/rule_tree/test_rule_tagger.py @@ -62,9 +62,30 @@ class TestRuleTagger: [[StringFilterExpression(["TAG"], "VALUE"), sfe_1, ex_2]], ), ([[Not(sfe_1)]], {"key1": "TAG"}, [[Exists(["TAG"]), Not(sfe_1)]]), + ([[sfe_1]], {"key1": "key1:value1"}, [[sfe_1]]), + ( + [[sfe_1, sfe_2, ex_2]], + {"key1": "TAG1", "key2": "xyz"}, + [[Exists(["TAG1"]), sfe_1, sfe_2, ex_2]], + ), + ([[sfe_1]], {}, [[sfe_1]]), ], ) def test_add_special_tags(self, rule_list, tag_map, expected): rule_tagger = RuleTagger(tag_map) rule_tagger.add(rule_list) assert rule_list == expected + + @pytest.mark.parametrize( + "expression, tag, tag_map, expected", + [ + (ex_2, "xyz", {"key1": "xyz"}, True), + (ex_2, "foo", {"key1": "foo"}, False), + (sfe_1, "key1:value1", {"key1": "key1:value1"}, True), + (sfe_1, "foo:bar", {"key1": "foo:bar"}, False), + ], + ) + def test_tag_exists(self, expression, tag, tag_map, expected): + rule_tagger = RuleTagger(tag_map) + print(type(expression), expression) + assert rule_tagger._tag_exists(expression, tag) is expected From 6025420a259ee604f68341b6921665add8a55078 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 11:57:21 +0200 Subject: [PATCH 04/13] Improve test coverage for rule sorter and refactor --- logprep/framework/rule_tree/rule_sorter.py | 12 ++++++--- ..._segment_sorter.py => test_rule_sorter.py} | 26 ++++++++++++++++--- 2 files changed, 31 insertions(+), 7 deletions(-) rename tests/unit/framework/rule_tree/{test_rule_segment_sorter.py => test_rule_sorter.py} (52%) diff --git a/logprep/framework/rule_tree/rule_sorter.py b/logprep/framework/rule_tree/rule_sorter.py index 9b8566f57..993788f4f 100644 --- a/logprep/framework/rule_tree/rule_sorter.py +++ 
b/logprep/framework/rule_tree/rule_sorter.py @@ -38,11 +38,15 @@ def sort_rule_segments(parsed_rule_list: list, priority_dict: dict): """ for parsed_rule in parsed_rule_list: - parsed_rule.sort(key=lambda r: RuleSorter._sort(r, priority_dict)) + parsed_rule.sort( + key=lambda expression: RuleSorter._get_sorting_key(expression, priority_dict) + ) @staticmethod - def _sort(expression: FilterExpression, priority_dict: dict) -> Union[dict, str, None]: - """Helper function for _sort_rule_segments. + def _get_sorting_key( + expression: FilterExpression, priority_dict: dict + ) -> Union[dict, str, None]: + """Get the sorting key for an expression with a priority dict.. This function is used by the _sort_rule_segments() function in the sorting key. It includes various cases to cover all the different expression classes. For every class it @@ -85,4 +89,4 @@ def _sort_not_expression(expression, priority_dict): return priority_dict[expression.child.key_as_dotted_string] except KeyError: pass - return RuleSorter._sort(expression.child, priority_dict) + return RuleSorter._get_sorting_key(expression.child, priority_dict) diff --git a/tests/unit/framework/rule_tree/test_rule_segment_sorter.py b/tests/unit/framework/rule_tree/test_rule_sorter.py similarity index 52% rename from tests/unit/framework/rule_tree/test_rule_segment_sorter.py rename to tests/unit/framework/rule_tree/test_rule_sorter.py index 0ae9c0989..499414fa6 100644 --- a/tests/unit/framework/rule_tree/test_rule_segment_sorter.py +++ b/tests/unit/framework/rule_tree/test_rule_sorter.py @@ -5,15 +5,15 @@ import pytest -from logprep.filter.expression.filter_expression import Not +from logprep.filter.expression.filter_expression import Not, Always, CompoundFilterExpression -from logprep.framework.rule_tree.rule_sorter import RuleSorter +from logprep.framework.rule_tree.rule_sorter import RuleSorter, RuleSorterException from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, ex_1, ex_2 
pytest.importorskip("logprep.processor.pre_detector") -class TestRuleSegmentSorter: +class TestRuleSorter: @pytest.mark.parametrize( "rule_list, priority_dict, expected", [ @@ -31,3 +31,23 @@ class TestRuleSegmentSorter: def test_sort_rule_segments(self, rule_list, priority_dict, expected): RuleSorter.sort_rule_segments(rule_list, priority_dict) assert rule_list == expected + + @pytest.mark.parametrize( + "expression, expected", + [ + (Always("foo"), None), + (Not(Always("foo")), None), + (sfe_1, str(sfe_1)), + (Not(sfe_1), str(sfe_1)), + (Not(Not(sfe_1)), str(sfe_1)), + (ex_1, str(ex_1)), + (Not(ex_1), str(ex_1)), + ], + ) + def test_get_sorting_key_succeeds(self, expression, expected): + assert RuleSorter._get_sorting_key(expression, {}) == expected + + @pytest.mark.parametrize("expression", [CompoundFilterExpression(sfe_1, sfe_2), "foo"]) + def test_get_sorting_key_raises_exception(self, expression): + with pytest.raises(RuleSorterException, match=f'Could not sort "{str(expression)}"'): + RuleSorter._get_sorting_key(expression, {}) From 8c0f5585b9b4cc57afcea92e3c01b8d7228607d5 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 14:28:31 +0200 Subject: [PATCH 05/13] Improve test coverage and refactor rule tree --- .../framework/rule_tree/demorgan_resolver.py | 10 +++- logprep/framework/rule_tree/rule_parser.py | 3 +- logprep/framework/rule_tree/rule_segmenter.py | 14 ++---- .../rule_tree/test_demorgan_resolver.py | 44 +++++++++++++++-- .../framework/rule_tree/test_rule_parser.py | 4 ++ .../framework/rule_tree/test_rule_tagger.py | 1 - .../framework/rule_tree/test_rule_tree.py | 47 ++++++++++++++----- 7 files changed, 95 insertions(+), 28 deletions(-) diff --git a/logprep/framework/rule_tree/demorgan_resolver.py b/logprep/framework/rule_tree/demorgan_resolver.py index 5b4afb49c..ebd16b2ca 100644 --- a/logprep/framework/rule_tree/demorgan_resolver.py +++ b/logprep/framework/rule_tree/demorgan_resolver.py @@ -78,6 +78,11 @@ def 
_has_unresolved_expression(expression: FilterExpression) -> bool: return False def _resolve_not_expression(self, not_expression: Not) -> FilterExpression: + if not isinstance(not_expression, Not): + raise DeMorganResolverException( + f'Can\'t resolve expression "{not_expression}", since it\'s not of the type "NOT."' + ) + if not isinstance(not_expression.child, CompoundFilterExpression): return not_expression @@ -89,7 +94,10 @@ def _resolve_not_expression(self, not_expression: Not) -> FilterExpression: elif isinstance(compound_expression, And): expression = Or(*negated_children) else: - raise DeMorganResolverException(f"Could not resolve expression {not_expression}") + raise DeMorganResolverException( + f'Could not resolve expression "{not_expression}", ' + f'since its child is neither of the type "AND" nor "OR".' + ) return self._resolve_compound_expression(expression) diff --git a/logprep/framework/rule_tree/rule_parser.py b/logprep/framework/rule_tree/rule_parser.py index 470d0d2a1..25bd07768 100644 --- a/logprep/framework/rule_tree/rule_parser.py +++ b/logprep/framework/rule_tree/rule_parser.py @@ -46,7 +46,6 @@ def __init__(self, tag_map: dict): """ self._demorgan_resolver = DeMorganResolver() - self._rule_segmenter = RuleSegmenter() self._rule_tagger = RuleTagger(tag_map) def parse_rule(self, rule: "Rule", priority_dict: dict) -> list: @@ -79,7 +78,7 @@ def parse_rule(self, rule: "Rule", priority_dict: dict) -> list: """ filter_expression = self._demorgan_resolver.resolve(rule.filter) - dnf_rule_segments = self._rule_segmenter.segment_into_dnf(rule, filter_expression) + dnf_rule_segments = RuleSegmenter.segment_into_dnf(filter_expression) RuleSorter.sort_rule_segments(dnf_rule_segments, priority_dict) self._add_exists_filter(dnf_rule_segments) self._rule_tagger.add(dnf_rule_segments) diff --git a/logprep/framework/rule_tree/rule_segmenter.py b/logprep/framework/rule_tree/rule_segmenter.py index 5b4867530..5bd8a8910 100644 --- 
a/logprep/framework/rule_tree/rule_segmenter.py +++ b/logprep/framework/rule_tree/rule_segmenter.py @@ -18,16 +18,15 @@ class RuleSegmenterException(Exception): class RuleSegmenter: """Segments filter expression into list of less complex expressions.""" - def segment_into_dnf(self, rule, expression): + @staticmethod + def segment_into_dnf(expression) -> list: """Segment expression into list of less complex expressions.""" - if self._has_disjunction(expression): - rule_segments = self._segment_expression(expression) + if RuleSegmenter._has_disjunction(expression): + rule_segments = RuleSegmenter._segment_expression(expression) elif isinstance(expression, And): - rule_segments = [self._segment_conjunctive_expression(expression)] + rule_segments = [RuleSegmenter._segment_conjunctive_expression(expression)] else: rule_segments = [[expression]] - if not rule_segments: - raise RuleSegmenterException("Rule probably not parsed correctly:", rule.filter) return rule_segments @staticmethod @@ -54,7 +53,6 @@ def _has_disjunction(expression: FilterExpression) -> bool: for exp in expression.children: if RuleSegmenter._has_disjunction(exp): return True - if isinstance(expression, Not): return RuleSegmenter._has_disjunction(expression.child) @@ -88,10 +86,8 @@ def _segment_expression( if isinstance(filter_expression, And): return tuple(RuleSegmenter._segment_conjunctive_expression(filter_expression)) return filter_expression - if isinstance(filter_expression, Or): return RuleSegmenter._segment_disjunctive_expression(filter_expression) - if isinstance(filter_expression, And): segmented_sub_expressions = RuleSegmenter._segment_sub_expressions(filter_expression) RuleSegmenter._flatten_tuples_in_list(segmented_sub_expressions) diff --git a/tests/unit/framework/rule_tree/test_demorgan_resolver.py b/tests/unit/framework/rule_tree/test_demorgan_resolver.py index 3937a4adb..b56812a29 100644 --- a/tests/unit/framework/rule_tree/test_demorgan_resolver.py +++ 
b/tests/unit/framework/rule_tree/test_demorgan_resolver.py @@ -5,8 +5,16 @@ import pytest -from logprep.filter.expression.filter_expression import And, Or, Not -from logprep.framework.rule_tree.demorgan_resolver import DeMorganResolver +from logprep.filter.expression.filter_expression import ( + And, + Or, + Not, + CompoundFilterExpression, +) +from logprep.framework.rule_tree.demorgan_resolver import ( + DeMorganResolver, + DeMorganResolverException, +) from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4 @@ -52,5 +60,35 @@ def test_has_unresolved_not_expression(self, expression, resolve_state, demorgan ), ], ) - def test_resolve_not_expression(self, expression, resolved_expr, demorgan_resolver): + def test_resolve(self, expression, resolved_expr, demorgan_resolver): assert demorgan_resolver.resolve(expression) == resolved_expr + + @pytest.mark.parametrize( + "expression, resolved_expr, error", + [ + (Not(sfe_1), Not(sfe_1), None), + ( + sfe_1, + sfe_1, + ( + DeMorganResolverException, + r'Can\'t resolve expression ".*", since it\'s not of the type "NOT."', + ), + ), + ( + Not(CompoundFilterExpression(sfe_1, sfe_2)), + Not(sfe_1), + ( + DeMorganResolverException, + r'Could not resolve expression ".*", ' + r'since its child is neither of the type "AND" nor "OR".', + ), + ), + ], + ) + def test_resolve_not_expression(self, expression, resolved_expr, error, demorgan_resolver): + if error: + with pytest.raises(error[0], match=error[1]): + demorgan_resolver._resolve_not_expression(expression) + else: + assert demorgan_resolver._resolve_not_expression(expression) == resolved_expr diff --git a/tests/unit/framework/rule_tree/test_rule_parser.py b/tests/unit/framework/rule_tree/test_rule_parser.py index aabf39cd9..d40a61e46 100644 --- a/tests/unit/framework/rule_tree/test_rule_parser.py +++ b/tests/unit/framework/rule_tree/test_rule_parser.py @@ -584,6 +584,10 @@ def test_parse_rule_param(self, rule, priority_dict, tag_map, 
expected_expressio ], ), ([[Not(sfe_1)]], [[Not(sfe_1)]]), + ( + [[sfe_1, Exists(["key1"])], [sfe_1]], + [[sfe_1, Exists(["key1"])], [Exists(["key1"]), sfe_1]], + ), ], ) def test_add_exists_filter(self, rule_list, expected): diff --git a/tests/unit/framework/rule_tree/test_rule_tagger.py b/tests/unit/framework/rule_tree/test_rule_tagger.py index c6d1fb3be..08617fa7a 100644 --- a/tests/unit/framework/rule_tree/test_rule_tagger.py +++ b/tests/unit/framework/rule_tree/test_rule_tagger.py @@ -87,5 +87,4 @@ def test_add_special_tags(self, rule_list, tag_map, expected): ) def test_tag_exists(self, expression, tag, tag_map, expected): rule_tagger = RuleTagger(tag_map) - print(type(expression), expression) assert rule_tagger._tag_exists(expression, tag) is expected diff --git a/tests/unit/framework/rule_tree/test_rule_tree.py b/tests/unit/framework/rule_tree/test_rule_tree.py index 08683c159..522913c85 100644 --- a/tests/unit/framework/rule_tree/test_rule_tree.py +++ b/tests/unit/framework/rule_tree/test_rule_tree.py @@ -2,6 +2,7 @@ # pylint: disable=missing-docstring # pylint: disable=line-too-long from copy import deepcopy +from unittest import mock import pytest @@ -11,17 +12,6 @@ from logprep.framework.rule_tree.rule_tree import RuleTree from logprep.processor.pre_detector.rule import PreDetectorRule -RULE_DICT = { - "filter": "winlog: 123", - "pre_detector": { - "id": 1, - "title": "1", - "severity": "0", - "case_condition": "directly", - "mitre": [], - }, -} - @pytest.fixture(name="rule_dict") def rule_dict_fixture(): @@ -40,12 +30,29 @@ def rule_dict_fixture(): class TestRuleTree: - def test_init(self): + def test_init_without_specifying_parameters(self): rule_tree = RuleTree() assert isinstance(rule_tree.root, Node) + assert not rule_tree.rule_parser._rule_tagger._tag_map + assert not rule_tree.priority_dict assert rule_tree.root.expression == "root" + def test_init_with_specifying_root_node(self): + rule_tree = RuleTree(Node("foo")) + + assert 
isinstance(rule_tree.root, Node) + assert rule_tree.root.expression == "foo" + + def test_init_with_specifying_config(self): + rule_tree = RuleTree(config_path="tests/testdata/unit/tree_config.json") + + assert isinstance(rule_tree.root, Node) + assert rule_tree.rule_parser._rule_tagger._tag_map == { + "field_name_to_check_for_in_rule": "TAG-TO-CHECK-IF-IN-EVENT" + } + assert rule_tree.priority_dict == {"field_name": "priority"} + def test_add_rule(self, rule_dict): rule_tree = RuleTree() rule = PreDetectorRule._create_from_dict(rule_dict) @@ -69,6 +76,22 @@ def test_add_rule(self, rule_dict): rule ] + def test_add_rule_fails(self, rule_dict): + rule_tree = RuleTree() + rule = PreDetectorRule._create_from_dict(rule_dict) + + mocked_logger = mock.MagicMock() + with mock.patch( + "logprep.framework.rule_tree.rule_parser.RuleParser.parse_rule", + side_effect=Exception("mocked error"), + ): + rule_tree.add_rule(rule, logger=mocked_logger) + expected_call = mock.call.warning( + 'Error parsing rule "winlog:"123"": Exception: mocked error.' + "\nIgnore and continue with next rule." 
+ ) + assert expected_call in mocked_logger.mock_calls + def test_get_rule_id(self, rule_dict): rule_tree = RuleTree() rule = PreDetectorRule._create_from_dict(rule_dict) From ba7f9e83b615d5b8188b41169756205cf94ded18 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 30 Jun 2023 14:40:43 +0200 Subject: [PATCH 06/13] Appease semgrep --- tests/unit/framework/rule_tree/test_demorgan_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/framework/rule_tree/test_demorgan_resolver.py b/tests/unit/framework/rule_tree/test_demorgan_resolver.py index b56812a29..42d2757ed 100644 --- a/tests/unit/framework/rule_tree/test_demorgan_resolver.py +++ b/tests/unit/framework/rule_tree/test_demorgan_resolver.py @@ -81,7 +81,7 @@ def test_resolve(self, expression, resolved_expr, demorgan_resolver): ( DeMorganResolverException, r'Could not resolve expression ".*", ' - r'since its child is neither of the type "AND" nor "OR".', + + r'since its child is neither of the type "AND" nor "OR".', ), ), ], From 03c5efbf782e78f2c3c3ec2d1587ed40d1424ca4 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 21 Jul 2023 07:27:18 +0200 Subject: [PATCH 07/13] Remove resolve check from demorgan resolver --- .../framework/rule_tree/demorgan_resolver.py | 39 +------------------ .../rule_tree/test_demorgan_resolver.py | 16 -------- 2 files changed, 1 insertion(+), 54 deletions(-) diff --git a/logprep/framework/rule_tree/demorgan_resolver.py b/logprep/framework/rule_tree/demorgan_resolver.py index ebd16b2ca..e71d9d455 100644 --- a/logprep/framework/rule_tree/demorgan_resolver.py +++ b/logprep/framework/rule_tree/demorgan_resolver.py @@ -33,49 +33,12 @@ def resolve(self, expression: FilterExpression) -> FilterExpression: expression. 
""" - if not self._has_unresolved_expression(expression): - return expression - if isinstance(expression, Not): return self._resolve_not_expression(expression) if isinstance(expression, CompoundFilterExpression): return self._resolve_compound_expression(expression) - raise DeMorganResolverException(f"Could not resolve expression {expression}") - - @staticmethod - def _has_unresolved_expression(expression: FilterExpression) -> bool: - """Check if given filter expression contains NOT-expressions. - - This function checks if the given filter expression contains any unresolved NOT-expressions. - Simple NOT(field: value) expressions do not count as unresolved expression since it cannot - be resolved. - - This is achieved by iterating over the input expression and all of its sub expressions. - The input expression needs to be resolved if a negated compound expression is found. - Otherwise, no resolving is required. - - Parameters - ---------- - expression: FilterExpression - Filter expression to be checked for NOT-expressions. - - Returns - ------- - has_unresolved_not_expression: bool - Decision if given filter expression contains any unresolved NOT-expressions. 
- - """ - expressions_stack = [expression] - while expressions_stack: - current_expression = expressions_stack.pop() - if isinstance(current_expression, Not): - if isinstance(current_expression.child, CompoundFilterExpression): - return True - if isinstance(current_expression, CompoundFilterExpression): - for sub_expression in current_expression.children: - expressions_stack.append(sub_expression) - return False + return expression def _resolve_not_expression(self, not_expression: Not) -> FilterExpression: if not isinstance(not_expression, Not): diff --git a/tests/unit/framework/rule_tree/test_demorgan_resolver.py b/tests/unit/framework/rule_tree/test_demorgan_resolver.py index 42d2757ed..26e9f85be 100644 --- a/tests/unit/framework/rule_tree/test_demorgan_resolver.py +++ b/tests/unit/framework/rule_tree/test_demorgan_resolver.py @@ -25,22 +25,6 @@ def fixture_demorgan_resolver(): class TestDeMorganResolver: - @pytest.mark.parametrize( - "expression, resolve_state", - [ - (sfe_1, False), - (Not(sfe_1), False), - (And(sfe_1, sfe_2), False), - (And(Not(sfe_1), sfe_2), False), - (Or(And(sfe_1, Not(sfe_2))), False), - (Or(And(sfe_1, sfe_2)), False), - (Not(And(sfe_1, sfe_2)), True), - (Or(Not(And(sfe_1, sfe_2)), sfe_3), True), - ], - ) - def test_has_unresolved_not_expression(self, expression, resolve_state, demorgan_resolver): - assert demorgan_resolver._has_unresolved_expression(expression) == resolve_state - @pytest.mark.parametrize( "expression, resolved_expr", [ From 49b9ddb19ded40c8e18f7b49cb6290e36953bab7 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 21 Jul 2023 07:28:39 +0200 Subject: [PATCH 08/13] Change str to repr in Not filter __repr__ --- logprep/filter/expression/filter_expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logprep/filter/expression/filter_expression.py b/logprep/filter/expression/filter_expression.py index 8bb68c9ae..60fd134f1 100644 --- a/logprep/filter/expression/filter_expression.py +++ 
b/logprep/filter/expression/filter_expression.py @@ -105,7 +105,7 @@ def __init__(self, expression: FilterExpression): self.child = expression def __repr__(self) -> str: - return f"NOT ({str(self.child)})" + return f"NOT ({repr(self.child)})" def does_match(self, document: dict) -> bool: return not self.child.matches(document) From c47fe06c1f4a88c959de9bb3fedf88920f552bb0 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 21 Jul 2023 07:30:25 +0200 Subject: [PATCH 09/13] Make test variable names more descriptive and move them to test modules --- .../framework/rule_tree/shared_constants.py | 11 - .../rule_tree/test_demorgan_resolver.py | 115 ++++++++-- .../framework/rule_tree/test_rule_parser.py | 60 +++-- .../rule_tree/test_rule_segmenter.py | 214 +++++++++++++++--- .../framework/rule_tree/test_rule_sorter.py | 120 ++++++++-- .../framework/rule_tree/test_rule_tagger.py | 140 +++++++++--- 6 files changed, 534 insertions(+), 126 deletions(-) delete mode 100644 tests/unit/framework/rule_tree/shared_constants.py diff --git a/tests/unit/framework/rule_tree/shared_constants.py b/tests/unit/framework/rule_tree/shared_constants.py deleted file mode 100644 index 5287f5126..000000000 --- a/tests/unit/framework/rule_tree/shared_constants.py +++ /dev/null @@ -1,11 +0,0 @@ -# pylint: disable=missing-docstring -from logprep.filter.expression.filter_expression import StringFilterExpression, Exists - -sfe_1 = StringFilterExpression(["key1"], "value1") -sfe_2 = StringFilterExpression(["key2"], "value2") -sfe_3 = StringFilterExpression(["key3"], "value3") -sfe_4 = StringFilterExpression(["key4"], "value4") -sfe_5 = StringFilterExpression(["key5", "subkey5"], "value5") - -ex_1 = Exists(["ABC.def"]) -ex_2 = Exists(["xyz"]) diff --git a/tests/unit/framework/rule_tree/test_demorgan_resolver.py b/tests/unit/framework/rule_tree/test_demorgan_resolver.py index 26e9f85be..065bb6063 100644 --- a/tests/unit/framework/rule_tree/test_demorgan_resolver.py +++ 
b/tests/unit/framework/rule_tree/test_demorgan_resolver.py @@ -10,13 +10,17 @@ Or, Not, CompoundFilterExpression, + StringFilterExpression, ) from logprep.framework.rule_tree.demorgan_resolver import ( DeMorganResolver, DeMorganResolverException, ) -from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4 +string_filter_expression_1 = StringFilterExpression(["key1"], "value1") +string_filter_expression_2 = StringFilterExpression(["key2"], "value2") +string_filter_expression_3 = StringFilterExpression(["key3"], "value3") +string_filter_expression_4 = StringFilterExpression(["key4"], "value4") @pytest.fixture(name="demorgan_resolver") @@ -26,42 +30,105 @@ def fixture_demorgan_resolver(): class TestDeMorganResolver: @pytest.mark.parametrize( - "expression, resolved_expr", + "expression, expected_resolved", [ - (sfe_1, sfe_1), - (Not(sfe_1), Not(sfe_1)), - (Not(And(Not(sfe_1))), Or(Not(Not(sfe_1)))), - (Not(Or(sfe_1, sfe_2)), And(Not(sfe_1), Not(sfe_2))), - (Not(And(sfe_1, sfe_2)), Or(Not(sfe_1), Not(sfe_2))), - (And(Not(Or(sfe_1, sfe_2)), sfe_3), And(And(Not(sfe_1), Not(sfe_2)), sfe_3)), - (Or(Not(Or(sfe_1, sfe_2)), sfe_3), Or(And(Not(sfe_1), Not(sfe_2)), sfe_3)), - (Not(Or(And(sfe_1, sfe_2), sfe_3)), And(Or(Not(sfe_1), Not(sfe_2)), Not(sfe_3))), - (Not(And(Or(sfe_1, sfe_2), sfe_3)), Or(And(Not(sfe_1), Not(sfe_2)), Not(sfe_3))), - (And(Not(And(sfe_1, sfe_2)), sfe_3), And(Or(Not(sfe_1), Not(sfe_2)), sfe_3)), + (string_filter_expression_1, string_filter_expression_1), + (Not(string_filter_expression_1), Not(string_filter_expression_1)), + (Not(And(Not(string_filter_expression_1))), Or(Not(Not(string_filter_expression_1)))), ( - And(Not(Or(sfe_1, sfe_2)), Not(And(sfe_3, sfe_4))), - And(And(Not(sfe_1), Not(sfe_2)), Or(Not(sfe_3), Not(sfe_4))), + Not(Or(string_filter_expression_1, string_filter_expression_2)), + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + ), + ( + Not(And(string_filter_expression_1, 
string_filter_expression_2)), + Or(Not(string_filter_expression_1), Not(string_filter_expression_2)), + ), + ( + And( + Not(Or(string_filter_expression_1, string_filter_expression_2)), + string_filter_expression_3, + ), + And( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + string_filter_expression_3, + ), + ), + ( + Or( + Not(Or(string_filter_expression_1, string_filter_expression_2)), + string_filter_expression_3, + ), + Or( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + string_filter_expression_3, + ), + ), + ( + Not( + Or( + And(string_filter_expression_1, string_filter_expression_2), + string_filter_expression_3, + ) + ), + And( + Or(Not(string_filter_expression_1), Not(string_filter_expression_2)), + Not(string_filter_expression_3), + ), + ), + ( + Not( + And( + Or(string_filter_expression_1, string_filter_expression_2), + string_filter_expression_3, + ) + ), + Or( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + Not(string_filter_expression_3), + ), + ), + ( + And( + Not(And(string_filter_expression_1, string_filter_expression_2)), + string_filter_expression_3, + ), + And( + Or(Not(string_filter_expression_1), Not(string_filter_expression_2)), + string_filter_expression_3, + ), + ), + ( + And( + Not(Or(string_filter_expression_1, string_filter_expression_2)), + Not(And(string_filter_expression_3, string_filter_expression_4)), + ), + And( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + Or(Not(string_filter_expression_3), Not(string_filter_expression_4)), + ), ), ], ) - def test_resolve(self, expression, resolved_expr, demorgan_resolver): - assert demorgan_resolver.resolve(expression) == resolved_expr + def test_resolve(self, expression, expected_resolved, demorgan_resolver): + assert demorgan_resolver.resolve(expression) == expected_resolved @pytest.mark.parametrize( - "expression, resolved_expr, error", + "expression, expected_resolved, error", [ - 
(Not(sfe_1), Not(sfe_1), None), + (Not(string_filter_expression_1), Not(string_filter_expression_1), None), ( - sfe_1, - sfe_1, + string_filter_expression_1, + string_filter_expression_1, ( DeMorganResolverException, r'Can\'t resolve expression ".*", since it\'s not of the type "NOT."', ), ), ( - Not(CompoundFilterExpression(sfe_1, sfe_2)), - Not(sfe_1), + Not( + CompoundFilterExpression(string_filter_expression_1, string_filter_expression_2) + ), + Not(string_filter_expression_1), ( DeMorganResolverException, r'Could not resolve expression ".*", ' @@ -70,9 +137,9 @@ def test_resolve(self, expression, resolved_expr, demorgan_resolver): ), ], ) - def test_resolve_not_expression(self, expression, resolved_expr, error, demorgan_resolver): + def test_resolve_not_expression(self, expression, expected_resolved, error, demorgan_resolver): if error: with pytest.raises(error[0], match=error[1]): demorgan_resolver._resolve_not_expression(expression) else: - assert demorgan_resolver._resolve_not_expression(expression) == resolved_expr + assert demorgan_resolver._resolve_not_expression(expression) == expected_resolved diff --git a/tests/unit/framework/rule_tree/test_rule_parser.py b/tests/unit/framework/rule_tree/test_rule_parser.py index d40a61e46..d42b8222c 100644 --- a/tests/unit/framework/rule_tree/test_rule_parser.py +++ b/tests/unit/framework/rule_tree/test_rule_parser.py @@ -9,10 +9,14 @@ from logprep.framework.rule_tree.rule_parser import RuleParser from logprep.processor.pre_detector.rule import PreDetectorRule -from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, sfe_5 - pytest.importorskip("logprep.processor.pre_detector") +string_filter_expression_1 = StringFilterExpression(["key1"], "value1") +string_filter_expression_2 = StringFilterExpression(["key2"], "value2") +string_filter_expression_3 = StringFilterExpression(["key3"], "value3") +string_filter_expression_4 = StringFilterExpression(["key4"], "value4") 
+string_filter_expression_with_subkey = StringFilterExpression(["key5", "subkey5"], "value5") + class TestRuleParser: @pytest.mark.parametrize( @@ -548,45 +552,65 @@ def test_parse_rule_param(self, rule, priority_dict, tag_map, expected_expressio "rule_list, expected", [ ( - [[sfe_1, sfe_2, sfe_3, sfe_4]], + [ + [ + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + string_filter_expression_4, + ] + ], [ [ Exists(["key1"]), - sfe_1, + string_filter_expression_1, Exists(["key2"]), - sfe_2, + string_filter_expression_2, Exists(["key3"]), - sfe_3, + string_filter_expression_3, Exists(["key4"]), - sfe_4, + string_filter_expression_4, ] ], ), ( - [[sfe_1, sfe_3, sfe_5]], + [ + [ + string_filter_expression_1, + string_filter_expression_3, + string_filter_expression_with_subkey, + ] + ], [ [ Exists(["key1"]), - sfe_1, + string_filter_expression_1, Exists(["key3"]), - sfe_3, + string_filter_expression_3, Exists(["key5", "subkey5"]), - sfe_5, + string_filter_expression_with_subkey, ] ], ), ( - [[sfe_1], [sfe_2], [sfe_3]], [ - [Exists(["key1"]), sfe_1], - [Exists(["key2"]), sfe_2], - [Exists(["key3"]), sfe_3], + [string_filter_expression_1], + [string_filter_expression_2], + [string_filter_expression_3], + ], + [ + [Exists(["key1"]), string_filter_expression_1], + [Exists(["key2"]), string_filter_expression_2], + [Exists(["key3"]), string_filter_expression_3], ], ), - ([[Not(sfe_1)]], [[Not(sfe_1)]]), + ([[Not(string_filter_expression_1)]], [[Not(string_filter_expression_1)]]), ( - [[sfe_1, Exists(["key1"])], [sfe_1]], - [[sfe_1, Exists(["key1"])], [Exists(["key1"]), sfe_1]], + [[string_filter_expression_1, Exists(["key1"])], [string_filter_expression_1]], + [ + [string_filter_expression_1, Exists(["key1"])], + [Exists(["key1"]), string_filter_expression_1], + ], ), ], ) diff --git a/tests/unit/framework/rule_tree/test_rule_segmenter.py b/tests/unit/framework/rule_tree/test_rule_segmenter.py index 9c7200a8d..d66284e1e 100644 --- 
a/tests/unit/framework/rule_tree/test_rule_segmenter.py +++ b/tests/unit/framework/rule_tree/test_rule_segmenter.py @@ -5,19 +5,50 @@ import pytest -from logprep.filter.expression.filter_expression import And, Or, Not +from logprep.filter.expression.filter_expression import And, Or, Not, StringFilterExpression from logprep.framework.rule_tree.rule_segmenter import RuleSegmenter, CnfToDnfConverter -from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4 + +string_filter_expression_1 = StringFilterExpression(["key1"], "value1") +string_filter_expression_2 = StringFilterExpression(["key2"], "value2") +string_filter_expression_3 = StringFilterExpression(["key3"], "value3") +string_filter_expression_4 = StringFilterExpression(["key4"], "value4") class TestRuleSegmenter: @pytest.mark.parametrize( "expression, expected", [ - (And(sfe_1, sfe_2), [sfe_1, sfe_2]), - (And(sfe_1, sfe_2, sfe_3), [sfe_1, sfe_2, sfe_3]), - (And(sfe_1, Not(sfe_2)), [sfe_1, Not(sfe_2)]), - (And(sfe_1, And(Not(sfe_2), sfe_3)), [sfe_1, Not(sfe_2), sfe_3]), + ( + And(string_filter_expression_1, string_filter_expression_2), + [string_filter_expression_1, string_filter_expression_2], + ), + ( + And( + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + ), + [ + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + ], + ), + ( + And(string_filter_expression_1, Not(string_filter_expression_2)), + [string_filter_expression_1, Not(string_filter_expression_2)], + ), + ( + And( + string_filter_expression_1, + And(Not(string_filter_expression_2), string_filter_expression_3), + ), + [ + string_filter_expression_1, + Not(string_filter_expression_2), + string_filter_expression_3, + ], + ), ], ) def test_parse_and_expression(self, expression, expected): @@ -26,27 +57,122 @@ def test_parse_and_expression(self, expression, expected): @pytest.mark.parametrize( "expression, expected", [ - (Or(sfe_1, sfe_2), 
[[sfe_1], [sfe_2]]), - (And(sfe_1, Or(sfe_2, sfe_3)), [[sfe_1, sfe_2], [sfe_1, sfe_3]]), - (And(sfe_1, Or(sfe_2, sfe_3), sfe_4), [[sfe_1, sfe_4, sfe_2], [sfe_1, sfe_4, sfe_3]]), - (Or(And(Not(sfe_1), Not(sfe_2)), sfe_3), [[Not(sfe_1), Not(sfe_2)], [sfe_3]]), ( - And(Or(Not(sfe_1), Not(sfe_2)), Not(sfe_3)), - [[Not(sfe_3), Not(sfe_1)], [Not(sfe_3), Not(sfe_2)]], + Or(string_filter_expression_1, string_filter_expression_2), + [[string_filter_expression_1], [string_filter_expression_2]], + ), + ( + And( + string_filter_expression_1, + Or(string_filter_expression_2, string_filter_expression_3), + ), + [ + [string_filter_expression_1, string_filter_expression_2], + [string_filter_expression_1, string_filter_expression_3], + ], ), ( - And(Not(sfe_1), Not(sfe_2), Or(Not(sfe_3), Not(sfe_4))), - [[Not(sfe_1), Not(sfe_2), Not(sfe_3)], [Not(sfe_1), Not(sfe_2), Not(sfe_4)]], + And( + string_filter_expression_1, + Or(string_filter_expression_2, string_filter_expression_3), + string_filter_expression_4, + ), + [ + [ + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_2, + ], + [ + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_3, + ], + ], ), ( - And(And(Not(sfe_1), Not(sfe_2)), Or(Not(sfe_3), Not(sfe_4))), - [[Not(sfe_2), Not(sfe_1), Not(sfe_3)], [Not(sfe_2), Not(sfe_1), Not(sfe_4)]], + Or( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + string_filter_expression_3, + ), + [ + [Not(string_filter_expression_1), Not(string_filter_expression_2)], + [string_filter_expression_3], + ], ), ( - And(Or(sfe_1, sfe_2), Or(sfe_3, sfe_4)), - [[sfe_1, sfe_3], [sfe_1, sfe_4], [sfe_2, sfe_3], [sfe_2, sfe_4]], + And( + Or(Not(string_filter_expression_1), Not(string_filter_expression_2)), + Not(string_filter_expression_3), + ), + [ + [Not(string_filter_expression_3), Not(string_filter_expression_1)], + [Not(string_filter_expression_3), Not(string_filter_expression_2)], + ], + ), + ( + And( + 
Not(string_filter_expression_1), + Not(string_filter_expression_2), + Or(Not(string_filter_expression_3), Not(string_filter_expression_4)), + ), + [ + [ + Not(string_filter_expression_1), + Not(string_filter_expression_2), + Not(string_filter_expression_3), + ], + [ + Not(string_filter_expression_1), + Not(string_filter_expression_2), + Not(string_filter_expression_4), + ], + ], + ), + ( + And( + And(Not(string_filter_expression_1), Not(string_filter_expression_2)), + Or(Not(string_filter_expression_3), Not(string_filter_expression_4)), + ), + [ + [ + Not(string_filter_expression_2), + Not(string_filter_expression_1), + Not(string_filter_expression_3), + ], + [ + Not(string_filter_expression_2), + Not(string_filter_expression_1), + Not(string_filter_expression_4), + ], + ], + ), + ( + And( + Or(string_filter_expression_1, string_filter_expression_2), + Or(string_filter_expression_3, string_filter_expression_4), + ), + [ + [string_filter_expression_1, string_filter_expression_3], + [string_filter_expression_1, string_filter_expression_4], + [string_filter_expression_2, string_filter_expression_3], + [string_filter_expression_2, string_filter_expression_4], + ], + ), + ( + Or( + And( + string_filter_expression_1, + Or(string_filter_expression_2, string_filter_expression_3), + ), + string_filter_expression_4, + ), + [ + [string_filter_expression_1, string_filter_expression_2], + [string_filter_expression_1, string_filter_expression_3], + [string_filter_expression_4], + ], ), - (Or(And(sfe_1, Or(sfe_2, sfe_3)), sfe_4), [[sfe_1, sfe_2], [sfe_1, sfe_3], [sfe_4]]), ], ) def test_parse_or_expression(self, expression, expected): @@ -55,13 +181,13 @@ def test_parse_or_expression(self, expression, expected): @pytest.mark.parametrize( "expression, expected", [ - (And(sfe_1, sfe_2), False), - (Or(sfe_1, sfe_2), True), - (Not(sfe_1), False), - (Not(And(sfe_1, sfe_2)), False), - (Not(Or(sfe_1, sfe_2)), True), - (And(Not(Or(sfe_1, sfe_2))), True), - (And(Not(And(sfe_1, sfe_2))), 
False), + (And(string_filter_expression_1, string_filter_expression_2), False), + (Or(string_filter_expression_1, string_filter_expression_2), True), + (Not(string_filter_expression_1), False), + (Not(And(string_filter_expression_1, string_filter_expression_2)), False), + (Not(Or(string_filter_expression_1, string_filter_expression_2)), True), + (And(Not(Or(string_filter_expression_1, string_filter_expression_2))), True), + (And(Not(And(string_filter_expression_1, string_filter_expression_2))), False), ], ) def test_has_or_expression(self, expression, expected): @@ -70,10 +196,38 @@ def test_has_or_expression(self, expression, expected): @pytest.mark.parametrize( "expression_cnf, expected_dnf", [ - ([sfe_1, [[sfe_2]]], [[sfe_1, sfe_2]]), - ([[[sfe_1]], [[sfe_2]]], [[sfe_1, sfe_2]]), - ([sfe_1, [[sfe_2]], sfe_3], [[sfe_1, sfe_3, sfe_2]]), - ([sfe_1, [[sfe_2], [sfe_3]]], [[sfe_1, sfe_2], [sfe_1, sfe_3]]), + ( + [string_filter_expression_1, [[string_filter_expression_2]]], + [[string_filter_expression_1, string_filter_expression_2]], + ), + ( + [[[string_filter_expression_1]], [[string_filter_expression_2]]], + [[string_filter_expression_1, string_filter_expression_2]], + ), + ( + [ + string_filter_expression_1, + [[string_filter_expression_2]], + string_filter_expression_3, + ], + [ + [ + string_filter_expression_1, + string_filter_expression_3, + string_filter_expression_2, + ] + ], + ), + ( + [ + string_filter_expression_1, + [[string_filter_expression_2], [string_filter_expression_3]], + ], + [ + [string_filter_expression_1, string_filter_expression_2], + [string_filter_expression_1, string_filter_expression_3], + ], + ), ], ) def test_convert_cnf_to_dnf(self, expression_cnf, expected_dnf): diff --git a/tests/unit/framework/rule_tree/test_rule_sorter.py b/tests/unit/framework/rule_tree/test_rule_sorter.py index 499414fa6..cfcbff2a4 100644 --- a/tests/unit/framework/rule_tree/test_rule_sorter.py +++ b/tests/unit/framework/rule_tree/test_rule_sorter.py @@ -5,27 
+5,116 @@ import pytest -from logprep.filter.expression.filter_expression import Not, Always, CompoundFilterExpression +from logprep.filter.expression.filter_expression import ( + Not, + Always, + CompoundFilterExpression, + StringFilterExpression, + Exists, +) from logprep.framework.rule_tree.rule_sorter import RuleSorter, RuleSorterException -from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, ex_1, ex_2 pytest.importorskip("logprep.processor.pre_detector") +string_filter_expression_1 = StringFilterExpression(["key1"], "value1") +string_filter_expression_2 = StringFilterExpression(["key2"], "value2") +string_filter_expression_3 = StringFilterExpression(["key3"], "value3") +string_filter_expression_4 = StringFilterExpression(["key4"], "value4") + +exists_expression_1 = Exists(["ABC.def"]) +exists_expression_2 = Exists(["xyz"]) + class TestRuleSorter: @pytest.mark.parametrize( "rule_list, priority_dict, expected", [ - ([[sfe_1, sfe_4, sfe_3, sfe_2]], {}, [[sfe_1, sfe_2, sfe_3, sfe_4]]), - ([[sfe_1, sfe_4, sfe_3, sfe_2]], {"key2": "1"}, [[sfe_2, sfe_1, sfe_3, sfe_4]]), - ([[sfe_1, sfe_3, ex_1, sfe_2, ex_2]], {}, [[ex_1, sfe_1, sfe_2, sfe_3, ex_2]]), ( - [[sfe_1, sfe_3, ex_1, sfe_2, ex_2]], + [ + [ + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_3, + string_filter_expression_2, + ] + ], + {}, + [ + [ + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + string_filter_expression_4, + ] + ], + ), + ( + [ + [ + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_3, + string_filter_expression_2, + ] + ], + {"key2": "1"}, + [ + [ + string_filter_expression_2, + string_filter_expression_1, + string_filter_expression_3, + string_filter_expression_4, + ] + ], + ), + ( + [ + [ + string_filter_expression_1, + string_filter_expression_3, + exists_expression_1, + string_filter_expression_2, + exists_expression_2, + ] + ], + {}, + [ + [ 
+ exists_expression_1, + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + exists_expression_2, + ] + ], + ), + ( + [ + [ + string_filter_expression_1, + string_filter_expression_3, + exists_expression_1, + string_filter_expression_2, + exists_expression_2, + ] + ], {"xyz": "1"}, - [[ex_2, ex_1, sfe_1, sfe_2, sfe_3]], + [ + [ + exists_expression_2, + exists_expression_1, + string_filter_expression_1, + string_filter_expression_2, + string_filter_expression_3, + ] + ], + ), + ( + [[string_filter_expression_2, Not(string_filter_expression_1)]], + {"key1": "1"}, + [[Not(string_filter_expression_1), string_filter_expression_2]], ), - ([[sfe_2, Not(sfe_1)]], {"key1": "1"}, [[Not(sfe_1), sfe_2]]), ], ) def test_sort_rule_segments(self, rule_list, priority_dict, expected): @@ -37,17 +126,20 @@ def test_sort_rule_segments(self, rule_list, priority_dict, expected): [ (Always("foo"), None), (Not(Always("foo")), None), - (sfe_1, str(sfe_1)), - (Not(sfe_1), str(sfe_1)), - (Not(Not(sfe_1)), str(sfe_1)), - (ex_1, str(ex_1)), - (Not(ex_1), str(ex_1)), + (string_filter_expression_1, str(string_filter_expression_1)), + (Not(string_filter_expression_1), str(string_filter_expression_1)), + (Not(Not(string_filter_expression_1)), str(string_filter_expression_1)), + (exists_expression_1, str(exists_expression_1)), + (Not(exists_expression_1), str(exists_expression_1)), ], ) def test_get_sorting_key_succeeds(self, expression, expected): assert RuleSorter._get_sorting_key(expression, {}) == expected - @pytest.mark.parametrize("expression", [CompoundFilterExpression(sfe_1, sfe_2), "foo"]) + @pytest.mark.parametrize( + "expression", + [CompoundFilterExpression(string_filter_expression_1, string_filter_expression_2), "foo"], + ) def test_get_sorting_key_raises_exception(self, expression): with pytest.raises(RuleSorterException, match=f'Could not sort "{str(expression)}"'): RuleSorter._get_sorting_key(expression, {}) diff --git 
a/tests/unit/framework/rule_tree/test_rule_tagger.py b/tests/unit/framework/rule_tree/test_rule_tagger.py index 08617fa7a..a7e4f512e 100644 --- a/tests/unit/framework/rule_tree/test_rule_tagger.py +++ b/tests/unit/framework/rule_tree/test_rule_tagger.py @@ -7,68 +7,150 @@ from logprep.filter.expression.filter_expression import StringFilterExpression, Not, Exists from logprep.framework.rule_tree.rule_tagger import RuleTagger -from tests.unit.framework.rule_tree.shared_constants import sfe_1, sfe_2, sfe_3, sfe_4, ex_2 pytest.importorskip("logprep.processor.pre_detector") +string_filter_expression_1 = StringFilterExpression(["key1"], "value1") +string_filter_expression_2 = StringFilterExpression(["key2"], "value2") +string_filter_expression_3 = StringFilterExpression(["key3"], "value3") +string_filter_expression_4 = StringFilterExpression(["key4"], "value4") + +exists_expression = Exists(["xyz"]) + + class TestRuleTagger: @pytest.mark.parametrize( "rule_list, tag_map, expected", [ - ([[sfe_1, sfe_2]], {"key2": "TAG"}, [[Exists(["TAG"]), sfe_1, sfe_2]]), ( - [[sfe_1, sfe_2], [sfe_1, sfe_3]], + [[string_filter_expression_1, string_filter_expression_2]], + {"key2": "TAG"}, + [[Exists(["TAG"]), string_filter_expression_1, string_filter_expression_2]], + ), + ( + [ + [string_filter_expression_1, string_filter_expression_2], + [string_filter_expression_1, string_filter_expression_3], + ], {"key2": "TAG2", "key3": "TAG3"}, - [[Exists(["TAG2"]), sfe_1, sfe_2], [Exists(["TAG3"]), sfe_1, sfe_3]], + [ + [Exists(["TAG2"]), string_filter_expression_1, string_filter_expression_2], + [Exists(["TAG3"]), string_filter_expression_1, string_filter_expression_3], + ], ), ( - [[sfe_1, sfe_4, sfe_2], [sfe_2, sfe_3], [sfe_2], [sfe_4, sfe_3]], + [ + [ + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_2, + ], + [string_filter_expression_2, string_filter_expression_3], + [string_filter_expression_2], + [string_filter_expression_4, 
string_filter_expression_3], + ], {"key1": "TAG1", "key2": "TAG2"}, [ - [Exists(["TAG2"]), Exists(["TAG1"]), sfe_1, sfe_4, sfe_2], - [Exists(["TAG2"]), sfe_2, sfe_3], - [Exists(["TAG2"]), sfe_2], - [sfe_4, sfe_3], + [ + Exists(["TAG2"]), + Exists(["TAG1"]), + string_filter_expression_1, + string_filter_expression_4, + string_filter_expression_2, + ], + [Exists(["TAG2"]), string_filter_expression_2, string_filter_expression_3], + [Exists(["TAG2"]), string_filter_expression_2], + [string_filter_expression_4, string_filter_expression_3], ], ), ( - [[sfe_1, sfe_3], [sfe_2, sfe_4]], + [ + [string_filter_expression_1, string_filter_expression_3], + [string_filter_expression_2, string_filter_expression_4], + ], {"key1": "TAG1", "key2": "TAG2.SUBTAG2"}, [ - [Exists(["TAG1"]), sfe_1, sfe_3], - [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + [Exists(["TAG1"]), string_filter_expression_1, string_filter_expression_3], + [ + Exists(["TAG2", "SUBTAG2"]), + string_filter_expression_2, + string_filter_expression_4, + ], ], ), ( - [[sfe_1, sfe_3], [sfe_2, sfe_4]], + [ + [string_filter_expression_1, string_filter_expression_3], + [string_filter_expression_2, string_filter_expression_4], + ], {"key1": "TAG1:Value1", "key2": "TAG2.SUBTAG2"}, [ - [StringFilterExpression(["TAG1"], "Value1"), sfe_1, sfe_3], - [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + [ + StringFilterExpression(["TAG1"], "Value1"), + string_filter_expression_1, + string_filter_expression_3, + ], + [ + Exists(["TAG2", "SUBTAG2"]), + string_filter_expression_2, + string_filter_expression_4, + ], ], ), ( - [[sfe_1, sfe_3], [sfe_2, sfe_4]], + [ + [string_filter_expression_1, string_filter_expression_3], + [string_filter_expression_2, string_filter_expression_4], + ], {"key1": "TAG1.SUBTAG1:Value1", "key2": "TAG2.SUBTAG2"}, [ - [StringFilterExpression(["TAG1", "SUBTAG1"], "Value1"), sfe_1, sfe_3], - [Exists(["TAG2", "SUBTAG2"]), sfe_2, sfe_4], + [ + StringFilterExpression(["TAG1", "SUBTAG1"], "Value1"), + 
string_filter_expression_1, + string_filter_expression_3, + ], + [ + Exists(["TAG2", "SUBTAG2"]), + string_filter_expression_2, + string_filter_expression_4, + ], ], ), ( - [[sfe_1, ex_2]], + [[string_filter_expression_1, exists_expression]], {"xyz": "TAG:VALUE"}, - [[StringFilterExpression(["TAG"], "VALUE"), sfe_1, ex_2]], + [ + [ + StringFilterExpression(["TAG"], "VALUE"), + string_filter_expression_1, + exists_expression, + ] + ], ), - ([[Not(sfe_1)]], {"key1": "TAG"}, [[Exists(["TAG"]), Not(sfe_1)]]), - ([[sfe_1]], {"key1": "key1:value1"}, [[sfe_1]]), ( - [[sfe_1, sfe_2, ex_2]], + [[Not(string_filter_expression_1)]], + {"key1": "TAG"}, + [[Exists(["TAG"]), Not(string_filter_expression_1)]], + ), + ( + [[string_filter_expression_1]], + {"key1": "key1:value1"}, + [[string_filter_expression_1]], + ), + ( + [[string_filter_expression_1, string_filter_expression_2, exists_expression]], {"key1": "TAG1", "key2": "xyz"}, - [[Exists(["TAG1"]), sfe_1, sfe_2, ex_2]], + [ + [ + Exists(["TAG1"]), + string_filter_expression_1, + string_filter_expression_2, + exists_expression, + ] + ], ), - ([[sfe_1]], {}, [[sfe_1]]), + ([[string_filter_expression_1]], {}, [[string_filter_expression_1]]), ], ) def test_add_special_tags(self, rule_list, tag_map, expected): @@ -79,10 +161,10 @@ def test_add_special_tags(self, rule_list, tag_map, expected): @pytest.mark.parametrize( "expression, tag, tag_map, expected", [ - (ex_2, "xyz", {"key1": "xyz"}, True), - (ex_2, "foo", {"key1": "foo"}, False), - (sfe_1, "key1:value1", {"key1": "key1:value1"}, True), - (sfe_1, "foo:bar", {"key1": "foo:bar"}, False), + (exists_expression, "xyz", {"key1": "xyz"}, True), + (exists_expression, "foo", {"key1": "foo"}, False), + (string_filter_expression_1, "key1:value1", {"key1": "key1:value1"}, True), + (string_filter_expression_1, "foo:bar", {"key1": "foo:bar"}, False), ], ) def test_tag_exists(self, expression, tag, tag_map, expected): From 38f55db7d6e9bce9787e299ea16149011af9b80b Mon Sep 17 00:00:00 2001 
From: Piotr Pauksztelo Date: Fri, 21 Jul 2023 09:21:55 +0200 Subject: [PATCH 10/13] Make Node not accept strings as expression --- logprep/framework/rule_tree/node.py | 6 +++--- logprep/framework/rule_tree/rule_tree.py | 2 +- tests/unit/framework/rule_tree/test_rule_tree.py | 8 +------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/logprep/framework/rule_tree/node.py b/logprep/framework/rule_tree/node.py index 75267ec41..e24f9601c 100644 --- a/logprep/framework/rule_tree/node.py +++ b/logprep/framework/rule_tree/node.py @@ -1,6 +1,6 @@ """This module implements the tree node functionality for the tree model.""" -from typing import Optional, List, Union +from typing import Optional, List from logprep.filter.expression.filter_expression import FilterExpression from logprep.filter.expression.filter_expression import KeyDoesNotExistError @@ -11,11 +11,11 @@ class Node: __slots__ = ("_expression", "_children", "matching_rules") - _expression: Union[FilterExpression, str] + _expression: FilterExpression _children: list matching_rules: list - def __init__(self, expression: Optional[Union[FilterExpression, str]]): + def __init__(self, expression: Optional[FilterExpression]): """Node initialization function. Initializes a new node with a given expression and empty lists of children and matching diff --git a/logprep/framework/rule_tree/rule_tree.py b/logprep/framework/rule_tree/rule_tree.py index b0f5971d1..f31172e58 100644 --- a/logprep/framework/rule_tree/rule_tree.py +++ b/logprep/framework/rule_tree/rule_tree.py @@ -87,7 +87,7 @@ def __init__(self, root: Node = None, config_path: str = None, metric_labels: di if root: self._root = root else: - self._root = Node("root") + self._root = Node(None) def _setup(self): """Basic setup of rule tree. 
diff --git a/tests/unit/framework/rule_tree/test_rule_tree.py b/tests/unit/framework/rule_tree/test_rule_tree.py index 522913c85..b25312ded 100644 --- a/tests/unit/framework/rule_tree/test_rule_tree.py +++ b/tests/unit/framework/rule_tree/test_rule_tree.py @@ -36,13 +36,7 @@ def test_init_without_specifying_parameters(self): assert isinstance(rule_tree.root, Node) assert not rule_tree.rule_parser._rule_tagger._tag_map assert not rule_tree.priority_dict - assert rule_tree.root.expression == "root" - - def test_init_with_specifying_root_node(self): - rule_tree = RuleTree(Node("foo")) - - assert isinstance(rule_tree.root, Node) - assert rule_tree.root.expression == "foo" + assert rule_tree.root.expression is None def test_init_with_specifying_config(self): rule_tree = RuleTree(config_path="tests/testdata/unit/tree_config.json") From 3517d63e5c40bda587fb5e4f6f2e65ed9890ed3f Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 21 Jul 2023 11:28:22 +0200 Subject: [PATCH 11/13] Add children attribute to FilterExpression and replace child in Not expression --- .../filter/expression/filter_expression.py | 32 +++++++++++++++---- .../framework/rule_tree/demorgan_resolver.py | 4 +-- logprep/framework/rule_tree/rule_segmenter.py | 2 +- logprep/framework/rule_tree/rule_sorter.py | 12 +++---- logprep/framework/rule_tree/rule_tagger.py | 2 +- 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/logprep/filter/expression/filter_expression.py b/logprep/filter/expression/filter_expression.py index 60fd134f1..2b1a388fa 100644 --- a/logprep/filter/expression/filter_expression.py +++ b/logprep/filter/expression/filter_expression.py @@ -3,7 +3,7 @@ import re from abc import ABC, abstractmethod from itertools import chain, zip_longest -from typing import List, Any +from typing import List, Any, Tuple class FilterExpressionError(BaseException): @@ -17,6 +17,25 @@ class KeyDoesNotExistError(FilterExpressionError): class FilterExpression(ABC): """Base class for all filter 
expression used for matching rules.""" + __slots__ = ["children"] + + children: Tuple["FilterExpression"] + + def __init__(self, *children: "FilterExpression"): + """Initializes children for filter expression. + + Filter expression can contain multiple child filter expression, + i.e. a 'Not' expression could contain a child that gets negated, + or an 'And' expression could contain multiple children that must all match. + + Parameters + ---------- + children : FilterExpression + Child expression of this expression. + + """ + self.children = children + def matches(self, document: dict) -> bool: """Receives a document and returns True if it is matched by the expression. @@ -87,6 +106,7 @@ class Always(FilterExpression): """Filter expression that can be set to match always or never.""" def __init__(self, value: Any): + super().__init__() self._value = value def __repr__(self): @@ -102,21 +122,18 @@ class Not(FilterExpression): """Filter expression that negates a match.""" def __init__(self, expression: FilterExpression): - self.child = expression + super().__init__(expression) def __repr__(self) -> str: - return f"NOT ({repr(self.child)})" + return f"NOT ({repr(self.children[0])})" def does_match(self, document: dict) -> bool: - return not self.child.matches(document) + return not self.children[0].matches(document) class CompoundFilterExpression(FilterExpression): """Base class of filter expressions that combine other filter expressions.""" - def __init__(self, *args: FilterExpression): - self.children = args - def does_match(self, document: dict): raise NotImplementedError @@ -145,6 +162,7 @@ class KeyBasedFilterExpression(FilterExpression): """Base class of filter expressions that match a certain value on a given key.""" def __init__(self, key: List[str]): + super().__init__() self.key = key self._key_as_dotted_string = ".".join([str(i) for i in self.key]) diff --git a/logprep/framework/rule_tree/demorgan_resolver.py b/logprep/framework/rule_tree/demorgan_resolver.py 
index e71d9d455..62c760426 100644 --- a/logprep/framework/rule_tree/demorgan_resolver.py +++ b/logprep/framework/rule_tree/demorgan_resolver.py @@ -46,10 +46,10 @@ def _resolve_not_expression(self, not_expression: Not) -> FilterExpression: f'Can\'t resolve expression "{not_expression}", since it\'s not of the type "NOT."' ) - if not isinstance(not_expression.child, CompoundFilterExpression): + if not isinstance(not_expression.children[0], CompoundFilterExpression): return not_expression - compound_expression = not_expression.child + compound_expression = not_expression.children[0] negated_children = (Not(expression) for expression in compound_expression.children) if isinstance(compound_expression, Or): diff --git a/logprep/framework/rule_tree/rule_segmenter.py b/logprep/framework/rule_tree/rule_segmenter.py index 5bd8a8910..04bd50d47 100644 --- a/logprep/framework/rule_tree/rule_segmenter.py +++ b/logprep/framework/rule_tree/rule_segmenter.py @@ -54,7 +54,7 @@ def _has_disjunction(expression: FilterExpression) -> bool: if RuleSegmenter._has_disjunction(exp): return True if isinstance(expression, Not): - return RuleSegmenter._has_disjunction(expression.child) + return RuleSegmenter._has_disjunction(expression.children[0]) return False diff --git a/logprep/framework/rule_tree/rule_sorter.py b/logprep/framework/rule_tree/rule_sorter.py index 993788f4f..06e3d8479 100644 --- a/logprep/framework/rule_tree/rule_sorter.py +++ b/logprep/framework/rule_tree/rule_sorter.py @@ -81,12 +81,12 @@ def _get_sorting_key( @staticmethod def _sort_not_expression(expression, priority_dict): try: - if isinstance(expression.child, Not): - if isinstance(expression.child.child, KeyBasedFilterExpression): - return priority_dict[expression.child.child.key[0]] + if isinstance(expression.children[0], Not): + if isinstance(expression.children[0].children[0], KeyBasedFilterExpression): + return priority_dict[expression.children[0].children[0].key[0]] - if isinstance(expression.child, 
KeyBasedFilterExpression): - return priority_dict[expression.child.key_as_dotted_string] + if isinstance(expression.children[0], KeyBasedFilterExpression): + return priority_dict[expression.children[0].key_as_dotted_string] except KeyError: pass - return RuleSorter._get_sorting_key(expression.child, priority_dict) + return RuleSorter._get_sorting_key(expression.children[0], priority_dict) diff --git a/logprep/framework/rule_tree/rule_tagger.py b/logprep/framework/rule_tree/rule_tagger.py index e7622ded8..c71ec959e 100644 --- a/logprep/framework/rule_tree/rule_tagger.py +++ b/logprep/framework/rule_tree/rule_tagger.py @@ -59,7 +59,7 @@ def add(self, list_of_rule_expressions: List[List[Union[Exists, StringFilterExpr def _add_tags_to_rule_expressions(self, rule_expressions): """Iterate through all expressions and handle different cases""" for expression in rule_expressions.copy(): - next_expression = expression.child if isinstance(expression, Not) else expression + next_expression = expression.children[0] if isinstance(expression, Not) else expression if self._expression_in_tag_map(next_expression): if Exists([self._tag_map[next_expression.key[0]]]) not in rule_expressions: self._add_tag(rule_expressions, self._tag_map[next_expression.key[0]]) From c2140cb2d16f799cb64c0c650fb7c891280c1488 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Tue, 25 Jul 2023 08:52:17 +0200 Subject: [PATCH 12/13] Expand docstrings for rule tree --- logprep/framework/rule_tree/rule_parser.py | 30 +++++++++++++++++-- logprep/framework/rule_tree/rule_segmenter.py | 29 ++++++++++++++++-- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/logprep/framework/rule_tree/rule_parser.py b/logprep/framework/rule_tree/rule_parser.py index 25bd07768..8645830ad 100644 --- a/logprep/framework/rule_tree/rule_parser.py +++ b/logprep/framework/rule_tree/rule_parser.py @@ -52,9 +52,33 @@ def parse_rule(self, rule: "Rule", priority_dict: dict) -> list: """Main parsing function to parse rule 
into list of less complex rules. This function aims to parse a rule into a list of less complex rules that shows the same - decision behavior when matching events. The parsing process includes resolving NOT- and - OR-expressions, sorting the expression segments of a rule as well as adding EXISTS-filter - and special tags to the parsed rule. + decision behavior when matching events. + + First, Not expressions are resolved by applying De Morgan's law on the rule's expression. + Example: `not((A and not B) or not C)` becomes `(not A or B) and C`. + + Then the expression is transformed into a list representing the disjunctive normal form + (DNF). This representation is required to build the rule tree. + Example: `(not A or B) and C` becomes `[[not A, C], [B, C]]`, + which is equivalent to the DNF `(not A and C) or (B and C)`. + + The segments are then sorted using a priority dict to achieve a better performance for + rule matching. + + Afterwards, Exists filter expressions are added for every segment that is not an + Exists, Not or Always expression. + Those are then being checked first in the tree. + Exists expressions are cheap and can lead to an optimization of the rule matching. + + Finally, tags may be added to more efficiently check the existence of configured fields. + This is configured via a tag map, by specifying target fields and tags. + Those tags are added as Exists filters in front of the rule to be checked first if the + target field exists. 
+ Example: The tag map `{"some.key": "some_tag"}` would add an Exists filter + `Exists("some_tag")` in front of the rules with filters `some.key: foo OR key_x` and + `key_y AND some.key: bar`, but not the rule with filter `key_z: foo`, since it does not + have the field `some.key` + Parameters ---------- diff --git a/logprep/framework/rule_tree/rule_segmenter.py b/logprep/framework/rule_tree/rule_segmenter.py index 04bd50d47..09555c0ad 100644 --- a/logprep/framework/rule_tree/rule_segmenter.py +++ b/logprep/framework/rule_tree/rule_segmenter.py @@ -16,10 +16,35 @@ class RuleSegmenterException(Exception): class RuleSegmenter: - """Segments filter expression into list of less complex expressions.""" + """Segments filter expression into list of less complex expressions. + + The segmenter gets a (compound) FilterExpression as input, + which must have been already resolved via De Morgan's law. + That means all NOT-expressions must have been resolved. + The expression is then changed into the disjunctive normal form (DNF), + which is required for the rule tree. + However, the output is not another FilterExpression, but a list of lists with rule segments + (FilterExpressions that are not compound), which represent the FilterExpression in DNF. + The outer list representing OR and the inner lists representing AND. + + This representation then allows to easily sort the rule segments, + add tags and build a tree out of them. + + Example: + Assume the following CompoundFilterExpression X as input: + + `X := (A and (B or C)) or D` + + Furthermore, assume that A, B and C are StringFilterExpressions. + + Calling segment_into_dnf(X) would result in the list `[[A, B], [A, C], [D]]`. + This is equivalent to the FilterExpression `(A and B) or (A and C) or (D)`, + which is the DNF of X. 
+ + """ @staticmethod - def segment_into_dnf(expression) -> list: + def segment_into_dnf(expression: FilterExpression) -> list: """Segment expression into list of less complex expressions.""" if RuleSegmenter._has_disjunction(expression): rule_segments = RuleSegmenter._segment_expression(expression) From 8190857f55b39c50872d921f3e5ea8821b266947 Mon Sep 17 00:00:00 2001 From: Piotr Pauksztelo Date: Fri, 25 Aug 2023 14:24:19 +0200 Subject: [PATCH 13/13] Remove __slots__ from children in FilterExpression to fix matching --- .../filter/expression/filter_expression.py | 6 +---- .../rules/generic/pre_detect_four.yml | 11 ++++++++ .../pre_detector/test_pre_detector.py | 27 +++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml diff --git a/logprep/filter/expression/filter_expression.py b/logprep/filter/expression/filter_expression.py index 2b1a388fa..516219605 100644 --- a/logprep/filter/expression/filter_expression.py +++ b/logprep/filter/expression/filter_expression.py @@ -3,7 +3,7 @@ import re from abc import ABC, abstractmethod from itertools import chain, zip_longest -from typing import List, Any, Tuple +from typing import List, Any class FilterExpressionError(BaseException): @@ -17,10 +17,6 @@ class KeyDoesNotExistError(FilterExpressionError): class FilterExpression(ABC): """Base class for all filter expression used for matching rules.""" - __slots__ = ["children"] - - children: Tuple["FilterExpression"] - def __init__(self, *children: "FilterExpression"): """Initializes children for filter expression. 
diff --git a/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml new file mode 100644 index 000000000..3c7b75179 --- /dev/null +++ b/tests/testdata/unit/pre_detector/rules/generic/pre_detect_four.yml @@ -0,0 +1,11 @@ +filter: 'A: "*bar*" AND NOT (A: "foo*" AND A: "*baz")' +pre_detector: + id: RULE_FOUR_ID + title: RULE_FOUR + severity: critical + mitre: + - attack.test1 + - attack.test2 + case_condition: directly +sigma_fields: true +description: Test rule four diff --git a/tests/unit/processor/pre_detector/test_pre_detector.py b/tests/unit/processor/pre_detector/test_pre_detector.py index 73e68b2e4..823a684ae 100644 --- a/tests/unit/processor/pre_detector/test_pre_detector.py +++ b/tests/unit/processor/pre_detector/test_pre_detector.py @@ -40,6 +40,33 @@ def test_perform_successful_pre_detection(self): document, expected, detection_results, expected_detection_results ) + def test_perform_pre_detection_that_fails_if_filter_children_were_slots(self): + assert self.object.metrics.number_of_processed_events == 0 + document = {"A": "foo X bar Y"} + expected = deepcopy(document) + expected_detection_results = ( + [ + { + "case_condition": "directly", + "description": "Test rule four", + "id": "RULE_FOUR_ID", + "mitre": ["attack.test1", "attack.test2"], + "rule_filter": '(A:"*bar*" AND NOT ((A:"foo*" AND A:"*baz")))', + "severity": "critical", + "title": "RULE_FOUR", + } + ], + ({"kafka": "pre_detector_alerts"},), + ) + detection_results = self.object.process(document) + self._assert_equality_of_results( + document, expected, detection_results, expected_detection_results + ) + + document = {"A": "foo X bar Y baz"} + detection_results = self.object.process(document) + assert detection_results is None + def test_perform_successful_pre_detection_with_host_name(self): assert self.object.metrics.number_of_processed_events == 0 document = {