From e1466b17606320437eed3a1aace909d455dfac5d Mon Sep 17 00:00:00 2001 From: Robin Linacre Date: Sat, 5 Oct 2024 10:21:17 +0100 Subject: [PATCH 1/3] add docstring to custom_rule --- splink/internals/blocking_rule_library.py | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/splink/internals/blocking_rule_library.py b/splink/internals/blocking_rule_library.py index 8123e8844..74f788b8d 100644 --- a/splink/internals/blocking_rule_library.py +++ b/splink/internals/blocking_rule_library.py @@ -45,6 +45,43 @@ def __init__( salting_partitions: int | None = None, arrays_to_explode: list[str] | None = None, ): + """ + Represents a custom blocking rule using a user-defined SQL condition. To + refer to the left-hand side and the right-hand side of the pairwise + record comparison, use `l` and `r` respectively, e.g. + `l.first_name = r.first_name and len(l.first_name) < 2`. + + Args: + blocking_rule (str): A SQL condition string representing the custom + blocking rule. + sql_dialect (str, optional): The SQL dialect of the provided blocking rule. + If specified, Splink will attempt to translate the rule to the + appropriate dialect. + salting_partitions (int, optional): The number of partitions to use for + salting. If provided, enables salting for this blocking rule. + arrays_to_explode (list[str], optional): A list of array column names + to explode before applying the blocking rule.
+ + Examples: + ```python + from splink.internals.blocking_rule_library import CustomRule + + # Simple custom rule + rule_1 = CustomRule("l.postcode = r.postcode") + + # Custom rule with dialect translation + rule_2 = CustomRule( + "SUBSTR(l.surname, 1, 3) = SUBSTR(r.surname, 1, 3)", + sql_dialect="sqlite" + ) + + # Custom rule with salting + rule_3 = CustomRule( + "l.city = r.city", + salting_partitions=10 + ) + ``` + """ super().__init__( salting_partitions=salting_partitions, arrays_to_explode=arrays_to_explode ) From 4b1c47539f7cc42d9982dea4c992ae116e809063 Mon Sep 17 00:00:00 2001 From: Robin Linacre Date: Sat, 5 Oct 2024 10:28:07 +0100 Subject: [PATCH 2/3] document --- docs/api_docs/blocking.md | 7 +++++-- splink/blocking_rule_library.py | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 splink/blocking_rule_library.py diff --git a/docs/api_docs/blocking.md b/docs/api_docs/blocking.md index c047369b9..0b81046b5 100644 --- a/docs/api_docs/blocking.md +++ b/docs/api_docs/blocking.md @@ -3,14 +3,17 @@ tags: - API - blocking --- -# Documentation for`block_on` +# Documentation for the `blocking_rule_library` -::: splink.block_on +::: splink.blocking_rule_library handler: python options: show_root_heading: false show_root_toc: false show_source: false + members_order: source + inherited_members: false + merge_init_into_class: true diff --git a/splink/blocking_rule_library.py b/splink/blocking_rule_library.py new file mode 100644 index 000000000..88af56a10 --- /dev/null +++ b/splink/blocking_rule_library.py @@ -0,0 +1,13 @@ +from splink.internals.blocking_rule_library import ( + And, + CustomRule, + Not, + block_on, +) + +__all__ = [ + "CustomRule", + "And", + "Not", + "block_on", +] From ab5c1c365994dc07fb10614afecf4519970bae52 Mon Sep 17 00:00:00 2001 From: Robin Linacre Date: Sat, 5 Oct 2024 10:28:51 +0100 Subject: [PATCH 3/3] fix example --- splink/internals/blocking_rule_library.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/splink/internals/blocking_rule_library.py b/splink/internals/blocking_rule_library.py index 74f788b8d..1b518b9f6 100644 --- a/splink/internals/blocking_rule_library.py +++ b/splink/internals/blocking_rule_library.py @@ -64,7 +64,7 @@ def __init__( Examples: ```python - from splink.internals.blocking_rule_library import CustomRule + from splink.blocking_rule_library import CustomRule # Simple custom rule rule_1 = CustomRule("l.postcode = r.postcode")