Skip to content

Commit

Permalink
improve docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinL committed Sep 16, 2024
1 parent a3da73b commit 2d24505
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 19 deletions.
8 changes: 4 additions & 4 deletions splink/internals/linker_components/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ def deterministic_link(self) -> SplinkDataFrame:
Deterministic linkage, however, is likely to result in missed links
(false negatives).
Returns:
SplinkDataFrame: A SplinkDataFrame of the pairwise comparisons.
Examples:
```py
Expand All @@ -76,10 +80,6 @@ def deterministic_link(self) -> SplinkDataFrame:
linker = Linker(df, settings, db_api=db_api)
splink_df = linker.inference.deterministic_link()
```
Returns:
SplinkDataFrame: A SplinkDataFrame of the pairwise comparisons.
"""
pipeline = CTEPipeline()
# Allows clustering during a deterministic linkage.
Expand Down
2 changes: 1 addition & 1 deletion splink/internals/linker_components/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def query_sql(self, sql, output_type="pandas"):
Examples:
```py
linker = Linker(df, settings, db_api)
- df_predict = linker.predict()
+ df_predict = linker.inference.predict()
linker.misc.query_sql(f"select * from {df_predict.physical_name} limit 10")
```
Expand Down
11 changes: 8 additions & 3 deletions splink/internals/linker_components/table_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ def compute_tf_table(self, column_name: str) -> SplinkDataFrame:
>>>
# On subsequent data linking job, read this table rather than recompute
df_first_name_tf = pd.read_parquet("folder/first_name_tf")
- df_first_name_tf.createOrReplaceTempView("__splink__df_tf_first_name")
+ linker.table_management.register_term_frequency_lookup(
+ df_first_name_tf, "first_name"
+ )
```
Expand Down Expand Up @@ -207,8 +210,10 @@ def register_term_frequency_lookup(self, input_data, col_name, overwrite=False):
{"first_name": "alfie", "tf_first_name": 0.013},
]
tf_df = pd.DataFrame(tf_table)
- linker.table_management.register_term_frequency_lookup(tf_df,
- "first_name")
+ linker.table_management.register_term_frequency_lookup(
+ tf_df,
+ "first_name"
+ )
```
"""

Expand Down
13 changes: 3 additions & 10 deletions splink/internals/linker_components/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,14 +247,7 @@ def estimate_parameters_using_expectation_maximisation(
[this PR](https://github.com/moj-analytical-services/splink/pull/734) for
the rationale.
Examples:
Default behaviour
```py
br_training = block_on("first_name", "dob")
linker.training.estimate_parameters_using_expectation_maximisation(
br_training
)
```
Args:
blocking_rule (BlockingRuleCreator | str): The blocking rule used to
Expand All @@ -276,9 +269,9 @@ def estimate_parameters_using_expectation_maximisation(
Examples:
```py
- blocking_rule = block_on("first_name", "surname")
+ br_training = block_on("first_name", "dob")
linker.training.estimate_parameters_using_expectation_maximisation(
- blocking_rule
+ br_training
)
```
Expand Down
2 changes: 1 addition & 1 deletion splink/internals/linker_components/visualisations.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def tf_adjustment_chart(
this or `n_most_freq` set to None, all values will be shown.
Defaults to 10.
vals_to_include (list, optional): Specific values for which to show term
- sfrequency adjustments.
+ frequency adjustments.
Defaults to None.
as_dict (bool, optional): If True, return the chart as a dictionary.
Expand Down

0 comments on commit 2d24505

Please sign in to comment.