From 5720979de2080b8f2923b7ff3a801b2bc5a7c6cb Mon Sep 17 00:00:00 2001 From: agus Date: Thu, 27 Feb 2025 09:31:15 +0000 Subject: [PATCH 1/3] fix test --- tests/integration/test_embedding_dedup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_embedding_dedup.py b/tests/integration/test_embedding_dedup.py index 7ff02f3d70..396084d9d3 100644 --- a/tests/integration/test_embedding_dedup.py +++ b/tests/integration/test_embedding_dedup.py @@ -103,7 +103,7 @@ def test_embedding_deduplication() -> None: # NOTE: Guide to choose an index: https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index nn = FaissNearestNeighbour( - k=3, + k=5, metric_type=faiss.METRIC_INNER_PRODUCT, search_batch_size=50, # string_factory="IVF300_HNSW32,Flat", @@ -122,8 +122,8 @@ def test_embedding_deduplication() -> None: ds = distiset["default"]["train"] ds_dedup = ds.filter(lambda x: x["keep_row_after_embedding_filtering"]) - print(len(ds_dedup)) - assert len(ds_dedup) == 71 + + assert len(ds_dedup) == 63 if __name__ == "__main__": From 6adb2c89a100e3c8c27a922980db086a252f5ec6 Mon Sep 17 00:00:00 2001 From: plaguss Date: Thu, 27 Feb 2025 11:15:39 +0100 Subject: [PATCH 2/3] Fix docstrings --- src/distilabel/pipeline/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distilabel/pipeline/base.py b/src/distilabel/pipeline/base.py index cf1e315246..0a93bdb4fa 100644 --- a/src/distilabel/pipeline/base.py +++ b/src/distilabel/pipeline/base.py @@ -734,7 +734,7 @@ def _set_runtime_parameters(self, parameters: Dict[str, Dict[str, Any]]) -> None Args: parameters: A dictionary with the step name as the key and a dictionary with - the parameter name as the key and the parameter value as the value. + the parameter name as the key and the parameter value as the value. """ step_names = set(self.dag.G) for step_name, step_parameters in parameters.items(): From 21eabfa639b84dcbf80e4aeb6135e4b62147b1fe Mon Sep 17 00:00:00 2001 From: plaguss Date: Thu, 27 Feb 2025 11:19:17 +0100 Subject: [PATCH 3/3] Fix docstrings --- src/distilabel/models/llms/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/distilabel/models/llms/base.py b/src/distilabel/models/llms/base.py index 912839b27b..1c8e01ec12 100644 --- a/src/distilabel/models/llms/base.py +++ b/src/distilabel/models/llms/base.py @@ -476,7 +476,7 @@ def _prepare_kwargs( Args: arguments: The arguments that would be passed to the LLM as **kwargs. to update with the structured output configuration. - structured_outputs: The structured output configuration to update the arguments. + structured_output: The structured output configuration to update the arguments. Returns: kwargs updated with the special arguments used by `instructor`.