From d09626ff85b3e48633c5080bfcea328bea258d8b Mon Sep 17 00:00:00 2001 From: Agus Date: Thu, 27 Feb 2025 11:38:17 +0100 Subject: [PATCH] fix test (#1123) Co-authored-by: agus --- src/distilabel/models/llms/base.py | 2 +- src/distilabel/pipeline/base.py | 2 +- tests/integration/test_embedding_dedup.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/distilabel/models/llms/base.py b/src/distilabel/models/llms/base.py index 912839b27b..1c8e01ec12 100644 --- a/src/distilabel/models/llms/base.py +++ b/src/distilabel/models/llms/base.py @@ -476,7 +476,7 @@ def _prepare_kwargs( Args: arguments: The arguments that would be passed to the LLM as **kwargs. to update with the structured output configuration. - structured_outputs: The structured output configuration to update the arguments. + structured_output: The structured output configuration to update the arguments. Returns: kwargs updated with the special arguments used by `instructor`. diff --git a/src/distilabel/pipeline/base.py b/src/distilabel/pipeline/base.py index cf1e315246..0a93bdb4fa 100644 --- a/src/distilabel/pipeline/base.py +++ b/src/distilabel/pipeline/base.py @@ -734,7 +734,7 @@ def _set_runtime_parameters(self, parameters: Dict[str, Dict[str, Any]]) -> None Args: parameters: A dictionary with the step name as the key and a dictionary with - the parameter name as the key and the parameter value as the value. + the parameter name as the key and the parameter value as the value. """ step_names = set(self.dag.G) for step_name, step_parameters in parameters.items(): diff --git a/tests/integration/test_embedding_dedup.py b/tests/integration/test_embedding_dedup.py index 7ff02f3d70..396084d9d3 100644 --- a/tests/integration/test_embedding_dedup.py +++ b/tests/integration/test_embedding_dedup.py @@ -103,7 +103,7 @@ def test_embedding_deduplication() -> None: # NOTE: Guide to choose an index: https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index nn = FaissNearestNeighbour( - k=3, + k=5, metric_type=faiss.METRIC_INNER_PRODUCT, search_batch_size=50, # string_factory="IVF300_HNSW32,Flat", @@ -122,8 +122,8 @@ def test_embedding_deduplication() -> None: ds = distiset["default"]["train"] ds_dedup = ds.filter(lambda x: x["keep_row_after_embedding_filtering"]) - print(len(ds_dedup)) - assert len(ds_dedup) == 71 + + assert len(ds_dedup) == 63 if __name__ == "__main__":