Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/argilla-io/distilabel into fix-exam-example
Browse files Browse the repository at this point in the history
  • Loading branch information
plaguss committed Feb 27, 2025
2 parents 2d32585 + a66d894 commit d875084
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/distilabel/models/llms/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def load(self) -> None:
# This is a workaround to avoid the `OpenAILLM` calling the _prepare_structured_output
# in the load method before we have the proper client.
with patch(
- "distilabel.models.openai.OpenAILLM._prepare_structured_output", lambda x: x
+ "distilabel.models.llms.openai.OpenAILLM._prepare_structured_output",
+ lambda x: x,
):
super().load()

Expand Down
2 changes: 1 addition & 1 deletion src/distilabel/models/llms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def _prepare_kwargs(
Args:
arguments: The arguments that would be passed to the LLM as **kwargs.
to update with the structured output configuration.
- structured_outputs: The structured output configuration to update the arguments.
+ structured_output: The structured output configuration to update the arguments.
Returns:
kwargs updated with the special arguments used by `instructor`.
Expand Down
5 changes: 4 additions & 1 deletion src/distilabel/models/llms/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,10 @@ def _cleanup_vllm_model(self) -> None:

destroy_model_parallel()
destroy_distributed_environment()
- del self._model.llm_engine.model_executor
+
+ # Don't delete model_executor if it does not exist, e.g. when VLLM_USE_V1 is set
+ if hasattr(self._model.llm_engine, "model_executor"):
+ del self._model.llm_engine.model_executor
del self._model
with contextlib.suppress(AssertionError):
torch.distributed.destroy_process_group()
Expand Down
2 changes: 1 addition & 1 deletion src/distilabel/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ def _set_runtime_parameters(self, parameters: Dict[str, Dict[str, Any]]) -> None
Args:
parameters: A dictionary with the step name as the key and a dictionary with
- the parameter name as the key and the parameter value as the value.
+ the parameter name as the key and the parameter value as the value.
"""
step_names = set(self.dag.G)
for step_name, step_parameters in parameters.items():
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_embedding_dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def test_embedding_deduplication() -> None:

# NOTE: Guide to choose an index: https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index
nn = FaissNearestNeighbour(
- k=3,
+ k=5,
metric_type=faiss.METRIC_INNER_PRODUCT,
search_batch_size=50,
# string_factory="IVF300_HNSW32,Flat",
Expand All @@ -122,8 +122,8 @@ def test_embedding_deduplication() -> None:

ds = distiset["default"]["train"]
ds_dedup = ds.filter(lambda x: x["keep_row_after_embedding_filtering"])
- print(len(ds_dedup))
- assert len(ds_dedup) == 71
+
+ assert len(ds_dedup) == 63


if __name__ == "__main__":
Expand Down

0 comments on commit d875084

Please sign in to comment.