Skip to content

Commit

Permalink
[#167] Support some simple distributions for randomized parameter search
Browse files Browse the repository at this point in the history
- randint returns a random integer in an inclusive range
- uniform returns a random float in an inclusive range
  • Loading branch information
riley-harper committed Nov 27, 2024
1 parent 46da4cb commit 51b4144
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
15 changes: 14 additions & 1 deletion hlink/linking/model_exploration/link_step_train_test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,8 +696,21 @@ def _choose_randomized_parameters(model_parameters: dict[str, Any]) -> dict[str,
for key, value in model_parameters.items():
if key == "type":
parameter_choices[key] = value
else:
elif type(value) == list:
parameter_choices[key] = random.choice(value)
elif type(value) == dict:
distribution = value["distribution"]
low = value["low"]
high = value["high"]

if distribution == "randint":
parameter_choices[key] = random.randint(low, high)
elif distribution == "uniform":
parameter_choices[key] = random.uniform(low, high)
else:
raise ValueError("unknown distribution")
else:
raise ValueError("can't handle value type")

return parameter_choices

Expand Down
35 changes: 35 additions & 0 deletions hlink/tests/model_exploration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,41 @@ def test_get_model_parameters_search_strategy_randomized_sample_from_lists(
assert parameter_choice["maxBins"] in {10, 20, 40}


def test_get_model_parameters_search_strategy_randomized_sample_from_distributions(
    training_conf,
):
    """
    Under the "randomized" strategy, a model parameter may be given as a
    dictionary instead of a list. Such a dictionary names a distribution and
    its bounds, and the parameter is sampled from that distribution.

    {"distribution": "randint", "low": 1, "high": 20} asks for a random integer
    between 1 and 20, each integer with an equal chance.
    {"distribution": "uniform", "low": 0.0, "high": 100.0} asks for a random
    float between 0.0 and 100.0 with a uniform distribution.
    """
    expected_sample_count = 15
    max_depth_distribution = {"distribution": "randint", "low": 1, "high": 20}
    min_info_gain_distribution = {
        "distribution": "uniform",
        "low": 0.0,
        "high": 100.0,
    }

    training_conf["training"]["model_parameter_search"] = {
        "strategy": "randomized",
        "num_samples": expected_sample_count,
    }
    training_conf["training"]["model_parameters"] = [
        {
            "type": "decision_tree",
            "maxDepth": max_depth_distribution,
            "minInfoGain": min_info_gain_distribution,
        }
    ]

    sampled_parameters = _get_model_parameters(training_conf["training"])

    # One sampled parameter set is expected per requested sample.
    assert len(sampled_parameters) == expected_sample_count

    # Every sample keeps the model type and stays within the configured bounds.
    for sample in sampled_parameters:
        assert sample["type"] == "decision_tree"
        assert 1 <= sample["maxDepth"] <= 20
        assert 0.0 <= sample["minInfoGain"] <= 100.0


# -------------------------------------
# Tests that probably should be moved
# -------------------------------------
Expand Down

0 comments on commit 51b4144

Please sign in to comment.