Move validation scripts so that v1 folder can be removed (#2582)
1 parent f6a3798 · commit c571d38
Showing 16 changed files with 785 additions and 16 deletions.
@@ -0,0 +1,102 @@
# This is used in notebook validation to check the output of individual cells of the notebook.
# The parameters are:
# --file_name The name of the notebook output file
# --folder The path of the folder containing the notebook output.
# --expected_stdout The expected output
# --cell_source Optional cell source to be checked
# --cell_output_substring The specified cell is checked for this output.
# --check_widget True indicates that the widget output should be checked.
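#
# A hypothetical invocation, with placeholder script, file, and folder names
# (the real values are supplied by the CI pipeline that calls this script):
#
#   python check_notebook_output.py \
#       --file_name my-notebook.nbconvert.ipynb \
#       --folder path/to/notebook/output \
#       --expected_stdout "Submitting run" "Run completed" \
#       --check_widget True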

import json
import os
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--file_name")
parser.add_argument("--folder")
parser.add_argument("--expected_stdout", nargs="*")
parser.add_argument("--cell_source", nargs="*")
parser.add_argument("--cell_output_substring")
parser.add_argument("--check_widget", type=bool)

inputArgs = parser.parse_args()
full_name = os.path.join(inputArgs.folder, inputArgs.file_name)


def checkCellOutput(fileName, expected_stdout):
    notebook = json.load(open(fileName, "r"))
    code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code")
    for cell, expected_output in zip(code_cells, expected_stdout):
        source = cell["source"]
        print("Checking cell starting with: " + source[0])
        for actual_output in cell["outputs"]:
            if "text" in actual_output:
                actual_output_text = actual_output["text"]
                for actual_line, expected_line in zip(
                    actual_output_text, expected_output
                ):
                    assert actual_line.startswith(expected_line), (
                        'Actual line "'
                        + actual_line
                        + '" didn\'t match "'
                        + expected_line
                        + '"'
                    )
                assert len(actual_output_text) == len(expected_output), (
                    "Actual output length = "
                    + str(len(actual_output_text))
                    + ", expected length = "
                    + str(len(expected_output))
                )
    print("checkCellOutput completed")


def checkSpecifiedCellOutput(fileName, cell_source, cell_output_substring):
    # assert that a specific code cell contains a substring (case insensitive)
    notebook = json.load(open(fileName, "r"))
    code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code")
    msg = (
        "actual output {} contain expected "
        "substring:\nactual output = {}\nexpected substring={}"
    )
    for cell in code_cells:
        source = cell["source"]
        if source != cell_source:
            continue
        print("Checking cell starting with: " + source[0])
        for actual_output in cell["outputs"]:
            actual_output_str = str(actual_output)
            bad_msg = msg.format("does not", actual_output_str, cell_output_substring)
            assert cell_output_substring.lower() in actual_output_str.lower(), bad_msg
    print("checkSpecifiedCellOutput completed")


def checkWidgetOutput(fileName):
    widget_property = "application/aml.mini.widget.v1"
    widget_data_found = False
    notebook = json.load(open(fileName, "r"))
    code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code")
    for cell in code_cells:
        for actual_output in cell["outputs"]:
            if "data" in actual_output:
                actual_output_data = actual_output["data"]
                if widget_property in actual_output_data:
                    print("Widget data found")
                    widget_data = actual_output_data[widget_property]
                    assert widget_data.startswith('{"status": "Completed"'), widget_data
                    print("Widget data valid")
                    widget_data_found = True
    assert widget_data_found
    print("checkWidgetOutput completed")


if inputArgs.expected_stdout is not None:
    checkCellOutput(full_name, inputArgs.expected_stdout)

if inputArgs.cell_source is not None:
    checkSpecifiedCellOutput(
        full_name, inputArgs.cell_source, inputArgs.cell_output_substring
    )

if inputArgs.check_widget:
    checkWidgetOutput(full_name)
@@ -0,0 +1,295 @@
# This is used to check the results for an experiment
# The parameters are:
# --experiment_name The name of the experiment to check
# --file_name The name of the notebook output file
# --folder The notebook folder
# --metric_name The name of the metric to check
# --expected_num_iteration The expected number of iterations.
# --minimum_median_score The minimum expected median score.
# --absolute_minimum_score The absolute minimum expected score.
# --maximum_median_score The maximum expected median score.
# --absolute_maximum_score The absolute maximum expected score.
# --expected_run_count The expected number of runs.
# --vision_train_run Indicates that this is a vision run.
# --check_explanation_best_run Check the explanation of the best run.
# --is_local_run Indicates that this is a local run.
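#
# A hypothetical invocation, with placeholder script, experiment, and file names
# (the real values are supplied by the CI pipeline that calls this script):
#
#   python check_experiment_result.py \
#       --experiment_name automl-classification \
#       --file_name my-notebook.nbconvert.ipynb \
#       --folder path/to/notebook/output \
#       --metric_name AUC_weighted \
#       --expected_num_iteration 1000 \
#       --minimum_median_score 0.6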

import argparse
import os
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl.run import AutoMLRun
from azureml.core.run import Run

parser = argparse.ArgumentParser()
parser.add_argument("--experiment_name")
parser.add_argument("--file_name")
parser.add_argument("--folder")
parser.add_argument("--metric_name")
parser.add_argument("--expected_num_iteration", type=int)
parser.add_argument("--minimum_median_score", type=float)
parser.add_argument("--absolute_minimum_score", type=float)
parser.add_argument("--maximum_median_score", type=float)
parser.add_argument("--absolute_maximum_score", type=float)
parser.add_argument("--expected_run_count", type=int)
parser.add_argument("--vision_train_run", type=bool)
parser.add_argument("--check_explanation_best_run", type=bool)
parser.add_argument("--is_local_run", type=bool)

inputArgs = parser.parse_args()

try:
    from azureml.interpret import ExplanationClient
except ImportError:
    print(
        "azureml-interpret could not be imported for validation, not installed locally, skipping..."
    )

def checkExperimentResult(
    experiment_name,
    file_name,
    folder,
    metric_name=None,
    expected_num_iteration=None,
    minimum_median_score=None,
    absolute_minimum_score=0.0,
    maximum_median_score=1.0,
    absolute_maximum_score=1.0,
    expected_run_count=1,
    vision_train_run=False,
):
    ws = Workspace.from_config(folder)
    experiment = Experiment(ws, experiment_name)
    runs = list(experiment.get_runs(type="automl"))

    print("Total runs: " + str(len(runs)))

    runs = getNotebookRuns(runs, file_name, folder)

    if vision_train_run:
        # Only check the most recent runs
        error_msg = (
            "Not enough runs found in " + ws.name + " for experiment " + experiment_name
        )
        assert len(runs) >= expected_run_count, error_msg
        runs = runs[:expected_run_count]
        print("Run count: {}".format(len(runs)))
        assert len(runs) == expected_run_count

    for run in runs:
        print("Validating run: " + run.id)
        status = run.get_details()
        ml_run = AutoMLRun(experiment=experiment, run_id=run.id)
        children = list(ml_run.get_children())

        if vision_train_run:
            checkVisionTrainRun(children, minimum_median_score, maximum_median_score)
        else:
            properties = ml_run.get_properties()
            status = ml_run.get_details()
            print("Number of iterations found = " + properties["num_iterations"])
            assert properties["num_iterations"] == str(expected_num_iteration)
            badScoreCount = 0
            goodScoreCount = 0
            # run_metrics = ml_run.get_metrics(recursive=True)

            for iteration in children:
                iteration_status = iteration.status
                print(iteration.id + ": " + iteration_status)
                assert iteration_status == "Completed" or iteration_status == "Canceled"
                if iteration_status == "Completed":
                    props = iteration.get_properties()
                    if props.get("runTemplate") != "automl_child":
                        # not training iteration
                        continue
                    metrics = iteration.get_metrics()
                    print(metric_name + " = " + str(metrics[metric_name]))
                    assert metrics[metric_name] >= absolute_minimum_score
                    assert metrics[metric_name] <= absolute_maximum_score
                    if (
                        metrics[metric_name] < minimum_median_score
                        or metrics[metric_name] > maximum_median_score
                    ):
                        badScoreCount += 1
                    else:
                        goodScoreCount += 1
            assert badScoreCount < goodScoreCount
        print("Run status: " + status["status"])
        assert status["status"] == "Completed"
    print("check_experiment_result complete")


def check_experiment_model_explanation_of_best_run(
    experiment_name, file_name, folder, is_local_run=False
):
    print("Start running check_experiment_model_explanation_of_best_run().")
    ws = Workspace.from_config(folder)

    experiment = Experiment(ws, experiment_name)
    automl_runs = list(experiment.get_runs(type="automl"))
    automl_runs = getNotebookRuns(automl_runs, file_name, folder)

    for run in automl_runs:
        print("Validating run: " + run.id)
        ml_run = AutoMLRun(experiment=experiment, run_id=run.id)

        if not is_local_run:
            model_explainability_run_id = ml_run.id + "_" + "ModelExplain"
            print("Checking the Model Explanation run: " + model_explainability_run_id)
            # Wait for the ME run to complete before accessing the result.
            model_explainability_run = Run(
                experiment=experiment, run_id=model_explainability_run_id
            )
            model_explainability_run.wait_for_completion()

        # The best run should have explanation result.
        best_run = ml_run.get_best_child()
        expl_client = ExplanationClient.from_run(best_run)

        # Download the engineered explanations
        engineered_explanations = expl_client.download_model_explanation(raw=False)
        assert engineered_explanations is not None
        importance_dict = engineered_explanations.get_feature_importance_dict()
        # Importance dict should not be empty.
        assert importance_dict is not None and importance_dict

        # Download the raw explanations
        raw_explanations = expl_client.download_model_explanation(raw=True)
        assert raw_explanations is not None
        importance_dict = raw_explanations.get_feature_importance_dict()
        # Importance dict should not be empty.
        assert importance_dict is not None and importance_dict

    print("check_experiment_model_explanation_of_best_run() completed.")

def checkVisionTrainRun(child_runs, expected_min_score, expected_max_score):
    for hd_run in child_runs:
        print(hd_run.id + ": " + hd_run.status)
        assert hd_run.status == "Completed"

        _, best_metric = hd_run._get_best_run_and_metric_value(
            include_failed=False, include_canceled=False
        )
        print("Primary metric value of {}: {}".format(hd_run.id, best_metric))

        lower_err_msg = (
            "Primary metric value was lower than the expected min value of {}".format(
                expected_min_score
            )
        )
        higher_err_msg = (
            "Primary metric value was higher than the expected max value of {}".format(
                expected_max_score
            )
        )
        assert best_metric >= expected_min_score, lower_err_msg
        assert best_metric <= expected_max_score, higher_err_msg


def checkVisionScoreRun(
    experiment_name,
    min_map_score=0.0,
    max_map_score=0.0,
    min_precision_score=0.0,
    max_precision_score=0.0,
    min_recall_score=0.0,
    max_recall_score=0.0,
    expected_run_count=1,
):
    ws = Workspace.from_config()
    experiment = Experiment(ws, experiment_name)
    runs = list(experiment.get_runs(type="azureml.scriptrun"))

    error_msg = (
        "Not enough runs found in " + ws.name + " for experiment " + experiment_name
    )
    assert len(runs) >= expected_run_count, error_msg
    runs = runs[:expected_run_count]
    print("azureml.scriptrun run type count: {}".format(len(runs)))
    assert len(runs) == expected_run_count

    for run in runs:
        print("Validating run: " + run.id)
        status = run.get_details()

        # Validation only implemented for object detection
        if experiment_name == "flickr47-logo-detection":
            metrics = run.get_metrics()
            checkMetric(
                metrics,
                run_id=run.id,
                metric_name="map",
                expected_min=min_map_score,
                expected_max=max_map_score,
            )
            checkMetric(
                metrics,
                run_id=run.id,
                metric_name="precision",
                expected_min=min_precision_score,
                expected_max=max_precision_score,
            )
            checkMetric(
                metrics,
                run_id=run.id,
                metric_name="recall",
                expected_min=min_recall_score,
                expected_max=max_recall_score,
            )

        print("Run status: " + status["status"])
        assert status["status"] == "Completed"
    print("checkVisionScoreRun complete")


def checkMetric(metrics, run_id, metric_name, expected_min, expected_max):
    score = metrics[metric_name]
    print("{} score of {}: {}".format(metric_name, run_id, score))

    lower_err_msg = "{} value was lower than the expected min value of {}".format(
        metric_name, expected_min
    )
    higher_err_msg = "{} value was higher than the expected max value of {}".format(
        metric_name, expected_max
    )
    assert score >= expected_min, lower_err_msg
    assert score <= expected_max, higher_err_msg

def getNotebookRuns(runs, file_name, folder):
    full_name = os.path.join(folder, file_name)
    notebook_runs = []

    with open(full_name, "r") as notebook_file:
        notebook_output = notebook_file.read()

        for run in runs:
            if run.id in notebook_output:
                notebook_runs.append(run)

    return notebook_runs


checkExperimentResult(
    inputArgs.experiment_name,
    inputArgs.file_name,
    inputArgs.folder,
    inputArgs.metric_name,
    inputArgs.expected_num_iteration or 1000,
    inputArgs.minimum_median_score,
    inputArgs.absolute_minimum_score or 0.0,
    inputArgs.maximum_median_score or 1.0,
    inputArgs.absolute_maximum_score or 1.0,
    inputArgs.expected_run_count or 1,
    inputArgs.vision_train_run,
)

if inputArgs.check_explanation_best_run:
    check_experiment_model_explanation_of_best_run(
        inputArgs.experiment_name,
        inputArgs.file_name,
        inputArgs.folder,
        inputArgs.is_local_run,
    )