From c571d38222a27c10227dffa2d56782b865136d13 Mon Sep 17 00:00:00 2001
From: jeff-shepherd <39775772+jeff-shepherd@users.noreply.github.com>
Date: Mon, 21 Aug 2023 11:14:43 -0700
Subject: [PATCH] Move validation scripts so that v1 folder can be removed
 (#2582)

---
 .github/test/scripts/check_cell_output.py | 102 ++++++
 .../test/scripts/check_experiment_result.py | 295 ++++++++++++++++++
 .github/test/scripts/check_notebook_output.py | 115 +++++++
 .github/test/scripts/check_timeseries.py | 96 ++++++
 .../scripts/check_v2_experiment_result.py | 161 ++++++++++
 ...toml-classification-task-bankmarketing.yml | 4 +-
 ...hub-dau-auto-ml-forecasting-github-dau.yml | 2 +-
 ...-forecasting-orange-juice-sales-mlflow.yml | 4 +-
 ...e-share-auto-ml-forecasting-bike-share.yml | 4 +-
 ...orecasting-task-energy-demand-advanced.yml | 4 +-
 ...alysis-automl-nlp-multiclass-sentiment.yml | 2 +-
 ...zation-automl-nlp-multilabel-paper-cat.yml | 2 +-
 ...ognition-task-automl-nlp-text-ner-task.yml | 2 +-
 ...ext-ner-task-distributed-with-sweeping.yml | 2 +-
 ...l-regression-task-hardware-performance.yml | 4 +-
 sdk/python/readme.py | 2 +-
 16 files changed, 785 insertions(+), 16 deletions(-)
 create mode 100644 .github/test/scripts/check_cell_output.py
 create mode 100644 .github/test/scripts/check_experiment_result.py
 create mode 100644 .github/test/scripts/check_notebook_output.py
 create mode 100644 .github/test/scripts/check_timeseries.py
 create mode 100644 .github/test/scripts/check_v2_experiment_result.py

diff --git a/.github/test/scripts/check_cell_output.py b/.github/test/scripts/check_cell_output.py
new file mode 100644
index 0000000000..54934e9ab0
--- /dev/null
+++ b/.github/test/scripts/check_cell_output.py
@@ -0,0 +1,102 @@
+# This is used in notebook validation to check the output of individual notebook cells.
+# The parameters are:
+# --file_name The name of the notebook output file
+# --folder The path of the folder containing the notebook output.
+# --expected_stdout The expected output (one entry per code cell).
+# --cell_source Optional cell source to be checked.
+# --cell_output_substring The substring that the specified cell's output must contain.
+# --check_widget True indicates that the widget output should be checked.
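+#
+# A hypothetical invocation is sketched below for illustration only; the
+# notebook name is made up and not taken from this repository. Note that
+# --check_widget is parsed with argparse type=bool, so any non-empty value
+# (even "False") enables the widget check.
+#
+#   python check_cell_output.py \
+#       --file_name my-notebook.output.ipynb \
+#       --folder . \
+#       --check_widget True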
+ +import json +import os +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--file_name") +parser.add_argument("--folder") +parser.add_argument("--expected_stdout", nargs="*") +parser.add_argument("--cell_source", nargs="*") +parser.add_argument("--cell_output_substring") +parser.add_argument("--check_widget", type=bool) + +inputArgs = parser.parse_args() +full_name = os.path.join(inputArgs.folder, inputArgs.file_name) + + +def checkCellOutput(fileName, expected_stdout): + notebook = json.load(open(fileName, "r")) + code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code") + for cell, expected_output in zip(code_cells, expected_stdout): + source = cell["source"] + print("Checking cell starting with: " + source[0]) + for actual_output in cell["outputs"]: + if "text" in actual_output: + actual_output_text = actual_output["text"] + for actual_line, expected_line in zip( + actual_output_text, expected_output + ): + assert actual_line.startswith(expected_line), ( + 'Actual Line "' + + actual_line + + '" didn\'t match "' + + expected_line + + '"' + ) + assert len(actual_output_text) == len(expected_output), ( + "Actual output length = " + + str(len(actual_output_text)) + + ", expected_length - " + + str(len(expected_output)) + ) + print("checkCellOutput completed") + + +def checkSpecifiedCellOutput(fileName, cell_source, cell_output_substring): + # assert that a specific code cell contains a substring (case insensitve) + notebook = json.load(open(fileName, "r")) + code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code") + msg = ( + "actual output {} contain expected " + "substring:\nactual output = {}\nexpected substring={}" + ) + for cell in code_cells: + source = cell["source"] + if source != cell_source: + continue + print("Checking cell starting with: " + source[0]) + for actual_output in cell["outputs"]: + actual_output_str = str(actual_output) + bad_msg = msg.format("does not", actual_output_str, cell_output_substring) + assert cell_output_substring.lower() in actual_output_str.lower(), bad_msg + print("checkSpecifiedCellOutput completed") + + +def checkWidgetOutput(fileName): + widget_property = "application/aml.mini.widget.v1" + widget_data_found = False + notebook = json.load(open(fileName, "r")) + code_cells = (cell for cell in notebook["cells"] if cell["cell_type"] == "code") + for cell in code_cells: + for actual_output in cell["outputs"]: + if "data" in actual_output: + actual_output_data = actual_output["data"] + if widget_property in actual_output_data: + print("Widget data found") + widget_data = actual_output_data[widget_property] + assert widget_data.startswith('{"status": "Completed"'), widget_data + print("Widget data valid") + widget_data_found = True + assert widget_data_found + print("checkWidgetOutput completed") + + +if inputArgs.expected_stdout is not None: + checkCellOutput(full_name, inputArgs.expected_stdout) + +if inputArgs.cell_source is not None: + checkSpecifiedCellOutput( + full_name, inputArgs.cell_source, inputArgs.cell_output_substring + ) + +if inputArgs.check_widget: + checkWidgetOutput(full_name) diff --git a/.github/test/scripts/check_experiment_result.py b/.github/test/scripts/check_experiment_result.py new file mode 100644 index 0000000000..38b0cf8fb4 --- /dev/null +++ b/.github/test/scripts/check_experiment_result.py @@ -0,0 +1,295 @@ +# This is used to check the results for an experiment +# The parameters are: +# --experiment_name The name of the experiment to check +# 
--file_name The name of the notebook output file
+# --folder The notebook folder
+# --metric_name The name of the metric to check
+# --expected_num_iteration The expected number of iterations.
+# --minimum_median_score The minimum expected median score.
+# --absolute_minimum_score The absolute minimum expected score.
+# --maximum_median_score The maximum expected median score.
+# --absolute_maximum_score The absolute maximum expected score.
+# --expected_run_count The expected number of runs.
+# --vision_train_run Indicates that this is a vision training run.
+# --check_explanation_best_run Check the explanation of the best run.
+# --is_local_run Indicates that this is a local run.
+
+import argparse
+import os
+from azureml.core.experiment import Experiment
+from azureml.core.workspace import Workspace
+from azureml.train.automl.run import AutoMLRun
+from azureml.core.run import Run
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--experiment_name")
+parser.add_argument("--file_name")
+parser.add_argument("--folder")
+parser.add_argument("--metric_name")
+parser.add_argument("--expected_num_iteration", type=int)
+parser.add_argument("--minimum_median_score", type=float)
+parser.add_argument("--absolute_minimum_score", type=float)
+parser.add_argument("--maximum_median_score", type=float)
+parser.add_argument("--absolute_maximum_score", type=float)
+parser.add_argument("--expected_run_count", type=int)
+parser.add_argument("--vision_train_run", type=bool)
+parser.add_argument("--check_explanation_best_run", type=bool)
+parser.add_argument("--is_local_run", type=bool)
+
+inputArgs = parser.parse_args()
+
+try:
+    from azureml.interpret import ExplanationClient
+except ImportError:
+    print(
+        "azureml-interpret could not be imported for validation (not installed locally); skipping..."
+ ) + + +def checkExperimentResult( + experiment_name, + file_name, + folder, + metric_name=None, + expected_num_iteration=None, + minimum_median_score=None, + absolute_minimum_score=0.0, + maximum_median_score=1.0, + absolute_maximum_score=1.0, + expected_run_count=1, + vision_train_run=False, +): + ws = Workspace.from_config(folder) + experiment = Experiment(ws, experiment_name) + runs = list(experiment.get_runs(type="automl")) + + print("Total runs: " + str(len(runs))) + + runs = getNotebookRuns(runs, file_name, folder) + + if vision_train_run: + # Only check the most recent runs + error_msg = ( + "Not enough runs found in " + ws.name + " for experiment " + experiment_name + ) + assert len(runs) >= expected_run_count, error_msg + runs = runs[:expected_run_count] + print("Run count: {}".format(len(runs))) + assert len(runs) == expected_run_count + + for run in runs: + print("Validating run: " + run.id) + status = run.get_details() + ml_run = AutoMLRun(experiment=experiment, run_id=run.id) + children = list(ml_run.get_children()) + + if vision_train_run: + checkVisionTrainRun(children, minimum_median_score, maximum_median_score) + else: + properties = ml_run.get_properties() + status = ml_run.get_details() + print("Number of iterations found = " + properties["num_iterations"]) + assert properties["num_iterations"] == str(expected_num_iteration) + badScoreCount = 0 + goodScoreCount = 0 + # run_metrics = ml_run.get_metrics(recursive=True) + + for iteration in children: + iteration_status = iteration.status + print(iteration.id + ": " + iteration_status) + assert iteration_status == "Completed" or iteration_status == "Canceled" + if iteration_status == "Completed": + props = iteration.get_properties() + if props.get("runTemplate") != "automl_child": + # not training iteration + continue + metrics = iteration.get_metrics() + print(metric_name + " = " + str(metrics[metric_name])) + assert metrics[metric_name] >= absolute_minimum_score + assert metrics[metric_name] <= absolute_maximum_score + if ( + metrics[metric_name] < minimum_median_score + or metrics[metric_name] > maximum_median_score + ): + badScoreCount += 1 + else: + goodScoreCount += 1 + assert badScoreCount < goodScoreCount + print("Run status: " + status["status"]) + assert status["status"] == "Completed" + print("check_experiment_result complete") + + +def check_experiment_model_explanation_of_best_run( + experiment_name, file_name, folder, is_local_run=False +): + print("Start running check_experiment_model_explanation_of_best_run().") + ws = Workspace.from_config(folder) + + experiment = Experiment(ws, experiment_name) + automl_runs = list(experiment.get_runs(type="automl")) + automl_runs = getNotebookRuns(automl_runs, file_name, folder) + + for run in automl_runs: + print("Validating run: " + run.id) + ml_run = AutoMLRun(experiment=experiment, run_id=run.id) + + if not is_local_run: + model_explainability_run_id = ml_run.id + "_" + "ModelExplain" + print("Checking the Model Explanation run: " + model_explainability_run_id) + # Wait for the ME run to complete before accessing the result. + model_explainability_run = Run( + experiment=experiment, run_id=model_explainability_run_id + ) + model_explainability_run.wait_for_completion() + + # The best run should have explanation result. 
+ best_run = ml_run.get_best_child() + expl_client = ExplanationClient.from_run(best_run) + + # Download the engineered explanations + engineered_explanations = expl_client.download_model_explanation(raw=False) + assert engineered_explanations is not None + importance_dict = engineered_explanations.get_feature_importance_dict() + # Importance dict should not be empty. + assert importance_dict is not None and importance_dict + + # Download the raw explanations + raw_explanations = expl_client.download_model_explanation(raw=True) + assert raw_explanations is not None + importance_dict = raw_explanations.get_feature_importance_dict() + # Importance dict should not be empty. + assert importance_dict is not None and importance_dict + + print("check_experiment_model_explanation_of_best_run() completed.") + + +def checkVisionTrainRun(child_runs, expected_min_score, expected_max_score): + for hd_run in child_runs: + print(hd_run.id + ": " + hd_run.status) + assert hd_run.status == "Completed" + + _, best_metric = hd_run._get_best_run_and_metric_value( + include_failed=False, include_canceled=False + ) + print("Primary metric value of {}: {}".format(hd_run.id, best_metric)) + + lower_err_msg = ( + "Primary metric value was lower than the expected min value of {}".format( + expected_min_score + ) + ) + higher_err_msg = ( + "Primary metric value was higher than the expected max value of {}".format( + expected_max_score + ) + ) + assert best_metric >= expected_min_score, lower_err_msg + assert best_metric <= expected_max_score, higher_err_msg + + +def checkVisionScoreRun( + experiment_name, + min_map_score=0.0, + max_map_score=0.0, + min_precision_score=0.0, + max_precision_score=0.0, + min_recall_score=0.0, + max_recall_score=0.0, + expected_run_count=1, +): + ws = Workspace.from_config() + experiment = Experiment(ws, experiment_name) + runs = list(experiment.get_runs(type="azureml.scriptrun")) + + error_msg = ( + "Not enough runs found in " + ws.name + " for experiment " + experiment_name + ) + assert len(runs) >= expected_run_count, error_msg + runs = runs[:expected_run_count] + print("azureml.scriptrun run type count: {}".format(len(runs))) + assert len(runs) == expected_run_count + + for run in runs: + print("Validating run: " + run.id) + status = run.get_details() + + # Validation only implemented for object detection + if experiment_name == "flickr47-logo-detection": + metrics = run.get_metrics() + checkMetric( + metrics, + run_id=run.id, + metric_name="map", + expected_min=min_map_score, + expected_max=max_map_score, + ) + checkMetric( + metrics, + run_id=run.id, + metric_name="precision", + expected_min=min_precision_score, + expected_max=max_precision_score, + ) + checkMetric( + metrics, + run_id=run.id, + metric_name="recall", + expected_min=min_recall_score, + expected_max=max_recall_score, + ) + + print("Run status: " + status["status"]) + assert status["status"] == "Completed" + print("checkVisionScoreRun complete") + + +def checkMetric(metrics, run_id, metric_name, expected_min, expected_max): + score = metrics[metric_name] + print("{} score of {}: {}".format(metric_name, run_id, score)) + + lower_err_msg = "{} value was lower than the expected min value of {}".format( + metric_name, expected_min + ) + higher_err_msg = "{} value was higher than the expected max value of {}".format( + metric_name, expected_max + ) + assert score >= expected_min, lower_err_msg + assert score <= expected_max, higher_err_msg + + +def getNotebookRuns(runs, file_name, folder): + full_name = 
os.path.join(folder, file_name) + notebook_runs = [] + + with open(full_name, "r") as notebook_file: + notebook_output = notebook_file.read() + + for run in runs: + if run.id in notebook_output: + notebook_runs.append(run) + + return notebook_runs + + +checkExperimentResult( + inputArgs.experiment_name, + inputArgs.file_name, + inputArgs.folder, + inputArgs.metric_name, + inputArgs.expected_num_iteration or 1000, + inputArgs.minimum_median_score, + inputArgs.absolute_minimum_score or 0.0, + inputArgs.maximum_median_score or 1.0, + inputArgs.absolute_maximum_score or 1.0, + inputArgs.expected_run_count or 1, + inputArgs.vision_train_run, +) + +if inputArgs.check_explanation_best_run: + check_experiment_model_explanation_of_best_run( + inputArgs.experiment_name, + inputArgs.file_name, + inputArgs.folder, + inputArgs.is_local_run, + ) diff --git a/.github/test/scripts/check_notebook_output.py b/.github/test/scripts/check_notebook_output.py new file mode 100644 index 0000000000..471580797b --- /dev/null +++ b/.github/test/scripts/check_notebook_output.py @@ -0,0 +1,115 @@ +# This is used in notebook validation to check the output cells of the notebook. +# It checks for unexpected warnings or errors +# The parameters are: +# --file_name The name of the notebook output file +# --folder The path for the folder containing the notebook output. +# --check A list of strings to check for. +# stderr indicates anything written to stderr. + +import json +import requests +import os +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--file_name") +parser.add_argument("--folder") +parser.add_argument("--check", nargs="+") + +inputArgs = parser.parse_args() +full_name = os.path.join(inputArgs.folder, inputArgs.file_name) + +allowed_list = [ + "UserWarning: Matplotlib is building the font cache", + "UserWarning: Starting from version 2.2.1", + "the library file in distribution wheels for macOS is built by the Apple Clang", + "brew install libomp", + "No verified requirements file found" "failed to create lock file", + "retrying", + "Using default datastore for uploads", + "Already registered authentication for run id", + "INFO - Initializing logging file for interpret-community", + "INFO:interpret_community.common.explanation_utils:Using default datastore for uploads", + "better speed can be achieved with apex", + "numExpr defaulting to", + "no version information available", + "Falling back to use azure cli login credentials", + "recommend to use ServicePrincipalAuthentication or MsiAuthentication", + "Please refer to aka.ms/aml-notebook-auth", + "Class KubernetesCompute: This is an experimental class", + "Class SynapseCompute: This is an experimental class", + 'Please use "Dataset.File.upload_directory"', + 'Please use "FileDatasetFactory.upload_directory" instead', + "Called AzureBlobDatastore.upload_files", + "LinkTabularOutputDatasetConfig", + "This may take a few minutes", + "Downloading dataset", + "logger.warning", + "Importing plotly failed", + "Found the config file in:", + "Check: endpoint", + "data_collector is not a known attribute of class", + "Readonly attribute primary_metric will be ignored", + "Downloading artifact", + "The progress bar can be disabled by setting the environment variable MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR", + "Warnings:", + "Downloading builder script", + "Downloading extra modules", + "custom base image or base dockerfile detected", + "TqdmWarning: IProgress not found.", + "from .autonotebook import tqdm as notebook_tqdm", + "Class 
AutoDeleteSettingSchema: This is an experimental class", + "Class AutoDeleteConditionSchema: This is an experimental class", + "Class BaseAutoDeleteSettingSchema: This is an experimental class", + "Class IntellectualPropertySchema: This is an experimental class", + "Class ProtectionLevelSchema: This is an experimental class", + "Class BaseIntellectualPropertySchema: This is an experimental class", + "Class PipelineComponentBatchDeployment: This is an experimental class", + "Class LinkTabularOutputDatasetConfig: This is an experimental class", + "Field 'max_nodes': This is an experimental field", + "Uploading ", + "Forecasting parameter ", + "Parameter ", + "Get_data scripts will be deprecated", + "cost_mode is an internal parameter", + "save_mlflow is an internal parameter", + "start_auxiliary_runs_before_parent_complete is an internal parameter", + "Detected ", +] + +with open(full_name, "r") as notebook_file: + notebook = json.load(notebook_file) + + for cell in notebook["cells"]: + if cell["cell_type"] == "code": + for output in cell["outputs"]: + if "text" in output: + for line in output["text"]: + # Avoid failing notebook runs on empty + # warnings. + if not line.strip(): + continue + for not_allowed in inputArgs.check: + lower_line = line.lower() + if not_allowed == "stderr": + if "name" in output: + assert output["name"] != "stderr" or any( + (a.lower() in lower_line) for a in allowed_list + ), ( + "Found stderr line:\n" + + line + + "\n in file " + + inputArgs.file_name + ) + else: + assert not_allowed.lower() not in lower_line or any( + (a.lower() in lower_line) for a in allowed_list + ), ( + not_allowed + + " found in line:\n" + + line + + "\n in file " + + inputArgs.file_name + ) + +print("check notebook output completed") diff --git a/.github/test/scripts/check_timeseries.py b/.github/test/scripts/check_timeseries.py new file mode 100644 index 0000000000..793fabbbf5 --- /dev/null +++ b/.github/test/scripts/check_timeseries.py @@ -0,0 +1,96 @@ +# This is used to check the results for a historical time series experiment +# The parameters are: +# --experiment_name The name of the experiment to check +# --file_name The name of the notebook output file +# --folder The notebook folder + +import sys +import argparse +import os + +from azureml.core.experiment import Experiment +from azureml.core.workspace import Workspace +from azureml.train.automl.run import AutoMLRun +from azureml.core.run import Run + +parser = argparse.ArgumentParser() +parser.add_argument("--experiment_name") +parser.add_argument("--file_name") +parser.add_argument("--folder") + +inputArgs = parser.parse_args() + + +def get_hts_run_type(run): + children = list(run.get_children()) + return children[1].type + + +def check_training_run(step_runs, pipeline_run): + assert len(step_runs) == 6, "HTS training runs {} should have 6 steps.".format( + pipeline_run.id + ) + automl_parents = [] + for step in step_runs: + if step.name == "hts-automl-training": + print("Checking AutoML runs for pipeline {}".format(pipeline_run.id)) + automl_parents = list(step.get_children()) + for automl in automl_parents: + assert ( + automl.status == "Completed" + ), "AutoML run {} should be in Completed state.".format(automl.id) + assert ( + len(automl_parents) > 0 + ), "Run {} should have at least one automl run.".format(pipeline_run.id) + + +def check_hts_experiment(experiment_name, file_name, folder): + ws = Workspace.from_config(folder) + experiment = Experiment(ws, experiment_name) + runs = list(experiment.get_runs()) + runs = 
getNotebookRuns(runs, file_name, folder) + + assert len(runs) > 0, "At least one pipelines needs to be triggered." + n_hts_training = 0 + n_hts_inferencing = 0 + for r in runs: + print("Checking pipeline run {}".format(r.id)) + assert r.status == "Completed", "Run {} should be in Completed state.".format( + r.id + ) + assert r.type == "azureml.PipelineRun", "Run {} should be pipeline run.".format( + r.id + ) + step_runs = list(r.get_children()) + print("Checking all steps status now for {}.".format(r.id)) + for s in step_runs: + assert ( + s.status == "Completed" + ), "Run {} of {} should be in Completed state.".format(s.id, r.id) + if get_hts_run_type(r) == "azureml.HTSInferencing": + print("Checking inferencing run.") + n_hts_inferencing += 1 + assert ( + len(step_runs) == 3 + ), "Inferencing run {} should have 3 steps.".format(r.id) + elif get_hts_run_type(r) == "azureml.HTSTraining": + print("Checking training run.") + n_hts_training += 1 + check_training_run(step_runs, r) + + +def getNotebookRuns(runs, file_name, folder): + full_name = os.path.join(folder, file_name) + notebook_runs = [] + + with open(full_name, "r") as notebook_file: + notebook_output = notebook_file.read() + + for run in runs: + if run.id in notebook_output: + notebook_runs.append(run) + + return notebook_runs + + +check_hts_experiment(inputArgs.experiment_name, inputArgs.file_name, inputArgs.folder) diff --git a/.github/test/scripts/check_v2_experiment_result.py b/.github/test/scripts/check_v2_experiment_result.py new file mode 100644 index 0000000000..9f5c490040 --- /dev/null +++ b/.github/test/scripts/check_v2_experiment_result.py @@ -0,0 +1,161 @@ +# This is used to check the results for an experiment +# The parameters are: +# --experiment_name The name of the experiment to check +# --file_name The name of the notebook output file +# --folder The notebook folder +# --metric_name The name of the metric to check +# --expected_num_iteration The expected number of iterations. +# --minimum_median_score The minimum expected median score. +# --absolute_minimum_score The absolute minimum expected score. +# --maximum_median_score The maximum expected median score. +# --absolute_maximum_score The absolute maximum expected score. +# --expected_run_count The expected number of runs. +# --vision_train_run Indicates that this is a vission run. +# --check_explanation_best_run Check the explanation of the best run. +# --is_local_run Indicates that this is a local run. 
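+#
+# A hypothetical invocation is sketched below for illustration. The experiment
+# and notebook names mirror the bank-marketing workflow updated in this PR;
+# the metric name and threshold are made-up placeholder values.
+#
+#   python check_v2_experiment_result.py \
+#       --experiment_name dpv2-classifier-experiment \
+#       --file_name automl-classification-task-bankmarketing.output.ipynb \
+#       --folder . \
+#       --metric_name accuracy \
+#       --minimum_median_score 0.5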
+ +import argparse +import mlflow +import os +from mlflow.entities import ViewType +from mlflow.tracking.client import MlflowClient +from azure.identity import AzureCliCredential +from azure.ai.ml import automl, Input, MLClient, command + +parser = argparse.ArgumentParser() +parser.add_argument("--experiment_name") +parser.add_argument("--file_name") +parser.add_argument("--folder") +parser.add_argument("--metric_name") +parser.add_argument("--expected_num_iteration", type=int) +parser.add_argument("--minimum_median_score", type=float) +parser.add_argument("--absolute_minimum_score", type=float) +parser.add_argument("--maximum_median_score", type=float) +parser.add_argument("--absolute_maximum_score", type=float) +parser.add_argument("--expected_run_count", type=int) +parser.add_argument("--vision_train_run", type=bool) +parser.add_argument("--check_explanation_best_run", type=bool) +parser.add_argument("--is_local_run", type=bool) + +inputArgs = parser.parse_args() + + +def checkExperimentResult( + experiment_name, + file_name, + folder, + metric_name=None, + expected_num_iteration=None, + minimum_median_score=None, + absolute_minimum_score=0.0, + maximum_median_score=1.0, + absolute_maximum_score=1.0, + expected_run_count=1, + vision_train_run=False, +): + credential = AzureCliCredential() + ml_client = MLClient.from_config(credential) + + MLFLOW_TRACKING_URI = ml_client.workspaces.get( + name=ml_client.workspace_name + ).mlflow_tracking_uri + + mlflow.set_tracking_uri(MLFLOW_TRACKING_URI) + + mlflow_client = MlflowClient() + + experiment = mlflow_client.get_experiment_by_name(experiment_name) + + print("Experimentid = " + experiment.experiment_id) + + runs = mlflow_client.search_runs( + experiment_ids=experiment.experiment_id, + filter_string="", + run_view_type=ViewType.ALL, + order_by=["run.info.start_time DESC"], + ) + + print("Total runs: " + str(len(runs))) + + root_run_ids = getNotebookRuns(runs, file_name, folder) + + print("root_run_ids: " + str(root_run_ids)) + + if vision_train_run: + # Only check the most recent runs + error_msg = ( + "Not enough runs found in " + ws.name + " for experiment " + experiment_name + ) + assert len(root_run_ids) >= expected_run_count, error_msg + runs = runs[:expected_run_count] + print("Run count: {}".format(len(root_run_ids))) + assert len(root_run_ids) == expected_run_count + + for root_run_id in root_run_ids: + print("Validating run: " + root_run_id) + children = getChildRuns(runs, root_run_id) + + if not vision_train_run: + badScoreCount = 0 + goodScoreCount = 0 + # run_metrics = ml_run.get_metrics(recursive=True) + + for iteration in children: + iteration_status = iteration.info.status + print(iteration.info.run_id + ": " + iteration_status) + assert iteration_status in ["FINISHED", "CANCELED", "KILLED"] + if iteration_status == "FINISHED": + metrics = iteration.data.metrics + print(metric_name + " = " + str(metrics[metric_name])) + assert metrics[metric_name] >= absolute_minimum_score + assert metrics[metric_name] <= absolute_maximum_score + if ( + metrics[metric_name] < minimum_median_score + or metrics[metric_name] > maximum_median_score + ): + badScoreCount += 1 + else: + goodScoreCount += 1 + assert badScoreCount < goodScoreCount + print("check_experiment_result complete") + + +def getNotebookRuns(runs, file_name, folder): + root_run_ids = set( + run.data.tags["mlflow.rootRunId"] + for run in runs + if run.data.tags["mlflow.rootRunId"] + "_setup" == run.info.run_id + ) + full_name = os.path.join(folder, file_name) + notebook_run_ids = 
[] + + with open(full_name, "r") as notebook_file: + notebook_output = notebook_file.read() + + return [runid for runid in root_run_ids if runid in notebook_output] + + +def getChildRuns(runs, root_run_id): + return [ + run + for run in runs + if run.data.tags["mlflow.rootRunId"] == root_run_id + and run.info.run_id.replace( + run.data.tags["mlflow.rootRunId"] + "_", "" + ).isdigit() + ] + + +checkExperimentResult( + inputArgs.experiment_name, + inputArgs.file_name, + inputArgs.folder, + inputArgs.metric_name, + inputArgs.expected_num_iteration or 1000, + inputArgs.minimum_median_score, + inputArgs.absolute_minimum_score or 0.0, + inputArgs.maximum_median_score or 1.0, + inputArgs.absolute_maximum_score or 1.0, + inputArgs.expected_run_count or 1, + inputArgs.vision_train_run, +) diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml index ed2f54e1ce..643bd45e8a 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-classification-task-bankmarketing-automl-classification-task-bankmarketing.yml @@ -71,14 +71,14 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-classification-task-bankmarketing.output.ipynb \ --folder . \ --check warning \ working-directory: sdk/python/jobs/automl-standalone-jobs/automl-classification-task-bankmarketing - name: check v2 experiment result run: | - python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \ + python ${{ github.workspace }}/.github/test/scripts/check_v2_experiment_result.py \ --file_name automl-classification-task-bankmarketing.output.ipynb \ --folder . \ --experiment_name dpv2-classifier-experiment \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml index 28645a59fb..6a68dcdbc6 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-github-dau-auto-ml-forecasting-github-dau.yml @@ -73,7 +73,7 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name auto-ml-forecasting-github-dau.output.ipynb \ --folder . 
\ --check warning \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-orange-juice-sales-automl-forecasting-orange-juice-sales-mlflow.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-orange-juice-sales-automl-forecasting-orange-juice-sales-mlflow.yml index 33fa906c7f..11efcafa06 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-orange-juice-sales-automl-forecasting-orange-juice-sales-mlflow.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-orange-juice-sales-automl-forecasting-orange-juice-sales-mlflow.yml @@ -73,14 +73,14 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-forecasting-orange-juice-sales-mlflow.output.ipynb \ --folder . \ --check warning \ working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales - name: check v2 experiment result run: | - python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \ + python ${{ github.workspace }}/.github/test/scripts/check_v2_experiment_result.py \ --file_name automl-forecasting-orange-juice-sales-mlflow.output.ipynb \ --folder . \ --experiment_name dpv2-forecasting-experiment \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-bike-share-auto-ml-forecasting-bike-share.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-bike-share-auto-ml-forecasting-bike-share.yml index 059e75f8ce..04acdeb0d1 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-bike-share-auto-ml-forecasting-bike-share.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-bike-share-auto-ml-forecasting-bike-share.yml @@ -73,14 +73,14 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name auto-ml-forecasting-bike-share.output.ipynb \ --folder . \ --check warning \ working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share - name: check v2 experiment result run: | - python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \ + python ${{ github.workspace }}/.github/test/scripts/check_v2_experiment_result.py \ --file_name auto-ml-forecasting-bike-share.output.ipynb \ --folder . 
\ --experiment_name dpv2-bike-test \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-energy-demand-automl-forecasting-task-energy-demand-advanced.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-energy-demand-automl-forecasting-task-energy-demand-advanced.yml index a4b2d2785d..bd198f724f 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-energy-demand-automl-forecasting-task-energy-demand-advanced.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-forecasting-task-energy-demand-automl-forecasting-task-energy-demand-advanced.yml @@ -71,14 +71,14 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-energy-demand - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-forecasting-task-energy-demand-advanced.output.ipynb \ --folder . \ --check warning stderr \ working-directory: sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-energy-demand - name: check v2 experiment result run: | - python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \ + python ${{ github.workspace }}/.github/test/scripts/check_v2_experiment_result.py \ --file_name automl-forecasting-task-energy-demand-advanced.output.ipynb \ --folder . \ --experiment_name dpv2-forecasting-experiment \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multiclass-task-sentiment-analysis-automl-nlp-multiclass-sentiment.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multiclass-task-sentiment-analysis-automl-nlp-multiclass-sentiment.yml index 58d9b68a8b..5d92eb55cd 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multiclass-task-sentiment-analysis-automl-nlp-multiclass-sentiment.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multiclass-task-sentiment-analysis-automl-nlp-multiclass-sentiment.yml @@ -71,7 +71,7 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-classification-multiclass-task-sentiment-analysis - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-nlp-multiclass-sentiment.output.ipynb \ --folder . 
\ --check warning stderr \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multilabel-task-paper-categorization-automl-nlp-multilabel-paper-cat.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multilabel-task-paper-categorization-automl-nlp-multilabel-paper-cat.yml index 5567cfe66e..3892a00183 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multilabel-task-paper-categorization-automl-nlp-multilabel-paper-cat.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-classification-multilabel-task-paper-categorization-automl-nlp-multilabel-paper-cat.yml @@ -71,7 +71,7 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-classification-multilabel-task-paper-categorization - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-nlp-multilabel-paper-cat.output.ipynb \ --folder . \ --check warning stderr \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-automl-nlp-text-ner-task.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-automl-nlp-text-ner-task.yml index afdd5ca1a6..210c28068d 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-automl-nlp-text-ner-task.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-automl-nlp-text-ner-task.yml @@ -71,7 +71,7 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-nlp-text-ner-task.output.ipynb \ --folder . \ --check warning stderr \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-distributed-sweeping-automl-nlp-text-ner-task-distributed-with-sweeping.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-distributed-sweeping-automl-nlp-text-ner-task-distributed-with-sweeping.yml index f6d6b52d1d..833e36acc4 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-distributed-sweeping-automl-nlp-text-ner-task-distributed-with-sweeping.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-nlp-text-named-entity-recognition-task-distributed-sweeping-automl-nlp-text-ner-task-distributed-with-sweeping.yml @@ -71,7 +71,7 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-nlp-text-named-entity-recognition-task-distributed-sweeping - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-nlp-text-ner-task-distributed-with-sweeping.output.ipynb \ --folder . 
\ --check warning stderr \ diff --git a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-regression-task-hardware-performance-automl-regression-task-hardware-performance.yml b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-regression-task-hardware-performance-automl-regression-task-hardware-performance.yml index 651975a7fa..581e02c206 100644 --- a/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-regression-task-hardware-performance-automl-regression-task-hardware-performance.yml +++ b/.github/workflows/sdk-jobs-automl-standalone-jobs-automl-regression-task-hardware-performance-automl-regression-task-hardware-performance.yml @@ -71,14 +71,14 @@ jobs: working-directory: sdk/python/jobs/automl-standalone-jobs/automl-regression-task-hardware-performance - name: check notebook output run: | - python ${{ github.workspace }}/v1/scripts/validation/check_notebook_output.py \ + python ${{ github.workspace }}/.github/test/scripts/check_notebook_output.py \ --file_name automl-regression-task-hardware-performance.output.ipynb \ --folder . \ --check warning stderr \ working-directory: sdk/python/jobs/automl-standalone-jobs/automl-regression-task-hardware-performance - name: check v2 experiment result run: | - python ${{ github.workspace }}/v1/scripts/validation/check_v2_experiment_result.py \ + python ${{ github.workspace }}/.github/test/scripts/check_v2_experiment_result.py \ --file_name automl-regression-task-hardware-performance.output.ipynb \ --folder . \ --experiment_name dpv2-regression-experiment \ diff --git a/sdk/python/readme.py b/sdk/python/readme.py index b98b7f4b41..70f0e7cac1 100644 --- a/sdk/python/readme.py +++ b/sdk/python/readme.py @@ -167,7 +167,7 @@ def get_validation_check_yml(notebook_folder, notebook_name, validation): check_yml = f""" - name: {validation_name} run: | - python {github_workspace}/v1/scripts/validation/{validation_file_name}.py \\ + python {github_workspace}/.github/test/scripts/{validation_file_name}.py \\ --file_name {notebook_output_file} \\ --folder . \\"""