From 644c9e5d58082f442e3e8230fcf6ae1d6e8ee5b8 Mon Sep 17 00:00:00 2001
From: Wenqi Li <831580+wyli@users.noreply.github.com>
Date: Sat, 22 Jul 2023 16:35:27 +0100
Subject: [PATCH] enhances auto3dseg data analyzer info (#6758)

### Description
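- derive the by-case output file name from `self.fmt` instead of a
  hard-coded `.yaml` extension
- add log messages when writing the data stats files and when a non-zero
  stdev is detected in the uniformity check
- allow the export file format to be spelled 'yml' as well as 'yaml'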

### Types of changes
<!--- Put an `x` in all the boxes that apply, and remove the not
applicable items -->
- [x] Non-breaking change (fix or new feature that would not break
existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing
functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Integration tests passed locally by running `./runtests.sh -f -u
--net --coverage`.
- [ ] Quick tests passed locally by running `./runtests.sh --quick
--unittests --disttests`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated, tested `make html` command in the `docs/`
folder.

Signed-off-by: Wenqi Li <wenqil@nvidia.com>
---
 monai/apps/auto3dseg/data_analyzer.py | 14 ++++++++------
 monai/bundle/config_parser.py         |  4 ++--
 tests/test_auto3dseg.py               |  2 +-
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/monai/apps/auto3dseg/data_analyzer.py b/monai/apps/auto3dseg/data_analyzer.py
index ded6390601..350bb61a34 100644
--- a/monai/apps/auto3dseg/data_analyzer.py
+++ b/monai/apps/auto3dseg/data_analyzer.py
@@ -70,7 +70,7 @@ class DataAnalyzer:
             the DataAnalyzer will skip looking for labels and all label-related operations.
         hist_bins: bins to compute histogram for each image channel.
         hist_range: ranges to compute histogram for each image channel.
-        fmt: format used to save the analysis results. Defaults to "yaml".
+        fmt: format used to save the analysis results. Currently support ``"json"`` and ``"yaml"``, defaults to "yaml".
         histogram_only: whether to only compute histograms. Defaults to False.
         extra_params: other optional arguments. Currently supported arguments are :
             'allowed_shape_difference' (default 5) can be used to change the default tolerance of
@@ -164,6 +164,7 @@ def _check_data_uniformity(keys: list[str], result: dict) -> bool:
         constant_props = [result[DataStatsKeys.SUMMARY][DataStatsKeys.IMAGE_STATS][key] for key in keys]
         for prop in constant_props:
             if "stdev" in prop and np.any(prop["stdev"]):
+                logger.debug(f"summary image_stats {prop} has non-zero stdev {prop['stdev']}.")
                 return False
 
         return True
@@ -242,15 +243,16 @@ def get_all_case_stats(self, key="training", transform_list=None):
         if not self._check_data_uniformity([ImageStatsKeys.SPACING], result):
             logger.info("Data spacing is not completely uniform. MONAI transforms may provide unexpected result")
         if self.output_path:
+            logger.info(f"Writing data stats to {self.output_path}.")
             ConfigParser.export_config_file(
                 result, self.output_path, fmt=self.fmt, default_flow_style=None, sort_keys=False
             )
+            by_case_path = self.output_path.replace(f".{self.fmt}", f"_by_case.{self.fmt}")
+            if by_case_path == self.output_path:  # self.output_path not ended with self.fmt?
+                by_case_path += f".by_case.{self.fmt}"
+            logger.info(f"Writing by-case data stats to {by_case_path}, this may take a while.")
             ConfigParser.export_config_file(
-                result_bycase,
-                self.output_path.replace(".yaml", "_by_case.yaml"),
-                fmt=self.fmt,
-                default_flow_style=None,
-                sort_keys=False,
+                result_bycase, by_case_path, fmt=self.fmt, default_flow_style=None, sort_keys=False
             )
         # release memory
         if self.device.type == "cuda":
diff --git a/monai/bundle/config_parser.py b/monai/bundle/config_parser.py
index d03ca8e43b..e2553a5ffd 100644
--- a/monai/bundle/config_parser.py
+++ b/monai/bundle/config_parser.py
@@ -438,12 +438,12 @@ def export_config_file(cls, config: dict, filepath: PathLike, fmt: str = "json",
 
         """
         _filepath: str = str(Path(filepath))
-        writer = look_up_option(fmt.lower(), {"json", "yaml"})
+        writer = look_up_option(fmt.lower(), {"json", "yaml", "yml"})
         with open(_filepath, "w") as f:
             if writer == "json":
                 json.dump(config, f, **kwargs)
                 return
-            if writer == "yaml":
+            if writer == "yaml" or writer == "yml":
                 return yaml.safe_dump(config, f, **kwargs)
             raise ValueError(f"only support JSON or YAML config file so far, got {writer}.")
 
diff --git a/tests/test_auto3dseg.py b/tests/test_auto3dseg.py
index 53f25051ec..272fb52f1a 100644
--- a/tests/test_auto3dseg.py
+++ b/tests/test_auto3dseg.py
@@ -170,7 +170,7 @@ def setUp(self):
         work_dir = self.test_dir.name
         self.dataroot_dir = os.path.join(work_dir, "sim_dataroot")
         self.datalist_file = os.path.join(work_dir, "sim_datalist.json")
-        self.datastat_file = os.path.join(work_dir, "datastats.yaml")
+        self.datastat_file = os.path.join(work_dir, "datastats.yml")
         ConfigParser.export_config_file(sim_datalist, self.datalist_file)
 
     @parameterized.expand(SIM_CPU_TEST_CASES)