rename variables and functions for readability

nasa · Oct 3, 2023 · 386f99d · 386f99d
1 parent 8b30c08
commit 386f99d
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 11 deletions.
diff --git a/batcher/harmony_adapter.py b/batcher/harmony_adapter.py
@@ -2,7 +2,7 @@
 from pystac import Catalog, Item
 from pystac.item import Asset
 
-from batcher.tempo_filename_parser import get_unique_day_scan_categories
+from batcher.tempo_filename_parser import get_batch_indices
 
 VALID_EXTENSIONS = (".nc4", ".nc")
 VALID_MEDIA_TYPES = ["application/x-netcdf", "application/x-netcdf4"]
@@ -86,16 +86,19 @@ def invoke(self):
     def process_items_many_to_one(self):
         """Converts a list of STAC catalogs into a list of lists of STAC catalogs."""
         try:
+            # --- Get granule filepaths (urls) ---
             items: list[Catalog] = list(self.get_all_catalog_items(self.catalog))
             netcdf_urls: list[str] = _get_netcdf_urls(items)
 
-            batch_indices: list[int] = get_unique_day_scan_categories(netcdf_urls)
-            unique_category_indices: list[int] = sorted(set(batch_indices), key=batch_indices.index)
+            # --- Map each granule to an index representing the batch to which it belongs ---
+            batch_indices: list[int] = get_batch_indices(netcdf_urls)
+            unique_batch_indices: list[int] = sorted(set(batch_indices), key=batch_indices.index)
 
+            # --- Group the items into one list per batch (batches kept in first-seen order) ---
             grouped: dict[int, list[Catalog]] = {}
             for k, v in zip(batch_indices, items):
                 grouped.setdefault(k, []).append(v)
-            catalogs: list[list[Catalog]] = [grouped[k] for k in unique_category_indices]
+            catalogs: list[list[Catalog]] = [grouped[k] for k in unique_batch_indices]
 
             return catalogs
 

diff --git a/batcher/tempo_filename_parser.py b/batcher/tempo_filename_parser.py
@@ -14,12 +14,12 @@
 )
 
 
-def get_unique_day_scan_categories(filenames: list) -> list[int]:
+def get_batch_indices(filenames: list) -> list[int]:
     """
     Returns
     -------
     list[int]
-        category integer for each filename in the original list, e.g. [0, 0, 0, 1, 1, 1, ...]
+        batch index for each filename in the original list, e.g. [0, 0, 0, 1, 1, 1, ...]
     """
     # Make a new list with days and scans, e.g. [('20130701', 'S009'), ('20130701', 'S009'), ...]
     day_and_scans: list[tuple[str, str]] = []
@@ -29,13 +29,13 @@ def get_unique_day_scan_categories(filenames: list) -> list[int]:
             match_dict = matches.groupdict()
             day_and_scans.append((match_dict["day_in_granule"], match_dict["daily_scan_id"]))
 
-    # Unique categories are determined, while keeping the same order
+    # Determine the unique day-scans, preserving first-appearance order. Each becomes its own batch.
     unique_day_scans: list[tuple[str, str]] = sorted(set(day_and_scans), key=day_and_scans.index)
 
     # Map each day/scan to an integer
-    category_mapper: dict[tuple[str, str], int] = {
+    batch_mapper: dict[tuple[str, str], int] = {
         day_scan: idx for idx, day_scan in enumerate(unique_day_scans)
     }
 
     # Generate a new list with the integer representation for each entry in the original list
-    return [category_mapper[day_scan] for day_scan in day_and_scans]
+    return [batch_mapper[day_scan] for day_scan in day_and_scans]
diff --git a/tests/test_filename_grouping.py b/tests/test_filename_grouping.py
@@ -1,4 +1,4 @@
-from batcher.tempo_filename_parser import get_unique_day_scan_categories
+from batcher.tempo_filename_parser import get_batch_indices
 
 example_filenames = [
     "TEMPO_HCHO_L2_V01_20130701T212354Z_S009G05.nc",
@@ -11,6 +11,6 @@
 
 
 def test_grouping():
-    results = get_unique_day_scan_categories(example_filenames)
+    results = get_batch_indices(example_filenames)
 
     assert results == [0, 0, 0, 1, 1, 1]