Skip to content

Commit

Permalink
rename variables and functions for readability
Browse files Browse the repository at this point in the history
  • Loading branch information
danielfromearth committed Oct 3, 2023
1 parent 8b30c08 commit 386f99d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
11 changes: 7 additions & 4 deletions batcher/harmony_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pystac import Catalog, Item
from pystac.item import Asset

from batcher.tempo_filename_parser import get_unique_day_scan_categories
from batcher.tempo_filename_parser import get_batch_indices

VALID_EXTENSIONS = (".nc4", ".nc")
VALID_MEDIA_TYPES = ["application/x-netcdf", "application/x-netcdf4"]
Expand Down Expand Up @@ -86,16 +86,19 @@ def invoke(self):
def process_items_many_to_one(self):
"""Converts a list of STAC catalogs into a list of lists of STAC catalogs."""
try:
# --- Get granule filepaths (urls) ---
items: list[Catalog] = list(self.get_all_catalog_items(self.catalog))
netcdf_urls: list[str] = _get_netcdf_urls(items)

batch_indices: list[int] = get_unique_day_scan_categories(netcdf_urls)
unique_category_indices: list[int] = sorted(set(batch_indices), key=batch_indices.index)
# --- Map each granule to an index representing the batch to which it belongs ---
batch_indices: list[int] = get_batch_indices(netcdf_urls)
unique_batch_indices: list[int] = sorted(set(batch_indices), key=batch_indices.index)

# --- Group the items into one list per batch, keeping batches in first-seen order ---
grouped: dict[int, list[Catalog]] = {}
for k, v in zip(batch_indices, items):
grouped.setdefault(k, []).append(v)
catalogs: list[list[Catalog]] = [grouped[k] for k in unique_category_indices]
catalogs: list[list[Catalog]] = [grouped[k] for k in unique_batch_indices]

return catalogs

Expand Down
10 changes: 5 additions & 5 deletions batcher/tempo_filename_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
)


def get_unique_day_scan_categories(filenames: list) -> list[int]:
def get_batch_indices(filenames: list) -> list[int]:
"""
Returns
-------
list[int]
category integer for each filename in the original list, e.g. [0, 0, 0, 1, 1, 1, ...]
batch index for each filename in the original list, e.g. [0, 0, 0, 1, 1, 1, ...]
"""
# Make a new list with days and scans, e.g. [('20130701', 'S009'), ('20130701', 'S009'), ...]
day_and_scans: list[tuple[str, str]] = []
Expand All @@ -29,13 +29,13 @@ def get_unique_day_scan_categories(filenames: list) -> list[int]:
match_dict = matches.groupdict()
day_and_scans.append((match_dict["day_in_granule"], match_dict["daily_scan_id"]))

# Unique categories are determined, while keeping the same order
# Unique day-scans are determined (while keeping the same order). Each will be its own batch.
unique_day_scans: list[tuple[str, str]] = sorted(set(day_and_scans), key=day_and_scans.index)

# Map each day/scan to an integer
category_mapper: dict[tuple[str, str], int] = {
batch_mapper: dict[tuple[str, str], int] = {
day_scan: idx for idx, day_scan in enumerate(unique_day_scans)
}

# Generate a new list with the integer representation for each entry in the original list
return [category_mapper[day_scan] for day_scan in day_and_scans]
return [batch_mapper[day_scan] for day_scan in day_and_scans]
4 changes: 2 additions & 2 deletions tests/test_filename_grouping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from batcher.tempo_filename_parser import get_unique_day_scan_categories
from batcher.tempo_filename_parser import get_batch_indices

example_filenames = [
"TEMPO_HCHO_L2_V01_20130701T212354Z_S009G05.nc",
Expand All @@ -11,6 +11,6 @@


def test_grouping():
results = get_unique_day_scan_categories(example_filenames)
results = get_batch_indices(example_filenames)

assert results == [0, 0, 0, 1, 1, 1]

0 comments on commit 386f99d

Please sign in to comment.