LSSTDESC · jchiang87 · Apr 24, 2024 · Jan 23, 2024 · JoanneBogart · Apr 4, 2024
diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py
@@ -259,7 +259,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         _star_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db'
         _sn_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/cosmoDC2_v1.1.4/sne_cosmoDC2_v1.1.4_MS_DDF_healpix.db'
 
-        _star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
+#        _star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
+        _star_parquet = '/sdf/data/rubin/shared/ops-rehearsal-3/imSim_catalogs/UW_stars'
 
         self._galaxy_stride = galaxy_stride
         if pkg_root:

diff --git a/skycatalogs/utils/star_parquet_input.py b/skycatalogs/utils/star_parquet_input.py
@@ -1,33 +1,66 @@
-# import pyarrow as pa
 import os
+import glob
 import pyarrow.parquet as pq
 import pandas as pd
 import numpy as np
 import numpy.ma as ma
 import healpy
+from lsst.meas.algorithms.htmIndexer import HtmIndexer
 
 
-def _find_star_files(dirpath, pixel):
-    '''
-    Parameters
-    ----------
-    dirpath    string       Path to directory containing input parquet files
-                            HTM partitioning
-    pixel      int          Healpix pixel
+class UWStarFiles:
+    _files = {}  # shared state
 
-    Returns
-    -------
-    List of parquet file paths for files which might have sources belonging
-    to specified pixel
-    '''
-    # For now, hard-code to 3 files which have everything needed
-    # for joint simulation region
+    def __init__(self, input_dir):
+        self._index_files(input_dir)
+        self.htm_indexer = HtmIndexer(depth=20)
+
+    def _index_files(self, input_dir):
+        if self._files:
+            # The files have already been indexed.
+            return
+        files = sorted(glob.glob(os.path.join(input_dir,
+                                              'stars_chunk_*.parquet')))
+        for item in files:
+            tokens = os.path.basename(item).split('_')
+            imin = int(tokens[2])
+            imax = int(tokens[3].split('.')[0])
+            self._files[(imin, imax)] = item
+
+    def find_files(self, pixel, nside=32, res_factor=16):
+        """
+        Find the UW input files that cover a given healpixel by sampling the
+        interior of the healpixel and computing the htm indexes at those
+        locations.
 
-    fnames = ['stars_chunk_8800000000000_8900000000000.parquet',
-              'stars_chunk_9200000000000_9300000000000.parquet',
-              'stars_chunk_9700000000000_9800000000000.parquet']
+        Parameters
+        ----------
+        pixel : int
+            Healpixel id.
+        nside : int
+            Resolution parameter of the healpixel map.
+        res_factor : int
+            Sampling factor of the healpix to ensure coverage of the healpixel
+            by the UW files.
 
-    return [os.path.join(dirpath, fname) for fname in fnames]
+        Returns
+        -------
+        list : List of filenames.
+        """
+        corners = np.transpose(healpy.boundaries(nside, pixel))
+        subpixels = healpy.query_polygon(nside*res_factor, corners)
+        ra, dec = [], []
+        for subpix in subpixels:
+            coord = healpy.pix2ang(nside*res_factor, subpix, lonlat=True)
+            ra.append(coord[0])
+            dec.append(coord[1])
+        files = set()
+        indices = set(self.htm_indexer.indexPoints(ra, dec))
+        for index in indices:
+            for (imin, imax), item in self._files.items():
+                if imin <= index <= imax:
+                    files.add(item)
+        return files
 
 
 def _calculate_pixel_mask(ra, dec, pixel, nside=32):
@@ -49,7 +82,7 @@ def _calculate_pixel_mask(ra, dec, pixel, nside=32):
     return mask
 
 
-def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
+def _star_parquet_reader(dirpath, pixel, output_arrow_schema, nside=32):
     # Get requisite info from parquet files for sources in pixel.
     # Next do renames and calculation for magnorm
     to_read = ['simobjid', 'ra', 'decl', 'mura', 'mudecl', 'vrad',
@@ -59,7 +92,8 @@ def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
     out_fields = ['id', 'ra', 'dec', 'mura', 'mudec', 'radial_velocity',
                   'parallax', 'sed_filepath', 'magnorm', 'ebv']
 
-    paths = _find_star_files(dirpath, pixel)
+    uw_files = UWStarFiles(dirpath)
+    paths = uw_files.find_files(pixel, nside)
     out_dict = {}
     for k in out_fields:
         out_dict[k] = []