diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py
index 88f2846e..c646b7da 100644
--- a/skycatalogs/catalog_creator.py
+++ b/skycatalogs/catalog_creator.py
@@ -259,7 +259,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         _star_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db'
         _sn_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/cosmoDC2_v1.1.4/sne_cosmoDC2_v1.1.4_MS_DDF_healpix.db'
 
-        _star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
+#        _star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
+        _star_parquet = '/sdf/data/rubin/shared/ops-rehearsal-3/imSim_catalogs/UW_stars'
 
         self._galaxy_stride = galaxy_stride
         if pkg_root:
diff --git a/skycatalogs/utils/star_parquet_input.py b/skycatalogs/utils/star_parquet_input.py
index ad932a48..7413eb82 100644
--- a/skycatalogs/utils/star_parquet_input.py
+++ b/skycatalogs/utils/star_parquet_input.py
@@ -1,33 +1,47 @@
-# import pyarrow as pa
 import os
+import glob
 import pyarrow.parquet as pq
 import pandas as pd
 import numpy as np
 import numpy.ma as ma
 import healpy
+from lsst.meas.algorithms.htmIndexer import HtmIndexer
 
 
-def _find_star_files(dirpath, pixel):
-    '''
-    Parameters
-    ----------
-    dirpath      string    Path to directory containing input parquet files
-                           HTM partitioning
-    pixel        int       Healpix pixel
+class UWStarFiles:
+    _files = {}  # shared state
 
-    Returns
-    -------
-    List of parquet file paths for files which might have sources belonging
-    to specified pixel
-    '''
-    # For now, hard-code to 3 files which have everything needed
-    # for joint simulation region
+    def __init__(self, input_dir):
+        self._index_files(input_dir)
+        self.htm_indexer = HtmIndexer(depth=20)
 
-    fnames = ['stars_chunk_8800000000000_8900000000000.parquet',
-              'stars_chunk_9200000000000_9300000000000.parquet',
-              'stars_chunk_9700000000000_9800000000000.parquet']
+    def _index_files(self, input_dir):
+        if self._files:
+            # The files have already been indexed.
+            return
+        files = sorted(glob.glob(os.path.join(input_dir,
+                                              'stars_chunk_*.parquet')))
+        for item in files:
+            tokens = os.path.basename(item).split('_')
+            imin = int(tokens[2])
+            imax = int(tokens[3].split('.')[0])
+            self._files[(imin, imax)] = item
 
-    return [os.path.join(dirpath, fname) for fname in fnames]
+    def find_files(self, pixel, nside=32, res_factor=16):
+        corners = np.transpose(healpy.boundaries(nside, pixel))
+        subpixels = healpy.query_polygon(nside*res_factor, corners)
+        ra, dec = [], []
+        for subpix in subpixels:
+            coord = healpy.pix2ang(nside*res_factor, subpix, lonlat=True)
+            ra.append(coord[0])
+            dec.append(coord[1])
+        files = set()
+        indices = set(self.htm_indexer.indexPoints(ra, dec))
+        for index in indices:
+            for (imin, imax), item in self._files.items():
+                if imin <= index <= imax:
+                    files.add(item)
+        return files
 
 
 def _calculate_pixel_mask(ra, dec, pixel, nside=32):
@@ -49,7 +63,7 @@ def _calculate_pixel_mask(ra, dec, pixel, nside=32):
     return mask
 
 
-def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
+def _star_parquet_reader(dirpath, pixel, output_arrow_schema, nside=32):
     # Get requisite info from parquet files for sources in pixel.
     # Next do renames and calculation for magnorm
     to_read = ['simobjid', 'ra', 'decl', 'mura', 'mudecl', 'vrad',
@@ -59,7 +73,8 @@ def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
     out_fields = ['id', 'ra', 'dec', 'mura', 'mudec', 'radial_velocity',
                   'parallax', 'sed_filepath', 'magnorm', 'ebv']
 
-    paths = _find_star_files(dirpath, pixel)
+    uw_files = UWStarFiles(dirpath)
+    paths = uw_files.find_files(pixel, nside)
     out_dict = {}
     for k in out_fields:
        out_dict[k] = []
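For reference, here is a minimal usage sketch of the new lookup path added in `star_parquet_input.py`. It relies only on what the diff introduces (`UWStarFiles` indexing `stars_chunk_<imin>_<imax>.parquet` files by HTM ID range, and `find_files` mapping a healpix pixel onto those ranges); the directory path and pixel number below are illustrative placeholders, not values from this change.

```python
# Minimal sketch (not part of this change): find which UW star parquet files
# can contain sources for a given healpix pixel, then inspect their metadata.
import pyarrow.parquet as pq
from skycatalogs.utils.star_parquet_input import UWStarFiles

star_dir = '/path/to/UW_stars'   # placeholder; e.g. the _star_parquet default above
pixel = 9556                     # placeholder healpix pixel (nside=32, RING ordering)

uw_files = UWStarFiles(star_dir)    # indexes stars_chunk_*.parquet by HTM ID range
paths = uw_files.find_files(pixel)  # files whose HTM ranges overlap the pixel
for path in sorted(paths):
    print(path, pq.read_metadata(path).num_rows)
```

Note on the design: `_files` is a class-level dict, so the filename-to-HTM-range index is built once per process and reused by any later `UWStarFiles` instances, as flagged by the "shared state" comment and the early return in `_index_files`.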