diff --git a/skycatalogs/data/ci_sample/gaia_skyCatalog.yaml b/skycatalogs/data/ci_sample/gaia_skyCatalog.yaml new file mode 100644 index 00000000..08ae8931 --- /dev/null +++ b/skycatalogs/data/ci_sample/gaia_skyCatalog.yaml @@ -0,0 +1,27 @@ +catalog_dir: ci_sample +catalog_name: gaia_skyCatalog +object_types: + gaia_star: + area_partition: + level: 7 + type: htm + #butler_parameters: + # collections: HSC/defaults + # dstype: gaia_dr2_20200414 + #data_file_type: butler_refcat + id_prefix: gaia_dr2_ + data_file_type: fits + data_dir: ci_sample/repo/refcats/gaia_dr2_20200414 + basename_template: gaia_dr2_20200414_(?P\d+)_refcats.fits + sed_method: use_lut +provenance: + inputs: + skyCatalogs_repo: + git_branch: gaia_bypass + git_hash: 3a87b5e825dbc8f692f9713b0b761a9ad6896364 + git_status: + - UNCOMMITTED_FILES + versioning: + code_version: 1.7.0-rc3 + schema_version: 1.2.0 +skycatalog_root: /sdf/home/j/jrb/rubin-user/skycatalog_root diff --git a/skycatalogs/objects/gaia_object.py b/skycatalogs/objects/gaia_object.py index 7ece5abc..2a13ecbb 100644 --- a/skycatalogs/objects/gaia_object.py +++ b/skycatalogs/objects/gaia_object.py @@ -5,14 +5,22 @@ from collections.abc import Iterable from pathlib import PurePath import numpy as np +import numpy.ma as ma +import pandas as pd import erfa import astropy.modeling from astropy import units as u import galsim -import lsst.daf.butler as daf_butler import lsst.geom +# ## Next two lines needed only for butler access +import lsst.daf.butler as daf_butler from lsst.meas.algorithms import ReferenceObjectLoader -from skycatalogs.utils.shapes import Disk, PolygonalRegion + +# ## Need these for direct access and processing of fits files +import lsst.afw.table as afwtable +from lsst.sphgeom import HtmPixelization, Circle, LonLat + +from skycatalogs.utils.shapes import Disk, PolygonalRegion, compute_region_mask from skycatalogs.objects.base_object import BaseObject, ObjectCollection @@ -67,17 +75,20 @@ def __init__(self, obj_pars, parent_collection, index): ---------- obj_pars: dict-like Dictionary of object parameters as read directly from the - reference catalog. + complete object collection parent_collection: ObjectCollection Parent collection of this object. index: int Index of this object in the parent collection """ - ra = np.degrees(obj_pars['coord_ra']) - dec = np.degrees(obj_pars['coord_dec']) + # ra = np.degrees(obj_pars['coord_ra']) + # dec = np.degrees(obj_pars['coord_dec']) + ra = obj_pars['ra_deg'] + dec = obj_pars['dec_deg'] # Form the object id from the GAIA catalog id with the string # 'gaia_dr2_' prepended. - obj_id = f"gaia_dr2_{obj_pars['id']}" + id_prefix = GaiaCollection.get_config()['id_prefix'] + obj_id = f"{id_prefix}{obj_pars['id']}" super().__init__(ra, dec, obj_id, 'gaia_star', belongs_to=parent_collection, belongs_index=index) self.use_lut = self._belongs_to._use_lut @@ -126,6 +137,97 @@ def set_use_lut(self, use_lut): self.use_lut = use_lut +def _read_fits(htm_id, gaia_config, sky_root, out_dict, logger, region=None): + ''' + Read data for columns in keys from fits file belonging to htm_id. + Append to arrays in out_dict. If region is not None, filter out + entries not in region before appending. + Note ra, dec must be in degrees for filtering, but + coord_ra and coord_dec as stored in out_dict are in radians. + + Parameters + ---------- + htm_id int + gaia_config dict gaia_star portion of skyCatalg config + sky_root string absolute skycatalog_root path. Data + is found relative to this + out_dict dict out_dict.keys() are the columns to store + region lsst.sphgeom.Region or None + ''' + f_dir = gaia_config['data_dir'] + f_name = gaia_config['basename_template'].replace('(?P\\d+)', + f'{htm_id}') + f_path = os.path.join(sky_root, f_dir, f_name) + if not os.path.isfile(f_path): + logger.info(f'No file for requested htm id {htm_id}') + return + + tbl = afwtable.SimpleCatalog.readFits(f_path) + if region is None: + for k in out_dict.keys(): + out_dict[k] += list(tbl.get(k)) + return + + # Otherwise start with ra, dec and create mask + ra_full = tbl.get('coord_ra') + dec_full = tbl.get('coord_dec') + + ra_full_deg = np.degrees(ra_full) + dec_full_deg = np.degrees(dec_full) + + if isinstance(region, Disk): + mask = compute_region_mask(region, ra_full_deg, dec_full_deg) + if all(mask): + return + + elif isinstance(region, PolygonalRegion): + # First mask off points outside a bounding circle because it's + # relatively fast + circle = region._convex_polygon.getBoundingCircle() + v = circle.getCenter() + ra_c = LonLat.longitudeOf(v).asDegrees() + dec_c = LonLat.latitudeOf(v).asDegrees() + rad_as = circle.getOpeningAngle().asDegrees() * 3600 + disk = Disk(ra_c, dec_c, rad_as) + mask = compute_region_mask(disk, ra_full_deg, dec_full_deg) + if all(mask): + return + if any(mask): + ra_compress = ma.array(ra_full, mask=mask).compressed() + dec_compress = ma.array(dec_full, mask=mask).compressed() + else: + ra_compress = ra_full + dec_compress = dec_full + poly_mask = compute_region_mask(region, np.degrees(ra_compress), + np.degrees(dec_compress)) + if all(poly_mask): + return + + # Get indices of unmasked + ixes = np.where(~mask) + + mask[ixes] |= poly_mask + + if any(mask): + ra_compress = ma.array(ra_full, mask=mask).compressed() + dec_compress = ma.array(dec_full, mask=mask).compressed() + else: + ra_compress = ra_full + dec_compress = dec_full + + out_dict['coord_ra'] += list(ra_compress) + out_dict['coord_dec'] += list(dec_compress) + + for k in out_dict.keys(): + if k in ('coord_ra', 'coord_dec'): + continue + full = tbl.get(k) + if any(mask): + out_dict[k] += list(ma.array(full, mask=mask).compressed()) + else: + out_dict[k] += list(full) + + class GaiaCollection(ObjectCollection): # Class methods _gaia_config = None @@ -149,42 +251,67 @@ def load_collection(region, skycatalog, mjd=None): Gaia stars ''' if not skycatalog: - raise ValueError(f'GaiaCollection.load_collection: skycatalog cannot be None') + raise ValueError('GaiaCollection.load_collection: skycatalog cannot be None') if isinstance(region, Disk): ra = lsst.geom.Angle(region.ra, lsst.geom.degrees) dec = lsst.geom.Angle(region.dec, lsst.geom.degrees) center = lsst.geom.SpherePoint(ra, dec) radius = lsst.geom.Angle(region.radius_as, lsst.geom.arcseconds) - refcat_region = lsst.sphgeom.Circle(center.getVector(), radius) + refcat_region = Circle(center.getVector(), radius) elif isinstance(region, PolygonalRegion): refcat_region = region._convex_polygon else: raise TypeError(f'GaiaCollection.load_collection: {region} not supported') - - source_type = 'gaia_star' - if GaiaCollection.get_config() is None: gaia_section = skycatalog.raw_config['object_types']['gaia_star'] GaiaCollection.set_config(gaia_section) + gaia_section = GaiaCollection.get_config() - butler_params = GaiaCollection.get_config()['butler_parameters'] - butler = daf_butler.Butler(butler_params['repo'], - collections=butler_params['collections']) - refs = set(butler.registry.queryDatasets(butler_params['dstype'])) - refCats = [daf_butler.DeferredDatasetHandle(butler, _, {}) - for _ in refs] - dataIds = [butler.registry.expandDataId(_.dataId) for _ in refs] - config = ReferenceObjectLoader.ConfigClass() - config.filterMap = {f'{_}': f'phot_{_}_mean' for _ in ('g', 'bp', 'rp')} - ref_obj_loader = ReferenceObjectLoader(dataIds=dataIds, - refCats=refCats, - config=config) - + source_type = 'gaia_star' sed_method = GaiaCollection.get_config().get('sed_method', 'use_lut') use_lut = (sed_method.strip().lower() == 'use_lut') - band = 'bp' - cat = ref_obj_loader.loadRegion(refcat_region, band).refCat - df = cat.asAstropy().to_pandas().sort_values('id') + if gaia_section['data_file_type'] == 'butler_refcat': + + butler_params = gaia_section['butler_parameters'] + butler = daf_butler.Butler(butler_params['repo'], + collections=butler_params['collections']) + refs = set(butler.registry.queryDatasets(butler_params['dstype'])) + refCats = [daf_butler.DeferredDatasetHandle(butler, _, {}) + for _ in refs] + dataIds = [butler.registry.expandDataId(_.dataId) for _ in refs] + config = ReferenceObjectLoader.ConfigClass() + config.filterMap = {f'{_}': f'phot_{_}_mean' for _ in ('g', 'bp', 'rp')} + ref_obj_loader = ReferenceObjectLoader(dataIds=dataIds, + refCats=refCats, + config=config) + + band = 'bp' + cat = ref_obj_loader.loadRegion(refcat_region, band).refCat + df = cat.asAstropy().to_pandas().sort_values('id') + + else: # access fits files directly + # find htms intersecting region + level = gaia_section['area_partition']['level'] + htm_pix = HtmPixelization(level) + + overlap_ranges = htm_pix.envelope(refcat_region) + interior_ranges = htm_pix.interior(refcat_region) + partial_ranges = overlap_ranges - interior_ranges + keys = ['id', 'coord_ra', 'coord_dec', 'parallax', 'pm_ra', + 'pm_dec', 'epoch'] + keys += [f'phot_{_}_mean_flux' for _ in ('g', 'bp', 'rp')] + out_dict = {k: [] for k in keys} + + config = GaiaCollection.get_config() + for i_r in interior_ranges: + for i_htm in i_r: + _read_fits(i_htm, config, skycatalog._sky_root, out_dict, + skycatalog._logger, region=None) + for p_r in partial_ranges: + for i_htm in p_r: + _read_fits(i_htm, config, skycatalog._sky_root, + out_dict, skycatalog._logger, region=region) + df = pd.DataFrame(out_dict).sort_values('id') if mjd is None: raise RuntimeError("MJD needs to be provided for Gaia " @@ -212,6 +339,11 @@ def load_collection(region, skycatalog, mjd=None): df['coord_ra'] = pm_outputs[0] df['coord_dec'] = pm_outputs[1] + # and also store degrees version + # is there any reason to keep coord_ra & coord_dec? + df['ra_deg'] = np.degrees(pm_outputs[0]) + df['dec_deg'] = np.degrees(pm_outputs[1]) + return GaiaCollection(df, skycatalog, source_type, use_lut, mjd) def __init__(self, df, sky_catalog, source_type, use_lut, mjd): @@ -241,7 +373,11 @@ def mjd(self): def __getitem__(self, key): if isinstance(key, int) or isinstance(key, np.int64): - return GaiaObject(self.df.iloc[key], self, key) + row = {col: self.df[col][key] for col in ('id', 'ra_deg', + 'dec_deg', + 'phot_bp_mean_flux', + 'phot_rp_mean_flux')} + return GaiaObject(row, self, key) elif type(key) == slice: ixdata = [i for i in range(min(key.stop, len(self._id)))] diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index c5c0aeba..37b51af3 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -1,5 +1,4 @@ import os -import sys import re import logging import healpy @@ -20,6 +19,7 @@ from skycatalogs.utils.sed_tools import MilkyWayExtinction from skycatalogs.utils.config_utils import Config from skycatalogs.utils.shapes import Box, Disk, PolygonalRegion +from skycatalogs.utils.shapes import compute_region_mask from skycatalogs.objects.sncosmo_object import SncosmoObject, SncosmoCollection from skycatalogs.objects.star_object import StarObject from skycatalogs.objects.galaxy_object import GalaxyObject @@ -129,7 +129,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', dec = lon_lat.getLat().asDegrees() disk_region = Disk(ra, dec, max_offset) - mask = _compute_region_mask(disk_region, tbl['ra'], tbl['dec']) + mask = compute_region_mask(disk_region, tbl['ra'], tbl['dec']) if all(mask): if no_obj_type_return and no_mjd_return: return None, None, None, None @@ -139,7 +139,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', # Get compressed ra, dec ra_compress = ma.array(tbl['ra'], mask=mask).compressed() dec_compress = ma.array(tbl['dec'], mask=mask).compressed() - poly_mask = _compute_region_mask(region, ra_compress, dec_compress) + poly_mask = compute_region_mask(region, ra_compress, dec_compress) # Get indices of unmasked ixes = np.where(~mask) @@ -147,7 +147,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', # Update bounding box mask with polygonal mask mask[ixes] |= poly_mask else: - mask = _compute_region_mask(region, tbl['ra'], tbl['dec']) + mask = compute_region_mask(region, tbl['ra'], tbl['dec']) if transient_filter: time_mask = _compute_transient_mask(mjd, tbl['start_mjd'], @@ -206,46 +206,6 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', return tbl['ra'], tbl['dec'], tbl[id_column], tbl['object_type'], None -def _compute_region_mask(region, ra, dec): - ''' - Compute mask according to region for provided data - Parameters - ---------- - region Supported shape (box, disk) or None - ra,dec Coordinates for data to be masked, in degrees - Returns - ------- - mask of elements in ra, dec arrays to be omitted - - ''' - mask = None - if isinstance(region, Box): - mask = np.logical_or((ra < region.ra_min), - (ra > region.ra_max)) - mask = np.logical_or(mask, (dec < region.dec_min)) - mask = np.logical_or(mask, (dec > region.dec_max)) - if isinstance(region, Disk): - # Change positions to 3d vectors to measure distance - p_vec = healpy.pixelfunc.ang2vec(ra, dec, lonlat=True) - - c_vec = healpy.pixelfunc.ang2vec(region.ra, - region.dec, - lonlat=True) - radius_rad = (region.radius_as * u.arcsec).to_value('radian') - - # Rather than comparing arcs, it is equivalent to compare chords - # (or square of chord length) - obj_chord_sq = np.sum(np.square(p_vec - c_vec), axis=1) - - # This is to be compared to square of chord for angle a corresponding - # to disk radius. That's 4(sin(a/2)^2) - rad_chord_sq = 4 * np.square(np.sin(0.5 * radius_rad)) - mask = obj_chord_sq > rad_chord_sq - if isinstance(region, PolygonalRegion): - mask = region.get_containment_mask(ra, dec, included=False) - return mask - - def _compute_transient_mask(current_mjd, start_mjd, end_mjd): ''' Starting with an existing mask of excluded objects, exclude additional @@ -341,6 +301,8 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, self._cat_dir = os.path.join(sky_root, config['catalog_dir']) + self._sky_root = os.path.abspath(sky_root) + self._logger.info(f'Catalog data will be read from {self._cat_dir}') # There may be more to do at this point but not too much. # In particular, don't read in anything from data files @@ -848,279 +810,4 @@ def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False): config_dict = open_config_file(config_file) return SkyCatalog(config_dict, skycatalog_root=skycatalog_root, mp=mp, - verbose=verbose) - - -if __name__ == '__main__': - import time - cfg_file_name = 'skyCatalog.yaml' - skycatalog_root = os.getenv('SKYCATALOG_ROOT') - catalog_dir = 'reorg' - if len(sys.argv) > 1: - catalog_dir = sys.argv[1] - if len(sys.argv) > 2: - cfg_file_name = sys.argv[2] - - cfg_file = os.path.join(skycatalog_root, catalog_dir, cfg_file_name) - - write_sed = False - if len(sys.argv) > 3: - write_sed = True - - # For tract 3828 - # 55.73604 < ra < 57.563452 - # -37.19001 < dec < -35.702481 - - cat = open_catalog(cfg_file, skycatalog_root=skycatalog_root) - hps = cat._find_all_hps() - print('Found {} healpix pixels '.format(len(hps))) - for h in hps: - print(h) - ra_min_tract = 55.736 - ra_max_tract = 57.564 - dec_min_tract = -37.190 - dec_max_tract = -35.702 - # ra_min_small = 56.0 - # ra_max_small = 56.2 - ra_min_small = 55.9 - ra_max_small = 56.1 - dec_min_small = -36.2 - dec_max_small = -36.0 - - sed_fmt = 'lambda: {:.1f} f_lambda: {:g}' - - rgn = Box(ra_min_small, ra_max_small, dec_min_small, dec_max_small) - vertices = [(ra_min_small, dec_min_small), (ra_min_small, dec_max_small), - (ra_max_small, dec_max_small), (ra_max_small, dec_min_small)] - rgn_poly = PolygonalRegion(vertices_radec=vertices) - - intersect_hps = _get_intersecting_hps('ring', 32, rgn) - - print("For region ", rgn) - print("intersecting pixels are ", intersect_hps) - - intersect_poly_hps = _get_intersecting_hps('ring', 32, rgn_poly) - print("For region ", rgn_poly) - print("intersecting pixels are ", intersect_poly_hps) - - at_slac = os.getenv('HOME').startswith('/sdf/home/') - if not at_slac: - obj_types = {'star', 'galaxy', 'sncosmo', 'snana'} - else: - obj_types = {'star', 'galaxy', 'sncosmo', 'snana', 'gaia_star'} - - print('Invoke get_objects_by_region with box region, no gaia') - t0 = time.time() - object_list = cat.get_objects_by_region(rgn, - obj_type_set={'star', 'galaxy', - 'sncosmo'}, - mjd=63200.0) - t_done = time.time() - print('Took ', t_done - t0) - # #### temporary obj_type_set={'galaxy', 'star'} ) - # obj_type_set=set(['galaxy']) ) - # Try out get_objects_by_hp with no region - # colls = cat.get_objects_by_hp(9812, None, set(['galaxy']) ) - - print('Number of collections returned for box: ', - object_list.collection_count) - print('Object count for box: ', len(object_list)) - - print('Invoke get_objects_by_region with polygonal region') - t0 = time.time() - object_list_poly = cat.get_objects_by_region(rgn_poly, - obj_type_set=obj_types) - t_done = time.time() - print('Took ', t_done - t0) - - print('Number of collections returned for polygon: ', - object_list_poly.collection_count) - assert (object_list.collection_count == object_list_poly.collection_count) - print('Object count for polygon: ', len(object_list_poly)) - - fudge = 5 - assert (len(object_list_poly) > len(object_list) - fudge) - assert (len(object_list_poly) < len(object_list) + fudge) - - print('Now try inscribed polygon which is not a box') - ra_avg = (ra_min_small + ra_max_small) * 0.5 - dec_avg = (dec_min_small + dec_max_small) * 0.5 - diamond_vertices = [(ra_min_small, dec_avg), (ra_avg, dec_max_small), - (ra_max_small, dec_avg), (ra_avg, dec_min_small)] - rgn_diamond = PolygonalRegion(vertices_radec=diamond_vertices) - - intersect_diamond_hps = _get_intersecting_hps('ring', 32, rgn_diamond) - print("for diamond region ", rgn_diamond) - print("intersecting pixels are ", intersect_diamond_hps) - - print('Invoke get_objects_by_region with diamond region') - t0 = time.time() - object_list_diamond = cat.get_objects_by_region(rgn_diamond, - obj_type_set=obj_types) - t_done = time.time() - print('Took ', t_done - t0) - - print('Number of collections returned for diamond: ', - object_list_diamond.collection_count) - print('Object count for diamond: ', len(object_list_diamond)) - - # ### TEMP FOR DEBUGGING - # ## exit(0) - - # For now SIMS_SED_LIBRARY_DIR is undefined at SLAC, making it impossible - # to get SEDs for stars. So (crudely) determine whether or not - # we're running at SLAC - - colls = object_list.get_collections() - got_a_sed = False - for c in colls: - n_obj = len(c) - print("For hpid ", c.get_partition_id(), "found ", n_obj, " objects") - if (n_obj) < 1: - continue - print("First object: ") - print(c[0], '\nid=', c[0].id, ' ra=', c[0].ra, ' dec=', c[0].dec, - ' belongs_index=', c[0]._belongs_index, - ' object_type: ', c[0].object_type) - - if (n_obj < 3): - continue - print("Slice [1:3]") - slice13 = c[1:3] - for o in slice13: - print('id=', o.id, ' ra=', o.ra, ' dec=', o.dec, ' belongs_index=', - o._belongs_index, ' object_type: ', o.object_type) - print(o.object_type) - if o.object_type == 'star': - if not at_slac: - print(o.get_instcat_entry()) - sed, magnorm = o._get_sed() - print('For star magnorm: ', magnorm) - if magnorm < 1000: - print('Length of sed: ', len(sed.wave_list)) - elif o.object_type == 'sncosmo': - print(o.get_instcat_entry()) - for b in {'u', 'g', 'r', 'i', 'z', 'y'}: - print(o.get_LSST_flux(b)) - elif o.object_type == 'galaxy': - for cmp in ['disk', 'bulge', 'knots']: - print(cmp) - if cmp in o.subcomponents: - # broken for galaxies currently - # print(o.get_instcat_entry(component=cmp)) - sed, _ = o._get_sed(cmp) - if sed: - print('Length of sed table: ', len(sed.wave_list)) - if not got_a_sed: - got_a_sed = True - th = o.get_native_attribute(f'sed_val_{cmp}') - print('Tophat values: ', th) - sed, _ = o._get_sed(component=cmp) - print('Simple sed wavelengths:') - print(sed.wave_list) - print('Simple sed values:') - print([sed(w) for w in sed.wave_list]) - if write_sed: - o.write_sed('simple_sed.txt', - component=cmp) - sed_fine, _ = o._get_sed(component=cmp, - resolution=1.0) - print('Bin width = 1 nm') - print('Initial wl values', - sed_fine.wave_list[:20]) - print('Start at bin 100', - sed_fine.wave_list[100:120]) - print('Initial values') - print([sed_fine(w) for w in sed_fine.wave_list[:20]]) - print('Start at bin 100') - print([sed_fine(w) for w in sed_fine.wave_list[100:120]]) - else: - print('All-zero sed') - - # Try out old wrapper functions - print("\nget_dust:") - i_av, i_rv, g_av, g_rv = o._get_dust() - print(f'i_av={i_av} i_rv={i_rv} g_av={g_av} g_rv={g_rv}') - print("\nget_wl_params") - g1, g2, mu = o.get_wl_params() - print(f'g1={g1} g2={g2} mu={mu}') - print("\nget_gsobject_components. Keys of returned dict:") - gs_dict = o.get_gsobject_components() - print(gs_dict.keys()) - print("\nget_observer_sed_components. Keys of returned dict:") - o_seds = o.get_observer_sed_components() - print(o_seds.keys()) - - f = o.get_LSST_flux('i') - print(f'Flux for i bandpass: {f}') - fluxes = o.get_LSST_fluxes() - for k, v in fluxes.items(): - print(f'Bandpass {k} has flux {v}') - - if n_obj > 200: - print("Object 200") - print(c[200], '\nid=', c[200].id, ' ra=', c[200].ra, ' dec=', - c[200].dec, - ' belongs_index=', c[200]._belongs_index) - if n_obj > 163997: - slice_late = c[163994:163997] - print('\nobjects indexed 163994 through 163996') - for o in slice_late: - print('id=', o.id, ' ra=', o.ra, ' dec=', o.dec, - ' belongs_index=', o._belongs_index) - - print('Total object count: ', len(object_list)) - - if len(object_list) == 0: - print('Empty object list. All done') - exit(0) - - obj = object_list[0] - print("Type of element in object_list:", type(obj)) - - if object_list[0].object_type == 'galaxy': - redshift0 = object_list[0].get_native_attribute('redshift') - print('First redshift: ', redshift0) - - sum = 0 - for obj in object_list: - sum = sum + 1 - if sum > 7216: - print("Sum is now ", sum) - print("obj id: ", obj.id) - if sum > 7220: - break - - print(f'Object list len: {len(object_list)}') - print(f'Objects found with "in": {sum}') - - if len(object_list) < 5: - print('Very short object list (< 5 elements). done') - exit(0) - - segment = object_list[2:5] - print('Information for slice 2:5 ') - for o in segment: - print(f'object {o.id} of type {o.object_type} belongs to collection {o._belongs_to}') - - if len(object_list) < 304: - print('Object list len < 304. All done') - exit(0) - print('\nInformation for slice 285:300') - segment = object_list[285:300] - for o in segment: - print(f'object {o.id} of type {o.object_type} belongs to collection {o._belongs_to}') - - # ixes = ([3,5,8],) - ixes = (np.array([3, 5, 8, 300, 303]),) - print(f'\nObjects with indexes {ixes[0]}') - for o in object_list[ixes]: - print(o.id) - print('\nObjects in slice [3:9]') - for o in object_list[3:9]: - print(o.id) - print('\nObjects in slice [300:304]') - for o in object_list[300:304]: - print(o.id) - - print('all done') + verbose=verbose) \ No newline at end of file diff --git a/skycatalogs/utils/config_utils.py b/skycatalogs/utils/config_utils.py index c79c6e41..4daebd1a 100644 --- a/skycatalogs/utils/config_utils.py +++ b/skycatalogs/utils/config_utils.py @@ -1,7 +1,6 @@ import os import sys import yaml -import git import logging from typing import Any from .exceptions import ConfigDuplicateKeyError @@ -317,6 +316,7 @@ def assemble_SED_models(bins): def assemble_provenance(pkg_root, inputs={}, schema_version=None): + import git if not schema_version: schema_version = CURRENT_SCHEMA_VERSION diff --git a/skycatalogs/utils/shapes.py b/skycatalogs/utils/shapes.py index 7a739997..48248b87 100644 --- a/skycatalogs/utils/shapes.py +++ b/skycatalogs/utils/shapes.py @@ -1,9 +1,10 @@ from collections import namedtuple import numpy as np from astropy import units as u +import healpy from lsst.sphgeom import ConvexPolygon, UnitVector3d, LonLat -__all__ = ['Box', 'Disk', 'PolygonalRegion'] +__all__ = ['Box', 'Disk', 'PolygonalRegion', 'compute_region_mask'] Box = namedtuple('Box', ['ra_min', 'ra_max', 'dec_min', 'dec_max']) @@ -65,3 +66,44 @@ def get_containment_mask(self, ra, dec, included=True): return mask else: return np.logical_not(mask) + + +def compute_region_mask(region, ra, dec): + ''' + Compute mask according to region for provided data + Parameters + ---------- + region Supported shape (box, disk, PolygonalRegion) or None + ra,dec Coordinates for data to be masked, in degrees + Returns + ------- + mask of elements in ra, dec arrays to be omitted + + ''' + mask = None + if isinstance(region, Box): + mask = np.logical_or((ra < region.ra_min), + (ra > region.ra_max)) + mask = np.logical_or(mask, (dec < region.dec_min)) + mask = np.logical_or(mask, (dec > region.dec_max)) + if isinstance(region, Disk): + # Use healpy rather than lsst.sphgeom because healpy takes + # array inputs + p_vec = healpy.pixelfunc.ang2vec(ra, dec, lonlat=True) + + c_vec = healpy.pixelfunc.ang2vec(region.ra, + region.dec, + lonlat=True) + radius_rad = (region.radius_as * u.arcsec).to_value('radian') + + # Rather than comparing arcs, it is equivalent to compare chords + # (or square of chord length) + obj_chord_sq = np.sum(np.square(p_vec - c_vec), axis=1) + + # This is to be compared to square of chord for angle a corresponding + # to disk radius. That's 4(sin(a/2)^2) + rad_chord_sq = 4 * np.square(np.sin(0.5 * radius_rad)) + mask = obj_chord_sq > rad_chord_sq + if isinstance(region, PolygonalRegion): + mask = region.get_containment_mask(ra, dec, included=False) + return mask diff --git a/tests/test_gaia_direct.py b/tests/test_gaia_direct.py new file mode 100644 index 00000000..056b8afa --- /dev/null +++ b/tests/test_gaia_direct.py @@ -0,0 +1,105 @@ +import os +from pathlib import Path +import unittest +import numpy as np +import pandas as pd +import lsst.daf.butler as daf_butler +from skycatalogs import skyCatalogs +from skycatalogs.objects import GaiaCollection + + +PACKAGE_DIR = os.path.dirname(os.path.abspath(str(Path(__file__).parent))) +SKYCATALOG_ROOT = os.path.join(PACKAGE_DIR, "skycatalogs", "data") +CATALOG_DIR = os.path.join(PACKAGE_DIR, "skycatalogs", "data", "ci_sample") + +BUTLER_PARAMETERS = {'collections': 'refcats', + 'dstype': 'gaia_dr2_20200414', + 'repo': os.path.join(CATALOG_DIR, 'repo')} + + +def get_gaia_data(butler_params): + butler = daf_butler.Butler(butler_params['repo'], + collections=[butler_params['collections']]) + refs = set(butler.registry.queryDatasets(butler_params['dstype'])) + return pd.concat(butler.get(_).asAstropy().to_pandas() for _ in refs) + + +class GaiaObjectTestCase(unittest.TestCase): + """TestCase class for GaiaObjects""" + def setUp(self): + ''' + Open the catalog; establish config + ''' + skycatalog_root = SKYCATALOG_ROOT + cfg_path = os.path.join(skycatalog_root, 'ci_sample', + 'gaia_skyCatalog.yaml') + self._cat = skyCatalogs.open_catalog(cfg_path, + skycatalog_root=skycatalog_root) + + self.df = get_gaia_data(BUTLER_PARAMETERS) + + def tearDown(self): + pass + + def test_proper_motion(self): + ra, dec = 0, 0 + radius = 1 + region = skyCatalogs.Disk(ra, dec, radius*3600.0) + + # make a quadrilateral in case we want to try out + # masking for a polygonal region + # ra_min = -0.7 + # ra_max = 0.7 + # dec_min = -0.8 + # dec_max = 0.6 + + # vertices = [(ra_min, dec_min), + # (ra_min - 0.1, dec_max), + # (ra_max, dec_max), + # (ra_max, dec_min)] + # rgn_poly = skyCatalogs.PolygonalRegion(vertices_radec=vertices) + + # Use start of proper motion epoch, so ra, dec values should + # be unchanged from refcat values. + epoch_a_values = set(self.df['epoch']) + assert len(epoch_a_values) == 1 + mjd0 = epoch_a_values.pop() + object_list = GaiaCollection.load_collection(region, self._cat, + mjd=mjd0) + # First try a particular case + # fixed = 2831 + # the_list.insert(0, fixed) + # for index in np.random.choice(range(len(object_list)), size=20): + the_list = list(np.random.choice(range(len(object_list)), size=20)) + for index in the_list: + obj = object_list[index] + gaia_id = obj.id.split('_')[-1] + df = self.df.query(f"id=={gaia_id}") + row = df.iloc[0] + self.assertAlmostEqual(np.degrees(row.coord_ra), obj.ra, places=15) + self.assertAlmostEqual(np.degrees(row.coord_dec), obj.dec, + places=15) + + self.assertEqual(mjd0, object_list.mjd) + + # Use a plausible LSST observation date, and ensure that + # calculated proper motion offsets are at least 10% larger + # than the naive estimates. + mjd = 60584. # 2024-10-01 00:00:00 + object_list = GaiaCollection.load_collection(region, self._cat, + mjd=mjd) + for index in np.random.choice(range(len(object_list)), size=20): + obj = object_list[index] + gaia_id = obj.id.split('_')[-1] + df = self.df.query(f"id=={gaia_id}") + row = df.iloc[0] + self.assertGreaterEqual(abs(np.radians(obj.ra) - row.coord_ra), + abs(row['pm_ra']*(mjd - mjd0)/365.)/1.1) + self.assertGreaterEqual(abs(np.radians(obj.dec) - row.coord_dec), + abs(row['pm_dec']*(mjd - mjd0)/365.)/1.1) + + self.assertEqual(mjd, object_list.mjd) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_gaia_objects.py b/tests/test_gaia_objects.py index b1aaf2e1..813812bb 100644 --- a/tests/test_gaia_objects.py +++ b/tests/test_gaia_objects.py @@ -18,6 +18,7 @@ {'collections': 'refcats', 'dstype': 'gaia_dr2_20200414', 'repo': os.path.join(CATALOG_DIR, 'repo')}, + 'id_prefix': 'gaia_dr2_', 'data_file_type': 'butler_refcat'}