From 8163bd681778587bc04b0433e9d4a126265df24e Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 22 Jan 2024 12:49:18 -0800 Subject: [PATCH 01/31] add optional exposure argument in lots of places --- skycatalogs/objects/base_object.py | 41 ++++++++++++++++++------------ 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index 6ec5b81c..d9ff5660 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -264,7 +264,7 @@ def get_observer_sed_component(self, component, mjd=None): raise NotImplementedError('get_observer_sed_component must be implemented by subclass') - def get_observer_sed_components(self, mjd=None): + def get_observer_sed_components(self, mjd=None, exposure=15.0): """ Return a dictionary of the SEDs, keyed by component name. """ @@ -283,7 +283,8 @@ def get_total_observer_sed(self, mjd=None): components. """ sed = None - for sed_cmp in self.get_observer_sed_components(mjd=mjd).values(): + for sed_cmp in self.get_observer_sed_components(mjd=mjd, + exposure=15.0).values(): if sed is None: sed = sed_cmp else: @@ -291,7 +292,7 @@ def get_total_observer_sed(self, mjd=None): return sed - def get_flux(self, bandpass, sed=None, mjd=None): + def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): """ Return the total object flux integrated over the bandpass in photons/sec/cm^2 @@ -299,7 +300,7 @@ def get_flux(self, bandpass, sed=None, mjd=None): """ if not sed: - sed = self.get_total_observer_sed(mjd=mjd) + sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) if sed is None: return 0.0 @@ -308,15 +309,16 @@ def get_flux(self, bandpass, sed=None, mjd=None): return flux - def get_fluxes(self, bandpasses, mjd=None): + def get_fluxes(self, bandpasses, mjd=None, exposure=15.0): # To avoid recomputing sed - sed = self.get_total_observer_sed(mjd=mjd) + sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) if sed is None: return [0.0 for b in bandpasses] return [sed.calculateFlux(b) for b in bandpasses] - def get_LSST_flux(self, band, sed=None, cache=True, mjd=None): + def get_LSST_flux(self, band, sed=None, cache=True, mjd=None, + exposure=15.0): if band not in LSST_BANDS: return None att = f'lsst_flux_{band}' @@ -329,32 +331,36 @@ def get_LSST_flux(self, band, sed=None, cache=True, mjd=None): if att in self.native_columns: return self.get_native_attribute(att) - val = self.get_flux(lsst_bandpasses[band], sed=sed, mjd=mjd) + val = self.get_flux(lsst_bandpasses[band], sed=sed, mjd=mjd, + exposure=exposure) if cache: setattr(self, att, val) return val - def get_LSST_fluxes(self, cache=True, as_dict=True, mjd=None): + def get_LSST_fluxes(self, cache=True, as_dict=True, mjd=None, + exposure=15.0): ''' Return a dict of fluxes for LSST bandpasses or, if as_dict is False, just a list in the same order as LSST_BANDS ''' fluxes = dict() - sed = self.get_total_observer_sed(mjd=mjd) + sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) if sed is None: for band in LSST_BANDS: fluxes[band] = 0.0 else: for band in LSST_BANDS: fluxes[band] = self.get_LSST_flux(band, sed=sed, - cache=cache, mjd=mjd) + cache=cache, mjd=mjd, + exposure=exposure) if as_dict: return fluxes else: return list(fluxes.values()) - def get_roman_flux(self, band, sed=None, cache=True, mjd=None): + def get_roman_flux(self, band, sed=None, cache=True, mjd=None, + exposure=15.0): if band not in ROMAN_BANDS: return None att = f'roman_flux_{band}' @@ -367,19 +373,21 @@ def 
get_roman_flux(self, band, sed=None, cache=True, mjd=None): if att in self.native_columns: return self.get_native_attribute(att) - val = self.get_flux(roman_bandpasses[band], sed=sed, mjd=mjd) + val = self.get_flux(roman_bandpasses[band], sed=sed, mjd=mjd, + exposure=exposure) if cache: setattr(self, att, val) return val - def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None): + def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None, + exposure=15.0): ''' Return a dict of fluxes for Roman bandpasses or, if as_dict is False, just a list in the same order as ROMAN_BANDS ''' fluxes = dict() - sed = self.get_total_observer_sed(mjd=mjd) + sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) if sed is None: for band in ROMAN_BANDS: @@ -387,7 +395,8 @@ def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None): else: for band in ROMAN_BANDS: fluxes[band] = self.get_roman_flux(band, sed=sed, - cache=cache, mjd=mjd) + cache=cache, mjd=mjd, + exposure=exposure) if as_dict: return fluxes else: From befcc3e7c41036e0b2737d5ba0d2ccb74bf14b5d Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Tue, 23 Jan 2024 18:12:41 -0800 Subject: [PATCH 02/31] Create a single "main" skyCatalogs file for sso --- skycatalogs/catalog_creator.py | 21 +++- skycatalogs/objects/snana_object.py | 2 +- skycatalogs/objects/sso_object.py | 63 ++++++++++++ skycatalogs/scripts/create_sc.py | 16 +++- skycatalogs/skyCatalogs.py | 7 +- skycatalogs/sso_catalog_creator.py | 144 ++++++++++++++++++++++++++++ skycatalogs/utils/sed_tools.py | 30 +++++- 7 files changed, 274 insertions(+), 9 deletions(-) create mode 100644 skycatalogs/objects/sso_object.py create mode 100644 skycatalogs/sso_catalog_creator.py diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py index 7218e700..ba0be424 100644 --- a/skycatalogs/catalog_creator.py +++ b/skycatalogs/catalog_creator.py @@ -24,6 +24,7 @@ from .utils.creator_utils import make_MW_extinction_av, make_MW_extinction_rv from .objects.base_object import LSST_BANDS from .objects.base_object import ROMAN_BANDS +from .sso_catalog_creator import SsoCatalogCreator """ Code to create a sky catalog for particular object types @@ -195,7 +196,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None, main_only=False, flux_parallel=16, galaxy_nside=32, galaxy_stride=1000000, provenance=None, dc2=False, sn_object_type='sncosmo', galaxy_type='cosmodc2', - include_roman_flux=False, star_input_fmt='sqlite'): + include_roman_flux=False, star_input_fmt='sqlite', + sso_truth=None, sso_sed=None): """ Store context for catalog creation @@ -242,6 +244,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None, galaxy_type Currently allowed values are cosmodc2 and diffsky include_roman_flux Calculate and write Roman flux values star_input_fmt May be either 'sqlite' or 'parquet' + sso_truth Directory containing Sorcha output + sso_sed Path to sed file to be used for all SSOs Might want to add a way to specify template for output file name and template for input sedLookup file name. 
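For orientation, a rough sketch (not part of the patch) of how the new sso_truth/sso_sed arguments described above are meant to flow; every path and the catalog_dir value here are hypothetical:

    from skycatalogs.catalog_creator import CatalogCreator

    # parts (healpix pixels) is not used for SSOs, so an empty list suffices
    creator = CatalogCreator([], skycatalog_root='/my/output/root',
                             catalog_dir='sso_test',
                             sso_truth='/my/sorcha/output',
                             sso_sed='/my/seds/solar_sed.db')
    creator.create('sso')   # main file first, then flux files unless main_only
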
@@ -289,6 +293,7 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
             self._star_truth = _star_db
         elif self._star_input_fmt == 'parquet':
             self._star_truth = _star_parquet
+        self._cat = None
 
         self._parts = parts
@@ -304,6 +309,9 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         self._output_dir = os.path.join(self._skycatalog_root,
                                         self._catalog_dir)
 
+        self._sso_creator = SsoCatalogCreator(self._output_dir, sso_truth,
+                                              sso_sed)
+
         self._written_config = None
         self._config_path = config_path
         self._catalog_name = catalog_name
@@ -324,6 +332,7 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         self._dc2 = dc2
         self._include_roman_flux = include_roman_flux
         self._obs_sed_factory = None
+        self._sos_sed_factory = None  ## do we need this?
 
     def _make_tophat_columns(self, dat, names, cmp):
         '''
@@ -355,8 +364,8 @@ def create(self, catalog_type):
 
         Parameters
         ----------
-        catalog_type   string    Currently 'galaxy' and 'pointsource' are
-                                 the only values allowed
+        catalog_type   string    Currently 'galaxy', 'pointsource'
+                                 and 'sso' are the only values allowed
         Return
         ------
         None
@@ -371,6 +380,12 @@ def create(self, catalog_type):
             self.create_pointsource_catalog()
             if not self._main_only:
                 self.create_pointsource_flux_catalog()
+        elif catalog_type == 'sso':
+            if not self._flux_only:
+                self._sso_creator.create_sso_catalog()
+            if not self._main_only:
+                self._sso_creator.create_sso_flux_catalog()
+
         else:
             raise NotImplementedError(f'CatalogCreator.create: unsupported catalog type {catalog_type}')
diff --git a/skycatalogs/objects/snana_object.py b/skycatalogs/objects/snana_object.py
index 9c0dca84..1648b3ed 100644
--- a/skycatalogs/objects/snana_object.py
+++ b/skycatalogs/objects/snana_object.py
@@ -92,7 +92,7 @@ def get_observer_sed_component(self, component, mjd=None):
         if mjd is None:
             mjd = self._belongs_to._mjd
         if mjd is None:
-            txt = 'SnananObject._get_sed: no mjd specified for this call\n'
+            txt = 'SnanaObject.get_observer_sed_component: no mjd specified for this call\n'
             txt += 'nor when generating object list'
             raise ValueError(txt)
         sed, _ = self._get_sed(mjd=mjd)
diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
new file mode 100644
index 00000000..807c3c55
--- /dev/null
+++ b/skycatalogs/objects/sso_object.py
@@ -0,0 +1,63 @@
+import os
+import numpy as np
+import galsim
+from .base_object import BaseObject
+
+__all__ = ['SsoObject']
+
+class SsoObject(BaseObject):
+    _type_name = 'sso'
+
+    def _get_sed(self, mjd=None):
+        '''
+        returns a SED and magnorm
+        mjd is required
+        '''
+
+        # Always return the same SED.  Maybe need a class method
+        # to load it?
+        # For magnorm use the magnitude from Sorcha.  Can it be used
+        # directly or are there other effects to be applied?
+        # Have to find it by looking for entry for this id, this mjd
+        # Do we look for specific entry or do we allow interpolation?
+        pass
+
+    def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0):
+        if gsparams is not None:
+            gsparams = galsim.GSParams(**gsparams)
+        # For a streak we use galsim.?? (galsim.Box?)
+        # To get the dimensions of the box, use ra rate, dec rate and
+        # exposure length.  The first two will be in the sky catalogs
+        # parquet file; the last will be passed in.
+        # For now start with the simpler thing: just a point source.
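The comment above anticipates trailed (streaked) sources. A hedged sketch of what a Box-based streak might look like; the ra_rate/dec_rate columns do not exist yet in this patch and all values below are illustrative assumptions:

    import numpy as np
    import galsim

    ra_rate, dec_rate = 0.02, 0.008   # deg/day, assumed future parquet columns
    exposure = 15.0                   # seconds, as passed in above
    # trail length in arcsec: sky motion rate times exposure time
    length = np.hypot(ra_rate, dec_rate) * 3600.0 * exposure / 86400.0
    angle = np.arctan2(dec_rate, ra_rate) * galsim.radians
    # a thin box smeared along the direction of motion
    streak = galsim.Box(width=length, height=1.e-6).rotate(angle)
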
+        return {'this_object': galsim.DeltaFunction(gsparams=gsparams)}
+
+    def get_observer_sed_component(self, component, mjd=None, exposure=15.0):
+        if mjd is None:
+            mjd = self._belongs_to._mjd
+        if mjd is None:
+            txt = 'SsoObject.get_observer_sed_component: no mjd specified for this call\n'
+            txt += 'nor when generating object list'
+            raise ValueError(txt)
+
+        sed, magnorm = self._get_sed(mjd=mjd)
+
+        flux_500 = np.exp(-0.9210340371976184 * magnorm)
+        sed = sed.withMagnitude(0, self._bp500)
+        sed = sed*flux_500
+
+        # no extinction
+        return sed
+
+    def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0):
+        if not sed:
+            sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure)
+        if sed is None:
+            return 0.0
+
+        flux = sed.calculateFlux(bandpass)
+
+        return flux
diff --git a/skycatalogs/scripts/create_sc.py b/skycatalogs/scripts/create_sc.py
index 0503eac0..9861e443 100644
--- a/skycatalogs/scripts/create_sc.py
+++ b/skycatalogs/scripts/create_sc.py
@@ -92,6 +92,14 @@
                     choices=['sqlite', 'parquet'], help='''
                     star truth may come from either sqlite db or
                     collection of parquet files''')
+parser.add_argument('--sso-truth', default=None, help='''
+                    directory containing SSO truth, used to construct
+                    SSO catalogs''')
+parser.add_argument('--sso-sed', default=None, help='''
+                    path to sqlite file containing SED to be used
+                    for all SSOs''')
+parser.add_argument('--sso', action='store_true',
+                    help='''If provided output SSO catalog''')
 
 args = parser.parse_args()
 
@@ -144,7 +152,8 @@
                          dc2=args.dc2, galaxy_type=args.galaxy_type,
                          galaxy_truth=args.galaxy_truth,
                          include_roman_flux=args.include_roman_flux,
-                         star_input_fmt=args.star_input_fmt)
+                         star_input_fmt=args.star_input_fmt,
+                         sso_truth=args.sso_truth, sso_sed=args.sso_sed)
 logger.info(f'Starting with healpix pixel {parts[0]}')
 if not args.no_galaxies:
     logger.info("Creating galaxy catalogs")
@@ -154,6 +163,9 @@
     logger.info("Creating point source catalogs")
     creator.create('pointsource')
 
-
+if args.sso:
+    logger.info("Creating SSO catalogs")
+    creator.create('sso')
+
 logger.info('All done')
 print_date()
diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py
index 4c35f560..723edfa6 100644
--- a/skycatalogs/skyCatalogs.py
+++ b/skycatalogs/skyCatalogs.py
@@ -14,6 +14,7 @@
 from skycatalogs.objects.gaia_object import GaiaObject, GaiaCollection
 from skycatalogs.readers import ParquetReader
 from skycatalogs.utils.sed_tools import TophatSedFactory, DiffskySedFactory
+from skycatalogs.utils.sed_tools import SsoSedFactory
 from skycatalogs.utils.sed_tools import MilkyWayExtinction
 from skycatalogs.utils.config_utils import Config
 from skycatalogs.utils.shapes import Box, Disk, PolygonalRegion
@@ -303,7 +304,11 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False,
                                                          config['object_types']['diffsky_galaxy']
                                                          ['sed_file_template'],
                                                          config['Cosmology'])
-
+        if 'sso' in config['object_types']:
+            self._sso_sed_path = config['provenance']['inputs'].get('sso_sed',
+                                                                    default='sso_sed.db')
+
+            self._sso_sed_factory = SsoSedFactory(self._sso_sed_path)
 
         self._extinguisher = MilkyWayExtinction()
 
         # Make our properties accessible to BaseObject, etc.
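Two notes on the hunks above. First, the conversion constant in get_observer_sed_component is 0.4*ln(10) = 0.9210340371976184, so flux_500 is just the standard magnitude-to-flux relation; a quick consistency check:

    import numpy as np

    magnorm = 22.0   # illustrative value
    assert np.isclose(np.exp(-0.9210340371976184 * magnorm),
                      10 ** (-0.4 * magnorm))
    # Two caveats, both assumptions on my part rather than facts from the
    # patch: _bp500 (presumably a narrow bandpass centered at 500 nm, as in
    # SnanaObject) is never defined in this patch; and if
    # config['provenance']['inputs'] is a plain dict, then
    # .get('sso_sed', default='sso_sed.db') raises TypeError, since dict.get
    # takes its fallback positionally: .get('sso_sed', 'sso_sed.db').
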
diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py new file mode 100644 index 00000000..0da81f12 --- /dev/null +++ b/skycatalogs/sso_catalog_creator.py @@ -0,0 +1,144 @@ +import os +import sys +import re +### import logging +import numpy as np +import numpy.ma as ma +import sqlite3 +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq + +# from .utils.parquet_schema_utils import make_sso_flux_schema +### or alternately include method in SsoCatalogCreator + +""" +Code for creating sky catalogs for sso objects +""" + +__all__ = ['SsoCatalogCreator'] + +_DEFAULT_ROW_GROUP_SIZE = 20000 # for testing. Should be much larger + +def _partition_mjd(mins, maxes, counts, max_rowgroup_size): + ''' Determine mjd intervals, each of which will be processed + in one go''' + total_rows = np.sum(np.array(counts)) + min_min = int(np.floor(np.min(np.array(mins)))) + max_max = int(np.ceil(np.max(np.array(maxes)))) + # n_rowgroup = (max_max + (max_rowgroup_size - 1) - min_min)/max_rowgroup_size + n_rowgroup = int(np.ceil(total_rows/max_rowgroup_size)) + # for debugging + if n_rowgroup < 2: + n_rowgroup = 2 + mjd_interval = int(np.ceil((max_max - min_min)/n_rowgroup)) + mjds = [min_min] + last = min_min + for i in range(n_rowgroup): + last += mjd_interval + last = min(last, max_max) + mjds.append(last) + if last == max_max: + break + + return mjds + + + +class SsoCatalogCreator: + _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/19jan2024' + _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db' + + def __init__(self, output_dir, sso_truth=None, sso_sed=None): + self._output_dir = output_dir + self._sso_truth = sso_truth + if sso_truth is None: + self._sso_truth = SsoCatalogCreator._sso_truth + self._sso_sed = sso_sed + if sso_sed is None: + self._sso_sed = SsoCatalogCreator._sso_sed + + self._row_group_size = _DEFAULT_ROW_GROUP_SIZE + + tbl = 'pp_results' + mjd_c = 'FieldMJD_TAI' + self._mjd_q = f'select min({mjd_c}), max({mjd_c}), count({mjd_c}) from {tbl}' + self._df_query = f'''select ObjID as id, {mjd_c} as mjd, + "AstRA(deg)" as ra, "AstDec(deg)" as dec, optFilter as filter, + observedTrailedSourceMag from {tbl} where mjd >= (?) + and mjd < (?) order by mjd''' + + def _create_main_schema(self): + + fields = [pa.field('id', pa.string()), + pa.field('mjd', pa.float64()), + pa.field('ra', pa.float64()), + pa.field('dec', pa.float64()), + pa.field('observedTrailedSourceMag', pa.float64()), + pa.field('filter', pa.string())] + # pa.field('ra_rate', pa.float64()), + # pa.field('dec_rate', pa.float64())] + return pa.schema(fields) + + def _create_flux_schema(self): + # id, mjd and 6 flux fields (for now. Maybe later also Roman) + pass + + def _write_rg(self, writer, min_mjd, max_mjd, db_files, arrow_schema): + df_list = [] + for f in db_files: + conn = sqlite3.connect(f) + df_list.append(pd.read_sql_query(self._df_query, + conn, + params=(min_mjd, max_mjd))) + df = pd.concat(df_list) + df_sorted = df.sort_values('mjd') + tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema) + writer.write_table(tbl) + + def create_sso_catalog(self): + """ + Create the 'main' sso catalog, including everything except fluxes + """ + # Find all the db files from Sorcha. 
They should all be in a single + # directory with no other files in that directory + files = os.listdir(self._sso_truth) + db_files = [os.path.join(self._sso_truth, f) for f in files if f.endswith('.db')] + mins = [] + maxes = [] + counts = [] + for f in db_files: + with sqlite3.connect(f) as conn: + res = conn.execute(self._mjd_q) + r = res.fetchone() + mins.append(float(r[0])) + maxes.append(float(r[1])) + counts.append(int(r[2])) + + mjd_list = _partition_mjd(mins, maxes, counts, self._row_group_size) + arrow_schema = self._create_main_schema() + + ### Depending on length of mjd_list, may need more than one + ### output file in which case will need to add an outer loop + ### over output file. Decide on max row groups per file. + mjd_min = min(mins) + mjd_max = max(maxes) + out_name = f'sso_{int(mjd_min)}_{int(np.ceil(mjd_max))}.parquet' + writer = pq.ParquetWriter(os.path.join(self._output_dir, out_name), + arrow_schema) + for i in range(len(mjd_list) - 1): + self._write_rg(writer, mjd_list[i], mjd_list[i+1], db_files, + arrow_schema) + + # close parquet file + writer.close() + + # In sso description in config come up with suitable re for filenames + + def create_sso_flux_catalog(self): + """ + Create sso flux catalog. Includes id, mjd (since given sso normally + has several observations) and fluxes. (Or just one flux and + associated band?) + """ + pass diff --git a/skycatalogs/utils/sed_tools.py b/skycatalogs/utils/sed_tools.py index 538f97b5..817767fe 100644 --- a/skycatalogs/utils/sed_tools.py +++ b/skycatalogs/utils/sed_tools.py @@ -4,13 +4,14 @@ from astropy.cosmology import FlatLambdaCDM import astropy.constants import h5py +import sqlite3 import numpy as np from dust_extinction.parameter_averages import F19 import galsim -__all__ = ['TophatSedFactory', 'DiffskySedFactory', 'MilkyWayExtinction', - 'get_star_sed_path', 'generate_sed_path'] +__all__ = ['TophatSedFactory', 'DiffskySedFactory', 'SsoSedFactory', + 'MilkyWayExtinction', 'get_star_sed_path', 'generate_sed_path'] class TophatSedFactory: @@ -205,6 +206,31 @@ def create(self, pixel, galaxy_id, redshift_hubble, redshift): return seds +class SsoSedFactory(): + ''' + Load the single SED used for SSO objects and make it available as galsim + SED + ''' + def __init__(self, sed_path, fmt='db'): + ''' + For now only support db format. 
In fact we're hardcoding additional
+        assumptions: that the table name is "SED" and that the columns are
+        "wavelength" (units angstroms) and "flux" (units flambda)
+        '''
+        if fmt != 'db':
+            raise ValueError(f'SsoSedFactory: Unsupported input format {fmt}; must be "db"')
+        wave_type = 'angstrom'
+        flux_type = 'flambda'
+        q = 'select wavelength, flux from SED order by wavelength'
+        with sqlite3.connect(sed_path) as conn:
+            sed_df = pd.read_sql_query(q, conn)
+        lut = galsim.LookupTable(x=sed_df['wavelength'], f=sed_df['flux'],
+                                 interpolant='linear')
+        sed = galsim.SED(lut, wave_type=wave_type, flux_type=flux_type)
+        self.sed = sed
+
+    def create(self):
+        return self.sed
 
 class MilkyWayExtinction:
     '''
     Applies extinction to a SED

From 3a87b5e825dbc8f692f9713b0b761a9ad6896364 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Tue, 23 Jan 2024 18:26:08 -0800
Subject: [PATCH 03/31] add sso object type to standard output config

---
 cfg/object_types.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/cfg/object_types.yaml b/cfg/object_types.yaml
index e3df01b9..3593b480 100644
--- a/cfg/object_types.yaml
+++ b/cfg/object_types.yaml
@@ -100,3 +100,10 @@
       { type: healpix, ordering : ring, nside : 32}
     sed_model : snana
     internal_extinction : None
+  sso :
+    file_template : 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
+    flux_file_template : 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
+    data_file_type : parquet
+    area_partition : None
+    time_partition : mjd
+    sed_model : file_angstrom

From 7c91076a8c79cdbb4498456debae29a7c0d341f2 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Wed, 24 Jan 2024 16:27:33 -0800
Subject: [PATCH 04/31] some progress on API, flux file creation. Also
 cosmetic changes for flake8

---
 cfg/object_types.yaml              | 210 ++++++++++++++---------------
 skycatalogs/catalog_creator.py     |  14 +-
 skycatalogs/objects/base_object.py |  10 +-
 skycatalogs/objects/sso_object.py  |   9 +-
 skycatalogs/skyCatalogs.py         |  21 ++-
 skycatalogs/sso_catalog_creator.py |  67 +++++++--
 skycatalogs/utils/sed_tools.py     |   5 +-
 7 files changed, 202 insertions(+), 134 deletions(-)

diff --git a/cfg/object_types.yaml b/cfg/object_types.yaml
index 3593b480..49d3bcb3 100644
--- a/cfg/object_types.yaml
+++ b/cfg/object_types.yaml
@@ -1,109 +1,109 @@
 # Yaml fragment to be incorporated into configs
 object_types :
-  galaxy :
-    file_template : 'galaxy_(?P<healpix>\d+).parquet'
-    flux_file_template : 'galaxy_flux_(?P<healpix>\d+).parquet'
-    data_file_type : parquet
-    area_partition :
-      { type: healpix, ordering : ring, nside : 32}
-    composite :
-      bulge : required
-      disk : required
-      knots : optional
-    attribute_aliases :
-      size_knots_true : size_disk_true
-      size_minor_knots_true : size_minor_disk_true
-  bulge_basic :
-    subtype : bulge
-    parent : galaxy
-    sed_model : tophat
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : sersic2D
-  disk_basic :
-    subtype : disk
-    parent : galaxy
-    sed_model : tophat
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : sersic2D
-  knots_basic :
-    subtype : knots
-    parent : galaxy
-    sed_model : tophat
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : knots
-  diffsky_galaxy :
-    file_template : 'galaxy_(?P<healpix>\d+).parquet'
-    flux_file_template : 'galaxy_flux_(?P<healpix>\d+).parquet'
-    sed_file_template : 'galaxy_sed_(?P<healpix>\d+).hdf5'
-    data_file_type : parquet
-    area_partition :
-      { type: healpix, ordering : ring, nside : 32}
-    composite :
-      bulge : required
-      disk : required
-      knots : optional
-  diffsky_bulge :
-    subtype : bulge
-    parent : diffsky_galaxy
-    sed_model : TBD
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : sersic2D
-  diffsky_disk :
-    subtype : disk
-    parent : diffsky_galaxy
-    sed_model : TBD
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : sersic2D
-  diffsky_knots :
-    subtype : knots
-    parent : diffsky_galaxy
-    sed_model : TBD
-    internal_extinction : CCM
-    MW_extinction : F19
-    spatial_model : knots
-
-  star :
-    file_template : 'pointsource_(?P<healpix>\d+).parquet'
-    flux_file_template : 'pointsource_flux_(?P<healpix>\d+).parquet'
-    data_file_type : parquet
-    area_partition :
-      { type: healpix, ordering : ring, nside : 32}
-    sed_model : file_nm
-    sed_file_root_env_var : SIMS_SED_LIBRARY_DIR
-    MW_extinction : F19
-    internal_extinction : None
-  sncosmo :
-    file_template : 'pointsource_(?P<healpix>\d+).parquet'
-    data_file_type : parquet
-    area_partition :
-      { type: healpix, ordering : ring, nside : 32}
-
-    sed_model : sncosmo
-    MW_extinction : F19
-    internal_extinction : None
-  gaia_star :
-    data_file_type : butler_refcat
-    butler_parameters :
-        collections : HSC/defaults
-        dstype : gaia_dr2_20200414
-    area_partition : None
-    sed_method : use_lut
-  snana :
-    file_template : 'snana_(?P<healpix>\d+).parquet'
-    data_file_type : parquet
-    area_partition :
-      { type: healpix, ordering : ring, nside : 32}
-    sed_model : snana
-    internal_extinction : None
-  sso :
-    file_template : 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
-    flux_file_template : 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
-    data_file_type : parquet
-    area_partition : None
-    time_partition : mjd
-    sed_model : file_angstrom
+  galaxy:
+    file_template: 'galaxy_(?P<healpix>\d+).parquet'
+    flux_file_template: 'galaxy_flux_(?P<healpix>\d+).parquet'
+    data_file_type: parquet
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
+    composite:
+      bulge: required
+      disk: required
+      knots: optional
+    attribute_aliases:
+      size_knots_true: size_disk_true
+      size_minor_knots_true: size_minor_disk_true
+  bulge_basic:
+    subtype: bulge
+    parent: galaxy
+    sed_model: tophat
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: sersic2D
+  disk_basic:
+    subtype: disk
+    parent: galaxy
+    sed_model: tophat
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: sersic2D
+  knots_basic:
+    subtype: knots
+    parent: galaxy
+    sed_model: tophat
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: knots
+  diffsky_galaxy:
+    file_template: 'galaxy_(?P<healpix>\d+).parquet'
+    flux_file_template: 'galaxy_flux_(?P<healpix>\d+).parquet'
+    sed_file_template: 'galaxy_sed_(?P<healpix>\d+).hdf5'
+    data_file_type: parquet
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
+    composite:
+      bulge: required
+      disk : required
+      knots: optional
+  diffsky_bulge:
+    subtype: bulge
+    parent: diffsky_galaxy
+    sed_model: TBD
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: sersic2D
+  diffsky_disk:
+    subtype: disk
+    parent: diffsky_galaxy
+    sed_model: TBD
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: sersic2D
+  diffsky_knots:
+    subtype: knots
+    parent: diffsky_galaxy
+    sed_model: TBD
+    internal_extinction: CCM
+    MW_extinction: F19
+    spatial_model: knots
+
+  star:
+    file_template: 'pointsource_(?P<healpix>\d+).parquet'
+    flux_file_template: 'pointsource_flux_(?P<healpix>\d+).parquet'
+    data_file_type: parquet
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
+    sed_model: file_nm
+    sed_file_root_env_var: SIMS_SED_LIBRARY_DIR
+    MW_extinction: F19
+    internal_extinction: None
+  sncosmo:
+    file_template: 'pointsource_(?P<healpix>\d+).parquet'
+    data_file_type: parquet
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
+
+    sed_model: sncosmo
+    MW_extinction: F19
+    internal_extinction: None
+  gaia_star:
+    data_file_type: butler_refcat
+    butler_parameters:
+        collections: HSC/defaults
+        dstype: gaia_dr2_20200414
+    area_partition: None
+    sed_method: use_lut
+  snana:
+    file_template: 'snana_(?P<healpix>\d+).parquet'
+    data_file_type: parquet
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
+    sed_model: snana
+    internal_extinction: None
+  sso:
+    file_template: 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
+    flux_file_template: 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
+    data_file_type: parquet
+    area_partition: None
+    time_partition: mjd
+    sed_model: dbfile_angstrom_flambda
diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py
index ba0be424..d0524e4d 100644
--- a/skycatalogs/catalog_creator.py
+++ b/skycatalogs/catalog_creator.py
@@ -35,6 +35,7 @@
 _MW_rv_constant = 3.1
 _nside_allowed = 2**np.arange(15)
 
+
 def _get_tophat_info(columns):
     '''
     Parameters
@@ -309,9 +310,6 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         self._output_dir = os.path.join(self._skycatalog_root,
                                         self._catalog_dir)
 
-        self._sso_creator = SsoCatalogCreator(self._output_dir, sso_truth,
-                                              sso_sed)
-
         self._written_config = None
         self._config_path = config_path
         self._catalog_name = catalog_name
@@ -332,7 +330,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
         self._dc2 = dc2
         self._include_roman_flux = include_roman_flux
         self._obs_sed_factory = None
-        self._sos_sed_factory = None  ## do we need this?
+        self._sos_sed_factory = None  # do we need this?
+        self._sso_creator = SsoCatalogCreator(self, sso_truth, sso_sed)
 
     def _make_tophat_columns(self, dat, names, cmp):
         '''
@@ -385,7 +384,7 @@ def create(self, catalog_type):
                 self._sso_creator.create_sso_catalog()
             if not self._main_only:
                 self._sso_creator.create_sso_flux_catalog()
-
+
         else:
             raise NotImplementedError(f'CatalogCreator.create: unsupported catalog type {catalog_type}')
@@ -550,7 +549,8 @@ def create_galaxy_pixel(self, pixel, gal_cat, arrow_schema):
                        'convergence', 'diskEllipticity1', 'diskEllipticity2',
                        'spheroidEllipticity1', 'spheroidEllipticity2',
                        'spheroidHalfLightRadiusArcsec',
-                       'diskHalfLightRadiusArcsec','um_source_galaxy_obs_sm']
+                       'diskHalfLightRadiusArcsec',
+                       'um_source_galaxy_obs_sm']
 
         # df is not a dataframe!
It's just a dict if not self._mag_cut: @@ -1071,6 +1071,8 @@ def write_config(self, overwrite=False, path_only=False): inputs['sn_truth'] = self._sn_truth if self._star_truth: inputs['star_truth'] = self._star_truth + if self._sso_truth: + inputs['sso_truth'] = self._sso_truth config.add_key('provenance', assemble_provenance(self._pkg_root, inputs=inputs)) diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index d9ff5660..4d3939d9 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -663,8 +663,14 @@ def __init__(self): def append_collection(self, coll): old = self._total_len - self._total_len += len(coll) - self._located.append(LocatedCollection(coll, old, self._total_len)) + if isinstance(coll, ObjectCollection): + self._total_len += len(coll) + self._located.append(LocatedCollection(coll, old, self._total_len)) + else: # list of collections + for c in coll: + self._total_len += len(c) + self._located.append(LocatedCollection(c, old, self._total_len)) + old = self._total_len def append_object_list(self, object_list): for e in object_list._located: diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 807c3c55..bc4017ec 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -8,6 +8,11 @@ class SsoObject(BaseObject): _type_name = 'sso' + def __init__(self, ra, dec, id, object_type, belongs_to, belongs_index, + mjd): + super.__init__(ra, dec, id, self._type_name, belongs_to, belongs_index) + self._mjd = mjd + def _get_sed(self, mjd=None): ''' returns a SED and magnorm @@ -46,7 +51,7 @@ def get_observer_sed_component(self, component, mjd=None, exposure=15.0): sed = self.withMagnitude(0, self._bp500) sed = sed*flux_500 - # no extinction + # no extinction return sed def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): @@ -58,6 +63,4 @@ def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): flux = sed.calculateFlux(bandpass) return flux - - diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 723edfa6..25d19e0d 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -306,8 +306,8 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, config['Cosmology']) if 'sso' in config['object_types']: self._sso_sed_path = config['provenance']['inputs'].get('sso_sed', - default='sso_sed.db') - + default='sso_sed.db') + self._sso_sed_factory = SsoSedFactory(self._sso_sed_path) self._extinguisher = MilkyWayExtinction() @@ -567,7 +567,8 @@ def get_object_type_by_region(self, region, object_type, mjd=None): region box, circle or PolygonalRegion. Supported region types made depend on object_type object_type known object type without parent - mjd MJD of observation epoch. + mjd MJD of observation epoch. In case of custom load + this argument may be a pair defining an interval Returns all objects found ''' @@ -721,6 +722,20 @@ def get_object_iterator_by_hp(self, hp, obj_type_set=None, ''' pass + def get_object_by_mjd_interval(self, min_mjd, max_mjd, + file_list=None, obj_type='sso'): + ''' + Make object list of all objects with mjd within interval. + If file_list is specified, search only those files. + Currently only implemented for obj_type_set={'sso'} + Parameters + ---------- + min_mjd float mjd must be >= min_mjd + max_mjd float mjd must be < max_mjd + file_list list files to search. 
If None, search all files + obj_type string Currently only 'sso' is supported + ''' + def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False): ''' diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 0da81f12..176920f3 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -18,7 +18,8 @@ __all__ = ['SsoCatalogCreator'] -_DEFAULT_ROW_GROUP_SIZE = 20000 # for testing. Should be much larger +_DEFAULT_ROW_GROUP_SIZE = 100000 # Maybe could be larger + def _partition_mjd(mins, maxes, counts, max_rowgroup_size): ''' Determine mjd intervals, each of which will be processed @@ -44,13 +45,22 @@ def _partition_mjd(mins, maxes, counts, max_rowgroup_size): return mjds - class SsoCatalogCreator: _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/19jan2024' _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db' - def __init__(self, output_dir, sso_truth=None, sso_sed=None): - self._output_dir = output_dir + def __init__(self, catalog_creator, output_dir, + sso_truth=None, sso_sed=None): + ''' + Parameters + ---------- + catalog_creator instance of CatalogCreator + sso_truth path to input data directory + sso_sed path to solar sed + ''' + self._catalog_creator = catalog_creator + self._output_dir = catalog_creator._output_dir + self._logger = catalog_creator._logger self._sso_truth = sso_truth if sso_truth is None: self._sso_truth = SsoCatalogCreator._sso_truth @@ -70,19 +80,29 @@ def __init__(self, output_dir, sso_truth=None, sso_sed=None): def _create_main_schema(self): - fields = [pa.field('id', pa.string()), - pa.field('mjd', pa.float64()), - pa.field('ra', pa.float64()), - pa.field('dec', pa.float64()), - pa.field('observedTrailedSourceMag', pa.float64()), - pa.field('filter', pa.string())] - # pa.field('ra_rate', pa.float64()), - # pa.field('dec_rate', pa.float64())] + fields = [ + pa.field('id', pa.string()), + pa.field('mjd', pa.float64()), + pa.field('ra', pa.float64()), + pa.field('dec', pa.float64()), + pa.field('observedTrailedSourceMag', pa.float64()), + # pa.field('ra_rate', pa.float64()), + # pa.field('dec_rate', pa.float64()), + pa.field('filter', pa.string())] return pa.schema(fields) def _create_flux_schema(self): # id, mjd and 6 flux fields (for now. Maybe later also Roman) - pass + fields = [ + pa.field('id', pa.string()), + pa.field('mjd', pa.float64()), + pa.field('lsst_flux_u', pa.float32(), True), + pa.field('lsst_flux_g', pa.float32(), True), + pa.field('lsst_flux_r', pa.float32(), True), + pa.field('lsst_flux_i', pa.float32(), True), + pa.field('lsst_flux_z', pa.float32(), True), + pa.field('lsst_flux_y', pa.float32(), True)] + return pa.schema(fields) def _write_rg(self, writer, min_mjd, max_mjd, db_files, arrow_schema): df_list = [] @@ -135,10 +155,29 @@ def create_sso_catalog(self): # In sso description in config come up with suitable re for filenames + def _create_sso_flux_file(self, main_file): + pass + def create_sso_flux_catalog(self): """ Create sso flux catalog. Includes id, mjd (since given sso normally has several observations) and fluxes. (Or just one flux and associated band?) 
""" - pass + from .skyCatalogs import open_catalog + + arrow_schema = self._create_flux_schema() + config_file = self._catalog_creator.write_config(path_only=True) + self._cat = open_catalog(config_file, + skycatalog_root=self._catalog_creator._skycatalog_root) + sso_config = self._cat.raw_config['object_types']['sso'] + self._flux_template = sso_config['flux_file_template'] + self._main_template = sso_config['file_template'] + + self._logger.info('Creating sso flux files') + + # For each main file make a corresponding flux file + files = os.listdir(self._output_dir) + mains = [os.path.join(self._output_dir, f) for f in files if re.match(self._main_template, f)] + for f in mains: + self._create_sso_flux_file(f) diff --git a/skycatalogs/utils/sed_tools.py b/skycatalogs/utils/sed_tools.py index 817767fe..b00d2a98 100644 --- a/skycatalogs/utils/sed_tools.py +++ b/skycatalogs/utils/sed_tools.py @@ -5,6 +5,7 @@ import astropy.constants import h5py import sqlite3 +import pandas as pd import numpy as np from dust_extinction.parameter_averages import F19 @@ -197,7 +198,7 @@ def create(self, pixel, galaxy_id, redshift_hubble, redshift): seds = {} for i, component in enumerate(['bulge', 'disk', 'knots']): - lut = galsim.LookupTable(x=self._wave_list, f=sed_array[i,:], + lut = galsim.LookupTable(x=self._wave_list, f=sed_array[i, :], interpolant='linear') # Create the SED object and apply redshift. sed = galsim.SED(lut, wave_type='angstrom', flux_type='fnu')\ @@ -206,6 +207,7 @@ def create(self, pixel, galaxy_id, redshift_hubble, redshift): return seds + class SsoSedFactory(): ''' Load the single SED used for SSO objects and make it available as galsim @@ -232,6 +234,7 @@ def __init__(self, sed_path, fmt='db'): def create(self): return self.sed + class MilkyWayExtinction: ''' Applies extinction to a SED From 7d328227458d73df2788308a1a0cad89f4a900a5 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Thu, 25 Jan 2024 13:09:08 -0800 Subject: [PATCH 05/31] exposure parameter is only needed for get_gsobject_components --- skycatalogs/objects/base_object.py | 43 ++++++++++++------------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index 4d3939d9..2377ad32 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -240,7 +240,7 @@ def _get_sed_from_file(self, fpath, redshift=0): sed = sed.atRedshift(redshift) return sed - def get_gsobject_components(self, gsparams=None, rng=None): + def get_gsobject_components(self, gsparams=None, rng=None, exposure=None): """ Return a dictionary of the GSObject components for the sky catalogs object, keyed by component name. @@ -264,7 +264,7 @@ def get_observer_sed_component(self, component, mjd=None): raise NotImplementedError('get_observer_sed_component must be implemented by subclass') - def get_observer_sed_components(self, mjd=None, exposure=15.0): + def get_observer_sed_components(self, mjd=None): """ Return a dictionary of the SEDs, keyed by component name. """ @@ -283,8 +283,7 @@ def get_total_observer_sed(self, mjd=None): components. 
""" sed = None - for sed_cmp in self.get_observer_sed_components(mjd=mjd, - exposure=15.0).values(): + for sed_cmp in self.get_observer_sed_components(mjd=mjd).values(): if sed is None: sed = sed_cmp else: @@ -292,7 +291,7 @@ def get_total_observer_sed(self, mjd=None): return sed - def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): + def get_flux(self, bandpass, sed=None, mjd=None): """ Return the total object flux integrated over the bandpass in photons/sec/cm^2 @@ -300,7 +299,7 @@ def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): """ if not sed: - sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) + sed = self.get_total_observer_sed(mjd=mjd) if sed is None: return 0.0 @@ -309,16 +308,15 @@ def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0): return flux - def get_fluxes(self, bandpasses, mjd=None, exposure=15.0): + def get_fluxes(self, bandpasses, mjd=None): # To avoid recomputing sed - sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) + sed = self.get_total_observer_sed(mjd=mjd) if sed is None: return [0.0 for b in bandpasses] return [sed.calculateFlux(b) for b in bandpasses] - def get_LSST_flux(self, band, sed=None, cache=True, mjd=None, - exposure=15.0): + def get_LSST_flux(self, band, sed=None, cache=True, mjd=None): if band not in LSST_BANDS: return None att = f'lsst_flux_{band}' @@ -331,36 +329,32 @@ def get_LSST_flux(self, band, sed=None, cache=True, mjd=None, if att in self.native_columns: return self.get_native_attribute(att) - val = self.get_flux(lsst_bandpasses[band], sed=sed, mjd=mjd, - exposure=exposure) + val = self.get_flux(lsst_bandpasses[band], sed=sed, mjd=mjd) if cache: setattr(self, att, val) return val - def get_LSST_fluxes(self, cache=True, as_dict=True, mjd=None, - exposure=15.0): + def get_LSST_fluxes(self, cache=True, as_dict=True, mjd=None): ''' Return a dict of fluxes for LSST bandpasses or, if as_dict is False, just a list in the same order as LSST_BANDS ''' fluxes = dict() - sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) + sed = self.get_total_observer_sed(mjd=mjd) if sed is None: for band in LSST_BANDS: fluxes[band] = 0.0 else: for band in LSST_BANDS: fluxes[band] = self.get_LSST_flux(band, sed=sed, - cache=cache, mjd=mjd, - exposure=exposure) + cache=cache, mjd=mjd) if as_dict: return fluxes else: return list(fluxes.values()) - def get_roman_flux(self, band, sed=None, cache=True, mjd=None, - exposure=15.0): + def get_roman_flux(self, band, sed=None, cache=True, mjd=None): if band not in ROMAN_BANDS: return None att = f'roman_flux_{band}' @@ -373,21 +367,19 @@ def get_roman_flux(self, band, sed=None, cache=True, mjd=None, if att in self.native_columns: return self.get_native_attribute(att) - val = self.get_flux(roman_bandpasses[band], sed=sed, mjd=mjd, - exposure=exposure) + val = self.get_flux(roman_bandpasses[band], sed=sed, mjd=mjd) if cache: setattr(self, att, val) return val - def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None, - exposure=15.0): + def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None): ''' Return a dict of fluxes for Roman bandpasses or, if as_dict is False, just a list in the same order as ROMAN_BANDS ''' fluxes = dict() - sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure) + sed = self.get_total_observer_sed(mjd=mjd) if sed is None: for band in ROMAN_BANDS: @@ -395,8 +387,7 @@ def get_roman_fluxes(self, cache=True, as_dict=True, mjd=None, else: for band in ROMAN_BANDS: fluxes[band] = self.get_roman_flux(band, sed=sed, - 
                                                   cache=cache, mjd=mjd,
-                                                   exposure=exposure)
+                                                   cache=cache, mjd=mjd)
         if as_dict:
             return fluxes
         else:
             return list(fluxes.values())

From 6b051e76e25b9a561bf614fbeca9334f8bdb848e Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Fri, 26 Jan 2024 14:36:28 -0800
Subject: [PATCH 06/31] erfa.pmsafe did not like mixture of dataframe columns
 and np arrays

---
 skycatalogs/objects/gaia_object.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/skycatalogs/objects/gaia_object.py b/skycatalogs/objects/gaia_object.py
index 685a7966..7ece5abc 100644
--- a/skycatalogs/objects/gaia_object.py
+++ b/skycatalogs/objects/gaia_object.py
@@ -203,9 +203,11 @@ def load_collection(region, skycatalog, mjd=None):
         rv1 = np.zeros(len(df))
         epNa = 2400000.5   # "part A" of starting and ending epochs for MJDs.
         ep2b = mjd
-        pm_outputs = erfa.pmsafe(df['coord_ra'], df['coord_dec'],
-                                 df['pm_ra'], df['pm_dec'], df['parallax'],
-                                 rv1, epNa, df['epoch'], epNa, ep2b)
+        pm_outputs = erfa.pmsafe(np.array(df['coord_ra']),
+                                 np.array(df['coord_dec']),
+                                 np.array(df['pm_ra']), np.array(df['pm_dec']),
+                                 np.array(df['parallax']),
+                                 rv1, epNa, np.array(df['epoch']), epNa, ep2b)
         # Update ra, dec values.
         df['coord_ra'] = pm_outputs[0]
         df['coord_dec'] = pm_outputs[1]

From f099e624c7854cdd9c70b5a4d9660f15279357fe Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Mon, 29 Jan 2024 10:44:12 -0800
Subject: [PATCH 07/31] implement creation of sso flux files (so far largely
 untested)

---
 cfg/object_types.yaml              |   2 +-
 skycatalogs/objects/sso_object.py  | 210 ++++++++++++++++++++++++++---
 skycatalogs/skyCatalogs.py         |  65 ++++++---
 skycatalogs/sso_catalog_creator.py |  60 ++++++---
 4 files changed, 280 insertions(+), 57 deletions(-)

diff --git a/cfg/object_types.yaml b/cfg/object_types.yaml
index 49d3bcb3..9301a60c 100644
--- a/cfg/object_types.yaml
+++ b/cfg/object_types.yaml
@@ -101,7 +101,7 @@ object_types :
     sed_model: snana
     internal_extinction: None
   sso:
-    file_template: 'sso_(?P\d+)_(?P\d+).parquet'
+    file_template: 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
     flux_file_template: 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
     data_file_type: parquet
     area_partition: None
diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index bc4017ec..5ba60dfc 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -1,31 +1,76 @@
 import os
+import re
 import numpy as np
+import numpy.ma as ma
+# import pyarrow.parquet as pq
 import galsim
-from .base_object import BaseObject
+from .base_object import BaseObject, ObjectCollection, ObjectList
+from ..readers import ParquetReader
 
-__all__ = ['SsoObject']
+
+__all__ = ['SsoObject', 'find_sso_files']
+
+_MJD_EPS = 0.00002   # about 1.7 seconds
+
+
+def find_sso_files(sso_path, sso_config):
+    '''
+    Parameters
+    ----------
+    sso_path    string   where sso catalog files may be found
+    sso_config  dict     configuration information, including filename
+                         templates
+
+    Returns
+    -------
+    dict. Keys are files (fullpath?)  Each value is itself a dict including
+          keys for at least mjd_min, mjd_max, path and reader (of
+          type ParquetReader, defined in parquet_reader.py). But
+          reader key is only added when needed, not in this routine.
+ ''' + files_dict = dict() + files = os.listdir(sso_path) + for f in files: + match = re.fullmatch(sso_config['file_template'], f) + if match: + new_entry = {'mjd_min': int(match['mjd_min']), + 'mjd_max': int(match['mjd_min']), + 'scope': 'main', + 'path' : os.path.join(sso_path, f)} + files_dict[f] = new_entry + continue + match = re.fullmatch(sso_config['flux_file_template'], f_base) + if match: + new_entry = {'mjd_min': int(match['mjd_min']), + 'mjd_max': int(match['mjd_min']), + 'scope': 'flux', + 'path' : os.path.join(sso_path, f)} + files_dict[f] = new_entry + continue + return files_dict + class SsoObject(BaseObject): _type_name = 'sso' + _solar_sed = None def __init__(self, ra, dec, id, object_type, belongs_to, belongs_index, mjd): super.__init__(ra, dec, id, self._type_name, belongs_to, belongs_index) self._mjd = mjd - + def _get_sed(self, mjd=None): ''' returns a SED and magnorm mjd is required ''' - - # Always return the same SED. Maybe need a class method - # to load it? - # For magnorm use the magnitude from Sorcha. Can it be used - # directly or are there other effects to be applied? - # Have to find it by looking for entry for this id, this mjd - # Do we look for specific entry or do we allow interpolation? - pass + if SsoObject._solar_sed is None: + SsoObject._solar_sed =\ + self._belongs_to._sky_catalog._sso_sed_factory.create() + # For magnorm use the magnitude from Sorcha. Can it be used + # directly or are there other effects to be applied? + # Have to find it by looking for entry for this id, this mjd + # Do we look for specific entry or do we allow interpolation? + return SsoObject._solar_sed, self.get_native_attribute('observedTrailedSourceMag') def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0): if gsparams is not None: @@ -37,14 +82,14 @@ def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0): # For now start with the simpler thing: just a point source. 
         return {'this_object': galsim.DeltaFunction(gsparams=gsparams)}
 
-    def get_observer_sed_component(self, component, mjd=None, exposure=15.0):
+    def get_observer_sed_component(self, component, mjd=None):
         if mjd is None:
             mjd = self._belongs_to._mjd
         if mjd is None:
             txt = 'SsoObject.get_observer_sed_component: no mjd specified for this call\n'
             txt += 'nor when generating object list'
             raise ValueError(txt)
 
         sed, magnorm = self._get_sed(mjd=mjd)
 
         flux_500 = np.exp(-0.9210340371976184 * magnorm)
         sed = sed.withMagnitude(0, self._bp500)
         sed = sed*flux_500
 
         # no extinction
         return sed
 
-    def get_flux(self, bandpass, sed=None, mjd=None, exposure=15.0):
+    def get_flux(self, bandpass, sed=None, mjd=None):
         if not sed:
-            sed = self.get_total_observer_sed(mjd=mjd, exposure=exposure)
+            sed = self.get_total_observer_sed(mjd=mjd)
         if sed is None:
             return 0.0
 
         flux = sed.calculateFlux(bandpass)
 
         return flux
+
+
+class SsoCollection(ObjectCollection):
+    @staticmethod
+    def find_row_groups(reader, mjd):
+        num_row_groups = reader._meta.num_row_groups
+        if mjd is None:
+            return [i for i in range(num_row_groups)]
+        rg_list = []
+        for i in range(num_row_groups):
+            # mjds = pq_file.read_row_group(i, columns=['mjd'])['mjd']
+            mjds = reader.read_columns(['mjd'], None, i)['mjd']
+            if mjd < mjds[0] - _MJD_EPS:
+                continue
+            if mjd >= mjds[-1] + _MJD_EPS:
+                break
+            rg_list.append(i)
+        return rg_list
+
+    @staticmethod
+    def form_row_group_collection(skycatalog, filepath, row_group,
+                                  mjd=None, region=None):
+        '''
+        Parameters
+        ----------
+        filepath
+        row_group   row group to use as input
+        mjd         if not None, exclude rows with mjd not within _MJD_EPS
+                    of this value
+        region      if not None, exclude rows with (ra, dec) not in the region
+
+        Returns
+        -------
+        An SsoCollection
+        '''
+        if region is not None:
+            raise ValueError('SsoCollection.form_row_group_collection: region parameter not yet implemented')
+        reader = ParquetReader(filepath)
+        values = reader.read_row_group(row_group,
+                                       columns=['id', 'mjd', 'ra', 'dec'])
+        if mjd:     # make mask
+            mask = np.logical_or((values['mjd'] >= mjd + _MJD_EPS),
+                                 (values['mjd'] < mjd - _MJD_EPS))
+            id_compress = ma.array(values['id'], mask=mask).compressed()
+            mjd_compress = ma.array(values['mjd'], mask=mask).compressed()
+            ra_compress = ma.array(values['ra'], mask=mask).compressed()
+            dec_compress = ma.array(values['dec'], mask=mask).compressed()
+
+            if all(mask):
+                return None
+            new_collection = SsoCollection(ra_compress, dec_compress,
+                                           id_compress, skycatalog,
+                                           mjd_individual=mjd_compress,
+                                           region=None, mjd_global=mjd,
+                                           mask=mask,
+                                           readers=[reader],
+                                           row_group=row_group)
+        else:
+            new_collection = SsoCollection(values['ra'], values['dec'],
+                                           values['id'], skycatalog,
+                                           mjd_individual=values['mjd'],
+                                           region=None, mask=None,
+                                           readers=[reader],
+                                           row_group=row_group)
+        return new_collection
+
+    @staticmethod
+    def load_collection(region, skycatalog, mjd=None, filepath=None):
+        '''
+        region      A standard region (Disk, PolygonalRegion, Box)
+                    or None
+        skycatalog  An instance of the SkyCatalog class
+        mjd         mjd (single float) of objects to be loaded.  May be
+                    None if filepath is used, in which case all objects from the
+                    file will be loaded
+        filepath    Restrict search to a particular file.
+
+        Returns: an ObjectList of SsoCollection objects
+        '''
+        object_list = ObjectList()
+        files_dict = skycatalog._sso_files
+        if filepath:
+            if filepath not in files_dict:
+                return []
+            f_entry = files_dict[filepath]
+            if mjd is not None:   # parse file name to see if mjd is included.
+                if mjd < f_entry['mjd_min'] - _MJD_EPS or mjd >= f_entry['mjd_max'] + _MJD_EPS:
+                    return []
+            if 'reader' not in f_entry:
+                f_entry['reader'] = ParquetReader(filepath)
+            row_groups = SsoCollection.find_row_groups(f_entry['reader'], mjd)
+            for r in row_groups:
+                coll = SsoCollection.form_row_group_collection(skycatalog,
+                                                               filepath, r,
+                                                               mjd=mjd)
+                if coll:
+                    object_list.append_collection(coll)
+            return object_list
+        else:
+            # There must be an mjd.  Look up all our files (or maybe this
+            # is done in advance) to determine which one or two might have
+            # objects with this mjd.
+            # Find the one or two row groups that are relevant.  For each
+            #   * fetch id, mjd, ra, dec
+            #   * form mask to exclude based on
+            #     mjd (must be within epsilon of specified)
+            #     and on (ra, dec) (must be within region)
+            #   * compress out unwanted objects
+            #   * return one or two collections
+            pass
+        # return object_list
+
+    def __init__(self, ra, dec, id, sky_catalog, mjd_individual=None,
+                 region=None, mjd_global=None,
+                 mask=None, readers=None, row_group=0):
+        '''
+        Parameters
+        ----------
+        ra, dec         array of float
+        id              array of str
+        sky_catalog     instance of SkyCatalog
+        mjd_individual  array of float or None
+        region          Geometric region or string (representing file path)
+        mjd_global      float or None
+        mask            exclusion mask if cuts have been made due to
+                        geometric region or mjd
+        readers         parquet reader (in practice there is always only 1)
+        row_group       int
+
+        One of mjd_global, mjd_individual must not be None
+        '''
+        super().__init__(ra, dec, id, 'sso', None, sky_catalog,
+                         region=region, mjd=mjd_global, mask=mask,
+                         readers=readers, row_group=row_group)
+        self._mjds = np.array(mjd_individual)
diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py
index 25d19e0d..112e1387 100644
--- a/skycatalogs/skyCatalogs.py
+++ b/skycatalogs/skyCatalogs.py
@@ -10,8 +10,10 @@
 from astropy import units as u
 from skycatalogs.objects.base_object import load_lsst_bandpasses, load_roman_bandpasses
 from skycatalogs.utils.catalog_utils import CatalogContext
-from skycatalogs.objects.base_object import ObjectList
+from skycatalogs.objects.base_object import ObjectList, ObjectCollection
 from skycatalogs.objects.gaia_object import GaiaObject, GaiaCollection
+from skycatalogs.objects.sso_object import SsoObject, SsoCollection
+from skycatalogs.objects.sso_object import find_sso_files
 from skycatalogs.readers import ParquetReader
 from skycatalogs.utils.sed_tools import TophatSedFactory, DiffskySedFactory
 from skycatalogs.utils.sed_tools import SsoSedFactory
@@ -289,6 +291,9 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False,
         # for 'object_types', map object type to filepath
         self._hp_info = dict()
         _ = self._find_all_hps()
+        if 'sso' in self.raw_config['object_types']:
+            self._sso_files = find_sso_files(self._cat_dir,
+                                             self.raw_config['object_types']['sso'])
 
         # NOTE: the use of TophatSedFactory is appropriate *only* for an
         # input galaxy catalog with format like cosmoDC2, which includes
@@ -338,6 +343,11 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False,
         if 'diffsky_galaxy' in config['object_types']:
             self.cat_cxt.register_source_type('diffsky_galaxy',
                                               object_class=DiffskyObject)
+        if 'sso' in config['object_types']:
+            self.cat_cxt.register_source_type('sso',
+                                              object_class=SsoObject,
+                                              collection_class=SsoCollection,
+                                              custom_load=True)
 
     @property
     def observed_sed_factory(self):
@@ -399,23 +409,24 @@ def _find_hps_by_type(self, name, type_config):
             else:
                 fpath = os.path.join(rel_dir, f)
             for k in tmpl_keys:
-                m
= re.fullmatch(k, f) - if m: - hp = int(m['healpix']) - hp_set.add(hp) - - if hp not in self._hp_info: - self._hp_info[hp] = {'files': {fpath: None}, - 'object_types': {name: [fpath]}} - else: - this_hp = self._hp_info[hp] - # Value of 'object_types' is now a list - if fpath not in this_hp['files']: - this_hp['files'][fpath] = None - if name in this_hp['object_types']: - this_hp['object_types'][name].append(fpath) + if k.find('?P') != -1: + m = re.fullmatch(k, f) + if m: + hp = int(m['healpix']) + hp_set.add(hp) + + if hp not in self._hp_info: + self._hp_info[hp] = {'files': {fpath: None}, + 'object_types': {name: [fpath]}} else: - this_hp['object_types'][name] = [fpath] + this_hp = self._hp_info[hp] + # Value of 'object_types' is now a list + if fpath not in this_hp['files']: + this_hp['files'][fpath] = None + if name in this_hp['object_types']: + this_hp['object_types'][name].append(fpath) + else: + this_hp['object_types'][name] = [fpath] return hp_set def _find_all_hps(self): @@ -560,7 +571,8 @@ def get_objects_by_region(self, region, obj_type_set=None, mjd=None): return object_list - def get_object_type_by_region(self, region, object_type, mjd=None): + def get_object_type_by_region(self, region, object_type, mjd=None, + filepath=None): ''' Parameters ---------- @@ -569,8 +581,13 @@ def get_object_type_by_region(self, region, object_type, mjd=None): object_type known object type without parent mjd MJD of observation epoch. In case of custom load this argument may be a pair defining an interval + filepath if not None, look only in this file. + Only used for custom loads + + Returns + ------- + an ObjectList containing all objects found. - Returns all objects found ''' out_list = ObjectList() @@ -582,9 +599,13 @@ def get_object_type_by_region(self, region, object_type, mjd=None): coll_type = self.cat_cxt.lookup_collection_type(object_type) if coll_type is not None: if self.cat_cxt.use_custom_load(object_type): - out_list.append_collection(coll_type.load_collection(region, - self, - mjd=mjd)) + coll = coll_type.load_collection(region, self, mjd=mjd, + filepath=filepath) + if isinstance(coll, ObjectCollection): + out_list.append_collection(coll) + else: + for c in coll: + out_list.append_collection(c) return out_list if partition != 'None': diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 176920f3..2779be00 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -1,16 +1,14 @@ import os -import sys -import re -### import logging +# import sys +# import re +# import logging import numpy as np -import numpy.ma as ma +# import numpy.ma as ma import sqlite3 import pandas as pd import pyarrow as pa import pyarrow.parquet as pq - -# from .utils.parquet_schema_utils import make_sso_flux_schema -### or alternately include method in SsoCatalogCreator +from .objects.base_object import LSST_BANDS """ Code for creating sky catalogs for sso objects @@ -55,6 +53,7 @@ def __init__(self, catalog_creator, output_dir, Parameters ---------- catalog_creator instance of CatalogCreator + output_dir destination directory for generated catalogs sso_truth path to input data directory sso_sed path to solar sed ''' @@ -65,7 +64,7 @@ def __init__(self, catalog_creator, output_dir, if sso_truth is None: self._sso_truth = SsoCatalogCreator._sso_truth self._sso_sed = sso_sed - if sso_sed is None: + if sso_sed is None: # use default path self._sso_sed = SsoCatalogCreator._sso_sed self._row_group_size = _DEFAULT_ROW_GROUP_SIZE @@ -138,14 +137,14 
@@ def create_sso_catalog(self):
         mjd_list = _partition_mjd(mins, maxes, counts, self._row_group_size)
         arrow_schema = self._create_main_schema()
 
-        ### Depending on length of mjd_list, may need more than one
-        ### output file in which case will need to add an outer loop
-        ### over output file.  Decide on max row groups per file.
+        # ## Depending on length of mjd_list, may need more than one
+        # ## output file in which case will need to add an outer loop
+        # ## over output file.  Decide on max row groups per file.
         mjd_min = min(mins)
         mjd_max = max(maxes)
         out_name = f'sso_{int(mjd_min)}_{int(np.ceil(mjd_max))}.parquet'
         writer = pq.ParquetWriter(os.path.join(self._output_dir, out_name),
-                                   arrow_schema)
+                                  arrow_schema)
         for i in range(len(mjd_list) - 1):
             self._write_rg(writer, mjd_list[i], mjd_list[i+1], db_files,
                            arrow_schema)
@@ -155,8 +154,34 @@ def create_sso_catalog(self):
 
     # In sso description in config come up with suitable re for filenames
 
-    def _create_sso_flux_file(self, main_file):
-        pass
+    def _create_sso_flux_file(self, info, arrow_schema):
+        '''
+        Parameters
+        ----------
+        info          dict  information pertaining to an existing sso main file
+        arrow_schema        to be used in creating the new flux file
+        '''
+        object_list = self._cat.get_object_type_by_region(None, 'sso',
+                                                          mjd=None,
+                                                          filepath=info.path)
+        colls = object_list.get_collections()
+        outname = f'sso_flux_{info.mjd_min}_{info.mjd_max}.parquet'
+
+        writer = pq.ParquetWriter(os.path.join(self._output_dir, outname))
+        outs = dict()
+        for c in colls:
+            outs['id'] = c._id
+            outs['mjd'] = c._mjds
+            all_fluxes = [o.get_LSST_fluxes(as_dict=False) for o in c]
+            all_fluxes_transpose = zip(*all_fluxes)
+            for i, band in enumerate(LSST_BANDS):
+                v = all_fluxes_transpose.__next__()
+                outs[f'lsst_flux_{band}'] = v
+            out_df = pd.DataFrame.from_dict(outs)
+            out_table = pa.Table.from_pandas(out_df, schema=arrow_schema)
+            writer.write_table(out_table)
+
+        writer.close()
 
     def create_sso_flux_catalog(self):
         """
@@ -177,7 +202,6 @@ def create_sso_flux_catalog(self):
         self._logger.info('Creating sso flux files')
 
         # For each main file make a corresponding flux file
-        files = os.listdir(self._output_dir)
-        mains = [os.path.join(self._output_dir, f) for f in files
-                 if re.match(self._main_template, f)]
-        for f in mains:
-            self._create_sso_flux_file(f)
+        for f, info in self._cat._sso_files:
+            if info['scope'] == 'main':
+                self._create_sso_flux_file(info, arrow_schema)

From d095b892604887683e4f63dc49da9370e6318c50 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Mon, 29 Jan 2024 16:03:44 -0800
Subject: [PATCH 08/31] various bug fixes for sso support, mostly trivial

---
 cfg/object_types.yaml              |  2 +-
 skycatalogs/catalog_creator.py     |  5 +-
 skycatalogs/objects/sso_object.py  | 90 +++++++++++++++++++++++-------
 skycatalogs/skyCatalogs.py         |  7 +--
 skycatalogs/sso_catalog_creator.py | 17 ++++--
 skycatalogs/utils/sed_tools.py     |  2 +-
 6 files changed, 91 insertions(+), 32 deletions(-)

diff --git a/cfg/object_types.yaml b/cfg/object_types.yaml
index 9301a60c..7d9b55bd 100644
--- a/cfg/object_types.yaml
+++ b/cfg/object_types.yaml
@@ -102,7 +102,7 @@ object_types :
       internal_extinction: None
   sso:
     file_template: 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
-    flux_file_template: 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_min>\d+).parquet'
+    flux_file_template: 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
     data_file_type: parquet
     area_partition: None
     time_partition: mjd
diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py
index d0524e4d..dd7f39fd 100644
--- a/skycatalogs/catalog_creator.py
+++ 
b/skycatalogs/catalog_creator.py @@ -330,8 +330,10 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None, self._dc2 = dc2 self._include_roman_flux = include_roman_flux self._obs_sed_factory = None - self._sos_sed_factory = None # do we need this? + self._sso_sed_factory = None # do we need this? self._sso_creator = SsoCatalogCreator(self, sso_truth, sso_sed) + self._sso_truth = self._sso_creator.sso_truth + self._sso_sed = self._sso_creator.sso_sed def _make_tophat_columns(self, dat, names, cmp): ''' @@ -1073,6 +1075,7 @@ def write_config(self, overwrite=False, path_only=False): inputs['star_truth'] = self._star_truth if self._sso_truth: inputs['sso_truth'] = self._sso_truth + inputs['sso_sed'] = self._sso_sed config.add_key('provenance', assemble_provenance(self._pkg_root, inputs=inputs)) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 5ba60dfc..7b4c6914 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -38,7 +38,7 @@ def find_sso_files(sso_path, sso_config): 'path' : os.path.join(sso_path, f)} files_dict[f] = new_entry continue - match = re.fullmatch(sso_config['flux_file_template'], f_base) + match = re.fullmatch(sso_config['flux_file_template'], f) if match: new_entry = {'mjd_min': int(match['mjd_min']), 'mjd_max': int(match['mjd_min']), @@ -55,7 +55,8 @@ class SsoObject(BaseObject): def __init__(self, ra, dec, id, object_type, belongs_to, belongs_index, mjd): - super.__init__(ra, dec, id, self._type_name, belongs_to, belongs_index) + super().__init__(ra, dec, id, self._type_name, belongs_to, + belongs_index) self._mjd = mjd def _get_sed(self, mjd=None): @@ -128,16 +129,18 @@ def find_row_groups(reader, mjd): return rg_list @staticmethod - def form_row_group_collection(skycatalog, filepath, row_group, + def form_row_group_collection(skycatalog, reader, row_group, mjd=None, region=None): ''' Parameters ---------- - filepath - row_group row group to use as input - mjd if not None, exclude rows with mjd not within _MJD_EPS - of this value - region if not None, exclude rows with (ra, dec) not in the region + skycatalog SkyCatalog object + reader skycatalogs.readers.parquet_readers.ParquetReader + Read only from this file + row_group row group to use as input + mjd if not None, exclude rows with mjd not within _MJD_EPS + of this value + region if not None, exclude rows with (ra, dec) not in the region Returns ------- @@ -145,9 +148,9 @@ def form_row_group_collection(skycatalog, filepath, row_group, ''' if region is not None: raise ValueError('SsoCollection.form_row_group_collection: region parameter not yet implemented') - reader = ParquetReader(filepath) - values = reader.read_row_group(row_group, - columns=['id', 'mjd', 'ra', 'dec']) + # reader = ParquetReader(filepath) + values = reader.read_columns(['id', 'mjd', 'ra', 'dec'], None, + row_group=row_group) if mjd: # make mask mask = np.logical_or((values['mjd'] >= mjd + _MJD_EPS), (values['mjd'] < mjd - _MJD_EPS)) @@ -185,23 +188,29 @@ def load_collection(region, skycatalog, mjd=None, filepath=None): file will be loaded filepath Restrict search to a particular file. 
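A minimal, self-contained sketch of the template matching that find_sso_files
relies on; the file names below are invented for illustration:

    import re

    # Named groups in the configured template let the caller recover the
    # mjd bounds from a catalog file name; non-matching files are skipped.
    template = r'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
    for fname in ['sso_60218_60233.parquet', 'notes.txt']:
        m = re.fullmatch(template, fname)
        if m:
            print(int(m['mjd_min']), int(m['mjd_max']))   # 60218 60233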
- Returns: A list of SsoCollection objects + Returns: An ObjectList ''' object_list = ObjectList() files_dict = skycatalog._sso_files + f_entry = None if filepath: - if filepath not in files_dict: + for f in files_dict: + if filepath == files_dict[f]['path']: + f_entry = files_dict[f] + break + if f_entry is None: return [] - f_entry = files_dict[filepath] if mjd is not None: # parse file name to see if mjd is included. if mjd < f_entry['mjd_min'] - _MJD_EPS or mjd >= f_entry['mjd_max'] + _MJD_EPS: return [] if 'reader' not in f_entry: f_entry['reader'] = ParquetReader(filepath) - row_groups = SsoObject.find_row_groups(f_entry['reader'], mjd) + row_groups = SsoCollection.find_row_groups(f_entry['reader'], mjd) for r in row_groups: - coll = SsoObject.form_row_group_collection(skycatalog, - region, r, mjd) + coll = SsoCollection.form_row_group_collection(skycatalog, + f_entry['reader'], + r, mjd=mjd, + region=region) if coll: object_list.append_collection(coll) return object_list @@ -238,7 +247,46 @@ def __init__(self, ra, dec, id, sky_catalog, mjd_individual=None, One of mjd_global, mjd_individual must not be None ''' - super.__init__(ra, dic, id, 'sso', None, sky_catalog, - region=region, mjd=mjd_global, mask=mask, - readers=readers, row_group=row_group) - self._mjds = np.array(mjd_individuals) + super().__init__(ra, dec, id, 'sso', None, sky_catalog, + region=region, mjd=mjd_global, mask=mask, + readers=readers, row_group=row_group) + self._mjds = np.array(mjd_individual) + + def __getitem__(self, key): + ''' + Override implementation in base_object because sso objects + must have an mjd + + Parameters + ---------- + If key is an int (or int-like) return a single base object + If key is a slice return a list of objects + If key is a tuple where key[0] is iterable of int-like, + return a list of objects + ''' + + if self._uniform_object_type: + object_type = self._object_type_unique + else: + object_type = self._object_types[key] + + if isinstance(key, int) or isinstance(key, np.int64): + return self._object_class(self._ra[key], self._dec[key], + self._id[key], object_type, self, key, + self._mjds[key]) + + elif type(key) == slice: + if key.start is None: + key.start = 0 + ixdata = [i for i in range(min(key.stop, len(self._ra)))] + ixes = itertools.islice(ixdata, key.start, key.stop, key.step) + return [self._object_class(self._ra[i], self._dec[i], self._id[i], + object_type, self, i, self._mjds[i]) + for i in ixes] + + elif type(key) == tuple and isinstance(key[0], Iterable): + # check it's a list of int-like? 
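The three access patterns the override accepts could be exercised as below.
This is a toy stand-in rather than a real collection, since constructing an
SsoCollection requires a catalog; note that slice.indices() normalizes the
slice bounds without assigning to the (immutable) slice object:

    import numpy as np

    class TinyCollection:
        '''Toy stand-in for the indexing contract sketched above.'''
        def __init__(self, values):
            self._values = np.array(values)

        def __getitem__(self, key):
            if isinstance(key, (int, np.int64)):
                return self._values[key]
            elif isinstance(key, slice):
                return [self._values[i]
                        for i in range(*key.indices(len(self._values)))]
            elif isinstance(key, tuple) and hasattr(key[0], '__iter__'):
                return [self._values[i] for i in key[0]]

    c = TinyCollection([10, 20, 30, 40])
    print(c[1], c[1:3], c[([0, 3],)])     # 20 [20, 30] [10, 40]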
+ return [self._object_class(self._ra[i], self._dec[i], self._id[i], + object_type, self, i, self._mjds[i]) + for i in key[0]] + diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 112e1387..80418e98 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -311,7 +311,7 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, config['Cosmology']) if 'sso' in config['object_types']: self._sso_sed_path = config['provenance']['inputs'].get('sso_sed', - default='sso_sed.db') + 'sso_sed.db') self._sso_sed_factory = SsoSedFactory(self._sso_sed_path) self._extinguisher = MilkyWayExtinction() @@ -603,9 +603,8 @@ def get_object_type_by_region(self, region, object_type, mjd=None, filepath=filepath) if isinstance(coll, ObjectCollection): out_list.append_collection(coll) - else: - for c in coll: - out_list.append_collection(c) + else: # ObjectList + out_list.append_object_list(coll) return out_list if partition != 'None': diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 2779be00..1c1ca38b 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -77,6 +77,14 @@ def __init__(self, catalog_creator, output_dir, observedTrailedSourceMag from {tbl} where mjd >= (?) and mjd < (?) order by mjd''' + @property + def sso_truth(self): + return self._sso_truth + + @property + def sso_sed(self): + return self._sso_sed + def _create_main_schema(self): fields = [ @@ -163,11 +171,12 @@ def _create_sso_flux_file(self, info, arrow_schema): ''' object_list = self._cat.get_object_type_by_region(None, 'sso', mjd=None, - filepath=info.path) + filepath=info['path']) colls = object_list.get_collections() - outname = f'sso_flux_{info.mjd_min}_{info.mjd_max}.parquet' + outname = f"sso_flux_{info['mjd_min']}_{info['mjd_max']}.parquet" - writer = pq.ParquetWriter(os.path.join(self._output_dir, outname)) + writer = pq.ParquetWriter(os.path.join(self._output_dir, outname), + arrow_schema) outs = dict() for c in colls: outs['id'] = c._id @@ -202,6 +211,6 @@ def create_sso_flux_catalog(self): self._logger.info('Creating sso flux files') # For each main file make a corresponding flux file - for f, info in self._cat._sso_files: + for f, info in self._cat._sso_files.items(): if info['scope'] == 'main': self._create_sso_flux_file(info, arrow_schema) diff --git a/skycatalogs/utils/sed_tools.py b/skycatalogs/utils/sed_tools.py index b00d2a98..eb43a732 100644 --- a/skycatalogs/utils/sed_tools.py +++ b/skycatalogs/utils/sed_tools.py @@ -223,7 +223,7 @@ def __init__(self, sed_path, fmt='db'): raise ValueError(f'SsoSedFactory: Unsupport input format {fmt}; must be "db"') wave_type = 'angstrom' flux_type = 'flambda' - q = 'select wavelength, flux from SED order by wavelength' + q = 'select wavelength, flux as flambda from SED order by wavelength' with sqlite3.connect(sed_path) as conn: sed_df = pd.read_sql_query(q, conn) lut = galsim.LookupTable(x=sed_df['wavelength'], f=sed_df['flambda'], From b55fe630016043fc4406096ebd5dd5bb9b00052f Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Mon, 29 Jan 2024 17:28:44 -0800 Subject: [PATCH 09/31] Can create flux file; contents need to be validated --- skycatalogs/objects/sso_object.py | 2 +- skycatalogs/sso_catalog_creator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 7b4c6914..2740b8a6 100644 --- a/skycatalogs/objects/sso_object.py +++ 
b/skycatalogs/objects/sso_object.py
@@ -94,7 +94,7 @@ def get_observer_sed_component(self, component, mjd=None):
 
         sed, magnorm = self._get_sed(mjd=mjd)
         flux_500 = np.exp(-0.9210340371976184 * magnorm)
-        sed = self.withMagnitude(0, self._bp500)
+        sed = sed.withMagnitude(0, self._bp500)
         sed = sed*flux_500
 
         # no extinction
diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py
index 1c1ca38b..41554fe6 100644
--- a/skycatalogs/sso_catalog_creator.py
+++ b/skycatalogs/sso_catalog_creator.py
@@ -181,7 +181,7 @@ def _create_sso_flux_file(self, info, arrow_schema):
         for c in colls:
             outs['id'] = c._id
             outs['mjd'] = c._mjds
-            all_fluxes = [o.get_LSST_fluxes(as_dict=False) for o in c]
+            all_fluxes = [o.get_LSST_fluxes(as_dict=False, mjd=o._mjd) for o in c]
             all_fluxes_transpose = zip(*all_fluxes)
             for i, band in enumerate(LSST_BANDS):
                 v = all_fluxes_transpose.__next__()

From 7a0cfab73d0456eadc6af25abe90469c217b94bd Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Tue, 30 Jan 2024 13:27:04 -0800
Subject: [PATCH 10/31] save sso file data correctly

---
 skycatalogs/objects/sso_object.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index 2740b8a6..171e0e19 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -33,7 +33,7 @@ def find_sso_files(sso_path, sso_config):
         match = re.fullmatch(sso_config['file_template'], f)
         if match:
             new_entry = {'mjd_min': int(match['mjd_min']),
-                         'mjd_max': int(match['mjd_min']),
+                         'mjd_max': int(match['mjd_max']),
                          'scope': 'main',
                          'path' : os.path.join(sso_path, f)}
             files_dict[f] = new_entry
@@ -41,7 +41,7 @@ def find_sso_files(sso_path, sso_config):
         match = re.fullmatch(sso_config['flux_file_template'], f)
         if match:
             new_entry = {'mjd_min': int(match['mjd_min']),
-                         'mjd_max': int(match['mjd_min']),
+                         'mjd_max': int(match['mjd_max']),
                          'scope': 'flux',
                          'path' : os.path.join(sso_path, f)}
             files_dict[f] = new_entry

From 605f0d46258ae6a3bc76a626fd76aee7bf02cc77 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Tue, 6 Feb 2024 19:09:03 -0800
Subject: [PATCH 11/31] partition by hp, not time

---
 cfg/object_types.yaml              |   8 +-
 skycatalogs/catalog_creator.py     |   4 +-
 skycatalogs/sso_catalog_creator.py | 143 ++++++++++++++++-------------
 3 files changed, 86 insertions(+), 69 deletions(-)

diff --git a/cfg/object_types.yaml b/cfg/object_types.yaml
index 7d9b55bd..d6d58edf 100644
--- a/cfg/object_types.yaml
+++ b/cfg/object_types.yaml
@@ -101,9 +101,9 @@ object_types :
       sed_model: snana
       internal_extinction: None
   sso:
-    file_template: 'sso_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
-    flux_file_template: 'sso_flux_(?P<mjd_min>\d+)_(?P<mjd_max>\d+).parquet'
+    file_template: 'sso_(?P<healpix>\d+).parquet'
+    flux_file_template: 'sso_flux_(?P<healpix>\d+).parquet'
     data_file_type: parquet
-    area_partition: None
-    time_partition: mjd
+    area_partition:
+      { type: healpix, ordering: ring, nside: 32}
    sed_model: dbfile_angstrom_flambda
diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py
index dd7f39fd..78ebc847 100644
--- a/skycatalogs/catalog_creator.py
+++ b/skycatalogs/catalog_creator.py
@@ -198,7 +198,7 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
                  galaxy_stride=1000000, provenance=None, dc2=False,
                  sn_object_type='sncosmo', galaxy_type='cosmodc2',
                  include_roman_flux=False, star_input_fmt='sqlite',
-                 sso_truth=None, sso_sed=None):
+                 sso_truth=None, sso_sed=None, sso_partition='healpixel'):
         """
         Store context for catalog creation
@@ -247,6 +247,7 @@ def 
__init__(self, parts, area_partition=None, skycatalog_root=None, star_input_fmt May be either 'sqlite' or 'parquet' sso_truth Directory containing Sorcha output sso_sed Path to sed file to be used for all SSOs + sso_partition Whether to partition by time or by healpixels Might want to add a way to specify template for output file name and template for input sedLookup file name. @@ -334,6 +335,7 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None, self._sso_creator = SsoCatalogCreator(self, sso_truth, sso_sed) self._sso_truth = self._sso_creator.sso_truth self._sso_sed = self._sso_creator.sso_sed + self._sso_partition = sso_partition def _make_tophat_columns(self, dat, names, cmp): ''' diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 41554fe6..2a1c8663 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -19,32 +19,32 @@ _DEFAULT_ROW_GROUP_SIZE = 100000 # Maybe could be larger -def _partition_mjd(mins, maxes, counts, max_rowgroup_size): - ''' Determine mjd intervals, each of which will be processed - in one go''' - total_rows = np.sum(np.array(counts)) - min_min = int(np.floor(np.min(np.array(mins)))) - max_max = int(np.ceil(np.max(np.array(maxes)))) - # n_rowgroup = (max_max + (max_rowgroup_size - 1) - min_min)/max_rowgroup_size - n_rowgroup = int(np.ceil(total_rows/max_rowgroup_size)) - # for debugging - if n_rowgroup < 2: - n_rowgroup = 2 - mjd_interval = int(np.ceil((max_max - min_min)/n_rowgroup)) - mjds = [min_min] - last = min_min - for i in range(n_rowgroup): - last += mjd_interval - last = min(last, max_max) - mjds.append(last) - if last == max_max: - break - - return mjds +# def _partition_mjd(mins, maxes, counts, max_rowgroup_size): +# ''' Determine mjd intervals, each of which will be processed +# in one go''' +# total_rows = np.sum(np.array(counts)) +# min_min = int(np.floor(np.min(np.array(mins)))) +# max_max = int(np.ceil(np.max(np.array(maxes)))) +# # n_rowgroup = (max_max + (max_rowgroup_size - 1) - min_min)/max_rowgroup_size +# n_rowgroup = int(np.ceil(total_rows/max_rowgroup_size)) +# # for debugging +# if n_rowgroup < 2: +# n_rowgroup = 2 +# mjd_interval = int(np.ceil((max_max - min_min)/n_rowgroup)) +# mjds = [min_min] +# last = min_min +# for i in range(n_rowgroup): +# last += mjd_interval +# last = min(last, max_max) +# mjds.append(last) +# if last == max_max: +# break + +# return mjds class SsoCatalogCreator: - _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/19jan2024' + _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/6feb2024' _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db' def __init__(self, catalog_creator, output_dir, @@ -72,10 +72,12 @@ def __init__(self, catalog_creator, output_dir, tbl = 'pp_results' mjd_c = 'FieldMJD_TAI' self._mjd_q = f'select min({mjd_c}), max({mjd_c}), count({mjd_c}) from {tbl}' - self._df_query = f'''select ObjID as id, {mjd_c} as mjd, - "AstRA(deg)" as ra, "AstDec(deg)" as dec, optFilter as filter, - observedTrailedSourceMag from {tbl} where mjd >= (?) - and mjd < (?) order by mjd''' + self._dfhp_query = f'''select ObjID as id, {mjd_c} as mjd, + "AstRA(deg)" as ra, "AstDec(deg)" as dec, + "AstRARate(deg/day)" as ra_rate, + "AstDecRate(deg/day)" as dec_rate, + observedTrailedSourceMag from {tbl} where healpix = (?) 
+ order by mjd, ObjID''' @property def sso_truth(self): @@ -93,9 +95,9 @@ def _create_main_schema(self): pa.field('ra', pa.float64()), pa.field('dec', pa.float64()), pa.field('observedTrailedSourceMag', pa.float64()), - # pa.field('ra_rate', pa.float64()), - # pa.field('dec_rate', pa.float64()), - pa.field('filter', pa.string())] + pa.field('ra_rate', pa.float64()), + pa.field('dec_rate', pa.float64())] + # pa.field('filter', pa.string())] return pa.schema(fields) def _create_flux_schema(self): @@ -111,17 +113,48 @@ def _create_flux_schema(self): pa.field('lsst_flux_y', pa.float32(), True)] return pa.schema(fields) - def _write_rg(self, writer, min_mjd, max_mjd, db_files, arrow_schema): + # def _write_time_rg(self, writer, min_mjd, max_mjd, db_files, arrow_schema): + # df_list = [] + # for f in db_files: + # conn = sqlite3.connect(f) + # df_list.append(pd.read_sql_query(self._df_query, + # conn, + # params=(min_mjd, max_mjd))) + # df = pd.concat(df_list) + # df_sorted = df.sort_values('mjd') + # tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema) + # writer.write_table(tbl) + + def _get_hps(self, filepath): + + with sqlite3.connect(filepath) as conn: + df = pd.read_sql_query('select distinct healpix from pp_results', + conn) + return set(df['healpix']) + + def _write_hp(self, hp, hps_by_file, arrow_schema): df_list = [] - for f in db_files: - conn = sqlite3.connect(f) - df_list.append(pd.read_sql_query(self._df_query, - conn, - params=(min_mjd, max_mjd))) + for f in hps_by_file: + if hp in hps_by_file[f]: + conn = sqlite3.connect(f) + one_df = pd.read_sql_query(self._dfhp_query, conn, + params=(hp,)) + df_list.append(one_df) + if df_list == []: + return + writer = pq.ParquetWriter(os.path.join(self._output_dir, + f'sso_{hp}.parquet'), + arrow_schema) + df = pd.concat(df_list) df_sorted = df.sort_values('mjd') + + # Should be prepared to write multiple row groups here + # depending on # of rows in the table. + # For now only a single row group will be necessary tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema) writer.write_table(tbl) + def create_sso_catalog(self): """ @@ -130,36 +163,18 @@ def create_sso_catalog(self): # Find all the db files from Sorcha. They should all be in a single # directory with no other files in that directory files = os.listdir(self._sso_truth) + arrow_schema = self._create_main_schema() db_files = [os.path.join(self._sso_truth, f) for f in files if f.endswith('.db')] - mins = [] - maxes = [] - counts = [] - for f in db_files: - with sqlite3.connect(f) as conn: - res = conn.execute(self._mjd_q) - r = res.fetchone() - mins.append(float(r[0])) - maxes.append(float(r[1])) - counts.append(int(r[2])) - - mjd_list = _partition_mjd(mins, maxes, counts, self._row_group_size) - arrow_schema = self._create_main_schema() - - # ## Depending on length of mjd_list, may need more than one - # ## output file in which case will need to add an outer loop - # ## over output file. Decide on max row groups per file. 
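For reference, a short sketch of the healpix convention the new yaml above
declares (nside=32, ring ordering). Whether the healpix column in the Sorcha
databases was computed with exactly this call is an assumption here:

    import healpy

    ra, dec = 55.0, -30.0                            # degrees, invented
    pixel = healpy.ang2pix(32, ra, dec, nest=False, lonlat=True)
    print(pixel)                                     # ring-ordered pixel id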
- mjd_min = min(mins) - mjd_max = max(maxes) - out_name = f'sso_{int(mjd_min)}_{int(np.ceil(mjd_max))}.parquet' - writer = pq.ParquetWriter(os.path.join(self._output_dir, out_name), - arrow_schema) - for i in range(len(mjd_list) - 1): - self._write_rg(writer, mjd_list[i], mjd_list[i+1], db_files, - arrow_schema) - - # close parquet file - writer.close() + all_hps = set() + hps_by_file = dict() + for f in db_files: + hps_by_file[f] = self._get_hps(f) + all_hps = sorted(all_hps.union(hps_by_file[f])) + + for h in all_hps: + self._write_hp(h, hps_by_file, arrow_schema) + # In sso description in config come up with suitable re for filenames def _create_sso_flux_file(self, info, arrow_schema): From 33f017b299091456324e7355f275b91846002649 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Wed, 7 Feb 2024 16:37:53 -0800 Subject: [PATCH 12/31] able to make flux files partitioned by hp --- skycatalogs/objects/sso_object.py | 6 ++--- skycatalogs/sso_catalog_creator.py | 43 ++---------------------------- 2 files changed, 4 insertions(+), 45 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 171e0e19..5b6ca7fc 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -32,16 +32,14 @@ def find_sso_files(sso_path, sso_config): for f in files: match = re.fullmatch(sso_config['file_template'], f) if match: - new_entry = {'mjd_min': int(match['mjd_min']), - 'mjd_max': int(match['mjd_max']), + new_entry = {'hp': int(match['healpix']), 'scope': 'main', 'path' : os.path.join(sso_path, f)} files_dict[f] = new_entry continue match = re.fullmatch(sso_config['flux_file_template'], f) if match: - new_entry = {'mjd_min': int(match['mjd_min']), - 'mjd_max': int(match['mjd_max']), + new_entry = {'hp': int(match['healpix']), 'scope': 'flux', 'path' : os.path.join(sso_path, f)} files_dict[f] = new_entry diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 2a1c8663..7b74e95a 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -19,29 +19,6 @@ _DEFAULT_ROW_GROUP_SIZE = 100000 # Maybe could be larger -# def _partition_mjd(mins, maxes, counts, max_rowgroup_size): -# ''' Determine mjd intervals, each of which will be processed -# in one go''' -# total_rows = np.sum(np.array(counts)) -# min_min = int(np.floor(np.min(np.array(mins)))) -# max_max = int(np.ceil(np.max(np.array(maxes)))) -# # n_rowgroup = (max_max + (max_rowgroup_size - 1) - min_min)/max_rowgroup_size -# n_rowgroup = int(np.ceil(total_rows/max_rowgroup_size)) -# # for debugging -# if n_rowgroup < 2: -# n_rowgroup = 2 -# mjd_interval = int(np.ceil((max_max - min_min)/n_rowgroup)) -# mjds = [min_min] -# last = min_min -# for i in range(n_rowgroup): -# last += mjd_interval -# last = min(last, max_max) -# mjds.append(last) -# if last == max_max: -# break - -# return mjds - class SsoCatalogCreator: _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/6feb2024' @@ -97,7 +74,6 @@ def _create_main_schema(self): pa.field('observedTrailedSourceMag', pa.float64()), pa.field('ra_rate', pa.float64()), pa.field('dec_rate', pa.float64())] - # pa.field('filter', pa.string())] return pa.schema(fields) def _create_flux_schema(self): @@ -113,18 +89,6 @@ def _create_flux_schema(self): pa.field('lsst_flux_y', pa.float32(), True)] return pa.schema(fields) - # def _write_time_rg(self, writer, min_mjd, max_mjd, db_files, arrow_schema): - # df_list = [] - # for f in db_files: - # conn = sqlite3.connect(f) - # 
df_list.append(pd.read_sql_query(self._df_query, - # conn, - # params=(min_mjd, max_mjd))) - # df = pd.concat(df_list) - # df_sorted = df.sort_values('mjd') - # tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema) - # writer.write_table(tbl) - def _get_hps(self, filepath): with sqlite3.connect(filepath) as conn: @@ -175,8 +139,6 @@ def create_sso_catalog(self): for h in all_hps: self._write_hp(h, hps_by_file, arrow_schema) - # In sso description in config come up with suitable re for filenames - def _create_sso_flux_file(self, info, arrow_schema): ''' Parameters @@ -188,7 +150,7 @@ def _create_sso_flux_file(self, info, arrow_schema): mjd=None, filepath=info['path']) colls = object_list.get_collections() - outname = f"sso_flux_{info['mjd_min']}_{info['mjd_max']}.parquet" + outname = f"sso_flux_{info['hp']}.parquet" writer = pq.ParquetWriter(os.path.join(self._output_dir, outname), arrow_schema) @@ -210,8 +172,7 @@ def _create_sso_flux_file(self, info, arrow_schema): def create_sso_flux_catalog(self): """ Create sso flux catalog. Includes id, mjd (since given sso normally - has several observations) and fluxes. (Or just one flux and - associated band?) + has several observations) and fluxes. """ from .skyCatalogs import open_catalog From 52e1c653c9073fb23e706c95838bbb1bd1b83471 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Thu, 8 Feb 2024 14:26:46 -0800 Subject: [PATCH 13/31] partition by hp; eliminate unneeded sso-specific code --- skycatalogs/objects/sso_object.py | 172 ++--------------------------- skycatalogs/skyCatalogs.py | 131 ++++++++++++++++------ skycatalogs/sso_catalog_creator.py | 58 +++++----- 3 files changed, 129 insertions(+), 232 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 5b6ca7fc..67867df7 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -1,51 +1,11 @@ -import os -import re +from collections.abc import Iterable +import itertools import numpy as np -import numpy.ma as ma -# import pyarrow.parquet as pq import galsim -from .base_object import BaseObject, ObjectCollection, ObjectList -from ..readers import ParquetReader +from .base_object import BaseObject, ObjectCollection -__all__ = ['SsoObject', 'find_sso_files'] -_MJD_EPS = 0.00002 # about 1.7 seconds - - -def find_sso_files(sso_path, sso_config): - ''' - Parameters - ---------- - sso_path string where sso catalog files may be found - sso_config dict configuration information, including filename - templates - - Returns - ------- - dict. Keys are files (fullpath?) Each value is itself a dict including - keys for at least mjd_min, mjd_max, path and reader (of - type ParquetReader, defined in parquet_reader.py). But - reader key is only added when needed, not in this routine. 
- ''' - files_dict = dict() - files = os.listdir(sso_path) - for f in files: - match = re.fullmatch(sso_config['file_template'], f) - if match: - new_entry = {'hp': int(match['healpix']), - 'scope': 'main', - 'path' : os.path.join(sso_path, f)} - files_dict[f] = new_entry - continue - match = re.fullmatch(sso_config['flux_file_template'], f) - if match: - new_entry = {'hp': int(match['healpix']), - 'scope': 'flux', - 'path' : os.path.join(sso_path, f)} - files_dict[f] = new_entry - continue - return files_dict - +__all__ = ['SsoObject', 'SsoCollection'] class SsoObject(BaseObject): _type_name = 'sso' @@ -110,124 +70,9 @@ def get_flux(self, bandpass, sed=None, mjd=None): class SsoCollection(ObjectCollection): - @staticmethod - def find_row_groups(reader, mjd): - num_row_groups = reader._meta.num_row_groups - if mjd is None: - return [i for i in range(num_row_groups)] - rg_list = [] - for i in range(num_row_groups): - # mjds = pq_file.read_row_group(i, columns=['mjd'])['mjd'] - mjds = reader.read_columns(['mjd'], None, i)['mjd'] - if mjd < mjds[0] - _MJD_EPS: - continue - if mjd >= mjds[-1] + _MJD_EPS: - break - rg_list.append(i) - return rg_list - - @staticmethod - def form_row_group_collection(skycatalog, reader, row_group, - mjd=None, region=None): - ''' - Parameters - ---------- - skycatalog SkyCatalog object - reader skycatalogs.readers.parquet_readers.ParquetReader - Read only from this file - row_group row group to use as input - mjd if not None, exclude rows with mjd not within _MJD_EPS - of this value - region if not None, exclude rows with (ra, dec) not in the region - - Returns - ------- - An SsoCollection - ''' - if region is not None: - raise ValueError('SsoCollection.form_row_group_collection: region parameter not yet implemented') - # reader = ParquetReader(filepath) - values = reader.read_columns(['id', 'mjd', 'ra', 'dec'], None, - row_group=row_group) - if mjd: # make mask - mask = np.logical_or((values['mjd'] >= mjd + _MJD_EPS), - (values['mjd'] < mjd - _MJD_EPS)) - id_compress = ma.array(values['id'], mask=mask).compressed() - mjd_compress = ma.array(values['mjd'], mask=mask).compressed() - ra_compress = ma.array(values['ra'], mask=mask).compressed() - dec_compress = ma.array(values['dec'], mask=mask).compressed() - - if all(mask): - return None - new_collection = SsoCollection(ra_compress, dec_compress, - id_compress, skycatalog, - mjd_individual=mjd_compress, - region=None, mjd_global=mjd, - mask=mask, - readers=[reader], - row_group=row_group) - else: - new_collection = SsoCollection(values['ra'], values['dec'], - values['id'], skycatalog, - mjd_individual=values['mjd'], - region=None, mask=None, - readers=[reader], - row_group=row_group) - return new_collection - - @staticmethod - def load_collection(region, skycatalog, mjd=None, filepath=None): - ''' - region A standard region (Disk, PolygonalRegion, Box) - or None - skycatalog An instance of the SkyCatalog class - mjd mjd (single float) of objects to be loaded. May be - None if filepath is used, in which case all objects from the - file will be loaded - filepath Restrict search to a particular file. - - Returns: An ObjectList - ''' - object_list = ObjectList() - files_dict = skycatalog._sso_files - f_entry = None - if filepath: - for f in files_dict: - if filepath == files_dict[f]['path']: - f_entry = files_dict[f] - break - if f_entry is None: - return [] - if mjd is not None: # parse file name to see if mjd is included. 
- if mjd < f_entry['mjd_min'] - _MJD_EPS or mjd >= f_entry['mjd_max'] + _MJD_EPS: - return [] - if 'reader' not in f_entry: - f_entry['reader'] = ParquetReader(filepath) - row_groups = SsoCollection.find_row_groups(f_entry['reader'], mjd) - for r in row_groups: - coll = SsoCollection.form_row_group_collection(skycatalog, - f_entry['reader'], - r, mjd=mjd, - region=region) - if coll: - object_list.append_collection(coll) - return object_list - else: - # There must be an mjd. Look up all our files (or maybe this - # is done in advance) to determine which one or two might have - # objects with this mjd. - # Find the one or two row groups that are relevant. For each - # * fetch id, mjd, ra, dec - # * form mask to exclude based on - # mjd (must be within epsilon of specified) - # and on (ra, dec) (must be within region) - # * compress out unwanted objects - # * return one or two collections - pass - # return object_list - def __init__(self, ra, dec, id, sky_catalog, mjd_individual=None, - region=None, mjd_global=None, + def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, + region=None, mjd=None, mask=None, readers=None, row_group=0): ''' Parameters @@ -245,8 +90,8 @@ def __init__(self, ra, dec, id, sky_catalog, mjd_individual=None, One of mjd_global, mjd_individual must not be None ''' - super().__init__(ra, dec, id, 'sso', None, sky_catalog, - region=region, mjd=mjd_global, mask=mask, + super().__init__(ra, dec, id, 'sso', hp, sky_catalog, + region=region, mjd=mjd, mask=mask, readers=readers, row_group=row_group) self._mjds = np.array(mjd_individual) @@ -287,4 +132,3 @@ def __getitem__(self, key): return [self._object_class(self._ra[i], self._dec[i], self._id[i], object_type, self, i, self._mjds[i]) for i in key[0]] - diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 80418e98..400aa8ee 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -13,7 +13,7 @@ from skycatalogs.objects.base_object import ObjectList, ObjectCollection from skycatalogs.objects.gaia_object import GaiaObject, GaiaCollection from skycatalogs.objects.sso_object import SsoObject, SsoCollection -from skycatalogs.objects.sso_object import find_sso_files +# from skycatalogs.objects.sso_object import find_sso_files from skycatalogs.readers import ParquetReader from skycatalogs.utils.sed_tools import TophatSedFactory, DiffskySedFactory from skycatalogs.utils.sed_tools import SsoSedFactory @@ -70,25 +70,27 @@ def _get_intersecting_hps(hp_ordering, nside, region): return pixels -def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, +def _compress_via_mask(tbl, id_column, region, source_type='galaxy', mjd=None): ''' Parameters ---------- tbl data table including columns named "ra", "dec", and id_column - (and also "object_type" colum if galaxy is False, possibly + (and also "object_type" colum if source_type is "star" + or "Gaia_star", possibly start_mjd and end_mjd if galaxy is False and mjd is not None) id_column string region mask should restrict to this region (or not at all if None) - source_type string or set (or other container) of expected object type(s). 
- For now, must be a singleton - mjd if not none, may be used to filter transient objects + source_type string of expected object type + mjd if not none, may be used to filter transient or variable + objects Returns ------- 4 values for galaxies and snana: ra, dec, id, mask 5 values for pointsources: ra, dec, id, object_type, mask + 5 values for SSO: ra, dec, id, mjd, mask If objects are in the region, ra, dec, id correspond to those objects. mask will mask off unused objects If there are no objects in the region, all return values are None @@ -97,13 +99,11 @@ def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, if isinstance(tbl[id_column][0], (int, np.int64)): tbl[id_column] = [str(an_id) for an_id in tbl[id_column]] - if not isinstance(source_type, str): - source_type = set(source_type) - if len(source_type) != 1: - raise NotImplementedError('_compress_via_mask only accepts singleton object_type') - source_type = source_type.pop() - no_obj_type_return = (source_type in {'galaxy', 'diffsky_galaxy', 'snana'}) - time_filter = ('start_mjd' in tbl) and ('end_mjd' in tbl) and mjd is not None + no_obj_type_return = (source_type in {'galaxy', 'diffsky_galaxy', + 'snana', 'sso'}) + no_mjd_return = (source_type != 'sso') # for now + transient_filter = ('start_mjd' in tbl) and ('end_mjd' in tbl) and mjd is not None + variable_filter = ('mjd' in tbl) if region is not None: if isinstance(region, PolygonalRegion): # special case @@ -117,9 +117,9 @@ def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, # Compute mask for that box mask = _compute_region_mask(bnd_box, tbl['ra'], tbl['dec']) if all(mask): # even bounding box doesn't intersect table rows - if no_obj_type_return: + if no_obj_type_return and no_mjd_return: return None, None, None, None - else: + else: # currently if object type is returned, mjd is not return None, None, None, None, None # Get compressed ra, dec @@ -135,13 +135,16 @@ def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, else: mask = _compute_region_mask(region, tbl['ra'], tbl['dec']) - if time_filter: - time_mask = _compute_time_mask(mjd, tbl['start_mjd'], - tbl['end_mjd']) + if transient_filter: + time_mask = _compute_transient_mask(mjd, tbl['start_mjd'], + tbl['end_mjd']) + mask = np.logical_or(mask, time_mask) + elif variable_filter: + time_mask = _compute_variable_mask(mjd, tbl['mjd']) mask = np.logical_or(mask, time_mask) if all(mask): - if no_obj_type_return: + if no_obj_type_return and no_mjd_return: return None, None, None, None else: return None, None, None, None, None @@ -150,7 +153,11 @@ def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, dec_compress = ma.array(tbl['dec'], mask=mask).compressed() id_compress = ma.array(tbl[id_column], mask=mask).compressed() if no_obj_type_return: - return ra_compress, dec_compress, id_compress, mask + if no_mjd_return: + return ra_compress, dec_compress, id_compress, mask + else: + mjd_compress = ma.array(tbl['mjd'], mask=mask).compressed() + return ra_compress, dec_compress, id_compress, mjd_compress, mask else: object_type_compress = ma.array(tbl['object_type'], @@ -158,15 +165,27 @@ def _compress_via_mask(tbl, id_column, region, source_type={'galaxy'}, return ra_compress, dec_compress, id_compress, object_type_compress, mask else: if no_obj_type_return: - if time_filter: - time_mask = _compute_time_mask(mjd, tbl['start_mjd'], - tbl['end_mjd']) + if transient_filter: + time_mask = _compute_transient_mask(mjd, tbl['start_mjd'], + tbl['end_mjd']) ra_compress 
= ma.array(tbl['ra'], mask=time_mask).compressed() dec_compress = ma.array(tbl['dec'], mask=time_mask).compressed() id_compress = ma.array(tbl[id_column], mask=time_mask).compressed() return ra_compress, dec_compress, id_compress, time_mask + elif variable_filter: + time_mask = _compute_variable_mask(mjd, tbl['mjd']) + if time_mask: + ra_compress = ma.array(tbl['ra'], mask=time_mask).compressed() + dec_compress = ma.array(tbl['dec'], + mask=time_mask).compressed() + id_compress = ma.array(tbl[id_column], + mask=time_mask).compressed() + mjd_compress = ma.array(tbl['mjd'], mask=time_mask).compressed() + return ra_compress, dec_compress, id_compress, mjd_compress, time_mask + else: + return tbl['ra'], tbl['dec'], tbl[id_column], tbl['mjd'], None else: return tbl['ra'], tbl['dec'], tbl[id_column], None else: @@ -213,7 +232,7 @@ def _compute_region_mask(region, ra, dec): return mask -def _compute_time_mask(current_mjd, start_mjd, end_mjd): +def _compute_transient_mask(current_mjd, start_mjd, end_mjd): ''' Starting with an existing mask of excluded objects, exclude additional objects not visible at time current_mjd @@ -233,6 +252,32 @@ def _compute_time_mask(current_mjd, start_mjd, end_mjd): return mask +MJD_EPS = 0.00002 # about 1.7 seconds + + +def _compute_variable_mask(current_mjd, mjd_column, epsilon=MJD_EPS): + ''' + Compute mask to exclude all entries with + abs(mjd - current_mjd) > epsilon + + Parameters + ---------- + current_mjd float mjd of interest + mjd_column array of float mjd for each entry + epsilon float tolerance for matching mjd entry + + Returns + ------- + mask + ''' + if not current_mjd: + return None + + diff = current_mjd - mjd_column + mask = np.logical_or((diff > epsilon), (diff < -epsilon)) + return mask + + class SkyCatalog(object): ''' A base class with derived classes for galaxies, static (w.r.t. 
coordinates) @@ -291,9 +336,9 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, # for 'object_types', map object type to filepath self._hp_info = dict() _ = self._find_all_hps() - if 'sso' in self.raw_config['object_types']: - self._sso_files = find_sso_files(self._cat_dir, - self.raw_config['object_types']['sso']) +# if 'sso' in self.raw_config['object_types']: +# self._sso_files = find_sso_files(self._cat_dir, +# self.raw_config['object_types']['sso']) # NOTE: the use of TophatSedFactory is appropriate *only* for an # input galaxy catalog with format like cosmoDC2, which includes @@ -346,8 +391,7 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, if 'sso' in config['object_types']: self.cat_cxt.register_source_type('sso', object_class=SsoObject, - collection_class=SsoCollection, - custom_load=True) + collection_class=SsoCollection) @property def observed_sed_factory(self): @@ -631,14 +675,17 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): return object_list if object_type in ['galaxy', 'diffsky_galaxy']: - COLUMNS = ['galaxy_id', 'ra', 'dec'] + columns = ['galaxy_id', 'ra', 'dec'] id_name = 'galaxy_id' elif object_type in ['snana']: - COLUMNS = ['id', 'ra', 'dec', 'start_mjd', 'end_mjd'] + columns = ['id', 'ra', 'dec', 'start_mjd', 'end_mjd'] id_name = 'id' elif object_type in ['star', 'sncosmo']: - COLUMNS = ['object_type', 'id', 'ra', 'dec'] + columns = ['object_type', 'id', 'ra', 'dec'] id_name = 'id' + elif object_type in ['sso']: + id_name = 'id' + columns = ['id', 'ra', 'dec', 'mjd'] else: raise NotImplementedError(f'Unsupported object type {object_type}') @@ -652,7 +699,6 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): f_list = self._hp_info[hp]['object_types'][object_type] \ if object_type in self._hp_info[hp]['object_types'] else [] elif 'parent' in self._config['object_types'][object_type]: - # ##f_list = self._hp_info[hp]['object_types'][self._config['object_types'][ot]['parent']] f_list = self._hp_info[hp]['object_types'][self._config['object_types'][object_type]['parent']] for f in f_list: @@ -683,13 +729,13 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): # Make a collection for each row group for rg in range(rdr.n_row_groups): - arrow_t = rdr.read_columns(COLUMNS, None, rg) + arrow_t = rdr.read_columns(columns, None, rg) if object_type in {'galaxy', 'diffsky_galaxy', 'snana'}: ra_c, dec_c, id_c, mask =\ _compress_via_mask(arrow_t, id_name, region, - source_type={object_type}, + source_type=object_type, mjd=mjd) if ra_c is not None: new_collection = coll_class(ra_c, dec_c, id_c, @@ -705,7 +751,20 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): SED_file = os.path.join(self._cat_dir, base) new_collection.set_SED_file(SED_file) object_list.append_collection(new_collection) - + elif object_type in {'sso'}: + ra_c, dec_c, id_c, mjd_c, mask =\ + _compress_via_mask(arrow_t, + id_name, + region, + source_type=object_type, + mjd=mjd) + new_collection = SsoCollection(ra_c, dec_c, id_c, hp, self, + mjd_individual=mjd_c, + region=region, + mjd=mjd, mask=mask, + readers=the_readers, + row_group=rg) + object_list.append_collection(new_collection) else: ra_c, dec_c, id_c, object_type_c, mask =\ _compress_via_mask(arrow_t, id_name, region, diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 7b74e95a..a83c0b47 100644 --- a/skycatalogs/sso_catalog_creator.py +++ 
b/skycatalogs/sso_catalog_creator.py @@ -1,9 +1,5 @@ import os -# import sys -# import re -# import logging import numpy as np -# import numpy.ma as ma import sqlite3 import pandas as pd import pyarrow as pa @@ -19,7 +15,6 @@ _DEFAULT_ROW_GROUP_SIZE = 100000 # Maybe could be larger - class SsoCatalogCreator: _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/6feb2024' _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db' @@ -50,8 +45,8 @@ def __init__(self, catalog_creator, output_dir, mjd_c = 'FieldMJD_TAI' self._mjd_q = f'select min({mjd_c}), max({mjd_c}), count({mjd_c}) from {tbl}' self._dfhp_query = f'''select ObjID as id, {mjd_c} as mjd, - "AstRA(deg)" as ra, "AstDec(deg)" as dec, - "AstRARate(deg/day)" as ra_rate, + "AstRA(deg)" as ra, "AstDec(deg)" as dec, + "AstRARate(deg/day)" as ra_rate, "AstDecRate(deg/day)" as dec_rate, observedTrailedSourceMag from {tbl} where healpix = (?) order by mjd, ObjID''' @@ -63,7 +58,7 @@ def sso_truth(self): @property def sso_sed(self): return self._sso_sed - + def _create_main_schema(self): fields = [ @@ -90,7 +85,7 @@ def _create_flux_schema(self): return pa.schema(fields) def _get_hps(self, filepath): - + with sqlite3.connect(filepath) as conn: df = pd.read_sql_query('select distinct healpix from pp_results', conn) @@ -118,7 +113,7 @@ def _write_hp(self, hp, hps_by_file, arrow_schema): # For now only a single row group will be necessary tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema) writer.write_table(tbl) - + def create_sso_catalog(self): """ @@ -127,7 +122,7 @@ def create_sso_catalog(self): # Find all the db files from Sorcha. They should all be in a single # directory with no other files in that directory files = os.listdir(self._sso_truth) - arrow_schema = self._create_main_schema() + arrow_schema = self._create_main_schema() db_files = [os.path.join(self._sso_truth, f) for f in files if f.endswith('.db')] all_hps = set() @@ -135,25 +130,21 @@ def create_sso_catalog(self): for f in db_files: hps_by_file[f] = self._get_hps(f) all_hps = sorted(all_hps.union(hps_by_file[f])) - - for h in all_hps: + + todo = self._catalog_creator._parts + if len(todo) == 0: + todo = all_hps + for h in todo: self._write_hp(h, hps_by_file, arrow_schema) - - def _create_sso_flux_file(self, info, arrow_schema): - ''' - Parameters - ---------- - info dict information pertaining to an existing sso main file - arrow_schema to be used in creating the new flux file - ''' - object_list = self._cat.get_object_type_by_region(None, 'sso', - mjd=None, - filepath=info['path']) - colls = object_list.get_collections() - outname = f"sso_flux_{info['hp']}.parquet" - writer = pq.ParquetWriter(os.path.join(self._output_dir, outname), - arrow_schema) + def _create_sso_flux_pixel(self, pixel, arrow_schema): + output_filename = f'sso_flux_{pixel}.parquet' + output_path = os.path.join(self._catalog_creator._output_dir, + output_filename) + + object_list = self._cat.get_object_type_by_hp(pixel, 'sso') + colls = object_list.get_collections() + writer = pq.ParquetWriter(output_path, arrow_schema) outs = dict() for c in colls: outs['id'] = c._id @@ -172,7 +163,7 @@ def _create_sso_flux_file(self, info, arrow_schema): def create_sso_flux_catalog(self): """ Create sso flux catalog. Includes id, mjd (since given sso normally - has several observations) and fluxes. + has several observations) and fluxes. 
""" from .skyCatalogs import open_catalog @@ -187,6 +178,9 @@ def create_sso_flux_catalog(self): self._logger.info('Creating sso flux files') # For each main file make a corresponding flux file - for f, info in self._cat._sso_files.items(): - if info['scope'] == 'main': - self._create_sso_flux_file(info, arrow_schema) + # for f, info in self._cat._sso_files.items(): + # if info['scope'] == 'main': + # self._create_sso_flux_file(info, arrow_schema) + + for p in self._catalog_creator._parts: + self._create_sso_flux_pixel(p, arrow_schema) From 53dd077ec2449cc8a37b4caecafc6c5c5825965d Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Fri, 9 Feb 2024 13:33:06 -0800 Subject: [PATCH 14/31] Bug fix for sso in get_object_type_by_hp --- skycatalogs/skyCatalogs.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 400aa8ee..6b9f7496 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -176,7 +176,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', return ra_compress, dec_compress, id_compress, time_mask elif variable_filter: time_mask = _compute_variable_mask(mjd, tbl['mjd']) - if time_mask: + if time_mask is not None: ra_compress = ma.array(tbl['ra'], mask=time_mask).compressed() dec_compress = ma.array(tbl['dec'], mask=time_mask).compressed() @@ -758,13 +758,15 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): region, source_type=object_type, mjd=mjd) - new_collection = SsoCollection(ra_c, dec_c, id_c, hp, self, - mjd_individual=mjd_c, - region=region, - mjd=mjd, mask=mask, - readers=the_readers, - row_group=rg) - object_list.append_collection(new_collection) + if ra_c is not None: + new_collection = SsoCollection(ra_c, dec_c, id_c, hp, + self, + mjd_individual=mjd_c, + region=region, + mjd=mjd, mask=mask, + readers=the_readers, + row_group=rg) + object_list.append_collection(new_collection) else: ra_c, dec_c, id_c, object_type_c, mask =\ _compress_via_mask(arrow_t, id_name, region, From f886e94fb175a92c3c44fc7021ef10e7c3cdeaff Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Fri, 9 Feb 2024 13:34:00 -0800 Subject: [PATCH 15/31] extra check for empty ObjectCollection --- skycatalogs/objects/base_object.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index 2377ad32..1ff4b07c 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -570,7 +570,10 @@ def __contains__(self, obj): return id in self._id def __len__(self): - return len(self._id) + if self._id is not None: + return len(self._id) + else: + return 0 # def __iter__(self): Standard impl based on __getitem__ should be ok # def __reversed__(self): Default implementation ok From e3b5aaa35c66b9a4b2755df735e91fc31c771c53 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Tue, 13 Feb 2024 23:46:32 -0800 Subject: [PATCH 16/31] option to simulate sso streaks --- skycatalogs/objects/sso_object.py | 55 +++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 67867df7..6ccf42aa 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -3,10 +3,13 @@ import numpy as np import galsim from .base_object import BaseObject, ObjectCollection +from lsst.sphgeom import Angle, NormalizedAngle, UnitVector3d +# import healpy 
__all__ = ['SsoObject', 'SsoCollection'] + class SsoObject(BaseObject): _type_name = 'sso' _solar_sed = None @@ -17,6 +20,10 @@ def __init__(self, ra, dec, id, object_type, belongs_to, belongs_index, belongs_index) self._mjd = mjd + @property + def mjd(self): + return self._mjd + def _get_sed(self, mjd=None): ''' returns a SED and magnorm @@ -31,15 +38,49 @@ def _get_sed(self, mjd=None): # Do we look for specific entry or do we allow interpolation? return SsoObject._solar_sed, self.get_native_attribute('observedTrailedSourceMag') - def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0): + def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0, + streak=False): + skinny = 1.e-6 # ratio of width to height in our box + if gsparams is not None: gsparams = galsim.GSParams(**gsparams) - # For a streak we use galsim.?? (galsim.Box?) - # To get the dimensions of the box, use ra rate, dec rate and - # exposure length. The first two will be in the sky catalogs - # parquet file; the last will be passed in. - # For now start with the simpler thing: just a point source. - return {'this_object': galsim.DeltaFunction(gsparams=gsparams)} + + if streak: + # For a streak make galsim.Box with length the angular distance + # in arcseconds from initial ra, dec to final + # Make the width very, very small + # Then rotate appropriately + ra = self.ra + dec = self.dec + # Convert from degrees/day to arcsec/sec + ra_rate = self.get_native_attribute('ra_rate')/24.0 + dec_rate = self.get_native_attribute('dec_rate')/24.0 + ra_final = ra + ra_rate * exposure + dec_final = dec + dec_rate * exposure + + init_v = UnitVector3d(NormalizedAngle.fromDegrees(ra), + Angle.fromDegrees(dec)) + final_v = UnitVector3d(NormalizedAngle.fromDegrees(ra_final), + Angle.fromDegrees(dec_final)) + chord = (final_v - init_v).getNorm() + length = Angle(np.arccos(1.0 - (chord*chord/2.0))).asDegrees() * 3600 + + gobj = galsim.Box(length, skinny*length, gsparams=gsparams) + # now rotate to direction of (ra_rate, dec_rate) + try: + angle_rad = galsim.Angle(np.arctan(dec_rate/(ra_rate * np.cos(dec))), galsim.radians) + + gobj = gobj.rotate(angle_rad) + except ZeroDivisionError: + gobj = gobj.rotate(galsim.Angle(90, galsim.degrees)) + return {'this_object': gobj} + else: + # To get the dimensions of the box, use ra rate, dec rate and + # exposure length. The first two will be in the sky catalogs + # parquet file; the last will be passed in. + # For now start with the simpler thing: just a point source. 
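The arccos inversion of the chord length above is poorly conditioned for the
short streaks expected in a single exposure, since 1 - chord**2/2 rounds to
exactly 1.0 in double precision once the chord is small; the next patch
switches to an equivalent but stable form. A quick comparison:

    import numpy as np

    chord = 1.0e-9                                # unit-sphere chord length
    print(np.arccos(1.0 - chord*chord/2.0))       # 0.0, precision lost
    print(2.0 * np.arcsin(chord/2.0))             # 1e-09, correct here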
+
+            return {'this_object': galsim.DeltaFunction(gsparams=gsparams)}
 
     def get_observer_sed_component(self, component, mjd=None):
 

From 878d214b4dcf5914fa2b81021d78adf2c30f7be4 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Wed, 14 Feb 2024 14:31:50 -0800
Subject: [PATCH 17/31] better way to find arc length of chord

---
 skycatalogs/objects/sso_object.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index 6ccf42aa..37ffa986 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -63,7 +63,7 @@ def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0,
             final_v = UnitVector3d(NormalizedAngle.fromDegrees(ra_final),
                                    Angle.fromDegrees(dec_final))
             chord = (final_v - init_v).getNorm()
-            length = Angle(np.arccos(1.0 - (chord*chord/2.0))).asDegrees() * 3600
+            length = Angle(2 * np.arcsin(chord/2.0)).asDegrees() * 3600
 
             gobj = galsim.Box(length, skinny*length, gsparams=gsparams)
             # now rotate to direction of (ra_rate, dec_rate)

From b84068e76c3368f6e396bf195fa7292ac89b77f0 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Thu, 15 Feb 2024 10:20:50 -0800
Subject: [PATCH 18/31] use latest Sorcha output for default sso truth

---
 skycatalogs/sso_catalog_creator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py
index a83c0b47..e8e3010c 100644
--- a/skycatalogs/sso_catalog_creator.py
+++ b/skycatalogs/sso_catalog_creator.py
@@ -16,7 +16,7 @@
 
 class SsoCatalogCreator:
-    _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/6feb2024'
+    _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/8feb2024'
     _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db'
 
     def __init__(self, catalog_creator, output_dir,

From d28213758ef26ec881aa390d302a43c94d291b0e Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Sat, 17 Feb 2024 18:41:44 -0800
Subject: [PATCH 19/31] Allow creation of SSO catalogs for specified healpixels as well as for all

---
 skycatalogs/scripts/create_sc.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/skycatalogs/scripts/create_sc.py b/skycatalogs/scripts/create_sc.py
index 9861e443..d53711f0 100644
--- a/skycatalogs/scripts/create_sc.py
+++ b/skycatalogs/scripts/create_sc.py
@@ -154,7 +154,11 @@
                          include_roman_flux=args.include_roman_flux,
                          star_input_fmt=args.star_input_fmt,
                          sso_truth=args.sso_truth, sso_sed=args.sso_sed)
-logger.info(f'Starting with healpix pixel {parts[0]}')
+if len(parts) > 0:
+    logger.info(f'Starting with healpix pixel {parts[0]}')
+elif args.sso:
+    logger.info('Creating catalogs for all available healpixels')
+
 if not args.no_galaxies:
     logger.info("Creating galaxy catalogs")
     creator.create('galaxy')
@@ -166,6 +170,6 @@
 if args.sso:
     logger.info("Creating SSO catalogs")
     creator.create('sso')
-
+
 logger.info('All done')
 print_date()
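PATCH 20 below tracks a renamed column in Sorcha output by aliasing it in the
query, so the skyCatalogs-side schema stays stable. The pattern in miniature,
with an in-memory table standing in for real Sorcha output:

    import sqlite3
    import pandas as pd

    with sqlite3.connect(':memory:') as conn:
        conn.execute('create table pp_results (TrailedSourceMag real)')
        conn.execute('insert into pp_results values (21.5)')
        df = pd.read_sql_query(
            'select TrailedSourceMag as trailed_source_mag from pp_results',
            conn)
    print(df['trailed_source_mag'][0])            # 21.5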
"AstDec(deg)" as dec, "AstRARate(deg/day)" as ra_rate, "AstDecRate(deg/day)" as dec_rate, - observedTrailedSourceMag from {tbl} where healpix = (?) + TrailedSourceMag as trailed_source_mag from {tbl} + where healpix = (?) order by mjd, ObjID''' @property @@ -66,7 +67,7 @@ def _create_main_schema(self): pa.field('mjd', pa.float64()), pa.field('ra', pa.float64()), pa.field('dec', pa.float64()), - pa.field('observedTrailedSourceMag', pa.float64()), + pa.field('trailed_source_mag', pa.float64()), pa.field('ra_rate', pa.float64()), pa.field('dec_rate', pa.float64())] return pa.schema(fields) From 500e1a281a2b8b2860ebee0af5a673880a819b43 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sat, 17 Feb 2024 18:44:26 -0800 Subject: [PATCH 21/31] assume ra rate of change from Sorcha catalog has incorporate cos(dec) --- skycatalogs/objects/sso_object.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 37ffa986..446a7edf 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -36,7 +36,7 @@ def _get_sed(self, mjd=None): # directly or are there other effects to be applied? # Have to find it by looking for entry for this id, this mjd # Do we look for specific entry or do we allow interpolation? - return SsoObject._solar_sed, self.get_native_attribute('observedTrailedSourceMag') + return SsoObject._solar_sed, self.get_native_attribute('trailed_source_mag') def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0, streak=False): @@ -52,10 +52,15 @@ def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0, # Then rotate appropriately ra = self.ra dec = self.dec - # Convert from degrees/day to arcsec/sec + # Convert from (arc?)degrees/day to arcsec/sec ra_rate = self.get_native_attribute('ra_rate')/24.0 + # Take out factor of cos(dec). 
From 500e1a281a2b8b2860ebee0af5a673880a819b43 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Sat, 17 Feb 2024 18:44:26 -0800
Subject: [PATCH 21/31] assume ra rate of change from Sorcha catalog has incorporated cos(dec)

---
 skycatalogs/objects/sso_object.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index 37ffa986..446a7edf 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -36,7 +36,7 @@ def _get_sed(self, mjd=None):
         # directly or are there other effects to be applied?
         # Have to find it by looking for entry for this id, this mjd
         # Do we look for specific entry or do we allow interpolation?
-        return SsoObject._solar_sed, self.get_native_attribute('observedTrailedSourceMag')
+        return SsoObject._solar_sed, self.get_native_attribute('trailed_source_mag')
 
     def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0,
                                 streak=False):
@@ -52,10 +52,15 @@ def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0,
             # Then rotate appropriately
             ra = self.ra
             dec = self.dec
-            # Convert from degrees/day to arcsec/sec
+            # Convert from (arc?)degrees/day to arcsec/sec
             ra_rate = self.get_native_attribute('ra_rate')/24.0
+            # Take out factor of cos(dec).
+            ra_rate_raw = ra_rate/np.cos(dec)
+            # ra_rate = self.get_native_attribute('ra_rate')/24.0
             dec_rate = self.get_native_attribute('dec_rate')/24.0
-            ra_final = ra + ra_rate * exposure
+            # ra_final is approximate since really ra_rate is a function
+            # of dec, but average dec_rate is small so
+            ra_final = ra + ra_rate_raw * exposure
             dec_final = dec + dec_rate * exposure
 
             init_v = UnitVector3d(NormalizedAngle.fromDegrees(ra),
@@ -68,7 +73,11 @@ def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0,
             gobj = galsim.Box(length, skinny*length, gsparams=gsparams)
             # now rotate to direction of (ra_rate, dec_rate)
             try:
-                angle_rad = galsim.Angle(np.arctan(dec_rate/(ra_rate * np.cos(dec))), galsim.radians)
+                # angle_rad = galsim.Angle(np.arctan(dec_rate/(ra_rate * np.cos(dec))), galsim.radians)
+                # NOTE: Probably cos(dec) has already been applied, in which
+                # case we want
+                angle_rad = galsim.Angle(np.arctan(dec_rate/ra_rate),
+                                         galsim.radians)
 
                 gobj = gobj.rotate(angle_rad)
             except ZeroDivisionError:
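The convention PATCH 21 assumes is that Sorcha reports an on-sky RA rate, i.e. (dRA/dt)*cos(dec), so the factor has to be divided back out before advancing the RA coordinate itself. A minimal sketch of that bookkeeping with hypothetical values (note that np.cos expects radians, so a dec held in degrees needs an explicit conversion):

    import numpy as np

    dec = -5.0                      # degrees
    ra_rate_sky = 0.75 / 86400.0    # on-sky rate, deg/day -> deg/sec, cos(dec) included
    dec_rate = 0.20 / 86400.0       # deg/day -> deg/sec

    # coordinate rate: divide the cos(dec) factor back out
    ra_rate_coord = ra_rate_sky / np.cos(np.radians(dec))

    exposure = 30.0                 # seconds
    # first-order position advance; fine over so short an interval
    ra_offset = ra_rate_coord * exposure
    dec_offset = dec_rate * exposure
    print(ra_offset, dec_offset)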
From 59bf447c9ae1ec490558377a778d6740443f2241 Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Tue, 20 Feb 2024 18:00:33 -0800
Subject: [PATCH 22/31] parallelize flux computation

---
 skycatalogs/sso_catalog_creator.py | 134 +++++++++++++++++++++++++----
 1 file changed, 118 insertions(+), 16 deletions(-)

diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py
index 92b59d09..5cf8d240 100644
--- a/skycatalogs/sso_catalog_creator.py
+++ b/skycatalogs/sso_catalog_creator.py
@@ -1,4 +1,6 @@
 import os
+import sys
+from multiprocessing import Process, Pipe
 import numpy as np
 import sqlite3
 import pandas as pd
@@ -15,6 +17,44 @@
 _DEFAULT_ROW_GROUP_SIZE = 100000       # Maybe could be larger
 
 
+def _do_sso_flux_chunk(send_conn, sso_collection, instrument_needed,
+                       l_bnd, u_bnd):
+    '''
+    send_conn          output connection
+    sso_collection     information from main file
+    instrument_needed  List of which calculations should be done
+    l_bnd, u_bnd       demarcates slice to process
+
+    returns
+           dict with keys id, lsst_flux_u, ... lsst_flux_y
+    '''
+    out_dict = {}
+
+    o_list = sso_collection[l_bnd: u_bnd]
+    # out_dict['id'] = [o.get_native_attribute('id') for o in o_list]
+    # out_dict['mjd'] = [o.get_native_attribute('mjd') for o in o_list]
+    out_dict['id'] = list(sso_collection._id[l_bnd: u_bnd])
+    out_dict['mjd'] = list(sso_collection._mjds[l_bnd: u_bnd])
+    if 'lsst' in instrument_needed:
+        all_fluxes = [o.get_LSST_fluxes(as_dict=False, mjd=o._mjd) for o in o_list]
+        all_fluxes_transpose = zip(*all_fluxes)
+        for i, band in enumerate(LSST_BANDS):
+            v = all_fluxes_transpose.__next__()
+            out_dict[f'lsst_flux_{band}'] = v
+
+    # if 'roman' in instrument_needed:
+    #     all_fluxes = [o.get_roman_fluxes(as_dict=False) for o in o_list]
+    #     all_fluxes_transpose = zip(*all_fluxes)
+    #     for i, band in enumerate(ROMAN_BANDS):
+    #         v = all_fluxes_transpose.__next__()
+    #         out_dict[f'roman_flux_{band}'] = v
+
+    if send_conn:
+        send_conn.send(out_dict)
+    else:
+        return out_dict
+
+
 class SsoCatalogCreator:
     _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/8feb2024'
     _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db'
 
     def __init__(self, catalog_creator, output_dir,
@@ -115,7 +155,6 @@ def _write_hp(self, hp, hps_by_file, arrow_schema):
             tbl = pa.Table.from_pandas(df_sorted, schema=arrow_schema)
             writer.write_table(tbl)
 
-
     def create_sso_catalog(self):
         """
         Create the 'main' sso catalog, including everything except fluxes
@@ -139,27 +178,95 @@
             self._write_hp(h, hps_by_file, arrow_schema)
 
     def _create_sso_flux_pixel(self, pixel, arrow_schema):
+        '''
+        Create parquet file for a single healpix pixel, containing
+        id, mjd and fluxes
+        Parameters
+        pixel          int
+        arrow_schema   schema for parquet file to be output
+        Return
+        ------
+        None
+        '''
+
+        global _sso_collection
         output_filename = f'sso_flux_{pixel}.parquet'
         output_path = os.path.join(self._catalog_creator._output_dir,
                                    output_filename)
 
         object_list = self._cat.get_object_type_by_hp(pixel, 'sso')
+        n_parallel = self._catalog_creator._flux_parallel
+
         colls = object_list.get_collections()
-        writer = pq.ParquetWriter(output_path, arrow_schema)
-        outs = dict()
+        # outs = dict()
+        fields_needed = arrow_schema.names
+        instrument_needed = ['lsst']
+        rg_written = 0
+        writer = None
+
+        # There is only one row group per main healpixel file currently
+        # so the loop could be dispensed with
        for c in colls:
-            outs['id'] = c._id
-            outs['mjd'] = c._mjds
-            all_fluxes = [o.get_LSST_fluxes(as_dict=False, mjd=o._mjd) for o in c]
-            all_fluxes_transpose = zip(*all_fluxes)
-            for i, band in enumerate(LSST_BANDS):
-                v = all_fluxes_transpose.__next__()
-                outs[f'lsst_flux_{band}'] = v
-            out_df = pd.DataFrame.from_dict(outs)
-            out_table = pa.Table.from_pandas(out_df, schema=arrow_schema)
+            _sso_collection = c
+            l_bnd = 0
+            u_bnd = len(c)
+            if n_parallel == 1:
+                n_per = u_bnd - l_bnd
+            else:
+                n_per = int((u_bnd - l_bnd + n_parallel)/n_parallel)
+
+            lb = l_bnd
+            u = min(l_bnd + n_per, u_bnd)
+            readers = []
+            if n_parallel == 1:
+                out_dict = _do_sso_flux_chunk(None, c, instrument_needed,
+                                              l_bnd, u_bnd)
+            else:
+                out_dict = {}
+                for field in fields_needed:
+                    out_dict[field] = []
+
+                tm = max(int((n_per*60)/500), 5)
+                self._logger.info(f'Using timeout value {tm} for {n_per} sources')
+                p_list = []
+                for i in range(n_parallel):
+                    conn_rd, conn_wrt = Pipe(duplex=False)
+                    readers.append(conn_rd)
+
+                    # For debugging call directly
+                    proc = Process(target=_do_sso_flux_chunk,
+                                   name=f'proc_{i}',
+                                   args=(conn_wrt, _sso_collection,
+                                         instrument_needed, lb, u))
+                    proc.start()
+                    p_list.append(proc)
+                    lb = u
+                    u = min(lb + n_per, u_bnd)
+
self._logger.debug('Processes started') + for i in range(n_parallel): + ready = readers[i].poll(tm) + if not ready: + self._logger.error(f'Process {i} timed out after {tm} sec') + sys.exit(1) + dat = readers[i].recv() + for field in fields_needed: + out_dict[field] += dat[field] + for p in p_list: + p.join() + + out_df = pd.DataFrame.from_dict(out_dict) + out_table = pa.Table.from_pandas(out_df, + schema=arrow_schema) + + if not writer: + writer = pq.ParquetWriter(output_path, arrow_schema) writer.write_table(out_table) + rg_written += 1 + writer.close() + self._logger.debug(f'# row groups written to flux file: {rg_written}') def create_sso_flux_catalog(self): """ @@ -178,10 +285,5 @@ def create_sso_flux_catalog(self): self._logger.info('Creating sso flux files') - # For each main file make a corresponding flux file - # for f, info in self._cat._sso_files.items(): - # if info['scope'] == 'main': - # self._create_sso_flux_file(info, arrow_schema) - for p in self._catalog_creator._parts: self._create_sso_flux_pixel(p, arrow_schema) From b316e2135f1c2a283c424d24deff739f8a7e9fd5 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Wed, 21 Feb 2024 10:55:03 -0800 Subject: [PATCH 23/31] make latest Sorcha output our default input --- skycatalogs/sso_catalog_creator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 5cf8d240..6f30bed1 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -56,7 +56,7 @@ def _do_sso_flux_chunk(send_conn, sso_collection, instrument_needed, class SsoCatalogCreator: - _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/8feb2024' + _sso_truth = '/sdf/home/j/jrb/rubin-user/sso/input/20feb2024' _sso_sed = '/sdf/home/j/jrb/rubin-user/sso/sed/solar_sed.db' def __init__(self, catalog_creator, output_dir, From 31af3dad0169745af325f381a833bea2a7411e9f Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Wed, 21 Feb 2024 12:51:00 -0800 Subject: [PATCH 24/31] handle very small input SSO files --- skycatalogs/sso_catalog_creator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/skycatalogs/sso_catalog_creator.py b/skycatalogs/sso_catalog_creator.py index 6f30bed1..c95511fa 100644 --- a/skycatalogs/sso_catalog_creator.py +++ b/skycatalogs/sso_catalog_creator.py @@ -211,6 +211,8 @@ def _create_sso_flux_pixel(self, pixel, arrow_schema): _sso_collection = c l_bnd = 0 u_bnd = len(c) + if (u_bnd - l_bnd) < 5 * n_parallel: + n_parallel = 1 if n_parallel == 1: n_per = u_bnd - l_bnd else: @@ -227,7 +229,7 @@ def _create_sso_flux_pixel(self, pixel, arrow_schema): for field in fields_needed: out_dict[field] = [] - tm = max(int((n_per*60)/500), 5) + tm = max(int((n_per*60)/500), 10) self._logger.info(f'Using timeout value {tm} for {n_per} sources') p_list = [] for i in range(n_parallel): From bad041f2c4787019e2d3096bc3a7fb5f5cf5ce69 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Fri, 1 Mar 2024 13:08:39 -0800 Subject: [PATCH 25/31] window for SSO mjd match now is exposure interval --- skycatalogs/objects/sso_object.py | 10 +++--- skycatalogs/skyCatalogs.py | 54 ++++++++++++++++++------------- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 446a7edf..4544faee 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -6,8 +6,8 @@ from lsst.sphgeom import Angle, NormalizedAngle, UnitVector3d # import healpy - -__all__ = 
['SsoObject', 'SsoCollection'] +EXPOSURE_DEFAULT = 30.0 # seconds +__all__ = ['SsoObject', 'SsoCollection', 'EXPOSURE_DEFAULT'] class SsoObject(BaseObject): @@ -38,9 +38,10 @@ def _get_sed(self, mjd=None): # Do we look for specific entry or do we allow interpolation? return SsoObject._solar_sed, self.get_native_attribute('trailed_source_mag') - def get_gsobject_components(self, gsparams=None, rng=None, exposure=15.0, + def get_gsobject_components(self, gsparams=None, rng=None, streak=False): skinny = 1.e-6 # ratio of width to height in our box + exposure = self._belongs_to._exposure if gsparams is not None: gsparams = galsim.GSParams(**gsparams) @@ -123,7 +124,7 @@ class SsoCollection(ObjectCollection): def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, region=None, mjd=None, - mask=None, readers=None, row_group=0): + mask=None, readers=None, row_group=0, exposure=EXPOSURE_DEFAULT): ''' Parameters ---------- @@ -144,6 +145,7 @@ def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, region=region, mjd=mjd, mask=mask, readers=readers, row_group=row_group) self._mjds = np.array(mjd_individual) + self._exposure = exposure def __getitem__(self, key): ''' diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 6b9f7496..e16ed83c 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -13,6 +13,7 @@ from skycatalogs.objects.base_object import ObjectList, ObjectCollection from skycatalogs.objects.gaia_object import GaiaObject, GaiaCollection from skycatalogs.objects.sso_object import SsoObject, SsoCollection +from skycatalogs.objects.sso_object import EXPOSURE_DEFAULT # from skycatalogs.objects.sso_object import find_sso_files from skycatalogs.readers import ParquetReader from skycatalogs.utils.sed_tools import TophatSedFactory, DiffskySedFactory @@ -71,7 +72,7 @@ def _get_intersecting_hps(hp_ordering, nside, region): def _compress_via_mask(tbl, id_column, region, source_type='galaxy', - mjd=None): + mjd=None, exposure=EXPOSURE_DEFAULT): ''' Parameters ---------- @@ -85,6 +86,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', source_type string of expected object type mjd if not none, may be used to filter transient or variable objects + exposure length of exposure if mjd is not None Returns ------- @@ -140,7 +142,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', tbl['end_mjd']) mask = np.logical_or(mask, time_mask) elif variable_filter: - time_mask = _compute_variable_mask(mjd, tbl['mjd']) + time_mask = _compute_variable_mask(mjd, tbl['mjd'], exposure) mask = np.logical_or(mask, time_mask) if all(mask): @@ -175,7 +177,7 @@ def _compress_via_mask(tbl, id_column, region, source_type='galaxy', mask=time_mask).compressed() return ra_compress, dec_compress, id_compress, time_mask elif variable_filter: - time_mask = _compute_variable_mask(mjd, tbl['mjd']) + time_mask = _compute_variable_mask(mjd, tbl['mjd'], exposure) if time_mask is not None: ra_compress = ma.array(tbl['ra'], mask=time_mask).compressed() dec_compress = ma.array(tbl['dec'], @@ -252,18 +254,20 @@ def _compute_transient_mask(current_mjd, start_mjd, end_mjd): return mask -MJD_EPS = 0.00002 # about 1.7 seconds +MJD_EPS = 0.000001 # < 0.1 seconds +JD_SEC = 1.0/(24.0*3600.0) -def _compute_variable_mask(current_mjd, mjd_column, epsilon=MJD_EPS): +def _compute_variable_mask(current_mjd, mjd_column, exposure, epsilon=MJD_EPS): ''' Compute mask to exclude all entries with - abs(mjd - current_mjd) > epsilon + mjd_column > 
current_mjd + exposure (converted to days) or
+    mjd_column < current_mjd - epsilon
 
     Parameters
     ----------
     current_mjd    float     mjd of interest
     mjd_column     array of float   mjd for each entry
+    exposure       float     exposure in seconds
     epsilon        float     tolerance for matching mjd entry
 
     Returns
     -------
     '''
     if not current_mjd:
         return None
 
-    diff = current_mjd - mjd_column
-    mask = np.logical_or((diff > epsilon), (diff < -epsilon))
+    exposure_jd = exposure * JD_SEC
+    mask = np.logical_or(mjd_column < (current_mjd - epsilon),
+                         mjd_column > (current_mjd + exposure_jd))
     return mask
 
 
@@ -576,7 +581,8 @@ def toplevel_only(self, object_types):
                 objs_copy.add(parent)
         return objs_copy
 
-    def get_objects_by_region(self, region, obj_type_set=None, mjd=None):
+    def get_objects_by_region(self, region, obj_type_set=None, mjd=None,
+                              exposure=EXPOSURE_DEFAULT):
         '''
         Parameters
         ----------
@@ -585,6 +591,7 @@ def get_objects_by_region(self, region, obj_type_set=None, mjd=None,
         obj_type_set  Return only these objects. Defaults to value in config
                       if specified; else default to all defined in config
         mjd           MJD of observation epoch.
+        exposure      exposure length (seconds)
 
         Returns
         -------
@@ -610,21 +617,21 @@ def get_objects_by_region(self, region, obj_type_set=None, mjd=None,
             obj_types.sort()
 
         for ot in obj_types:
-            new_list = self.get_object_type_by_region(region, ot, mjd=mjd)
+            new_list = self.get_object_type_by_region(region, ot, mjd=mjd,
+                                                      exposure=exposure)
             object_list.append_object_list(new_list)
 
         return object_list
 
     def get_object_type_by_region(self, region, object_type, mjd=None,
-                                  filepath=None):
+                                  exposure=EXPOSURE_DEFAULT, filepath=None):
         '''
         Parameters
         ----------
         region        box, circle or PolygonalRegion. Supported region
                       types made depend on object_type
         object_type   known object type without parent
-        mjd           MJD of observation epoch. In case of custom load
-                      this argument may be a pair defining an interval
+        mjd           MJD of observation epoch.
         filepath      if not None, look only in this file.
                       Only used for custom loads
@@ -644,7 +651,7 @@ def get_object_type_by_region(self, region, object_type, mjd=None,
         if coll_type is not None:
             if self.cat_cxt.use_custom_load(object_type):
                 coll = coll_type.load_collection(region, self, mjd=mjd,
-                                                 filepath=filepath)
+                                                 exposure=exposure)
                 if isinstance(coll, ObjectCollection):
                     out_list.append_collection(coll)
                 else:    # ObjectList
@@ -655,15 +662,17 @@ def get_object_type_by_region(self, region, object_type, mjd=None,
             if partition['type'] == 'healpix':
                 hps = self.get_hps_by_region(region, object_type)
                 for hp in hps:
-                    c = self.get_object_type_by_hp(hp, object_type,
-                                                   region, mjd)
+                    c = self.get_object_type_by_hp(hp, object_type, region,
+                                                   mjd,
+                                                   exposure=exposure)
                     if len(c) > 0:
                         out_list.append_object_list(c)
                 return out_list
         else:
             raise NotImplementedError(f'Unsupported object type {object_type}')
 
-    def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None):
+    def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None,
+                              exposure=EXPOSURE_DEFAULT):
         object_list = ObjectList()
 
         # Do we need to check more specifically by object type?
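The net effect of the new window in PATCH 25: a row survives when current_mjd - epsilon <= mjd <= current_mjd + exposure (exposure converted from seconds to days), with True in the mask meaning "exclude", matching numpy masked-array conventions. A standalone check with made-up values:

    import numpy as np

    SECONDS_PER_DAY = 24.0 * 3600.0
    current_mjd = 60000.0
    exposure = 30.0                        # seconds
    epsilon = 0.1 / SECONDS_PER_DAY

    mjd_column = np.array([59999.9,                            # before window
                           60000.0,                            # at start
                           60000.0 + 15.0 / SECONDS_PER_DAY,   # mid-exposure
                           60000.0 + 60.0 / SECONDS_PER_DAY])  # after window
    mask = np.logical_or(mjd_column < current_mjd - epsilon,
                         mjd_column > current_mjd + exposure / SECONDS_PER_DAY)
    print(mask)   # [ True False False  True ]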
@@ -757,7 +766,7 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): id_name, region, source_type=object_type, - mjd=mjd) + mjd=mjd, exposure=exposure) if ra_c is not None: new_collection = SsoCollection(ra_c, dec_c, id_c, hp, self, @@ -765,7 +774,8 @@ def get_object_type_by_hp(self, hp, object_type, region=None, mjd=None): region=region, mjd=mjd, mask=mask, readers=the_readers, - row_group=rg) + row_group=rg, + exposure=exposure) object_list.append_collection(new_collection) else: ra_c, dec_c, id_c, object_type_c, mask =\ @@ -930,12 +940,12 @@ def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False): print('Number of collections returned for polygon: ', object_list_poly.collection_count) - assert(object_list.collection_count == object_list_poly.collection_count) + assert (object_list.collection_count == object_list_poly.collection_count) print('Object count for polygon: ', len(object_list_poly)) fudge = 5 - assert(len(object_list_poly) > len(object_list) - fudge) - assert(len(object_list_poly) < len(object_list) + fudge) + assert (len(object_list_poly) > len(object_list) - fudge) + assert (len(object_list_poly) < len(object_list) + fudge) print('Now try inscribed polygon which is not a box') ra_avg = (ra_min_small + ra_max_small) * 0.5 From d5f761be68d4d90b3ce8f909d5aab30e8ff06d2c Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sat, 2 Mar 2024 16:37:44 -0800 Subject: [PATCH 26/31] address reviewer comments --- skycatalogs/objects/sso_object.py | 50 +++++++++++++------------------ skycatalogs/skyCatalogs.py | 23 +++----------- 2 files changed, 24 insertions(+), 49 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 4544faee..b7d1d357 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -3,12 +3,13 @@ import numpy as np import galsim from .base_object import BaseObject, ObjectCollection -from lsst.sphgeom import Angle, NormalizedAngle, UnitVector3d -# import healpy +from lsst.sphgeom import UnitVector3d, LonLat EXPOSURE_DEFAULT = 30.0 # seconds __all__ = ['SsoObject', 'SsoCollection', 'EXPOSURE_DEFAULT'] +SECONDS_PER_DAY = 24.0*3600.0 + class SsoObject(BaseObject): _type_name = 'sso' @@ -40,7 +41,7 @@ def _get_sed(self, mjd=None): def get_gsobject_components(self, gsparams=None, rng=None, streak=False): - skinny = 1.e-6 # ratio of width to height in our box + trail_width = 1.e-6 # in arcseconds exposure = self._belongs_to._exposure if gsparams is not None: @@ -53,43 +54,32 @@ def get_gsobject_components(self, gsparams=None, rng=None, # Then rotate appropriately ra = self.ra dec = self.dec - # Convert from (arc?)degrees/day to arcsec/sec - ra_rate = self.get_native_attribute('ra_rate')/24.0 + # Convert from (arc?)degrees/day to degrees/sec + ra_rate = self.get_native_attribute('ra_rate')/SECONDS_PER_DAY # Take out factor of cos(dec). 
ra_rate_raw = ra_rate/np.cos(dec) # ra_rate = self.get_native_attribute('ra_rate')/24.0 - dec_rate = self.get_native_attribute('dec_rate')/24.0 + dec_rate = self.get_native_attribute('dec_rate')/SECONDS_PER_DAY # ra_final is approximate since really ra_rate is a function # of dec, but average dec_rate is small so ra_final = ra + ra_rate_raw * exposure dec_final = dec + dec_rate * exposure - init_v = UnitVector3d(NormalizedAngle.fromDegrees(ra), - Angle.fromDegrees(dec)) - final_v = UnitVector3d(NormalizedAngle.fromDegrees(ra_final), - Angle.fromDegrees(dec_final)) - chord = (final_v - init_v).getNorm() - length = Angle(2 * np.arcsin(chord/2.0)).asDegrees() * 3600 + init_v = UnitVector3d(LonLat.fromDegrees(ra, dec)) + final_v = UnitVector3d(LonLat.fromDegrees(ra_final, dec_final)) + length = np.degrees(np.arccos(init_v.dot(final_v))) * 3600.0 + gobj = galsim.Box(length, trail_width, gsparams=gsparams) - gobj = galsim.Box(length, skinny*length, gsparams=gsparams) # now rotate to direction of (ra_rate, dec_rate) - try: - # angle_rad = galsim.Angle(np.arctan(dec_rate/(ra_rate * np.cos(dec))), galsim.radians) - # NOTE: Probably cos(dec) has already been applied, in which - # case we want - angle_rad = galsim.Angle(np.arctan(dec_rate/ra_rate), - galsim.radians) - - gobj = gobj.rotate(angle_rad) - except ZeroDivisionError: - gobj = gobj.rotate(galsim.Angle(90, galsim.degrees)) - return {'this_object': gobj} - else: - # To get the dimensions of the box, use ra rate, dec rate and - # exposure length. The first two will be in the sky catalogs - # parquet file; the last will be passed in. - # For now start with the simpler thing: just a point source. + # angle_rad = galsim.Angle(np.arctan2(dec_rate/(ra_rate * np.cos(dec))), galsim.radians) + # NOTE: Probably cos(dec) has already been applied, in which + # case we want + angle_rad = galsim.Angle(np.arctan2(dec_rate/ra_rate), + galsim.radians) + gobj = gobj.rotate(angle_rad) + else: + # Treat as point source return {'this_object': galsim.DeltaFunction(gsparams=gsparams)} def get_observer_sed_component(self, component, mjd=None): @@ -132,7 +122,7 @@ def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, id array of str sky_catalog instance of SkyCatalog mjd_indiviual array of float or None - region Geometric region or string (representing file path) + region Geometric region mjd_global float or None mask exclusion mask if cuts have been made due to geometric region or mjd diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index e16ed83c..7f36ed9e 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -254,8 +254,9 @@ def _compute_transient_mask(current_mjd, start_mjd, end_mjd): return mask -MJD_EPS = 0.000001 # < 0.1 seconds -JD_SEC = 1.0/(24.0*3600.0) +SECONDS_PER_DAY = 24.0*3600.0 +MJD_EPS = 0.1/SECONDS_PER_DAY +JD_SEC = 1.0/SECONDS_PER_DAY def _compute_variable_mask(current_mjd, mjd_column, exposure, epsilon=MJD_EPS): @@ -341,9 +342,6 @@ def __init__(self, config, mp=False, skycatalog_root=None, verbose=False, # for 'object_types', map object type to filepath self._hp_info = dict() _ = self._find_all_hps() -# if 'sso' in self.raw_config['object_types']: -# self._sso_files = find_sso_files(self._cat_dir, -# self.raw_config['object_types']['sso']) # NOTE: the use of TophatSedFactory is appropriate *only* for an # input galaxy catalog with format like cosmoDC2, which includes @@ -632,6 +630,7 @@ def get_object_type_by_region(self, region, object_type, mjd=None, types made depend on object_type 
object_type known object type without parent mjd MJD of observation epoch. + exposure exposure (seconds) filepath if not None, look only in this file. Only used for custom loads @@ -813,20 +812,6 @@ def get_object_iterator_by_hp(self, hp, obj_type_set=None, ''' pass - def get_object_by_mjd_interval(self, min_mjd, max_mjd, - file_list=None, obj_type='sso'): - ''' - Make object list of all objects with mjd within interval. - If file_list is specified, search only those files. - Currently only implemented for obj_type_set={'sso'} - Parameters - ---------- - min_mjd float mjd must be >= min_mjd - max_mjd float mjd must be < max_mjd - file_list list files to search. If None, search all files - obj_type string Currently only 'sso' is supported - ''' - def open_catalog(config_file, mp=False, skycatalog_root=None, verbose=False): ''' From 53763760be1909244fedaf3199a3212ca10c7922 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sat, 2 Mar 2024 23:21:18 -0800 Subject: [PATCH 27/31] continue addressing reviewer comments --- skycatalogs/objects/sso_object.py | 32 ++++++++++++++++--------------- skycatalogs/skyCatalogs.py | 4 +--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index b7d1d357..05460c4c 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -40,7 +40,7 @@ def _get_sed(self, mjd=None): return SsoObject._solar_sed, self.get_native_attribute('trailed_source_mag') def get_gsobject_components(self, gsparams=None, rng=None, - streak=False): + streak=True): trail_width = 1.e-6 # in arcseconds exposure = self._belongs_to._exposure @@ -71,10 +71,10 @@ def get_gsobject_components(self, gsparams=None, rng=None, gobj = galsim.Box(length, trail_width, gsparams=gsparams) # now rotate to direction of (ra_rate, dec_rate) - # angle_rad = galsim.Angle(np.arctan2(dec_rate/(ra_rate * np.cos(dec))), galsim.radians) + # angle_rad = galsim.Angle(np.arctan2(dec_rate, (ra_rate * np.cos(dec))), galsim.radians) # NOTE: Probably cos(dec) has already been applied, in which # case we want - angle_rad = galsim.Angle(np.arctan2(dec_rate/ra_rate), + angle_rad = galsim.Angle(np.arctan2(dec_rate, ra_rate), galsim.radians) gobj = gobj.rotate(angle_rad) @@ -118,18 +118,20 @@ def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, ''' Parameters ---------- - ra, dec array of float - id array of str - sky_catalog instance of SkyCatalog - mjd_indiviual array of float or None - region Geometric region - mjd_global float or None - mask exclusion mask if cuts have been made due to - geometric region or mjd - readers parquet reader (in practice there is always only 1) - row_group int - - One of mjd_global, mjd_individual must not be None + ra, dec array of float + id array of str + sky_catalog instance of SkyCatalog + mjd_individual array of float. Array of mjd values belonging + to the objects which will be in the new collection + region Geometric region + mjd_global float or None. 
The mjd value which was used (along with + region) to determine which objects should be in the + collection + mask exclusion mask if cuts have been made due to + geometric region or mjd + readers parquet reader (in practice there is always only 1) + row_group int + ''' super().__init__(ra, dec, id, 'sso', hp, sky_catalog, region=region, mjd=mjd, mask=mask, diff --git a/skycatalogs/skyCatalogs.py b/skycatalogs/skyCatalogs.py index 7f36ed9e..17e664e5 100644 --- a/skycatalogs/skyCatalogs.py +++ b/skycatalogs/skyCatalogs.py @@ -622,7 +622,7 @@ def get_objects_by_region(self, region, obj_type_set=None, mjd=None, return object_list def get_object_type_by_region(self, region, object_type, mjd=None, - exposure=EXPOSURE_DEFAULT, filepath=None): + exposure=EXPOSURE_DEFAULT): ''' Parameters ---------- @@ -631,8 +631,6 @@ def get_object_type_by_region(self, region, object_type, mjd=None, object_type known object type without parent mjd MJD of observation epoch. exposure exposure (seconds) - filepath if not None, look only in this file. - Only used for custom loads Returns ------- From e74473f26c287db0286e173be24b5fad195fcc62 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sat, 2 Mar 2024 23:28:16 -0800 Subject: [PATCH 28/31] make docstring consistent with call interface --- skycatalogs/objects/sso_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py index 05460c4c..44365bd6 100644 --- a/skycatalogs/objects/sso_object.py +++ b/skycatalogs/objects/sso_object.py @@ -124,7 +124,7 @@ def __init__(self, ra, dec, id, hp, sky_catalog, mjd_individual=None, mjd_individual array of float. Array of mjd values belonging to the objects which will be in the new collection region Geometric region - mjd_global float or None. The mjd value which was used (along with + mjd float or None. The mjd value which was used (along with region) to determine which objects should be in the collection mask exclusion mask if cuts have been made due to From 192d6263b7004ca3414db5c90f06724708b46d0d Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sun, 3 Mar 2024 12:18:09 -0800 Subject: [PATCH 29/31] avoid conflict with main branch --- skycatalogs/catalog_creator.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skycatalogs/catalog_creator.py b/skycatalogs/catalog_creator.py index 78ebc847..b75c3e9e 100644 --- a/skycatalogs/catalog_creator.py +++ b/skycatalogs/catalog_creator.py @@ -553,8 +553,7 @@ def create_galaxy_pixel(self, pixel, gal_cat, arrow_schema): 'convergence', 'diskEllipticity1', 'diskEllipticity2', 'spheroidEllipticity1', 'spheroidEllipticity2', 'spheroidHalfLightRadiusArcsec', - 'diskHalfLightRadiusArcsec', - 'um_source_galaxy_obs_sm'] + 'diskHalfLightRadiusArcsec', 'um_source_galaxy_obs_sm'] # df is not a dataframe! 
It's just a dict
 
         if not self._mag_cut:

From d02844db7d092c85fee29480a764b8add29d5ebd Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Mon, 4 Mar 2024 12:50:35 -0800
Subject: [PATCH 30/31] Let galsim simplify SED-with-magnitude calculation

---
 skycatalogs/objects/sso_object.py  | 5 +----
 skycatalogs/objects/star_object.py | 6 +-----
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index 44365bd6..43a92de6 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -91,10 +91,7 @@ def get_observer_sed_component(self, component, mjd=None):
             raise ValueError(txt)
 
         sed, magnorm = self._get_sed(mjd=mjd)
-
-        flux_500 = np.exp(-0.9210340371976184 * magnorm)
-        sed = sed.withMagnitude(0, self._bp500)
-        sed = sed*flux_500
+        sed = sed.withMagnitude(magnorm, self._bp500)
 
         # no extinction
         return sed
diff --git a/skycatalogs/objects/star_object.py b/skycatalogs/objects/star_object.py
index eb481263..78796a3f 100644
--- a/skycatalogs/objects/star_object.py
+++ b/skycatalogs/objects/star_object.py
@@ -29,11 +29,7 @@ def get_gsobject_components(self, gsparams=None, rng=None):
 
     def get_observer_sed_component(self, component, mjd=None):
         sed, magnorm = self._get_sed(mjd=mjd)
-        # The SED is in units of photons/nm/cm^2/s
-        # -0.9210340371976184 = -np.log(10)/2.5.  Use to convert mag to flux
-        flux_500 = np.exp(-0.9210340371976184 * magnorm)
-        sed = sed.withMagnitude(0, self._bp500)
-        sed = sed*flux_500
+        sed = sed.withMagnitude(magnorm, self._bp500)
 
         if sed is not None:
             sed = self._apply_component_extinction(sed)

From 34dee332674b7a23a30f1cb5f0fc0d46b3d8089f Mon Sep 17 00:00:00 2001
From: Joanne Bogart
Date: Mon, 4 Mar 2024 12:57:22 -0800
Subject: [PATCH 31/31] reinstate inadvertently deleted line

---
 skycatalogs/objects/sso_object.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/skycatalogs/objects/sso_object.py b/skycatalogs/objects/sso_object.py
index 43a92de6..6ec661d4 100644
--- a/skycatalogs/objects/sso_object.py
+++ b/skycatalogs/objects/sso_object.py
@@ -78,6 +78,7 @@ def get_gsobject_components(self, gsparams=None, rng=None,
                                      galsim.radians)
             gobj = gobj.rotate(angle_rad)
+            return {'this_object': gobj}
         else:
             # Treat as point source
             return {'this_object': galsim.DeltaFunction(gsparams=gsparams)}
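The simplification in PATCH 30 is exact: the magic constant 0.9210340371976184 is ln(10)/2.5, so exp(-0.921034... * magnorm) equals 10**(-0.4 * magnorm), and scaling a magnitude-0 SED by that factor is the same normalization a single withMagnitude(magnorm, bp500) call performs. A quick arithmetic check of the scale factors (a sketch; plain numpy, no galsim needed):

    import numpy as np

    magnorm = 21.3
    print(np.exp(-0.9210340371976184 * magnorm))   # 3.0199...e-09
    print(10.0 ** (-0.4 * magnorm))                # same value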