Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

U/jrbogart/config reorg #112

Merged
merged 23 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f03cfda
Add template config files for each supported source type.
JoanneBogart Jul 5, 2024
d553e99
Add classes YamlPassthruIncludeLoader, ConfigWriter to config_utils.py
JoanneBogart Jul 9, 2024
3f82bc0
various bug fixes. Can now write config fragment, top file for objec…
JoanneBogart Jul 11, 2024
feb5643
fix rebase missteps
JoanneBogart Jul 11, 2024
9130d53
add routine update_yaml to unconditionally write file, even if it alr…
JoanneBogart Jul 11, 2024
3d9ddad
reorganization of config essentially working; more polishing and test…
JoanneBogart Jul 12, 2024
88e306b
bug fix needed for schema backward-compatibility
JoanneBogart Jul 13, 2024
0ceeb69
Add/improve docstrings; eliminate unused code
JoanneBogart Jul 13, 2024
52b2636
minor fixes so old test programs will run
JoanneBogart Jul 13, 2024
4572326
centralize handling of (config) schema version in Config class
JoanneBogart Jul 13, 2024
20549e2
Add CI test for new arrangement of config file
JoanneBogart Jul 15, 2024
25c69e9
Rename test module
JoanneBogart Jul 15, 2024
27c9683
make test names unique
JoanneBogart Jul 15, 2024
e43dc18
sso code needed a minor update for new config system
JoanneBogart Jul 15, 2024
bca6a51
sso updates for new config arrangement
JoanneBogart Jul 15, 2024
c774e56
updates for SSO. Also centralize "old style" handling
JoanneBogart Jul 15, 2024
06c6f6c
address some reviewer comments
JoanneBogart Jul 17, 2024
6da4f96
reorganize creation of yaml fragments for greater maintainability
JoanneBogart Jul 19, 2024
d553ccf
Add code to retrieve throughputs versions, write to flux file metadata
JoanneBogart Jul 23, 2024
23a74ae
sso binary output should also have metadata
JoanneBogart Jul 23, 2024
820aa6c
fix reference to throughputs version
JoanneBogart Jul 24, 2024
fc25144
address reviewer comments
JoanneBogart Aug 2, 2024
01a5309
one last bit of clean-up
JoanneBogart Aug 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 34 additions & 64 deletions skycatalogs/catalog_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
import sqlite3
from .utils.sed_tools import TophatSedFactory, get_star_sed_path
from .utils.sed_tools import generate_sed_path
from .utils.config_utils import create_config, assemble_SED_models
from .utils.config_utils import create_config
from .utils.config_utils import assemble_MW_extinction, assemble_cosmology
from .utils.config_utils import assemble_object_types, assemble_provenance
from .utils.config_utils import assemble_provenance
from .utils.config_utils import assemble_file_metadata
from .utils.config_utils import write_yaml
from .utils.config_utils import ConfigWriter
from .utils.star_parquet_input import _star_parquet_reader
from .utils.parquet_schema_utils import make_galaxy_schema
from .utils.parquet_schema_utils import make_galaxy_flux_schema
Expand Down Expand Up @@ -379,6 +379,13 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
self._run_options = run_options
self._tophat_sed_bins = None

self._config_writer = ConfigWriter(self._skycatalog_root,
self._catalog_dir,
self._catalog_name,
not self._skip_done,
self._logname)


def _make_tophat_columns(self, dat, names, cmp):
'''
Create columns sed_val_cmp, cmp_magnorm where cmp is one of "disk",
Expand Down Expand Up @@ -472,12 +479,20 @@ def create_galaxy_catalog(self):

# Now make config. We need it for computing LSST fluxes for
# the second part of the galaxy catalog
if self._skip_done:
config_path = self.write_config(path_only=True)
if os.path.exists(config_path):
self._logger.info('Will not overwrite existing config file')
return
self.write_config()
prov = assemble_provenance(self._pkg_root,
inputs={'galaxy_truth': self._galaxy_truth},
run_options=self._run_options)
if self._galaxy_type == 'diffsky':
object_type = 'diffsky_galaxy'
cosmo = assemble_cosmology(self._cosmology)
self._config_writer.write_configs(object_type, prov,
cosmology=cosmo)
else:
object_type = 'galaxy'
cosmo = assemble_cosmology(self._cosmology)
self._config_writer.write_configs(object_type, prov,
cosmology=cosmo,
tophat_bins=self._tophat_sed_bins)

def _write_subpixel(self, dat=None, output_path=None, arrow_schema=None,
to_rename=dict(), stride=100000):
Expand Down Expand Up @@ -725,7 +740,7 @@ def create_galaxy_flux_catalog(self, config_file=None):
self._gal_flux_needed = [field.name for field in self._gal_flux_schema]

if not config_file:
config_file = self.write_config(path_only=True)
config_file = self.get_config_path()
if not self._cat:
self._cat = open_catalog(config_file,
skycatalog_root=self._skycatalog_root)
Expand Down Expand Up @@ -906,6 +921,12 @@ def create_pointsource_catalog(self):
star_cat=self._star_truth)
self._logger.debug(f'Completed pixel {p}')

prov = assemble_provenance(self._pkg_root,
inputs={'star_truth': self._star_truth},
run_options=self._run_options)
self._config_writer.write_configs('star', prov)


def create_pointsource_pixel(self, pixel, arrow_schema, star_cat=None):
if not star_cat:
self._logger.info('No star input specified')
Expand Down Expand Up @@ -999,7 +1020,7 @@ def create_pointsource_flux_catalog(self, config_file=None):
self._ps_flux_schema = make_star_flux_schema(self._logname,
metadata_input=file_metadata)
if not config_file:
config_file = self.write_config(path_only=True)
config_file = self.get_config_path()

# Always open catalog. If it was opened for galaxies earlier
# it won't know about star files.
Expand Down Expand Up @@ -1116,59 +1137,8 @@ def _create_pointsource_flux_pixel(self, pixel):
writer.close()
self._logger.debug(f'# row groups written to flux file: {rg_written}')

def write_config(self, overwrite=False, path_only=False):
'''
Parameters
----------
overwrite boolean default False. If true, overwrite existing
config of the same name
path_only If true, just return the path; don't write anything

Returns
-------
Path to would-be config file if path_only is True;
else None

Side-effects
------------
Save path to config file written as instance variable

'''
def get_config_path(self):
if not self._config_path:
self._config_path = self._output_dir

if path_only:
return os.path.join(self._config_path,
self._catalog_name + '.yaml')

config = create_config(self._catalog_name, self._logname)
if self._global_partition is not None:
config.add_key('area_partition', self._area_partition)

# Even though the following keys are also in the run options
# section they need to be here so that the flux creation code
# can find them
config.add_key('catalog_dir', self._catalog_dir)
config.add_key('skycatalog_root', self._skycatalog_root)

if self._galaxy_type == 'cosmodc2':
config.add_key('SED_models',
assemble_SED_models(self._tophat_sed_bins))
config.add_key('MW_extinction_values', assemble_MW_extinction())
config.add_key('Cosmology', assemble_cosmology(self._cosmology))
config.add_key('object_types',
assemble_object_types(self._pkg_root,
galaxy_nside=self._galaxy_nside))

inputs = {'galaxy_truth': self._galaxy_truth}
if self._star_truth:
inputs['star_truth'] = self._star_truth
if self._sso_truth:
inputs['sso_truth'] = self._sso_truth
inputs['sso_sed'] = self._sso_sed
config.add_key('provenance',
assemble_provenance(self._pkg_root, inputs=inputs,
run_options=self._run_options))

self._written_config = config.write_config(self._config_path,
overwrite=overwrite)
return os.path.join(self._config_path, self._catalog_name + '.yaml')
34 changes: 34 additions & 0 deletions skycatalogs/data/cfg_templates/diffsky_galaxy_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Configuration template for the diffsky_galaxy object type.
# Data are partitioned by healpix pixel (nside 32, ring ordering).
area_partition:
nside: 32
ordering: ring
type: healpix
# Which sub-components a composite object must / may have.
composite:
bulge: required
disk: required
knots: optional
data_file_type: parquet
# File-name patterns are regular expressions; the named group "healpix"
# captures the digits in the file name.
file_template: galaxy_(?P<healpix>\d+).parquet
flux_file_template: galaxy_flux_(?P<healpix>\d+).parquet
sed_file_template: galaxy_sed_(?P<healpix>\d+).hdf5
# One entry per component; each names diffsky_galaxy as its parent.
# NOTE(review): sed_model is the literal string TBD for every component --
# presumably a placeholder; confirm before relying on it.
components:
diffsky_bulge:
MW_extinction: F19
internal_extinction: CCM
parent: diffsky_galaxy
sed_model: TBD
spatial_model: sersic2D
subtype: bulge
diffsky_disk:
MW_extinction: F19
internal_extinction: CCM
parent: diffsky_galaxy
sed_model: TBD
spatial_model: sersic2D
subtype: disk
diffsky_knots:
MW_extinction: F19
internal_extinction: CCM
parent: diffsky_galaxy
sed_model: TBD
spatial_model: knots
subtype: knots
7 changes: 7 additions & 0 deletions skycatalogs/data/cfg_templates/gaia_star_butler_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Configuration template for Gaia stars with data_file_type butler_refcat.
sed_method: use_lut
id_prefix: gaia_dr2_
# NOTE(review): an unquoted None is read by YAML as the string "None",
# not as null -- confirm the consumer of this key expects a string.
area_partition: None
butler_parameters:
collections: HSC/defaults
dstype: gaia_dr2_20200414
data_file_type: butler_refcat
8 changes: 8 additions & 0 deletions skycatalogs/data/cfg_templates/gaia_star_direct_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Configuration template for Gaia stars read from FITS files
# (data_file_type: fits), partitioned by HTM at level 7.
sed_method: use_lut
id_prefix: gaia_dr2_
area_partition:
level: 7
type: htm
data_file_type: fits
# NOTE(review): data_dir is an absolute path -- presumably site-specific and
# meant to be overridden elsewhere; confirm.
data_dir: /sdf/group/rubin/datasets/refcats/htm/v1/gaia_dr2_20200414
# Regular expression; the named group "htm" captures the digits in the
# file name.
basename_template: (?P<htm>\d+).fits
39 changes: 39 additions & 0 deletions skycatalogs/data/cfg_templates/galaxy_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Configuration template for the galaxy object type.
# File-name patterns are regular expressions; the named group "healpix"
# captures the digits in the file name.
file_template: 'galaxy_(?P<healpix>\d+).parquet'
flux_file_template: 'galaxy_flux_(?P<healpix>\d+).parquet'
data_file_type: parquet
# Healpix partitioning, written here as a YAML flow mapping.
area_partition:
{ type: healpix, ordering: ring, nside: 32}
# Which sub-components a composite object must / may have.
composite:
bulge: required
disk: required
knots: optional
# NOTE(review): presumably the knots size attributes resolve to the
# corresponding disk attributes -- confirm the direction of the alias.
attribute_aliases:
size_knots_true: size_disk_true
size_minor_knots_true: size_minor_disk_true
# One entry per component; each names galaxy as its parent and uses the
# tophat sed_model.
components:
bulge_basic:
subtype: bulge
parent: galaxy
sed_model: tophat
internal_extinction: CCM
MW_extinction: F19
spatial_model: sersic2D
disk_basic:
subtype: disk
parent: galaxy
sed_model: tophat
internal_extinction: CCM
MW_extinction: F19
spatial_model: sersic2D
knots_basic:
subtype: knots
parent: galaxy
sed_model: tophat
internal_extinction: CCM
MW_extinction: F19
spatial_model: knots
# Settings block named after the sed_model value used above.
tophat:
bin_parameters:
- start
- width
units: angstrom
8 changes: 8 additions & 0 deletions skycatalogs/data/cfg_templates/snana_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Configuration template for the snana object type.
# Data are partitioned by healpix pixel (nside 32, ring ordering).
area_partition:
nside: 32
ordering: ring
type: healpix
data_file_type: parquet
# Regular expression; the named group "healpix" captures the digits in the
# file name.
file_template: snana_(?P<healpix>\d+).parquet
# NOTE(review): an unquoted None is read by YAML as the string "None",
# not as null -- confirm the consumer of this key expects a string.
internal_extinction: None
sed_model: snana
8 changes: 8 additions & 0 deletions skycatalogs/data/cfg_templates/sso_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Configuration template for the sso object type.
# Data are partitioned by healpix pixel (nside 32, ring ordering).
area_partition:
nside: 32
ordering: ring
type: healpix
data_file_type: parquet
# File-name patterns are regular expressions; the named group "healpix"
# captures the digits in the file name.
file_template: sso_(?P<healpix>\d+).parquet
flux_file_template: sso_flux_(?P<healpix>\d+).parquet
sed_model: dbfile_angstrom_flambda
12 changes: 12 additions & 0 deletions skycatalogs/data/cfg_templates/star_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Configuration template for the star object type.
# Data are partitioned by healpix pixel (nside 32, ring ordering).
area_partition:
nside: 32
ordering: ring
type: healpix
data_file_type: parquet
# File-name patterns are regular expressions; the named group "healpix"
# captures the digits in the file name.
file_template: pointsource_(?P<healpix>\d+).parquet
flux_file_template: pointsource_flux_(?P<healpix>\d+).parquet
# NOTE(review): an unquoted None is read by YAML as the string "None",
# not as null -- confirm the consumer of this key expects a string.
internal_extinction: None
# Name of an environment variable (not a path).
# NOTE(review): presumably it points at the root of the SED files -- confirm.
sed_file_root_env_var: SIMS_SED_LIBRARY_DIR
sed_model: file_nm
# Settings block named after the sed_model value above.
file_nm:
units: nm
Loading