Merge pull request #190 from LSSTDESC/lens-source-pz-split

Split lens and source photo-z catalogs
LSSTDESC · Jul 29, 2021 · 9ad0ca5 · 9ad0ca5
2 parents afa62fd + e03b9a3
commit 9ad0ca5
Show file tree

Hide file tree

Showing 15 changed files with 519 additions and 284 deletions.
diff --git a/examples/config/laptop_config.yml b/examples/config/laptop_config.yml
@@ -34,12 +34,13 @@ PZPDFMLZ:
     nz: 301
     zmax: 3.0
 
-PZRailTrain:
+PZRailTrainSource:
     class_name: FZBoost
     zmin: 0.0
     zmax: 2.0
     nzbins: 201
     trainfrac: 0.75
+    bands: riz
     bumpmin: 0.02
     bumpmax: 0.35
     nbump: 20
@@ -50,6 +51,25 @@ PZRailTrain:
     basis_system: cosine
     regression_params: {'max_depth': 8,'objective':'reg:squarederror'}
 
+PZRailTrainLens:
+    class_name: FZBoost
+    zmin: 0.0
+    zmax: 2.0
+    nzbins: 201
+    trainfrac: 0.75
+    bands: riz
+    bumpmin: 0.02
+    bumpmax: 0.35
+    nbump: 20
+    sharpmin: 0.7
+    sharpmax: 2.1
+    nsharp: 15
+    max_basis: 35
+    basis_system: cosine
+    regression_params: {'max_depth': 8, 'objective': 'reg:squarederror'}
+
+
+
 
 FlexZPipe:
     chunk_rows: 1000

diff --git a/examples/config/laptop_lensfit_config.yml b/examples/config/laptop_lensfit_config.yml
@@ -26,12 +26,13 @@ PZPDFMLZ:
     nz: 301
     zmax: 3.0
 
-PZRailTrain:
+PZRailTrainSource:
     class_name: FZBoost
     zmin: 0.0
     zmax: 2.0
     nzbins: 201
     trainfrac: 0.75
+    bands: riz
     bumpmin: 0.02
     bumpmax: 0.35
     nbump: 20
@@ -42,6 +43,23 @@ PZRailTrain:
     basis_system: cosine
     regression_params: {'max_depth': 8,'objective':'reg:squarederror'}
 
+PZRailTrainLens:
+    class_name: FZBoost
+    zmin: 0.0
+    zmax: 2.0
+    nzbins: 201
+    trainfrac: 0.75
+    bands: riz
+    bumpmin: 0.02
+    bumpmax: 0.35
+    nbump: 20
+    sharpmin: 0.7
+    sharpmax: 2.1
+    nsharp: 15
+    max_basis: 35
+    basis_system: cosine
+    regression_params: {'max_depth': 8, 'objective': 'reg:squarederror'}
+
 FlexZPipe:
     chunk_rows: 1000
     bands: ["u","g","r","i","z","y"]

diff --git a/examples/laptop_lensfit_pipeline.yml b/examples/laptop_lensfit_pipeline.yml
@@ -6,9 +6,11 @@ stages:
     - name: TXLensCatalogSplitter
     - name: TXStarCatalogSplitter
     - name: TXTruthLensSelector
-    - name: PZRailTrain
-    - name: PZRailEstimate
-    - name: TXPhotozStack
+    - name: PZRailTrainLens
+    - name: PZRailEstimateSourceFromLens
+    - name: PZRailEstimateLens
+    - name: TXPhotozSourceStack
+    - name: TXPhotozLensStack
     - name: TXMainMaps
     - name: TXAuxiliaryMaps
     - name: TXSimpleMask
@@ -76,8 +78,8 @@ config: examples/config/laptop_lensfit_config.yml
 
 inputs:
     # See README for paths to download these files
-    photoz_training: submodules/RAIL/tests/data/test_dc2_training_9816.hdf5
-    photoz_testing: submodules/RAIL/tests/data/test_dc2_validation_9816.hdf5
+    photoz_lens_training: submodules/RAIL/tests/data/test_dc2_training_9816.hdf5
+    photoz_lens_testing: submodules/RAIL/tests/data/test_dc2_validation_9816.hdf5
     shear_catalog: data/example/inputs/lensfit_shear_catalog.hdf5
     photometry_catalog: data/example/inputs/lensfit_photometry_catalog.hdf5
     calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat

diff --git a/examples/laptop_pipeline.yml b/examples/laptop_pipeline.yml
@@ -2,14 +2,24 @@
 # Stages to run
 stages:
     - name: TXSourceSelector     # select and split objects into source bins
-    - name: TXShearCalibration
-    - name: TXLensCatalogSplitter
-    - name: TXStarCatalogSplitter
+    - name: TXShearCalibration   # Calibrate and split the source sample tomographically
+    - name: TXLensCatalogSplitter  # Split the lens sample tomographically
+    - name: TXStarCatalogSplitter  # Split the star catalog into separate bins (psf/non-psf)
     - name: TXMeanLensSelector  # select objects for lens bins
-    - name: PZRailTrain
+    - name: PZRailTrainSource   # Train a photo-z estimator for the source sample
       threads_per_process: 2
-    - name: PZRailEstimate
-    - name: TXPhotozStack        # stack p(z) into n(z)
+    - name: PZRailEstimateSource # Compute p(z) values for the source sample
+    - name: PZRailEstimateLensFromSource   # Copy the p(z) values from the source to lens values
+    # # If we had separate source and lens samples (as we do in real life) we would
+    # # estimate the lens PZ separately.
+    # # We could use this to train a separate estimator:
+    # - name: PZRailTrainLens     # Train a photo-z estimator for the lens sample
+    #   threads_per_process: 2
+    # # and this to estimate the p(z) for the lenses separately:
+    # # (in this laptop test the samples are the same, as this is faster)
+    # - name: PZRailEstimateLens # Compute p(z) values for the lens sample
+    - name: TXPhotozSourceStack  # Stack p(z) into n(z)
+    - name: TXPhotozLensStack    # Stack p(z) into n(z)
     - name: TXMainMaps           # make source g1, g2 and lens n_gal maps
     - name: TXAuxiliaryMaps      # make PSF, depth, flag, and other maps
     - name: TXSimpleMask         # combine maps to make a simple mask
@@ -33,8 +43,8 @@ stages:
       threads_per_process: 2
     - name: TXRoweStatistics     # Compute and plot Rowe statistics
       threads_per_process: 2
-    - name: TXGalaxyStarDensity
-    - name: TXGalaxyStarShear
+    - name: TXGalaxyStarDensity  # Compute and plot the star-galaxy density cross-correlation
+    - name: TXGalaxyStarShear    # Compute and plot the star-galaxy shear cross-correlation
     - name: TXPSFDiagnostics     # Compute and plot other PSF diagnostics
     - name: TXBrighterFatterPlot # Make plots tracking the brighter-fatter effect
     - name: TXPhotozPlots        # Plot the bin n(z)
@@ -92,8 +102,10 @@ inputs:
     # See README for paths to download these files
     shear_catalog: data/example/inputs/shear_catalog.hdf5
     photometry_catalog: data/example/inputs/photometry_catalog.hdf5
-    photoz_training: submodules/RAIL/tests/data/test_dc2_training_9816.hdf5
-    photoz_testing: submodules/RAIL/tests/data/test_dc2_validation_9816.hdf5
+    photoz_source_training: submodules/RAIL/tests/data/test_dc2_training_9816.hdf5
+    photoz_source_testing: submodules/RAIL/tests/data/test_dc2_validation_9816.hdf5
+    photoz_lens_training: submodules/RAIL/tests/data/test_dc2_training_9816.hdf5
+    photoz_lens_testing: submodules/RAIL/tests/data/test_dc2_validation_9816.hdf5
     calibration_table: data/example/inputs/sample_cosmodc2_w10year_errors.dat
     exposures: data/example/inputs/exposures.hdf5
     star_catalog: data/example/inputs/star_catalog.hdf5

diff --git a/requirements.txt b/requirements.txt
@@ -23,3 +23,4 @@ parallel_statistics
 healsparse
 flexcode
 xgboost==1.1.0
+git+git://github.com/LSSTDESC/qp
diff --git a/submodules/RAIL b/submodules/RAIL
diff --git a/txpipe/__init__.py b/txpipe/__init__.py
@@ -8,7 +8,7 @@
 from .source_selector import TXSourceSelector
 from .lens_selector import TXMeanLensSelector
 from .photoz import TXRandomPhotozPDF
-from .photoz_stack import TXPhotozStack
+from .photoz_stack import TXPhotozSourceStack, TXPhotozLensStack
 from .random_cats import TXRandomCat
 from .twopoint_fourier import TXTwoPointFourier
 from .twopoint import TXTwoPoint
@@ -29,7 +29,8 @@
 from .metadata import TXTracerMetadata
 from .convergence import TXConvergenceMaps
 from .map_correlations import TXMapCorrelations
-from .rail_pz import PZRailTrain
+from .rail import PZRailTrainLens, PZRailTrainSource
+from .rail import PZRailEstimateLens, PZRailEstimateSource
 from .theory import TXTwoPointTheoryReal, TXTwoPointTheoryFourier
 from .jackknife import TXJackknifeCenters
 from .twopoint_null_tests import TXGammaTFieldCenters

diff --git a/txpipe/lens_selector.py b/txpipe/lens_selector.py
@@ -279,7 +279,7 @@ class TXMeanLensSelector(TXBaseLensSelector):
     name = "TXMeanLensSelector"
     inputs = [
         ('photometry_catalog', HDFFile),
-        ('photoz_pdfs', HDFFile),
+        ('lens_photoz_pdfs', HDFFile),
     ]
 
 
@@ -288,7 +288,7 @@ def data_iterator(self):
         phot_cols = ['mag_i','mag_r','mag_g']
         z_cols = ['z_mean']
         iter_phot = self.iterate_hdf('photometry_catalog', 'photometry', phot_cols, chunk_rows)
-        iter_pz = self.iterate_hdf('photoz_pdfs', 'point_estimates', z_cols, chunk_rows)
+        iter_pz = self.iterate_hdf('lens_photoz_pdfs', 'point_estimates', z_cols, chunk_rows)
         for (s, e, data), (_, _, z_data) in zip(iter_phot, iter_pz):
             data['z'] = z_data['z_mean']
             yield s, e, data
@@ -297,7 +297,7 @@ class TXModeLensSelector(TXBaseLensSelector):
     name = "TXModeLensSelector"
     inputs = [
         ('photometry_catalog', HDFFile),
-        ('photoz_pdfs', HDFFile),
+        ('lens_photoz_pdfs', HDFFile),
     ]
 
 
@@ -306,7 +306,7 @@ def data_iterator(self):
         phot_cols = ['mag_i','mag_r','mag_g']
         z_cols = ['z_mode']
         iter_phot = self.iterate_hdf('photometry_catalog', 'photometry', phot_cols, chunk_rows)
-        iter_pz = self.iterate_hdf('photoz_pdfs', 'point_estimates', z_cols, chunk_rows)
+        iter_pz = self.iterate_hdf('lens_photoz_pdfs', 'point_estimates', z_cols, chunk_rows)
         for (s, e, data), (_, _, z_data) in zip(iter_phot, iter_pz):
             data['z'] = z_data['z_mode']
             yield s, e, data

diff --git a/txpipe/photoz_mlz.py b/txpipe/photoz_mlz.py
@@ -15,7 +15,7 @@ class PZPDFMLZ(PipelineStage):
         ('photoz_trained_model', DataFile),
     ]
     outputs = [
-        ('photoz_pdfs', PhotozPDFFile),
+        ('lens_photoz_pdfs', PhotozPDFFile),
     ]
 
     config_options = {
@@ -235,7 +235,7 @@ def prepare_output(self, nobj, z):
         # Open the output file.
         # This will automatically open using the HDF5 mpi-io driver 
         # if we are running under MPI and the output type is parallel
-        f = self.open_output('photoz_pdfs', parallel=True)
+        f = self.open_output('lens_photoz_pdfs', parallel=True)
 
         z_mid = 0.5*(z[1:] + z[:-1])
         # Create the space for output data