Merge branch 'gwastro:master' into temp_bin_infra
ArthurTolley authored Apr 16, 2024
2 parents 6a06a4b + 93fab1a commit 204af25
Showing 47 changed files with 1,734 additions and 345 deletions.
1 change: 0 additions & 1 deletion .codeclimate.yml
@@ -44,7 +44,6 @@ plugins:
- XXX
pylint:
enabled: true
channel: "beta"
checks:
import-error:
enabled: false
@@ -1,38 +1,40 @@
<!---
Please delete these comments when you submit the pull request
Please add a title which is a concise description of what you are doing,
e.g. 'Fix bug with numpy import in pycbc_coinc_findtrigs' or 'add high frequency sky location dependent response for long detectors'
--->
-->

<!---
This is a brief template for making pull requests for PyCBC.
This is _not_ a prescriptive template - you can use a different style if you want.
Please do think about the questions posed here and whether the details will be useful to include in your PR.
Please add sufficient details so that people looking back at the request with no context around the work
can understand the changes.
Please add sufficient details so that people looking back at the request with no context around the work can understand the changes.
To choose reviewers, please look at the git blame for the code you are changing (if applicable),
or discuss in the gwastro slack.
or discuss in the #pycbc-code channel of the gwastro slack.
Please add labels as appropriate
-->

- [ ] The author of this pull request confirms they will adhere to the [code of conduct](https://github.com/gwastro/pycbc/blob/master/CODE_OF_CONDUCT.md)
<!-- TOP-LEVEL SUMMARY: Please provide a brief, one-or-two-sentence description of the PR here
-->

## Standard information about the request

<!--- Some basic info about the change (delete as appropriate) --->
<!--- Some basic info about the change (delete as appropriate) -->
This is a: bug fix, new feature, efficiency update, other (please describe)

<!--- What codes will this affect? (delete as appropriate)
If you do not know which areas will be affected, please ask in the gwastro #pycbc-code slack
--->
-->
This change affects: the offline search, the live search, inference, PyGRB

<!--- What code areas will this affect? (delete as appropriate) --->
<!--- What code areas will this affect? (delete as appropriate) -->
This change changes: documentation, result presentation / plotting, scientific output

<!--- Some things which help with code management (delete as appropriate) --->
<!--- Some things which help with code management (delete as appropriate) -->
This change: has appropriate unit tests, follows style guidelines (See e.g. [PEP8](https://peps.python.org/pep-0008/)), has been proposed using the [contribution guidelines](https://github.com/gwastro/pycbc/blob/master/CONTRIBUTING.md)

<!--- Notes about the effect of this change --->
<!--- Notes about the effect of this change -->
This change will: break current functionality, require additional dependencies, require a new release, other (please describe)

## Motivation
@@ -43,10 +45,12 @@ This change will: break current functionality, require additional dependencies,
but rather a general discussion of the methods chosen -->

## Links to any issues or associated PRs
<!--- If this is fixing / working around an already-reported issue, please link to it here --->
<!--- If this is fixing / working around an already-reported issue, please link to it here -->

## Testing performed
<!--- Describe tests for the code changes, either already performed or to be performed -->

## Additional notes
<!--- Anything which does not fit in the above sections -->

- [ ] The author of this pull request confirms they will adhere to the [code of conduct](https://github.com/gwastro/pycbc/blob/master/CODE_OF_CONDUCT.md)
201 changes: 156 additions & 45 deletions bin/all_sky_search/pycbc_fit_sngls_over_multiparam
@@ -88,7 +88,12 @@ def smooth_tophat(nabove, invalphan, ntotal, dists):
idx_within_area)


def smooth_n_closest(nabove, invalphan, ntotal, dists, total_trigs=500):
# This is the default number of triggers required for n_closest smoothing
_default_total_trigs = 500


def smooth_n_closest(nabove, invalphan, ntotal, dists,
total_trigs=_default_total_trigs):
"""
Smooth templates according to the closest N templates.
No weighting is applied.
@@ -97,13 +102,11 @@ def smooth_n_closest(nabove, invalphan, ntotal, dists, total_trigs=500):
templates_required = 0
n_triggers = 0
# Count number of templates required to gather total_trigs triggers,
# start at closest
# total_trigs, if supplied on command line will be a str so convert to int
while n_triggers < int(total_trigs):
n_triggers += nabove[dist_sort[n_triggers]]
templates_required += 1
logging.debug("%d templates required to obtain %d triggers",
templates_required, n_triggers)
    # starting at closest
    # total_trigs, if supplied on the command line, will be a str
    total_trigs = int(total_trigs)
    ntcs = nabove[dist_sort].cumsum()
    # Clamp to the number of templates in case there are not enough triggers
    templates_required = min(numpy.searchsorted(ntcs, total_trigs) + 1,
                             len(ntcs))
    logging.debug("%d template(s) required to obtain %d (>=%d) triggers",
                  templates_required, ntcs[templates_required - 1],
                  total_trigs)
idx_to_smooth = dist_sort[:templates_required]
return smooth_templates(nabove, invalphan, ntotal, idx_to_smooth)
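The rewritten counting above replaces a per-trigger Python loop with a cumulative sum plus a binary search. A self-contained sketch of the same idiom, with made-up counts and distances (all values are illustrative, not from PyCBC):

```python
import numpy

# Made-up per-template trigger counts and each template's distance from
# the template being smoothed
counts = numpy.array([3, 0, 7, 2, 5, 1])
dists = numpy.array([0.4, 0.1, 0.9, 0.2, 0.6, 0.3])
needed = 10

# Running trigger total in distance order; searchsorted then finds the
# first position where the running total reaches the target in O(log n)
order = dists.argsort()
running = counts[order].cumsum()
n_templates = numpy.searchsorted(running, needed) + 1
print(n_templates, running[n_templates - 1])  # 5 templates give 11 >= 10
```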

@@ -174,9 +177,11 @@ parser = argparse.ArgumentParser(usage="",

pycbc.add_common_pycbc_options(parser)
parser.add_argument("--version", action=pycbc.version.Version)
parser.add_argument("--template-fit-file", required=True,
help="hdf5 file containing fit coefficients for each"
" individual template. Required")
parser.add_argument("--template-fit-file", required=True, nargs='+',
help="hdf5 file(s) containing fit coefficients for each "
"individual template. Can smooth over multiple "
"files provided they correspond to the same bank "
"and fitting settings. Required")
parser.add_argument("--bank-file", required=True,
help="hdf file containing template parameters. Required")
parser.add_argument("--output", required=True,
@@ -236,22 +241,140 @@ for inputstr in smooth_kwargs:
key, value = inputstr.split(':')
kwarg_dict[key] = value
except ValueError:
err_txt = "--smoothing-keywords must take input in the " \
"form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \
"Received {}".format(' '.join(args.smoothing_keywords))
raise ValueError(err_txt)
err_txt = "--smoothing-keywords must take input in the " \
"form KWARG1:VALUE1 KWARG2:VALUE2 KWARG3:VALUE3 ... " \
"Received {}".format(' '.join(args.smoothing_keywords))
raise ValueError(err_txt)
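For reference, this is the whole `KWARG:VALUE` convention in miniature; note that parsed values remain strings, which is why downstream consumers such as `smooth_n_closest` must convert them. A minimal sketch (the helper name and example keyword are illustrative):

```python
def parse_keywords(tokens):
    """Parse 'KEY:VALUE' tokens into a dict; values stay strings."""
    kwargs = {}
    for token in tokens:
        try:
            key, value = token.split(':')
        except ValueError:
            raise ValueError(
                "Expected input in the form KWARG:VALUE, "
                "received {}".format(token))
        kwargs[key] = value
    return kwargs

print(parse_keywords(['total_trigs:1000']))  # {'total_trigs': '1000'}
```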

assert len(args.log_param) == len(args.fit_param) == len(args.smoothing_width)

init_logging(args.verbose)

fits = h5py.File(args.template_fit_file, 'r')
analysis_time = 0
attr_dict = {}

# These end up as n_files * n_templates arrays
tid = numpy.array([], dtype=int)
nabove = numpy.array([], dtype=int)
ntotal = numpy.array([], dtype=int)
alpha = numpy.array([], dtype=float)
median_sigma = numpy.array([], dtype=float)

logging.info("Loading input template fits")

# Check on fit files having the same number of templates, as expected if
# they use the same bank
num_templates = None
for filename in args.template_fit_file:
with h5py.File(filename, 'r') as fits:
if num_templates is None:
num_templates = fits['template_id'].size
elif not num_templates == fits['template_id'].size:
raise RuntimeError(
"Input fit files correspond to different banks. "
"This situation is not yet supported."
)
# get attributes from the template-level fit
for k in fits.attrs.keys():
if k == 'analysis_time':
# For this attribute only, we want the mean
analysis_time += fits.attrs['analysis_time']
continue
if k not in attr_dict:
# It's the first time we encounter this attribute
attr_dict[k] = fits.attrs[k]
elif k == 'ifo':
# We don't mind if this attribute is different, however the
# output attributes will only correspond to the first file's
# IFO. Warn if different IFOs are being used.
if not attr_dict[k] == fits.attrs[k]:
logging.warning(
"Fit files correspond to different IFOs: %s and %s, "
"only %s is being used for output file attributes",
attr_dict[k], fits.attrs[k], attr_dict[k],
)
continue
elif not attr_dict[k] == fits.attrs[k]:
# Check that the files are consistent with one another
err_msg = f"Input files are not consistent, file {filename} "
err_msg += f"has attribute {k} of {fits.attrs[k]}, whereas "
err_msg += f"previous files have value {attr_dict[k]}"
raise RuntimeError(err_msg)
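The attribute handling above distills to a simple policy: sum `analysis_time`, tolerate (but warn about) differing `ifo` values while keeping the first one, and require everything else to match exactly. A standalone sketch of that policy (`merge_attrs` is a hypothetical helper for illustration, not part of PyCBC):

```python
def merge_attrs(attr_dicts):
    """Combine per-file attribute dicts: sum analysis_time, keep the
    first ifo seen (warning on mismatch), require equality otherwise."""
    merged = {}
    for attrs in attr_dicts:
        for k, v in attrs.items():
            if k == 'analysis_time':
                merged[k] = merged.get(k, 0) + v
            elif k not in merged:
                merged[k] = v
            elif k == 'ifo' and merged[k] != v:
                print("warning: mixing IFOs %s and %s" % (merged[k], v))
            elif k != 'ifo' and merged[k] != v:
                raise RuntimeError("inconsistent attribute %s" % k)
    return merged

# Two files consistent apart from their IFO: warns, keeps 'H1'
print(merge_attrs([
    {'ifo': 'H1', 'analysis_time': 10, 'stat_threshold': 6.0},
    {'ifo': 'L1', 'analysis_time': 12, 'stat_threshold': 6.0},
]))
```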

# get template id and template parameter values
tid = numpy.concatenate((tid, fits['template_id'][:]))
nabove = numpy.concatenate((nabove, fits['count_above_thresh'][:]))
ntotal = numpy.concatenate((ntotal, fits['count_in_template'][:]))
alpha = numpy.concatenate((alpha, fits['fit_coeff'][:]))
        if median_sigma is not None:
            try:
                median_sigma = numpy.concatenate(
                    (median_sigma, fits['median_sigma'][:]))
            except KeyError:
                logging.info(
                    'median_sigma dataset not present in input file %s',
                    filename)
                median_sigma = None

# For an exponential fit 1/alpha is linear in the trigger statistic values
# so calculating weighted sums or averages of 1/alpha is appropriate
invalpha = 1. / alpha
invalphan = invalpha * nabove
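A quick numeric illustration of the comment above: because `invalphan = nabove / alpha` is additive over triggers, combining templates through sums of `invalphan` gives a trigger-weighted result, whereas averaging `alpha` directly would not (all values are made up):

```python
import numpy

# Two templates' fit coefficients and trigger counts (made-up values)
alpha = numpy.array([4.0, 2.0])
nabove = numpy.array([10.0, 30.0])

invalphan = nabove / alpha           # [2.5, 15.0], additive over triggers
combined = nabove.sum() / invalphan.sum()
print(combined)                      # ~2.29, pulled towards the 30-trigger fit
print(alpha.mean())                  # 3.0, ignores the trigger counts
```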

# convert the sum above into a mean
analysis_time /= len(args.template_fit_file)

if len(args.template_fit_file) > 1:
# From the n_templates * n_files arrays, average within each template.
# To do this, we average the n_files occurrences which have the same tid

# The linearity of the average means that we can do this in two steps
# without biasing the final result.
logging.info("Averaging within the same template for multiple files")
tid_unique = numpy.unique(tid)

# get the ifo from the template-level fit
ifo = fits.attrs['ifo']
# sort into template_id order
tidsort = tid.argsort()

# get template id and template parameter values
tid = fits['template_id'][:]
    # For each unique template id, find the block of identical template ids
    left = numpy.searchsorted(tid[tidsort], tid_unique, side='left')
    right = numpy.searchsorted(tid[tidsort], tid_unique, side='right')

    # Precompute cumulative sums with a leading zero, so that the sum over
    # each template's block [left, right) is a difference of two entries
    nasum = numpy.insert(nabove[tidsort].cumsum(), 0, 0)
    invsum = numpy.insert(invalphan[tidsort].cumsum(), 0, 0)
    ntsum = numpy.insert(ntotal[tidsort].cumsum(), 0, 0)
    num = right - left

    tid = tid_unique
    nabove = (nasum[right] - nasum[left]) / num
    invalphan = (invsum[right] - invsum[left]) / num
    ntotal = (ntsum[right] - ntsum[left]) / num
    if median_sigma is not None:
        mssum = numpy.insert(median_sigma[tidsort].cumsum(), 0, 0)
        median_sigma = (mssum[right] - mssum[left]) / num
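The block above is the classic prefix-sum trick for block statistics: once values are sorted by template id, the sum over each id's contiguous block is a difference of two cumulative-sum entries, so all per-template means cost one sort plus O(n) work rather than a Python loop per template. A toy check with two files and three templates (illustrative arrays):

```python
import numpy

tid = numpy.array([0, 1, 2, 0, 1, 2])          # ids repeat across files
vals = numpy.array([1.0, 4.0, 6.0, 3.0, 8.0, 10.0])

order = tid.argsort()
uniq = numpy.unique(tid)
left = numpy.searchsorted(tid[order], uniq, side='left')
right = numpy.searchsorted(tid[order], uniq, side='right')

c = numpy.insert(vals[order].cumsum(), 0, 0)   # leading zero pads the sums
print((c[right] - c[left]) / (right - left))   # [2. 6. 8.]
```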

if args.output_fits_by_template:
# Store fit_by_template values for output file
# For more than one input fit file, these values are averaged over the same
# template in different files
fbt_dict = {
'count_above_thresh': nabove,
'count_in_template': ntotal,
}
with numpy.errstate(invalid='ignore'):
# If n_above is zero, then we'll get an 'invalid' warning as we
# are dividing zero by zero. This is normal, and we'll deal with
# those properly just below, so ignore this so people don't see
# a warning and panic
alpha = nabove / invalphan
alpha[nabove == 0] = -100
fbt_dict['fit_coeff'] = alpha

# Keyword values arrive as strings from the command line, so convert to int
n_required = int(kwarg_dict.get('total_trigs', _default_total_trigs))
if args.smoothing_method == 'n_closest' and n_required > nabove.sum():
logging.warning(
"There are %.2f triggers above threshold, not enough to give a "
"total of %d for smoothing",
nabove.sum(),
n_required
)

logging.info('Calculating template parameter values')
bank = h5py.File(args.bank_file, 'r')
@@ -273,13 +396,6 @@ for param, slog in zip(args.fit_param, args.log_param):
else:
raise ValueError("invalid log param argument, use 'true', or 'false'")

nabove = fits['count_above_thresh'][:]
ntotal = fits['count_in_template'][:]
# For an exponential fit 1/alpha is linear in the trigger statistic values
# so calculating weighted sums or averages of 1/alpha is appropriate
invalpha = 1. / fits['fit_coeff'][:]
invalphan = invalpha * nabove

nabove_smoothed = []
alpha_smoothed = []
ntotal_smoothed = []
@@ -333,19 +449,16 @@ elif numpy.isfinite(_smooth_cut[args.smoothing_method]):
logging.info("Cutting between %d and %d templates for each smoothing",
n_removed.min(), n_removed.max())
# Sort the values to be smoothed by parameter value
nabove = nabove[par_sort]
invalphan = invalphan[par_sort]
ntotal = ntotal[par_sort]
logging.info("Smoothing ...")
slices = [slice(l,r) for l, r in zip(lefts, rights)]
for i in rang:
report_percentage(i, rang.max())
slc = slices[i]
d = dist(i, slc, parvals, args.smoothing_width)

smoothed_tuple = smooth(nabove[slc],
invalphan[slc],
ntotal[slc],
smoothed_tuple = smooth(nabove[par_sort][slc],
invalphan[par_sort][slc],
ntotal[par_sort][slc],
d,
args.smoothing_method,
**kwarg_dict)
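The smoothing loop pairs the precomputed `(lefts, rights)` windows with arrays re-ordered by the fit parameter. A self-contained toy of the same pattern, smoothing values over a +/- width window in one sorted parameter (all names and numbers are illustrative):

```python
import numpy

par = numpy.array([0.3, 0.1, 0.5, 0.2, 0.4])
vals = numpy.array([10.0, 2.0, 30.0, 4.0, 20.0])
width = 0.15

par_sort = par.argsort()
par_sorted = par[par_sort]
vals_sorted = vals[par_sort]  # fancy-index once, outside the loop

# Window of neighbours within +/- width of each sorted parameter value
lefts = numpy.searchsorted(par_sorted, par_sorted - width, side='left')
rights = numpy.searchsorted(par_sorted, par_sorted + width, side='right')
slices = [slice(l, r) for l, r in zip(lefts, rights)]

smoothed = numpy.array([vals_sorted[slc].mean() for slc in slices])
print(smoothed)  # approx. [ 3.    5.33 11.33 20.   25.  ]
```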
@@ -377,10 +490,8 @@ outfile['template_id'] = tid
outfile['count_above_thresh'] = nabove_smoothed
outfile['fit_coeff'] = alpha_smoothed
outfile['count_in_template'] = ntotal_smoothed
try:
outfile['median_sigma'] = fits['median_sigma'][:]
except KeyError:
logging.info('Median_sigma dataset not present in input file')
if median_sigma is not None:
outfile['median_sigma'] = median_sigma

for param, vals, slog in zip(args.fit_param, parvals, args.log_param):
if slog in ['false', 'False', 'FALSE']:
Expand All @@ -389,16 +500,16 @@ for param, vals, slog in zip(args.fit_param, parvals, args.log_param):
outfile[param] = numpy.exp(vals)

if args.output_fits_by_template:
outfile.create_group('fit_by_template')
for k in ['count_above_thresh', 'fit_coeff', 'count_in_template']:
outfile['fit_by_template'][k] = fits[k][:]
fbt_group = outfile.create_group('fit_by_template')
for k, v in fbt_dict.items():
fbt_group[k] = v

# Add metadata, some is inherited from template level fit
outfile.attrs['ifo'] = ifo
outfile.attrs['stat_threshold'] = fits.attrs['stat_threshold']
if 'analysis_time' in fits.attrs:
outfile.attrs['analysis_time'] = fits.attrs['analysis_time']
for k, v in attr_dict.items():
outfile.attrs[k] = v
if not analysis_time == 0:
outfile.attrs['analysis_time'] = analysis_time

# Add a magic file attribute so that coinc_findtrigs can parse it
outfile.attrs['stat'] = ifo + '-fit_coeffs'
outfile.attrs['stat'] = attr_dict['ifo'] + '-fit_coeffs'
logging.info('Done!')
2 changes: 1 addition & 1 deletion bin/all_sky_search/pycbc_upload_single_event_to_gracedb
@@ -72,7 +72,7 @@ if args.production_server:
else:
gracedb = GraceDb(service_url='https://gracedb-playground.ligo.org/api/')

labels = [l.upper() for l in args.labels]
labels = [l.upper() for l in (args.labels or [])]
allowed_labels = gracedb.allowed_labels

if set(labels) - set(allowed_labels):
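The one-line fix above guards against `args.labels` being `None`, which is typically what argparse returns when an optional argument is not supplied at all; with the old code, omitting `--labels` raised a `TypeError` in the list comprehension. In miniature:

```python
labels = None                                # argparse default: no --labels given
print([l.upper() for l in (labels or [])])   # [] instead of a TypeError
```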
File renamed without changes.
File renamed without changes.