Skip to content

Commit

Permalink
Merge pull request #30 from LCOGT/reextract
Browse files Browse the repository at this point in the history
Commissioning fixes and refactoring for re-extraction using the frontend UI.
  • Loading branch information
cmccully authored Nov 5, 2024
2 parents 37c58c1 + 6d39c92 commit e7843e3
Show file tree
Hide file tree
Showing 34 changed files with 12,800 additions and 556 deletions.
5 changes: 0 additions & 5 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@ jobs:
python: '3.9'
toxenv: py39-test-alldeps-cov

- name: OS X - Python 3.9 with all optional dependencies
os: macos-latest
python: '3.9'
toxenv: py39-test-alldeps

- name: Windows - Python 3.9 with all optional dependencies
os: windows-latest
python: '3.9'
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,12 @@ dmypy.json

# Directory to store test data
test_data/
lco_debug_data/
manual_reduction/

# VS Code
.vscode

test.db

debug.db
.DS_Store
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
0.10.0 (2024-11-05)
-------------------
- Numerous fixes based on commissioning experience
- Added/refactored the necessary logic to re-extract
the data via the UI

0.9.0 (2024-04-02)
------------------
- Fixes based on Joey's comments
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ghcr.io/lcogt/banzai:1.13.1
FROM ghcr.io/lcogt/banzai:1.19.1

USER root

Expand Down
33 changes: 15 additions & 18 deletions banzai_floyds/arc_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@
'line_source': 'Hg',
'line_notes': ''
},
{
'wavelength': 4077.837,
'line_strength': 0.031,
'line_source': 'Hg',
'line_notes': ''
},
{
'wavelength': 4358.335,
'line_strength': 1.577,
Expand All @@ -50,12 +44,6 @@
'line_source': 'ArI',
'line_notes': ''
},
{
'wavelength': 7147.0416,
'line_strength': 0.011,
'line_source': 'ArI',
'line_notes': ''
},
{
'wavelength': 7272.9359,
'line_strength': 0.079,
Expand Down Expand Up @@ -116,12 +104,6 @@
'line_source': 'ArI',
'line_notes': ''
},
{
'wavelength': 10139.75,
'line_strength': 0.019,
'line_source': 'Hg',
'line_notes': ''
},
]

unused_lines = [{
Expand All @@ -139,6 +121,11 @@
'line_strength': 0.0064,
'line_source': 'Hg',
'line_notes': 'blend'
}, {
'wavelength': 4077.837,
'line_strength': 0.031,
'line_source': 'Hg',
'line_notes': 'weak'
}, {
'wavelength': 4339.2232,
'line_strength': 'nan',
Expand Down Expand Up @@ -174,6 +161,11 @@
'line_strength': 'nan',
'line_source': 'Hg',
'line_notes': 'Weak'
}, {
'wavelength': 7147.0416,
'line_strength': 0.011,
'line_source': 'ArI',
'line_notes': 'Too close to previous line'
}, {
'wavelength':
4680.1359,
Expand Down Expand Up @@ -360,6 +352,11 @@
'line_strength': 'nan',
'line_source': 'ArI',
'line_notes': 'No flux in FLOYDS'
}, {
'wavelength': 10139.75,
'line_strength': 0.019,
'line_source': 'Hg',
'line_notes': 'Too close to the chip edge at COJ'
}, {
'wavelength': 11078.87,
'line_strength': 'nan',
Expand Down
115 changes: 115 additions & 0 deletions banzai_floyds/background.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import numpy as np
from astropy.table import Table, vstack
from scipy.interpolate import CloughTocher2DInterpolator
from banzai.stages import Stage
from numpy.polynomial.legendre import Legendre


def fit_background(data, background_order=3):
    """Fit a smooth sky-background model to the binned 2d spectrum.

    Parameters
    ----------
    data : grouped astropy Table of per-pixel values. Columns read here:
        ``order``, ``wavelength``, ``wavelength_bin``, ``y_profile``, ``data``,
        ``uncertainty``, ``in_background``, ``x``, ``y``. Presumably grouped by
        (order, wavelength_bin) since each group is fit independently — TODO confirm
        against the caller's binning.
    background_order : int
        Degree of the 1d Legendre polynomial fit across the slit (``y_profile``)
        within each wavelength bin.

    Returns
    -------
    Table with columns ``x``, ``y``, ``background``: the background model evaluated
    back at each original pixel position.
    """
    # I tried a wide variety of bsplines and 2d fits here without success.
    # The scipy bsplines either had significant issues with the number of points we are fitting in the whole 2d frame
    # or could not capture the variation near sky line edges (the key reason to use 2d fits from Kelson 2003).
    # I also tried using 2d polynomials, but to get the order high enough to capture the variation in the skyline
    # edges, I was introducing significant ringing in the fits (to the point of oscillating between positive and
    # negative values in the data).
    # This is now doing something closer to what IRAF did, interpolating the background regions onto the wavelength
    # bin centers, fitting a 1d polynomial, and interpolating back on the original wavelengths to subtract per pixel.
    # In this way, it is only the background model that is interpolated and not the pixel values themselves.
    data['data_bin_center'] = 0.0
    data['uncertainty_bin_center'] = 0.0
    for order in [1, 2]:
        in_order = data['order'] == order

        # Interpolators over (wavelength, y_profile) -> pixel value, built per order.
        # fill_value=0 marks points that fall outside the interpolation surface.
        data_interpolator = CloughTocher2DInterpolator(np.array([data['wavelength'][in_order],
                                                                 data['y_profile'][in_order]]).T,
                                                       data['data'][in_order].ravel(), fill_value=0)
        uncertainty_interpolator = CloughTocher2DInterpolator(np.array([data['wavelength'][in_order],
                                                                       data['y_profile'][in_order]]).T,
                                                              data['uncertainty'][in_order].ravel(), fill_value=0)

        # Resample each pixel onto its wavelength-bin center at fixed slit position.
        data['data_bin_center'][in_order] = data_interpolator(data['wavelength_bin'][in_order],
                                                              data['y_profile'][in_order])
        data['uncertainty_bin_center'][in_order] = uncertainty_interpolator(data['wavelength_bin'][in_order],
                                                                            data['y_profile'][in_order])

    # Assume no wavelength dependence for the wavelength_bin = 0 and first and last bin in the order
    # which have funny edge effects
    background_bin_center = []
    for data_to_fit in data.groups:
        # wavelength_bin == 0 flags pixels outside any real bin; skip them here
        # (they also get a zero background below).
        if data_to_fit['wavelength_bin'][0] == 0:
            continue
        # Catch the case where we are an edge and fall outside the qhull interpolation surface
        if np.all(data_to_fit['data_bin_center'] == 0):
            data_column = 'data'
            uncertainty_column = 'uncertainty'
        else:
            data_column = 'data_bin_center'
            uncertainty_column = 'uncertainty_bin_center'
        # Restrict the fit to the flagged background region, dropping exact zeros
        # (which are interpolator fill values, not measurements).
        in_background = data_to_fit['in_background']
        in_background = np.logical_and(in_background, data_to_fit[data_column] != 0)
        # Inverse-variance weighted 1d Legendre fit across the slit for this bin.
        polynomial = Legendre.fit(data_to_fit['y_profile'][in_background], data_to_fit[data_column][in_background],
                                  background_order,
                                  domain=[np.min(data_to_fit['y_profile']), np.max(data_to_fit['y_profile'])],
                                  w=1/data_to_fit[uncertainty_column][in_background]**2)

        # Evaluate the model at every slit position in the bin (not just background pixels).
        background_bin_center.append(polynomial(data_to_fit['y_profile']))

    data['background_bin_center'] = 0.0
    # The hstack relies on data.groups iterating in the same order as the column rows
    # for wavelength_bin != 0.
    data['background_bin_center'][data['wavelength_bin'] != 0] = np.hstack(background_bin_center)

    results = Table({'x': [], 'y': [], 'background': []})
    for order in [1, 2]:
        in_order = np.logical_and(data['order'] == order, data['wavelength_bin'] != 0)
        # Interpolate the per-bin model back onto each pixel's true wavelength.
        background_interpolator = CloughTocher2DInterpolator(np.array([data['wavelength_bin'][in_order],
                                                                      data['y_profile'][in_order]]).T,
                                                             data['background_bin_center'][in_order], fill_value=0)
        background = background_interpolator(data['wavelength'][in_order], data['y_profile'][in_order])
        # Deal with the funniness at the wavelength bin edges: pixels beyond the first/last
        # bin center fall outside the interpolation surface, so reuse the bin-center value.
        upper_edge = data['wavelength'][in_order] > np.max(data['wavelength_bin'][in_order])
        background[upper_edge] = data[in_order]['background_bin_center'][upper_edge]
        lower_edge = data['wavelength'][in_order] < np.min(data['wavelength_bin'][in_order])
        background[lower_edge] = data['background_bin_center'][in_order][lower_edge]
        order_results = Table({'x': data['x'][in_order], 'y': data['y'][in_order], 'background': background})
        results = vstack([results, order_results])
    # Clean up our intermediate columns for now
    data.remove_columns(['data_bin_center', 'uncertainty_bin_center', 'background_bin_center'])
    return results


def set_background_region(image):
    """Flag which binned pixels lie inside the configured background windows.

    Adds a boolean ``in_background`` column to ``image.binned_data`` (a no-op if
    the column is already present) and records each window's limits in the image
    header so the reduction is reproducible.
    """
    # Already flagged on a previous pass; nothing to do.
    if 'in_background' in image.binned_data.colnames:
        return

    image.binned_data['in_background'] = False
    for order_id in [2, 1]:
        in_order = image.binned_data['order'] == order_id
        order_data = image.binned_data[in_order]
        selected = np.zeros(in_order.sum(), dtype=bool)
        # Window limits are expressed in units of the fitted profile width (sigma).
        for window in image.background_windows[order_id - 1]:
            above_lower = order_data['y_profile'] >= (window[0] * order_data['profile_sigma'])
            below_upper = order_data['y_profile'] <= (window[1] * order_data['profile_sigma'])
            selected = np.logical_or(selected, np.logical_and(below_upper, above_lower))
        image.binned_data['in_background'][in_order] = selected

    # Persist the window definitions in the header.
    for order in [1, 2]:
        for reg_num, region in enumerate(image.background_windows[order - 1]):
            image.meta[f'BKWO{order}{reg_num}0'] = (
                region[0], f'Background region {reg_num} for order:{order} minimum in profile sigma'
            )
            image.meta[f'BKWO{order}{reg_num}1'] = (
                region[1], f'Background region {reg_num} for order:{order} maximum in profile sigma'
            )


class BackgroundFitter(Stage):
    """Pipeline stage that models the sky background of a binned FLOYDS frame."""

    # Default background window in units of profile sigma: (inner edge, outer edge).
    DEFAULT_BACKGROUND_WINDOW = (7.5, 15)

    def do_stage(self, image):
        # Fall back to symmetric windows on both sides of the trace when the
        # user has not supplied explicit background regions.
        if not image.background_windows:
            inner, outer = self.DEFAULT_BACKGROUND_WINDOW
            default_window = [[-outer, -inner], [inner, outer]]
            image.background_windows = [default_window, default_window]
        set_background_region(image)
        image.background = fit_background(image.binned_data)
        return image
9 changes: 9 additions & 0 deletions banzai_floyds/binning.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from banzai.stages import Stage
from banzai_floyds.utils.binning_utils import bin_data


class Binning(Stage):
    """Pipeline stage that bins the 2d frame in wavelength for downstream fitting."""

    def do_stage(self, image):
        # Delegate to the shared helper and cache the result on the image for
        # later stages (e.g. background fitting and extraction).
        binned = bin_data(image.data, image.uncertainty, image.wavelengths,
                          image.orders, image.mask)
        image.binned_data = binned
        return image
Loading

0 comments on commit e7843e3

Please sign in to comment.