From daa42a7b1abb613c05b4b1440ea3b2603b16ef08 Mon Sep 17 00:00:00 2001 From: Jashandeep Sohi Date: Wed, 11 Sep 2024 10:39:35 -0700 Subject: [PATCH 1/3] feat: add subtraction operation --- .../data_operations/subtraction.py | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 datalab/datalab_session/data_operations/subtraction.py diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py new file mode 100644 index 0000000..157d0ad --- /dev/null +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -0,0 +1,85 @@ +import logging + +import numpy as np + +from datalab.datalab_session.data_operations.data_operation import BaseDataOperation +from datalab.datalab_session.exceptions import ClientAlertException +from datalab.datalab_session.file_utils import create_fits, create_jpgs +from datalab.datalab_session.s3_utils import save_fits_and_thumbnails + +log = logging.getLogger() +log.setLevel(logging.INFO) + + +class Subtraction(BaseDataOperation): + + @staticmethod + def name(): + return 'Subtraction' + + @staticmethod + def description(): + return """ + The Subtraction operation takes in 1..n input images and calculated the subtraction value pixel-by-pixel. + The output is a subtraction image for the n input images. This operation is commonly used for background subtraction. + """ + + @staticmethod + def wizard_description(): + return { + 'name': Subtraction.name(), + 'description': Subtraction.description(), + 'category': 'image', + 'inputs': { + 'input_files': { + 'name': 'Input Files', + 'description': 'The input files to operate on', + 'type': 'file', + 'minimum': 1, + 'maximum': 10 + }, + 'subtraction_file': { + 'name': 'Subtraction File', + 'description': 'This file will be subtracted from the input images.', + 'type': 'file', + 'minimum': 1, + 'maximum': 1 + } + }, + } + + def operate(self): + + input_files = self.input_data.get('input_files', []) + subtraction_file_input = self.input_data.get('subtraction_file', []) + + if not subtraction_file_input: + raise ClientAlertException('subtraction file not specified') + + if len(input_files) < 1: + raise ClientAlertException(' needs at least 1 file') + + log.info(f'Executing subtraction operation on {len(input_files)} files') + + input_image_data_list = self.get_fits_npdata(input_files) + self.set_percent_completion(.05) + + subtraction_image = self.get_fits_npdata(subtraction_file_input)[0] + self.set_percent_completion(.10) + + outputs = [] + for x in input_image_data_list: + o = np.subtract(x, subtraction_image) + fits_file = create_fits(self.cache_key, o) + large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file) + output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path) + outputs.append(output_file) + + + self.set_percent_completion(.90) + + output = {'output_files': outputs} + + self.set_percent_completion(1.0) + self.set_output(output) + log.info(f'Subtraction output: {self.get_output()}') From 4af51cfbc0db5d0ef6f49627cacd1a30b2070672 Mon Sep 17 00:00:00 2001 From: Steve Foale Date: Wed, 11 Sep 2024 19:15:35 +0100 Subject: [PATCH 2/3] Add an index for each of the outputs. --- datalab/datalab_session/data_operations/subtraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py index 157d0ad..a0242aa 100644 --- a/datalab/datalab_session/data_operations/subtraction.py +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -68,11 +68,11 @@ def operate(self): self.set_percent_completion(.10) outputs = [] - for x in input_image_data_list: - o = np.subtract(x, subtraction_image) + for index, input_image in enumerate(input_image_data_list): + o = np.subtract(input_image, subtraction_image) fits_file = create_fits(self.cache_key, o) large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file) - output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path) + output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path, index) outputs.append(output_file) From 048d59b6cd5a0839c23c734eb41e063364b8ebf0 Mon Sep 17 00:00:00 2001 From: Lloyd Dakin Date: Mon, 16 Sep 2024 11:55:45 -0700 Subject: [PATCH 3/3] replace stack arrays with crop arrays, add index to basename for multiple outputs, change subtraction max to 999, update messages, update progress, crop subtraction image and input image, update test --- .../datalab_session/data_operations/median.py | 6 ++--- .../data_operations/rgb_stack.py | 7 +++-- .../data_operations/subtraction.py | 27 +++++++++++-------- datalab/datalab_session/file_utils.py | 5 ++-- datalab/datalab_session/s3_utils.py | 2 +- datalab/datalab_session/tests/test_utils.py | 9 +++---- 6 files changed, 31 insertions(+), 25 deletions(-) diff --git a/datalab/datalab_session/data_operations/median.py b/datalab/datalab_session/data_operations/median.py index e068a90..d93a745 100644 --- a/datalab/datalab_session/data_operations/median.py +++ b/datalab/datalab_session/data_operations/median.py @@ -4,7 +4,7 @@ from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import create_fits, stack_arrays, create_jpgs +from datalab.datalab_session.file_utils import create_fits, crop_arrays, create_jpgs from datalab.datalab_session.s3_utils import save_fits_and_thumbnails log = logging.getLogger() @@ -51,7 +51,8 @@ def operate(self): image_data_list = self.get_fits_npdata(input, percent=0.4, cur_percent=0.0) - stacked_data = stack_arrays(image_data_list) + cropped_data_list = crop_arrays(image_data_list) + stacked_data = np.stack(cropped_data_list, axis=2) # using the numpy library's median method median = np.median(stacked_data, axis=2) @@ -64,6 +65,5 @@ def operate(self): output = {'output_files': [output_file]} - self.set_percent_completion(1.0) self.set_output(output) log.info(f'Median output: {self.get_output()}') diff --git a/datalab/datalab_session/data_operations/rgb_stack.py b/datalab/datalab_session/data_operations/rgb_stack.py index b140b2d..58e347f 100644 --- a/datalab/datalab_session/data_operations/rgb_stack.py +++ b/datalab/datalab_session/data_operations/rgb_stack.py @@ -1,10 +1,11 @@ import logging from astropy.io import fits +import numpy as np from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import get_fits, stack_arrays, create_fits, create_jpgs +from datalab.datalab_session.file_utils import get_fits, crop_arrays, create_fits, create_jpgs from datalab.datalab_session.s3_utils import save_fits_and_thumbnails log = logging.getLogger() @@ -70,7 +71,9 @@ def operate(self): # color photos take three files, so we store it as one fits file with a 3d SCI ndarray arrays = [fits.open(file)['SCI'].data for file in fits_paths] - stacked_data = stack_arrays(arrays) + cropped_data_list = crop_arrays(arrays) + stacked_data = np.stack(cropped_data_list, axis=2) + fits_file = create_fits(self.cache_key, stacked_data) output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path) diff --git a/datalab/datalab_session/data_operations/subtraction.py b/datalab/datalab_session/data_operations/subtraction.py index a0242aa..b3bca9f 100644 --- a/datalab/datalab_session/data_operations/subtraction.py +++ b/datalab/datalab_session/data_operations/subtraction.py @@ -4,7 +4,7 @@ from datalab.datalab_session.data_operations.data_operation import BaseDataOperation from datalab.datalab_session.exceptions import ClientAlertException -from datalab.datalab_session.file_utils import create_fits, create_jpgs +from datalab.datalab_session.file_utils import create_fits, create_jpgs, crop_arrays from datalab.datalab_session.s3_utils import save_fits_and_thumbnails log = logging.getLogger() @@ -36,7 +36,7 @@ def wizard_description(): 'description': 'The input files to operate on', 'type': 'file', 'minimum': 1, - 'maximum': 10 + 'maximum': 999 }, 'subtraction_file': { 'name': 'Subtraction File', @@ -51,35 +51,40 @@ def wizard_description(): def operate(self): input_files = self.input_data.get('input_files', []) + print(f'Input files: {input_files}') subtraction_file_input = self.input_data.get('subtraction_file', []) + print(f'Subtraction file: {subtraction_file_input}') if not subtraction_file_input: - raise ClientAlertException('subtraction file not specified') + raise ClientAlertException('Missing a subtraction file') if len(input_files) < 1: - raise ClientAlertException(' needs at least 1 file') + raise ClientAlertException('Need at least one input file') log.info(f'Executing subtraction operation on {len(input_files)} files') input_image_data_list = self.get_fits_npdata(input_files) - self.set_percent_completion(.05) + self.set_percent_completion(.30) subtraction_image = self.get_fits_npdata(subtraction_file_input)[0] - self.set_percent_completion(.10) + self.set_percent_completion(.40) outputs = [] for index, input_image in enumerate(input_image_data_list): - o = np.subtract(input_image, subtraction_image) - fits_file = create_fits(self.cache_key, o) + # crop the input_image and subtraction_image to the same size + input_image, subtraction_image = crop_arrays([input_image, subtraction_image]) + + difference_array = np.subtract(input_image, subtraction_image) + + fits_file = create_fits(self.cache_key, difference_array) large_jpg_path, small_jpg_path = create_jpgs(self.cache_key, fits_file) + output_file = save_fits_and_thumbnails(self.cache_key, fits_file, large_jpg_path, small_jpg_path, index) outputs.append(output_file) - - self.set_percent_completion(.90) + self.set_percent_completion(self.get_percent_completion() + .50 * (index + 1) / len(input_files)) output = {'output_files': outputs} - self.set_percent_completion(1.0) self.set_output(output) log.info(f'Subtraction output: {self.get_output()}') diff --git a/datalab/datalab_session/file_utils.py b/datalab/datalab_session/file_utils.py index 645a356..c0a5c75 100644 --- a/datalab/datalab_session/file_utils.py +++ b/datalab/datalab_session/file_utils.py @@ -79,7 +79,7 @@ def create_jpgs(cache_key, fits_paths: str, color=False) -> list: return large_jpg_path, thumbnail_jpg_path -def stack_arrays(array_list: list): +def crop_arrays(array_list: list): """ Takes a list of numpy arrays from fits images and stacks them to be a 3d numpy array cropped since fits images can be different sizes @@ -88,8 +88,7 @@ def stack_arrays(array_list: list): min_y = min(arr.shape[1] for arr in array_list) cropped_data_list = [arr[:min_x, :min_y] for arr in array_list] - - return np.stack(cropped_data_list, axis=2) + return cropped_data_list def scale_points(height_1: int, width_1: int, height_2: int, width_2: int, x_points=[], y_points=[], flip_y = False, flip_x = False): """ diff --git a/datalab/datalab_session/s3_utils.py b/datalab/datalab_session/s3_utils.py index c461b97..8ccbd72 100644 --- a/datalab/datalab_session/s3_utils.py +++ b/datalab/datalab_session/s3_utils.py @@ -152,7 +152,7 @@ def save_fits_and_thumbnails(cache_key, fits_path, large_jpg_path, thumbnail_jpg 'fits_url': fits_url, 'large_url': large_jpg_url, 'thumbnail_url': thumbnail_jpg_url, - 'basename': f'{cache_key}', + 'basename': f'{cache_key}-{index}' if index else cache_key, 'source': 'datalab'} ) diff --git a/datalab/datalab_session/tests/test_utils.py b/datalab/datalab_session/tests/test_utils.py index 562ba60..f4cb603 100644 --- a/datalab/datalab_session/tests/test_utils.py +++ b/datalab/datalab_session/tests/test_utils.py @@ -53,11 +53,10 @@ def test_stack_arrays(self): test_array_1 = np.zeros((10, 20)) test_array_2 = np.ones((20, 10)) - stacked_array = stack_arrays([test_array_1, test_array_2]) - self.assertIsInstance(stacked_array, np.ndarray) - self.assertEqual(stacked_array.shape, (10, 10, 2)) - self.assertEqual(stacked_array[:, :, 0].tolist(), np.zeros((10, 10)).tolist()) - self.assertEqual(stacked_array[:, :, 1].tolist(), np.ones((10, 10)).tolist()) + cropped_array = crop_arrays([test_array_1, test_array_2]) + self.assertEqual(len(cropped_array), 2) + self.assertEqual(cropped_array[0].tolist(), np.zeros((10, 10)).tolist()) + self.assertEqual(cropped_array[1].tolist(), np.ones((10, 10)).tolist()) def test_scale_points(self): x_points = [1, 2, 3]