From 478fd7315eb298b5d0266d8b5c53f5e39af0f8e6 Mon Sep 17 00:00:00 2001 From: Tyler Pritchard Date: Tue, 2 Apr 2024 13:20:14 -0400 Subject: [PATCH] cleaned up comments --- src/newlk_search/search.py | 153 ++++++------------------------------- 1 file changed, 24 insertions(+), 129 deletions(-) diff --git a/src/newlk_search/search.py b/src/newlk_search/search.py index 3fdf6e5..ba98c53 100644 --- a/src/newlk_search/search.py +++ b/src/newlk_search/search.py @@ -4,37 +4,32 @@ import re import logging import warnings -from lightkurve.utils import ( - LightkurveDeprecationWarning, - LightkurveError, - LightkurveWarning, - suppress_stdout, -) import numpy as np from astropy import units as u from astropy.coordinates import SkyCoord -from astropy.table import Table, join +from astropy.table import Table from astropy.time import Time -from lightkurve.io import read from copy import deepcopy -# import cache # from .config import conf, config from . import PACKAGEDIR, PREFER_CLOUD, DOWNLOAD_CLOUD, conf, config - +# TODO: Revisit caching from memoization import cached - +# import cache # from src.newlk_search.cache import cache + log = logging.getLogger(__name__) class SearchError(Exception): pass +class SearchWarning(Warning): + pass class MASTSearch(object): """ @@ -73,10 +68,6 @@ class MASTSearch(object): Mission Specific Survey value that corresponds to Sector (TESS), Campaign (K2), or Quarter (Kepler) """ - # Shared functions that are used for searches by any mission - # "mission", - # Start time? - # distance _REPR_COLUMNS = [ "target_name", "pipeline", @@ -87,7 +78,6 @@ class MASTSearch(object): "description", ] - # why is this needed here? recursion error otherwise table = None def __init__( @@ -109,6 +99,7 @@ def __init__( pipeline = np.atleast_1d(pipeline).tolist() self.search_pipeline = pipeline self.search_sequence = sequence + # Legacy functionality - no longer query kic/tic by integer value only if isinstance(target, int): raise TypeError( @@ -116,8 +107,6 @@ def __init__( "or astropy coordinate object" ) - # If target is not None, Parse the input - # TODO: get rid of saving prod and obs to self self.target = target if isinstance(table, type(None)): self._target_from_name(target) @@ -145,7 +134,8 @@ def _target_from_name(self, target): self.table = self.table[mask] def _target_from_table(self, table, obs_table, prod_table): - # see if we were passed a joint table + + # see if function was passed a joint table if isinstance(table, pd.DataFrame): self.table = table @@ -238,8 +228,7 @@ def __repr__(self): else: return "I am an uninitialized MASTSearch result" - # This is a possible addition to add a hyperlink to the dataproduct homepages. - # I think we want this anyways as this calls the pandas table html output which is nicer + # Used to call the pandas table html output which is nicer def _repr_html_(self): if isinstance(self.table, pd.DataFrame): return self.table[self._REPR_COLUMNS]._repr_html_() @@ -257,22 +246,19 @@ def __getitem__(self, key): return self._mask(key) def _mask(self, mask): - """Masks down the product and observation tables given an input mask, then returns them as a new K2Search object.""" + """Masks down the product and observation tables given an input mask, then returns them as a new Search object. + deepcopy is used to preserve the class metadata stored in class variables""" new_table = deepcopy(self) new_table.table = self.table[mask].reset_index() return new_table - # may overwrite this function in the individual KEplerSearch/TESSSearch/K2Search calls? def _update_table(self, joint_table): - # Ideally I'd like to replace of t_exptime and pro - # joint_table['exptime'] = joint_table['t_exptime'].copy() - # joint_table['pipeline'] = joint_table['provenance_name'].copy() - # joint_table['mission'] = joint_table['obs_collection_obs'].copy() - # joint_table = joint_table.rename(columns={"t_exptime":"exptime","provenance_name":"pipeline","obs_collection_obs":"mission"}) + #copy columns joint_table = joint_table.rename(columns={"t_exptime": "exptime"}) joint_table["pipeline"] = joint_table["provenance_name"].copy() joint_table["mission"] = joint_table["obs_collection_obs"].copy() + # rename identical columns joint_table.rename( columns={ @@ -286,22 +272,6 @@ def _update_table(self, joint_table): ) joint_table = joint_table.reset_index() - #year = np.floor(Time(joint_table["t_min"], format="mjd").decimalyear) - ## `t_min` is incorrect for Kepler pipeline products, so we extract year from the filename for those - #for idx, row in joint_table.iterrows(): - # if (row['pipeline'] == "Kepler") & ("Data Validation" not in row['description']): - # year[idx] = re.findall( - # r"\d+.(\d{4})\d+", row["productFilename"] - # )[0] - #joint_table["year"] = year.astype(int) - # - ## TODO: make sure the time for TESS/Kepler/K2 all add 2400000.5 - #joint_table["start_time"] = Time( - # self.table["t_min"].values + 2400000.5, format="jd" - #).iso - #joint_table["end_time"] = Time( - # self.table["t_max"].values + 2400000.5, format="jd" - #).iso return joint_table def _fix_table_times(self, joint_table): @@ -323,30 +293,7 @@ def _fix_table_times(self, joint_table): ).iso return joint_table - - """ - Full list of features - ['intentType', 'obscollection_obs', 'provennce_name', - 'instrument_name', 'project_obs', 'filters', 'wavelength_region', - 'target_name', 'target_classification', 'obs_id', 's_ra', 's_dec', - 'dataproduct_type_obs', 'proposal_pi', 'calib_level_obs', 't_min', - 't_max', 't_exptime', 'em_min', 'em_max', 'obs_title', 't_obs_release', - 'proposal_id_obs', 'proposal_type', 'sequence_number', 's_region', - 'jpegURL', 'dataURL', 'dataRights_obs', 'mtFlag', 'srcDen', 'obsid', - 'objID', 'objID1', 'distance', 'obsID', 'obs_collection_prod', - 'dataproduct_type_prod', 'description', 'type', 'dataURI', - 'productType', 'productGroupDescription', 'productSubGroupDescription', - 'productDocumentationURL', 'project_prod', 'prvversion', - 'proposal_id_prod', 'productFilename', 'size', 'parent_obsid', - 'dataRights_prod', 'calib_level_prod']""" - - # Other additions may include the following - # self._add_columns("something") - # self._add_urls_to_authors() - # self._add_s3_url_column() - # self._sort_by_priority() - - + def _search( self, search_radius: Union[float, u.Quantity] = None, @@ -413,7 +360,7 @@ def _parse_input(self, search_input): ) def _add_s3_url_column(self, joint_table): - # self.table would updated to have an extra column of s3 URLS if possible + """ self.table will updated to have an extra column of s3 URLS if possible """ Observations.enable_cloud_dataset() cloud_uris = Observations.get_cloud_uris( Table.from_pandas(joint_table), full_url=True @@ -431,29 +378,13 @@ def _search_obs( sequence=None, cadence=None, ): - # Helper function that returns a Search Result object containing MAST products - # combines the results of Observations.query_criteria (called via self.query_mast) and Observations.get_product_list - - """if [bool(quarter), - bool(campaign), - bool(sector)].count(True) > 1: - raise LightkurveError("Ambiguity Error; multiple quarter/campaign/sector specified." - "If searching for specific data across different missions, perform separate searches by mission.") - """ - # Is this what we want to do/ where we want the error thrown? + # Is this what we want to do/ where we want the error thrown for an ffi search in MASTsearch? if filetype == "ffi": raise SearchError( f"FFI search not implemented in MASTSearch. Please use TESSSearch." ) - # if a quarter/campaign/sector is specified, search only that mission - """if quarter is not None: - mission = ["Kepler"] - if campaign is not None: - mission = ["K2"] - if sector is not None: - mission = ["TESS"] """ # Ensure mission is a list mission = np.atleast_1d(mission).tolist() if pipeline is not None: @@ -504,7 +435,6 @@ def _query_mast( ): from astroquery.exceptions import NoResultsWarning, ResolverError - # **extra_query_criteria,): # Constructs the appropriate query for mast log.debug(f"Searching for {self.target} with {exptime} on project {project}") @@ -604,11 +534,6 @@ def cubedata(self): # return self._cubedata() return self._mask(mask) - # def _cubedata(self): - # """ passthrough that mission searches can call """ - # mask = self.table.productFilename.str.endswith("tp.fits") - # return(self._mask(mask)) - def limit_results(self, limit: int): mask = np.ones(len(self.table), dtype=bool) mask[limit:] = False @@ -725,20 +650,9 @@ def _filter( else: exptime_mask = not mask - """# If no products are left, return an empty dataframe with the same columns - if sum(mask) == 0: - return pd.DataFrame(columns = products.keys()) - - products = products[mask] - - products.sort_values(by=["distance", "productFilename"], ignore_index=True) - - return products""" - # I think this hidden filter function should now just return the mask mask = file_mask & project_mask & provenance_mask & exptime_mask return mask - # Again, may want to add to self.mask if we go that route. def _mask_by_exptime(self, exptime): """Helper function to filter by exposure time. Returns a boolean array""" @@ -797,9 +711,10 @@ def download( Cachine more seamless if a user is searching for the same file(s) accross different project directories and has a pipeline workflow with input functions """ + if len(self.table) == 0: warnings.warn( - "Cannot download from an empty search result.", LightkurveWarning + "Cannot download from an empty search result.", SearchWarning ) return None if cloud: @@ -1049,13 +964,15 @@ def search_individual_ffi(self, **query_criteria, ) - ffi_products = Observations.get_product_list(ffi_obs) + ffi_products = Observations.get_product_list(ffi_obs + ) #filter out uncalibrated ffi's & theoretical potential HLSP prod_mask = ffi_products['calib_level'] == 2 ffi_products = ffi_products[prod_mask] new_table = deepcopy(self) + # Unlike the other products, ffis don't map cleanly bia obs_id as advertised, so we invert and add specific column info new_table.obs_table = ffi_products.to_pandas() new_table.obs_table['year'] = np.nan @@ -1088,7 +1005,8 @@ def download(self, cloud: PREFER_CLOUD = True, cache: PREFER_CLOUD = True, cloud mask = self.table["provenance_name"] == "TESScut" self._mask(~mask).download() from astroquery.mast import Tesscut - Tesscut.enable_cloud_dataset() + if cloud: + Tesscut.enable_cloud_dataset() mf1 = Tesscut.download_cutouts(coordinates=self.SkyCoord, size=TESScut_size, sector=self.table['sequence_number'].values[mask], @@ -1100,24 +1018,6 @@ def download(self, cloud: PREFER_CLOUD = True, cache: PREFER_CLOUD = True, cloud manifest = mf1.append(mf2) return manifest - - - -#Download should work here -# def download_ffi(self): -# raise NotImplementedError - - # This was in Christina's PR to search. Is this a way we want to handle HLSPs? - # def _mask_bad_authors(authors): - # """Returns a mask to remove authors we don't have readers for.""" - # bad_authors = np.asarray([author not in AUTHOR_LINKS.keys() for author in authors]) - # if bad_authors.any(): - # log.warn( - # f"Authors {np.unique(authors[bad_authors])} have been removed as `lightkurve` does not have a specific reader for these HLSPs.", - # ) - # return ~bad_authors - - class KeplerSearch(MASTSearch): def __init__( self, @@ -1149,11 +1049,6 @@ def __init__( # Can't search mast with quarter/month directly, so filter on that after the fact. self.table = self.table[self._filter_kepler(quarter, month)] - """ - # Now implemented in base class - def _fix_times(): - # Fixes Kepler times - raise NotImplementedError""" def _handle_kbonus(self): # KBONUS times are masked as they are invalid for the quarter data