From 6ff193b970eb4cbafe8b8899bd067f0c90943659 Mon Sep 17 00:00:00 2001 From: Tyler Pritchard Date: Tue, 21 May 2024 16:32:19 -0400 Subject: [PATCH 1/2] updated config file system, tests, new download_cloud config file parameter, repr to fix a bug, and more --- docs/tutorials/Example_searches.ipynb | 681 +++++++++++++++++++++++--- src/lksearch/K2Search.py | 4 +- src/lksearch/KeplerSearch.py | 4 +- src/lksearch/MASTSearch.py | 82 ++-- src/lksearch/TESSSearch.py | 21 +- src/lksearch/__init__.py | 32 +- src/lksearch/config/__init__.py | 36 +- tests/test_search.py | 34 ++ 8 files changed, 774 insertions(+), 120 deletions(-) diff --git a/docs/tutorials/Example_searches.ipynb b/docs/tutorials/Example_searches.ipynb index a35e110..d2f8d57 100644 --- a/docs/tutorials/Example_searches.ipynb +++ b/docs/tutorials/Example_searches.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "from tssc import MASTSearch, KeplerSearch, K2Search, TESSSearch" + "from lksearch import MASTSearch, KeplerSearch, K2Search, TESSSearch" ] }, { @@ -2557,7 +2557,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "pipeline products: 100%|███████████████████████████████████████████████████████████| 2/2 [00:20<00:00, 10.50s/it]\n" + "pipeline products: 100%|██████████████████████████| 2/2 [00:03<00:00, 1.87s/it]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" ] }, { @@ -2581,6 +2595,7 @@ " \n", " \n", " \n", + " index\n", " Local Path\n", " Status\n", " Message\n", @@ -2590,14 +2605,16 @@ " \n", " \n", " 0\n", - " /Users/tapritc2/.tssc/cache/mastDownload/TESS/...\n", + " 0\n", + " /Users/tapritc2/.lksearch/cache/mastDownload/T...\n", " COMPLETE\n", " None\n", " None\n", " \n", " \n", - " 0\n", - " /Users/tapritc2/.tssc/cache/mastDownload/HLSP/...\n", + " 1\n", + " 0\n", + " /Users/tapritc2/.lksearch/cache/mastDownload/H...\n", " COMPLETE\n", " None\n", " None\n", @@ -2607,9 +2624,13 @@ "" ], "text/plain": [ - " Local Path Status Message URL\n", - "0 /Users/tapritc2/.tssc/cache/mastDownload/TESS/... COMPLETE None None\n", - "0 /Users/tapritc2/.tssc/cache/mastDownload/HLSP/... COMPLETE None None" + " index Local Path Status Message \\\n", + "0 0 /Users/tapritc2/.lksearch/cache/mastDownload/T... COMPLETE None \n", + "1 0 /Users/tapritc2/.lksearch/cache/mastDownload/H... COMPLETE None \n", + "\n", + " URL \n", + "0 None \n", + "1 None " ] }, "execution_count": 16, @@ -2798,7 +2819,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "TESScut : 100%|███████████████████████████████████████████████████████████| 1/1 [00:25<00:00, 25.84s/it]\n" + "TESScut : 100%|██████████████████████████| 1/1 [00:03<00:00, 3.22s/it]\n" ] }, { @@ -2829,7 +2850,7 @@ " \n", " \n", " 0\n", - " /Users/tapritc2/.tssc/cache/mastDownload/TESSC...\n", + " /Users/tapritc2/.lksearch/cache/mastDownload/T...\n", " COMPLETE\n", " \n", " \n", @@ -2838,7 +2859,7 @@ ], "text/plain": [ " Local Path Status\n", - "0 /Users/tapritc2/.tssc/cache/mastDownload/TESSC... COMPLETE" + "0 /Users/tapritc2/.lksearch/cache/mastDownload/T... COMPLETE" ] }, "execution_count": 18, @@ -2871,7 +2892,7 @@ { "data": { "text/html": [ - "TESSSearch object containing 152 data products
\n", + "TESSSearch object containing 160 data products
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
     target_name  pipeline  mission  campaign  exptime  distance  year  description
0  ktwo201912552        K2       K2         1   1800.0       0.0  2014  Target Pixel Long Cadence (KTL) - C01
1  ktwo201912552        K2       K2         1   1800.0       0.0  2014  Lightcurve Long Cadence (KLC) - C01
2  ktwo201912552   EVEREST     HLSP         1   1800.0       0.0  2014  FITS
3  ktwo201912552   EVEREST     HLSP         1   1800.0       0.0  2014  PDF
4  ktwo201912552     K2SFF     HLSP         1   1800.0       0.0  2014  FITS
5  ktwo201912552  K2VARCAT     HLSP         1   1800.0       0.0  2014  FITS
\n", + "
" + ], + "text/plain": [ + "K2Search object containing 6 data products target_name pipeline mission campaign exptime distance year \\\n", + "0 ktwo201912552 K2 K2 1 1800.0 0.0 2014 \n", + "1 ktwo201912552 K2 K2 1 1800.0 0.0 2014 \n", + "2 ktwo201912552 EVEREST HLSP 1 1800.0 0.0 2014 \n", + "3 ktwo201912552 EVEREST HLSP 1 1800.0 0.0 2014 \n", + "4 ktwo201912552 K2SFF HLSP 1 1800.0 0.0 2014 \n", + "5 ktwo201912552 K2VARCAT HLSP 1 1800.0 0.0 2014 \n", + "\n", + " description \n", + "0 Target Pixel Long Cadence (KTL) - C01 \n", + "1 Lightcurve Long Cadence (KLC) - C01 \n", + "2 FITS \n", + "3 PDF \n", + "4 FITS \n", + "5 FITS " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "K2 = K2Search(\"K2-18\")\n", "K2" @@ -4100,10 +4287,109 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "7d27bf57", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "K2Search object containing 4 data products
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
     target_name  pipeline  mission  campaign  exptime  distance  year  description
0  ktwo201912552   EVEREST     HLSP         1   1800.0       0.0  2014  FITS
1  ktwo201912552   EVEREST     HLSP         1   1800.0       0.0  2014  PDF
2  ktwo201912552     K2SFF     HLSP         1   1800.0       0.0  2014  FITS
3  ktwo201912552  K2VARCAT     HLSP         1   1800.0       0.0  2014  FITS
\n", + "
" + ], + "text/plain": [ + "K2Search object containing 4 data products target_name pipeline mission campaign exptime distance year \\\n", + "0 ktwo201912552 EVEREST HLSP 1 1800.0 0.0 2014 \n", + "1 ktwo201912552 EVEREST HLSP 1 1800.0 0.0 2014 \n", + "2 ktwo201912552 K2SFF HLSP 1 1800.0 0.0 2014 \n", + "3 ktwo201912552 K2VARCAT HLSP 1 1800.0 0.0 2014 \n", + "\n", + " description \n", + "0 FITS \n", + "1 PDF \n", + "2 FITS \n", + "3 FITS " + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Download all lightcurves produced by HLSPs\n", "K2_HLSPs = K2.HLSPs\n", @@ -4112,22 +4398,287 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "c2cd7ee3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "pipeline products: 100%|██████████████████████████| 3/3 [00:01<00:00, 2.50it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
   index  Local Path                                          Status    Message  URL
0      0  /Users/tapritc2/.lksearch/cache/mastDownload/H...  COMPLETE  None     None
1      0  /Users/tapritc2/.lksearch/cache/mastDownload/H...  COMPLETE  None     None
2      0  /Users/tapritc2/.lksearch/cache/mastDownload/H...  COMPLETE  None     None
\n", + "
" + ], + "text/plain": [ + " index Local Path Status Message \\\n", + "0 0 /Users/tapritc2/.lksearch/cache/mastDownload/H... COMPLETE None \n", + "1 0 /Users/tapritc2/.lksearch/cache/mastDownload/H... COMPLETE None \n", + "2 0 /Users/tapritc2/.lksearch/cache/mastDownload/H... COMPLETE None \n", + "\n", + " URL \n", + "0 None \n", + "1 None \n", + "2 None " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "manifest = K2_HLSPs.timeseries.download()\n", "manifest" ] }, + { + "cell_type": "markdown", + "id": "a42fd1bd-f70a-4db9-b483-3a39f4787d4c", + "metadata": {}, + "source": [ + "## Configuration and Caching\n", + "\n", + "`lksearch` has a default file download location that serves as the file cache, and an optional configuration file that can be created and used to overwrite the default values" + ] + }, + { + "cell_type": "markdown", + "id": "03f52bd0-c5d3-4974-9b3a-dfd0c3d1a248", + "metadata": {}, + "source": [ + "### lksearch File Download and Cache\n", + "The `lksearch` file cache is a directory where files are downloaded to. This directory also serves as a cache directory, and if a file matching the name of the file to be downloaded exists we treat this as a cached file and by default do not overwrite the current file on disk. \n", + "\n", + "The default file download and cache directory is located at:\n", + "`~/.lksearch/cache`\n", + "\n", + "This can be verified using the get_cache_dir convenience function in the config sub-module, e.g.:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5ebda13a-0051-48c7-8272-e6aa3040f4b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/tapritc2/.lksearch/cache'" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from lksearch import config as lkconfig\n", + "lkconfig.get_cache_dir()" + ] + }, + { + "cell_type": "markdown", + "id": "e70e062e-d0f8-45e0-b01a-06cec8da54e5", + "metadata": {}, + "source": [ + "#### Clearing the Cache & Corrupted Files\n", + "If you wish to delete an individual file that you downloaded (for example, if you are concerned that a previously downloaded file is corrupted), the easiest way to do that is using the `Local Path` information in the manifest returned by the `.download()` function." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "6e6625e2-c1c2-40fb-8fd2-fa3633cf3722", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "# The manifest returned by download() is a pandas DataFrame\n", + "# We will access the first local path using iloc as so\n", + "os.remove(manifest.iloc[0][\"Local Path\"])" + ] + }, + { + "cell_type": "markdown", + "id": "7ffa2d24-eba6-48fd-9f59-12a348dcb08d", + "metadata": {}, + "source": [ + "If you want to clear *everything* from your cache, you can use the `config.clearcache()` function to completely empty your cache of downloaded files. by default this will run in \"test\" mode and print what you will be deleting. To confirm deletion, run with `test=False` optional parameter. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "1345bf88-74be-4aea-96c9-512ab31b349b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running in test mode, rerun with test=False to clear cache\n", + "removing /Users/tapritc2/.lksearch/cache/mastDownload/TESS\n", + "removing /Users/tapritc2/.lksearch/cache/mastDownload/K2\n", + "removing /Users/tapritc2/.lksearch/cache/mastDownload/Kepler\n", + "removing /Users/tapritc2/.lksearch/cache/mastDownload/TESSCut\n", + "removing /Users/tapritc2/.lksearch/cache/mastDownload/HLSP\n" + ] + } + ], + "source": [ + "lkconfig.clearcache()" + ] + }, + { + "cell_type": "markdown", + "id": "6291a748-1e67-41b1-8049-57aabe2b744b", + "metadata": {}, + "source": [ + "**Passing `test=False` will then fully delete the above directories** \n", + "\n", + "e.g. `lkconfig.clearcache(test=False)`" + ] + }, + { + "cell_type": "markdown", + "id": "9adc419e-91b3-41bc-bc40-d37b5c628ebe", + "metadata": {}, + "source": [ + "### lksearch Configuration file\n", + "lksearch also has an optional configuration file that is built on-top of `~astropy.config` using `~astropy.config.ConfigNamespace`. This file does not exist by default, but a default version can be created using the `config.create_config_file` helper function. " + ] + }, { "cell_type": "code", - "execution_count": null, - "id": "7dfa491c", + "execution_count": 33, + "id": "345bb5e5-7b13-4ee8-9a08-c85de036ace4", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "lkconfig.create_config_file(overwrite = True)" + ] + }, + { + "cell_type": "markdown", + "id": "1561451f-d3c0-4df9-bdb9-343b63e0f136", + "metadata": {}, + "source": [ + "This file can be found in the below location. To edit this, please see the astropy.config documentation. " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "be7bc33c-eb6c-4ce7-81f8-d98076581cc4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/tapritc2/.lksearch/config'" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lkconfig.get_config_dir()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "341857e6-aa78-4cc6-b68d-79fca23f7e63", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/Users/tapritc2/.lksearch/config/lksearch.cfg'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lkconfig.get_config_file()" + ] } ], "metadata": { diff --git a/src/lksearch/K2Search.py b/src/lksearch/K2Search.py index df0517a..1026dfa 100644 --- a/src/lksearch/K2Search.py +++ b/src/lksearch/K2Search.py @@ -16,12 +16,10 @@ from .utils import SearchError, SearchWarning, suppress_stdout from .MASTSearch import MASTSearch -from . import PACKAGEDIR, PREFER_CLOUD, DOWNLOAD_CLOUD, conf, config +from . import PACKAGEDIR, conf, config pd.options.display.max_rows = 10 -default_download_dir = config.get_cache_dir() - log = logging.getLogger(__name__) diff --git a/src/lksearch/KeplerSearch.py b/src/lksearch/KeplerSearch.py index b133769..629bb37 100644 --- a/src/lksearch/KeplerSearch.py +++ b/src/lksearch/KeplerSearch.py @@ -16,12 +16,10 @@ from .utils import SearchError, SearchWarning, suppress_stdout from .MASTSearch import MASTSearch -from . import PACKAGEDIR, PREFER_CLOUD, DOWNLOAD_CLOUD, conf, config +from . 
import PACKAGEDIR, conf, config pd.options.display.max_rows = 10 -default_download_dir = config.get_cache_dir() - log = logging.getLogger(__name__) diff --git a/src/lksearch/MASTSearch.py b/src/lksearch/MASTSearch.py index f9f5891..d6b24ad 100644 --- a/src/lksearch/MASTSearch.py +++ b/src/lksearch/MASTSearch.py @@ -21,8 +21,6 @@ pd.options.display.max_rows = 10 -default_download_dir = config.get_cache_dir() - log = logging.getLogger(__name__) @@ -145,6 +143,10 @@ def __getitem__(self, key): if all(isinstance(n, str) for n in key): strlist = True + if hasattr(key, "__iter__") or isinstance(key, pd.Series): + if len(key) == len(self.table): + return self._mask(key) + if isinstance(key, (slice, int)) or (intlist): if not intlist: mask = np.in1d( @@ -153,14 +155,11 @@ def __getitem__(self, key): else: mask = np.in1d(self.table.index, key) return self._mask(mask) - if isinstance(key, (str, pd.Series)) or strlist: + if isinstance(key, str) or strlist: # Return a column as a series, or a dataframe of columns # Note that we're not returning a Search Object here as # we havce additional Requiered columns, etc. return self.table[key] - if hasattr(key, "__iter__"): - if len(key) == len(self.table): - return self._mask(key) @property def ra(self): @@ -202,7 +201,7 @@ def uris(self): """Location Information of the products in the table""" uris = self.table["dataURI"].values - if config.PREFER_CLOUD: + if conf.PREFER_CLOUD: cloud_uris = self.cloud_uris mask = cloud_uris != None uris[mask] = cloud_uris[mask] @@ -217,10 +216,10 @@ def cloud_uris(self): ~numpy.array of URI's from ~astroquery.mast an array where each element is the cloud-URI of a product in self.table """ - Observations.enable_cloud_dataset() - return np.asarray( - Observations.get_cloud_uris(Table.from_pandas(self.table), full_url=True) - ) + if "cloud_uri" not in self.table.columns: + self.table = self._add_s3_url_column(self.table) + + return self.table["cloud_uri"] @property def timeseries(self): @@ -323,8 +322,9 @@ def _searchtable_from_table( def _mask(self, mask): """Masks down the product and observation tables given an input mask, then returns them as a new Search object. deepcopy is used to preserve the class metadata stored in class variables""" + new_MASTSearch = deepcopy(self) - new_MASTSearch.table = self.table[mask].reset_index() + new_MASTSearch.table = new_MASTSearch.table[mask].reset_index(drop=True) return new_MASTSearch @@ -382,7 +382,7 @@ def _fix_table_times(self, joint_table: pd.DataFrame): """ if isinstance(joint_table.index, pd.MultiIndex): # Multi-Index leading to issues, re-index? 
- joint_table = joint_table.reset_index() + joint_table = joint_table.reset_index(drop=True) year = np.floor(Time(joint_table["t_min"], format="mjd").decimalyear) # `t_min` is incorrect for Kepler pipeline products, so we extract year from the filename for those @@ -513,11 +513,14 @@ def _add_s3_url_column(self, joint_table: pd.DataFrame) -> pd.DataFrame: input dataframe with a column added which countaings the cloud uris of assosciated producs """ + logging.getLogger("astroquery").setLevel(log.getEffectiveLevel()) + Observations.enable_cloud_dataset() cloud_uris = Observations.get_cloud_uris( - Table.from_pandas(joint_table), full_url=True + Table.from_pandas(joint_table.loc[pd.notna(joint_table["dataURI"])]), + full_url=True, ) - joint_table["cloud_uri"] = cloud_uris + joint_table.loc[pd.notna(joint_table["dataURI"]), "cloud_uri"] = cloud_uris return joint_table def _search_obs( @@ -926,17 +929,38 @@ def _download_one( """ # Make sure astroquery uses the same level of verbosity - print(log.getEffectiveLevel()) logging.getLogger("astropy").setLevel(log.getEffectiveLevel()) logging.getLogger("astroquery").setLevel(log.getEffectiveLevel()) - manifest = Observations.download_products( - Table().from_pandas(row.to_frame(name=" ").transpose()), - download_dir=download_dir, - cache=cache, - cloud_only=cloud_only, - ) - return manifest.to_pandas() + # We don't want to query cloud_uri if we don't have to + # First check to see if we're not downloading on a cloud platform + # If not - cloud_uris should have already been queried - in that case + # check to see if a cloud_uri exists, if so we just pass that + + download = True + if not conf.DOWNLOAD_CLOUD: + if pd.notna(row["cloud_uri"]): + download = False + if conf.DOWNLOAD_CLOUD or download: + print(cloud_only) + manifest = Observations.download_products( + Table().from_pandas(row.to_frame(name=" ").transpose()), + download_dir=download_dir, + cache=cache, + cloud_only=cloud_only, + ) + manifest = manifest.to_pandas() + else: + manifest = pd.DataFrame( + { + "Local Path": [row["cloud_uri"]], + "Status": ["COMPLETE"], + "Message": ["Link to S3 bucket for remote read"], + "URL": [None], + } + ) + + return manifest def filter_table( self, @@ -961,8 +985,8 @@ def download( self, cloud: bool = True, cache: bool = True, - cloud_only: bool = False, - download_dir: str = default_download_dir, + cloud_only: bool = conf.CLOUD_ONLY, + download_dir: str = config.get_cache_dir(), remove_incomplete: str = True, ) -> pd.DataFrame: """downloads products in self.table to the local hard-drive @@ -978,7 +1002,7 @@ def download( download only products availaible in the cloud, by default False download_dir : str, optional directory where the products should be downloaded to, - by default default_download_dir + by default `~lksearch.config.get_cache_dir` remove_incomplete: str, optional remove files with a status not "COMPLETE" in the manifest, by default True Returns @@ -996,6 +1020,9 @@ def download( logging.getLogger("astroquery").setLevel(log.getEffectiveLevel()) Observations.enable_cloud_dataset() + if (not conf.DOWNLOAD_CLOUD) and ("cloud_uri" not in self.table.columns): + self.table = self._add_s3_url_column(self.table) + manifest = [ self._download_one(row, cloud_only, cache, download_dir) for _, row in tqdm( @@ -1003,7 +1030,6 @@ def download( total=self.table.shape[0], desc="pipeline products", ) - # for _, row in self.table.iterrows() ] manifest = pd.concat(manifest) @@ -1020,5 +1046,5 @@ def download( warnings.warn(f"Removed {file}", SearchWarning) else: 
warnings.warn(f"Not a file: {file}", SearchWarning) - + manifest = manifest.reset_index(drop=True) return manifest diff --git a/src/lksearch/TESSSearch.py b/src/lksearch/TESSSearch.py index 07210e9..dbbf3f0 100644 --- a/src/lksearch/TESSSearch.py +++ b/src/lksearch/TESSSearch.py @@ -19,11 +19,12 @@ from .utils import SearchError, SearchWarning, suppress_stdout from .MASTSearch import MASTSearch -from . import PACKAGEDIR, PREFER_CLOUD, DOWNLOAD_CLOUD, conf, config +from . import PACKAGEDIR, conf, config -pd.options.display.max_rows = 10 +PREFER_CLOUD = conf.PREFER_CLOUD +DOWNLOAD_CLOUD = conf.DOWNLOAD_CLOUD -default_download_dir = config.get_cache_dir() +pd.options.display.max_rows = 10 log = logging.getLogger(__name__) @@ -427,10 +428,10 @@ def filter_table( def download( self, - cloud: PREFER_CLOUD = True, - cache: PREFER_CLOUD = True, - cloud_only: PREFER_CLOUD = False, - download_dir: PACKAGEDIR = default_download_dir, + cloud: bool = conf.PREFER_CLOUD, + cache: bool = True, + cloud_only: bool = conf.CLOUD_ONLY, + download_dir: str = config.get_cache_dir(), # TESScut_product="SPOC", TESScut_size=10, ): @@ -441,7 +442,7 @@ def download( mast_mf = super().download(cloud, cache, cloud_only, download_dir) elif "TESScut" in self.table.provenance_name.unique(): - TESSCut_dir = f"{default_download_dir}/mastDownload/TESSCut" + TESSCut_dir = f"{download_dir}/mastDownload/TESSCut" if not os.path.isdir(TESSCut_dir): os.makedirs(TESSCut_dir) mask = self.table["provenance_name"] == "TESScut" @@ -459,7 +460,7 @@ def download( # Uncomment when astroquery 0.4.8 is released to enable TICA support # product=TESScut_product, # verbose=False - path=f"{default_download_dir}/mastDownload/TESSCut", + path=f"{download_dir}/mastDownload/TESSCut", inflate=True, moving_target=False, # this could be added mt_type=None, @@ -469,7 +470,7 @@ def download( sector_list, total=len(sector_list), desc="TESScut " ) ] - if len(mast_mf) != 0: + if len(np.atleast_1d(mast_mf)) != 0: manifest = mast_mf if len(tesscut_mf) != 0: diff --git a/src/lksearch/__init__.py b/src/lksearch/__init__.py index b3daefa..9683d63 100644 --- a/src/lksearch/__init__.py +++ b/src/lksearch/__init__.py @@ -5,8 +5,6 @@ import os PACKAGEDIR = os.path.abspath(os.path.dirname(__file__)) -PREFER_CLOUD = True # Do you prefer URIs pointing to the Amazon bucket when available? -DOWNLOAD_CLOUD = True from .version import __version__ @@ -30,6 +28,12 @@ class Conf(_config.ConfigNamespace): cache_dir Default cache directory for data files downloaded, etc. Defaults to ``~/.lksearch/cache`` if not specified. + PREFER_CLOUD + Use Cloud-based data product retrieval where available (primarily Amazon S3 buckets for MAST holdings) + + DOWNLOAD_CLOUD + Download cloud based data. If False, download() will return a pointer to the cloud based data instead of + downloading it - intended usage for cloud-based science platforms (e.g. TIKE) """ # Note: when using list or string_list datatype, @@ -42,7 +46,7 @@ class Conf(_config.ConfigNamespace): [], "List of extra columns to be included when displaying a SearchResult object.", cfgtype="string_list", - module="lksearch.search", + module="lksearch", ) cache_dir = _config.ConfigItem( @@ -52,6 +56,28 @@ class Conf(_config.ConfigNamespace): module="lksearch.config", ) + CLOUD_ONLY = _config.ConfigItem( + False, + "Only Download cloud based data." 
+ "If False, will download all data" + "If True, will only download data located on a cloud (Amazon S3) bucket", + cfgtype="boolean", + ) + + PREFER_CLOUD = _config.ConfigItem( + True, + "Prefer Cloud-based data product retrieval where available", + cfgtype="boolean", + ) + + DOWNLOAD_CLOUD = _config.ConfigItem( + True, + "Download cloud based data." + "If False, download() will return a pointer to the cloud based data" + "instead of downloading it - intended usage for cloud-based science platforms (e.g. TIKE)", + cfgtype="boolean", + ) + conf = Conf() diff --git a/src/lksearch/config/__init__.py b/src/lksearch/config/__init__.py index cd631f4..87de222 100644 --- a/src/lksearch/config/__init__.py +++ b/src/lksearch/config/__init__.py @@ -3,19 +3,16 @@ import glob import shutil -import astropy.config as astropyconfig - +import astropy.config as _config ROOTNAME = "lksearch" -PREFER_CLOUD = True # Do you prefer URIs pointing to the Amazon bucket when available? -DOWNLOAD_CLOUD = True -class ConfigNamespace(astropyconfig.ConfigNamespace): +class ConfigNamespace(_config.ConfigNamespace): rootname = ROOTNAME -class ConfigItem(astropyconfig.ConfigItem): +class ConfigItem(_config.ConfigItem): rootname = ROOTNAME @@ -35,7 +32,30 @@ def get_config_dir(): The absolute path to the configuration directory. """ - return astropyconfig.get_config_dir(ROOTNAME) + return _config.get_config_dir(ROOTNAME) + + +def get_config_file(): + return f"{get_config_dir()}/{ROOTNAME}.cfg" + + +def create_config_file(overwrite: bool = False): + """Creates a default configuration file in the config directory""" + + from .. import conf + + # check if config file exists + path_to_config_file = get_config_file() + cfg_exists = os.path.isfile(path_to_config_file) + + if not cfg_exists or (cfg_exists and overwrite): + with open(path_to_config_file, "w", encoding="utf-8") as f: + for item in conf.items(): + f.write(f"## {item[1].description} \n") + f.write(f"# {item[0]} = {item[1].defaultvalue} \n") + f.write("\n") + else: + log.error("Config file exists and overwrite set to {overwrite}") def get_cache_dir(): @@ -62,7 +82,7 @@ def get_cache_dir(): cache_dir = conf.cache_dir if cache_dir is None or cache_dir == "": - cache_dir = astropyconfig.get_cache_dir(ROOTNAME) + cache_dir = _config.get_cache_dir(ROOTNAME) cache_dir = _ensure_cache_dir_exists(cache_dir) cache_dir = os.path.abspath(cache_dir) diff --git a/tests/test_search.py b/tests/test_search.py index 9a0863a..4a34d82 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -4,6 +4,8 @@ import pytest from numpy.testing import assert_almost_equal, assert_array_equal +import numpy as np + import tempfile from requests import HTTPError @@ -21,6 +23,7 @@ from lksearch.utils import SearchError, SearchWarning from lksearch import MASTSearch, TESSSearch, KeplerSearch, K2Search +from lksearch import conf def test_search_cubedata(): @@ -442,3 +445,34 @@ def test_tesscut(): assert len(results.cubedata) == 3 manifest = results.cubedata[2].download() assert len(manifest) == 1 + + +def test_tess_clouduris(): + """regression test - do tesscut/nan's in dataURI column break cloud uri fetching""" + toi = TESSSearch("TOI 1161", sector=14) + # 17 products should be returned + assert len(toi.cloud_uris) == 17 + # 5 of them should have cloud uris + assert np.sum((toi.cloud_uris.values != None).astype(int)) == 5 + + +def test_tess_return_clouduri_not_download(): + """Test to see if we return a S3 bucket instead of downloading if + `~conf.DOWNLOAD_CLOUD` = False + """ + # reload the 
config, set download_cloud = False + conf.reload() + conf.DOWNLOAD_CLOUD = False + # Try to download a file without a S3 bucket, and one with + # Search for TESS data only. This by default includes both HLSPs and FFI cutouts. + toi = TESSSearch("TOI 1161", sector=14) + uris = toi.dvreports.cloud_uris + not_cloud = pd.isna(uris) + # A DV Report is not on the cloud - this should still get downloaded locally + dvr = toi.dvreports[not_cloud] + dvr_man = dvr[0].download() + assert os.path.isfile(dvr_man["Local Path"][0]) + # A SPOC TPF is on the cloud, this should return a S3 bucket + mask = toi.timeseries.pipeline == "SPOC" + lc_man = toi.timeseries[mask].download() + assert lc_man["Local Path"][0][0:5] == "s3://" From 2b78947888a8cf90c940d7858a55edd5387b325c Mon Sep 17 00:00:00 2001 From: Tyler Pritchard Date: Tue, 21 May 2024 17:53:35 -0400 Subject: [PATCH 2/2] updated docs --- README.rst | 11 ++++++++--- docs/apidoc.rst | 3 +++ docs/index.rst | 1 + src/lksearch/MASTSearch.py | 6 ++++-- src/lksearch/TESSSearch.py | 29 ++++++++++++++++++++++++++++- src/lksearch/__init__.py | 2 +- src/lksearch/config/__init__.py | 10 +++++++++- 7 files changed, 54 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index 52c74c3..4bfa7d0 100644 --- a/README.rst +++ b/README.rst @@ -79,9 +79,10 @@ Usage Contributing ============ -We welcome community contributions! +**lksearch** is an open-source, community driven package. +We welcome users to contribute and develop new features for lksearch. -Guidelines TBD +For further information, please see the `Lightkurve Community guidelines `_. .. @@ -92,7 +93,11 @@ Citing If you find **lksearch** useful in your research, please cite it and give us a GitHub star! -Citation Instructions TBD +If you use Lightkurve for work or research presented in a publication, we request the following acknowledgment or citation: + +`This research made use of Lightkurve, a Python package for Kepler and TESS data analysis (Lightkurve Collaboration, 2018).` + +See full citation instuctions, including dependencies, in the `Lightkurve documentation `_. .. diff --git a/docs/apidoc.rst b/docs/apidoc.rst index 467a86e..04ac0d1 100644 --- a/docs/apidoc.rst +++ b/docs/apidoc.rst @@ -13,4 +13,7 @@ API documentation :members: .. autoclass:: lksearch.TESSSearch + :members: + +.. automodule:: lksearch.config :members: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 1334cff..64171e2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,6 +2,7 @@ sphinx-quickstart on Mon Apr 22 14:10:27 2024. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. + ######## lksearch ######## diff --git a/src/lksearch/MASTSearch.py b/src/lksearch/MASTSearch.py index 70bc49c..ef245ca 100644 --- a/src/lksearch/MASTSearch.py +++ b/src/lksearch/MASTSearch.py @@ -17,7 +17,7 @@ from .utils import SearchError, SearchWarning, suppress_stdout -from . import PACKAGEDIR, PREFER_CLOUD, DOWNLOAD_CLOUD, conf, config +from . 
import PACKAGEDIR, conf, config pd.options.display.max_rows = 10 @@ -1006,17 +1006,19 @@ def download( download only products availaible in the cloud, by default False download_dir : str, optional directory where the products should be downloaded to, - by default default_download_dir + by default default_download_dir cache : bool, optional passed to `~astroquery.mast.Observations.download_products`, by default True if False, will overwrite the file to be downloaded (for example to replace a corrrupted file) remove_incomplete: str, optional remove files with a status not "COMPLETE" in the manifest, by default True + Returns ------- ~pandas.DataFrame table where each row is an ~astroquery.mast.Observations.download_products() manifest + """ if len(self.table) == 0: diff --git a/src/lksearch/TESSSearch.py b/src/lksearch/TESSSearch.py index 2c3912c..3ca833e 100644 --- a/src/lksearch/TESSSearch.py +++ b/src/lksearch/TESSSearch.py @@ -433,8 +433,35 @@ def download( cloud_only: bool = conf.CLOUD_ONLY, download_dir: str = config.get_cache_dir(), # TESScut_product="SPOC", - TESScut_size: int = 10, + TESScut_size: Union[int, tuple] = 10, ): + """downloads products in self.table to the local hard-drive + + Parameters + ---------- + cloud : bool, optional + enable cloud (as opposed to MAST) downloading, by default True + cloud_only : bool, optional + download only products availaible in the cloud, by default False + download_dir : str, optional + directory where the products should be downloaded to, + by default default_download_dir + cache : bool, optional + passed to `~astroquery.mast.Observations.download_products`, by default True + if False, will overwrite the file to be downloaded (for example to replace a corrrupted file) + remove_incomplete: str, optional + remove files with a status not "COMPLETE" in the manifest, by default True + TESScut_size : Union[int, tuple], optional, + The size of a TESScut FFI cutout in pixels + + Returns + ------- + ~pandas.DataFrame + table where each row is an ~astroquery.mast.Observations.download_products() + manifest + + """ + mast_mf = [] tesscut_mf = [] manifest = [] diff --git a/src/lksearch/__init__.py b/src/lksearch/__init__.py index 9683d63..5ec3292 100644 --- a/src/lksearch/__init__.py +++ b/src/lksearch/__init__.py @@ -20,7 +20,7 @@ class Conf(_config.ConfigNamespace): The attributes listed below are the available configuration parameters. - Attributes + Parameters ---------- search_result_display_extra_columns List of extra columns to be included when displaying a SearchResult object. diff --git a/src/lksearch/config/__init__.py b/src/lksearch/config/__init__.py index 87de222..0586a8d 100644 --- a/src/lksearch/config/__init__.py +++ b/src/lksearch/config/__init__.py @@ -76,7 +76,7 @@ def get_cache_dir(): cachedir : str The absolute path to the cache directory. - See :ref:`configuration ` for more information. + See `~lksearch.Conf` for more information. """ from .. import conf @@ -108,6 +108,14 @@ def _ensure_cache_dir_exists(cache_dir): def clearcache(test=True): + """Deletes all downloaded files in the lksearch download directory + + Parameters + ---------- + test : bool, optional + perform this in test mode, printing what folders will be deleted, by default True. + Set test=False to delete cache + """ # Check to see if default download dir/mastDownload exists mastdir = f"{get_cache_dir()}/mastDownload" if os.path.isdir(mastdir):
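
To tie the new configuration and cloud-download pieces together, here is a minimal usage sketch. It is an illustration only, assembled from names that appear in this patch (the `conf.DOWNLOAD_CLOUD` config item, the `config.create_config_file`, `config.get_config_file`, `config.get_cache_dir`, and `config.clearcache` helpers, and the "TOI 1161" target used in the new tests); actual paths and manifests depend on your environment.

```python
from lksearch import TESSSearch, conf, config

# Where downloads are cached, plus a template config file for overriding defaults
# (both helpers are added in src/lksearch/config/__init__.py in this patch).
print(config.get_cache_dir())             # e.g. ~/.lksearch/cache
config.create_config_file(overwrite=True)
print(config.get_config_file())           # e.g. ~/.lksearch/config/lksearch.cfg

# With DOWNLOAD_CLOUD disabled, download() should return S3 URIs for
# cloud-hosted products instead of fetching them (per the new regression test).
conf.DOWNLOAD_CLOUD = False
toi = TESSSearch("TOI 1161", sector=14)   # target taken from tests/test_search.py
spoc = toi.timeseries[toi.timeseries.pipeline == "SPOC"]
manifest = spoc.download()
print(manifest["Local Path"][0])          # expected to start with "s3://"

# Restore defaults and preview what clearing the cache would delete.
conf.reload()
config.clearcache()                       # test mode: only prints the directories
# config.clearcache(test=False)           # actually removes the cached files
```

Returning an S3 URI rather than a local path when `DOWNLOAD_CLOUD` is off is aimed at cloud science platforms such as TIKE, where reading the product in place avoids duplicating it on local disk.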