diff --git a/las_geoh5/import_files/driver.py b/las_geoh5/import_files/driver.py index 2ad19f8..9787986 100644 --- a/las_geoh5/import_files/driver.py +++ b/las_geoh5/import_files/driver.py @@ -7,41 +7,97 @@ from __future__ import annotations +import logging import sys +from multiprocessing import Pool +from time import time import lasio from geoh5py.shared.utils import fetch_active_workspace from geoh5py.ui_json import InputFile +from tqdm import tqdm from las_geoh5.import_las import LASTranslator, las_to_drillhole +logger = logging.getLogger("Import Files") +logger.setLevel(logging.INFO) +stream_handler = logging.StreamHandler() +stream_handler.setLevel(logging.INFO) +formatter = logging.Formatter("%(asctime)s : %(name)s : %(levelname)s : %(message)s") +stream_handler.setFormatter(formatter) +logger.addHandler(stream_handler) -def run(file: str): - ifile = InputFile.read_ui_json(file) - # TODO: Once fix implemented in geoh5py can revert back to simply pulling - # drillhole group from input file rather that using get_entity. - # dh_group = ifile.data["drillhole_group"] +def elapsed_time_logger(start, end, message): + if message[-1] != ".": + message += "." + + elapsed = end - start + minutes = elapsed // 60 + seconds = elapsed % 60 + + if minutes >= 1: + out = f"{message} Time elapsed: {minutes}m {seconds}s." + else: + out = f"{message} Time elapsed: {seconds:.2f}s." + + return out + + +def run(filepath: str): # pylint: disable=too-many-locals + start = time() + ifile = InputFile.read_ui_json(filepath) + + logger.info( + "Importing las file data to workspace %s.", ifile.data["geoh5"].h5file.stem + ) - name = ifile.data["name"] - files = ifile.data["files"].split(";") - files = [lasio.read(file, mnemonic_case="preserve") for file in files] translator = LASTranslator( depth=ifile.data["depths_name"], collar_x=ifile.data["collar_x_name"], collar_y=ifile.data["collar_y_name"], collar_z=ifile.data["collar_z_name"], ) - with fetch_active_workspace(ifile.data["geoh5"], mode="a") as workspace: - dh_group = ifile.workspace.get_entity(ifile.data["drillhole_group"].uid)[0] - las_to_drillhole(workspace, files, dh_group, name, translator=translator) + begin_reading = time() + with Pool() as pool: + futures = [] + for file in tqdm(ifile.data["files"].split(";"), desc="Reading las files"): + futures.append( + pool.apply_async(lasio.read, (file,), {"mnemonic_case": "preserve"}) + ) + + lasfiles = [future.get() for future in futures] + end_reading = time() + logger.info( + elapsed_time_logger(begin_reading, end_reading, "Finished reading las files") + ) + with fetch_active_workspace(ifile.data["geoh5"], mode="a") as geoh5: + dh_group = geoh5.get_entity(ifile.data["drillhole_group"].uid)[0] + logger.info( + "Saving drillhole data into drillhole group %s under property group %s", + dh_group.name, + ifile.data["name"], + ) + begin_saving = time() + _ = las_to_drillhole( + geoh5, + lasfiles, + dh_group, + ifile.data["name"], + translator=translator, + skip_empty_header=ifile.data["skip_empty_header"], + ) + end_saving = time() + logger.info( + elapsed_time_logger( + begin_saving, end_saving, "Finished saving drillhole data" + ) + ) -def import_las_files(workspace, dh_group, property_group_name, files): - for file in files: - lasfile = lasio.read(file) - las_to_drillhole(workspace, lasfile, dh_group, property_group_name) + end = time() + logger.info(elapsed_time_logger(start, end, "All done.")) if __name__ == "__main__": diff --git a/las_geoh5/import_files/uijson.py b/las_geoh5/import_files/uijson.py index 4df020f..6d4506d 100644 --- a/las_geoh5/import_files/uijson.py +++ b/las_geoh5/import_files/uijson.py @@ -14,6 +14,11 @@ **{ "title": "LAS files to Drillhole group", "run_command": "las_geoh5.import_files.driver", + "name": { + "main": True, + "label": "Name", + "value": "", + }, "files": { "main": True, "label": "Files", @@ -23,12 +28,13 @@ "fileMulti": True, }, "depths_name": { - "main": True, "label": "Depths", "value": "DEPTH", + "group": "Import fields", + "optional": True, + "enabled": False, }, "collar_x_name": { - "main": True, "label": "Collar x", "value": "X", "group": "Import fields", @@ -36,7 +42,6 @@ "enabled": False, }, "collar_y_name": { - "main": True, "label": "Collar y", "value": "Y", "group": "Import fields", @@ -44,12 +49,21 @@ "enabled": False, }, "collar_z_name": { - "main": True, "label": "Collar z", "value": "ELEV", "group": "Import fields", "optional": True, "enabled": False, }, + "skip_empty_header": { + "label": "Skip empty header", + "value": False, + "tooltip": ( + "Importing files without collar information " + "results in drillholes placed at the origin. " + "Check this box to skip these files." + "" + ), + }, } ) diff --git a/las_geoh5/import_las.py b/las_geoh5/import_las.py index 7900fcb..01f2fbf 100644 --- a/las_geoh5/import_las.py +++ b/las_geoh5/import_las.py @@ -8,6 +8,7 @@ import warnings from pathlib import Path +from typing import Any import lasio import numpy as np @@ -85,12 +86,14 @@ def get_depths(lasfile: lasio.LASFile) -> dict[str, np.ndarray]: :return: Depth data as 'from-to' interval or 'depth' locations. """ - if "DEPTH" in lasfile.curves: - depths = lasfile["DEPTH"] - elif "DEPT" in lasfile.curves: - depths = lasfile["DEPT"] - else: - raise KeyError( + depths = None + for name, curve in lasfile.curves.items(): + if name.lower() in ["depth", "dept"]: + depths = curve.data + break + + if depths is None: + raise ValueError( "In order to import data to geoh5py format, .las files " "must contain a depth curve named 'DEPTH' or 'DEPT'." ) @@ -105,9 +108,7 @@ def get_depths(lasfile: lasio.LASFile) -> dict[str, np.ndarray]: return out -def get_collar( - lasfile: lasio.LASFile, translator: LASTranslator | None = None -) -> list | None: +def get_collar(lasfile: lasio.LASFile, translator: LASTranslator | None = None) -> list: """ Returns collar data from las file or None if data missing. @@ -121,8 +122,9 @@ def get_collar( collar = [] for field in ["collar_x", "collar_y", "collar_z"]: + collar_coord = 0.0 try: - collar.append(translator.retrieve(field, lasfile)) + collar_coord = translator.retrieve(field, lasfile) except KeyError: exclusions = ["STRT", "STOP", "STEP", "NULL"] options = [ @@ -137,6 +139,11 @@ def get_collar( f"{options}." ) + collar_coord = 0.0 + + try: + collar.append(float(collar_coord)) + except ValueError: collar.append(0.0) return collar @@ -214,7 +221,8 @@ def add_data( :return: Updated drillhole object. """ - kwargs = get_depths(lasfile) + depths = get_depths(lasfile) + kwargs: dict[str, Any] = {**depths} for curve in [ k for k in lasfile.curves if k.mnemonic not in ["DEPT", "DEPTH", "TO"] ]: @@ -241,7 +249,18 @@ def add_data( if existing_data and isinstance(existing_data, Entity): kwargs["entity_type"] = existing_data.entity_type - drillhole.add_data({name: kwargs}, property_group=property_group) + try: + drillhole.add_data({name: kwargs}, property_group=property_group) + except ValueError as err: + msg = ( + f"ValueError raised trying to add data '{name}' to " + f"drillhole '{drillhole.name}' with message:\n{err.args[0]}." + ) + warnings.warn(msg) + + # TODO: Increment property group name if it already exists and the depth + # Sampling is different. Could try removing the try/except block once + # done and see if error start to appear. return drillhole @@ -260,6 +279,7 @@ def create_or_append_drillhole( :param lasfile: Las file object. :param drillhole_group: Drillhole group container. :param group_name: Property group name. + :param translator: Translator for las file. :return: Created or augmented drillhole. """ @@ -299,14 +319,15 @@ def create_or_append_drillhole( return drillhole -def las_to_drillhole( +def las_to_drillhole( # pylint: disable=too-many-arguments workspace: Workspace, data: lasio.LASFile | list[lasio.LASFile], drillhole_group: DrillholeGroup, property_group: str | None = None, survey: Path | list[Path] | None = None, translator: LASTranslator | None = None, -) -> Drillhole: + skip_empty_header: bool = False, +): """ Import a las file containing collocated datasets for a single drillhole. @@ -315,6 +336,8 @@ def las_to_drillhole( :param drillhole_group: Drillhole group container. :param property_group: Property group name. :param survey: Path to a survey file stored as .csv or .las format. + :param translator: Translator for las file. + :param skip_empty_header: Skip empty header data. :return: A :obj:`geoh5py.objects.Drillhole` object """ @@ -326,7 +349,12 @@ def las_to_drillhole( if translator is None: translator = LASTranslator() + drillhole = None for datum in tqdm(data): + collar = get_collar(datum, translator) + if all(k == 0 for k in collar) and skip_empty_header: + continue + drillhole = create_or_append_drillhole( workspace, datum, drillhole_group, property_group, translator=translator ) diff --git a/las_geoh5/uijson/import_las_files.ui.json b/las_geoh5/uijson/import_las_files.ui.json index 5d15a0c..3159338 100644 --- a/las_geoh5/uijson/import_las_files.ui.json +++ b/las_geoh5/uijson/import_las_files.ui.json @@ -22,10 +22,8 @@ }, "name": { "main": true, - "label": "Property group name", - "value": "", - "optional": true, - "enabled": false + "label": "Name", + "value": "" }, "files": { "main": true, @@ -40,12 +38,13 @@ "fileMulti": true }, "depths_name": { - "main": true, "label": "Depths", - "value": "DEPTH" + "value": "DEPTH", + "group": "Import fields", + "optional": true, + "enabled": false }, "collar_x_name": { - "main": true, "label": "Collar x", "value": "X", "group": "Import fields", @@ -53,7 +52,6 @@ "enabled": false }, "collar_y_name": { - "main": true, "label": "Collar y", "value": "Y", "group": "Import fields", @@ -61,11 +59,15 @@ "enabled": false }, "collar_z_name": { - "main": true, "label": "Collar z", "value": "ELEV", "group": "Import fields", "optional": true, "enabled": false + }, + "skip_empty_header": { + "label": "Skip empty header", + "value": false, + "tooltip": "Importing files without collar information results in drillholes placed at the origin. Check this box to skip these files." } } diff --git a/tests/geoh5_to_las_test.py b/tests/geoh5_to_las_test.py index 8957c6e..227e916 100644 --- a/tests/geoh5_to_las_test.py +++ b/tests/geoh5_to_las_test.py @@ -42,7 +42,7 @@ def test_get_depths(): assert "from-to" in depths and len(depths) == 1 assert np.allclose(depths["from-to"], np.c_[np.arange(0, 10), np.arange(1, 11)]) lasfile = lasio.LASFile() - with pytest.raises(KeyError, match="curve named 'DEPTH' or 'DEPT'."): + with pytest.raises(ValueError, match="curve named 'DEPTH' or 'DEPT'."): get_depths(lasfile) @@ -61,6 +61,28 @@ def test_get_collar(): assert np.allclose(get_collar(lasfile), [10.0, 10.0, 10.0]) +def test_get_collar_not_in_header(): + lasfile = lasio.LASFile() + lasfile.params.append(lasio.HeaderItem(mnemonic="X", value=10.0)) + lasfile.params.append(lasio.HeaderItem(mnemonic="Y", value=10.0)) + lasfile.params.append(lasio.HeaderItem(mnemonic="ELEV", value=10.0)) + collar = get_collar(lasfile) + assert np.allclose(collar, [10.0, 10.0, 10.0]) + + +def test_get_collar_skip_non_float(): + lasfile = lasio.LASFile() + lasfile.well.append(lasio.HeaderItem(mnemonic="X", value="10.0")) + lasfile.well.append(lasio.HeaderItem(mnemonic="Y", value="10.0")) + lasfile.params.append(lasio.HeaderItem(mnemonic="ELEV", value="10.0")) + collar = get_collar(lasfile) + assert np.allclose(collar, [10.0, 10.0, 10.0]) + lasfile.well["X"] = "not a float" + lasfile.params["ELEV"] = "also not a float" + collar = get_collar(lasfile) + assert np.allclose(collar, [0.0, 10.0, 0.0]) + + def test_create_or_append_drillhole(tmp_path): with Workspace.create(Path(tmp_path / "test.geoh5")) as workspace: drillhole_group = DrillholeGroup.create(workspace, name="dh_group") diff --git a/tests/import_las_test.py b/tests/import_las_test.py index 1916eb6..5917bf4 100644 --- a/tests/import_las_test.py +++ b/tests/import_las_test.py @@ -17,6 +17,7 @@ from geoh5py.objects import Drillhole from geoh5py.ui_json import InputFile +from las_geoh5.import_files.driver import elapsed_time_logger from las_geoh5.import_las import LASTranslator @@ -62,6 +63,7 @@ def write_input_file( # pylint: disable=too-many-arguments y_collar_name, z_collar_name, module_name, + skip_empty_header=False, ): basepath = workspace.h5file.parent module = importlib.import_module(f"las_geoh5.{module_name}.uijson") @@ -78,6 +80,7 @@ def write_input_file( # pylint: disable=too-many-arguments "collar_x_name": x_collar_name, "collar_y_name": y_collar_name, "collar_z_name": z_collar_name, + "skip_empty_header": skip_empty_header, } ) ifile.write_ui_json("import_las_files.ui.json", str(basepath)) @@ -256,3 +259,58 @@ def test_las_translator_translate(): assert translator.translate("collar_x") == "UTMX" with pytest.raises(KeyError, match="'not_a_field' is not a recognized field."): translator.translate("not_a_field") + + +def test_elapsed_time_logger(): + msg = elapsed_time_logger(0, 90, "Finished some task") + assert msg == "Finished some task. Time elapsed: 1m 30s." + msg = elapsed_time_logger(0, 59, "Finished another task.") + assert msg == "Finished another task. Time elapsed: 59.00s." + msg = elapsed_time_logger(0, 0.0001, "Done another task.") + assert msg == "Done another task. Time elapsed: 0.00s." + msg = elapsed_time_logger(0, 0.2345, "Boy I'm getting a lot done.") + assert msg == "Boy I'm getting a lot done. Time elapsed: 0.23s." + + +def test_skip_empty_header_option(tmp_path): + with Workspace.create(tmp_path / "test.geoh5") as workspace: + dh_group = DrillholeGroup.create(workspace, name="dh_group") + + files = [ + generate_lasfile( + "dh1", + {"UTMX": 0.0, "UTMY": 0.0, "ELEV": 10.0}, + np.arange(0, 11, 1), + {"my_property": np.zeros(11)}, + ), + generate_lasfile( + "dh2", + {}, + np.arange(0, 11, 1), + {"my_property": np.random.rand(11)}, + ), + ] + lasfiles = write_lasfiles(tmp_path, files) + filepath = write_input_file( + workspace, + dh_group, + "my_property_group", + lasfiles, + "DEPTH", + "UTMX", + "UTMY", + "ELEV", + "import_files", + skip_empty_header=True, + ) + + module = importlib.import_module("las_geoh5.import_files.driver") + getattr(module, "run")(filepath) + + with workspace.open(): + dh1 = workspace.get_entity("dh1")[0] + assert dh1.collar["x"] == 0.0 + assert dh1.collar["y"] == 0.0 + assert dh1.collar["z"] == 10.0 + dh1 = workspace.get_entity("dh2")[0] + assert not dh1