From a8a46ffe17051b36d13b5066a5f79633aee57739 Mon Sep 17 00:00:00 2001 From: Fabian Wolf Date: Fri, 19 Jul 2024 16:20:00 +0200 Subject: [PATCH 1/4] Support readonly wsi dir --- stamp/preprocessing/wsi_norm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stamp/preprocessing/wsi_norm.py b/stamp/preprocessing/wsi_norm.py index 9036ec5..8dc6fec 100755 --- a/stamp/preprocessing/wsi_norm.py +++ b/stamp/preprocessing/wsi_norm.py @@ -41,6 +41,8 @@ def lock_file(slide_path: Path): Path(f"{slide_path}.lock").touch() except PermissionError: pass # No write permissions for wsi directory + except OSError: + pass # No write permissions for wsi directory try: yield finally: @@ -53,6 +55,9 @@ def test_wsidir_write_permissions(wsi_dir: Path): except PermissionError: logging.warning("No write permissions for wsi directory! If multiple stamp processes are running " "in parallel, the final summary may show an incorrect number of slides processed.") + except OSError: + logging.warning("No write permissions for wsi directory! If multiple stamp processes are running " + "in parallel, the final summary may show an incorrect number of slides processed.") finally: clean_lockfile(testfile) From 18dcc4d4d50cd89360753c98a8172b70d6f81436 Mon Sep 17 00:00:00 2001 From: Fabian Wolf Date: Fri, 19 Jul 2024 16:30:40 +0200 Subject: [PATCH 2/4] Add preload_wsi option to load wsi files into a tmp directory first. 
--- stamp/cli.py | 3 ++- stamp/config.yaml | 1 + stamp/preprocessing/wsi_norm.py | 14 ++++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/stamp/cli.py b/stamp/cli.py index ea8b1f9..43955e9 100755 --- a/stamp/cli.py +++ b/stamp/cli.py @@ -173,7 +173,8 @@ def run_cli(args: argparse.Namespace): only_feature_extraction=c.only_feature_extraction, keep_dir_structure=c.keep_dir_structure if 'keep_dir_structure' in c else False, device=c.device, - normalization_template=normalization_template_path + normalization_template=normalization_template_path, + preload_wsi=c.preload_wsi if 'preload_wsi' in c else False ) case "train": require_configs( diff --git a/stamp/config.yaml b/stamp/config.yaml index bf56d51..8cc5bd7 100644 --- a/stamp/config.yaml +++ b/stamp/config.yaml @@ -17,6 +17,7 @@ preprocessing: only_feature_extraction: false # Only perform feature extraction (intermediate images (background rejected, [normalized]) have to exist) cores: 8 # CPU cores to use device: cuda:0 # device to run feature extraction on (cpu, cuda, cuda:0, etc.) 
+ preload_wsi: true # Preload the whole-slide image into temporary directory (helpful in case the slides are accessed over network) modeling: clini_table: # Path to clini_table file (.xlsx or .csv) diff --git a/stamp/preprocessing/wsi_norm.py b/stamp/preprocessing/wsi_norm.py index 8dc6fec..3fb3a89 100755 --- a/stamp/preprocessing/wsi_norm.py +++ b/stamp/preprocessing/wsi_norm.py @@ -11,6 +11,7 @@ from contextlib import contextmanager import logging import os +import shutil import openslide from tqdm import tqdm import PIL @@ -19,6 +20,7 @@ from datetime import timedelta from pathlib import Path from random import shuffle +import tempfile import torch from typing import Optional from .helpers import stainNorm_Macenko @@ -72,7 +74,7 @@ def save_image(image, path: Path): def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Path, norm: bool, del_slide: bool, only_feature_extraction: bool, cache: bool = True, cores: int = 8, target_microns: int = 256, patch_size: int = 224, keep_dir_structure: bool = False, - device: str = "cuda", normalization_template: Path = None, feat_extractor: str = "ctp"): + device: str = "cuda", normalization_template: Path = None, feat_extractor: str = "ctp", preload_wsi: bool = False): # Clean up potentially old leftover .lock files for lockfile in wsi_dir.glob("**/*.lock"): if time.time() - os.path.getmtime(lockfile) > 20: @@ -174,7 +176,12 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat print(f"Loaded {img_name}, {len(canny_norm_patch_list)}/{total} tiles remain") else: try: - slide = openslide.OpenSlide(slide_url) + if preload_wsi: + slide_url_tmp = tempfile.NamedTemporaryFile(delete=False) + shutil.copy(slide_url, slide_url_tmp.name) + slide = openslide.OpenSlide(slide_url_tmp.name) + else: + slide = openslide.OpenSlide(slide_url) except openslide.lowlevel.OpenSlideUnsupportedFormatError: logging.error("Unsupported format for slide, continuing...") error_slides.append(slide_name) 
@@ -240,6 +247,9 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat print("Deleting slide from local folder...") if os.path.exists(slide_url): os.remove(slide_url) + + if preload_wsi: + os.remove(slide_url_tmp.name) print(f"\nExtracting {model_name} features from slide...") start_time = time.time() From ce0205e6a5c6d4a7b8c2b961a67ca1dc86df4293 Mon Sep 17 00:00:00 2001 From: Fabian Wolf <44507699+FWao@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:46:12 +0200 Subject: [PATCH 3/4] Preprocessing: Make wsi preloading compatible with .mrxs files --- stamp/preprocessing/wsi_norm.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/stamp/preprocessing/wsi_norm.py b/stamp/preprocessing/wsi_norm.py index 3fb3a89..ace4377 100755 --- a/stamp/preprocessing/wsi_norm.py +++ b/stamp/preprocessing/wsi_norm.py @@ -177,9 +177,18 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat else: try: if preload_wsi: - slide_url_tmp = tempfile.NamedTemporaryFile(delete=False) - shutil.copy(slide_url, slide_url_tmp.name) - slide = openslide.OpenSlide(slide_url_tmp.name) + slide_tmp_dir = Path(tempfile.mkdtemp()) + slide_tmp_file = slide_tmp_dir / slide_url.name + + shutil.copy(slide_url, slide_tmp_file) + + # Some slide formats (.mrsx) come with an additional directory which needs to be transferred as well + slide_folder_name = slide_url.with_suffix('') + if slide_folder_name.is_dir(): + slide_folder_tmp = slide_tmp_dir / slide_folder_name.name + shutil.copytree(slide_folder_name, slide_folder_tmp) + + slide = openslide.OpenSlide(slide_tmp_file) else: slide = openslide.OpenSlide(slide_url) except openslide.lowlevel.OpenSlideUnsupportedFormatError: @@ -249,7 +258,7 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat os.remove(slide_url) if preload_wsi: - os.remove(slide_url_tmp.name) + shutil.rmtree(slide_tmp_dir) print(f"\nExtracting {model_name} features from 
slide...") start_time = time.time() From ac39c02775943301cec11aaa2d0036749779ea4c Mon Sep 17 00:00:00 2001 From: Fabian Wolf Date: Mon, 9 Sep 2024 14:12:55 +0200 Subject: [PATCH 4/4] wsi_norm: Fix temporary directory not deleted in case of an exception --- stamp/preprocessing/wsi_norm.py | 150 ++++++++++++++++---------------- 1 file changed, 74 insertions(+), 76 deletions(-) diff --git a/stamp/preprocessing/wsi_norm.py b/stamp/preprocessing/wsi_norm.py index ace4377..1dbff95 100755 --- a/stamp/preprocessing/wsi_norm.py +++ b/stamp/preprocessing/wsi_norm.py @@ -175,90 +175,88 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat canny_norm_patch_list, coords_list, total = process_slide_jpg(slide_jpg) print(f"Loaded {img_name}, {len(canny_norm_patch_list)}/{total} tiles remain") else: - try: - if preload_wsi: - slide_tmp_dir = Path(tempfile.mkdtemp()) - slide_tmp_file = slide_tmp_dir / slide_url.name + with tempfile.TemporaryDirectory() as temp_dir: + try: + if preload_wsi: + slide_tmp_dir = Path(temp_dir) + slide_tmp_file = slide_tmp_dir / slide_url.name - shutil.copy(slide_url, slide_tmp_file) - - # Some slide formats (.mrsx) come with an additional directory which needs to be transferred as well - slide_folder_name = slide_url.with_suffix('') - if slide_folder_name.is_dir(): - slide_folder_tmp = slide_tmp_dir / slide_folder_name.name - shutil.copytree(slide_folder_name, slide_folder_tmp) + shutil.copy(slide_url, slide_tmp_file) + + # Some slide formats (.mrsx) come with an additional directory which needs to be transferred as well + slide_folder_name = slide_url.with_suffix('') + if slide_folder_name.is_dir(): + slide_folder_tmp = slide_tmp_dir / slide_folder_name.name + shutil.copytree(slide_folder_name, slide_folder_tmp) - slide = openslide.OpenSlide(slide_tmp_file) - else: - slide = openslide.OpenSlide(slide_url) - except openslide.lowlevel.OpenSlideUnsupportedFormatError: - logging.error("Unsupported format for slide, 
continuing...") - error_slides.append(slide_name) - continue - except Exception as e: - logging.error(f"Failed loading slide, continuing... Error: {e}") - error_slides.append(slide_name) - continue + slide = openslide.OpenSlide(slide_tmp_file) + else: + slide = openslide.OpenSlide(slide_url) + except openslide.lowlevel.OpenSlideUnsupportedFormatError: + logging.error("Unsupported format for slide, continuing...") + error_slides.append(slide_name) + continue + except Exception as e: + logging.error(f"Failed loading slide, continuing... Error: {e}") + error_slides.append(slide_name) + continue - start_time = time.time() - try: - slide_array = load_slide(slide=slide, target_mpp=target_mpp, cores=cores) - except MPPExtractionError: - if del_slide: - logging.error("MPP missing in slide metadata, deleting slide and continuing...") - if os.path.exists(slide_url): - os.remove(slide_url) - else: - logging.error("MPP missing in slide metadata, continuing...") - error_slides.append(slide_name) - continue - except openslide.lowlevel.OpenSlideError as e: - print("") - logging.error(f"Failed loading slide, continuing... Error: {e}") - error_slides.append(slide_name) - continue + start_time = time.time() + try: + slide_array = load_slide(slide=slide, target_mpp=target_mpp, cores=cores) + except MPPExtractionError: + if del_slide: + logging.error("MPP missing in slide metadata, deleting slide and continuing...") + if os.path.exists(slide_url): + os.remove(slide_url) + else: + logging.error("MPP missing in slide metadata, continuing...") + error_slides.append(slide_name) + continue + except openslide.lowlevel.OpenSlideError as e: + print("") + logging.error(f"Failed loading slide, continuing... 
Error: {e}") + error_slides.append(slide_name) + continue - # Remove .SVS from memory - del slide - print(f"\nLoaded slide: {time.time() - start_time:.2f} seconds") - print(f"\nSize of WSI: {slide_array.shape}") - - if cache: - # Save raw .svs jpg - raw_image = PIL.Image.fromarray(slide_array) - save_image(raw_image, slide_cache_dir/"slide.jpg") + # Remove .SVS from memory + del slide + print(f"\nLoaded slide: {time.time() - start_time:.2f} seconds") + print(f"\nSize of WSI: {slide_array.shape}") + + if cache: + # Save raw .svs jpg + raw_image = PIL.Image.fromarray(slide_array) + save_image(raw_image, slide_cache_dir/"slide.jpg") - #Do edge detection here and reject unnecessary tiles BEFORE normalisation - bg_reject_array, rejected_tile_array, patch_shapes = reject_background(img=slide_array, patch_size=patch_shape, step=step_size, cores=cores) + #Do edge detection here and reject unnecessary tiles BEFORE normalisation + bg_reject_array, rejected_tile_array, patch_shapes = reject_background(img=slide_array, patch_size=patch_shape, step=step_size, cores=cores) - start_time = time.time() - # Pass raw slide_array for getting the initial concentrations, bg_reject_array for actual normalisation - if norm: - print(f"Normalising slide...") - canny_img, img_norm_wsi_jpg, canny_norm_patch_list, coords_list = normalizer.transform(slide_array, bg_reject_array, - rejected_tile_array, patch_shapes, cores=cores) - print(f"\nNormalised slide: {time.time() - start_time:.2f} seconds") - if cache: - save_image(img_norm_wsi_jpg, slide_cache_dir/"norm_slide.jpg") - else: - canny_img, canny_norm_patch_list, coords_list = get_raw_tile_list(slide_array.shape, bg_reject_array, - rejected_tile_array, patch_shapes) + start_time = time.time() + # Pass raw slide_array for getting the initial concentrations, bg_reject_array for actual normalisation + if norm: + print(f"Normalising slide...") + canny_img, img_norm_wsi_jpg, canny_norm_patch_list, coords_list = normalizer.transform(slide_array, 
bg_reject_array, + rejected_tile_array, patch_shapes, cores=cores) + print(f"\nNormalised slide: {time.time() - start_time:.2f} seconds") + if cache: + save_image(img_norm_wsi_jpg, slide_cache_dir/"norm_slide.jpg") + else: + canny_img, canny_norm_patch_list, coords_list = get_raw_tile_list(slide_array.shape, bg_reject_array, + rejected_tile_array, patch_shapes) - if cache: - print("Saving Canny background rejected image...") - save_image(canny_img, slide_cache_dir/"canny_slide.jpg") + if cache: + print("Saving Canny background rejected image...") + save_image(canny_img, slide_cache_dir/"canny_slide.jpg") - # Remove original slide jpg from memory - del slide_array - - # Optionally remove the original slide from harddrive - if del_slide: - print("Deleting slide from local folder...") - if os.path.exists(slide_url): - os.remove(slide_url) - - if preload_wsi: - shutil.rmtree(slide_tmp_dir) + # Remove original slide jpg from memory + del slide_array + + # Optionally remove the original slide from harddrive + if del_slide: + print("Deleting slide from local folder...") + if os.path.exists(slide_url): + os.remove(slide_url) print(f"\nExtracting {model_name} features from slide...") start_time = time.time()