Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preprocessing: Add WSI prefetching #32

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion stamp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ def run_cli(args: argparse.Namespace):
only_feature_extraction=c.only_feature_extraction,
keep_dir_structure=c.keep_dir_structure if 'keep_dir_structure' in c else False,
device=c.device,
normalization_template=normalization_template_path
normalization_template=normalization_template_path,
preload_wsi=c.preload_wsi if 'preload_wsi' in c else False
)
case "train":
require_configs(
Expand Down
1 change: 1 addition & 0 deletions stamp/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ preprocessing:
only_feature_extraction: false # Only perform feature extraction (intermediate images (background rejected, [normalized]) have to exist)
cores: 8 # CPU cores to use
device: cuda:0 # device to run feature extraction on (cpu, cuda, cuda:0, etc.)
preload_wsi: true # Preload each whole-slide image into a temporary directory before processing (helpful when the slides are accessed over a network)

modeling:
clini_table: # Path to clini_table file (.xlsx or .csv)
Expand Down
28 changes: 26 additions & 2 deletions stamp/preprocessing/wsi_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from contextlib import contextmanager
import logging
import os
import shutil
import openslide
from tqdm import tqdm
import PIL
Expand All @@ -19,6 +20,7 @@
from datetime import timedelta
from pathlib import Path
from random import shuffle
import tempfile
import torch
from typing import Optional
from .helpers import stainNorm_Macenko
Expand All @@ -41,6 +43,8 @@ def lock_file(slide_path: Path):
Path(f"{slide_path}.lock").touch()
except PermissionError:
pass # No write permissions for wsi directory
except OSError:
pass # No write permissions for wsi directory
try:
yield
finally:
Expand All @@ -53,6 +57,9 @@ def test_wsidir_write_permissions(wsi_dir: Path):
except PermissionError:
logging.warning("No write permissions for wsi directory! If multiple stamp processes are running "
"in parallel, the final summary may show an incorrect number of slides processed.")
except OSError:
logging.warning("No write permissions for wsi directory! If multiple stamp processes are running "
"in parallel, the final summary may show an incorrect number of slides processed.")
finally:
clean_lockfile(testfile)

Expand All @@ -67,7 +74,7 @@ def save_image(image, path: Path):
def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Path, norm: bool,
del_slide: bool, only_feature_extraction: bool, cache: bool = True, cores: int = 8,
target_microns: int = 256, patch_size: int = 224, keep_dir_structure: bool = False,
device: str = "cuda", normalization_template: Path = None, feat_extractor: str = "ctp"):
device: str = "cuda", normalization_template: Path = None, feat_extractor: str = "ctp", preload_wsi: bool = False):
# Clean up potentially old leftover .lock files
for lockfile in wsi_dir.glob("**/*.lock"):
if time.time() - os.path.getmtime(lockfile) > 20:
Expand Down Expand Up @@ -169,7 +176,21 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat
print(f"Loaded {img_name}, {len(canny_norm_patch_list)}/{total} tiles remain")
else:
try:
slide = openslide.OpenSlide(slide_url)
if preload_wsi:
slide_tmp_dir = Path(tempfile.mkdtemp())
slide_tmp_file = slide_tmp_dir / slide_url.name

shutil.copy(slide_url, slide_tmp_file)

# Some slide formats (e.g. MIRAX, .mrxs) come with a companion directory of the same name, which needs to be copied as well
slide_folder_name = slide_url.with_suffix('')
if slide_folder_name.is_dir():
slide_folder_tmp = slide_tmp_dir / slide_folder_name.name
shutil.copytree(slide_folder_name, slide_folder_tmp)

slide = openslide.OpenSlide(slide_tmp_file)
else:
slide = openslide.OpenSlide(slide_url)
except openslide.lowlevel.OpenSlideUnsupportedFormatError:
logging.error("Unsupported format for slide, continuing...")
error_slides.append(slide_name)
Expand Down Expand Up @@ -235,6 +256,9 @@ def preprocess(output_dir: Path, wsi_dir: Path, model_path: Path, cache_dir: Pat
print("Deleting slide from local folder...")
if os.path.exists(slide_url):
os.remove(slide_url)

if preload_wsi:
shutil.rmtree(slide_tmp_dir)
FWao marked this conversation as resolved.
Show resolved Hide resolved

print(f"\nExtracting {model_name} features from slide...")
start_time = time.time()
Expand Down