diff --git a/src/otx/core/config/__init__.py b/src/otx/core/config/__init__.py index cfaed2264b4..f3244449f73 100644 --- a/src/otx/core/config/__init__.py +++ b/src/otx/core/config/__init__.py @@ -21,7 +21,7 @@ def as_int_tuple(*args) -> tuple[int, ...]: YAML file example:: ```yaml - mem_cache_img_max_size: ${as_int_tuple:500,500} + mem_cache_img_min_size: ${as_int_tuple:500,500} ``` """ return tuple(int(arg) for arg in args) diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py index 23391984423..05ca5b3cda3 100644 --- a/src/otx/core/data/dataset/action_classification.py +++ b/src/otx/core/data/dataset/action_classification.py @@ -32,7 +32,7 @@ def __init__( dm_subset: DatasetSubset, transforms: Transforms, mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, max_refetch: int = 1000, image_color_channel: ImageColorChannel = ImageColorChannel.BGR, stack_images: bool = True, @@ -42,7 +42,7 @@ def __init__( dm_subset, transforms, mem_cache_handler, - mem_cache_img_max_size, + mem_cache_img_min_size, max_refetch, image_color_channel, stack_images, diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py index b776ccda911..7ece094ba0d 100644 --- a/src/otx/core/data/dataset/anomaly.py +++ b/src/otx/core/data/dataset/anomaly.py @@ -40,7 +40,7 @@ def __init__( dm_subset: DmDataset, transforms: Transforms, mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, max_refetch: int = 1000, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -51,7 +51,7 @@ def __init__( dm_subset, transforms, mem_cache_handler, - mem_cache_img_max_size, + mem_cache_img_min_size, max_refetch, image_color_channel, stack_images, @@ -79,7 +79,7 @@ def _get_item_impl( image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ), @@ -101,7 +101,7 @@ def _get_item_impl( image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ), @@ -124,7 +124,7 @@ def _get_item_impl( image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ), diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py index a98f7c6083b..6e3a537ed88 100644 --- a/src/otx/core/data/dataset/base.py +++ b/src/otx/core/data/dataset/base.py @@ -66,7 +66,7 @@ class OTXDataset(Dataset, Generic[T_OTXDataEntity]): dm_subset: Datumaro subset of a dataset transforms: Transforms to apply on images mem_cache_handler: Handler of the images cache - mem_cache_img_max_size: Max size of images to put in cache + mem_cache_img_min_size: Minimum size of images to put in cache max_refetch: Maximum number of images to fetch in cache image_color_channel: Color channel of images stack_images: Whether or not to stack images in collate function in OTXBatchData entity. @@ -78,7 +78,7 @@ def __init__( dm_subset: DatasetSubset, transforms: Transforms, mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, max_refetch: int = 1000, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -87,7 +87,7 @@ def __init__( self.dm_subset = dm_subset self.transforms = transforms self.mem_cache_handler = mem_cache_handler - self.mem_cache_img_max_size = mem_cache_img_max_size + self.mem_cache_img_min_size = mem_cache_img_min_size self.max_refetch = max_refetch self.image_color_channel = image_color_channel self.stack_images = stack_images @@ -142,10 +142,13 @@ def __getitem__(self, index: int) -> T_OTXDataEntity: raise RuntimeError(msg) def _get_img_data_and_shape(self, img: Image) -> tuple[np.ndarray, tuple[int, int]]: + """Get image and original image shape from the memory cache.""" key = img.path if isinstance(img, ImageFromFile) else id(img) + img_shape = img.size - if (img_data := self.mem_cache_handler.get(key=key)[0]) is not None: - return img_data, img_data.shape[:2] + img_data, _ = self.mem_cache_handler.get(key=key) + if img_data is not None: + return img_data, img_shape or img_data.shape[:2] with image_decode_context(): img_data = ( @@ -160,13 +163,13 @@ def _get_img_data_and_shape(self, img: Image) -> tuple[np.ndarray, tuple[int, in img_data = self._cache_img(key=key, img_data=img_data.astype(np.uint8)) - return img_data, img_data.shape[:2] + return img_data, img_shape or img_data.shape[:2] def _cache_img(self, key: str | int, img_data: np.ndarray) -> np.ndarray: """Cache an image after resizing. If there is available space in the memory pool, the input image is cached. - Before caching, the input image is resized if it is larger than the maximum image size + Before caching, the input image is resized if it is larger than the minimum image size specified by the memory caching handler. Otherwise, the input image is directly cached. After caching, the processed image data is returned. @@ -181,21 +184,21 @@ def _cache_img(self, key: str | int, img_data: np.ndarray) -> np.ndarray: if self.mem_cache_handler.frozen: return img_data - if self.mem_cache_img_max_size is None: + if self.mem_cache_img_min_size is None: self.mem_cache_handler.put(key=key, data=img_data, meta=None) return img_data height, width = img_data.shape[:2] - max_height, max_width = self.mem_cache_img_max_size + min_height, min_width = self.mem_cache_img_min_size - if height <= max_height and width <= max_width: + if height <= min_height or width <= min_width: self.mem_cache_handler.put(key=key, data=img_data, meta=None) return img_data - # Preserve the image size ratio and fit to max_height or max_width - # e.g. (1000 / 2000 = 0.5, 1000 / 1000 = 1.0) => 0.5 - # h, w = 2000 * 0.5 => 1000, 1000 * 0.5 => 500, bounded by max_height - min_scale = min(max_height / height, max_width / width) + # Preserve the image size ratio and fit to min_height or min_width + # e.g. (1000 / 2000 = 0.5, 1000 / 4000 = 0.25) => 0.5 + # h, w = 2000 * 0.5 => 1000, 4000 * 0.5 => 2000, bounded by min_height + min_scale = max(min_height / height, min_width / width) new_height, new_width = int(min_scale * height), int(min_scale * width) resized_img = cv2.resize( src=img_data, diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py index 57170da967b..48a429fec5b 100644 --- a/src/otx/core/data/dataset/classification.py +++ b/src/otx/core/data/dataset/classification.py @@ -43,7 +43,7 @@ def _get_item_impl(self, index: int) -> MulticlassClsDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ), @@ -78,7 +78,7 @@ def _get_item_impl(self, index: int) -> MultilabelClsDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, @@ -186,7 +186,7 @@ def _get_item_impl(self, index: int) -> HlabelClsDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, diff --git a/src/otx/core/data/dataset/detection.py b/src/otx/core/data/dataset/detection.py index 8094638b457..f6ec98c70db 100644 --- a/src/otx/core/data/dataset/detection.py +++ b/src/otx/core/data/dataset/detection.py @@ -40,7 +40,7 @@ def _get_item_impl(self, index: int) -> DetDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, diff --git a/src/otx/core/data/dataset/instance_segmentation.py b/src/otx/core/data/dataset/instance_segmentation.py index 0a3abaeb877..6a075f12456 100644 --- a/src/otx/core/data/dataset/instance_segmentation.py +++ b/src/otx/core/data/dataset/instance_segmentation.py @@ -66,7 +66,7 @@ def _get_item_impl(self, index: int) -> InstanceSegDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, diff --git a/src/otx/core/data/dataset/keypoint_detection.py b/src/otx/core/data/dataset/keypoint_detection.py index 2b8edbe9d46..1322deca849 100644 --- a/src/otx/core/data/dataset/keypoint_detection.py +++ b/src/otx/core/data/dataset/keypoint_detection.py @@ -34,7 +34,7 @@ def __init__( dm_subset: DatasetSubset, transforms: Transforms, mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, max_refetch: int = 1000, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -44,7 +44,7 @@ def __init__( dm_subset, transforms, mem_cache_handler, - mem_cache_img_max_size, + mem_cache_img_min_size, max_refetch, image_color_channel, stack_images, @@ -112,7 +112,7 @@ def _get_item_impl(self, index: int) -> KeypointDetDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index 363a15e84cc..ae77c0e7f4b 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ -161,7 +161,7 @@ def __init__( dm_subset: DmDataset, transforms: Transforms, mem_cache_handler: MemCacheHandlerBase = NULL_MEM_CACHE_HANDLER, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, max_refetch: int = 1000, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -172,7 +172,7 @@ def __init__( dm_subset, transforms, mem_cache_handler, - mem_cache_img_max_size, + mem_cache_img_min_size, max_refetch, image_color_channel, stack_images, @@ -214,7 +214,7 @@ def _get_item_impl(self, index: int) -> SegDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, diff --git a/src/otx/core/data/dataset/tile.py b/src/otx/core/data/dataset/tile.py index a1fcee48621..5d6420cacf0 100644 --- a/src/otx/core/data/dataset/tile.py +++ b/src/otx/core/data/dataset/tile.py @@ -212,7 +212,7 @@ def __init__(self, dataset: OTXDataset, tile_config: TileConfig) -> None: dataset.dm_subset, dataset.transforms, dataset.mem_cache_handler, - dataset.mem_cache_img_max_size, + dataset.mem_cache_img_min_size, dataset.max_refetch, ) self.tile_config = tile_config @@ -354,7 +354,7 @@ def _get_item_impl(self, index: int) -> TileDetDataEntity: # type: ignore[overr tile_attr_list=tile_attrs, ori_img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, ), ori_bboxes=tv_tensors.BoundingBoxes( @@ -456,7 +456,7 @@ def _get_item_impl(self, index: int) -> TileInstSegDataEntity: # type: ignore[o tile_attr_list=tile_attrs, ori_img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, ), ori_bboxes=tv_tensors.BoundingBoxes( diff --git a/src/otx/core/data/dataset/visual_prompting.py b/src/otx/core/data/dataset/visual_prompting.py index 0047e9350fe..3360f4d5ad1 100644 --- a/src/otx/core/data/dataset/visual_prompting.py +++ b/src/otx/core/data/dataset/visual_prompting.py @@ -147,7 +147,7 @@ def _get_item_impl(self, index: int) -> VisualPromptingDataEntity | None: image=img_data, img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, ), masks=None, @@ -270,7 +270,7 @@ def _get_item_impl(self, index: int) -> ZeroShotVisualPromptingDataEntity | None image=to_image(img_data), img_info=ImageInfo( img_idx=index, - img_shape=img_shape, + img_shape=img_data.shape[:2], ori_shape=img_shape, ), masks=masks, diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py index 4ca229aacb1..31d86112d92 100644 --- a/src/otx/core/data/factory.py +++ b/src/otx/core/data/factory.py @@ -73,7 +73,7 @@ def create( # noqa: PLR0911 dm_subset: DmDataset, cfg_subset: SubsetConfig, mem_cache_handler: MemCacheHandlerBase, - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, include_polygons: bool = False, @@ -86,7 +86,7 @@ def create( # noqa: PLR0911 "dm_subset": dm_subset, "transforms": transforms, "mem_cache_handler": mem_cache_handler, - "mem_cache_img_max_size": mem_cache_img_max_size, + "mem_cache_img_min_size": mem_cache_img_min_size, "image_color_channel": image_color_channel, "stack_images": stack_images, "to_tv_image": cfg_subset.to_tv_image, diff --git a/src/otx/core/data/mem_cache.py b/src/otx/core/data/mem_cache.py index 7d34867fbaa..dab2c9f1c1a 100644 --- a/src/otx/core/data/mem_cache.py +++ b/src/otx/core/data/mem_cache.py @@ -82,9 +82,9 @@ class MemCacheHandlerBase: It will be combined with LoadImageFromOTXDataset to store/retrieve the samples in memory. """ - def __init__(self, mem_size: int, mem_cache_img_max_size: tuple[int, int] | None = None): + def __init__(self, mem_size: int, mem_cache_img_min_size: tuple[int, int] | None = None): self._mem_size = mem_size - self._mem_cache_img_max_size = mem_cache_img_max_size + self._mem_cache_img_max_size = mem_cache_img_min_size self._init_data_structs(mem_size) def _init_data_structs(self, mem_size: int) -> None: @@ -110,7 +110,7 @@ def mem_size(self) -> int: return len(self._arr) @property - def mem_cache_img_max_size(self) -> tuple[int, int] | None: + def mem_cache_img_min_size(self) -> tuple[int, int] | None: """Get the image max size in mem cache.""" return self._mem_cache_img_max_size diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py index 06f62f1c614..9b103abec36 100644 --- a/src/otx/core/data/module.py +++ b/src/otx/core/data/module.py @@ -66,7 +66,7 @@ def __init__( # noqa: PLR0913 tile_config: TileConfig = TileConfig(enable_tiler=False), vpm_config: VisualPromptingConfig = VisualPromptingConfig(), # noqa: B008 mem_cache_size: str = "1GB", - mem_cache_img_max_size: tuple[int, int] | None = None, + mem_cache_img_min_size: tuple[int, int] | None = None, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, include_polygons: bool = False, @@ -93,7 +93,7 @@ def __init__( # noqa: PLR0913 self.vpm_config = vpm_config self.mem_cache_size = mem_cache_size - self.mem_cache_img_max_size = mem_cache_img_max_size + self.mem_cache_img_min_size = mem_cache_img_min_size self.image_color_channel = image_color_channel self.stack_images = stack_images @@ -146,9 +146,18 @@ def __init__( # noqa: PLR0913 for subset_cfg in [train_subset, val_subset, test_subset, unlabeled_subset]: if subset_cfg.input_size is None: subset_cfg.input_size = input_size + + if self.mem_cache_img_min_size is None: + self.mem_cache_img_min_size = ( + (input_size, input_size) # type: ignore[assignment] + if isinstance(input_size, int) + else tuple(input_size) + ) + self.input_size = input_size if self.tile_config.enable_tiler and self.tile_config.enable_adaptive_tiling: + self.mem_cache_img_min_size = None adapt_tile_config(self.tile_config, dataset=dataset) config_mapping = { @@ -193,7 +202,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset.as_dataset(), cfg_subset=config_mapping[name], mem_cache_handler=mem_cache_handler, - mem_cache_img_max_size=mem_cache_img_max_size, + mem_cache_img_min_size=self.mem_cache_img_min_size, image_color_channel=image_color_channel, stack_images=stack_images, include_polygons=include_polygons, @@ -231,7 +240,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset, cfg_subset=unlabeled_config, mem_cache_handler=mem_cache_handler, - mem_cache_img_max_size=mem_cache_img_max_size, + mem_cache_img_min_size=self.mem_cache_img_min_size, image_color_channel=image_color_channel, stack_images=stack_images, include_polygons=include_polygons, @@ -245,7 +254,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset.as_dataset(), cfg_subset=self.unlabeled_subset, mem_cache_handler=mem_cache_handler, - mem_cache_img_max_size=mem_cache_img_max_size, + mem_cache_img_min_size=self.mem_cache_img_min_size, image_color_channel=image_color_channel, stack_images=stack_images, include_polygons=include_polygons, @@ -456,7 +465,7 @@ def __reduce__(self): self.tile_config, self.vpm_config, self.mem_cache_size, - self.mem_cache_img_max_size, + self.mem_cache_img_min_size, self.image_color_channel, self.stack_images, self.include_polygons, diff --git a/src/otx/recipe/_base_/data/anomaly.yaml b/src/otx/recipe/_base_/data/anomaly.yaml index 2f74b987915..865ba7dc03b 100644 --- a/src/otx/recipe/_base_/data/anomaly.yaml +++ b/src/otx/recipe/_base_/data/anomaly.yaml @@ -2,7 +2,7 @@ task: ANOMALY_CLASSIFICATION input_size: 256 data_format: mvtec mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: false unannotated_items_ratio: 0.0 diff --git a/src/otx/recipe/_base_/data/classification.yaml b/src/otx/recipe/_base_/data/classification.yaml index e8ee41bf15e..288214917d5 100644 --- a/src/otx/recipe/_base_/data/classification.yaml +++ b/src/otx/recipe/_base_/data/classification.yaml @@ -1,7 +1,7 @@ task: MULTI_CLASS_CLS input_size: 224 mem_cache_size: 1GB -mem_cache_img_max_size: +mem_cache_img_min_size: - 500 - 500 image_color_channel: RGB diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml index c08a5fea022..cbdb714d9ec 100644 --- a/src/otx/recipe/_base_/data/detection.yaml +++ b/src/otx/recipe/_base_/data/detection.yaml @@ -3,7 +3,7 @@ input_size: - 800 - 992 mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: true data_format: coco_instances diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml index 3520f3930a7..3e1261df073 100644 --- a/src/otx/recipe/_base_/data/instance_segmentation.yaml +++ b/src/otx/recipe/_base_/data/instance_segmentation.yaml @@ -3,7 +3,7 @@ input_size: - 1024 - 1024 mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: true data_format: coco_instances diff --git a/src/otx/recipe/_base_/data/keypoint_detection.yaml b/src/otx/recipe/_base_/data/keypoint_detection.yaml index bc6bf54540e..89e5db21ef1 100644 --- a/src/otx/recipe/_base_/data/keypoint_detection.yaml +++ b/src/otx/recipe/_base_/data/keypoint_detection.yaml @@ -1,6 +1,6 @@ task: KEYPOINT_DETECTION mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null stack_images: true data_format: coco_person_keypoints unannotated_items_ratio: 0.0 diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index 52b3dec6f63..bcbc49e784d 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -3,7 +3,7 @@ input_size: - 512 - 512 mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB data_format: common_semantic_segmentation_with_subset_dirs include_polygons: true diff --git a/src/otx/recipe/_base_/data/semisl/semantic_segmentation_semisl.yaml b/src/otx/recipe/_base_/data/semisl/semantic_segmentation_semisl.yaml index ec3d647ab39..e86127711c3 100644 --- a/src/otx/recipe/_base_/data/semisl/semantic_segmentation_semisl.yaml +++ b/src/otx/recipe/_base_/data/semisl/semantic_segmentation_semisl.yaml @@ -3,7 +3,7 @@ input_size: - 512 - 512 mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB data_format: common_semantic_segmentation_with_subset_dirs include_polygons: true diff --git a/src/otx/recipe/_base_/data/semisl/torchvision_semisl.yaml b/src/otx/recipe/_base_/data/semisl/torchvision_semisl.yaml index 1b5d630a1ec..4ffb3d2291f 100644 --- a/src/otx/recipe/_base_/data/semisl/torchvision_semisl.yaml +++ b/src/otx/recipe/_base_/data/semisl/torchvision_semisl.yaml @@ -1,7 +1,7 @@ task: MULTI_CLASS_CLS input_size: 224 mem_cache_size: 1GB -mem_cache_img_max_size: +mem_cache_img_min_size: - 500 - 500 image_color_channel: RGB diff --git a/src/otx/recipe/_base_/data/torchvision_base.yaml b/src/otx/recipe/_base_/data/torchvision_base.yaml index ab71ff16e86..d32a4522a39 100644 --- a/src/otx/recipe/_base_/data/torchvision_base.yaml +++ b/src/otx/recipe/_base_/data/torchvision_base.yaml @@ -1,6 +1,6 @@ task: MULTI_CLASS_CLS mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: false data_format: imagenet_with_subset_dirs diff --git a/src/otx/recipe/_base_/data/visual_prompting.yaml b/src/otx/recipe/_base_/data/visual_prompting.yaml index f51287efdec..7e50b95e976 100644 --- a/src/otx/recipe/_base_/data/visual_prompting.yaml +++ b/src/otx/recipe/_base_/data/visual_prompting.yaml @@ -3,7 +3,7 @@ input_size: - 1024 - 1024 mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: false data_format: coco_instances diff --git a/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml index ef43ad84e60..95309102441 100644 --- a/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml +++ b/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml @@ -1,6 +1,6 @@ task: ZERO_SHOT_VISUAL_PROMPTING mem_cache_size: 1GB -mem_cache_img_max_size: null +mem_cache_img_min_size: null image_color_channel: RGB stack_images: false data_format: coco_instances diff --git a/src/otx/recipe/action_classification/movinet.yaml b/src/otx/recipe/action_classification/movinet.yaml index 6964d236cf5..dd0692b5d5e 100644 --- a/src/otx/recipe/action_classification/movinet.yaml +++ b/src/otx/recipe/action_classification/movinet.yaml @@ -30,7 +30,7 @@ data: - 224 data_format: kinetics mem_cache_size: 1GB - mem_cache_img_max_size: + mem_cache_img_min_size: - 500 - 500 image_color_channel: BGR diff --git a/src/otx/recipe/action_classification/x3d.yaml b/src/otx/recipe/action_classification/x3d.yaml index 283daa72b2a..a06bd358a46 100644 --- a/src/otx/recipe/action_classification/x3d.yaml +++ b/src/otx/recipe/action_classification/x3d.yaml @@ -30,7 +30,7 @@ data: - 224 data_format: kinetics mem_cache_size: 1GB - mem_cache_img_max_size: + mem_cache_img_min_size: - 500 - 500 image_color_channel: BGR diff --git a/tests/unit/cli/utils/test_jsonargparse.py b/tests/unit/cli/utils/test_jsonargparse.py index a9f499fff59..f87a67cc694 100644 --- a/tests/unit/cli/utils/test_jsonargparse.py +++ b/tests/unit/cli/utils/test_jsonargparse.py @@ -152,14 +152,14 @@ def test_namespace_override(fxt_configs) -> None: # test for single key override overrides = Namespace( - mem_cache_img_max_size=[100, 100], + mem_cache_img_min_size=[100, 100], stack_images=False, train_subset=Namespace(batch_size=64, num_workers=8), ) namespace_override(configs=cfg, key="data", overrides=overrides, convert_dict_to_namespace=False) - assert cfg.data.mem_cache_img_max_size == overrides.mem_cache_img_max_size + assert cfg.data.mem_cache_img_min_size == overrides.mem_cache_img_min_size assert cfg.data.stack_images == overrides.stack_images assert cfg.data.train_subset.batch_size == overrides.train_subset.batch_size assert cfg.data.train_subset.num_workers == overrides.train_subset.num_workers diff --git a/tests/unit/core/config/test_resolver.py b/tests/unit/core/config/test_resolver.py index 51ecd411ee6..34c2d6c34e4 100644 --- a/tests/unit/core/config/test_resolver.py +++ b/tests/unit/core/config/test_resolver.py @@ -8,11 +8,11 @@ class TestResolver: def test_as_int_tuple(self) -> None: cfg_str = """ - mem_cache_img_max_size: ${as_int_tuple:1333,800} + mem_cache_img_min_size: ${as_int_tuple:1333,800} """ cfg = OmegaConf.create(cfg_str) - assert isinstance(cfg.mem_cache_img_max_size, tuple) - assert cfg.mem_cache_img_max_size == (1333, 800) + assert isinstance(cfg.mem_cache_img_min_size, tuple) + assert cfg.mem_cache_img_min_size == (1333, 800) def test_as_torch_dtype(self) -> None: cfg_str = """ diff --git a/tests/unit/core/data/test_dataset.py b/tests/unit/core/data/test_dataset.py index 44031b96a46..a1a697d8b0d 100644 --- a/tests/unit/core/data/test_dataset.py +++ b/tests/unit/core/data/test_dataset.py @@ -23,7 +23,7 @@ def test_get_item( dataset = dataset_cls( dm_subset=fxt_mock_dm_subset, transforms=[lambda x: x], - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, max_refetch=3, **kwargs, ) @@ -48,17 +48,17 @@ def test_sample_another_idx( dataset = dataset_cls( dm_subset=fxt_mock_dm_subset, transforms=lambda x: x, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, **kwargs, ) dataset.num_classes = 1 assert dataset._sample_another_idx() < len(dataset) - @pytest.mark.parametrize("mem_cache_img_max_size", [(3, 5), (5, 3)]) + @pytest.mark.parametrize("mem_cache_img_min_size", [(3, 5), (5, 3)]) def test_mem_cache_resize( self, mocker, - mem_cache_img_max_size, + mem_cache_img_min_size, fxt_mem_cache_handler, fxt_dataset_and_data_entity_cls, fxt_mock_dm_subset: MagicMock, @@ -71,7 +71,7 @@ def test_mem_cache_resize( dm_subset=fxt_mock_dm_subset, transforms=lambda x: x, mem_cache_handler=fxt_mem_cache_handler, - mem_cache_img_max_size=mem_cache_img_max_size, + mem_cache_img_min_size=mem_cache_img_min_size, **kwargs, ) dataset.num_classes = 1 @@ -79,7 +79,7 @@ def test_mem_cache_resize( item = dataset[0] # Put in the cache # The returned image should be resized because it was resized before caching - h_expected = w_expected = min(mem_cache_img_max_size) + h_expected = w_expected = max(mem_cache_img_min_size) if dataset_cls != OTXActionClsDataset: # Action classification dataset handle video, not image. assert item.image.shape[:2] == (h_expected, w_expected) assert item.img_info.img_shape == (h_expected, w_expected) @@ -95,7 +95,7 @@ def test_ignore_index(self, fxt_mock_dm_subset): dataset = OTXSegmentationDataset( dm_subset=fxt_mock_dm_subset, transforms=lambda x: x, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, ignore_index=100, ) @@ -109,7 +109,7 @@ def test_overflown_ignore_index(self, fxt_mock_dm_subset): dataset = OTXSegmentationDataset( dm_subset=fxt_mock_dm_subset, transforms=lambda x: x, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, ignore_index=65536, ) with pytest.raises( @@ -131,7 +131,7 @@ def test_overflown_label(self, fxt_invalid_label, fxt_mock_dm_subset): dataset = OTXSegmentationDataset( dm_subset=fxt_mock_dm_subset, transforms=lambda x: x, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, ignore_index=100, ) diff --git a/tests/unit/core/data/test_module.py b/tests/unit/core/data/test_module.py index e5365406ddc..c19f57497e9 100644 --- a/tests/unit/core/data/test_module.py +++ b/tests/unit/core/data/test_module.py @@ -133,6 +133,7 @@ def test_init( assert fxt_config.train_subset.input_size is None assert fxt_config.val_subset.input_size is None assert fxt_config.test_subset.input_size is None + assert module.mem_cache_img_min_size is None def test_init_input_size( self, @@ -148,7 +149,7 @@ def test_init_input_size( fxt_config.val_subset.input_size = None fxt_config.test_subset.input_size = (800, 800) - OTXDataModule( + data_module = OTXDataModule( task=OTXTaskType.MULTI_CLASS_CLS, data_format=fxt_config.data_format, data_root=fxt_config.data_root, @@ -161,6 +162,7 @@ def test_init_input_size( assert fxt_config.train_subset.input_size == (1200, 1200) assert fxt_config.val_subset.input_size == (1200, 1200) assert fxt_config.test_subset.input_size == (800, 800) + assert data_module.mem_cache_img_min_size == (1200, 1200) @pytest.fixture() def mock_adapt_input_size_to_dataset(self, mocker) -> MagicMock: diff --git a/tests/unit/core/data/test_transform_libs.py b/tests/unit/core/data/test_transform_libs.py index 9af540588d0..4f4652e699c 100644 --- a/tests/unit/core/data/test_transform_libs.py +++ b/tests/unit/core/data/test_transform_libs.py @@ -178,7 +178,7 @@ def test_transform( dataset = dataset_cls( dm_subset=fxt_mock_dm_subset, transforms=transform, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, **kwargs, ) dataset.num_classes = 1 @@ -260,7 +260,7 @@ def test_image_info( dataset = dataset_cls( dm_subset=fxt_mock_dm_subset, transforms=transform, - mem_cache_img_max_size=None, + mem_cache_img_min_size=None, image_color_channel=fxt_image_color_channel, **kwargs, )