Skip to content

Commit

Permalink
Improved DatasetNotFound error message (#7923)
Browse files Browse the repository at this point in the history
The recent changes enhance the dataset import functionality across various dataset formats in the CVAT application by integrating specific importers from the Datumaro library. The updates streamline the detection of datasets, improve error handling, and introduce new tests to ensure robustness against incorrect file structures during import operations.
  • Loading branch information
klakhov authored May 29, 2024
1 parent 0b2f877 commit da71018
Show file tree
Hide file tree
Showing 29 changed files with 181 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Added

- Improved message of DatasetNotFoundError
(<https://github.com/cvat-ai/cvat/pull/7923>)
6 changes: 5 additions & 1 deletion cvat-core/src/server-proxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,11 @@ async function chunkUpload(file: File, uploadConfig): Promise<{ uploadSentSize:
function filterPythonTraceback(data: string): string {
if (typeof data === 'string' && data.trim().startsWith('Traceback')) {
const lastRow = data.split('\n').findLastIndex((str) => str.trim().length);
return `${data.split('\n').slice(lastRow, lastRow + 1)[0]}`;
let errorText = `${data.split('\n').slice(lastRow, lastRow + 1)[0]}`;
if (errorText.includes('CvatDatasetNotFoundError')) {
errorText = errorText.replace(/.*CvatDatasetNotFoundError: /, '');
}
return errorText;
}

return data;
Expand Down
7 changes: 6 additions & 1 deletion cvat-ui/src/components/cvat-app.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -375,13 +375,18 @@ class CVATApplication extends React.PureComponent<CVATAppProps & RouteComponentP
function showError(title: string, _error: Error, shouldLog?: boolean, className?: string): void {
const error = _error?.message || _error.toString();
const dynamicProps = typeof className === 'undefined' ? {} : { className };
let errorLength = error.length;
// Do not count the length of the link in the Markdown error message
if (/]\(.+\)/.test(error)) {
errorLength = error.replace(/]\(.+\)/, ']').length;
}
notification.error({
...dynamicProps,
message: (
<ReactMarkdown>{title}</ReactMarkdown>
),
duration: null,
description: error.length > 300 ? 'Open the Browser Console to get details' : <ReactMarkdown>{error}</ReactMarkdown>,
description: errorLength > 300 ? 'Open the Browser Console to get details' : <ReactMarkdown>{error}</ReactMarkdown>,
});

if (shouldLog) {
Expand Down
48 changes: 48 additions & 0 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import annotations

import os.path as osp
import re
import sys
from collections import namedtuple
from functools import reduce
Expand All @@ -22,8 +23,12 @@
import rq
from attr import attrib, attrs
from datumaro.components.media import PointCloud
from datumaro.components.environment import Environment
from datumaro.components.extractor import Importer
from datumaro.components.format_detection import RejectionReason
from django.db.models import QuerySet
from django.utils import timezone
from django.conf import settings

from cvat.apps.dataset_manager.formats.utils import get_label_color
from cvat.apps.dataset_manager.util import add_prefetch_fields
Expand Down Expand Up @@ -1664,6 +1669,33 @@ def GetCVATDataExtractor(
class CvatImportError(Exception):
    """Base class for errors raised while importing a dataset into CVAT."""

@attrs
class CvatDatasetNotFoundError(CvatImportError):
    """Import error describing why a dataset of the expected format was not found.

    ``str(error)`` yields a Markdown link to the documentation page of the
    format, followed by a shortened version of ``message``.
    """

    # Human-readable rejection text; shortened by _clean_display_message().
    message: str = ""
    # Rejection reason reported by the detector; not read by this class itself.
    reason: str = ""
    # Format name; underscores are turned into dashes for the docs link.
    format_name: str = ""
    # Common prefix of every per-format documentation page.
    _docs_base_url = f"{settings.CVAT_DOCS_URL}/manual/advanced/formats/"

    def __str__(self) -> str:
        """Return ``"<docs link>. <cleaned message>"``."""
        docs_slug = self._format_name_for_docs()
        docs_hint = self._docs_message(docs_slug)
        details = self._clean_display_message()
        return f"{docs_hint}. {details}"

    def _format_name_for_docs(self) -> str:
        """Return ``format_name`` with every underscore replaced by a dash."""
        return self.format_name.replace("_", "-")

    def _docs_message(self, formatted_format_name: str) -> str:
        """Return the Markdown "Check [format docs](...)" hint for the format."""
        docs_url = f"{self._docs_base_url}format-{formatted_format_name}"
        return f"Check [format docs]({docs_url})"

    def _clean_display_message(self) -> str:
        """Return ``message`` reduced to the part worth showing to the user.

        Everything up to and including the first colon on the first line is
        dropped. If the remainder contains the "file matching pattern" phrase,
        the phrase and all newlines are removed and a fixed prefix is added.
        Runs of spaces are collapsed to a single space in either case.
        """
        pattern_phrase = "dataset must contain a file matching pattern"

        text = re.sub(r'^.*?:', "", self.message)
        if pattern_phrase in text:
            remainder = text.replace(pattern_phrase, "").replace("\n", "")
            text = "Dataset must contain a file:" + remainder
        return re.sub(r' +', " ", text)

def mangle_image_name(name: str, subset: str, names: DefaultDict[Tuple[str, str], int]) -> str:
name, ext = name.rsplit(osp.extsep, maxsplit=1)

Expand Down Expand Up @@ -2265,3 +2297,19 @@ def load_dataset_data(project_annotation, dataset: dm.Dataset, project_data):
dataset_files['data_root'] = osp.commonpath(root_paths) + osp.sep

project_annotation.add_task(task_fields, dataset_files, project_data)

def detect_dataset(dataset_dir: str, format_name: str, importer: Importer) -> None:
    """Check that ``dataset_dir`` holds a dataset detectable by ``importer``.

    Detection runs in a fresh ``Environment`` whose importer registry is
    cleared and then populated with ``importer`` only, registered under
    ``format_name``. The directory is probed with ``depth=4``.

    Raises:
        CvatDatasetNotFoundError: if nothing was detected, unless the last
            recorded rejection reason is ``RejectionReason.detection_unsupported``
            (presumably meaning the importer cannot detect at all -- confirm
            against Datumaro).
    """
    error = CvatDatasetNotFoundError()

    def remember_rejection(_, reason, human_message):
        # Each call overwrites the previous one, so only the last rejection
        # reported by the detector is kept on the error.
        error.format_name = format_name
        error.reason = reason
        error.message = human_message

    env = Environment()
    env.importers.items.clear()
    env.importers.register(format_name, importer)

    found = env.detect_dataset(dataset_dir, depth=4, rejection_callback=remember_rejection)
    if found:
        return
    if error.reason != RejectionReason.detection_unsupported:
        raise error
3 changes: 3 additions & 0 deletions cvat/apps/dataset_manager/formats/camvid.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

# We do not run detect_dataset before import because the CamVid format
# has a problem with dataset detection when the annotation file(s) are empty.
# Details: https://github.com/cvat-ai/datumaro/issues/43
dataset = Dataset.import_from(temp_dir, 'camvid', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/cityscapes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from datumaro.plugins.cityscapes_format import write_label_map
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

Expand Down Expand Up @@ -43,6 +43,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
for label, info in make_colormap(instance_data).items()}
write_label_map(labelmap_file, colormap)

detect_dataset(temp_dir, format_name='cityscapes', importer= dm_env.importers.get('cityscapes'))
dataset = Dataset.import_from(temp_dir, 'cityscapes', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
Expand Down
7 changes: 6 additions & 1 deletion cvat/apps/dataset_manager/formats/coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

from datumaro.components.dataset import Dataset
from datumaro.components.annotation import AnnotationType
from datumaro.plugins.coco_format.importer import CocoImporter

from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \
from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive

Expand All @@ -27,6 +28,8 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)
# We use the COCO importer because it gives a better error message
detect_dataset(temp_dir, format_name='coco', importer=CocoImporter)
dataset = Dataset.import_from(temp_dir, 'coco_instances', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand Down Expand Up @@ -55,6 +58,8 @@ def remove_extra_annotations(dataset):

if zipfile.is_zipfile(src_file):
zipfile.ZipFile(src_file).extractall(temp_dir)
# We use the COCO importer because it gives a better error message
detect_dataset(temp_dir, format_name='coco', importer=CocoImporter)
dataset = Dataset.import_from(temp_dir, 'coco_person_keypoints', env=dm_env)
remove_extra_annotations(dataset)
if load_data_callback is not None:
Expand Down
5 changes: 4 additions & 1 deletion cvat/apps/dataset_manager/formats/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
from datumaro.components.dataset import Dataset, DatasetItem
from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, Extractor,
Importer)
from datumaro.plugins.cvat_format.extractor import CvatImporter as _CvatImporter

from datumaro.util.image import Image
from defusedxml import ElementTree

from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData,
from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData, detect_dataset,
get_defaulted_subset,
import_dm_annotations,
match_dm_item)
Expand Down Expand Up @@ -1439,6 +1441,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
zipfile.ZipFile(src_file).extractall(temp_dir)

if isinstance(instance_data, ProjectData):
detect_dataset(temp_dir, format_name='cvat', importer=_CvatImporter)
dataset = Dataset.import_from(temp_dir, 'cvat', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand Down
5 changes: 4 additions & 1 deletion cvat/apps/dataset_manager/formats/datumaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform
from datumaro.util.image import Image

from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive
from cvat.apps.engine.models import DimensionType
Expand Down Expand Up @@ -37,6 +38,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)

if load_data_callback is not None:
Expand All @@ -61,6 +63,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro'))
dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env)

if load_data_callback is not None:
Expand Down
11 changes: 11 additions & 0 deletions cvat/apps/dataset_manager/formats/icdar.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def _export_recognition(dst_file, temp_dir, instance_data, save_images=False):
@importer(name='ICDAR Recognition', ext='ZIP', version='1.0')
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)

# We do not run detect_dataset before import because the ICDAR format
# has a problem with dataset detection when the annotation file(s) are empty.
# Details: https://github.com/cvat-ai/datumaro/issues/43
dataset = Dataset.import_from(temp_dir, 'icdar_word_recognition', env=dm_env)
dataset.transform(CaptionToLabel, label='icdar')
if load_data_callback is not None:
Expand All @@ -107,6 +111,9 @@ def _export_localization(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)

# We do not run detect_dataset before import because the ICDAR format
# has a problem with dataset detection when the annotation file(s) are empty.
# Details: https://github.com/cvat-ai/datumaro/issues/43
dataset = Dataset.import_from(temp_dir, 'icdar_text_localization', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
if load_data_callback is not None:
Expand All @@ -129,6 +136,10 @@ def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False):
@importer(name='ICDAR Segmentation', ext='ZIP', version='1.0')
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)

# We do not run detect_dataset before import because the ICDAR format
# has a problem with dataset detection when the annotation file(s) are empty.
# Details: https://github.com/cvat-ai/datumaro/issues/43
dataset = Dataset.import_from(temp_dir, 'icdar_text_segmentation', env=dm_env)
dataset.transform(AddLabelToAnns, label='icdar')
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
Expand Down
4 changes: 4 additions & 0 deletions cvat/apps/dataset_manager/formats/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
@importer(name='ImageNet', ext='ZIP', version='1.0')
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)

# We do not run detect_dataset before import because the ImageNet format
# has a problem with dataset detection when the annotation file(s) are empty.
# Details: https://github.com/cvat-ai/datumaro/issues/43
if glob(osp.join(temp_dir, '*.txt')):
dataset = Dataset.import_from(temp_dir, 'imagenet_txt', env=dm_env)
else:
Expand Down
4 changes: 3 additions & 1 deletion cvat/apps/dataset_manager/formats/kitti.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@

from datumaro.components.dataset import Dataset
from datumaro.plugins.kitti_format.format import KittiPath, write_label_map

from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, import_dm_annotations)
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons
Expand Down Expand Up @@ -41,6 +42,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
if not osp.isfile(color_map_path):
write_label_map(color_map_path, color_map)

detect_dataset(temp_dir, format_name='kitti', importer=dm_env.importers.get('kitti'))
dataset = Dataset.import_from(temp_dir, format='kitti', env=dm_env)
labels_meta = instance_data.meta[instance_data.META_FIELD]['labels']
if 'background' not in [label['name'] for _, label in labels_meta]:
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/labelme.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.formats.transformations import MaskToPolygonTransformation
from cvat.apps.dataset_manager.util import make_zip_archive
Expand All @@ -26,6 +26,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='label_me', importer=dm_env.importers.get('label_me'))
dataset = Dataset.import_from(temp_dir, 'label_me', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/lfw.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

Expand All @@ -17,6 +17,7 @@
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='lfw', importer=dm_env.importers.get('lfw'))
dataset = Dataset.import_from(temp_dir, 'lfw')
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/market1501.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import ItemTransform

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

Expand Down Expand Up @@ -74,6 +74,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
zipfile.ZipFile(src_file).extractall(temp_dir)

detect_dataset(temp_dir, format_name='market1501', importer=dm_env.importers.get('market1501'))
dataset = Dataset.import_from(temp_dir, 'market1501', env=dm_env)
dataset.transform(AttrToLabelAttr, label='market-1501')
if load_data_callback is not None:
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

Expand All @@ -32,6 +32,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='voc', importer=dm_env.importers.get('voc'))
dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/mot.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import datumaro as dm
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor
from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset
from cvat.apps.dataset_manager.util import make_zip_archive

from .registry import dm_env, exporter, importer
Expand Down Expand Up @@ -105,6 +105,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='mot_seq', importer=dm_env.importers.get('mot_seq'))
dataset = dm.Dataset.import_from(temp_dir, 'mot_seq', env=dm_env)
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
Expand Down
3 changes: 2 additions & 1 deletion cvat/apps/dataset_manager/formats/mots.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from datumaro.components.extractor import ItemTransform
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset,
find_dataset_root, match_dm_item)
from cvat.apps.dataset_manager.util import make_zip_archive

Expand Down Expand Up @@ -110,6 +110,7 @@ def _export(dst_file, temp_dir, instance_data, save_images=False):
def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs):
Archive(src_file.name).extractall(temp_dir)

detect_dataset(temp_dir, format_name='mots', importer=dm_env.importers.get('mots'))
dataset = Dataset.import_from(temp_dir, 'mots', env=dm_env)
dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs)
if load_data_callback is not None:
Expand Down
Loading

0 comments on commit da71018

Please sign in to comment.