From 77904c9cbf22c92ce1c4d35acfb6b753599e7f74 Mon Sep 17 00:00:00 2001 From: Jiangmiao Pang Date: Tue, 8 Sep 2020 22:45:04 +0800 Subject: [PATCH] Update Dataset and Support MMDET Training --- configs/quasi_dense_r50_12e.py | 35 +- qdtrack/apis/test.py | 43 --- qdtrack/apis/train.py | 11 +- qdtrack/core/evaluation/__init__.py | 3 +- qdtrack/core/evaluation/eval_hooks.py | 33 ++ qdtrack/datasets/__init__.py | 14 +- qdtrack/datasets/builder.py | 28 +- qdtrack/datasets/coco_video_dataset.py | 331 +++++++----------- qdtrack/datasets/pipelines/__init__.py | 4 +- qdtrack/datasets/pipelines/formatting.py | 54 ++- qdtrack/datasets/pipelines/loading.py | 3 +- qdtrack/models/mot/quasi_dense.py | 11 +- .../models/roi_heads/quasi_dense_roi_head.py | 4 +- .../track_heads/quasi_dense_embed_head.py | 15 +- qdtrack/version.py | 4 +- tools/test.py | 21 +- tools/train.py | 11 +- 17 files changed, 294 insertions(+), 331 deletions(-) create mode 100644 qdtrack/core/evaluation/eval_hooks.py diff --git a/configs/quasi_dense_r50_12e.py b/configs/quasi_dense_r50_12e.py index 86eae98..2dfe6d4 100644 --- a/configs/quasi_dense_r50_12e.py +++ b/configs/quasi_dense_r50_12e.py @@ -176,7 +176,7 @@ dict(type='SeqDefaultFormatBundle'), dict( type='SeqCollect', - keys=['img', 'gt_bboxes', 'gt_labels', 'gt_mids'], + keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], ref_prefix='ref'), ] test_pipeline = [ @@ -191,27 +191,28 @@ dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), - dict( - type='Collect', - keys=['img'], - meta_keys=('filename', 'img_shape', 'scale_factor', 'flip', - 'img_norm_cfg', 'frame_id')), + dict(type='VideoCollect', keys=['img']) ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=2, - train=dict( - type=dataset_type, - ann_file=dict( - DET=data_root + 'detection/annotations/train_coco-format.json', - VID=data_root + 'tracking/annotations/train_coco-format.json'), - img_prefix=dict( - DET=data_root + 'detection/images/train/', - VID=data_root + 'tracking/images/train/'), - key_img_sampler=dict(interval=1), - ref_img_sampler=dict(num=1, scope=3, method='uniform'), - pipeline=train_pipeline), + train=[ + dict( + type=dataset_type, + ann_file=data_root + 'tracking/annotations/train_coco-format.json', + img_prefix=data_root + 'tracking/images/train/', + key_img_sampler=dict(interval=1), + ref_img_sampler=dict(num_ref_imgs=1, scope=3, method='uniform'), + pipeline=train_pipeline), + dict( + type=dataset_type, + load_as_video=False, + ann_file=data_root + + 'detection/annotations/train_coco-format.json', + img_prefix=data_root + 'detection/images/train/', + pipeline=train_pipeline) + ], val=dict( type=dataset_type, ann_file=data_root + 'tracking/annotations/val_coco-format.json', diff --git a/qdtrack/apis/test.py b/qdtrack/apis/test.py index d4a9d48..d829eb9 100644 --- a/qdtrack/apis/test.py +++ b/qdtrack/apis/test.py @@ -76,7 +76,6 @@ def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): # collect results from all ranks if gpu_collect: raise NotImplementedError - # results = collect_results_gpu(results, len(dataset)) else: results = collect_results_cpu(results, len(dataset), tmpdir) return results @@ -115,47 +114,5 @@ def collect_results_cpu(result_part, size, tmpdir=None): part_file = mmcv.load(part_file) for k, v in part_file.items(): part_list[k].extend(v) - # TODO: consider the case for DET - # # sort the results - # ordered_results = [] - # for res in zip(*part_list): - # ordered_results.extend(list(res)) - # # 
the dataloader may pad some samples - # ordered_results = ordered_results[:size] - # remove tmp dir shutil.rmtree(tmpdir) return part_list - - -# def collect_results_gpu(result_part, size): -# rank, world_size = get_dist_info() -# # dump result part to tensor with pickle -# part_tensor = torch.tensor( -# bytearray(pickle.dumps(result_part)), dtype=torch.uint8, -# device='cuda') -# # gather all result part tensor shape -# shape_tensor = torch.tensor(part_tensor.shape, device='cuda') -# shape_list = [shape_tensor.clone() for _ in range(world_size)] -# dist.all_gather(shape_list, shape_tensor) -# # padding result part tensor to max length -# shape_max = torch.tensor(shape_list).max() -# part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') -# part_send[:shape_tensor[0]] = part_tensor -# part_recv_list = [ -# part_tensor.new_zeros(shape_max) for _ in range(world_size) -# ] -# # gather all result part -# dist.all_gather(part_recv_list, part_send) - -# if rank == 0: -# part_list = [] -# for recv, shape in zip(part_recv_list, shape_list): -# part_list.append( -# pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) -# # sort the results -# ordered_results = [] -# for res in zip(*part_list): -# ordered_results.extend(list(res)) -# # the dataloader may pad some samples -# ordered_results = ordered_results[:size] -# return ordered_results diff --git a/qdtrack/apis/train.py b/qdtrack/apis/train.py index 6ea741c..caa1598 100644 --- a/qdtrack/apis/train.py +++ b/qdtrack/apis/train.py @@ -3,11 +3,12 @@ from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, OptimizerHook, build_optimizer) from mmcv.utils import build_from_cfg -from mmdet.core import DistEvalHook, EvalHook, Fp16OptimizerHook -from mmdet.datasets import build_dataloader, build_dataset -from mmdet.utils import get_root_logger +from mmdet.core import Fp16OptimizerHook +from mmdet.datasets import build_dataset -from qdtrack.datasets import build_video_dataloader +from qdtrack.core import DistEvalHook, EvalHook +from qdtrack.datasets import build_dataloader +from qdtrack.utils import get_root_logger def train_model(model, @@ -91,7 +92,7 @@ def train_model(model, # register eval hooks if validate: val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) - val_dataloader = build_video_dataloader( + val_dataloader = build_dataloader( val_dataset, samples_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, diff --git a/qdtrack/core/evaluation/__init__.py b/qdtrack/core/evaluation/__init__.py index 9c5caeb..10110ba 100644 --- a/qdtrack/core/evaluation/__init__.py +++ b/qdtrack/core/evaluation/__init__.py @@ -1,3 +1,4 @@ +from .eval_hooks import EvalHook, DistEvalHook from .mot import eval_mot -__all__ = ['eval_mot'] +__all__ = ['eval_mot', 'EvalHook', 'DistEvalHook'] diff --git a/qdtrack/core/evaluation/eval_hooks.py b/qdtrack/core/evaluation/eval_hooks.py new file mode 100644 index 0000000..b329e2d --- /dev/null +++ b/qdtrack/core/evaluation/eval_hooks.py @@ -0,0 +1,33 @@ +import os.path as osp + +from mmdet.core import DistEvalHook as _DistEvalHook +from mmdet.core import EvalHook as _EvalHook + + +class EvalHook(_EvalHook): + + def after_train_epoch(self, runner): + if not self.evaluation_flag(runner): + return + from qdtrack.apis import single_gpu_test + results = single_gpu_test(runner.model, self.dataloader, show=False) + self.evaluate(runner, results) + + +class DistEvalHook(_DistEvalHook): + + def after_train_epoch(self, runner): + if not self.evaluation_flag(runner): + return + from qdtrack.apis 
import multi_gpu_test + tmpdir = self.tmpdir + if tmpdir is None: + tmpdir = osp.join(runner.work_dir, '.eval_hook') + results = multi_gpu_test( + runner.model, + self.dataloader, + tmpdir=tmpdir, + gpu_collect=self.gpu_collect) + if runner.rank == 0: + print('\n') + self.evaluate(runner, results) diff --git a/qdtrack/datasets/__init__.py b/qdtrack/datasets/__init__.py index 3070413..4331d5a 100644 --- a/qdtrack/datasets/__init__.py +++ b/qdtrack/datasets/__init__.py @@ -1,8 +1,7 @@ -from mmdet.datasets.builder import (DATASETS, PIPELINES, build_dataloader, - build_dataset) +from mmdet.datasets.builder import (DATASETS, PIPELINES, build_dataset) from .bdd_video_dataset import BDDVideoDataset -from .builder import build_video_dataloader +from .builder import build_dataloader from .coco_video_dataset import CocoVideoDataset from .parsers import CocoVID from .pipelines import (LoadMultiImagesFromFile, SeqCollect, @@ -10,9 +9,8 @@ SeqNormalize, SeqPad, SeqRandomFlip, SeqResize) __all__ = [ - 'DATASETS', 'PIPELINES', 'build_dataloader', 'build_video_dataloader', - 'build_dataset', 'CocoVID', 'BDDVideoDataset', 'CocoVideoDataset', - 'LoadMultiImagesFromFile', 'SeqLoadAnnotations', 'SeqResize', - 'SeqNormalize', 'SeqRandomFlip', 'SeqPad', 'SeqDefaultFormatBundle', - 'SeqCollect' + 'DATASETS', 'PIPELINES', 'build_dataloader', 'build_dataset', 'CocoVID', + 'BDDVideoDataset', 'CocoVideoDataset', 'LoadMultiImagesFromFile', + 'SeqLoadAnnotations', 'SeqResize', 'SeqNormalize', 'SeqRandomFlip', + 'SeqPad', 'SeqDefaultFormatBundle', 'SeqCollect' ] diff --git a/qdtrack/datasets/builder.py b/qdtrack/datasets/builder.py index 8fea942..990a52f 100644 --- a/qdtrack/datasets/builder.py +++ b/qdtrack/datasets/builder.py @@ -4,20 +4,20 @@ import numpy as np from mmcv.parallel import collate from mmcv.runner import get_dist_info -from mmdet.datasets.samplers import GroupSampler +from mmdet.datasets.samplers import DistributedGroupSampler, GroupSampler from torch.utils.data import DataLoader from .samplers import DistributedVideoSampler -def build_video_dataloader(dataset, - samples_per_gpu, - workers_per_gpu, - num_gpus=1, - dist=True, - shuffle=False, - seed=None, - **kwargs): +def build_dataloader(dataset, + samples_per_gpu, + workers_per_gpu, + num_gpus=1, + dist=True, + shuffle=True, + seed=None, + **kwargs): """Build PyTorch DataLoader. In distributed training, each GPU/process has a dataloader. @@ -38,12 +38,14 @@ def build_video_dataloader(dataset, Returns: DataLoader: A PyTorch dataloader. 
""" - if shuffle: - raise ValueError('This dataloader is specifically for video testing.') rank, world_size = get_dist_info() if dist: - sampler = DistributedVideoSampler( - dataset, world_size, rank, shuffle=False) + if shuffle: + sampler = DistributedGroupSampler(dataset, samples_per_gpu, + world_size, rank) + else: + sampler = DistributedVideoSampler( + dataset, world_size, rank, shuffle=False) batch_size = samples_per_gpu num_workers = workers_per_gpu else: diff --git a/qdtrack/datasets/coco_video_dataset.py b/qdtrack/datasets/coco_video_dataset.py index 8319476..1339b6b 100644 --- a/qdtrack/datasets/coco_video_dataset.py +++ b/qdtrack/datasets/coco_video_dataset.py @@ -2,12 +2,9 @@ import mmcv import numpy as np -from mmcv.utils import print_log from mmdet.datasets import DATASETS, CocoDataset -from mmdet.datasets.pipelines import Compose +from mmtrack.core import eval_mot -from qdtrack.core import eval_mot -from qdtrack.utils import get_root_logger from .parsers import CocoVID @@ -17,137 +14,84 @@ class CocoVideoDataset(CocoDataset): CLASSES = None def __init__(self, - ann_file, - pipeline, - img_prefix=None, - test_mode=False, - filter_empty_gt=True, + load_as_video=True, + match_gts=True, + skip_nomatch_pairs=True, key_img_sampler=dict(interval=1), - ref_img_sampler=dict(scope=3)): - self.logger = get_root_logger() - self.ann_file = ann_file - self.img_prefix = img_prefix - self.test_mode = test_mode - self.filter_empty_gt = filter_empty_gt + ref_img_sampler=dict( + scope=3, num_ref_imgs=1, method='uniform'), + *args, + **kwargs): + self.load_as_video = load_as_video + self.match_gts = match_gts + self.skip_nomatch_pairs = skip_nomatch_pairs self.key_img_sampler = key_img_sampler self.ref_img_sampler = ref_img_sampler + super().__init__(*args, **kwargs) - self.data_infos = self.load_annotations(self.ann_file) - self.pipeline = Compose(pipeline) - if not test_mode: - valid_inds = self._filter_imgs() - self.data_infos = [self.data_infos[i] for i in valid_inds] - self._set_group_flag() - - def __len__(self): - """Total number of samples of data.""" - return len(self.data_infos) - - def _filter_imgs(self, min_size=32): - valid_inds = [] - - if self.num_vid_imgs > 0: - vid_img_ids = set(_['image_id'] for _ in self.VID.anns.values()) - for i, img_info in enumerate(self.data_infos[:self.num_vid_imgs]): - if self.filter_empty_gt and ( - img_info['id'] not in vid_img_ids): - continue - if min(img_info['width'], img_info['height']) >= min_size: - valid_inds.append(i) - - if self.num_det_imgs > 0: - det_img_ids = set(_['image_id'] for _ in self.coco.anns.values()) - for i, img_info in enumerate(self.data_infos[self.num_vid_imgs:]): - if self.filter_empty_gt and img_info['id'] not in det_img_ids: - continue - if min(img_info['width'], img_info['height']) >= min_size: - valid_inds.append(i + self.num_vid_imgs) - - return valid_inds - - def key_img_sampling(self, vid_id, interval=1): - img_ids = self.VID.get_img_ids_from_vid(vid_id) - if not self.test_mode: - img_ids = img_ids[::interval] - return img_ids - - def ref_img_sampling(self, img_info, scope, num=1, method='uniform'): - assert num == 1 - if scope > 0: - vid_id = img_info['video_id'] - img_ids = self.VID.get_img_ids_from_vid(vid_id) - frame_id = img_info['frame_id'] - if method == 'uniform': - left = max(0, frame_id - scope) - right = min(frame_id + scope, len(img_ids) - 1) - valid_inds = img_ids[left:frame_id] + img_ids[frame_id + - 1:right + 1] - ref_img_id = random.choice(valid_inds) - else: - raise NotImplementedError( - 'Only 
uniform sampling is supported now.') - ref_img_info = self.VID.loadImgs([ref_img_id])[0] - ref_img_info['filename'] = ref_img_info['file_name'] - ref_img_info['type'] = 'VID' + def load_annotations(self, ann_file): + """Load annotation from annotation file.""" + if not self.load_as_video: + data_infos = super().load_annotations(ann_file) else: - ref_img_info = img_info.copy() - return ref_img_info + data_infos = self.load_video_anns(ann_file) + return data_infos def load_video_anns(self, ann_file): - data_infos = [] - - self.VID = CocoVID(ann_file) - - self.cat_ids = self.VID.get_cat_ids(cat_names=self.CLASSES) + self.coco = CocoVID(ann_file) + self.cat_ids = self.coco.get_cat_ids(cat_names=self.CLASSES) self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)} - self.vid_ids = self.VID.get_vid_ids() + data_infos = [] + self.vid_ids = self.coco.get_vid_ids() + self.img_ids = [] for vid_id in self.vid_ids: - img_ids = self.key_img_sampling(vid_id, **self.key_img_sampler) + img_ids = self.coco.get_img_ids_from_vid(vid_id) + img_ids = self.key_img_sampling(img_ids, **self.key_img_sampler) + self.img_ids.extend(img_ids) for img_id in img_ids: - info = self.VID.load_imgs([img_id])[0] + info = self.coco.load_imgs([img_id])[0] info['filename'] = info['file_name'] - info['type'] = 'VID' data_infos.append(info) - return data_infos - def load_annotations(self, ann_file): - """Load annotation from annotation file.""" - mode = 'TEST' if self.test_mode else 'TRAIN' - if isinstance(ann_file, str): - print_log( - f'Default loading {ann_file} as video dataset.', - logger=self.logger) - ann_file = dict(VID=ann_file) - self.img_prefix = dict(VID=self.img_prefix) - elif isinstance(ann_file, dict): - for k in ann_file.keys(): - if k not in ['VID', 'DET']: - raise ValueError('Keys must be DET or VID.') + def key_img_sampling(self, img_ids, interval=1): + return img_ids[::interval] + + def ref_img_sampling(self, + img_info, + scope, + num_ref_imgs=1, + method='uniform'): + if num_ref_imgs != 1 or method != 'uniform': + raise NotImplementedError + if img_info.get('frame_id', -1) < 0 or scope <= 0: + ref_img_info = img_info.copy() else: - raise TypeError('ann_file must be a str or dict.') - - data_infos = [] + vid_id = img_info['video_id'] + img_ids = self.coco.get_img_ids_from_vid(vid_id) + frame_id = img_info['frame_id'] + if method == 'uniform': + left = max(0, frame_id - scope) + right = min(frame_id + scope, len(img_ids) - 1) + valid_inds = img_ids[left:frame_id] + img_ids[frame_id + + 1:right + 1] + ref_img_id = random.choice(valid_inds) + ref_img_info = self.coco.loadImgs([ref_img_id])[0] + ref_img_info['filename'] = ref_img_info['file_name'] + return ref_img_info - if 'VID' in ann_file.keys(): - vid_data_infos = self.load_video_anns(ann_file['VID']) - data_infos.extend(vid_data_infos) - self.num_vid_imgs = len(data_infos) - - if 'DET' in ann_file.keys(): - det_data_infos = super().load_annotations(ann_file['DET']) - for info in det_data_infos: - info['type'] = 'DET' - data_infos.extend(det_data_infos) - self.num_det_imgs = len(data_infos) - self.num_vid_imgs - - print_log((f'{mode}: Load {self.num_vid_imgs} images from VID set ' - f'and {self.num_det_imgs} images from DET set.'), - logger=self.logger) - return data_infos + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + if isinstance(results, list): + for _results in results: + super().pre_pipeline(_results) + elif isinstance(results, dict): + super().pre_pipeline(results) + else: + raise TypeError('input must be 
a list or a dict') - def get_ann_info(self, idx): + def get_ann_info(self, img_info): """Get COCO annotation by index. Args: @@ -156,29 +100,66 @@ def get_ann_info(self, idx): Returns: dict: Annotation info of specified index. """ - img_info = self.data_infos[idx] img_id = img_info['id'] - api = self.coco if img_info['type'] == 'DET' else self.VID - ann_ids = api.get_ann_ids(img_ids=[img_id]) - ann_info = api.load_anns(ann_ids) - return self._parse_ann_info(self.data_infos[idx], ann_info) - - def match_gts(self, ann, ref_ann): - if 'instance_ids' in ann.keys(): - gt_instances = list(ann['instance_ids']) - ref_instances = list(ref_ann['instance_ids']) - gt_pids = np.array([ - ref_instances.index(i) if i in ref_instances else -1 - for i in gt_instances + ann_ids = self.coco.get_ann_ids(img_ids=[img_id]) + ann_info = self.coco.load_anns(ann_ids) + return self._parse_ann_info(img_info, ann_info) + + def prepare_results(self, img_info): + ann_info = self.get_ann_info(img_info) + results = dict(img_info=img_info, ann_info=ann_info) + if self.proposals is not None: + idx = self.img_ids.index(img_info['id']) + results['proposals'] = self.proposals[idx] + return results + + def match_results(self, results, ref_results): + match_indices, ref_match_indices = self._match_gts( + results['ann_info'], ref_results['ann_info']) + results['ann_info']['match_indices'] = match_indices + ref_results['ann_info']['match_indices'] = ref_match_indices + return results, ref_results + + def _match_gts(self, ann, ref_ann): + if ann.get('instance_ids', False): + ins_ids = list(ann['instance_ids']) + ref_ins_ids = list(ref_ann['instance_ids']) + match_indices = np.array([ + ref_ins_ids.index(i) if i in ref_ins_ids else -1 + for i in ins_ids ]) - ref_gt_pids = np.array([ - gt_instances.index(i) if i in gt_instances else -1 - for i in ref_instances + ref_match_indices = np.array([ + ins_ids.index(i) if i in ins_ids else -1 for i in ref_ins_ids ]) else: - gt_pids = np.arange(ann['bboxes'].shape[0], dtype=np.int64) - ref_gt_pids = gt_pids.copy() - return gt_pids, ref_gt_pids + match_indices = np.arange(ann['bboxes'].shape[0], dtype=np.int64) + ref_match_indices = match_indices.copy() + return match_indices, ref_match_indices + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys \ + introduced by pipeline. + """ + img_info = self.data_infos[idx] + ref_img_info = self.ref_img_sampling(img_info, **self.ref_img_sampler) + + results = self.prepare_results(img_info) + ref_results = self.prepare_results(ref_img_info) + + if self.match_gts: + results, ref_results = self.match_results(results, ref_results) + nomatch = (results['ann_info']['match_indices'] == -1).all() + if self.skip_nomatch_pairs and nomatch: + return None + + self.pre_pipeline([results, ref_results]) + return self.pipeline([results, ref_results]) def _parse_ann_info(self, img_info, ann_info): """Parse bbox and mask annotation. 
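The `_match_gts` helper added above is what later fills `gt_match_indices`: for every ground-truth box in the key frame it records the index of the same instance in the reference frame, or -1 when that instance is absent there. A minimal standalone sketch of that index matching, with purely illustrative `instance_ids`:

import numpy as np

# Toy key-frame / reference-frame annotations; the instance ids are made up.
ann = dict(instance_ids=[5, 7, 9])
ref_ann = dict(instance_ids=[9, 5])

ins_ids = list(ann['instance_ids'])
ref_ins_ids = list(ref_ann['instance_ids'])

# Index of each key-frame instance among the reference-frame instances,
# or -1 when it has no counterpart in the reference frame.
match_indices = np.array(
    [ref_ins_ids.index(i) if i in ref_ins_ids else -1 for i in ins_ids])
print(match_indices)  # -> [ 1 -1  0]

When every entry is -1, `prepare_train_img` above returns None so that the pair can be skipped whenever `skip_nomatch_pairs` is enabled.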
@@ -196,8 +177,7 @@ def _parse_ann_info(self, img_info, ann_info): gt_labels = [] gt_bboxes_ignore = [] gt_masks_ann = [] - if img_info['type'] == 'VID': - gt_instance_ids = [] + gt_instance_ids = [] for i, ann in enumerate(ann_info): if ann.get('ignore', False): @@ -243,81 +223,11 @@ def _parse_ann_info(self, img_info, ann_info): masks=gt_masks_ann, seg_map=seg_map) - if img_info['type'] == 'VID': + if self.load_as_video: ann['instance_ids'] = gt_instance_ids return ann - def pre_pipeline(self, results): - """Prepare results dict for pipeline.""" - - def _prepare(_results): - _results['img_prefix'] = self.img_prefix[_results['img_info'] - ['type']] - _results['frame_id'] = _results['img_info'].get('frame_id', -1) - _results['bbox_fields'] = [] - - if isinstance(results, list): - for _results in results: - _prepare(_results) - elif isinstance(results, dict): - _prepare(results) - else: - raise TypeError('Input must be a list or dict.') - - def prepare_train_img(self, idx): - """Get training data and annotations after pipeline. - - Args: - idx (int): Index of data. - - Returns: - dict: Training data and annotation after pipeline with new keys \ - introduced by pipeline. - """ - - img_info = self.data_infos[idx] - ann_info = self.get_ann_info(idx) - results = dict(img_info=img_info, ann_info=ann_info) - - if img_info['type'] == 'VID': - ref_img_info = self.ref_img_sampling(img_info, - **self.ref_img_sampler) - ref_ann_ids = self.VID.get_ann_ids(img_ids=[ref_img_info['id']]) - ref_ann_info = self.VID.load_anns(ref_ann_ids) - ref_ann_info = self._parse_ann_info(ref_img_info, ref_ann_info) - ref_results = dict(img_info=ref_img_info, ann_info=ref_ann_info) - else: - ref_results = results.copy() - - mids, ref_mids = self.match_gts(results['ann_info'], - ref_results['ann_info']) - - if (mids == -1).all(): - return None - else: - results['ann_info']['mids'] = mids - ref_results['ann_info']['mids'] = ref_mids - self.pre_pipeline([results, ref_results]) - - return self.pipeline([results, ref_results]) - - def prepare_test_img(self, idx): - """Get testing data after pipeline. - - Args: - idx (int): Index of data. - - Returns: - dict: Testing data after pipeline with new keys intorduced by \ - piepline. 
- """ - - img_info = self.data_infos[idx] - results = dict(img_info=img_info) - self.pre_pipeline(results) - return self.pipeline(results) - def format_track_results(self, results, **kwargs): pass @@ -330,6 +240,7 @@ def evaluate(self, proposal_nums=(100, 300, 1000), iou_thr=None, metric_items=None): + # evaluate for detectors without tracker eval_results = dict() metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['bbox', 'segm', 'track'] @@ -347,8 +258,6 @@ def evaluate(self, super_results.append((bbox, segm)) else: super_results = results['bbox_result'] - self.img_ids = [_['id'] for _ in self.data_infos] - self.coco = self.VID super_eval_results = super().evaluate( results=super_results, metric=super_metrics, diff --git a/qdtrack/datasets/pipelines/__init__.py b/qdtrack/datasets/pipelines/__init__.py index 8621f21..2e4b751 100644 --- a/qdtrack/datasets/pipelines/__init__.py +++ b/qdtrack/datasets/pipelines/__init__.py @@ -1,9 +1,9 @@ -from .formatting import SeqCollect, SeqDefaultFormatBundle +from .formatting import VideoCollect, SeqCollect, SeqDefaultFormatBundle from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations from .transforms import SeqNormalize, SeqPad, SeqRandomFlip, SeqResize __all__ = [ 'LoadMultiImagesFromFile', 'SeqLoadAnnotations', 'SeqResize', 'SeqNormalize', 'SeqRandomFlip', 'SeqPad', 'SeqDefaultFormatBundle', - 'SeqCollect' + 'SeqCollect', 'VideoCollect' ] diff --git a/qdtrack/datasets/pipelines/formatting.py b/qdtrack/datasets/pipelines/formatting.py index 09722d4..55be34c 100644 --- a/qdtrack/datasets/pipelines/formatting.py +++ b/qdtrack/datasets/pipelines/formatting.py @@ -10,13 +10,63 @@ def __call__(self, results): outs = [] for _results in results: _results = super().__call__(_results) - _results['gt_mids'] = DC(to_tensor(_results['gt_mids'])) + _results['gt_match_indices'] = DC( + to_tensor(_results['gt_match_indices'])) outs.append(_results) return outs @PIPELINES.register_module() -class SeqCollect(Collect): +class VideoCollect(Collect): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "proposals", "gt_bboxes", + "gt_bboxes_ignore", "gt_labels", and/or "gt_masks". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple \ + (h, w, c). Note that images may be zero padded on the \ + bottom/right if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. 
+ Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', + 'pad_shape', 'scale_factor', 'flip', 'flip_direction', + 'img_norm_cfg')`` + """ + + def __init__(self, + keys, + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg', 'frame_id')): + self.keys = keys + self.meta_keys = meta_keys + + +@PIPELINES.register_module(force=True) +class SeqCollect(VideoCollect): def __init__(self, keys, diff --git a/qdtrack/datasets/pipelines/loading.py b/qdtrack/datasets/pipelines/loading.py index 4d61d82..03059e1 100644 --- a/qdtrack/datasets/pipelines/loading.py +++ b/qdtrack/datasets/pipelines/loading.py @@ -34,7 +34,8 @@ def _load_ins_ids(self, results): dict: The dict contains loaded label annotations. """ - results['gt_mids'] = results['ann_info']['mids'].copy() + results['gt_match_indices'] = results['ann_info'][ + 'match_indices'].copy() return results diff --git a/qdtrack/models/mot/quasi_dense.py b/qdtrack/models/mot/quasi_dense.py index 36b6984..e6b36b1 100644 --- a/qdtrack/models/mot/quasi_dense.py +++ b/qdtrack/models/mot/quasi_dense.py @@ -23,12 +23,12 @@ def forward_train(self, img_metas, gt_bboxes, gt_labels, - gt_mids, + gt_match_indices, ref_img, ref_img_metas, ref_gt_bboxes, ref_gt_labels, - ref_gt_mids, + ref_gt_match_indices, gt_bboxes_ignore=None, gt_masks=None, ref_gt_bboxes_ignore=None, @@ -53,9 +53,10 @@ def forward_train(self, ref_proposals = self.rpn_head.simple_test_rpn(ref_x, ref_img_metas) roi_losses = self.roi_head.forward_train( - x, img_metas, proposal_list, gt_bboxes, gt_labels, gt_mids, ref_x, - ref_img_metas, ref_proposals, ref_gt_bboxes, ref_gt_labels, - gt_bboxes_ignore, gt_masks, ref_gt_bboxes_ignore, **kwargs) + x, img_metas, proposal_list, gt_bboxes, gt_labels, + gt_match_indices, ref_x, ref_img_metas, ref_proposals, + ref_gt_bboxes, ref_gt_labels, gt_bboxes_ignore, gt_masks, + ref_gt_bboxes_ignore, **kwargs) losses.update(roi_losses) return losses diff --git a/qdtrack/models/roi_heads/quasi_dense_roi_head.py b/qdtrack/models/roi_heads/quasi_dense_roi_head.py index 06df373..7662a59 100644 --- a/qdtrack/models/roi_heads/quasi_dense_roi_head.py +++ b/qdtrack/models/roi_heads/quasi_dense_roi_head.py @@ -66,7 +66,7 @@ def forward_train(self, proposal_list, gt_bboxes, gt_labels, - gt_mids, + gt_match_indices, ref_x, ref_img_metas, ref_proposals, @@ -119,7 +119,7 @@ def forward_train(self, key_sampling_results, ref_sampling_results) asso_targets = self.track_head.get_track_targets( - gt_mids, key_sampling_results, ref_sampling_results) + gt_match_indices, key_sampling_results, ref_sampling_results) loss_track = self.track_head.loss(*match_feats, *asso_targets) losses.update(loss_track) diff --git a/qdtrack/models/roi_heads/track_heads/quasi_dense_embed_head.py b/qdtrack/models/roi_heads/track_heads/quasi_dense_embed_head.py index 80104c7..e43853d 100644 --- a/qdtrack/models/roi_heads/track_heads/quasi_dense_embed_head.py +++ b/qdtrack/models/roi_heads/track_heads/quasi_dense_embed_head.py @@ -98,18 +98,19 @@ def forward(self, x): x = self.fc_embed(x) return x - def get_track_targets(self, gt_mids, key_sampling_results, + def get_track_targets(self, gt_match_indices, key_sampling_results, ref_sampling_results): track_targets = [] track_weights = [] - for _gt_mids, key_res, ref_res in zip(gt_mids, key_sampling_results, - ref_sampling_results): - targets = _gt_mids.new_zeros( + for _gt_match_indices, key_res, ref_res in zip(gt_match_indices, + key_sampling_results, + 
ref_sampling_results): + targets = _gt_match_indices.new_zeros( (key_res.pos_bboxes.size(0), ref_res.bboxes.size(0)), dtype=torch.int) - _mids = _gt_mids[key_res.pos_assigned_gt_inds] - pos2pos = (_mids.view(-1, 1) == ref_res.pos_assigned_gt_inds.view( - 1, -1)).int() + _match_indices = _gt_match_indices[key_res.pos_assigned_gt_inds] + pos2pos = (_match_indices.view( + -1, 1) == ref_res.pos_assigned_gt_inds.view(1, -1)).int() targets[:, :pos2pos.size(1)] = pos2pos weights = (targets.sum(dim=1) > 0).float() track_targets.append(targets) diff --git a/qdtrack/version.py b/qdtrack/version.py index 179d5d9..d024a46 100644 --- a/qdtrack/version.py +++ b/qdtrack/version.py @@ -1,5 +1,5 @@ # GENERATED VERSION FILE -# TIME: Mon Sep 7 13:44:25 2020 -__version__ = '0.1.0+92c5230' +# TIME: Mon Sep 7 10:37:20 2020 +__version__ = '0.1.0+f0ca1b0' short_version = '0.1.0' version_info = (0, 1, 0) diff --git a/tools/test.py b/tools/test.py index 9428d48..750a29e 100644 --- a/tools/test.py +++ b/tools/test.py @@ -10,10 +10,6 @@ from mmdet.core import wrap_fp16_model from mmdet.datasets import build_dataset -from qdtrack.apis import multi_gpu_test, single_gpu_test -from qdtrack.datasets import build_video_dataloader -from qdtrack.models import build_model - def parse_args(): parser = argparse.ArgumentParser(description='qdtrack test model') @@ -83,6 +79,14 @@ def main(): raise ValueError('The output file must be a pkl file.') cfg = Config.fromfile(args.config) + if cfg.get('USE_MMDET', False): + from mmdet.apis import multi_gpu_test, single_gpu_test + from mmdet.models import build_detector as build_model + from mmdet.datasets import build_dataloader + else: + from qdtrack.apis import multi_gpu_test, single_gpu_test + from qdtrack.models import build_model + from qdtrack.datasets import build_dataloader if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) # set cudnn_benchmark @@ -99,9 +103,8 @@ def main(): init_dist(args.launcher, **cfg.dist_params) # build the dataloader - # TODO: support multiple images per gpu (only minor changes are needed) dataset = build_dataset(cfg.data.test) - data_loader = build_video_dataloader( + data_loader = build_dataloader( dataset, samples_per_gpu=1, workers_per_gpu=cfg.data.workers_per_gpu, @@ -115,14 +118,14 @@ def main(): wrap_fp16_model(model) checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') + if args.fuse_conv_bn: + model = fuse_conv_bn(model) + if 'CLASSES' in checkpoint['meta']: model.CLASSES = checkpoint['meta']['CLASSES'] else: model.CLASSES = dataset.CLASSES - if args.fuse_conv_bn: - model = fuse_conv_bn(model) - if not distributed: model = MMDataParallel(model, device_ids=[0]) outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, diff --git a/tools/train.py b/tools/train.py index 8e830bb..ce510cc 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,11 +9,9 @@ from mmcv import Config, DictAction from mmcv.runner import init_dist from mmdet.apis import set_random_seed +from mmdet.datasets import build_dataset from qdtrack import __version__ -from qdtrack.apis import train_model -from qdtrack.datasets import build_dataset -from qdtrack.models import build_model from qdtrack.utils import collect_env, get_root_logger @@ -67,6 +65,13 @@ def main(): args = parse_args() cfg = Config.fromfile(args.config) + + if cfg.get('USE_MMDET', False): + from mmdet.apis import train_detector as train_model + from mmdet.models import build_detector as build_model + else: + from qdtrack.apis import train_model + from 
qdtrack.models import build_model
     if args.cfg_options is not None:
         cfg.merge_from_dict(args.cfg_options)
     # set cudnn_benchmark
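The `cfg.get('USE_MMDET', False)` switch added to tools/train.py and tools/test.py above lets a config run through the plain MMDetection entry points (`train_detector`, `build_detector`, mmdet's data loaders) instead of the qdtrack ones. A hedged sketch of a config that would opt in; the base config filename is a placeholder and not part of this patch:

# det_only_config.py -- hypothetical config for training a plain detector
# through mmdet's train_detector/build_detector instead of the qdtrack stack.
_base_ = './faster_rcnn_r50_fpn.py'  # placeholder detection base config
USE_MMDET = True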
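For the default (non-`USE_MMDET`) path, the unified `build_dataloader` from qdtrack/datasets/builder.py above now serves both training and evaluation, picking `DistributedGroupSampler` when `shuffle=True` and the video-ordered `DistributedVideoSampler` otherwise. A small usage sketch, assuming `cfg` is an already loaded config for this repo:

from mmdet.datasets import build_dataset
from qdtrack.datasets import build_dataloader

# Training loader: shuffled, grouped batches.
train_dataset = build_dataset(cfg.data.train)
train_loader = build_dataloader(
    train_dataset,
    samples_per_gpu=cfg.data.samples_per_gpu,
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=True,
    shuffle=True)

# Validation loader: one sample per GPU, frames kept in video order.
val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
val_loader = build_dataloader(
    val_dataset,
    samples_per_gpu=1,
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=True,
    shuffle=False)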