From b1a7a8b176cf8634f7ae9637ef2841e94b813676 Mon Sep 17 00:00:00 2001 From: zhuolisam Date: Sat, 16 Mar 2024 12:51:39 +0800 Subject: [PATCH] add tracker --- pyproject.toml | 10 +- src/juxtapose/rtm.py | 24 +- src/juxtapose/trackers/__init__.py | 13 +- src/juxtapose/trackers/boxmot/__init__.py | 37 + .../trackers/boxmot/appearance/__init__.py | 16 + .../boxmot/appearance/backbones/__init__.py | 108 ++ .../boxmot/appearance/backbones/osnet.py | 560 ++++++ .../boxmot/appearance/reid_model_factory.py | 214 +++ .../boxmot/appearance/reid_multibackend.py | 324 ++++ .../trackers/boxmot/configs/__init__.py | 1 + .../trackers/boxmot/configs/botsort.yaml | 12 + .../trackers/boxmot/configs/bytetrack.yaml | 5 + .../trackers/boxmot/configs/deepocsort.yaml | 17 + .../trackers/boxmot/configs/hybridsort.yaml | 9 + .../trackers/boxmot/configs/ocsort.yaml | 11 + .../trackers/boxmot/configs/strongsort.yaml | 9 + .../trackers/boxmot/motion/__init__.py | 1 + .../trackers/boxmot/motion/cmc/__init__.py | 19 + .../boxmot/motion/cmc/cmc_interface.py | 21 + .../trackers/boxmot/motion/cmc/ecc.py | 159 ++ .../trackers/boxmot/motion/cmc/orb.py | 281 +++ .../trackers/boxmot/motion/cmc/sift.py | 304 +++ .../trackers/boxmot/motion/cmc/sof.py | 254 +++ .../boxmot/motion/kalman_filters/__init__.py | 1 + .../kalman_filters/adapters/__init__.py | 11 + .../adapters/botsort_kf_adapter.py | 170 ++ .../adapters/bytetrack_kf_adapter.py | 170 ++ .../adapters/hybridsort_kf_adapter.py | 1555 +++++++++++++++ .../adapters/ocsort_kf_adapter.py | 8 + .../adapters/strongsort_kf_adapter.py | 171 ++ .../motion/kalman_filters/kalman_filter.py | 1676 +++++++++++++++++ .../boxmot/postprocessing/__init__.py | 1 + .../trackers/boxmot/postprocessing/gsi.py | 93 + src/juxtapose/trackers/boxmot/tracker_zoo.py | 123 ++ .../trackers/boxmot/trackers/__init__.py | 1 + .../boxmot/trackers/deepocsort/__init__.py | 1 + .../boxmot/trackers/deepocsort/deep_ocsort.py | 585 ++++++ .../boxmot/trackers/hybridsort/association.py | 684 +++++++ .../boxmot/trackers/hybridsort/hybridsort.py | 737 ++++++++ .../boxmot/trackers/ocsort/__init__.py | 1 + .../trackers/boxmot/trackers/ocsort/ocsort.py | 385 ++++ .../boxmot/trackers/strongsort/__init__.py | 1 + .../trackers/strongsort/sort/__init__.py | 1 + .../trackers/strongsort/sort/detection.py | 41 + .../trackers/strongsort/sort/iou_matching.py | 87 + .../strongsort/sort/linear_assignment.py | 194 ++ .../boxmot/trackers/strongsort/sort/track.py | 200 ++ .../trackers/strongsort/sort/tracker.py | 182 ++ .../boxmot/trackers/strongsort/strong_sort.py | 102 + .../trackers/boxmot/utils/__init__.py | 61 + .../trackers/boxmot/utils/association.py | 283 +++ src/juxtapose/trackers/boxmot/utils/checks.py | 38 + .../boxmot/utils/convert_rtm_boxmot.py | 18 + src/juxtapose/trackers/boxmot/utils/iou.py | 172 ++ .../trackers/boxmot/utils/matching.py | 411 ++++ src/juxtapose/trackers/boxmot/utils/ops.py | 97 + .../trackers/boxmot/utils/torch_utils.py | 76 + src/juxtapose/trackers/track.py | 78 +- 58 files changed, 10803 insertions(+), 21 deletions(-) create mode 100644 src/juxtapose/trackers/boxmot/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/appearance/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/appearance/backbones/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/appearance/backbones/osnet.py create mode 100644 src/juxtapose/trackers/boxmot/appearance/reid_model_factory.py create mode 100644 src/juxtapose/trackers/boxmot/appearance/reid_multibackend.py create mode 100644 src/juxtapose/trackers/boxmot/configs/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/configs/botsort.yaml create mode 100644 src/juxtapose/trackers/boxmot/configs/bytetrack.yaml create mode 100644 src/juxtapose/trackers/boxmot/configs/deepocsort.yaml create mode 100644 src/juxtapose/trackers/boxmot/configs/hybridsort.yaml create mode 100644 src/juxtapose/trackers/boxmot/configs/ocsort.yaml create mode 100644 src/juxtapose/trackers/boxmot/configs/strongsort.yaml create mode 100644 src/juxtapose/trackers/boxmot/motion/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/cmc_interface.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/ecc.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/orb.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/sift.py create mode 100644 src/juxtapose/trackers/boxmot/motion/cmc/sof.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/botsort_kf_adapter.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/bytetrack_kf_adapter.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/hybridsort_kf_adapter.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/ocsort_kf_adapter.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/strongsort_kf_adapter.py create mode 100644 src/juxtapose/trackers/boxmot/motion/kalman_filters/kalman_filter.py create mode 100644 src/juxtapose/trackers/boxmot/postprocessing/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/postprocessing/gsi.py create mode 100644 src/juxtapose/trackers/boxmot/tracker_zoo.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/deepocsort/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/deepocsort/deep_ocsort.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/hybridsort/association.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/hybridsort/hybridsort.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/ocsort/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/ocsort/ocsort.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/detection.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/iou_matching.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/linear_assignment.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/track.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/sort/tracker.py create mode 100644 src/juxtapose/trackers/boxmot/trackers/strongsort/strong_sort.py create mode 100644 src/juxtapose/trackers/boxmot/utils/__init__.py create mode 100644 src/juxtapose/trackers/boxmot/utils/association.py create mode 100644 src/juxtapose/trackers/boxmot/utils/checks.py create mode 100644 src/juxtapose/trackers/boxmot/utils/convert_rtm_boxmot.py create mode 100644 src/juxtapose/trackers/boxmot/utils/iou.py create mode 100644 src/juxtapose/trackers/boxmot/utils/matching.py create mode 100644 src/juxtapose/trackers/boxmot/utils/ops.py create mode 100644 src/juxtapose/trackers/boxmot/utils/torch_utils.py diff --git a/pyproject.toml b/pyproject.toml index 43e3624..24a5ea6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.8" +python = "^3.9" packaging = {version = "23.1"} aenum = {version = "3.1.15"} tqdm = {version = "^4.65.0"} @@ -83,6 +83,14 @@ mediapy = {version = "*"} einshape = {version = "*"} ipympl = {version = "*"} +# boxmot +filterpy = {version = "*"} +scikit-learn = {version = "*"} +scipy = "^1.12.0" +loguru = "^0.7.2" +gdown = "^5.0.1" +pyyaml = "^6.0.1" + # [[tool.poetry.source]] # name = "torch+cu118" # url = "https://download.pytorch.org/whl/cu118" diff --git a/src/juxtapose/rtm.py b/src/juxtapose/rtm.py index 8e7c033..906ae04 100644 --- a/src/juxtapose/rtm.py +++ b/src/juxtapose/rtm.py @@ -13,7 +13,7 @@ from juxtapose.data import load_inference_source from juxtapose.detectors import get_detector from juxtapose.pose.rtmpose import RTMPose -from juxtapose.trackers import Tracker, TRACKER_MAP +from juxtapose.trackers import Tracker, TRACKER_MAP, BOXMOT_TRACKER_MAP from juxtapose.types import ( DETECTOR_TYPES, POSE_ESTIMATOR_TYPES, @@ -71,10 +71,13 @@ def __init__( self.rtmpose = RTMPose(pose.split("-")[1], device=device) self.annotator = annotator self.box_annotator = sv.BoxAnnotator() - + self.device = device self.tracker_type = tracker - if tracker not in TRACKER_MAP.keys(): + if ( + tracker not in TRACKER_MAP.keys() + and tracker not in BOXMOT_TRACKER_MAP.keys() + ): self.tracker_type = None self.dataset = None @@ -85,12 +88,13 @@ def setup_source(self, source, imgsz=640, vid_stride=1) -> None: source=source, imgsz=imgsz, vid_stride=vid_stride ) self.source_type = self.dataset.source_type - self.vid_path, self.vid_writer = [None] * self.dataset.bs, [ - None - ] * self.dataset.bs + self.vid_path, self.vid_writer = ( + [None] * self.dataset.bs, + [None] * self.dataset.bs, + ) def setup_tracker(self) -> None: - self.tracker = Tracker(self.tracker_type).tracker + self.tracker = Tracker(self.tracker_type, self.device) def setup_detector(self, det, device, captions): return get_detector(det, device=device, captions=captions) @@ -214,11 +218,7 @@ def stream_inference( # multi object tracking (adjust bounding boxes) with profilers[1]: if self.tracker_type: - detections: Detections = self.tracker.update( - bboxes=detections.xyxy, - confidence=detections.confidence, - labels=detections.labels, - ) + detections: Detections = self.tracker.update(detections, im) track_id = detections.track_id else: track_id = np.array([""] * len(detections.xyxy)) diff --git a/src/juxtapose/trackers/__init__.py b/src/juxtapose/trackers/__init__.py index 01b8e56..a12b26b 100644 --- a/src/juxtapose/trackers/__init__.py +++ b/src/juxtapose/trackers/__init__.py @@ -1,8 +1,17 @@ from .bot_sort import BOTSORT from .byte_tracker import BYTETracker -from .track import Tracker, TRACKER_MAP +from .track import Tracker, TRACKER_MAP, BOXMOT_TRACKER_MAP +from .boxmot import StrongSORT, DeepOCSORT # from .track import register_tracker -__all__ = "Tracker", "TRACKER_MAP", "BOTSORT", "BYTETracker" # allow simpler import +__all__ = ( + "Tracker", + "TRACKER_MAP", + "BOXMOT_TRACKER_MAP", + "BOTSORT", + "BYTETracker", + "StrongSORT", + "DeepOCSORT", +) # allow simpler import diff --git a/src/juxtapose/trackers/boxmot/__init__.py b/src/juxtapose/trackers/boxmot/__init__.py new file mode 100644 index 0000000..7ebd653 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/__init__.py @@ -0,0 +1,37 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +__version__ = "10.0.41" + +from juxtapose.trackers.boxmot.postprocessing.gsi import gsi +from juxtapose.trackers.boxmot.tracker_zoo import create_tracker, get_tracker_config + +# from juxtapose.trackers.boxmot.trackers.botsort.bot_sort import BoTSORT +# from juxtapose.trackers.boxmot.trackers.bytetrack.byte_tracker import BYTETracker +from juxtapose.trackers.boxmot.trackers.deepocsort.deep_ocsort import ( + DeepOCSort as DeepOCSORT, +) +from juxtapose.trackers.boxmot.trackers.hybridsort.hybridsort import HybridSORT +from juxtapose.trackers.boxmot.trackers.ocsort.ocsort import OCSort as OCSORT +from juxtapose.trackers.boxmot.trackers.strongsort.strong_sort import StrongSORT + +TRACKERS = [ + # "bytetrack", + # "botsort", + "strongsort", + "ocsort", + "deepocsort", + "hybridsort", +] + +__all__ = ( + "__version__", + "StrongSORT", + "OCSORT", + # "BYTETracker", + # "BoTSORT", + "DeepOCSORT", + "HybridSORT", + "create_tracker", + "get_tracker_config", + "gsi", +) diff --git a/src/juxtapose/trackers/boxmot/appearance/__init__.py b/src/juxtapose/trackers/boxmot/appearance/__init__.py new file mode 100644 index 0000000..b060aea --- /dev/null +++ b/src/juxtapose/trackers/boxmot/appearance/__init__.py @@ -0,0 +1,16 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import pandas as pd + + +def export_formats(): + # yolo tracking export formats + x = [ + ["PyTorch", "-", ".pt", True, True], + ["TorchScript", "torchscript", ".torchscript", True, True], + ["ONNX", "onnx", ".onnx", True, True], + ["OpenVINO", "openvino", "_openvino_model", True, False], + ["TensorRT", "engine", ".engine", False, True], + ["TensorFlow Lite", "tflite", ".tflite", True, False], + ] + return pd.DataFrame(x, columns=["Format", "Argument", "Suffix", "CPU", "GPU"]) diff --git a/src/juxtapose/trackers/boxmot/appearance/backbones/__init__.py b/src/juxtapose/trackers/boxmot/appearance/backbones/__init__.py new file mode 100644 index 0000000..9e2a1df --- /dev/null +++ b/src/juxtapose/trackers/boxmot/appearance/backbones/__init__.py @@ -0,0 +1,108 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from __future__ import absolute_import + +# from juxtapose.trackers.boxmot.appearance.backbones.clip.make_model import make_model +# from juxtapose.trackers.boxmot.appearance.backbones.hacnn import HACNN +# from juxtapose.trackers.boxmot.appearance.backbones.lmbn.lmbn_n import LMBN_n +# from juxtapose.trackers.boxmot.appearance.backbones.mlfn import mlfn +# from juxtapose.trackers.boxmot.appearance.backbones.mobilenetv2 import ( +# mobilenetv2_x1_0, +# mobilenetv2_x1_4, +# ) +from juxtapose.trackers.boxmot.appearance.backbones.osnet import ( + osnet_ibn_x1_0, + osnet_x0_5, + osnet_x0_25, + osnet_x0_75, + osnet_x1_0, +) + +# from juxtapose.trackers.boxmot.appearance.backbones.osnet_ain import ( +# osnet_ain_x0_5, +# osnet_ain_x0_25, +# osnet_ain_x0_75, +# osnet_ain_x1_0, +# ) +# from juxtapose.trackers.boxmot.appearance.backbones.resnet import resnet50, resnet101 + +NR_CLASSES_DICT = {"market1501": 751, "duke": 702, "veri": 576, "vehicleid": 576} + + +__model_factory = { + # image classification models + # "resnet50": resnet50, + # "resnet101": resnet101, + # "mobilenetv2_x1_0": mobilenetv2_x1_0, + # "mobilenetv2_x1_4": mobilenetv2_x1_4, + # reid-specific models + # "hacnn": HACNN, + # "mlfn": mlfn, + "osnet_x1_0": osnet_x1_0, + "osnet_x0_75": osnet_x0_75, + "osnet_x0_5": osnet_x0_5, + "osnet_x0_25": osnet_x0_25, + # "osnet_ibn_x1_0": osnet_ibn_x1_0, + # "osnet_ain_x1_0": osnet_ain_x1_0, + # "osnet_ain_x0_75": osnet_ain_x0_75, + # "osnet_ain_x0_5": osnet_ain_x0_5, + # "osnet_ain_x0_25": osnet_ain_x0_25, + # "lmbn_n": LMBN_n, + # "clip": make_model, +} + + +def show_avai_models(): + """Displays available models. + + Examples:: + >>> from torchreid import models + >>> models.show_avai_models() + """ + print(list(__model_factory.keys())) + + +def get_nr_classes(weigths): + num_classes = [ + value for key, value in NR_CLASSES_DICT.items() if key in str(weigths.name) + ] + if len(num_classes) == 0: + num_classes = 1 + else: + num_classes = num_classes[0] + return num_classes + + +def build_model(name, num_classes, loss="softmax", pretrained=True, use_gpu=True): + """A function wrapper for building a model. + + Args: + name (str): model name. + num_classes (int): number of training identities. + loss (str, optional): loss function to optimize the model. Currently + supports "softmax" and "triplet". Default is "softmax". + pretrained (bool, optional): whether to load ImageNet-pretrained weights. + Default is True. + use_gpu (bool, optional): whether to use gpu. Default is True. + + Returns: + nn.Module + + Examples:: + >>> from torchreid import models + >>> model = models.build_model('resnet50', 751, loss='softmax') + """ + avai_models = list(__model_factory.keys()) + if name not in avai_models: + raise KeyError("Unknown model: {}. Must be one of {}".format(name, avai_models)) + # if "clip" in name: + # from juxtapose.trackers.boxmot.appearance.backbones.clip.config.defaults import ( + # _C as cfg, + # ) + + # return __model_factory[name]( + # cfg, num_class=num_classes, camera_num=2, view_num=1 + # ) + return __model_factory[name]( + num_classes=num_classes, loss=loss, pretrained=pretrained, use_gpu=use_gpu + ) diff --git a/src/juxtapose/trackers/boxmot/appearance/backbones/osnet.py b/src/juxtapose/trackers/boxmot/appearance/backbones/osnet.py new file mode 100644 index 0000000..a929dfd --- /dev/null +++ b/src/juxtapose/trackers/boxmot/appearance/backbones/osnet.py @@ -0,0 +1,560 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from __future__ import absolute_import, division + +import warnings + +import torch +from torch import nn +from torch.nn import functional as F + +__all__ = ["osnet_x1_0", "osnet_x0_75", "osnet_x0_5", "osnet_x0_25", "osnet_ibn_x1_0"] + +pretrained_urls = { + "osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY", + "osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq", + "osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i", + "osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs", + "osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l", +} + + +########## +# Basic layers +########## +class ConvLayer(nn.Module): + """Convolution layer (conv + bn + relu).""" + + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + groups=1, + IN=False, + ): + super(ConvLayer, self).__init__() + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, + stride=stride, + padding=padding, + bias=False, + groups=groups, + ) + if IN: + self.bn = nn.InstanceNorm2d(out_channels, affine=True) + else: + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Conv1x1(nn.Module): + """1x1 convolution + bn + relu.""" + + def __init__(self, in_channels, out_channels, stride=1, groups=1): + super(Conv1x1, self).__init__() + self.conv = nn.Conv2d( + in_channels, + out_channels, + 1, + stride=stride, + padding=0, + bias=False, + groups=groups, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class Conv1x1Linear(nn.Module): + """1x1 convolution + bn (w/o non-linearity).""" + + def __init__(self, in_channels, out_channels, stride=1): + super(Conv1x1Linear, self).__init__() + self.conv = nn.Conv2d( + in_channels, out_channels, 1, stride=stride, padding=0, bias=False + ) + self.bn = nn.BatchNorm2d(out_channels) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + return x + + +class Conv3x3(nn.Module): + """3x3 convolution + bn + relu.""" + + def __init__(self, in_channels, out_channels, stride=1, groups=1): + super(Conv3x3, self).__init__() + self.conv = nn.Conv2d( + in_channels, + out_channels, + 3, + stride=stride, + padding=1, + bias=False, + groups=groups, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv(x) + x = self.bn(x) + x = self.relu(x) + return x + + +class LightConv3x3(nn.Module): + """Lightweight 3x3 convolution. + + 1x1 (linear) + dw 3x3 (nonlinear). + """ + + def __init__(self, in_channels, out_channels): + super(LightConv3x3, self).__init__() + self.conv1 = nn.Conv2d( + in_channels, out_channels, 1, stride=1, padding=0, bias=False + ) + self.conv2 = nn.Conv2d( + out_channels, + out_channels, + 3, + stride=1, + padding=1, + bias=False, + groups=out_channels, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.bn(x) + x = self.relu(x) + return x + + +########## +# Building blocks for omni-scale feature learning +########## +class ChannelGate(nn.Module): + """A mini-network that generates channel-wise gates conditioned on input tensor.""" + + def __init__( + self, + in_channels, + num_gates=None, + return_gates=False, + gate_activation="sigmoid", + reduction=16, + layer_norm=False, + ): + super(ChannelGate, self).__init__() + if num_gates is None: + num_gates = in_channels + self.return_gates = return_gates + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.fc1 = nn.Conv2d( + in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0 + ) + self.norm1 = None + if layer_norm: + self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1)) + self.relu = nn.ReLU(inplace=True) + self.fc2 = nn.Conv2d( + in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0 + ) + if gate_activation == "sigmoid": + self.gate_activation = nn.Sigmoid() + elif gate_activation == "relu": + self.gate_activation = nn.ReLU(inplace=True) + elif gate_activation == "linear": + self.gate_activation = None + else: + raise RuntimeError("Unknown gate activation: {}".format(gate_activation)) + + def forward(self, x): + input = x + x = self.global_avgpool(x) + x = self.fc1(x) + if self.norm1 is not None: + x = self.norm1(x) + x = self.relu(x) + x = self.fc2(x) + if self.gate_activation is not None: + x = self.gate_activation(x) + if self.return_gates: + return x + return input * x + + +class OSBlock(nn.Module): + """Omni-scale feature learning block.""" + + def __init__( + self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs + ): + super(OSBlock, self).__init__() + mid_channels = out_channels // bottleneck_reduction + self.conv1 = Conv1x1(in_channels, mid_channels) + self.conv2a = LightConv3x3(mid_channels, mid_channels) + self.conv2b = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.conv2c = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.conv2d = nn.Sequential( + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + LightConv3x3(mid_channels, mid_channels), + ) + self.gate = ChannelGate(mid_channels) + self.conv3 = Conv1x1Linear(mid_channels, out_channels) + self.downsample = None + if in_channels != out_channels: + self.downsample = Conv1x1Linear(in_channels, out_channels) + self.IN = None + if IN: + self.IN = nn.InstanceNorm2d(out_channels, affine=True) + + def forward(self, x): + identity = x + x1 = self.conv1(x) + x2a = self.conv2a(x1) + x2b = self.conv2b(x1) + x2c = self.conv2c(x1) + x2d = self.conv2d(x1) + x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d) + x3 = self.conv3(x2) + if self.downsample is not None: + identity = self.downsample(identity) + out = x3 + identity + if self.IN is not None: + out = self.IN(out) + return F.relu(out) + + +########## +# Network architecture +########## +class OSNet(nn.Module): + """Omni-Scale Network. + + Reference: + - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019. + - Zhou et al. Learning Generalisable Omni-Scale Representations + for Person Re-Identification. TPAMI, 2021. + """ + + def __init__( + self, + num_classes, + blocks, + layers, + channels, + feature_dim=512, + loss="softmax", + IN=False, + **kwargs, + ): + super(OSNet, self).__init__() + num_blocks = len(blocks) + assert num_blocks == len(layers) + assert num_blocks == len(channels) - 1 + self.loss = loss + self.feature_dim = feature_dim + + # convolutional backbone + self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN) + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) + self.conv2 = self._make_layer( + blocks[0], + layers[0], + channels[0], + channels[1], + reduce_spatial_size=True, + IN=IN, + ) + self.conv3 = self._make_layer( + blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True + ) + self.conv4 = self._make_layer( + blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False + ) + self.conv5 = Conv1x1(channels[3], channels[3]) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + # fully connected layer + self.fc = self._construct_fc_layer( + self.feature_dim, channels[3], dropout_p=None + ) + # identity classification layer + self.classifier = nn.Linear(self.feature_dim, num_classes) + + self._init_params() + + def _make_layer( + self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False + ): + layers = [] + + layers.append(block(in_channels, out_channels, IN=IN)) + for i in range(1, layer): + layers.append(block(out_channels, out_channels, IN=IN)) + + if reduce_spatial_size: + layers.append( + nn.Sequential( + Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2) + ) + ) + + return nn.Sequential(*layers) + + def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): + if fc_dims is None or fc_dims < 0: + self.feature_dim = input_dim + return None + + if isinstance(fc_dims, int): + fc_dims = [fc_dims] + + layers = [] + for dim in fc_dims: + layers.append(nn.Linear(input_dim, dim)) + layers.append(nn.BatchNorm1d(dim)) + layers.append(nn.ReLU(inplace=True)) + if dropout_p is not None: + layers.append(nn.Dropout(p=dropout_p)) + input_dim = dim + + self.feature_dim = fc_dims[-1] + + return nn.Sequential(*layers) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.conv4(x) + x = self.conv5(x) + return x + + def forward(self, x, return_featuremaps=False): + x = self.featuremaps(x) + if return_featuremaps: + return x + v = self.global_avgpool(x) + v = v.view(v.size(0), -1) + if self.fc is not None: + v = self.fc(v) + if not self.training: + return v + y = self.classifier(v) + if self.loss == "softmax": + return y + elif self.loss == "triplet": + return y, v + else: + raise KeyError("Unsupported loss: {}".format(self.loss)) + + +def init_pretrained_weights(model, key=""): + """Initializes model with pretrained weights. + + Layers that don't match with pretrained layers in name or size are kept unchanged. + """ + import errno + import os + from collections import OrderedDict + + import gdown + + def _get_torch_home(): + ENV_TORCH_HOME = "TORCH_HOME" + ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME" + DEFAULT_CACHE_DIR = "~/.cache" + torch_home = os.path.expanduser( + os.getenv( + ENV_TORCH_HOME, + os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"), + ) + ) + return torch_home + + torch_home = _get_torch_home() + model_dir = os.path.join(torch_home, "checkpoints") + try: + os.makedirs(model_dir) + except OSError as e: + if e.errno == errno.EEXIST: + # Directory already exists, ignore. + pass + else: + # Unexpected OSError, re-raise. + raise + filename = key + "_imagenet.pth" + cached_file = os.path.join(model_dir, filename) + + if not os.path.exists(cached_file): + gdown.download(pretrained_urls[key], cached_file, quiet=False) + + state_dict = torch.load(cached_file) + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + warnings.warn( + 'The pretrained weights from "{}" cannot be loaded, ' + "please check the key names manually " + "(** ignored and continue **)".format(cached_file) + ) + else: + print( + 'Successfully loaded imagenet pretrained weights from "{}"'.format( + cached_file + ) + ) + if len(discarded_layers) > 0: + print( + "** The following layers are discarded " + "due to unmatched keys or layer size: {}".format(discarded_layers) + ) + + +########## +# Instantiation +########## +def osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + # standard size (width x1.0) + model = OSNet( + num_classes, + blocks=[OSBlock, OSBlock, OSBlock], + layers=[2, 2, 2], + channels=[64, 256, 384, 512], + loss=loss, + **kwargs, + ) + if pretrained: + init_pretrained_weights(model, key="osnet_x1_0") + return model + + +def osnet_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + # medium size (width x0.75) + model = OSNet( + num_classes, + blocks=[OSBlock, OSBlock, OSBlock], + layers=[2, 2, 2], + channels=[48, 192, 288, 384], + loss=loss, + **kwargs, + ) + if pretrained: + init_pretrained_weights(model, key="osnet_x0_75") + return model + + +def osnet_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + # tiny size (width x0.5) + model = OSNet( + num_classes, + blocks=[OSBlock, OSBlock, OSBlock], + layers=[2, 2, 2], + channels=[32, 128, 192, 256], + loss=loss, + **kwargs, + ) + if pretrained: + init_pretrained_weights(model, key="osnet_x0_5") + return model + + +def osnet_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + # very tiny size (width x0.25) + model = OSNet( + num_classes, + blocks=[OSBlock, OSBlock, OSBlock], + layers=[2, 2, 2], + channels=[16, 64, 96, 128], + loss=loss, + **kwargs, + ) + if pretrained: + init_pretrained_weights(model, key="osnet_x0_25") + return model + + +def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + # standard size (width x1.0) + IBN layer + # Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018. + model = OSNet( + num_classes, + blocks=[OSBlock, OSBlock, OSBlock], + layers=[2, 2, 2], + channels=[64, 256, 384, 512], + loss=loss, + IN=True, + **kwargs, + ) + if pretrained: + init_pretrained_weights(model, key="osnet_ibn_x1_0") + return model diff --git a/src/juxtapose/trackers/boxmot/appearance/reid_model_factory.py b/src/juxtapose/trackers/boxmot/appearance/reid_model_factory.py new file mode 100644 index 0000000..3290f67 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/appearance/reid_model_factory.py @@ -0,0 +1,214 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import sys +import time +from collections import OrderedDict + +import torch + +from juxtapose.trackers.boxmot.utils import logger as LOGGER + +__model_types = [ + "resnet50", + "resnet101", + "mlfn", + "hacnn", + "mobilenetv2_x1_0", + "mobilenetv2_x1_4", + "osnet_x1_0", + "osnet_x0_75", + "osnet_x0_5", + "osnet_x0_25", + "osnet_ibn_x1_0", + "osnet_ain_x1_0", + "lmbn_n", + "clip", +] + +lmbn_loc = "https://github.com/mikel-brostrom/yolov8_tracking/releases/download/v9.0/" + +__trained_urls = { + # resnet50 + "resnet50_market1501.pt": "https://drive.google.com/uc?id=1dUUZ4rHDWohmsQXCRe2C_HbYkzz94iBV", + "resnet50_dukemtmcreid.pt": "https://drive.google.com/uc?id=17ymnLglnc64NRvGOitY3BqMRS9UWd1wg", + "resnet50_msmt17.pt": "https://drive.google.com/uc?id=1ep7RypVDOthCRIAqDnn4_N-UhkkFHJsj", + "resnet50_fc512_market1501.pt": "https://drive.google.com/uc?id=1kv8l5laX_YCdIGVCetjlNdzKIA3NvsSt", + "resnet50_fc512_dukemtmcreid.pt": "https://drive.google.com/uc?id=13QN8Mp3XH81GK4BPGXobKHKyTGH50Rtx", + "resnet50_fc512_msmt17.pt": "https://drive.google.com/uc?id=1fDJLcz4O5wxNSUvImIIjoaIF9u1Rwaud", + # mlfn + "mlfn_market1501.pt": "https://drive.google.com/uc?id=1wXcvhA_b1kpDfrt9s2Pma-MHxtj9pmvS", + "mlfn_dukemtmcreid.pt": "https://drive.google.com/uc?id=1rExgrTNb0VCIcOnXfMsbwSUW1h2L1Bum", + "mlfn_msmt17.pt": "https://drive.google.com/uc?id=18JzsZlJb3Wm7irCbZbZ07TN4IFKvR6p-", + # hacnn + "hacnn_market1501.pt": "https://drive.google.com/uc?id=1LRKIQduThwGxMDQMiVkTScBwR7WidmYF", + "hacnn_dukemtmcreid.pt": "https://drive.google.com/uc?id=1zNm6tP4ozFUCUQ7Sv1Z98EAJWXJEhtYH", + "hacnn_msmt17.pt": "https://drive.google.com/uc?id=1MsKRtPM5WJ3_Tk2xC0aGOO7pM3VaFDNZ", + # mobilenetv2 + "mobilenetv2_x1_0_market1501.pt": "https://drive.google.com/uc?id=18DgHC2ZJkjekVoqBWszD8_Xiikz-fewp", + "mobilenetv2_x1_0_dukemtmcreid.pt": "https://drive.google.com/uc?id=1q1WU2FETRJ3BXcpVtfJUuqq4z3psetds", + "mobilenetv2_x1_0_msmt17.pt": "https://drive.google.com/uc?id=1j50Hv14NOUAg7ZeB3frzfX-WYLi7SrhZ", + "mobilenetv2_x1_4_market1501.pt": "https://drive.google.com/uc?id=1t6JCqphJG-fwwPVkRLmGGyEBhGOf2GO5", + "mobilenetv2_x1_4_dukemtmcreid.pt": "https://drive.google.com/uc?id=12uD5FeVqLg9-AFDju2L7SQxjmPb4zpBN", + "mobilenetv2_x1_4_msmt17.pt": "https://drive.google.com/uc?id=1ZY5P2Zgm-3RbDpbXM0kIBMPvspeNIbXz", + # osnet + "osnet_x1_0_market1501.pt": "https://drive.google.com/uc?id=1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA", + "osnet_x1_0_dukemtmcreid.pt": "https://drive.google.com/uc?id=1QZO_4sNf4hdOKKKzKc-TZU9WW1v6zQbq", + "osnet_x1_0_msmt17.pt": "https://drive.google.com/uc?id=112EMUfBPYeYg70w-syK6V6Mx8-Qb9Q1M", + "osnet_x0_75_market1501.pt": "https://drive.google.com/uc?id=1ozRaDSQw_EQ8_93OUmjDbvLXw9TnfPer", + "osnet_x0_75_dukemtmcreid.pt": "https://drive.google.com/uc?id=1IE3KRaTPp4OUa6PGTFL_d5_KQSJbP0Or", + "osnet_x0_75_msmt17.pt": "https://drive.google.com/uc?id=1QEGO6WnJ-BmUzVPd3q9NoaO_GsPNlmWc", + "osnet_x0_5_market1501.pt": "https://drive.google.com/uc?id=1PLB9rgqrUM7blWrg4QlprCuPT7ILYGKT", + "osnet_x0_5_dukemtmcreid.pt": "https://drive.google.com/uc?id=1KoUVqmiST175hnkALg9XuTi1oYpqcyTu", + "osnet_x0_5_msmt17.pt": "https://drive.google.com/uc?id=1UT3AxIaDvS2PdxzZmbkLmjtiqq7AIKCv", + "osnet_x0_25_market1501.pt": "https://drive.google.com/uc?id=1z1UghYvOTtjx7kEoRfmqSMu-z62J6MAj", + "osnet_x0_25_dukemtmcreid.pt": "https://drive.google.com/uc?id=1eumrtiXT4NOspjyEV4j8cHmlOaaCGk5l", + "osnet_x0_25_msmt17.pt": "https://drive.google.com/uc?id=1sSwXSUlj4_tHZequ_iZ8w_Jh0VaRQMqF", + # osnet_ain | osnet_ibn + "osnet_ibn_x1_0_msmt17.pt": "https://drive.google.com/uc?id=1q3Sj2ii34NlfxA4LvmHdWO_75NDRmECJ", + "osnet_ain_x1_0_msmt17.pt": "https://drive.google.com/uc?id=1SigwBE6mPdqiJMqhuIY4aqC7--5CsMal", + # lmbn + "lmbn_n_duke.pt": lmbn_loc + "lmbn_n_duke.pth", + "lmbn_n_market.pt": lmbn_loc + "lmbn_n_market.pth", + "lmbn_n_cuhk03_d.pt": lmbn_loc + "lmbn_n_cuhk03_d.pth", + # clip + "clip_market1501.pt": "https://drive.google.com/uc?id=1GnyAVeNOg3Yug1KBBWMKKbT2x43O5Ch7", + "clip_duke.pt": "https://drive.google.com/uc?id=1ldjSkj-7pXAWmx8on5x0EftlCaolU4dY", + "clip_veri.pt": "https://drive.google.com/uc?id=1RyfHdOBI2pan_wIGSim5-l6cM4S2WN8e", + "clip_vehicleid.pt": "https://drive.google.com/uc?id=1RyfHdOBI2pan_wIGSim5-l6cM4S2WN8e", +} + + +def show_downloadable_models(): + LOGGER.info("\nAvailable .pt ReID models for automatic download") + LOGGER.info(list(__trained_urls.keys())) + + +def get_model_url(model): + if model.name in __trained_urls: + return __trained_urls[model.name] + else: + None + + +def is_model_in_model_types(model): + if model.name in __model_types: + return True + else: + return False + + +def get_model_name(model): + for x in __model_types: + if x in model.name: + return x + return None + + +def download_url(url, dst): + """Downloads file from a url to a destination. + + Args: + url (str): url to download file. + dst (str): destination path. + """ + from six.moves import urllib + + LOGGER.info('* url="{}"'.format(url)) + LOGGER.info('* destination="{}"'.format(dst)) + + def _reporthook(count, block_size, total_size): + global start_time + if count == 0: + start_time = time.time() + return + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024 * duration)) + percent = int(count * block_size * 100 / total_size) + sys.stdout.write( + "\r...%d%%, %d MB, %d KB/s, %d seconds passed" + % (percent, progress_size / (1024 * 1024), speed, duration) + ) + sys.stdout.flush() + + urllib.request.urlretrieve(url, dst, _reporthook) + sys.stdout.write("\n") + + +def load_pretrained_weights(model, weight_path): + r"""Loads pretrianed weights to model. + + Features:: + - Incompatible layers (unmatched in name or size) will be ignored. + - Can automatically deal with keys containing "module.". + + Args: + model (nn.Module): network model. + weight_path (str): path to pretrained weights. + + Examples:: + >>> from juxtapose.trackers.boxmot.appearance.backbones import build_model + >>> from juxtapose.trackers.boxmot.appearance.reid_model_factory import load_pretrained_weights + >>> weight_path = 'log/my_model/model-best.pth.tar' + >>> model = build_model() + >>> load_pretrained_weights(model, weight_path) + """ + + if not torch.cuda.is_available(): + checkpoint = torch.load(weight_path, map_location=torch.device("cpu")) + else: + checkpoint = torch.load(weight_path) + + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + + if "lmbn" in str(weight_path): + model.load_state_dict(model_dict, strict=True) + elif "clip" in str(weight_path): + + def forward_override(self, x: torch.Tensor, cv_emb=None, old_forward=None): + _, image_features, image_features_proj = old_forward(x, cv_emb) + return torch.cat([image_features[:, 0], image_features_proj[:, 0]], dim=1) + + # print('model.load_param(str(weight_path))', str(weight_path)) + model.load_param(str(weight_path)) + model = model.image_encoder + # old_forward = model.forward + # model.forward = lambda *args, **kwargs: forward_override(model, old_forward=old_forward, *args, **kwargs) + LOGGER.success(f'Successfully loaded pretrained weights from "{weight_path}"') + else: + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith("module."): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + LOGGER.warning( + f'The pretrained weights "{weight_path}" cannot be loaded, ' + "please check the key names manually " + "(** ignored and continue **)" + ) + else: + LOGGER.success( + f'Successfully loaded pretrained weights from "{weight_path}"' + ) + if len(discarded_layers) > 0: + LOGGER.warning( + "The following layers are discarded " + f"due to unmatched keys or layer size: {*discarded_layers,}" + ) diff --git a/src/juxtapose/trackers/boxmot/appearance/reid_multibackend.py b/src/juxtapose/trackers/boxmot/appearance/reid_multibackend.py new file mode 100644 index 0000000..25b7306 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/appearance/reid_multibackend.py @@ -0,0 +1,324 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from collections import OrderedDict, namedtuple +from os.path import exists as file_exists +from pathlib import Path + +import cv2 +import gdown +import numpy as np +import torch +import torch.nn as nn + +from juxtapose.trackers.boxmot.appearance.backbones import ( + build_model, + get_nr_classes, +) +from juxtapose.trackers.boxmot.appearance.reid_model_factory import ( + get_model_name, + get_model_url, + load_pretrained_weights, + show_downloadable_models, +) +from juxtapose.trackers.boxmot.utils import logger as LOGGER + +from juxtapose.trackers.boxmot.utils.checks import TestRequirements + +tr = TestRequirements() + + +def check_suffix(file="osnet_x0_25_msmt17.pt", suffix=(".pt",), msg=""): + # Check file(s) for acceptable suffix + if file and suffix: + if isinstance(suffix, str): + suffix = [suffix] + for f in file if isinstance(file, (list, tuple)) else [file]: + s = Path(f).suffix.lower() # file suffix + if len(s): + try: + assert s in suffix + except AssertionError as err: + LOGGER.error(f"{err}{f} acceptable suffix is {suffix}") + + +class ReIDDetectMultiBackend(nn.Module): + # ReID models MultiBackend class for python inference on various backends + def __init__( + self, weights="osnet_x0_25_msmt17.pt", device=torch.device("cpu"), fp16=False + ): + super().__init__() + + w = weights[0] if isinstance(weights, list) else weights + ( + self.pt, + self.jit, + self.onnx, + self.xml, + self.engine, + self.tflite, + ) = self.model_type(w) # get backend + self.fp16 = fp16 + self.fp16 &= self.pt or self.jit or self.engine # FP16 + self.device = device + self.nhwc = self.tflite # activate bhwc --> bcwh + + model_name = get_model_name(w) + + if w.suffix == ".pt": + model_url = get_model_url(w) + if not file_exists(w) and model_url is not None: + gdown.download(model_url, str(w), quiet=False) + elif file_exists(w): + pass + else: + LOGGER.error( + f"No URL associated to the chosen StrongSORT weights ({w}). Choose between:" + ) + show_downloadable_models() + exit() + + # Build model + self.model = build_model( + model_name, + num_classes=get_nr_classes(w), + pretrained=not (w and w.is_file()), + use_gpu=device, + ) + + if self.pt: # PyTorch + # populate model arch with weights + if w and w.is_file() and w.suffix == ".pt": + load_pretrained_weights(self.model, w) + self.model.to(device).eval() + self.model.half() if self.fp16 else self.model.float() + elif self.jit: + LOGGER.info(f"Loading {w} for TorchScript inference...") + self.model = torch.jit.load(w) + self.model.half() if self.fp16 else self.model.float() + elif self.onnx: # ONNX Runtime + LOGGER.info(f"Loading {w} for ONNX Runtime inference...") + cuda = torch.cuda.is_available() and device.type != "cpu" + tr.check_packages( + ( + "onnx", + "onnxruntime-gpu" if cuda else "onnxruntime", + ) + ) + import onnxruntime + + providers = ( + ["CUDAExecutionProvider", "CPUExecutionProvider"] + if cuda + else ["CPUExecutionProvider"] + ) + self.session = onnxruntime.InferenceSession(str(w), providers=providers) + elif self.engine: # TensorRT + LOGGER.info(f"Loading {w} for TensorRT inference...") + tr.check_packages(("nvidia-tensorrt",)) + import tensorrt as trt # https://developer.nvidia.com/nvidia-tensorrt-download + + if device.type == "cpu": + device = torch.device("cuda:0") + Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr")) + logger = trt.Logger(trt.Logger.INFO) + with open(w, "rb") as f, trt.Runtime(logger) as runtime: + self.model_ = runtime.deserialize_cuda_engine(f.read()) + self.context = self.model_.create_execution_context() + self.bindings = OrderedDict() + self.fp16 = False # default updated below + # dynamic = False + for index in range(self.model_.num_bindings): + name = self.model_.get_binding_name(index) + dtype = trt.nptype(self.model_.get_binding_dtype(index)) + if self.model_.binding_is_input(index): + if -1 in tuple(self.model_.get_binding_shape(index)): # dynamic + # dynamic = True + self.context.set_binding_shape( + index, tuple(self.model_.get_profile_shape(0, index)[2]) + ) + if dtype == np.float16: + self.fp16 = True + shape = tuple(self.context.get_binding_shape(index)) + im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) + self.bindings[name] = Binding( + name, dtype, shape, im, int(im.data_ptr()) + ) + self.binding_addrs = OrderedDict( + (n, d.ptr) for n, d in self.bindings.items() + ) + # batch_size = self.bindings["images"].shape[ + # 0 + # ] # if dynamic, this is instead max batch size + elif self.xml: # OpenVINO + LOGGER.info(f"Loading {w} for OpenVINO inference...") + try: + # requires openvino-dev: https://pypi.org/project/openvino-dev/ + from openvino.runtime import Core, Layout + except ImportError: + LOGGER.error( + f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n" + "requires openvino pip package to be installed!\n" + "$ pip install openvino-dev>=2022.3\n" + ) + ie = Core() + if not Path(w).is_file(): # if not *.xml + w = next( + Path(w).glob("*.xml") + ) # get *.xml file from *_openvino_model dir + network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin")) + if network.get_parameters()[0].get_layout().empty: + network.get_parameters()[0].set_layout(Layout("NCWH")) + self.executable_network = ie.compile_model( + network, device_name="CPU" + ) # device_name="MYRIAD" for Intel NCS2 + self.output_layer = next(iter(self.executable_network.outputs)) + + elif self.tflite: + LOGGER.info(f"Loading {w} for TensorFlow Lite inference...") + import tensorflow as tf + + interpreter = tf.lite.Interpreter(model_path=str(w)) + try: + self.tf_lite_model = interpreter.get_signature_runner() + except Exception as e: + LOGGER.error( + f"{e}. If SignatureDef error. Export you model with the official onn2tf docker" + ) + exit() + else: + LOGGER.error("This model framework is not supported yet!") + exit() + + @staticmethod + def model_type(p="path/to/model.pt"): + # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx + from . import export_formats + + sf = list(export_formats().Suffix) # export suffixes + check_suffix(p, sf) # checks + types = [s in Path(p).name for s in sf] + return types + + def preprocess(self, xyxys, img): + crops = [] + # dets are of different sizes so batch preprocessing is not possible + for box in xyxys: + x1, y1, x2, y2 = box.astype("int") + crop = img[y1:y2, x1:x2] + # resize + crop = cv2.resize( + crop, + (128, 256), # from (x, y) to (128, 256) | (w, h) + interpolation=cv2.INTER_LINEAR, + ) + + # (cv2) BGR 2 (PIL) RGB. The ReID models have been trained with this channel order + crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB) + + # normalization + crop = crop / 255 + + # standardization (RGB channel order) + crop = crop - np.array([0.485, 0.456, 0.406]) + crop = crop / np.array([0.229, 0.224, 0.225]) + + crop = torch.from_numpy(crop).float() + crops.append(crop) + + crops = torch.stack(crops, dim=0) + crops = torch.permute(crops, (0, 3, 1, 2)) + crops = crops.to( + dtype=torch.half if self.fp16 else torch.float, device=self.device + ) + + return crops + + def forward(self, im_batch): + # batch to half + if self.fp16 and im_batch.dtype != torch.float16: + im_batch = im_batch.half() + + # torch BCHW to numpy BHWC + if self.nhwc: + im_batch = im_batch.permute(0, 2, 3, 1) + + # batch processing + features = [] + if self.pt: + features = self.model(im_batch) + elif self.jit: # TorchScript + features = self.model(im_batch) + elif self.onnx: # ONNX Runtime + im_batch = im_batch.cpu().numpy() # torch to numpy + features = self.session.run( + [self.session.get_outputs()[0].name], + {self.session.get_inputs()[0].name: im_batch}, + )[0] + elif self.tflite: + im_batch = im_batch.cpu().numpy() + inputs = { + "images": im_batch, + } + tf_lite_output = self.tf_lite_model(**inputs) + features = tf_lite_output["output"] + + elif self.engine: # TensorRT + if True and im_batch.shape != self.bindings["images"].shape: + i_in, i_out = ( + self.model_.get_binding_index(x) for x in ("images", "output") + ) + self.context.set_binding_shape( + i_in, im_batch.shape + ) # reshape if dynamic + self.bindings["images"] = self.bindings["images"]._replace( + shape=im_batch.shape + ) + self.bindings["output"].data.resize_( + tuple(self.context.get_binding_shape(i_out)) + ) + s = self.bindings["images"].shape + assert ( + im_batch.shape == s + ), f"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" + self.binding_addrs["images"] = int(im_batch.data_ptr()) + self.context.execute_v2(list(self.binding_addrs.values())) + features = self.bindings["output"].data + elif self.xml: # OpenVINO + im_batch = im_batch.cpu().numpy() # FP32 + features = self.executable_network([im_batch])[self.output_layer] + else: + LOGGER.error( + "Framework not supported at the moment, leave an enhancement suggestion" + ) + exit() + + if isinstance(features, (list, tuple)): + return ( + self.to_numpy(features[0]) + if len(features) == 1 + else [self.to_numpy(x) for x in features] + ) + else: + return self.to_numpy(features) + + def to_numpy(self, x): + # return x.cpu().numpy() if isinstance(x, torch.Tensor) else x + return x.cpu().detach().numpy() if isinstance(x, torch.Tensor) else x + + def warmup(self, imgsz=[(256, 128, 3)]): + # warmup model by running inference once + # if self.device.type != "cpu": + if self.device != "cpu": + im = np.random.randint(0, 255, *imgsz, dtype=np.uint8) + im = self.preprocess(xyxys=np.array([[0, 0, 128, 256]]), img=im) + self.forward(im) # warmup + + @torch.no_grad() + def get_features(self, xyxys, img): + if xyxys.size != 0: + crops = self.preprocess(xyxys, img) + features = self.forward(crops) + else: + features = np.array([]) + features = features / np.linalg.norm(features) + return features diff --git a/src/juxtapose/trackers/boxmot/configs/__init__.py b/src/juxtapose/trackers/boxmot/configs/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/configs/botsort.yaml b/src/juxtapose/trackers/boxmot/configs/botsort.yaml new file mode 100644 index 0000000..4715da8 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/botsort.yaml @@ -0,0 +1,12 @@ +# Trial number: 232 +# HOTA, MOTA, IDF1: [45.31] +appearance_thresh: 0.4818211117541298 +cmc_method: sparseOptFlow +conf_thres: 0.3501265956918775 +frame_rate: 30 +lambda_: 0.9896143462366406 +match_thresh: 0.22734550911325851 +new_track_thresh: 0.21144301345190655 +proximity_thresh: 0.5945380911899254 +track_buffer: 60 +track_high_thresh: 0.33824964456239337 diff --git a/src/juxtapose/trackers/boxmot/configs/bytetrack.yaml b/src/juxtapose/trackers/boxmot/configs/bytetrack.yaml new file mode 100644 index 0000000..fe65ecf --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/bytetrack.yaml @@ -0,0 +1,5 @@ +track_thresh: 0.5 # tracking confidence threshold, should be the same as model conf +track_buffer: 30 # the frames for keep lost tracks +match_thresh: 0.8 # matching threshold for tracking +frame_rate: 30 # FPS +conf_thres: 0.5122620708221085 diff --git a/src/juxtapose/trackers/boxmot/configs/deepocsort.yaml b/src/juxtapose/trackers/boxmot/configs/deepocsort.yaml new file mode 100644 index 0000000..2028cda --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/deepocsort.yaml @@ -0,0 +1,17 @@ +# Trial number: 0 +# HOTA, MOTA, IDF1: [27.884, 11.425, 21.545] +alpha_fixed_emb: 0.95 +asso_func: giou +aw_off: false +aw_param: 0.5 +cmc_off: false +conf: 0.5 +delta_t: 3 +det_thresh: 0 +embedding_off: false +inertia: 0.2 +iou_thresh: 0.3 +max_age: 30 +min_hits: 1 +new_kf_off: false +w_association_emb: 0.75 diff --git a/src/juxtapose/trackers/boxmot/configs/hybridsort.yaml b/src/juxtapose/trackers/boxmot/configs/hybridsort.yaml new file mode 100644 index 0000000..8d7a471 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/hybridsort.yaml @@ -0,0 +1,9 @@ +asso_func: giou +conf_thres: 0.5122620708221085 +delta_t: 3 +det_thresh: 0 +inertia: 0.2 +iou_thresh: 0.3 +max_age: 30 +min_hits: 1 +use_byte: false diff --git a/src/juxtapose/trackers/boxmot/configs/ocsort.yaml b/src/juxtapose/trackers/boxmot/configs/ocsort.yaml new file mode 100644 index 0000000..138f353 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/ocsort.yaml @@ -0,0 +1,11 @@ +# Trial number: 137 +# HOTA, MOTA, IDF1: [55.567] +asso_func: giou +conf_thres: 0.5122620708221085 +delta_t: 3 +det_thresh: 0 +inertia: 0.2 +iou_thresh: 0.3 +max_age: 30 +min_hits: 1 +use_byte: false diff --git a/src/juxtapose/trackers/boxmot/configs/strongsort.yaml b/src/juxtapose/trackers/boxmot/configs/strongsort.yaml new file mode 100644 index 0000000..a57e9d2 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/configs/strongsort.yaml @@ -0,0 +1,9 @@ +ecc: true +ema_alpha: 0.8 +max_age: 30 +max_dist: 0.2 +max_iou_dist: 0.7 +mc_lambda: 0.995 +n_init: 1 +nn_budget: 100 +conf_thres: 0.5122620708221085 diff --git a/src/juxtapose/trackers/boxmot/motion/__init__.py b/src/juxtapose/trackers/boxmot/motion/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/__init__.py b/src/juxtapose/trackers/boxmot/motion/cmc/__init__.py new file mode 100644 index 0000000..0f1bddf --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/__init__.py @@ -0,0 +1,19 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from juxtapose.trackers.boxmot.motion.cmc.ecc import ECC +from juxtapose.trackers.boxmot.motion.cmc.orb import ORB +from juxtapose.trackers.boxmot.motion.cmc.sift import SIFT +from juxtapose.trackers.boxmot.motion.cmc.sof import SparseOptFlow + + +def get_cmc_method(cmc_method): + if cmc_method == "ecc": + return ECC + elif cmc_method == "orb": + return ORB + elif cmc_method == "sof": + return SparseOptFlow + elif cmc_method == "sift": + return SIFT + else: + return None diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/cmc_interface.py b/src/juxtapose/trackers/boxmot/motion/cmc/cmc_interface.py new file mode 100644 index 0000000..3b96e82 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/cmc_interface.py @@ -0,0 +1,21 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + + +class CMCInterface: + + def apply(self, im): + raise NotImplementedError('Subclasses must implement this method.') + + def generate_mask(self, img, dets, scale): + h, w = img.shape + mask = np.zeros_like(img) + + mask[int(0.02 * h): int(0.98 * h), int(0.02 * w): int(0.98 * w)] = 255 + if dets is not None: + for det in dets: + tlbr = np.multiply(det, scale).astype(int) + mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0 + + return mask diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/ecc.py b/src/juxtapose/trackers/boxmot/motion/cmc/ecc.py new file mode 100644 index 0000000..14b12eb --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/ecc.py @@ -0,0 +1,159 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import time + +import cv2 +import numpy as np + +from juxtapose.trackers.boxmot.motion.cmc.cmc_interface import CMCInterface +from juxtapose.trackers.boxmot.utils import BOXMOT +from juxtapose.trackers.boxmot.utils import logger as LOGGER + + +class ECC(CMCInterface): + def __init__( + self, + warp_mode=cv2.MOTION_EUCLIDEAN, + eps=1e-5, + max_iter=100, + scale=0.1, + align=False, + grayscale=True, + ): + """Compute the warp matrix from src to dst. + + Parameters + ---------- + warp_mode: opencv flag + translation: cv2.MOTION_TRANSLATION + rotated and shifted: cv2.MOTION_EUCLIDEAN + affine(shift,rotated,shear): cv2.MOTION_AFFINE + homography(3d): cv2.MOTION_HOMOGRAPHY + eps: float + the threshold of the increment in the correlation coefficient between two iterations + max_iter: int + the number of iterations. + scale: float or [int, int] + scale_ratio: float + scale_size: [W, H] + align: bool + whether to warp affine or perspective transforms to the source image + grayscale: bool + whether to transform 3 channel RGB to single channel grayscale for faster computations + + Returns + ------- + warp matrix : ndarray + Returns the warp matrix from src to dst. + if motion models is homography, the warp matrix will be 3x3, otherwise 2x3 + src_aligned: ndarray + aligned source image of gray + """ + self.align = align + self.grayscale = grayscale + self.scale = scale + self.warp_mode = warp_mode + self.termination_criteria = ( + cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, + max_iter, + eps, + ) + self.prev_img = None + + def preprocess(self, img): + # bgr2gray + if self.grayscale: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # resize + if self.scale is not None: + img = cv2.resize( + img, + (0, 0), + fx=self.scale, + fy=self.scale, + interpolation=cv2.INTER_LINEAR, + ) + + return img + + def apply(self, curr_img, dets): + if self.warp_mode == cv2.MOTION_HOMOGRAPHY: + warp_matrix = np.eye(3, 3, dtype=np.float32) + else: + warp_matrix = np.eye(2, 3, dtype=np.float32) + + if self.prev_img is None: + self.prev_img = self.preprocess(curr_img) + return warp_matrix + + curr_img = self.preprocess(curr_img) + + try: + (ret_val, warp_matrix) = cv2.findTransformECC( + self.prev_img, # already processed + curr_img, + warp_matrix, + self.warp_mode, + self.termination_criteria, + None, + 1, + ) + except Exception as e: + LOGGER.warning( + f"Affine matrix could not be generated: {e}. Returning identity" + ) + return warp_matrix + + # upscale warp matrix to original images size + if self.scale < 1: + warp_matrix[0, 2] /= self.scale + warp_matrix[1, 2] /= self.scale + + if self.align: + h, w = self.prev_img.shape + if self.warp_mode == cv2.MOTION_HOMOGRAPHY: + # Use warpPerspective for Homography + self.prev_img_aligned = cv2.warpPerspective( + self.prev_img, warp_matrix, (w, h), flags=cv2.INTER_LINEAR + ) + else: + # Use warpAffine for Translation, Euclidean and Affine + self.prev_img_aligned = cv2.warpAffine( + self.prev_img, warp_matrix, (w, h), flags=cv2.INTER_LINEAR + ) + else: + self.prev_img_aligned = None + + self.prev_img = curr_img + + return warp_matrix # , prev_img_aligned + + +def main(): + ecc = ECC(scale=0.5, align=True, grayscale=True) + curr_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg") + prev_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg") + + warp_matrix = ecc.apply(prev_img, None) + warp_matrix = ecc.apply(curr_img, None) + + start = time.process_time() + for i in range(0, 100): + warp_matrix = ecc.apply(prev_img, None) + warp_matrix = ecc.apply(curr_img, None) + end = time.process_time() + print("Total time", end - start) + print(warp_matrix) + + if ecc.prev_img_aligned is not None: + curr_img = ecc.preprocess(curr_img) + prev_img = ecc.preprocess(prev_img) + weighted_img = cv2.addWeighted(curr_img, 0.5, ecc.prev_img_aligned, 0.5, 0) + cv2.imshow("prev_img_aligned", weighted_img) + cv2.waitKey(0) + cv2.imwrite(str(BOXMOT / "motion/cmc/ecc_aligned.jpg"), weighted_img) + + +if __name__ == "__main__": + main() diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/orb.py b/src/juxtapose/trackers/boxmot/motion/cmc/orb.py new file mode 100644 index 0000000..f68631f --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/orb.py @@ -0,0 +1,281 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import copy +import time + +import cv2 +import numpy as np + +from juxtapose.trackers.boxmot.motion.cmc.cmc_interface import CMCInterface +from juxtapose.trackers.boxmot.utils import BOXMOT + + +class ORB(CMCInterface): + def __init__( + self, + feature_detector_threshold=20, + matcher_norm_type=cv2.NORM_HAMMING, + scale=0.1, + grayscale=True, + draw_keypoint_matches=False, + align=False, + ): + """Compute the warp matrix from src to dst. + + Parameters + ---------- + matcher_norm_type: opencv flag + NORM_L1, NORM_L2, NORM_HAMMING, NORM_HAMMING2. L1 and L2 norms are preferable + choices for SIFT and SURF descriptors, NORM_HAMMING should be used with + ORB, BRISK and BRIEF, NORM_HAMMING2 should be used with ORB when WTA_K==3 or 4 + feature_detector_threshold: int + the threshold for feature extraction + scale: float + scale_ratio: float + grayscale: bool + whether to transform 3 channel RGB to single channel grayscale for faster computations + + Returns + ------- + warp matrix : ndarray + Returns the warp matrix from the matching keypoint in the previous image to the current + warp matrix is always 2x3 + """ + self.grayscale = grayscale + self.scale = scale + + self.detector = cv2.FastFeatureDetector_create( + threshold=feature_detector_threshold + ) + self.extractor = cv2.ORB_create() + self.matcher = cv2.BFMatcher(matcher_norm_type) + + self.prev_img = None + self.draw_keypoint_matches = draw_keypoint_matches + self.align = align + + def preprocess(self, img): + # bgr2gray + if self.grayscale: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # resize + if self.scale is not None: + img = cv2.resize( + img, + (0, 0), + fx=self.scale, + fy=self.scale, + interpolation=cv2.INTER_LINEAR, + ) + + return img + + def apply(self, img, dets): + H = np.eye(2, 3) + + img = self.preprocess(img) + h, w = img.shape + + # generate dynamic object maks + mask = self.generate_mask(img, dets, self.scale) + + # find static keypoints + keypoints = self.detector.detect(img, mask) + + # compute the descriptors + keypoints, descriptors = self.extractor.compute(img, keypoints) + + # handle first frame + if self.prev_img is None: + # Initialize data + self.prev_dets = dets.copy() + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + # Match descriptors. + knnMatches = self.matcher.knnMatch(self.prev_descriptors, descriptors, k=2) + + # Handle empty matches case + if len(knnMatches) == 0: + # Store to next iteration + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + # filtered matches based on smallest spatial distance + matches = [] + spatial_distances = [] + max_spatial_distance = 0.25 * np.array([w, h]) + + for m, n in knnMatches: + if m.distance < 0.9 * n.distance: + prevKeyPointLocation = self.prev_keypoints[m.queryIdx].pt + currKeyPointLocation = keypoints[m.trainIdx].pt + + spatial_distance = ( + prevKeyPointLocation[0] - currKeyPointLocation[0], + prevKeyPointLocation[1] - currKeyPointLocation[1], + ) + + if (np.abs(spatial_distance[0]) < max_spatial_distance[0]) and ( + np.abs(spatial_distance[1]) < max_spatial_distance[1] + ): + spatial_distances.append(spatial_distance) + matches.append(m) + + mean_spatial_distances = np.mean(spatial_distances, 0) + std_spatial_distances = np.std(spatial_distances, 0) + + inliesrs = ( + spatial_distances - mean_spatial_distances + ) < 2.5 * std_spatial_distances + + goodMatches = [] + prevPoints = [] + currPoints = [] + for i in range(len(matches)): + if inliesrs[i, 0] and inliesrs[i, 1]: + goodMatches.append(matches[i]) + prevPoints.append(self.prev_keypoints[matches[i].queryIdx].pt) + currPoints.append(keypoints[matches[i].trainIdx].pt) + + prevPoints = np.array(prevPoints) + currPoints = np.array(currPoints) + + # draw keypoint matches on the output image + if self.draw_keypoint_matches: + self.prev_img[:, :][mask == True] = 0 # noqa:E712 + self.matches_img = np.hstack((self.prev_img, img)) + self.matches_img = cv2.cvtColor(self.matches_img, cv2.COLOR_GRAY2BGR) + + W = np.size(self.prev_img, 1) + for m in goodMatches: + prev_pt = np.array(self.prev_keypoints[m.queryIdx].pt, dtype=np.int_) + curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_) + curr_pt[0] += W + color = np.random.randint(0, 255, (3,)) + color = (int(color[0]), int(color[1]), int(color[2])) + self.matches_img = cv2.line( + self.matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA + ) + self.matches_img = cv2.circle( + self.matches_img, prev_pt, 2, tuple(color), -1 + ) + self.matches_img = cv2.circle( + self.matches_img, curr_pt, 2, tuple(color), -1 + ) + for det in dets: + det = np.multiply(det, self.scale).astype(int) + start = (det[0] + w, det[1]) + end = (det[2] + w, det[3]) + self.matches_img = cv2.rectangle( + self.matches_img, start, end, (0, 0, 255), 2 + ) + for det in self.prev_dets: + det = np.multiply(det, self.scale).astype(int) + start = (det[0], det[1]) + end = (det[2], det[3]) + self.matches_img = cv2.rectangle( + self.matches_img, start, end, (0, 0, 255), 2 + ) + else: + self.matches_img = None + + # find rigid matrix + if (np.size(prevPoints, 0) > 4) and ( + np.size(prevPoints, 0) == np.size(prevPoints, 0) + ): + H, inliesrs = cv2.estimateAffinePartial2D( + prevPoints, currPoints, cv2.RANSAC + ) + + # upscale warp matrix to original images size + if self.scale < 1.0: + H[0, 2] /= self.scale + H[1, 2] /= self.scale + + if self.align: + self.prev_img_aligned = cv2.warpAffine( + self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR + ) + else: + print("Warning: not enough matching points") + + # Store to next iteration + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + +def main(): + orb = ORB(scale=0.5, align=True, grayscale=True, draw_keypoint_matches=False) + curr_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg") + prev_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg") + curr_dets = np.array( + [ + [1083.8207, 541.5978, 1195.7952, 655.8790], # noqa:E241 + [1635.6456, 563.8348, 1695.4153, 686.6704], # noqa:E241 + [957.0879, 545.6558, 1042.6743, 611.8740], # noqa:E241,E261,E201 + [1550.0317, 562.5705, 1600.3931, 684.7425], # noqa:E241 + [78.8801, 714.3307, 121.0272, 817.6857], # noqa:E241,E261,E201 + [1382.9938, 512.2731, 1418.6012, 620.1938], # noqa:E241 + [1459.7921, 496.2123, 1488.5767, 584.3533], # noqa:E241 + [982.9818, 492.8579, 1013.6625, 517.9271], # noqa:E241,E261,E201 + [496.1809, 541.3972, 531.4617, 638.0989], # noqa:E241,E261,E201 + [1498.8512, 522.6646, 1526.1145, 587.7672], # noqa:E241 + [536.4527, 548.4061, 569.2723, 635.5656], # noqa:E241,E261,E201 + [247.8834, 580.8851, 287.2241, 735.3685], # noqa:E241,E261,E201 + [151.4096, 572.3918, 203.5401, 731.1011], # noqa:E241,E261,E201 + [1227.4098, 440.5505, 1252.7986, 489.5295], + ] # noqa:E241 + ) + prev_dets = np.array( + [ + [2.1069e-02, 6.7026e02, 4.9816e01, 8.8407e02], + [1.0765e03, 5.4009e02, 1.1883e03, 6.5219e02], + [1.5208e03, 5.6322e02, 1.5711e03, 6.7676e02], + [1.6111e03, 5.5926e02, 1.6640e03, 6.7443e02], + [9.5244e02, 5.4681e02, 1.0384e03, 6.1180e02], + [1.3691e03, 5.1258e02, 1.4058e03, 6.1695e02], + [1.2043e02, 7.0780e02, 1.7309e02, 8.0518e02], + [1.4454e03, 5.0919e02, 1.4724e03, 5.8270e02], + [9.7848e02, 4.9563e02, 1.0083e03, 5.1980e02], + [5.0166e02, 5.4778e02, 5.3796e02, 6.3940e02], + [1.4777e03, 5.1856e02, 1.5105e03, 5.9523e02], + [1.9540e02, 5.7292e02, 2.3711e02, 7.2717e02], + [2.7373e02, 5.8564e02, 3.1335e02, 7.3281e02], + [5.4038e02, 5.4735e02, 5.7359e02, 6.3797e02], + [1.2190e03, 4.4176e02, 1.2414e03, 4.9038e02], + ] + ) + + warp_matrix = orb.apply(prev_img, prev_dets) + warp_matrix = orb.apply(curr_img, curr_dets) + + start = time.process_time() + for i in range(0, 100): + warp_matrix = orb.apply(prev_img, prev_dets) + warp_matrix = orb.apply(curr_img, curr_dets) + end = time.process_time() + print("Total time", end - start) + print(warp_matrix) + + if orb.prev_img_aligned is not None: + curr_img = orb.preprocess(curr_img) + prev_img = orb.preprocess(prev_img) + weighted_img = cv2.addWeighted(curr_img, 0.5, orb.prev_img_aligned, 0.5, 0) + cv2.imshow("prev_img_aligned", weighted_img) + cv2.waitKey(0) + cv2.imwrite(str(BOXMOT / "motion/cmc/orb_aligned.jpg"), weighted_img) + + +if __name__ == "__main__": + main() diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/sift.py b/src/juxtapose/trackers/boxmot/motion/cmc/sift.py new file mode 100644 index 0000000..90d1993 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/sift.py @@ -0,0 +1,304 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import copy +import time + +import cv2 +import numpy as np + +from juxtapose.trackers.boxmot.motion.cmc.cmc_interface import CMCInterface +from juxtapose.trackers.boxmot.utils import BOXMOT + + +class SIFT(CMCInterface): + def __init__( + self, + warp_mode=cv2.MOTION_EUCLIDEAN, + eps=1e-5, + max_iter=100, + scale=0.1, + grayscale=True, + draw_keypoint_matches=False, + align=False, + ): + """Compute the warp matrix from src to dst. + + Parameters + ---------- + warp_mode: opencv flag + translation: cv2.MOTION_TRANSLATION + rotated and shifted: cv2.MOTION_EUCLIDEAN + affine(shift,rotated,shear): cv2.MOTION_AFFINE + homography(3d): cv2.MOTION_HOMOGRAPHY + eps: float + the threshold of the increment in the correlation coefficient between two iterations + max_iter: int + the number of iterations. + scale: float or [int, int] + scale_ratio: float + scale_size: [W, H] + align: bool + whether to warp affine or perspective transforms to the source image + grayscale: bool + whether to transform 3 channel RGB to single channel grayscale for faster computations + + Returns + ------- + warp matrix : ndarray + Returns the warp matrix from src to dst. + if motion models is homography, the warp matrix will be 3x3, otherwise 2x3 + src_aligned: ndarray + aligned source image of gray + """ + self.grayscale = grayscale + self.scale = scale + self.warp_mode = warp_mode + self.termination_criteria = ( + cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, + max_iter, + eps, + ) + if self.warp_mode == cv2.MOTION_HOMOGRAPHY: + self.warp_matrix = np.eye(3, 3, dtype=np.float32) + else: + self.warp_matrix = np.eye(2, 3, dtype=np.float32) + + self.detector = cv2.SIFT_create( + nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20 + ) + self.extractor = cv2.SIFT_create( + nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20 + ) + self.matcher = cv2.BFMatcher(cv2.NORM_L2) + + self.prev_img = None + self.minimum_features = 10 + self.prev_desc = None + self.draw_keypoint_matches = draw_keypoint_matches + self.align = align + + def preprocess(self, img): + # bgr2gray + if self.grayscale: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # resize + if self.scale is not None: + img = cv2.resize( + img, + (0, 0), + fx=self.scale, + fy=self.scale, + interpolation=cv2.INTER_LINEAR, + ) + + return img + + def apply(self, img, dets): + H = np.eye(2, 3) + + img = self.preprocess(img) + h, w = img.shape + + # generate dynamic object maks + mask = self.generate_mask(img, dets, self.scale) + + # find static keypoints + keypoints = self.detector.detect(img, mask) + + # compute the descriptors + keypoints, descriptors = self.extractor.compute(img, keypoints) + + # handle first frame + if self.prev_img is None: + # Initialize data + self.prev_dets = dets.copy() + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + # Match descriptors. + knnMatches = self.matcher.knnMatch(self.prev_descriptors, descriptors, k=2) + + # Handle empty matches case + if len(knnMatches) == 0: + # Store to next iteration + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + # filtered matches based on smallest spatial distance + matches = [] + spatial_distances = [] + max_spatial_distance = 0.25 * np.array([w, h]) + + for m, n in knnMatches: + if m.distance < 0.9 * n.distance: + prevKeyPointLocation = self.prev_keypoints[m.queryIdx].pt + currKeyPointLocation = keypoints[m.trainIdx].pt + + spatial_distance = ( + prevKeyPointLocation[0] - currKeyPointLocation[0], + prevKeyPointLocation[1] - currKeyPointLocation[1], + ) + + if (np.abs(spatial_distance[0]) < max_spatial_distance[0]) and ( + np.abs(spatial_distance[1]) < max_spatial_distance[1] + ): + spatial_distances.append(spatial_distance) + matches.append(m) + + mean_spatial_distances = np.mean(spatial_distances, 0) + std_spatial_distances = np.std(spatial_distances, 0) + + inliesrs = ( + spatial_distances - mean_spatial_distances + ) < 2.5 * std_spatial_distances + + goodMatches = [] + prevPoints = [] + currPoints = [] + for i in range(len(matches)): + if inliesrs[i, 0] and inliesrs[i, 1]: + goodMatches.append(matches[i]) + prevPoints.append(self.prev_keypoints[matches[i].queryIdx].pt) + currPoints.append(keypoints[matches[i].trainIdx].pt) + + prevPoints = np.array(prevPoints) + currPoints = np.array(currPoints) + + # Draw the keypoint matches on the output image + if self.draw_keypoint_matches: + self.prev_img[:, :][mask == True] = 0 # noqa:E712 + self.matches_img = np.hstack((self.prev_img, img)) + self.matches_img = cv2.cvtColor(self.matches_img, cv2.COLOR_GRAY2BGR) + + W = np.size(self.prev_img, 1) + for m in goodMatches: + prev_pt = np.array(self.prev_keypoints[m.queryIdx].pt, dtype=np.int_) + curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_) + curr_pt[0] += W + color = np.random.randint(0, 255, (3,)) + color = (int(color[0]), int(color[1]), int(color[2])) + self.matches_img = cv2.line( + self.matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA + ) + self.matches_img = cv2.circle( + self.matches_img, prev_pt, 2, tuple(color), -1 + ) + self.matches_img = cv2.circle( + self.matches_img, curr_pt, 2, tuple(color), -1 + ) + for det in dets: + det = np.multiply(det, self.scale).astype(int) + start = (det[0] + w, det[1]) + end = (det[2] + w, det[3]) + self.matches_img = cv2.rectangle( + self.matches_img, start, end, (0, 0, 255), 2 + ) + for det in self.prev_dets: + det = np.multiply(det, self.scale).astype(int) + start = (det[0], det[1]) + end = (det[2], det[3]) + self.matches_img = cv2.rectangle( + self.matches_img, start, end, (0, 0, 255), 2 + ) + else: + self.matches_img = None + + # find rigid matrix + if (np.size(prevPoints, 0) > 4) and ( + np.size(prevPoints, 0) == np.size(prevPoints, 0) + ): + H, inliesrs = cv2.estimateAffinePartial2D( + prevPoints, currPoints, cv2.RANSAC + ) + + # upscale warp matrix to original images size + if self.scale < 1.0: + H[0, 2] /= self.scale + H[1, 2] /= self.scale + + if self.align: + self.prev_img_aligned = cv2.warpAffine( + self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR + ) + else: + print("Warning: not enough matching points") + + # Store to next iteration + self.prev_img = img.copy() + self.prev_keypoints = copy.copy(keypoints) + self.prev_descriptors = copy.copy(descriptors) + + return H + + +def main(): + sift = SIFT(scale=0.5, align=True, grayscale=True, draw_keypoint_matches=False) + curr_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg") + prev_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg") + curr_dets = np.array( + [ + [1083.8207, 541.5978, 1195.7952, 655.8790], # noqa:E241 + [1635.6456, 563.8348, 1695.4153, 686.6704], # noqa:E241 + [957.0879, 545.6558, 1042.6743, 611.8740], # noqa:E241,E261,E201 + [1550.0317, 562.5705, 1600.3931, 684.7425], # noqa:E241 + [78.8801, 714.3307, 121.0272, 817.6857], # noqa:E241,E261,E201 + [1382.9938, 512.2731, 1418.6012, 620.1938], # noqa:E241 + [1459.7921, 496.2123, 1488.5767, 584.3533], # noqa:E241 + [982.9818, 492.8579, 1013.6625, 517.9271], # noqa:E241,E261,E201 + [496.1809, 541.3972, 531.4617, 638.0989], # noqa:E241,E261,E201 + [1498.8512, 522.6646, 1526.1145, 587.7672], # noqa:E241 + [536.4527, 548.4061, 569.2723, 635.5656], # noqa:E241,E261,E201 + [247.8834, 580.8851, 287.2241, 735.3685], # noqa:E241,E261,E201 + [151.4096, 572.3918, 203.5401, 731.1011], # noqa:E241,E261,E201 + [1227.4098, 440.5505, 1252.7986, 489.5295], + ] # noqa:E241 + ) + prev_dets = np.array( + [ + [2.1069e-02, 6.7026e02, 4.9816e01, 8.8407e02], + [1.0765e03, 5.4009e02, 1.1883e03, 6.5219e02], + [1.5208e03, 5.6322e02, 1.5711e03, 6.7676e02], + [1.6111e03, 5.5926e02, 1.6640e03, 6.7443e02], + [9.5244e02, 5.4681e02, 1.0384e03, 6.1180e02], + [1.3691e03, 5.1258e02, 1.4058e03, 6.1695e02], + [1.2043e02, 7.0780e02, 1.7309e02, 8.0518e02], + [1.4454e03, 5.0919e02, 1.4724e03, 5.8270e02], + [9.7848e02, 4.9563e02, 1.0083e03, 5.1980e02], + [5.0166e02, 5.4778e02, 5.3796e02, 6.3940e02], + [1.4777e03, 5.1856e02, 1.5105e03, 5.9523e02], + [1.9540e02, 5.7292e02, 2.3711e02, 7.2717e02], + [2.7373e02, 5.8564e02, 3.1335e02, 7.3281e02], + [5.4038e02, 5.4735e02, 5.7359e02, 6.3797e02], + [1.2190e03, 4.4176e02, 1.2414e03, 4.9038e02], + ] + ) + + warp_matrix = sift.apply(prev_img, prev_dets) + warp_matrix = sift.apply(curr_img, curr_dets) + + start = time.process_time() + for i in range(0, 100): + warp_matrix = sift.apply(prev_img, prev_dets) + warp_matrix = sift.apply(curr_img, curr_dets) + end = time.process_time() + print("Total time", end - start) + print(warp_matrix) + + if sift.prev_img_aligned is not None: + curr_img = sift.preprocess(curr_img) + prev_img = sift.preprocess(prev_img) + weighted_img = cv2.addWeighted(curr_img, 0.5, sift.prev_img_aligned, 0.5, 0) + cv2.imshow("prev_img_aligned", weighted_img) + cv2.waitKey(0) + cv2.imwrite(str(BOXMOT / "motion/cmc/sift_aligned.jpg"), weighted_img) + + +if __name__ == "__main__": + main() diff --git a/src/juxtapose/trackers/boxmot/motion/cmc/sof.py b/src/juxtapose/trackers/boxmot/motion/cmc/sof.py new file mode 100644 index 0000000..10228da --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/cmc/sof.py @@ -0,0 +1,254 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import time + +import cv2 +import numpy as np + +from juxtapose.trackers.boxmot.motion.cmc.cmc_interface import CMCInterface +from juxtapose.trackers.boxmot.utils import BOXMOT +from juxtapose.trackers.boxmot.utils import logger as LOGGER + + +class SparseOptFlow(CMCInterface): + def __init__( + self, + warp_mode=cv2.MOTION_EUCLIDEAN, + eps=1e-5, + max_iter=100, + scale=0.1, + align=False, + grayscale=True, + draw_optical_flow=False, + ): + """Compute the warp matrix from src to dst. + + Parameters + ---------- + warp_mode: opencv flag + translation: cv2.MOTION_TRANSLATION + rotated and shifted: cv2.MOTION_EUCLIDEAN + affine(shift,rotated,shear): cv2.MOTION_AFFINE + homography(3d): cv2.MOTION_HOMOGRAPHY + eps: float + the threshold of the increment in the correlation coefficient between two iterations + max_iter: int + the number of iterations. + scale: float or [int, int] + scale_ratio: float + scale_size: [W, H] + align: bool + whether to warp affine or perspective transforms to the source image + grayscale: bool + whether to transform 3 channel RGB to single channel grayscale for faster computations + + Returns + ------- + warp matrix : ndarray + Returns the warp matrix from src to dst. + if motion models is homography, the warp matrix will be 3x3, otherwise 2x3 + src_aligned: ndarray + aligned source image of gray + """ + self.align = align + self.grayscale = grayscale + self.scale = scale + self.prev_img = None + self.draw_optical_flow = draw_optical_flow + + self.detector = cv2.FastFeatureDetector_create(threshold=20) + self.extractor = cv2.ORB_create(nfeatures=5) + self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING) + + def preprocess(self, img): + # bgr2gray + if self.grayscale: + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + + # resize + if self.scale is not None: + img = cv2.resize( + img, + (0, 0), + fx=self.scale, + fy=self.scale, + interpolation=cv2.INTER_LINEAR, + ) + + return img + + def apply(self, img, dets): + H = np.eye(2, 3) + + img = self.preprocess(img) + + h, w = img.shape + + # Lucas-Kanade is based on a local motion constancy assumption, + # where nearby pixels have the same displacement direction. Hence, it is better to discard + # dynamic object for feature extraction from static objects + mask = self.generate_mask(img, dets, self.scale) + + # handle first frame + if self.prev_img is None: + # find keypoints in first frame + keypoints = cv2.goodFeaturesToTrack( + img, + mask=mask, + maxCorners=3000, + qualityLevel=0.01, + minDistance=1, + blockSize=3, + useHarrisDetector=False, + k=0.04, + ) + + # if image lacks distinctive features, ignore this frame and try to initialize again + if keypoints is None: + return H + # initialize first frame + else: + # Initialize data + self.prev_img = img.copy() + self.prev_keypoints = keypoints.copy() + return H + + # sparse otical flow for sparse features using Lucas-Kanade with pyramids + try: + matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK( + self.prev_img, img, self.prev_keypoints, None + ) + except Exception as e: + LOGGER.warning(f"calcOpticalFlowPyrLK failed: {e}") + return H + + # calculate new positions of the keypoints between the previous frame (self.prev_img) + # and the current frame (img) using sparse optical flow (Lucas-Kanade with pyramids) + next_keypoints, status, err = cv2.calcOpticalFlowPyrLK( + self.prev_img, img, self.prev_keypoints, None + ) + + # for simplicity, if no keypoints are found, we discard the frame + if next_keypoints is None: + return H + + # keep points that were successfully matched + self.prev_keypoints = self.prev_keypoints[status == 1].reshape(-1, 1, 2) + next_keypoints = next_keypoints[status == 1].reshape(-1, 1, 2) + + # get affine matrix + try: + H, _ = cv2.estimateAffinePartial2D( + self.prev_keypoints, next_keypoints, cv2.RANSAC + ) + except Exception as e: + LOGGER.warning(f"Affine matrix could not be generated: {e}") + return H + finally: + if H is None: + return np.eye(2, 3) + + if self.draw_optical_flow: + self.warped_img = cv2.warpAffine( + self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR + ) + self.mask = np.zeros_like(img) + for i, (new, old) in enumerate(zip(next_keypoints, self.prev_keypoints)): + a, b = new.ravel() + c, d = old.ravel() + self.mask = cv2.line( + img=self.mask, + pt1=tuple(np.int32([a, b])), + pt2=tuple(np.int32([c, d])), + color=(255, 255, 255), + thickness=1, + ) + self.mask = cv2.circle( + img=self.mask, + center=tuple(np.int32([a, b])), + radius=1, + color=(255, 255, 255), + thickness=2, + ) + + # store to next iteration + self.prev_img = img.copy() + self.prevKeyPoints = next_keypoints.copy() + + # handle downscale + if self.scale < 1: + H[0, 2] /= self.scale + H[1, 2] /= self.scale + + return H + + +def main(): + sof = SparseOptFlow(scale=0.25, align=True, grayscale=True, draw_optical_flow=True) + curr_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg") + prev_img = cv2.imread("assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg") + curr_dets = np.array( + [ + [1083.8207, 541.5978, 1195.7952, 655.8790], # noqa:E241 + [1635.6456, 563.8348, 1695.4153, 686.6704], # noqa:E241 + [957.0879, 545.6558, 1042.6743, 611.8740], # noqa:E241,E261,E201 + [1550.0317, 562.5705, 1600.3931, 684.7425], # noqa:E241 + [78.8801, 714.3307, 121.0272, 817.6857], # noqa:E241,E261,E201 + [1382.9938, 512.2731, 1418.6012, 620.1938], # noqa:E241 + [1459.7921, 496.2123, 1488.5767, 584.3533], # noqa:E241 + [982.9818, 492.8579, 1013.6625, 517.9271], # noqa:E241,E261,E201 + [496.1809, 541.3972, 531.4617, 638.0989], # noqa:E241,E261,E201 + [1498.8512, 522.6646, 1526.1145, 587.7672], # noqa:E241 + [536.4527, 548.4061, 569.2723, 635.5656], # noqa:E241,E261,E201 + [247.8834, 580.8851, 287.2241, 735.3685], # noqa:E241,E261,E201 + [151.4096, 572.3918, 203.5401, 731.1011], # noqa:E241,E261,E201 + [1227.4098, 440.5505, 1252.7986, 489.5295], + ] # noqa:E241 + ) + prev_dets = np.array( + [ + [2.1069e-02, 6.7026e02, 4.9816e01, 8.8407e02], + [1.0765e03, 5.4009e02, 1.1883e03, 6.5219e02], + [1.5208e03, 5.6322e02, 1.5711e03, 6.7676e02], + [1.6111e03, 5.5926e02, 1.6640e03, 6.7443e02], + [9.5244e02, 5.4681e02, 1.0384e03, 6.1180e02], + [1.3691e03, 5.1258e02, 1.4058e03, 6.1695e02], + [1.2043e02, 7.0780e02, 1.7309e02, 8.0518e02], + [1.4454e03, 5.0919e02, 1.4724e03, 5.8270e02], + [9.7848e02, 4.9563e02, 1.0083e03, 5.1980e02], + [5.0166e02, 5.4778e02, 5.3796e02, 6.3940e02], + [1.4777e03, 5.1856e02, 1.5105e03, 5.9523e02], + [1.9540e02, 5.7292e02, 2.3711e02, 7.2717e02], + [2.7373e02, 5.8564e02, 3.1335e02, 7.3281e02], + [5.4038e02, 5.4735e02, 5.7359e02, 6.3797e02], + [1.2190e03, 4.4176e02, 1.2414e03, 4.9038e02], + ] + ) + + warp_matrix = sof.apply(prev_img, prev_dets) + warp_matrix = sof.apply(curr_img, curr_dets) + + start = time.process_time() + for i in range(0, 100): + warp_matrix = sof.apply(prev_img, prev_dets) + warp_matrix = sof.apply(curr_img, curr_dets) + end = time.process_time() + print("Total time", end - start) + print(warp_matrix) + + if sof.warped_img is not None: + curr_img = sof.preprocess(curr_img) + prev_img = sof.preprocess(prev_img) + + warped = cv2.addWeighted(sof.warped_img, 0.5, sof.mask, 0.5, 0) + warped = cv2.addWeighted(warped, 0.5, curr_img, 0.5, 0) + + # Display the frame with keypoints and optical flow tracks + cv2.imshow("Optical Flow", warped) + cv2.waitKey(0) + + cv2.imwrite(str(BOXMOT / "motion/cmc/sof_aligned.jpg"), warped) + + +if __name__ == "__main__": + main() diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/__init__.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/__init__.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/__init__.py new file mode 100644 index 0000000..30fa5e3 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/__init__.py @@ -0,0 +1,11 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from .botsort_kf_adapter import BotSortKalmanFilterAdapter +from .bytetrack_kf_adapter import ByteTrackKalmanFilterAdapter +from .ocsort_kf_adapter import OCSortKalmanFilterAdapter +from .strongsort_kf_adapter import StrongSortKalmanFilterAdapter + +__all__ = ("BotSortKalmanFilterAdapter", + "ByteTrackKalmanFilterAdapter", + "OCSortKalmanFilterAdapter", + "StrongSortKalmanFilterAdapter") diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/botsort_kf_adapter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/botsort_kf_adapter.py new file mode 100644 index 0000000..0990186 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/botsort_kf_adapter.py @@ -0,0 +1,170 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + +from ..kalman_filter import KalmanFilter +from ..kalman_filter import multi_predict as kf_multi_predict + + +class BotSortKalmanFilterAdapter(KalmanFilter): + ndim = 4 + + def __init__(self, dt=1): + super().__init__(dim_x=2 * self.ndim, dim_z=self.ndim) + + # Set transition matrix + for i in range(self.ndim): + self.F[i, self.ndim + i] = dt + + # Set observation matrix + self.H = np.eye(self.ndim, 2 * self.ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1.0 / 20 + self._std_weight_velocity = 1.0 / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, w, h) with center position (x, y), + width w, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + self.x = (np.r_[mean_pos, mean_vel]).T + + std = [ + 2 * self._std_weight_position * measurement[2], + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[2], + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[2], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[2], + 10 * self._std_weight_velocity * measurement[3], + ] + self.P = np.diag(np.square(std)) + + return self.x.T, self.P + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[2], + self._std_weight_position * mean[3], + self._std_weight_position * mean[2], + self._std_weight_position * mean[3], + ] + std_vel = [ + self._std_weight_velocity * mean[2], + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[2], + self._std_weight_velocity * mean[3], + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + super().predict(Q=motion_cov) + + return self.x.T, self.P + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, w, h), where (x, y) + is the center position, w the width, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + self.x = mean.T + self.P = covariance + + std = [ + self._std_weight_position * mean[2], + self._std_weight_position * mean[3], + self._std_weight_position * mean[2], + self._std_weight_position * mean[3], + ] + innovation_cov = np.diag(np.square(std)) + + super().update(measurement, R=innovation_cov) + + return self.x.T, self.P + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 2], + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 2], + self._std_weight_position * mean[:, 3], + ] + std_vel = [ + self._std_weight_velocity * mean[:, 2], + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 2], + self._std_weight_velocity * mean[:, 3], + ] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + return kf_multi_predict(mean, covariance, F=self.F, Q=motion_cov) diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/bytetrack_kf_adapter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/bytetrack_kf_adapter.py new file mode 100644 index 0000000..20734b4 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/bytetrack_kf_adapter.py @@ -0,0 +1,170 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + +from ..kalman_filter import KalmanFilter +from ..kalman_filter import multi_predict as kf_multi_predict + + +class ByteTrackKalmanFilterAdapter(KalmanFilter): + ndim = 4 + + def __init__(self, dt=1): + super().__init__(dim_x=2 * self.ndim, dim_z=self.ndim) + + # Set transition matrix + for i in range(self.ndim): + self.F[i, self.ndim + i] = dt + + # Set observation matrix + self.H = np.eye(self.ndim, 2 * self.ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1.0 / 20 + self._std_weight_velocity = 1.0 / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, w, h) with center position (x, y), + width w, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + self.x = (np.r_[mean_pos, mean_vel]).T + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3], + ] + self.P = np.diag(np.square(std)) + + return self.x.T, self.P + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3], + ] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3], + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + super().predict(Q=motion_cov) + + return self.x.T, self.P + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, w, h), where (x, y) + is the center position, w the width, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + self.x = mean.T + self.P = covariance + + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3], + ] + innovation_cov = np.diag(np.square(std)) + + super().update(measurement, R=innovation_cov) + + return self.x.T, self.P + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3] + ] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3] + ] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + return kf_multi_predict(mean, covariance, F=self.F, Q=motion_cov) diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/hybridsort_kf_adapter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/hybridsort_kf_adapter.py new file mode 100644 index 0000000..6a25a57 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/hybridsort_kf_adapter.py @@ -0,0 +1,1555 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +""" +This module implements the linear Kalman filter in both an object +oriented and procedural form. The KalmanFilter class implements +the filter by storing the various matrices in instance variables, +minimizing the amount of bookkeeping you have to do. +All Kalman filters operate with a predict->update cycle. The +predict step, implemented with the method or function predict(), +uses the state transition matrix F to predict the state in the next +time period (epoch). The state is stored as a gaussian (x, P), where +x is the state (column) vector, and P is its covariance. Covariance +matrix Q specifies the process covariance. In Bayesian terms, this +prediction is called the *prior*, which you can think of colloquially +as the estimate prior to incorporating the measurement. +The update step, implemented with the method or function `update()`, +incorporates the measurement z with covariance R, into the state +estimate (x, P). The class stores the system uncertainty in S, +the innovation (residual between prediction and measurement in +measurement space) in y, and the Kalman gain in k. The procedural +form returns these variables to you. In Bayesian terms this computes +the *posterior* - the estimate after the information from the +measurement is incorporated. +Whether you use the OO form or procedural form is up to you. If +matrices such as H, R, and F are changing each epoch, you'll probably +opt to use the procedural form. If they are unchanging, the OO +form is perhaps easier to use since you won't need to keep track +of these matrices. This is especially useful if you are implementing +banks of filters or comparing various KF designs for performance; +a trivial coding bug could lead to using the wrong sets of matrices. +This module also offers an implementation of the RTS smoother, and +other helper functions, such as log likelihood computations. +The Saver class allows you to easily save the state of the +KalmanFilter class after every update +This module expects NumPy arrays for all values that expect +arrays, although in a few cases, particularly method parameters, +it will accept types that convert to NumPy arrays, such as lists +of lists. These exceptions are documented in the method or function. +Examples +-------- +The following example constructs a constant velocity kinematic +filter, filters noisy data, and plots the results. It also demonstrates +using the Saver class to save the state of the filter at each epoch. +.. code-block:: Python + import matplotlib.pyplot as plt + import numpy as np + from filterpy.kalman import KalmanFilter + from filterpy.common import Q_discrete_white_noise, Saver + r_std, q_std = 2., 0.003 + cv = KalmanFilter(dim_x=2, dim_z=1) + cv.x = np.array([[0., 1.]]) # position, velocity + cv.F = np.array([[1, dt],[ [0, 1]]) + cv.R = np.array([[r_std^^2]]) + f.H = np.array([[1., 0.]]) + f.P = np.diag([.1^^2, .03^^2) + f.Q = Q_discrete_white_noise(2, dt, q_std**2) + saver = Saver(cv) + for z in range(100): + cv.predict() + cv.update([z + randn() * r_std]) + saver.save() # save the filter's state + saver.to_array() + plt.plot(saver.x[:, 0]) + # plot all of the priors + plt.plot(saver.x_prior[:, 0]) + # plot mahalanobis distance + plt.figure() + plt.plot(saver.mahalanobis) +This code implements the same filter using the procedural form + x = np.array([[0., 1.]]) # position, velocity + F = np.array([[1, dt],[ [0, 1]]) + R = np.array([[r_std^^2]]) + H = np.array([[1., 0.]]) + P = np.diag([.1^^2, .03^^2) + Q = Q_discrete_white_noise(2, dt, q_std**2) + for z in range(100): + x, P = predict(x, P, F=F, Q=Q) + x, P = update(x, P, z=[z + randn() * r_std], R=R, H=H) + xs.append(x[0, 0]) + plt.plot(xs) +For more examples see the test subdirectory, or refer to the +book cited below. In it I both teach Kalman filtering from basic +principles, and teach the use of this library in great detail. +FilterPy library. +http://github.com/rlabbe/filterpy +Documentation at: +https://filterpy.readthedocs.org +Supporting book at: +https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python +This is licensed under an MIT license. See the readme.MD file +for more information. +Copyright 2014-2018 Roger R Labbe Jr. +""" + +from __future__ import absolute_import, division + +import sys +from copy import deepcopy +from math import exp, log, sqrt + +import numpy as np +import numpy.linalg as linalg +from filterpy.common import pretty_str, reshape_z +from filterpy.stats import logpdf +from numpy import dot, eye, isscalar, shape, zeros + + +class KalmanFilterNew_score_new(object): + """ Implements a Kalman filter. You are responsible for setting the + various state variables to reasonable values; the defaults will + not give you a functional filter. + For now the best documentation is my free book Kalman and Bayesian + Filters in Python [2]_. The test files in this directory also give you a + basic idea of use, albeit without much description. + In brief, you will first construct this object, specifying the size of + the state vector with dim_x and the size of the measurement vector that + you will be using with dim_z. These are mostly used to perform size checks + when you assign values to the various matrices. For example, if you + specified dim_z=2 and then try to assign a 3x3 matrix to R (the + measurement noise matrix you will get an assert exception because R + should be 2x2. (If for whatever reason you need to alter the size of + things midstream just use the underscore version of the matrices to + assign directly: your_filter._R = a_3x3_matrix.) + After construction the filter will have default matrices created for you, + but you must specify the values for each. It’s usually easiest to just + overwrite them rather than assign to each element yourself. This will be + clearer in the example below. All are of type numpy.array. + Examples + -------- + Here is a filter that tracks position and velocity using a sensor that only + reads position. + First construct the object with the required dimensionality. Here the state + (`dim_x`) has 2 coefficients (position and velocity), and the measurement + (`dim_z`) has one. In FilterPy `x` is the state, `z` is the measurement. + .. code:: + from filterpy.kalman import KalmanFilter + f = KalmanFilter (dim_x=2, dim_z=1) + Assign the initial value for the state (position and velocity). You can do this + with a two dimensional array like so: + .. code:: + f.x = np.array([[2.], # position + [0.]]) # velocity + or just use a one dimensional array, which I prefer doing. + .. code:: + f.x = np.array([2., 0.]) + Define the state transition matrix: + .. code:: + f.F = np.array([[1.,1.], + [0.,1.]]) + Define the measurement function. Here we need to convert a position-velocity + vector into just a position vector, so we use: + .. code:: + f.H = np.array([[1., 0.]]) + Define the state's covariance matrix P. + .. code:: + f.P = np.array([[1000., 0.], + [ 0., 1000.] ]) + Now assign the measurement noise. Here the dimension is 1x1, so I can + use a scalar + .. code:: + f.R = 5 + I could have done this instead: + .. code:: + f.R = np.array([[5.]]) + Note that this must be a 2 dimensional array. + Finally, I will assign the process noise. Here I will take advantage of + another FilterPy library function: + .. code:: + from filterpy.common import Q_discrete_white_noise + f.Q = Q_discrete_white_noise(dim=2, dt=0.1, var=0.13) + Now just perform the standard predict/update loop: + .. code:: + while some_condition_is_true: + z = get_sensor_reading() + f.predict() + f.update(z) + do_something_with_estimate (f.x) + **Procedural Form** + This module also contains stand alone functions to perform Kalman filtering. + Use these if you are not a fan of objects. + **Example** + .. code:: + while True: + z, R = read_sensor() + x, P = predict(x, P, F, Q) + x, P = update(x, P, z, R, H) + See my book Kalman and Bayesian Filters in Python [2]_. + You will have to set the following attributes after constructing this + object for the filter to perform properly. Please note that there are + various checks in place to ensure that you have made everything the + 'correct' size. However, it is possible to provide incorrectly sized + arrays such that the linear algebra can not perform an operation. + It can also fail silently - you can end up with matrices of a size that + allows the linear algebra to work, but are the wrong shape for the problem + you are trying to solve. + Parameters + ---------- + dim_x : int + Number of state variables for the Kalman filter. For example, if + you are tracking the position and velocity of an object in two + dimensions, dim_x would be 4. + This is used to set the default size of P, Q, and u + dim_z : int + Number of of measurement inputs. For example, if the sensor + provides you with position in (x,y), dim_z would be 2. + dim_u : int (optional) + size of the control input, if it is being used. + Default value of 0 indicates it is not used. + compute_log_likelihood : bool (default = True) + Computes log likelihood by default, but this can be a slow + computation, so if you never use it you can turn this computation + off. + Attributes + ---------- + x : numpy.array(dim_x, 1) + Current state estimate. Any call to update() or predict() updates + this variable. + P : numpy.array(dim_x, dim_x) + Current state covariance matrix. Any call to update() or predict() + updates this variable. + x_prior : numpy.array(dim_x, 1) + Prior (predicted) state estimate. The *_prior and *_post attributes + are for convenience; they store the prior and posterior of the + current epoch. Read Only. + P_prior : numpy.array(dim_x, dim_x) + Prior (predicted) state covariance matrix. Read Only. + x_post : numpy.array(dim_x, 1) + Posterior (updated) state estimate. Read Only. + P_post : numpy.array(dim_x, dim_x) + Posterior (updated) state covariance matrix. Read Only. + z : numpy.array + Last measurement used in update(). Read only. + R : numpy.array(dim_z, dim_z) + Measurement noise covariance matrix. Also known as the + observation covariance. + Q : numpy.array(dim_x, dim_x) + Process noise covariance matrix. Also known as the transition + covariance. + F : numpy.array() + State Transition matrix. Also known as `A` in some formulation. + H : numpy.array(dim_z, dim_x) + Measurement function. Also known as the observation matrix, or as `C`. + y : numpy.array + Residual of the update step. Read only. + K : numpy.array(dim_x, dim_z) + Kalman gain of the update step. Read only. + S : numpy.array + System uncertainty (P projected to measurement space). Read only. + SI : numpy.array + Inverse system uncertainty. Read only. + log_likelihood : float + log-likelihood of the last measurement. Read only. + likelihood : float + likelihood of last measurement. Read only. + Computed from the log-likelihood. The log-likelihood can be very + small, meaning a large negative value such as -28000. Taking the + exp() of that results in 0.0, which can break typical algorithms + which multiply by this value, so by default we always return a + number >= sys.float_info.min. + mahalanobis : float + mahalanobis distance of the innovation. Read only. + inv : function, default numpy.linalg.inv + If you prefer another inverse function, such as the Moore-Penrose + pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv + This is only used to invert self.S. If you know it is diagonal, you + might choose to set it to filterpy.common.inv_diagonal, which is + several times faster than numpy.linalg.inv for diagonal matrices. + alpha : float + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon [1]_. + References + ---------- + .. [1] Dan Simon. "Optimal State Estimation." John Wiley & Sons. + p. 208-212. (2006) + .. [2] Roger Labbe. "Kalman and Bayesian Filters in Python" + https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python + """ + + def __init__(self, dim_x, dim_z, dim_u=0, args=None): + if dim_x < 1: + raise ValueError('dim_x must be 1 or greater') + if dim_z < 1: + raise ValueError('dim_z must be 1 or greater') + if dim_u < 0: + raise ValueError('dim_u must be 0 or greater') + + self.dim_x = dim_x + self.dim_z = dim_z + self.dim_u = dim_u + + self.x = zeros((dim_x, 1)) # state + self.P = eye(dim_x) # uncertainty covariance + self.Q = eye(dim_x) # process uncertainty + self.B = None # control transition matrix + self.F = eye(dim_x) # state transition matrix + self.H = zeros((dim_z, dim_x)) # measurement function + self.R = eye(dim_z) # measurement uncertainty + self._alpha_sq = 1. # fading memory control + self.M = np.zeros((dim_x, dim_z)) # process-measurement cross correlation + self.z = np.array([[None] * self.dim_z]).T + + # gain and residual are computed during the innovation step. We + # save them so that in case you want to inspect them for various + # purposes + self.K = np.zeros((dim_x, dim_z)) # kalman gain + self.y = zeros((dim_z, 1)) + self.S = np.zeros((dim_z, dim_z)) # system uncertainty + self.SI = np.zeros((dim_z, dim_z)) # inverse system uncertainty + + # identity matrix. Do not alter this. + self._I = np.eye(dim_x) + + # these will always be a copy of x,P after predict() is called + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + # these will always be a copy of x,P after update() is called + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + # Only computed only if requested via property + self._log_likelihood = log(sys.float_info.min) + self._likelihood = sys.float_info.min + self._mahalanobis = None + + # keep all observations + self.history_obs = [] + + self.inv = np.linalg.inv + + self.attr_saved = None + self.observed = False + self.args = args + + def predict(self, u=None, B=None, F=None, Q=None): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. + Parameters + ---------- + u : np.array, default 0 + Optional control vector. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + F : np.array(dim_x, dim_x), or None + Optional state transition matrix; a value of None + will cause the filter to use `self.F`. + Q : np.array(dim_x, dim_x), scalar, or None + Optional process noise matrix; a value of None will cause the + filter to use `self.Q`. + """ + + if B is None: + B = self.B + if F is None: + F = self.F + if Q is None: + Q = self.Q + elif isscalar(Q): + Q = eye(self.dim_x) * Q + + # x = Fx + Bu + if B is not None and u is not None: + self.x = dot(F, self.x) + dot(B, u) + else: + self.x = dot(F, self.x) + + # P = FPF' + Q + self.P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q + + # save prior + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + def freeze(self): + """ + Save the parameters before non-observation forward + """ + self.attr_saved = deepcopy(self.__dict__) + + def unfreeze(self): + if self.attr_saved is not None: + new_history = deepcopy(self.history_obs) + self.__dict__ = self.attr_saved + # self.history_obs = new_history + self.history_obs = self.history_obs[:-1] + occur = [int(d is None) for d in new_history] + indices = np.where(np.array(occur) == 0)[0] + index1 = indices[-2] + index2 = indices[-1] + box1 = new_history[index1] + x1, y1, s1, c1, r1 = box1 + w1 = np.sqrt(s1 * r1) + h1 = np.sqrt(s1 / r1) + box2 = new_history[index2] + x2, y2, s2, c2, r2 = box2 + w2 = np.sqrt(s2 * r2) + h2 = np.sqrt(s2 / r2) + time_gap = index2 - index1 + dx = (x2 - x1) / time_gap + dy = (y2 - y1) / time_gap + dw = (w2 - w1) / time_gap + dh = (h2 - h1) / time_gap + dc = (c2 - c1) / time_gap + for i in range(index2 - index1): + """ + The default virtual trajectory generation is by linear + motion (constant speed hypothesis), you could modify this + part to implement your own. + """ + x = x1 + (i + 1) * dx + y = y1 + (i + 1) * dy + w = w1 + (i + 1) * dw + h = h1 + (i + 1) * dh + s = w * h + r = w / float(h) + c = c1 + (i + 1) * dc + new_box = np.array([x, y, s, c, r]).reshape((5, 1)) + """ + I still use predict-update loop here to refresh the parameters, + but this can be faster by directly modifying the internal parameters + as suggested in the paper. I keep this naive but slow way for + easy read and understanding + """ + + if not i == (index2 - index1 - 1): + self.update(new_box) + self.predict() + else: + self.update(new_box) + + def update(self, z, R=None, H=None): + """ + Add a new measurement (z) to the Kalman filter. + If z is None, nothing is computed. However, x_post and P_post are + updated with the prior (x_prior, P_prior), and self.z is set to None. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + If you pass in a value of H, z must be a column vector the + of the correct size. + R : np.array, scalar, or None + Optionally provide R to override the measurement noise for this + one call, otherwise self.R will be used. + H : np.array, or None + Optionally provide H to override the measurement function for this + one call, otherwise self.H will be used. + """ + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + # append the observation + self.history_obs.append(z) + + if z is None: + if self.observed: + """ + Got no observation so freeze the current parameters for future + potential online smoothing. + """ + self.freeze() + self.observed = False + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + # self.observed = True + if not self.observed: + """ + Get observation, use online smoothing to re-update parameters + """ + self.unfreeze() + self.observed = True + + if R is None: + R = self.R + elif isscalar(R): + R = eye(self.dim_z) * R + + # if self.args.use_nsa_kalman: + # if confidence > 0.6: + # R = [(1 - confidence) * self.args.nsa_kalman_interval * x for x in R] + # else: + # R = [self.args.nsa_kalman_interval_sec * x for x in R] + + if H is None: + z = reshape_z(z, self.dim_z, self.x.ndim) + H = self.H + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(H, self.x) + + # common subexpression for speed + PHT = dot(self.P, H.T) + + # S = HPH' + R + # project system uncertainty into measurement space + self.S = dot(H, PHT) + R + self.SI = self.inv(self.S) + # K = PH'inv(S) + # map system uncertainty into kalman gain + self.K = dot(PHT, self.SI) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + + # P = (I-KH)P(I-KH)' + KRK' + # This is more numerically stable + # and works for non-optimal K vs the equation + # P = (I-KH)P usually seen in the literature. + + I_KH = self._I - dot(self.K, H) + self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(self.K, R), self.K.T) + + # save measurement and posterior state + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + def predict_steadystate(self, u=0, B=None): + """ + Predict state (prior) using the Kalman filter state propagation + equations. Only x is updated, P is left unchanged. See + update_steadstate() for a longer explanation of when to use this + method. + Parameters + ---------- + u : np.array + Optional control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + """ + + if B is None: + B = self.B + + # x = Fx + Bu + if B is not None: + self.x = dot(self.F, self.x) + dot(B, u) + else: + self.x = dot(self.F, self.x) + + # save prior + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + def update_steadystate(self, z): + """ + Add a new measurement (z) to the Kalman filter without recomputing + the Kalman gain K, the state covariance P, or the system + uncertainty S. + You can use this for LTI systems since the Kalman gain and covariance + converge to a fixed value. Precompute these and assign them explicitly, + or run the Kalman filter using the normal predict()/update(0 cycle + until they converge. + The main advantage of this call is speed. We do significantly less + computation, notably avoiding a costly matrix inversion. + Use in conjunction with predict_steadystate(), otherwise P will grow + without bound. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + + """ + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + if z is None: + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + z = reshape_z(z, self.dim_z, self.x.ndim) + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(self.H, self.x) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + def update_correlated(self, z, R=None, H=None): + """ Add a new measurement (z) to the Kalman filter assuming that + process noise and measurement noise are correlated as defined in + the `self.M` matrix. + A partial derivation can be found in [1] + If z is None, nothing is changed. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + R : np.array, scalar, or None + Optionally provide R to override the measurement noise for this + one call, otherwise self.R will be used. + H : np.array, or None + Optionally provide H to override the measurement function for this + one call, otherwise self.H will be used. + References + ---------- + .. [1] Bulut, Y. (2011). Applied Kalman filter theory (Doctoral dissertation, Northeastern University). + http://people.duke.edu/~hpgavin/SystemID/References/Balut-KalmanFilter-PhD-NEU-2011.pdf + """ + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + if z is None: + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + if R is None: + R = self.R + elif isscalar(R): + R = eye(self.dim_z) * R + + # rename for readability and a tiny extra bit of speed + if H is None: + z = reshape_z(z, self.dim_z, self.x.ndim) + H = self.H + + # handle special case: if z is in form [[z]] but x is not a column + # vector dimensions will not match + if self.x.ndim == 1 and shape(z) == (1, 1): + z = z[0] + + if shape(z) == (): # is it scalar, e.g. z=3 or z=np.array(3) + z = np.asarray([z]) + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(H, self.x) + + # common subexpression for speed + PHT = dot(self.P, H.T) + + # project system uncertainty into measurement space + self.S = dot(H, PHT) + dot(H, self.M) + dot(self.M.T, H.T) + R + self.SI = self.inv(self.S) + + # K = PH'inv(S) + # map system uncertainty into kalman gain + self.K = dot(PHT + self.M, self.SI) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + self.P = self.P - dot(self.K, dot(H, self.P) + self.M.T) + + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + def batch_filter(self, zs, Fs=None, Qs=None, Hs=None, + Rs=None, Bs=None, us=None, update_first=False, + saver=None): + """ Batch processes a sequences of measurements. + Parameters + ---------- + zs : list-like + list of measurements at each time step `self.dt`. Missing + measurements must be represented by `None`. + Fs : None, list-like, default=None + optional value or list of values to use for the state transition + matrix F. + If Fs is None then self.F is used for all epochs. + Otherwise it must contain a list-like list of F's, one for + each epoch. This allows you to have varying F per epoch. + Qs : None, np.array or list-like, default=None + optional value or list of values to use for the process error + covariance Q. + If Qs is None then self.Q is used for all epochs. + Otherwise it must contain a list-like list of Q's, one for + each epoch. This allows you to have varying Q per epoch. + Hs : None, np.array or list-like, default=None + optional list of values to use for the measurement matrix H. + If Hs is None then self.H is used for all epochs. + If Hs contains a single matrix, then it is used as H for all + epochs. + Otherwise it must contain a list-like list of H's, one for + each epoch. This allows you to have varying H per epoch. + Rs : None, np.array or list-like, default=None + optional list of values to use for the measurement error + covariance R. + If Rs is None then self.R is used for all epochs. + Otherwise it must contain a list-like list of R's, one for + each epoch. This allows you to have varying R per epoch. + Bs : None, np.array or list-like, default=None + optional list of values to use for the control transition matrix B. + If Bs is None then self.B is used for all epochs. + Otherwise it must contain a list-like list of B's, one for + each epoch. This allows you to have varying B per epoch. + us : None, np.array or list-like, default=None + optional list of values to use for the control input vector; + If us is None then None is used for all epochs (equivalent to 0, + or no control input). + Otherwise it must contain a list-like list of u's, one for + each epoch. + update_first : bool, optional, default=False + controls whether the order of operations is update followed by + predict, or predict followed by update. Default is predict->update. + saver : filterpy.common.Saver, optional + filterpy.common.Saver object. If provided, saver.save() will be + called after every epoch + Returns + ------- + means : np.array((n,dim_x,1)) + array of the state for each time step after the update. Each entry + is an np.array. In other words `means[k,:]` is the state at step + `k`. + covariance : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the update. + In other words `covariance[k,:,:]` is the covariance at step `k`. + means_predictions : np.array((n,dim_x,1)) + array of the state for each time step after the predictions. Each + entry is an np.array. In other words `means[k,:]` is the state at + step `k`. + covariance_predictions : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the prediction. + In other words `covariance[k,:,:]` is the covariance at step `k`. + Examples + -------- + .. code-block:: Python + # this example demonstrates tracking a measurement where the time + # between measurement varies, as stored in dts. This requires + # that F be recomputed for each epoch. The output is then smoothed + # with an RTS smoother. + zs = [t + random.randn()*4 for t in range (40)] + Fs = [np.array([[1., dt], [0, 1]] for dt in dts] + (mu, cov, _, _) = kf.batch_filter(zs, Fs=Fs) + (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs) + """ + + n = np.size(zs, 0) + if Fs is None: + Fs = [self.F] * n + if Qs is None: + Qs = [self.Q] * n + if Hs is None: + Hs = [self.H] * n + if Rs is None: + Rs = [self.R] * n + if Bs is None: + Bs = [self.B] * n + if us is None: + us = [0] * n + + # mean estimates from Kalman Filter + if self.x.ndim == 1: + means = zeros((n, self.dim_x)) + means_p = zeros((n, self.dim_x)) + else: + means = zeros((n, self.dim_x, 1)) + means_p = zeros((n, self.dim_x, 1)) + + # state covariances from Kalman Filter + covariances = zeros((n, self.dim_x, self.dim_x)) + covariances_p = zeros((n, self.dim_x, self.dim_x)) + + if update_first: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + + self.update(z, R=R, H=H) + means[i, :] = self.x + covariances[i, :, :] = self.P + + self.predict(u=u, B=B, F=F, Q=Q) + means_p[i, :] = self.x + covariances_p[i, :, :] = self.P + + if saver is not None: + saver.save() + else: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + + self.predict(u=u, B=B, F=F, Q=Q) + means_p[i, :] = self.x + covariances_p[i, :, :] = self.P + + self.update(z, R=R, H=H) + means[i, :] = self.x + covariances[i, :, :] = self.P + + if saver is not None: + saver.save() + + return (means, covariances, means_p, covariances_p) + + def rts_smoother(self, Xs, Ps, Fs=None, Qs=None, inv=np.linalg.inv): + """ + Runs the Rauch-Tung-Striebel Kalman smoother on a set of + means and covariances computed by a Kalman filter. The usual input + would come from the output of `KalmanFilter.batch_filter()`. + Parameters + ---------- + Xs : numpy.array + array of the means (state variable x) of the output of a Kalman + filter. + Ps : numpy.array + array of the covariances of the output of a kalman filter. + Fs : list-like collection of numpy.array, optional + State transition matrix of the Kalman filter at each time step. + Optional, if not provided the filter's self.F will be used + Qs : list-like collection of numpy.array, optional + Process noise of the Kalman filter at each time step. Optional, + if not provided the filter's self.Q will be used + inv : function, default numpy.linalg.inv + If you prefer another inverse function, such as the Moore-Penrose + pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv + Returns + ------- + x : numpy.ndarray + smoothed means + P : numpy.ndarray + smoothed state covariances + K : numpy.ndarray + smoother gain at each step + Pp : numpy.ndarray + Predicted state covariances + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + (mu, cov, _, _) = kalman.batch_filter(zs) + (x, P, K, Pp) = rts_smoother(mu, cov, kf.F, kf.Q) + """ + + if len(Xs) != len(Ps): + raise ValueError('length of Xs and Ps must be the same') + + n = Xs.shape[0] + dim_x = Xs.shape[1] + + if Fs is None: + Fs = [self.F] * n + if Qs is None: + Qs = [self.Q] * n + + # smoother gain + K = zeros((n, dim_x, dim_x)) + + x, P, Pp = Xs.copy(), Ps.copy(), Ps.copy() + for k in range(n - 2, -1, -1): + Pp[k] = dot(dot(Fs[k + 1], P[k]), Fs[k + 1].T) + Qs[k + 1] + + K[k] = dot(dot(P[k], Fs[k + 1].T), inv(Pp[k])) + x[k] += dot(K[k], x[k + 1] - dot(Fs[k + 1], x[k])) + P[k] += dot(dot(K[k], P[k + 1] - Pp[k]), K[k].T) + + return (x, P, K, Pp) + + def get_prediction(self, u=None, B=None, F=None, Q=None): + """ + Predict next state (prior) using the Kalman filter state propagation + equations and returns it without modifying the object. + Parameters + ---------- + u : np.array, default 0 + Optional control vector. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + F : np.array(dim_x, dim_x), or None + Optional state transition matrix; a value of None + will cause the filter to use `self.F`. + Q : np.array(dim_x, dim_x), scalar, or None + Optional process noise matrix; a value of None will cause the + filter to use `self.Q`. + Returns + ------- + (x, P) : tuple + State vector and covariance array of the prediction. + """ + + if B is None: + B = self.B + if F is None: + F = self.F + if Q is None: + Q = self.Q + elif isscalar(Q): + Q = eye(self.dim_x) * Q + + # x = Fx + Bu + if B is not None and u is not None: + x = dot(F, self.x) + dot(B, u) + else: + x = dot(F, self.x) + + # P = FPF' + Q + P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q + + return x, P + + def get_update(self, z=None): + """ + Computes the new estimate based on measurement `z` and returns it + without altering the state of the filter. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + Returns + ------- + (x, P) : tuple + State vector and covariance array of the update. + """ + + if z is None: + return self.x, self.P + z = reshape_z(z, self.dim_z, self.x.ndim) + + R = self.R + H = self.H + P = self.P + x = self.x + + # error (residual) between measurement and prediction + y = z - dot(H, x) + + # common subexpression for speed + PHT = dot(P, H.T) + + # project system uncertainty into measurement space + S = dot(H, PHT) + R + + # map system uncertainty into kalman gain + K = dot(PHT, self.inv(S)) + + # predict new x with residual scaled by the kalman gain + x = x + dot(K, y) + + # P = (I-KH)P(I-KH)' + KRK' + I_KH = self._I - dot(K, H) + P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T) + + return x, P + + def residual_of(self, z): + """ + Returns the residual for the given measurement (z). Does not alter + the state of the filter. + """ + z = reshape_z(z, self.dim_z, self.x.ndim) + return z - dot(self.H, self.x_prior) + + def measurement_of_state(self, x): + """ + Helper function that converts a state into a measurement. + Parameters + ---------- + x : np.array + kalman state vector + Returns + ------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + """ + + return dot(self.H, x) + + @property + def log_likelihood(self): + """ + log-likelihood of the last measurement. + """ + if self._log_likelihood is None: + self._log_likelihood = logpdf(x=self.y, cov=self.S) + return self._log_likelihood + + @property + def likelihood(self): + """ + Computed from the log-likelihood. The log-likelihood can be very + small, meaning a large negative value such as -28000. Taking the + exp() of that results in 0.0, which can break typical algorithms + which multiply by this value, so by default we always return a + number >= sys.float_info.min. + """ + if self._likelihood is None: + self._likelihood = exp(self.log_likelihood) + if self._likelihood == 0: + self._likelihood = sys.float_info.min + return self._likelihood + + @property + def mahalanobis(self): + """" + Mahalanobis distance of measurement. E.g. 3 means measurement + was 3 standard deviations away from the predicted value. + Returns + ------- + mahalanobis : float + """ + if self._mahalanobis is None: + self._mahalanobis = sqrt(float(dot(dot(self.y.T, self.SI), self.y))) + return self._mahalanobis + + @property + def alpha(self): + """ + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon [1]_. + """ + return self._alpha_sq**.5 + + def log_likelihood_of(self, z): + """ + log likelihood of the measurement `z`. This should only be called + after a call to update(). Calling after predict() will yield an + incorrect result.""" + + if z is None: + return log(sys.float_info.min) + return logpdf(z, dot(self.H, self.x), self.S) + + @alpha.setter + def alpha(self, value): + if not np.isscalar(value) or value < 1: + raise ValueError('alpha must be a float greater than 1') + + self._alpha_sq = value**2 + + def __repr__(self): + return '\n'.join([ + 'KalmanFilter object', + pretty_str('dim_x', self.dim_x), + pretty_str('dim_z', self.dim_z), + pretty_str('dim_u', self.dim_u), + pretty_str('x', self.x), + pretty_str('P', self.P), + pretty_str('x_prior', self.x_prior), + pretty_str('P_prior', self.P_prior), + pretty_str('x_post', self.x_post), + pretty_str('P_post', self.P_post), + pretty_str('F', self.F), + pretty_str('Q', self.Q), + pretty_str('R', self.R), + pretty_str('H', self.H), + pretty_str('K', self.K), + pretty_str('y', self.y), + pretty_str('S', self.S), + pretty_str('SI', self.SI), + pretty_str('M', self.M), + pretty_str('B', self.B), + pretty_str('z', self.z), + pretty_str('log-likelihood', self.log_likelihood), + pretty_str('likelihood', self.likelihood), + pretty_str('mahalanobis', self.mahalanobis), + pretty_str('alpha', self.alpha), + pretty_str('inv', self.inv) + ]) + + def test_matrix_dimensions(self, z=None, H=None, R=None, F=None, Q=None): + """ + Performs a series of asserts to check that the size of everything + is what it should be. This can help you debug problems in your design. + If you pass in H, R, F, Q those will be used instead of this object's + value for those matrices. + Testing `z` (the measurement) is problamatic. x is a vector, and can be + implemented as either a 1D array or as a nx1 column vector. Thus Hx + can be of different shapes. Then, if Hx is a single value, it can + be either a 1D array or 2D vector. If either is true, z can reasonably + be a scalar (either '3' or np.array('3') are scalars under this + definition), a 1D, 1 element array, or a 2D, 1 element array. You are + allowed to pass in any combination that works. + """ + + if H is None: + H = self.H + if R is None: + R = self.R + if F is None: + F = self.F + if Q is None: + Q = self.Q + x = self.x + P = self.P + + assert x.ndim == 1 or x.ndim == 2, \ + "x must have one or two dimensions, but has {}".format(x.ndim) + + if x.ndim == 1: + assert x.shape[0] == self.dim_x, \ + "Shape of x must be ({},{}), but is {}".format( + self.dim_x, 1, x.shape) + else: + assert x.shape == (self.dim_x, 1), \ + "Shape of x must be ({},{}), but is {}".format( + self.dim_x, 1, x.shape) + + assert P.shape == (self.dim_x, self.dim_x), \ + "Shape of P must be ({},{}), but is {}".format( + self.dim_x, self.dim_x, P.shape) + + assert Q.shape == (self.dim_x, self.dim_x), \ + "Shape of Q must be ({},{}), but is {}".format( + self.dim_x, self.dim_x, P.shape) + + assert F.shape == (self.dim_x, self.dim_x), \ + "Shape of F must be ({},{}), but is {}".format( + self.dim_x, self.dim_x, F.shape) + + assert np.ndim(H) == 2, \ + "Shape of H must be (dim_z, {}), but is {}".format( + P.shape[0], shape(H)) + + assert H.shape[1] == P.shape[0], \ + "Shape of H must be (dim_z, {}), but is {}".format( + P.shape[0], H.shape) + + # shape of R must be the same as HPH' + hph_shape = (H.shape[0], H.shape[0]) + r_shape = shape(R) + + if H.shape[0] == 1: + # r can be scalar, 1D, or 2D in this case + assert r_shape in [(), (1,), (1, 1)], \ + "R must be scalar or one element array, but is shaped {}".format(r_shape) + else: + assert r_shape == hph_shape, \ + "shape of R should be {} but it is {}".format(hph_shape, r_shape) + + if z is not None: + z_shape = shape(z) + else: + z_shape = (self.dim_z, 1) + + # H@x must have shape of z + Hx = dot(H, x) + + if z_shape == (): # scalar or np.array(scalar) + assert Hx.ndim == 1 or shape(Hx) == (1, 1), \ + "shape of z should be {}, not {} for the given H".format(shape(Hx), z_shape) + + elif shape(Hx) == (1,): + assert z_shape[0] == 1, 'Shape of z must be {} for the given H'.format(shape(Hx)) + + else: + assert (z_shape == shape(Hx) or + (len(z_shape) == 1 and shape(Hx) == (z_shape[0], 1))), \ + "shape of z should be {}, not {} for the given H".format(shape(Hx), z_shape) + + if np.ndim(Hx) > 1 and shape(Hx) != (1, 1): + assert shape(Hx) == z_shape, \ + 'shape of z should be {} for the given H, but it is {}'.format( + shape(Hx), z_shape) + + +def update(x, P, z, R, H=None, return_all=False): + """ + Add a new measurement (z) to the Kalman filter. If z is None, nothing + is changed. + This can handle either the multidimensional or unidimensional case. If + all parameters are floats instead of arrays the filter will still work, + and return floats for x, P as the result. + update(1, 2, 1, 1, 1) # univariate + update(x, P, 1 + Parameters + ---------- + x : numpy.array(dim_x, 1), or float + State estimate vector + P : numpy.array(dim_x, dim_x), or float + Covariance matrix + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + R : numpy.array(dim_z, dim_z), or float + Measurement noise matrix + H : numpy.array(dim_x, dim_x), or float, optional + Measurement function. If not provided, a value of 1 is assumed. + return_all : bool, default False + If true, y, K, S, and log_likelihood are returned, otherwise + only x and P are returned. + Returns + ------- + x : numpy.array + Posterior state estimate vector + P : numpy.array + Posterior covariance matrix + y : numpy.array or scalar + Residua. Difference between measurement and state in measurement space + K : numpy.array + Kalman gain + S : numpy.array + System uncertainty in measurement space + log_likelihood : float + log likelihood of the measurement + """ + + if z is None: + if return_all: + return x, P, None, None, None, None + return x, P + + if H is None: + H = np.array([1]) + + if np.isscalar(H): + H = np.array([H]) + + Hx = np.atleast_1d(dot(H, x)) + z = reshape_z(z, Hx.shape[0], x.ndim) + + # error (residual) between measurement and prediction + y = z - Hx + + # project system uncertainty into measurement space + S = dot(dot(H, P), H.T) + R + + # map system uncertainty into kalman gain + try: + K = dot(dot(P, H.T), linalg.inv(S)) + except Exception: + # can't invert a 1D array, annoyingly + K = dot(dot(P, H.T), 1. / S) + + # predict new x with residual scaled by the kalman gain + x = x + dot(K, y) + + # P = (I-KH)P(I-KH)' + KRK' + KH = dot(K, H) + + try: + I_KH = np.eye(KH.shape[0]) - KH + except Exception: + I_KH = np.array([1 - KH]) + P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T) + + if return_all: + # compute log likelihood + log_likelihood = logpdf(z, dot(H, x), S) + return x, P, y, K, S, log_likelihood + return x, P + + +def update_steadystate(x, z, K, H=None): + """ + Add a new measurement (z) to the Kalman filter. If z is None, nothing + is changed. + Parameters + ---------- + x : numpy.array(dim_x, 1), or float + State estimate vector + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + K : numpy.array, or float + Kalman gain matrix + H : numpy.array(dim_x, dim_x), or float, optional + Measurement function. If not provided, a value of 1 is assumed. + Returns + ------- + x : numpy.array + Posterior state estimate vector + Examples + -------- + This can handle either the multidimensional or unidimensional case. If + all parameters are floats instead of arrays the filter will still work, + and return floats for x, P as the result. + + """ + + if z is None: + return x + + if H is None: + H = np.array([1]) + + if np.isscalar(H): + H = np.array([H]) + + Hx = np.atleast_1d(dot(H, x)) + z = reshape_z(z, Hx.shape[0], x.ndim) + + # error (residual) between measurement and prediction + y = z - Hx + + # estimate new x with residual scaled by the kalman gain + return x + dot(K, y) + + +def predict(x, P, F=1, Q=0, u=0, B=1, alpha=1.): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. + Parameters + ---------- + x : numpy.array + State estimate vector + P : numpy.array + Covariance matrix + F : numpy.array() + State Transition matrix + Q : numpy.array, Optional + Process noise matrix + u : numpy.array, Optional, default 0. + Control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : numpy.array, optional, default 0. + Control transition matrix. + alpha : float, Optional, default=1.0 + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon + Returns + ------- + x : numpy.array + Prior state estimate vector + P : numpy.array + Prior covariance matrix + """ + + if np.isscalar(F): + F = np.array(F) + x = dot(F, x) + dot(B, u) + P = (alpha * alpha) * dot(dot(F, P), F.T) + Q + + return x, P + + +def predict_steadystate(x, F=1, u=0, B=1): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. This steady state form only computes x, assuming that the + covariance is constant. + Parameters + ---------- + x : numpy.array + State estimate vector + P : numpy.array + Covariance matrix + F : numpy.array() + State Transition matrix + u : numpy.array, Optional, default 0. + Control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : numpy.array, optional, default 0. + Control transition matrix. + Returns + ------- + x : numpy.array + Prior state estimate vector + """ + + if np.isscalar(F): + F = np.array(F) + x = dot(F, x) + dot(B, u) + + return x + + +def batch_filter(x, P, zs, Fs, Qs, Hs, Rs, Bs=None, us=None, + update_first=False, saver=None): + """ + Batch processes a sequences of measurements. + Parameters + ---------- + zs : list-like + list of measurements at each time step. Missing measurements must be + represented by None. + Fs : list-like + list of values to use for the state transition matrix matrix. + Qs : list-like + list of values to use for the process error + covariance. + Hs : list-like + list of values to use for the measurement matrix. + Rs : list-like + list of values to use for the measurement error + covariance. + Bs : list-like, optional + list of values to use for the control transition matrix; + a value of None in any position will cause the filter + to use `self.B` for that time step. + us : list-like, optional + list of values to use for the control input vector; + a value of None in any position will cause the filter to use + 0 for that time step. + update_first : bool, optional + controls whether the order of operations is update followed by + predict, or predict followed by update. Default is predict->update. + saver : filterpy.common.Saver, optional + filterpy.common.Saver object. If provided, saver.save() will be + called after every epoch + Returns + ------- + means : np.array((n,dim_x,1)) + array of the state for each time step after the update. Each entry + is an np.array. In other words `means[k,:]` is the state at step + `k`. + covariance : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the update. + In other words `covariance[k,:,:]` is the covariance at step `k`. + means_predictions : np.array((n,dim_x,1)) + array of the state for each time step after the predictions. Each + entry is an np.array. In other words `means[k,:]` is the state at + step `k`. + covariance_predictions : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the prediction. + In other words `covariance[k,:,:]` is the covariance at step `k`. + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + Fs = [kf.F for t in range (40)] + Hs = [kf.H for t in range (40)] + (mu, cov, _, _) = kf.batch_filter(zs, Rs=R_list, Fs=Fs, Hs=Hs, Qs=None, + Bs=None, us=None, update_first=False) + (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs, Qs=None) + """ + + n = np.size(zs, 0) + dim_x = x.shape[0] + + # mean estimates from Kalman Filter + if x.ndim == 1: + means = zeros((n, dim_x)) + means_p = zeros((n, dim_x)) + else: + means = zeros((n, dim_x, 1)) + means_p = zeros((n, dim_x, 1)) + + # state covariances from Kalman Filter + covariances = zeros((n, dim_x, dim_x)) + covariances_p = zeros((n, dim_x, dim_x)) + + if us is None: + us = [0.] * n + Bs = [0.] * n + + if update_first: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + + x, P = update(x, P, z, R=R, H=H) + means[i, :] = x + covariances[i, :, :] = P + + x, P = predict(x, P, u=u, B=B, F=F, Q=Q) + means_p[i, :] = x + covariances_p[i, :, :] = P + if saver is not None: + saver.save() + else: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + + x, P = predict(x, P, u=u, B=B, F=F, Q=Q) + means_p[i, :] = x + covariances_p[i, :, :] = P + + x, P = update(x, P, z, R=R, H=H) + means[i, :] = x + covariances[i, :, :] = P + if saver is not None: + saver.save() + + return (means, covariances, means_p, covariances_p) + + +def rts_smoother(Xs, Ps, Fs, Qs): + """ + Runs the Rauch-Tung-Striebel Kalman smoother on a set of + means and covariances computed by a Kalman filter. The usual input + would come from the output of `KalmanFilter.batch_filter()`. + Parameters + ---------- + Xs : numpy.array + array of the means (state variable x) of the output of a Kalman + filter. + Ps : numpy.array + array of the covariances of the output of a kalman filter. + Fs : list-like collection of numpy.array + State transition matrix of the Kalman filter at each time step. + Qs : list-like collection of numpy.array, optional + Process noise of the Kalman filter at each time step. + Returns + ------- + x : numpy.ndarray + smoothed means + P : numpy.ndarray + smoothed state covariances + K : numpy.ndarray + smoother gain at each step + pP : numpy.ndarray + predicted state covariances + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + (mu, cov, _, _) = kalman.batch_filter(zs) + (x, P, K, pP) = rts_smoother(mu, cov, kf.F, kf.Q) + """ + + if len(Xs) != len(Ps): + raise ValueError('length of Xs and Ps must be the same') + + n = Xs.shape[0] + dim_x = Xs.shape[1] + + # smoother gain + K = zeros((n, dim_x, dim_x)) + x, P, pP = Xs.copy(), Ps.copy(), Ps.copy() + + for k in range(n - 2, -1, -1): + pP[k] = dot(dot(Fs[k], P[k]), Fs[k].T) + Qs[k] + + K[k] = dot(dot(P[k], Fs[k].T), linalg.inv(pP[k])) + x[k] += dot(K[k], x[k + 1] - dot(Fs[k], x[k])) + P[k] += dot(dot(K[k], P[k + 1] - pP[k]), K[k].T) + + return (x, P, K, pP) diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/ocsort_kf_adapter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/ocsort_kf_adapter.py new file mode 100644 index 0000000..3db3013 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/ocsort_kf_adapter.py @@ -0,0 +1,8 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from ..kalman_filter import KalmanFilter + + +class OCSortKalmanFilterAdapter(KalmanFilter): + def __init__(self, dim_x, dim_z): + super().__init__(dim_x=dim_x, dim_z=dim_z) diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/strongsort_kf_adapter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/strongsort_kf_adapter.py new file mode 100644 index 0000000..d7d5650 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/adapters/strongsort_kf_adapter.py @@ -0,0 +1,171 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np +from filterpy.common import reshape_z + +from ..kalman_filter import KalmanFilter + + +class StrongSortKalmanFilterAdapter(KalmanFilter): + ndim = 4 + + def __init__(self, dt=1): + super().__init__(dim_x=2 * self.ndim, dim_z=self.ndim) + + # Set transition matrix + for i in range(self.ndim): + self.F[i, self.ndim + i] = dt + + # Set observation matrix + self.H = np.eye(self.ndim, 2 * self.ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1.0 / 20 + self._std_weight_velocity = 1.0 / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, w, h) with center position (x, y), + width w, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + self.x = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[0], # the center point x + 2 * self._std_weight_position * measurement[1], # the center point y + 1 * measurement[2], # the ratio of width/height + 2 * self._std_weight_position * measurement[3], # the height + 10 * self._std_weight_velocity * measurement[0], + 10 * self._std_weight_velocity * measurement[1], + 0.1 * measurement[2], + 10 * self._std_weight_velocity * measurement[3], + ] + self.P = np.diag(np.square(std)) + + return self.x, self.P + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[0], + self._std_weight_position * mean[1], + 1 * mean[2], + self._std_weight_position * mean[3], + ] + std_vel = [ + self._std_weight_velocity * mean[0], + self._std_weight_velocity * mean[1], + 0.1 * mean[2], + self._std_weight_velocity * mean[3], + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + super().predict(Q=motion_cov) + + return self.x, self.P + + def update(self, mean, covariance, measurement, confidence=0.0): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, w, h), where (x, y) + is the center position, w the width, and h the height of the + bounding box. + confidence : float + Confidence level of measurement + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + self.x = mean + self.P = covariance + + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3], + ] + std = [(1 - confidence) * x for x in std] + + innovation_cov = np.diag(np.square(std)) + + super().update(measurement, R=innovation_cov) + + return self.x, self.P + + def gating_distance(self, measurements, only_position=False): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : bool + Whether to use only the positional attributes of the track for + calculating the gating distance + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + """ + + squared_maha = np.zeros((measurements.shape[0],)) + for i, measurement in enumerate(measurements): + if not only_position: + squared_maha[i] = super().md_for_measurement(measurement) + else: + # TODO (henriksod): Needs to be tested! + z = reshape_z(measurements, self.dim_z, 2) + H = self.H[:2, :2] + y = z - np.dot(H, self.x[:2]) + squared_maha[i] = np.sqrt(float(np.dot(np.dot(y.T, self.SI[:2, :2]), y))) + return squared_maha diff --git a/src/juxtapose/trackers/boxmot/motion/kalman_filters/kalman_filter.py b/src/juxtapose/trackers/boxmot/motion/kalman_filters/kalman_filter.py new file mode 100644 index 0000000..3de3155 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/motion/kalman_filters/kalman_filter.py @@ -0,0 +1,1676 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +""" +This module implements the linear Kalman filter in both an object +oriented and procedural form. The KalmanFilter class implements +the filter by storing the various matrices in instance variables, +minimizing the amount of bookkeeping you have to do. +All Kalman filters operate with a predict->update cycle. The +predict step, implemented with the method or function predict(), +uses the state transition matrix F to predict the state in the next +time period (epoch). The state is stored as a gaussian (x, P), where +x is the state (column) vector, and P is its covariance. Covariance +matrix Q specifies the process covariance. In Bayesian terms, this +prediction is called the *prior*, which you can think of colloquially +as the estimate prior to incorporating the measurement. +The update step, implemented with the method or function `update()`, +incorporates the measurement z with covariance R, into the state +estimate (x, P). The class stores the system uncertainty in S, +the innovation (residual between prediction and measurement in +measurement space) in y, and the Kalman gain in k. The procedural +form returns these variables to you. In Bayesian terms this computes +the *posterior* - the estimate after the information from the +measurement is incorporated. +Whether you use the OO form or procedural form is up to you. If +matrices such as H, R, and F are changing each epoch, you'll probably +opt to use the procedural form. If they are unchanging, the OO +form is perhaps easier to use since you won't need to keep track +of these matrices. This is especially useful if you are implementing +banks of filters or comparing various KF designs for performance; +a trivial coding bug could lead to using the wrong sets of matrices. +This module also offers an implementation of the RTS smoother, and +other helper functions, such as log likelihood computations. +The Saver class allows you to easily save the state of the +KalmanFilter class after every update +This module expects NumPy arrays for all values that expect +arrays, although in a few cases, particularly method parameters, +it will accept types that convert to NumPy arrays, such as lists +of lists. These exceptions are documented in the method or function. +Examples +-------- +The following example constructs a constant velocity kinematic +filter, filters noisy data, and plots the results. It also demonstrates +using the Saver class to save the state of the filter at each epoch. +.. code-block:: Python + import matplotlib.pyplot as plt + import numpy as np + from filterpy.kalman import KalmanFilter + from filterpy.common import Q_discrete_white_noise, Saver + r_std, q_std = 2., 0.003 + cv = KalmanFilter(dim_x=2, dim_z=1) + cv.x = np.array([[0., 1.]]) # position, velocity + cv.F = np.array([[1, dt],[ [0, 1]]) + cv.R = np.array([[r_std^^2]]) + f.H = np.array([[1., 0.]]) + f.P = np.diag([.1^^2, .03^^2) + f.Q = Q_discrete_white_noise(2, dt, q_std**2) + saver = Saver(cv) + for z in range(100): + cv.predict() + cv.update([z + randn() * r_std]) + saver.save() # save the filter's state + saver.to_array() + plt.plot(saver.x[:, 0]) + # plot all of the priors + plt.plot(saver.x_prior[:, 0]) + # plot mahalanobis distance + plt.figure() + plt.plot(saver.mahalanobis) +This code implements the same filter using the procedural form + x = np.array([[0., 1.]]) # position, velocity + F = np.array([[1, dt],[ [0, 1]]) + R = np.array([[r_std^^2]]) + H = np.array([[1., 0.]]) + P = np.diag([.1^^2, .03^^2) + Q = Q_discrete_white_noise(2, dt, q_std**2) + for z in range(100): + x, P = predict(x, P, F=F, Q=Q) + x, P = update(x, P, z=[z + randn() * r_std], R=R, H=H) + xs.append(x[0, 0]) + plt.plot(xs) +For more examples see the test subdirectory, or refer to the +book cited below. In it I both teach Kalman filtering from basic +principles, and teach the use of this library in great detail. +FilterPy library. +http://github.com/rlabbe/filterpy +Documentation at: +https://filterpy.readthedocs.org +Supporting book at: +https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python +This is licensed under an MIT license. See the readme.MD file +for more information. +Copyright 2014-2018 Roger R Labbe Jr. +""" + +from __future__ import absolute_import, division + +from copy import deepcopy +from math import exp, log, sqrt +from sys import float_info + +import numpy as np +import numpy.linalg as linalg +from filterpy.common import pretty_str, reshape_z +from filterpy.stats import logpdf +from numpy import dot, eye, isscalar, shape, zeros + + +class KalmanFilter(object): + """Implements a Kalman filter. You are responsible for setting the + various state variables to reasonable values; the defaults will + not give you a functional filter. + For now the best documentation is my free book Kalman and Bayesian + Filters in Python [2]_. The test files in this directory also give you a + basic idea of use, albeit without much description. + In brief, you will first construct this object, specifying the size of + the state vector with dim_x and the size of the measurement vector that + you will be using with dim_z. These are mostly used to perform size checks + when you assign values to the various matrices. For example, if you + specified dim_z=2 and then try to assign a 3x3 matrix to R (the + measurement noise matrix you will get an assert exception because R + should be 2x2. (If for whatever reason you need to alter the size of + things midstream just use the underscore version of the matrices to + assign directly: your_filter._R = a_3x3_matrix.) + After construction the filter will have default matrices created for you, + but you must specify the values for each. It’s usually easiest to just + overwrite them rather than assign to each element yourself. This will be + clearer in the example below. All are of type numpy.array. + Examples + -------- + Here is a filter that tracks position and velocity using a sensor that only + reads position. + First construct the object with the required dimensionality. Here the state + (`dim_x`) has 2 coefficients (position and velocity), and the measurement + (`dim_z`) has one. In FilterPy `x` is the state, `z` is the measurement. + .. code:: + from filterpy.kalman import KalmanFilter + f = KalmanFilter (dim_x=2, dim_z=1) + Assign the initial value for the state (position and velocity). You can do this + with a two dimensional array like so: + .. code:: + f.x = np.array([[2.], # position + [0.]]) # velocity + or just use a one dimensional array, which I prefer doing. + .. code:: + f.x = np.array([2., 0.]) + Define the state transition matrix: + .. code:: + f.F = np.array([[1.,1.], + [0.,1.]]) + Define the measurement function. Here we need to convert a position-velocity + vector into just a position vector, so we use: + .. code:: + f.H = np.array([[1., 0.]]) + Define the state's covariance matrix P. + .. code:: + f.P = np.array([[1000., 0.], + [ 0., 1000.] ]) + Now assign the measurement noise. Here the dimension is 1x1, so I can + use a scalar + .. code:: + f.R = 5 + I could have done this instead: + .. code:: + f.R = np.array([[5.]]) + Note that this must be a 2 dimensional array. + Finally, I will assign the process noise. Here I will take advantage of + another FilterPy library function: + .. code:: + from filterpy.common import Q_discrete_white_noise + f.Q = Q_discrete_white_noise(dim=2, dt=0.1, var=0.13) + Now just perform the standard predict/update loop: + .. code:: + while some_condition_is_true: + z = get_sensor_reading() + f.predict() + f.update(z) + do_something_with_estimate (f.x) + **Procedural Form** + This module also contains stand alone functions to perform Kalman filtering. + Use these if you are not a fan of objects. + **Example** + .. code:: + while True: + z, R = read_sensor() + x, P = predict(x, P, F, Q) + x, P = update(x, P, z, R, H) + See my book Kalman and Bayesian Filters in Python [2]_. + You will have to set the following attributes after constructing this + object for the filter to perform properly. Please note that there are + various checks in place to ensure that you have made everything the + 'correct' size. However, it is possible to provide incorrectly sized + arrays such that the linear algebra can not perform an operation. + It can also fail silently - you can end up with matrices of a size that + allows the linear algebra to work, but are the wrong shape for the problem + you are trying to solve. + Parameters + ---------- + dim_x : int + Number of state variables for the Kalman filter. For example, if + you are tracking the position and velocity of an object in two + dimensions, dim_x would be 4. + This is used to set the default size of P, Q, and u + dim_z : int + Number of of measurement inputs. For example, if the sensor + provides you with position in (x,y), dim_z would be 2. + dim_u : int (optional) + size of the control input, if it is being used. + Default value of 0 indicates it is not used. + compute_log_likelihood : bool (default = True) + Computes log likelihood by default, but this can be a slow + computation, so if you never use it you can turn this computation + off. + Attributes + ---------- + x : numpy.array(dim_x, 1) + Current state estimate. Any call to update() or predict() updates + this variable. + P : numpy.array(dim_x, dim_x) + Current state covariance matrix. Any call to update() or predict() + updates this variable. + x_prior : numpy.array(dim_x, 1) + Prior (predicted) state estimate. The *_prior and *_post attributes + are for convenience; they store the prior and posterior of the + current epoch. Read Only. + P_prior : numpy.array(dim_x, dim_x) + Prior (predicted) state covariance matrix. Read Only. + x_post : numpy.array(dim_x, 1) + Posterior (updated) state estimate. Read Only. + P_post : numpy.array(dim_x, dim_x) + Posterior (updated) state covariance matrix. Read Only. + z : numpy.array + Last measurement used in update(). Read only. + R : numpy.array(dim_z, dim_z) + Measurement noise covariance matrix. Also known as the + observation covariance. + Q : numpy.array(dim_x, dim_x) + Process noise covariance matrix. Also known as the transition + covariance. + F : numpy.array() + State Transition matrix. Also known as `A` in some formulation. + H : numpy.array(dim_z, dim_x) + Measurement function. Also known as the observation matrix, or as `C`. + y : numpy.array + Residual of the update step. Read only. + K : numpy.array(dim_x, dim_z) + Kalman gain of the update step. Read only. + S : numpy.array + System uncertainty (P projected to measurement space). Read only. + SI : numpy.array + Inverse system uncertainty. Read only. + log_likelihood : float + log-likelihood of the last measurement. Read only. + likelihood : float + likelihood of last measurement. Read only. + Computed from the log-likelihood. The log-likelihood can be very + small, meaning a large negative value such as -28000. Taking the + exp() of that results in 0.0, which can break typical algorithms + which multiply by this value, so by default we always return a + number >= float_info.min. + mahalanobis : float + mahalanobis distance of the innovation. Read only. + inv : function, default numpy.linalg.inv + If you prefer another inverse function, such as the Moore-Penrose + pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv + This is only used to invert self.S. If you know it is diagonal, you + might choose to set it to filterpy.common.inv_diagonal, which is + several times faster than numpy.linalg.inv for diagonal matrices. + alpha : float + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon [1]_. + References + ---------- + .. [1] Dan Simon. "Optimal State Estimation." John Wiley & Sons. + p. 208-212. (2006) + .. [2] Roger Labbe. "Kalman and Bayesian Filters in Python" + https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python + """ + + def __init__(self, dim_x, dim_z, dim_u=0): + if dim_x < 1: + raise ValueError("dim_x must be 1 or greater") + if dim_z < 1: + raise ValueError("dim_z must be 1 or greater") + if dim_u < 0: + raise ValueError("dim_u must be 0 or greater") + + self.dim_x = dim_x + self.dim_z = dim_z + self.dim_u = dim_u + + self.x = zeros((dim_x, 1)) # state + self.P = eye(dim_x) # uncertainty covariance + self.Q = eye(dim_x) # process uncertainty + self.B = None # control transition matrix + self.F = eye(dim_x) # state transition matrix + self.H = zeros((dim_z, dim_x)) # measurement function + self.R = eye(dim_z) # measurement uncertainty + self._alpha_sq = 1.0 # fading memory control + self.M = np.zeros((dim_x, dim_z)) # process-measurement cross correlation + self.z = np.array([[None] * self.dim_z]).T + + # gain and residual are computed during the innovation step. We + # save them so that in case you want to inspect them for various + # purposes + self.K = np.zeros((dim_x, dim_z)) # kalman gain + self.y = zeros((dim_z, 1)) + self.S = np.zeros((dim_z, dim_z)) # system uncertainty + self.SI = np.zeros((dim_z, dim_z)) # inverse system uncertainty + + # identity matrix. Do not alter this. + self._I = np.eye(dim_x) + + # these will always be a copy of x,P after predict() is called + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + # these will always be a copy of x,P after update() is called + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + # Only computed only if requested via property + self._log_likelihood = log(float_info.min) + self._likelihood = float_info.min + self._mahalanobis = None + + # keep all observations + self.history_obs = [] + + self.inv = np.linalg.inv + + self.attr_saved = None + self.observed = False + self.last_measurement = None + + def predict(self, u=None, B=None, F=None, Q=None): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. + Parameters + ---------- + u : np.array, default 0 + Optional control vector. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + F : np.array(dim_x, dim_x), or None + Optional state transition matrix; a value of None + will cause the filter to use `self.F`. + Q : np.array(dim_x, dim_x), scalar, or None + Optional process noise matrix; a value of None will cause the + filter to use `self.Q`. + """ + + if B is None: + B = self.B + if F is None: + F = self.F + if Q is None: + Q = self.Q + elif isscalar(Q): + Q = eye(self.dim_x) * Q + + # x = Fx + Bu + if B is not None and u is not None: + self.x = dot(F, self.x) + dot(B, u) + else: + self.x = dot(F, self.x) + + # P = FPF' + Q + self.P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q + + # save prior + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + def freeze(self): + """ + Save the parameters before non-observation forward + """ + self.attr_saved = deepcopy(self.__dict__) + + def apply_affine_correction(self, m, t, new_kf): + """ + Apply to both last state and last observation for OOS smoothing. + + Messy due to internal logic for kalman filter being messy. + """ + if new_kf: + big_m = np.kron(np.eye(4, dtype=float), m) + self.x = big_m @ self.x + self.x[:2] += t + self.P = big_m @ self.P @ big_m.T + + # If frozen, also need to update the frozen state for OOS + if not self.observed and self.attr_saved is not None: + self.attr_saved["x"] = big_m @ self.attr_saved["x"] + self.attr_saved["x"][:2] += t + self.attr_saved["P"] = big_m @ self.attr_saved["P"] @ big_m.T + self.attr_saved["last_measurement"][:2] = ( + m @ self.attr_saved["last_measurement"][:2] + t + ) + self.attr_saved["last_measurement"][2:] = ( + m @ self.attr_saved["last_measurement"][2:] + ) + else: + # scale = np.linalg.norm(m[:, 0]) + self.x[:2] = m @ self.x[:2] + t + self.x[4:6] = m @ self.x[4:6] + # self.x[2] *= scale + # self.x[6] *= scale + + self.P[:2, :2] = m @ self.P[:2, :2] @ m.T + self.P[4:6, 4:6] = m @ self.P[4:6, 4:6] @ m.T + # self.P[2, 2] *= 2 * scale + # self.P[6, 6] *= 2 * scale + + # If frozen, also need to update the frozen state for OOS + if not self.observed and self.attr_saved is not None: + self.attr_saved["x"][:2] = m @ self.attr_saved["x"][:2] + t + self.attr_saved["x"][4:6] = m @ self.attr_saved["x"][4:6] + # self.attr_saved["x"][2] *= scale + # self.attr_saved["x"][6] *= scale + + self.attr_saved["P"][:2, :2] = m @ self.attr_saved["P"][:2, :2] @ m.T + self.attr_saved["P"][4:6, 4:6] = m @ self.attr_saved["P"][4:6, 4:6] @ m.T + # self.attr_saved["P"][2, 2] *= 2 * scale + # self.attr_saved["P"][6, 6] *= 2 * scale + + self.attr_saved["last_measurement"][:2] = ( + m @ self.attr_saved["last_measurement"][:2] + t + ) + # self.attr_saved["last_measurement"][2] *= scale + + def unfreeze(self): + if self.attr_saved is not None: + new_history = deepcopy(self.history_obs) + self.__dict__ = self.attr_saved + # self.history_obs = new_history + self.history_obs = self.history_obs[:-1] + occur = [int(d is None) for d in new_history] + indices = np.where(np.array(occur) == 0)[0] + index1 = indices[-2] + index2 = indices[-1] + # box1 = new_history[index1] + box1 = self.last_measurement + x1, y1, s1, r1 = box1 + w1 = np.sqrt(s1 * r1) + h1 = np.sqrt(s1 / r1) + box2 = new_history[index2] + x2, y2, s2, r2 = box2 + w2 = np.sqrt(s2 * r2) + h2 = np.sqrt(s2 / r2) + time_gap = index2 - index1 + dx = (x2 - x1) / time_gap + dy = (y2 - y1) / time_gap + dw = (w2 - w1) / time_gap + dh = (h2 - h1) / time_gap + for i in range(index2 - index1): + """ + The default virtual trajectory generation is by linear + motion (constant speed hypothesis), you could modify this + part to implement your own. + """ + x = x1 + (i + 1) * dx + y = y1 + (i + 1) * dy + w = w1 + (i + 1) * dw + h = h1 + (i + 1) * dh + s = w * h + r = w / float(h) + new_box = np.array([x, y, s, r]).reshape((4, 1)) + """ + I still use predict-update loop here to refresh the parameters, + but this can be faster by directly modifying the internal parameters + as suggested in the paper. I keep this naive but slow way for + easy read and understanding + """ + self.update(new_box) + if not i == (index2 - index1 - 1): + self.predict() + + def update(self, z, R=None, H=None): + """ + Add a new measurement (z) to the Kalman filter. + If z is None, nothing is computed. However, x_post and P_post are + updated with the prior (x_prior, P_prior), and self.z is set to None. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + If you pass in a value of H, z must be a column vector the + of the correct size. + R : np.array, scalar, or None + Optionally provide R to override the measurement noise for this + one call, otherwise self.R will be used. + H : np.array, or None + Optionally provide H to override the measurement function for this + one call, otherwise self.H will be used. + """ + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + # append the observation + self.history_obs.append(z) + + if z is None: + if self.observed: + """ + Got no observation so freeze the current parameters for future + potential online smoothing. + """ + self.last_measurement = self.history_obs[-2] + self.freeze() + self.observed = False + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + # self.observed = True + if not self.observed: + """ + Get observation, use online smoothing to re-update parameters + """ + self.unfreeze() + self.observed = True + + if R is None: + R = self.R + elif isscalar(R): + R = eye(self.dim_z) * R + + if H is None: + z = reshape_z(z, self.dim_z, self.x.ndim) + H = self.H + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(H, self.x) + + # common subexpression for speed + PHT = dot(self.P, H.T) + + # S = HPH' + R + # project system uncertainty into measurement space + self.S = dot(H, PHT) + R + self.SI = self.inv(self.S) + # K = PH'inv(S) + # map system uncertainty into kalman gain + self.K = dot(PHT, self.SI) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + + # P = (I-KH)P(I-KH)' + KRK' + # This is more numerically stable + # and works for non-optimal K vs the equation + # P = (I-KH)P usually seen in the literature. + + I_KH = self._I - dot(self.K, H) + self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(self.K, R), self.K.T) + + # save measurement and posterior state + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + def md_for_measurement(self, z): + """Mahalanobis distance for any measurement. + + Should be run after a prediction() call. + """ + z = reshape_z(z, self.dim_z, self.x.ndim) + H = self.H + y = z - dot(H, self.x) + md = sqrt(float(dot(dot(y.T, self.SI), y))) + return md + + def predict_steadystate(self, u=0, B=None): + """ + Predict state (prior) using the Kalman filter state propagation + equations. Only x is updated, P is left unchanged. See + update_steadstate() for a longer explanation of when to use this + method. + Parameters + ---------- + u : np.array + Optional control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + """ + + if B is None: + B = self.B + + # x = Fx + Bu + if B is not None: + self.x = dot(self.F, self.x) + dot(B, u) + else: + self.x = dot(self.F, self.x) + + # save prior + self.x_prior = self.x.copy() + self.P_prior = self.P.copy() + + def update_steadystate(self, z): + """ + Add a new measurement (z) to the Kalman filter without recomputing + the Kalman gain K, the state covariance P, or the system + uncertainty S. + You can use this for LTI systems since the Kalman gain and covariance + converge to a fixed value. Precompute these and assign them explicitly, + or run the Kalman filter using the normal predict()/update(0 cycle + until they converge. + The main advantage of this call is speed. We do significantly less + computation, notably avoiding a costly matrix inversion. + Use in conjunction with predict_steadystate(), otherwise P will grow + without bound. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + Examples + -------- + cv = KalmanFilter(dim=3, order=2) # 3D const velocity filter + # let filter converge on representative data, then save k and P + for i in range(100): + cv.predict() + cv.update([i, i, i]) + saved_K = np.copy(cv.K) + saved_P = np.copy(cv.P) + later on: + cv = KalmanFilter(dim=3, order=2) # 3D const velocity filter + cv.K = np.copy(saved_K) + cv.P = np.copy(saved_P) + for i in range(100): + cv.predict_steadystate() + cv.update_steadystate([i, i, i]) + """ + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + if z is None: + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + z = reshape_z(z, self.dim_z, self.x.ndim) + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(self.H, self.x) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + def update_correlated(self, z, R=None, H=None): + """Add a new measurement (z) to the Kalman filter assuming that + process noise and measurement noise are correlated as defined in + the `self.M` matrix. + A partial derivation can be found in [1] + If z is None, nothing is changed. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + R : np.array, scalar, or None + Optionally provide R to override the measurement noise for this + one call, otherwise self.R will be used. + H : np.array, or None + Optionally provide H to override the measurement function for this + one call, otherwise self.H will be used. + References + ---------- + .. [1] Bulut, Y. (2011). Applied Kalman filter theory + (Doctoral dissertation, Northeastern University). + http://people.duke.edu/~hpgavin/SystemID/References/Balut-KalmanFilter-PhD-NEU-2011.pdf + """ + + # set to None to force recompute + self._log_likelihood = None + self._likelihood = None + self._mahalanobis = None + + if z is None: + self.z = np.array([[None] * self.dim_z]).T + self.x_post = self.x.copy() + self.P_post = self.P.copy() + self.y = zeros((self.dim_z, 1)) + return + + if R is None: + R = self.R + elif isscalar(R): + R = eye(self.dim_z) * R + + # rename for readability and a tiny extra bit of speed + if H is None: + z = reshape_z(z, self.dim_z, self.x.ndim) + H = self.H + + # handle special case: if z is in form [[z]] but x is not a column + # vector dimensions will not match + if self.x.ndim == 1 and shape(z) == (1, 1): + z = z[0] + + if shape(z) == (): # is it scalar, e.g. z=3 or z=np.array(3) + z = np.asarray([z]) + + # y = z - Hx + # error (residual) between measurement and prediction + self.y = z - dot(H, self.x) + + # common subexpression for speed + PHT = dot(self.P, H.T) + + # project system uncertainty into measurement space + self.S = dot(H, PHT) + dot(H, self.M) + dot(self.M.T, H.T) + R + self.SI = self.inv(self.S) + + # K = PH'inv(S) + # map system uncertainty into kalman gain + self.K = dot(PHT + self.M, self.SI) + + # x = x + Ky + # predict new x with residual scaled by the kalman gain + self.x = self.x + dot(self.K, self.y) + self.P = self.P - dot(self.K, dot(H, self.P) + self.M.T) + + self.z = deepcopy(z) + self.x_post = self.x.copy() + self.P_post = self.P.copy() + + def batch_filter( + self, + zs, + Fs=None, + Qs=None, + Hs=None, + Rs=None, + Bs=None, + us=None, + update_first=False, + saver=None, + ): + """Batch processes a sequences of measurements. + Parameters + ---------- + zs : list-like + list of measurements at each time step `self.dt`. Missing + measurements must be represented by `None`. + Fs : None, list-like, default=None + optional value or list of values to use for the state transition + matrix F. + If Fs is None then self.F is used for all epochs. + Otherwise it must contain a list-like list of F's, one for + each epoch. This allows you to have varying F per epoch. + Qs : None, np.array or list-like, default=None + optional value or list of values to use for the process error + covariance Q. + If Qs is None then self.Q is used for all epochs. + Otherwise it must contain a list-like list of Q's, one for + each epoch. This allows you to have varying Q per epoch. + Hs : None, np.array or list-like, default=None + optional list of values to use for the measurement matrix H. + If Hs is None then self.H is used for all epochs. + If Hs contains a single matrix, then it is used as H for all + epochs. + Otherwise it must contain a list-like list of H's, one for + each epoch. This allows you to have varying H per epoch. + Rs : None, np.array or list-like, default=None + optional list of values to use for the measurement error + covariance R. + If Rs is None then self.R is used for all epochs. + Otherwise it must contain a list-like list of R's, one for + each epoch. This allows you to have varying R per epoch. + Bs : None, np.array or list-like, default=None + optional list of values to use for the control transition matrix B. + If Bs is None then self.B is used for all epochs. + Otherwise it must contain a list-like list of B's, one for + each epoch. This allows you to have varying B per epoch. + us : None, np.array or list-like, default=None + optional list of values to use for the control input vector; + If us is None then None is used for all epochs (equivalent to 0, + or no control input). + Otherwise it must contain a list-like list of u's, one for + each epoch. + update_first : bool, optional, default=False + controls whether the order of operations is update followed by + predict, or predict followed by update. Default is predict->update. + saver : filterpy.common.Saver, optional + filterpy.common.Saver object. If provided, saver.save() will be + called after every epoch + Returns + ------- + means : np.array((n,dim_x,1)) + array of the state for each time step after the update. Each entry + is an np.array. In other words `means[k,:]` is the state at step + `k`. + covariance : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the update. + In other words `covariance[k,:,:]` is the covariance at step `k`. + means_predictions : np.array((n,dim_x,1)) + array of the state for each time step after the predictions. Each + entry is an np.array. In other words `means[k,:]` is the state at + step `k`. + covariance_predictions : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the prediction. + In other words `covariance[k,:,:]` is the covariance at step `k`. + Examples + -------- + .. code-block:: Python + # this example demonstrates tracking a measurement where the time + # between measurement varies, as stored in dts. This requires + # that F be recomputed for each epoch. The output is then smoothed + # with an RTS smoother. + zs = [t + random.randn()*4 for t in range (40)] + Fs = [np.array([[1., dt], [0, 1]] for dt in dts] + (mu, cov, _, _) = kf.batch_filter(zs, Fs=Fs) + (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs) + """ + + # pylint: disable=too-many-statements + n = np.size(zs, 0) + if Fs is None: + Fs = [self.F] * n + if Qs is None: + Qs = [self.Q] * n + if Hs is None: + Hs = [self.H] * n + if Rs is None: + Rs = [self.R] * n + if Bs is None: + Bs = [self.B] * n + if us is None: + us = [0] * n + + # mean estimates from Kalman Filter + if self.x.ndim == 1: + means = zeros((n, self.dim_x)) + means_p = zeros((n, self.dim_x)) + else: + means = zeros((n, self.dim_x, 1)) + means_p = zeros((n, self.dim_x, 1)) + + # state covariances from Kalman Filter + covariances = zeros((n, self.dim_x, self.dim_x)) + covariances_p = zeros((n, self.dim_x, self.dim_x)) + + if update_first: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + self.update(z, R=R, H=H) + means[i, :] = self.x + covariances[i, :, :] = self.P + + self.predict(u=u, B=B, F=F, Q=Q) + means_p[i, :] = self.x + covariances_p[i, :, :] = self.P + + if saver is not None: + saver.save() + else: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + self.predict(u=u, B=B, F=F, Q=Q) + means_p[i, :] = self.x + covariances_p[i, :, :] = self.P + + self.update(z, R=R, H=H) + means[i, :] = self.x + covariances[i, :, :] = self.P + + if saver is not None: + saver.save() + + return (means, covariances, means_p, covariances_p) + + def rts_smoother(self, Xs, Ps, Fs=None, Qs=None, inv=np.linalg.inv): + """ + Runs the Rauch-Tung-Striebel Kalman smoother on a set of + means and covariances computed by a Kalman filter. The usual input + would come from the output of `KalmanFilter.batch_filter()`. + Parameters + ---------- + Xs : numpy.array + array of the means (state variable x) of the output of a Kalman + filter. + Ps : numpy.array + array of the covariances of the output of a kalman filter. + Fs : list-like collection of numpy.array, optional + State transition matrix of the Kalman filter at each time step. + Optional, if not provided the filter's self.F will be used + Qs : list-like collection of numpy.array, optional + Process noise of the Kalman filter at each time step. Optional, + if not provided the filter's self.Q will be used + inv : function, default numpy.linalg.inv + If you prefer another inverse function, such as the Moore-Penrose + pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv + Returns + ------- + x : numpy.ndarray + smoothed means + P : numpy.ndarray + smoothed state covariances + K : numpy.ndarray + smoother gain at each step + Pp : numpy.ndarray + Predicted state covariances + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + (mu, cov, _, _) = kalman.batch_filter(zs) + (x, P, K, Pp) = rts_smoother(mu, cov, kf.F, kf.Q) + """ + + if len(Xs) != len(Ps): + raise ValueError("length of Xs and Ps must be the same") + + n = Xs.shape[0] + dim_x = Xs.shape[1] + + if Fs is None: + Fs = [self.F] * n + if Qs is None: + Qs = [self.Q] * n + + # smoother gain + K = zeros((n, dim_x, dim_x)) + + x, P, Pp = Xs.copy(), Ps.copy(), Ps.copy() + for k in range(n - 2, -1, -1): + Pp[k] = dot(dot(Fs[k + 1], P[k]), Fs[k + 1].T) + Qs[k + 1] + + # pylint: disable=bad-whitespace + K[k] = dot(dot(P[k], Fs[k + 1].T), inv(Pp[k])) + x[k] += dot(K[k], x[k + 1] - dot(Fs[k + 1], x[k])) + P[k] += dot(dot(K[k], P[k + 1] - Pp[k]), K[k].T) + + return (x, P, K, Pp) + + def get_prediction(self, u=None, B=None, F=None, Q=None): + """ + Predict next state (prior) using the Kalman filter state propagation + equations and returns it without modifying the object. + Parameters + ---------- + u : np.array, default 0 + Optional control vector. + B : np.array(dim_x, dim_u), or None + Optional control transition matrix; a value of None + will cause the filter to use `self.B`. + F : np.array(dim_x, dim_x), or None + Optional state transition matrix; a value of None + will cause the filter to use `self.F`. + Q : np.array(dim_x, dim_x), scalar, or None + Optional process noise matrix; a value of None will cause the + filter to use `self.Q`. + Returns + ------- + (x, P) : tuple + State vector and covariance array of the prediction. + """ + + if B is None: + B = self.B + if F is None: + F = self.F + if Q is None: + Q = self.Q + elif isscalar(Q): + Q = eye(self.dim_x) * Q + + # x = Fx + Bu + if B is not None and u is not None: + x = dot(F, self.x) + dot(B, u) + else: + x = dot(F, self.x) + + # P = FPF' + Q + P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q + + return x, P + + def get_update(self, z=None): + """ + Computes the new estimate based on measurement `z` and returns it + without altering the state of the filter. + Parameters + ---------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + Returns + ------- + (x, P) : tuple + State vector and covariance array of the update. + """ + + if z is None: + return self.x, self.P + z = reshape_z(z, self.dim_z, self.x.ndim) + + R = self.R + H = self.H + P = self.P + x = self.x + + # error (residual) between measurement and prediction + y = z - dot(H, x) + + # common subexpression for speed + PHT = dot(P, H.T) + + # project system uncertainty into measurement space + S = dot(H, PHT) + R + + # map system uncertainty into kalman gain + K = dot(PHT, self.inv(S)) + + # predict new x with residual scaled by the kalman gain + x = x + dot(K, y) + + # P = (I-KH)P(I-KH)' + KRK' + I_KH = self._I - dot(K, H) + P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T) + + return x, P + + def residual_of(self, z): + """ + Returns the residual for the given measurement (z). Does not alter + the state of the filter. + """ + z = reshape_z(z, self.dim_z, self.x.ndim) + return z - dot(self.H, self.x_prior) + + def measurement_of_state(self, x): + """ + Helper function that converts a state into a measurement. + Parameters + ---------- + x : np.array + kalman state vector + Returns + ------- + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + """ + + return dot(self.H, x) + + @property + def log_likelihood(self): + """ + log-likelihood of the last measurement. + """ + if self._log_likelihood is None: + self._log_likelihood = logpdf(x=self.y, cov=self.S) + return self._log_likelihood + + @property + def likelihood(self): + """ + Computed from the log-likelihood. The log-likelihood can be very + small, meaning a large negative value such as -28000. Taking the + exp() of that results in 0.0, which can break typical algorithms + which multiply by this value, so by default we always return a + number >= float_info.min. + """ + if self._likelihood is None: + self._likelihood = exp(self.log_likelihood) + if self._likelihood == 0: + self._likelihood = float_info.min + return self._likelihood + + @property + def mahalanobis(self): + """ " + Mahalanobis distance of measurement. E.g. 3 means measurement + was 3 standard deviations away from the predicted value. + Returns + ------- + mahalanobis : float + """ + if self._mahalanobis is None: + self._mahalanobis = sqrt(float(dot(dot(self.y.T, self.SI), self.y))) + return self._mahalanobis + + @property + def alpha(self): + """ + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon [1]_. + """ + return self._alpha_sq**0.5 + + def log_likelihood_of(self, z): + """ + log likelihood of the measurement `z`. This should only be called + after a call to update(). Calling after predict() will yield an + incorrect result.""" + + if z is None: + return log(float_info.min) + return logpdf(z, dot(self.H, self.x), self.S) + + @alpha.setter + def alpha(self, value): + if not np.isscalar(value) or value < 1: + raise ValueError("alpha must be a float greater than 1") + + self._alpha_sq = value**2 + + def __repr__(self): + return "\n".join( + [ + "KalmanFilter object", + pretty_str("dim_x", self.dim_x), + pretty_str("dim_z", self.dim_z), + pretty_str("dim_u", self.dim_u), + pretty_str("x", self.x), + pretty_str("P", self.P), + pretty_str("x_prior", self.x_prior), + pretty_str("P_prior", self.P_prior), + pretty_str("x_post", self.x_post), + pretty_str("P_post", self.P_post), + pretty_str("F", self.F), + pretty_str("Q", self.Q), + pretty_str("R", self.R), + pretty_str("H", self.H), + pretty_str("K", self.K), + pretty_str("y", self.y), + pretty_str("S", self.S), + pretty_str("SI", self.SI), + pretty_str("M", self.M), + pretty_str("B", self.B), + pretty_str("z", self.z), + pretty_str("log-likelihood", self.log_likelihood), + pretty_str("likelihood", self.likelihood), + pretty_str("mahalanobis", self.mahalanobis), + pretty_str("alpha", self.alpha), + pretty_str("inv", self.inv), + ] + ) + + def test_matrix_dimensions(self, z=None, H=None, R=None, F=None, Q=None): + """ + Performs a series of asserts to check that the size of everything + is what it should be. This can help you debug problems in your design. + If you pass in H, R, F, Q those will be used instead of this object's + value for those matrices. + Testing `z` (the measurement) is problamatic. x is a vector, and can be + implemented as either a 1D array or as a nx1 column vector. Thus Hx + can be of different shapes. Then, if Hx is a single value, it can + be either a 1D array or 2D vector. If either is true, z can reasonably + be a scalar (either '3' or np.array('3') are scalars under this + definition), a 1D, 1 element array, or a 2D, 1 element array. You are + allowed to pass in any combination that works. + """ + + if H is None: + H = self.H + if R is None: + R = self.R + if F is None: + F = self.F + if Q is None: + Q = self.Q + x = self.x + P = self.P + + assert x.ndim == 1 or x.ndim == 2, "x must have one or two dimensions, but has {}".format( + x.ndim + ) + + if x.ndim == 1: + assert x.shape[0] == self.dim_x, "Shape of x must be ({},{}), but is {}".format( + self.dim_x, 1, x.shape + ) + else: + assert x.shape == ( + self.dim_x, + 1, + ), "Shape of x must be ({},{}), but is {}".format(self.dim_x, 1, x.shape) + + assert P.shape == ( + self.dim_x, + self.dim_x, + ), "Shape of P must be ({},{}), but is {}".format(self.dim_x, self.dim_x, P.shape) + + assert Q.shape == ( + self.dim_x, + self.dim_x, + ), "Shape of Q must be ({},{}), but is {}".format(self.dim_x, self.dim_x, P.shape) + + assert F.shape == ( + self.dim_x, + self.dim_x, + ), "Shape of F must be ({},{}), but is {}".format(self.dim_x, self.dim_x, F.shape) + + assert np.ndim(H) == 2, "Shape of H must be (dim_z, {}), but is {}".format( + P.shape[0], shape(H) + ) + + assert H.shape[1] == P.shape[0], "Shape of H must be (dim_z, {}), but is {}".format( + P.shape[0], H.shape + ) + + # shape of R must be the same as HPH' + hph_shape = (H.shape[0], H.shape[0]) + r_shape = shape(R) + + if H.shape[0] == 1: + # r can be scalar, 1D, or 2D in this case + assert r_shape in [ + (), + (1,), + (1, 1), + ], "R must be scalar or one element array, but is shaped {}".format(r_shape) + else: + assert r_shape == hph_shape, "shape of R should be {} but it is {}".format( + hph_shape, r_shape + ) + + if z is not None: + z_shape = shape(z) + else: + z_shape = (self.dim_z, 1) + + # H@x must have shape of z + Hx = dot(H, x) + + if z_shape == (): # scalar or np.array(scalar) + assert Hx.ndim == 1 or shape(Hx) == ( + 1, + 1, + ), "shape of z should be {}, not {} for the given H".format(shape(Hx), z_shape) + + elif shape(Hx) == (1,): + assert z_shape[0] == 1, "Shape of z must be {} for the given H".format(shape(Hx)) + + else: + assert z_shape == shape(Hx) or ( + len(z_shape) == 1 and shape(Hx) == (z_shape[0], 1) + ), "shape of z should be {}, not {} for the given H".format(shape(Hx), z_shape) + + if np.ndim(Hx) > 1 and shape(Hx) != (1, 1): + assert ( + shape(Hx) == z_shape + ), "shape of z should be {} for the given H, but it is {}".format(shape(Hx), z_shape) + + +def update(x, P, z, R, H=None, return_all=False): + """ + Add a new measurement (z) to the Kalman filter. If z is None, nothing + is changed. + This can handle either the multidimensional or unidimensional case. If + all parameters are floats instead of arrays the filter will still work, + and return floats for x, P as the result. + update(1, 2, 1, 1, 1) # univariate + update(x, P, 1 + Parameters + ---------- + x : numpy.array(dim_x, 1), or float + State estimate vector + P : numpy.array(dim_x, dim_x), or float + Covariance matrix + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + R : numpy.array(dim_z, dim_z), or float + Measurement noise matrix + H : numpy.array(dim_x, dim_x), or float, optional + Measurement function. If not provided, a value of 1 is assumed. + return_all : bool, default False + If true, y, K, S, and log_likelihood are returned, otherwise + only x and P are returned. + Returns + ------- + x : numpy.array + Posterior state estimate vector + P : numpy.array + Posterior covariance matrix + y : numpy.array or scalar + Residua. Difference between measurement and state in measurement space + K : numpy.array + Kalman gain + S : numpy.array + System uncertainty in measurement space + log_likelihood : float + log likelihood of the measurement + """ + + # pylint: disable=bare-except + + if z is None: + if return_all: + return x, P, None, None, None, None + return x, P + + if H is None: + H = np.array([1]) + + if np.isscalar(H): + H = np.array([H]) + + Hx = np.atleast_1d(dot(H, x)) + z = reshape_z(z, Hx.shape[0], x.ndim) + + # error (residual) between measurement and prediction + y = z - Hx + + # project system uncertainty into measurement space + S = dot(dot(H, P), H.T) + R + + # map system uncertainty into kalman gain + try: + K = dot(dot(P, H.T), linalg.inv(S)) + except BaseException: + # can't invert a 1D array, annoyingly + K = dot(dot(P, H.T), 1.0 / S) + + # predict new x with residual scaled by the kalman gain + x = x + dot(K, y) + + # P = (I-KH)P(I-KH)' + KRK' + KH = dot(K, H) + + try: + I_KH = np.eye(KH.shape[0]) - KH + except BaseException: + I_KH = np.array([1 - KH]) + P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T) + + if return_all: + # compute log likelihood + log_likelihood = logpdf(z, dot(H, x), S) + return x, P, y, K, S, log_likelihood + return x, P + + +def update_steadystate(x, z, K, H=None): + """ + Add a new measurement (z) to the Kalman filter. If z is None, nothing + is changed. + Parameters + ---------- + x : numpy.array(dim_x, 1), or float + State estimate vector + z : (dim_z, 1): array_like + measurement for this update. z can be a scalar if dim_z is 1, + otherwise it must be convertible to a column vector. + K : numpy.array, or float + Kalman gain matrix + H : numpy.array(dim_x, dim_x), or float, optional + Measurement function. If not provided, a value of 1 is assumed. + Returns + ------- + x : numpy.array + Posterior state estimate vector + Examples + -------- + This can handle either the multidimensional or unidimensional case. If + all parameters are floats instead of arrays the filter will still work, + and return floats for x, P as the result. + """ + + if z is None: + return x + + if H is None: + H = np.array([1]) + + if np.isscalar(H): + H = np.array([H]) + + Hx = np.atleast_1d(dot(H, x)) + z = reshape_z(z, Hx.shape[0], x.ndim) + + # error (residual) between measurement and prediction + y = z - Hx + + # estimate new x with residual scaled by the kalman gain + return x + dot(K, y) + + +def predict(x, P, F=1, Q=0, u=0, B=1, alpha=1.0): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. + Parameters + ---------- + x : numpy.array + State estimate vector + P : numpy.array + Covariance matrix + F : numpy.array() + State Transition matrix + Q : numpy.array, Optional + Process noise matrix + u : numpy.array, Optional, default 0. + Control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : numpy.array, optional, default 0. + Control transition matrix. + alpha : float, Optional, default=1.0 + Fading memory setting. 1.0 gives the normal Kalman filter, and + values slightly larger than 1.0 (such as 1.02) give a fading + memory effect - previous measurements have less influence on the + filter's estimates. This formulation of the Fading memory filter + (there are many) is due to Dan Simon + Returns + ------- + x : numpy.array + Prior state estimate vector + P : numpy.array + Prior covariance matrix + """ + + if np.isscalar(F): + F = np.array(F) + x = dot(F, x) + dot(B, u) + P = (alpha * alpha) * dot(dot(F, P), F.T) + Q + + return x, P + + +def predict_steadystate(x, F=1, u=0, B=1): + """ + Predict next state (prior) using the Kalman filter state propagation + equations. This steady state form only computes x, assuming that the + covariance is constant. + Parameters + ---------- + x : numpy.array + State estimate vector + P : numpy.array + Covariance matrix + F : numpy.array() + State Transition matrix + u : numpy.array, Optional, default 0. + Control vector. If non-zero, it is multiplied by B + to create the control input into the system. + B : numpy.array, optional, default 0. + Control transition matrix. + Returns + ------- + x : numpy.array + Prior state estimate vector + """ + + if np.isscalar(F): + F = np.array(F) + x = dot(F, x) + dot(B, u) + + return x + + +def multi_predict(x, P, F=1, Q=0): + """Run Kalman filter prediction step for N states (Vectorized version). + Parameters + ---------- + x : ndarray + The Nxm dimensional mean matrix of the object states at the previous + time step. + P : ndarray + The Nxmxn dimensional covariance matrics of the object states at the + previous time step. + F : ndarray + The mxn dimensional state transition matrix + Q : ndarray + The Nxmxn dimensional process noise matrices + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + + if np.isscalar(F): + F = np.array(F) + + x = np.dot(x, F.T) + left = np.dot(F, P).transpose((1, 0, 2)) + P = np.dot(left, F.T) + Q + + return x, P + + +def batch_filter(x, P, zs, Fs, Qs, Hs, Rs, Bs=None, us=None, update_first=False, saver=None): + """ + Batch processes a sequences of measurements. + Parameters + ---------- + zs : list-like + list of measurements at each time step. Missing measurements must be + represented by None. + Fs : list-like + list of values to use for the state transition matrix matrix. + Qs : list-like + list of values to use for the process error + covariance. + Hs : list-like + list of values to use for the measurement matrix. + Rs : list-like + list of values to use for the measurement error + covariance. + Bs : list-like, optional + list of values to use for the control transition matrix; + a value of None in any position will cause the filter + to use `self.B` for that time step. + us : list-like, optional + list of values to use for the control input vector; + a value of None in any position will cause the filter to use + 0 for that time step. + update_first : bool, optional + controls whether the order of operations is update followed by + predict, or predict followed by update. Default is predict->update. + saver : filterpy.common.Saver, optional + filterpy.common.Saver object. If provided, saver.save() will be + called after every epoch + Returns + ------- + means : np.array((n,dim_x,1)) + array of the state for each time step after the update. Each entry + is an np.array. In other words `means[k,:]` is the state at step + `k`. + covariance : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the update. + In other words `covariance[k,:,:]` is the covariance at step `k`. + means_predictions : np.array((n,dim_x,1)) + array of the state for each time step after the predictions. Each + entry is an np.array. In other words `means[k,:]` is the state at + step `k`. + covariance_predictions : np.array((n,dim_x,dim_x)) + array of the covariances for each time step after the prediction. + In other words `covariance[k,:,:]` is the covariance at step `k`. + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + Fs = [kf.F for t in range (40)] + Hs = [kf.H for t in range (40)] + (mu, cov, _, _) = kf.batch_filter(zs, Rs=R_list, Fs=Fs, Hs=Hs, Qs=None, + Bs=None, us=None, update_first=False) + (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs, Qs=None) + """ + + n = np.size(zs, 0) + dim_x = x.shape[0] + + # mean estimates from Kalman Filter + if x.ndim == 1: + means = zeros((n, dim_x)) + means_p = zeros((n, dim_x)) + else: + means = zeros((n, dim_x, 1)) + means_p = zeros((n, dim_x, 1)) + + # state covariances from Kalman Filter + covariances = zeros((n, dim_x, dim_x)) + covariances_p = zeros((n, dim_x, dim_x)) + + if us is None: + us = [0.0] * n + Bs = [0.0] * n + + if update_first: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + x, P = update(x, P, z, R=R, H=H) + means[i, :] = x + covariances[i, :, :] = P + + x, P = predict(x, P, u=u, B=B, F=F, Q=Q) + means_p[i, :] = x + covariances_p[i, :, :] = P + if saver is not None: + saver.save() + else: + for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)): + x, P = predict(x, P, u=u, B=B, F=F, Q=Q) + means_p[i, :] = x + covariances_p[i, :, :] = P + + x, P = update(x, P, z, R=R, H=H) + means[i, :] = x + covariances[i, :, :] = P + if saver is not None: + saver.save() + + return (means, covariances, means_p, covariances_p) + + +def rts_smoother(Xs, Ps, Fs, Qs): + """ + Runs the Rauch-Tung-Striebel Kalman smoother on a set of + means and covariances computed by a Kalman filter. The usual input + would come from the output of `KalmanFilter.batch_filter()`. + Parameters + ---------- + Xs : numpy.array + array of the means (state variable x) of the output of a Kalman + filter. + Ps : numpy.array + array of the covariances of the output of a kalman filter. + Fs : list-like collection of numpy.array + State transition matrix of the Kalman filter at each time step. + Qs : list-like collection of numpy.array, optional + Process noise of the Kalman filter at each time step. + Returns + ------- + x : numpy.ndarray + smoothed means + P : numpy.ndarray + smoothed state covariances + K : numpy.ndarray + smoother gain at each step + pP : numpy.ndarray + predicted state covariances + Examples + -------- + .. code-block:: Python + zs = [t + random.randn()*4 for t in range (40)] + (mu, cov, _, _) = kalman.batch_filter(zs) + (x, P, K, pP) = rts_smoother(mu, cov, kf.F, kf.Q) + """ + + if len(Xs) != len(Ps): + raise ValueError("length of Xs and Ps must be the same") + + n = Xs.shape[0] + dim_x = Xs.shape[1] + + # smoother gain + K = zeros((n, dim_x, dim_x)) + x, P, pP = Xs.copy(), Ps.copy(), Ps.copy() + + for k in range(n - 2, -1, -1): + pP[k] = dot(dot(Fs[k], P[k]), Fs[k].T) + Qs[k] + + # pylint: disable=bad-whitespace + K[k] = dot(dot(P[k], Fs[k].T), linalg.inv(pP[k])) + x[k] += dot(K[k], x[k + 1] - dot(Fs[k], x[k])) + P[k] += dot(dot(K[k], P[k + 1] - pP[k]), K[k].T) + + return (x, P, K, pP) diff --git a/src/juxtapose/trackers/boxmot/postprocessing/__init__.py b/src/juxtapose/trackers/boxmot/postprocessing/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/postprocessing/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/postprocessing/gsi.py b/src/juxtapose/trackers/boxmot/postprocessing/gsi.py new file mode 100644 index 0000000..b30e8c5 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/postprocessing/gsi.py @@ -0,0 +1,93 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from pathlib import Path + +import numpy as np +from sklearn.gaussian_process import GaussianProcessRegressor as GPR +from sklearn.gaussian_process.kernels import RBF + +from juxtapose.trackers.boxmot.utils import logger as LOGGER + + +def linear_interpolation(input_, interval): + input_ = input_[np.lexsort([input_[:, 0], input_[:, 1]])] + output_ = input_.copy() + + id_pre, f_pre, row_pre = -1, -1, np.zeros((10,)) + for row in input_: + f_curr, id_curr = row[:2].astype(int) + if id_curr == id_pre: + if f_pre + 1 < f_curr < f_pre + interval: + for i, f in enumerate(range(f_pre + 1, f_curr), start=1): + step = (row - row_pre) / (f_curr - f_pre) * i + row_new = row_pre + step + output_ = np.append(output_, row_new[np.newaxis, :], axis=0) + else: + id_pre = id_curr + row_pre = row + f_pre = f_curr + output_ = output_[np.lexsort([output_[:, 0], output_[:, 1]])] + return output_ + + +def gaussian_smooth(input_, tau): + output_ = list() + print("input_", input_) + ids = set(input_[:, 1]) + for i, id_ in enumerate(ids): + tracks = input_[input_[:, 1] == id_] + print("tracks", tracks) + len_scale = np.clip(tau * np.log(tau**3 / len(tracks)), tau**-1, tau**2) + gpr = GPR(RBF(len_scale, "fixed")) + t = tracks[:, 0].reshape(-1, 1) + x = tracks[:, 2].reshape(-1, 1) + y = tracks[:, 3].reshape(-1, 1) + w = tracks[:, 4].reshape(-1, 1) + h = tracks[:, 5].reshape(-1, 1) + gpr.fit(t, x) + xx = gpr.predict(t) + gpr.fit(t, y) + yy = gpr.predict(t) + gpr.fit(t, w) + ww = gpr.predict(t) + gpr.fit(t, h) + hh = gpr.predict(t) + # frame count, id, x, y, w, h, conf, cls, -1 (don't care) + output_.extend( + [ + [ + t[j, 0], + id_, + xx[j], + yy[j], + ww[j], + hh[j], + tracks[j, 6], + tracks[j, 7], + -1, + ] + for j in range(len(t)) + ] + ) + return output_ + + +def gsi(mot_results_folder=Path("examples/runs/val/exp87/labels"), interval=20, tau=10): + tracking_results_files = mot_results_folder.glob("MOT*FRCNN.txt") + for p in tracking_results_files: + LOGGER.info(f"Applying gaussian smoothed interpolation (GSI) to: {p}") + tracking_results = np.loadtxt(p, dtype=int, delimiter=" ") + if tracking_results.size != 0: + li = linear_interpolation(tracking_results, interval) + gsi = gaussian_smooth(li, tau) + np.savetxt(p, gsi, fmt="%d %d %d %d %d %d %d %d %d") + else: + print("No tracking result in {p}. Skipping...") + + +def main(): + gsi() + + +if __name__ == "__main__": + main() diff --git a/src/juxtapose/trackers/boxmot/tracker_zoo.py b/src/juxtapose/trackers/boxmot/tracker_zoo.py new file mode 100644 index 0000000..2c679f0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/tracker_zoo.py @@ -0,0 +1,123 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from types import SimpleNamespace + +import yaml + +from juxtapose.trackers.boxmot.utils import BOXMOT + + +def get_tracker_config(tracker_type): + tracking_config = BOXMOT / "configs" / (tracker_type + ".yaml") + return tracking_config + + +def create_tracker(tracker_type, tracker_config, reid_weights, device, half, per_class): + with open(tracker_config, "r") as f: + cfg = yaml.load(f.read(), Loader=yaml.FullLoader) + cfg = SimpleNamespace(**cfg) # easier dict acces by dot, instead of [''] + + if tracker_type == "strongsort": + from juxtapose.trackers.boxmot.trackers.strongsort.strong_sort import ( + StrongSORT, + ) + + strongsort = StrongSORT( + reid_weights, + device, + half, + max_dist=cfg.max_dist, + max_iou_dist=cfg.max_iou_dist, + max_age=cfg.max_age, + n_init=cfg.n_init, + nn_budget=cfg.nn_budget, + mc_lambda=cfg.mc_lambda, + ema_alpha=cfg.ema_alpha, + ) + return strongsort + + elif tracker_type == "ocsort": + from juxtapose.trackers.boxmot.trackers.ocsort.ocsort import OCSort + + ocsort = OCSort( + per_class, + det_thresh=cfg.det_thresh, + max_age=cfg.max_age, + min_hits=cfg.min_hits, + iou_threshold=cfg.iou_thresh, + delta_t=cfg.delta_t, + asso_func=cfg.asso_func, + inertia=cfg.inertia, + use_byte=cfg.use_byte, + ) + return ocsort + + # elif tracker_type == "bytetrack": + # from juxtapose.trackers.boxmot.trackers.bytetrack.byte_tracker import BYTETracker + + # bytetracker = BYTETracker( + # track_thresh=cfg.track_thresh, + # match_thresh=cfg.match_thresh, + # track_buffer=cfg.track_buffer, + # frame_rate=cfg.frame_rate, + # ) + # return bytetracker + + # elif tracker_type == "botsort": + # from juxtapose.trackers.boxmot.trackers.botsort.bot_sort import BoTSORT + + # botsort = BoTSORT( + # reid_weights, + # device, + # half, + # track_high_thresh=cfg.track_high_thresh, + # new_track_thresh=cfg.new_track_thresh, + # track_buffer=cfg.track_buffer, + # match_thresh=cfg.match_thresh, + # proximity_thresh=cfg.proximity_thresh, + # appearance_thresh=cfg.appearance_thresh, + # cmc_method=cfg.cmc_method, + # frame_rate=cfg.frame_rate, + # lambda_=cfg.lambda_, + # ) + # return botsort + elif tracker_type == "deepocsort": + from juxtapose.trackers.boxmot.trackers.deepocsort.deep_ocsort import ( + DeepOCSort, + ) + + deepocsort = DeepOCSort( + reid_weights, + device, + half, + per_class, + det_thresh=cfg.det_thresh, + max_age=cfg.max_age, + min_hits=cfg.min_hits, + iou_threshold=cfg.iou_thresh, + delta_t=cfg.delta_t, + asso_func=cfg.asso_func, + inertia=cfg.inertia, + ) + return deepocsort + elif tracker_type == "hybridsort": + from juxtapose.trackers.boxmot.trackers.hybridsort.hybridsort import ( + HybridSORT, + ) + + hybridsort = HybridSORT( + reid_weights, + device, + half, + det_thresh=cfg.det_thresh, + max_age=cfg.max_age, + min_hits=cfg.min_hits, + iou_threshold=cfg.iou_thresh, + delta_t=cfg.delta_t, + asso_func=cfg.asso_func, + inertia=cfg.inertia, + ) + return hybridsort + else: + print("No such tracker") + exit() diff --git a/src/juxtapose/trackers/boxmot/trackers/__init__.py b/src/juxtapose/trackers/boxmot/trackers/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/trackers/deepocsort/__init__.py b/src/juxtapose/trackers/boxmot/trackers/deepocsort/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/deepocsort/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/trackers/deepocsort/deep_ocsort.py b/src/juxtapose/trackers/boxmot/trackers/deepocsort/deep_ocsort.py new file mode 100644 index 0000000..028547f --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/deepocsort/deep_ocsort.py @@ -0,0 +1,585 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +""" + This script is adopted from the SORT script by Alex Bewley alex@bewley.ai +""" + +import numpy as np + +from juxtapose.trackers.boxmot.appearance.reid_multibackend import ( + ReIDDetectMultiBackend, +) +from juxtapose.trackers.boxmot.motion.cmc import get_cmc_method +from juxtapose.trackers.boxmot.motion.kalman_filters.adapters import ( + OCSortKalmanFilterAdapter, +) +from juxtapose.trackers.boxmot.utils import PerClassDecorator +from juxtapose.trackers.boxmot.utils.association import associate, linear_assignment +from juxtapose.trackers.boxmot.utils.iou import get_asso_func + + +def k_previous_obs(observations, cur_age, k): + if len(observations) == 0: + return [-1, -1, -1, -1, -1] + for i in range(k): + dt = k - i + if cur_age - dt in observations: + return observations[cur_age - dt] + max_age = max(observations.keys()) + return observations[max_age] + + +def convert_bbox_to_z(bbox): + """ + Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form + [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is + the aspect ratio + """ + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w / 2.0 + y = bbox[1] + h / 2.0 + s = w * h # scale is just area + r = w / float(h + 1e-6) + return np.array([x, y, s, r]).reshape((4, 1)) + + +def convert_bbox_to_z_new(bbox): + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w / 2.0 + y = bbox[1] + h / 2.0 + return np.array([x, y, w, h]).reshape((4, 1)) + + +def convert_x_to_bbox_new(x): + x, y, w, h = x.reshape(-1)[:4] + return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2]).reshape(1, 4) + + +def convert_x_to_bbox(x, score=None): + """ + Takes a bounding box in the centre form [x,y,s,r] and returns it in the form + [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right + """ + w = np.sqrt(x[2] * x[3]) + h = x[2] / w + if score is None: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0] + ).reshape((1, 4)) + else: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score] + ).reshape((1, 5)) + + +def speed_direction(bbox1, bbox2): + cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0 + cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0 + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +def new_kf_process_noise(w, h, p=1 / 20, v=1 / 160): + Q = np.diag( + ( + (p * w) ** 2, + (p * h) ** 2, + (p * w) ** 2, + (p * h) ** 2, + (v * w) ** 2, + (v * h) ** 2, + (v * w) ** 2, + (v * h) ** 2, + ) + ) + return Q + + +def new_kf_measurement_noise(w, h, m=1 / 20): + w_var = (m * w) ** 2 + h_var = (m * h) ** 2 + R = np.diag((w_var, h_var, w_var, h_var)) + return R + + +class KalmanBoxTracker(object): + """ + This class represents the internal state of individual tracked objects observed as bbox. + """ + + count = 0 + + def __init__(self, det, delta_t=3, emb=None, alpha=0, new_kf=False): + """ + Initialises a tracker using initial bounding box. + + """ + # define constant velocity model + + self.new_kf = new_kf + bbox = det[0:5] + self.conf = det[4] + self.cls = det[5] + self.det_ind = det[6] + + if new_kf: + self.kf = OCSortKalmanFilterAdapter(dim_x=8, dim_z=4) + self.kf.F = np.array( + [ + # x y w h x' y' w' h' + [1, 0, 0, 0, 1, 0, 0, 0], + [0, 1, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 1], + ] + ) + self.kf.H = np.array( + [ + [1, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0], + ] + ) + _, _, w, h = convert_bbox_to_z_new(bbox).reshape(-1) + self.kf.P = new_kf_process_noise(w, h) + self.kf.P[:4, :4] *= 4 + self.kf.P[4:, 4:] *= 100 + # Process and measurement uncertainty happen in functions + self.bbox_to_z_func = convert_bbox_to_z_new + self.x_to_bbox_func = convert_x_to_bbox_new + else: + self.kf = OCSortKalmanFilterAdapter(dim_x=7, dim_z=4) + self.kf.F = np.array( + [ + # x y s r x' y' s' + [1, 0, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 1], + ] + ) + self.kf.H = np.array( + [ + [1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + ] + ) + self.kf.R[2:, 2:] *= 10.0 + self.kf.P[ + 4:, 4: + ] *= 1000.0 # give high uncertainty to the unobservable initial velocities + self.kf.P *= 10.0 + self.kf.Q[-1, -1] *= 0.01 + self.kf.Q[4:, 4:] *= 0.01 + self.bbox_to_z_func = convert_bbox_to_z + self.x_to_bbox_func = convert_x_to_bbox + + self.kf.x[:4] = self.bbox_to_z_func(bbox) + + self.time_since_update = 0 + self.id = KalmanBoxTracker.count + KalmanBoxTracker.count += 1 + self.history = [] + self.hits = 0 + self.hit_streak = 0 + self.age = 0 + """ + NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of + function k_previous_obs. It is ugly and I do not like it. But to support generate observation array in a + fast and unified way, which you would see below k_observations = np.array([k_previous_obs(...]]), + let's bear it for now. + """ + # Used for OCR + self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder + # Used to output track after min_hits reached + self.history_observations = [] + # Used for velocity + self.observations = dict() + self.velocity = None + self.delta_t = delta_t + + self.emb = emb + + self.frozen = False + + def update(self, det): + """ + Updates the state vector with observed bbox. + """ + + if det is not None: + bbox = det[0:5] + self.conf = det[4] + self.cls = det[5] + self.det_ind = det[6] + self.frozen = False + + if self.last_observation.sum() >= 0: # no previous observation + previous_box = None + for dt in range(self.delta_t, 0, -1): + if self.age - dt in self.observations: + previous_box = self.observations[self.age - dt] + break + if previous_box is None: + previous_box = self.last_observation + """ + Estimate the track speed direction with observations \Delta t steps away + """ + self.velocity = speed_direction(previous_box, bbox) + """ + Insert new observations. This is a ugly way to maintain both self.observations + and self.history_observations. Bear it for the moment. + """ + self.last_observation = bbox + self.observations[self.age] = bbox + self.history_observations.append(bbox) + + self.time_since_update = 0 + self.history = [] + self.hits += 1 + self.hit_streak += 1 + if self.new_kf: + R = new_kf_measurement_noise(self.kf.x[2, 0], self.kf.x[3, 0]) + self.kf.update(self.bbox_to_z_func(bbox), R=R) + else: + self.kf.update(self.bbox_to_z_func(bbox)) + else: + self.kf.update(det) + self.frozen = True + + def update_emb(self, emb, alpha=0.9): + self.emb = alpha * self.emb + (1 - alpha) * emb + self.emb /= np.linalg.norm(self.emb) + + def get_emb(self): + return self.emb + + def apply_affine_correction(self, affine): + m = affine[:, :2] + t = affine[:, 2].reshape(2, 1) + # For OCR + if self.last_observation.sum() > 0: + ps = self.last_observation[:4].reshape(2, 2).T + ps = m @ ps + t + self.last_observation[:4] = ps.T.reshape(-1) + + # Apply to each box in the range of velocity computation + for dt in range(self.delta_t, -1, -1): + if self.age - dt in self.observations: + ps = self.observations[self.age - dt][:4].reshape(2, 2).T + ps = m @ ps + t + self.observations[self.age - dt][:4] = ps.T.reshape(-1) + + # Also need to change kf state, but might be frozen + self.kf.apply_affine_correction(m, t, self.new_kf) + + def predict(self): + """ + Advances the state vector and returns the predicted bounding box estimate. + """ + # Don't allow negative bounding boxes + if self.new_kf: + if self.kf.x[2] + self.kf.x[6] <= 0: + self.kf.x[6] = 0 + if self.kf.x[3] + self.kf.x[7] <= 0: + self.kf.x[7] = 0 + + # Stop velocity, will update in kf during OOS + if self.frozen: + self.kf.x[6] = self.kf.x[7] = 0 + Q = new_kf_process_noise(self.kf.x[2, 0], self.kf.x[3, 0]) + else: + if (self.kf.x[6] + self.kf.x[2]) <= 0: + self.kf.x[6] *= 0.0 + Q = None + + self.kf.predict(Q=Q) + self.age += 1 + if self.time_since_update > 0: + self.hit_streak = 0 + self.time_since_update += 1 + self.history.append(self.x_to_bbox_func(self.kf.x)) + return self.history[-1] + + def get_state(self): + """ + Returns the current bounding box estimate. + """ + return self.x_to_bbox_func(self.kf.x) + + def mahalanobis(self, bbox): + """Should be run after a predict() call for accuracy.""" + return self.kf.md_for_measurement(self.bbox_to_z_func(bbox)) + + +class DeepOCSort(object): + def __init__( + self, + model_weights, + device, + fp16, + per_class=False, + det_thresh=0.3, + max_age=30, + min_hits=3, + iou_threshold=0.3, + delta_t=3, + asso_func="iou", + inertia=0.2, + w_association_emb=0.5, + alpha_fixed_emb=0.95, + aw_param=0.5, + embedding_off=False, + cmc_off=False, + aw_off=False, + new_kf_off=False, + **kwargs, + ): + """ + Sets key parameters for SORT + """ + self.max_age = max_age + self.min_hits = min_hits + self.iou_threshold = iou_threshold + self.trackers = [] + self.frame_count = 0 + self.det_thresh = det_thresh + self.delta_t = delta_t + self.asso_func = get_asso_func(asso_func) + self.inertia = inertia + self.w_association_emb = w_association_emb + self.alpha_fixed_emb = alpha_fixed_emb + self.aw_param = aw_param + self.per_class = per_class + KalmanBoxTracker.count = 1 + + self.model = ReIDDetectMultiBackend( + weights=model_weights, device=device, fp16=fp16 + ) + # "similarity transforms using feature point extraction, optical flow, and RANSAC" + self.cmc = get_cmc_method("sof")() + self.embedding_off = embedding_off + self.cmc_off = cmc_off + self.aw_off = aw_off + self.new_kf_off = new_kf_off + + @PerClassDecorator + def update(self, dets, img): + # def update(self, bboxes, confidence, labels, img): + """ + Params: + dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...] + Requires: this method must be called once for each frame even with empty detections + (use np.empty((0, 5)) for frames without detections). + Returns the a similar array, where the last column is the object ID. + NOTE: The number of objects returned may differ from the number of detections provided. + """ + assert isinstance( + dets, np.ndarray + ), f"Unsupported 'dets' input type '{type(dets)}', valid format is np.ndarray" + assert isinstance( + img, np.ndarray + ), f"Unsupported 'img' input type '{type(img)}', valid format is np.ndarray" + assert ( + len(dets.shape) == 2 + ), "Unsupported 'dets' dimensions, valid number of dimensions is two" + assert ( + dets.shape[1] == 6 + ), "Unsupported 'dets' 2nd dimension lenght, valid lenghts is 6" + # assert isinstance( + # bboxes, np.ndarray + # ), f"Unsupported 'dets' input format '{type(bboxes)}', valid format is np.ndarray" + # assert isinstance( + # img, np.ndarray + # ), f"Unsupported 'img' input format '{type(img)}', valid format is np.ndarray" + # assert ( + # len(bboxes) == len(confidence) == len(labels) + # ), "bboxes and confidence and labels should have same dimensions" + + # # Restructure dets from bboxes, confidence, labels to [x1, y1, x2, y2, conf, cls] + # dets = np.hstack([bboxes, confidence.reshape(-1, 1), labels.reshape(-1, 1)]) + + self.frame_count += 1 + self.height, self.width = img.shape[:2] + + scores = dets[:, 4] + dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)]) + assert dets.shape[1] == 7 + remain_inds = scores > self.det_thresh + dets = dets[remain_inds] + + # appearance descriptor extraction + if self.embedding_off or dets.shape[0] == 0: + dets_embs = np.ones((dets.shape[0], 1)) + else: + # (Ndets x X) [512, 1024, 2048] + dets_embs = self.model.get_features(dets[:, 0:4], img) + + # CMC + if not self.cmc_off: + transform = self.cmc.apply(img, dets[:, :4]) + for trk in self.trackers: + trk.apply_affine_correction(transform) + + trust = (dets[:, 4] - self.det_thresh) / (1 - self.det_thresh) + af = self.alpha_fixed_emb + # From [self.alpha_fixed_emb, 1], goes to 1 as detector is less confident + dets_alpha = af + (1 - af) * (1 - trust) + + # get predicted locations from existing trackers. + trks = np.zeros((len(self.trackers), 5)) + trk_embs = [] + to_del = [] + ret = [] + for t, trk in enumerate(trks): + pos = self.trackers[t].predict()[0] + trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] + if np.any(np.isnan(pos)): + to_del.append(t) + else: + trk_embs.append(self.trackers[t].get_emb()) + trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) + + if len(trk_embs) > 0: + trk_embs = np.vstack(trk_embs) + else: + trk_embs = np.array(trk_embs) + + for t in reversed(to_del): + self.trackers.pop(t) + + velocities = np.array( + [ + trk.velocity if trk.velocity is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + last_boxes = np.array([trk.last_observation for trk in self.trackers]) + k_observations = np.array( + [ + k_previous_obs(trk.observations, trk.age, self.delta_t) + for trk in self.trackers + ] + ) + + """ + First round of association + """ + # (M detections X N tracks, final score) + if self.embedding_off or dets.shape[0] == 0 or trk_embs.shape[0] == 0: + stage1_emb_cost = None + else: + stage1_emb_cost = dets_embs @ trk_embs.T + matched, unmatched_dets, unmatched_trks = associate( + dets, + trks, + self.iou_threshold, + velocities, + k_observations, + self.inertia, + stage1_emb_cost, + self.w_association_emb, + self.aw_off, + self.aw_param, + ) + for m in matched: + self.trackers[m[1]].update(dets[m[0], :]) + self.trackers[m[1]].update_emb(dets_embs[m[0]], alpha=dets_alpha[m[0]]) + + """ + Second round of associaton by OCR + """ + if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0: + left_dets = dets[unmatched_dets] + left_dets_embs = dets_embs[unmatched_dets] + left_trks = last_boxes[unmatched_trks] + left_trks_embs = trk_embs[unmatched_trks] + + iou_left = self.asso_func(left_dets, left_trks) + # TODO: is better without this + emb_cost_left = left_dets_embs @ left_trks_embs.T + if self.embedding_off: + emb_cost_left = np.zeros_like(emb_cost_left) + iou_left = np.array(iou_left) + if iou_left.max() > self.iou_threshold: + """ + NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may + get a higher performance especially on MOT17/MOT20 datasets. But we keep it + uniform here for simplicity + """ + rematched_indices = linear_assignment(-iou_left) + to_remove_det_indices = [] + to_remove_trk_indices = [] + for m in rematched_indices: + det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]] + if iou_left[m[0], m[1]] < self.iou_threshold: + continue + self.trackers[trk_ind].update(dets[det_ind, :]) + self.trackers[trk_ind].update_emb( + dets_embs[det_ind], alpha=dets_alpha[det_ind] + ) + to_remove_det_indices.append(det_ind) + to_remove_trk_indices.append(trk_ind) + unmatched_dets = np.setdiff1d( + unmatched_dets, np.array(to_remove_det_indices) + ) + unmatched_trks = np.setdiff1d( + unmatched_trks, np.array(to_remove_trk_indices) + ) + + for m in unmatched_trks: + self.trackers[m].update(None) + + # create and initialise new trackers for unmatched detections + for i in unmatched_dets: + trk = KalmanBoxTracker( + dets[i], + delta_t=self.delta_t, + emb=dets_embs[i], + alpha=dets_alpha[i], + new_kf=not self.new_kf_off, + ) + self.trackers.append(trk) + i = len(self.trackers) + for trk in reversed(self.trackers): + if trk.last_observation.sum() < 0: + d = trk.get_state()[0] + else: + """ + this is optional to use the recent observation or the kalman filter prediction, + we didn't notice significant difference here + """ + d = trk.last_observation[:4] + if (trk.time_since_update < 1) and ( + trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits + ): + # +1 as MOT benchmark requires positive + ret.append( + np.concatenate( + (d, [trk.id], [trk.conf], [trk.cls], [trk.det_ind]) + ).reshape(1, -1) + ) + i -= 1 + # remove dead tracklet + if trk.time_since_update > self.max_age: + self.trackers.pop(i) + if len(ret) > 0: + return np.concatenate(ret) + return np.array([]) + + # if len(ret) > 0: + # ret = np.concatenate(ret) + # return convert_boxmot_tracker_to_rtm(ret) diff --git a/src/juxtapose/trackers/boxmot/trackers/hybridsort/association.py b/src/juxtapose/trackers/boxmot/trackers/hybridsort/association.py new file mode 100644 index 0000000..bfe9e92 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/hybridsort/association.py @@ -0,0 +1,684 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + + +def intersection_batch(bboxes1, bboxes2): + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + intersections = w * h + return intersections + + +def box_area(bbox): + area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) + return area + + +def iou_batch(bboxes1, bboxes2): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + o = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + return (o) + + +def cal_score_dif_batch(bboxes1, bboxes2): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + score2 = bboxes2[..., 4] + score1 = bboxes1[..., 4] + + return (abs(score2 - score1)) + + +def cal_score_dif_batch_two_score(bboxes1, bboxes2): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + score2 = bboxes2[..., 5] + score1 = bboxes1[..., 4] + + return (abs(score2 - score1)) + + +def hmiou(bboxes1, bboxes2): + """ + Height_Modulated_IoU + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + yy11 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + yy12 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + + yy21 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + yy22 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + o = (yy12 - yy11) / (yy22 - yy21) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + o *= wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + return (o) + + +def giou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + wc = xxc2 - xxc1 + hc = yyc2 - yyc1 + assert ((wc > 0).all() and (hc > 0).all()) + area_enclose = wc * hc + giou = iou - (area_enclose - wh) / area_enclose + giou = (giou + 1.) / 2.0 # resize from (-1,1) to (0,1) + return giou + + +def giou_batch_true(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + union = ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + iou = wh / union + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + wc = xxc2 - xxc1 + hc = yyc2 - yyc1 + assert ((wc > 0).all() and (hc > 0).all()) + area_enclose = wc * hc + giou = iou - (area_enclose - union) / area_enclose + giou = (giou + 1.) / 2.0 # resize from (-1,1) to (0,1) + return giou + + +def diou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + # calculate the intersection box + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + + centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0 + centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0 + centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0 + centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0 + + inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2 + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + + outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2 + diou = iou - inner_diag / outer_diag + + return (diou + 1) / 2.0 # resize from (-1,1) to (0,1) + + +def ciou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + # calculate the intersection box + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) + + centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0 + centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0 + centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0 + centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0 + + inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2 + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + + outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2 + + w1 = bboxes1[..., 2] - bboxes1[..., 0] + h1 = bboxes1[..., 3] - bboxes1[..., 1] + w2 = bboxes2[..., 2] - bboxes2[..., 0] + h2 = bboxes2[..., 3] - bboxes2[..., 1] + + # prevent dividing over zero. add one pixel shift + h2 = h2 + 1. + h1 = h1 + 1. + arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1) + v = (4 / (np.pi ** 2)) * (arctan ** 2) + S = 1 - iou + alpha = v / (S + v) + ciou = iou - inner_diag / outer_diag - alpha * v + + return (ciou + 1) / 2.0 # resize from (-1,1) to (0,1) + + +def ct_dist(bboxes1, bboxes2): + """ + Measure the center distance between two sets of bounding boxes, + this is a coarse implementation, we don't recommend using it only + for association, which can be unstable and sensitive to frame rate + and object speed. + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0 + centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0 + centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0 + centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0 + + ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2 + + ct_dist = np.sqrt(ct_dist2) + + # The linear rescaling is a naive version and needs more study + ct_dist = ct_dist / ct_dist.max() + return ct_dist.max() - ct_dist # resize to (0,1) + + +def speed_direction_batch(dets, tracks): + """ + batch formulation of function 'speed_direction', compute normalized speed from batch bboxes + @param dets: + @param tracks: + @return: normalized speed in batch + """ + tracks = tracks[..., np.newaxis] + CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0 + CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0 + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx ** 2 + dy ** 2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def linear_assignment(cost_matrix, thresh=0.): + try: # [hgx0411] goes here! + import lap + if thresh != 0: + _, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + else: + _, x, y = lap.lapjv(cost_matrix, extend_cost=True) + return np.array([[y[i], i] for i in x if i >= 0]) + except ImportError: + from scipy.optimize import linear_sum_assignment + x, y = linear_sum_assignment(cost_matrix) + return np.array(list(zip(x, y))) + + +def cost_vel(Y, X, trackers, velocities, detections, previous_obs, vdc_weight): + # Y, X = speed_direction_batch(detections, previous_obs) + inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1] + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + + # iou_matrix = iou_batch(detections, trackers) + scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1) + # iou_matrix = iou_matrix * scores # a trick sometiems works, we don't encourage this + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost.T + angle_diff_cost = angle_diff_cost * scores + return angle_diff_cost + + +def speed_direction_batch_lt(dets, tracks): + tracks = tracks[..., np.newaxis] + CX1, CY1 = dets[:, 0], dets[:, 1] + CX2, CY2 = tracks[:, 0], tracks[:, 1] + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx ** 2 + dy ** 2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def speed_direction_batch_rt(dets, tracks): + tracks = tracks[..., np.newaxis] + CX1, CY1 = dets[:, 0], dets[:, 3] + CX2, CY2 = tracks[:, 0], tracks[:, 3] + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx ** 2 + dy ** 2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def speed_direction_batch_lb(dets, tracks): + tracks = tracks[..., np.newaxis] + CX1, CY1 = dets[:, 2], dets[:, 1] + CX2, CY2 = tracks[:, 2], tracks[:, 1] + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx ** 2 + dy ** 2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def speed_direction_batch_rb(dets, tracks): + tracks = tracks[..., np.newaxis] + CX1, CY1 = dets[:, 2], dets[:, 3] + CX2, CY2 = tracks[:, 2], tracks[:, 3] + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx ** 2 + dy ** 2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def associate_4_points( + detections, trackers, iou_threshold, lt, rt, lb, rb, previous_obs, vdc_weight, iou_type=None, args=None +): + if (len(trackers) == 0): + return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int) + + Y1, X1 = speed_direction_batch_lt(detections, previous_obs) + Y2, X2 = speed_direction_batch_rt(detections, previous_obs) + Y3, X3 = speed_direction_batch_lb(detections, previous_obs) + Y4, X4 = speed_direction_batch_rb(detections, previous_obs) + YC, XC = speed_direction_batch(detections, previous_obs) + cost_lt = cost_vel(Y1, X1, trackers, lt, detections, previous_obs, vdc_weight) + cost_rt = cost_vel(Y2, X2, trackers, rt, detections, previous_obs, vdc_weight) + cost_lb = cost_vel(Y3, X3, trackers, lb, detections, previous_obs, vdc_weight) + cost_rb = cost_vel(Y4, X4, trackers, rb, detections, previous_obs, vdc_weight) + + iou_matrix = iou_type(detections, trackers) + angle_diff_cost = cost_lt + cost_rt + cost_lb + cost_rb + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost)) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if (d not in matched_indices[:, 0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if (t not in matched_indices[:, 1]): + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if (iou_matrix[m[0], m[1]] < iou_threshold): + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if (len(matches) == 0): + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +def associate_4_points_with_score( + detections, trackers, iou_threshold, lt, rt, lb, rb, previous_obs, vdc_weight, iou_type=None, args=None +): + if (len(trackers) == 0): + return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int) + + Y1, X1 = speed_direction_batch_lt(detections, previous_obs) + Y2, X2 = speed_direction_batch_rt(detections, previous_obs) + Y3, X3 = speed_direction_batch_lb(detections, previous_obs) + Y4, X4 = speed_direction_batch_rb(detections, previous_obs) + cost_lt = cost_vel(Y1, X1, trackers, lt, detections, previous_obs, vdc_weight) + cost_rt = cost_vel(Y2, X2, trackers, rt, detections, previous_obs, vdc_weight) + cost_lb = cost_vel(Y3, X3, trackers, lb, detections, previous_obs, vdc_weight) + cost_rb = cost_vel(Y4, X4, trackers, rb, detections, previous_obs, vdc_weight) + iou_matrix = iou_type(detections, trackers) + score_dif = cal_score_dif_batch(detections, trackers) + + angle_diff_cost = cost_lt + cost_rt + cost_lb + cost_rb + + # TCM + angle_diff_cost -= score_dif * args.TCM_first_step_weight + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost)) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if (d not in matched_indices[:, 0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if (t not in matched_indices[:, 1]): + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if (iou_matrix[m[0], m[1]] < iou_threshold): + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if (len(matches) == 0): + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +def associate_4_points_with_score_with_reid( + detections, trackers, iou_threshold, lt, rt, lb, rb, previous_obs, vdc_weight, + TCM_first_step_weight, + iou_type=None, emb_cost=None, weights=(1.0, 0), thresh=0.8, + long_emb_dists=None, with_longterm_reid=False, + longterm_reid_weight=0.0, with_longterm_reid_correction=False, + longterm_reid_correction_thresh=0.0, dataset="dancetrack" +): + + if (len(trackers) == 0): + return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int) + + Y1, X1 = speed_direction_batch_lt(detections, previous_obs) + Y2, X2 = speed_direction_batch_rt(detections, previous_obs) + Y3, X3 = speed_direction_batch_lb(detections, previous_obs) + Y4, X4 = speed_direction_batch_rb(detections, previous_obs) + cost_lt = cost_vel(Y1, X1, trackers, lt, detections, previous_obs, vdc_weight) + cost_rt = cost_vel(Y2, X2, trackers, rt, detections, previous_obs, vdc_weight) + cost_lb = cost_vel(Y3, X3, trackers, lb, detections, previous_obs, vdc_weight) + cost_rb = cost_vel(Y4, X4, trackers, rb, detections, previous_obs, vdc_weight) + iou_matrix = iou_type(detections, trackers) + score_dif = cal_score_dif_batch(detections, trackers) + + angle_diff_cost = cost_lt + cost_rt + cost_lb + cost_rb + + # TCM + angle_diff_cost -= score_dif * TCM_first_step_weight + + if min(iou_matrix.shape) > 0: + if emb_cost is None: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost)) + else: + if not with_longterm_reid: + matched_indices = linear_assignment( + weights[0] * (-(iou_matrix + angle_diff_cost)) + + weights[1] * emb_cost) # , thresh=thresh + else: # long-term reid feats + matched_indices = linear_assignment( + weights[0] * (-(iou_matrix + angle_diff_cost)) + + weights[1] * emb_cost + longterm_reid_weight * long_emb_dists + ) # , thresh=thresh + + if matched_indices.size == 0: + matched_indices = np.empty(shape=(0, 2)) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if (d not in matched_indices[:, 0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if (t not in matched_indices[:, 1]): + unmatched_trackers.append(t) + + # filter out matched with low IOU (and long-term ReID feats) + matches = [] + # iou_matrix_thre = iou_matrix if dataset == "dancetrack" else iou_matrix - score_dif + iou_matrix_thre = iou_matrix - score_dif + if with_longterm_reid_correction: + for m in matched_indices: + if (emb_cost[m[0], m[1]] > longterm_reid_correction_thresh) and\ + (iou_matrix_thre[m[0], m[1]] < iou_threshold): + print("correction:", emb_cost[m[0], m[1]]) + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + else: + for m in matched_indices: + if (iou_matrix_thre[m[0], m[1]] < iou_threshold): + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + + if (len(matches) == 0): + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +def associate_kitti( + detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight +): + if (len(trackers) == 0): + return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int) + + """ + Cost from the velocity direction consistency + """ + Y, X = speed_direction_batch(detections, previous_obs) + inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1] + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1) + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost.T + angle_diff_cost = angle_diff_cost * scores + + """ + Cost from IoU + """ + iou_matrix = iou_batch(detections, trackers) + + """ + With multiple categories, generate the cost for catgory mismatch + """ + num_dets = detections.shape[0] + num_trk = trackers.shape[0] + cate_matrix = np.zeros((num_dets, num_trk)) + for i in range(num_dets): + for j in range(num_trk): + if det_cates[i] != trackers[j, 4]: + cate_matrix[i][j] = -1e6 + + cost_matrix = - iou_matrix - angle_diff_cost - cate_matrix + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(cost_matrix) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if (d not in matched_indices[:, 0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if (t not in matched_indices[:, 1]): + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if (iou_matrix[m[0], m[1]] < iou_threshold): + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if (len(matches) == 0): + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +# compute embedding distance and gating, borrowed and modified from FairMOT +from scipy.spatial.distance import cdist + + +def embedding_distance(tracks_feat, detections_feat, metric='cosine'): + """ + :param tracks: list[KalmanBoxTracker] + :param detections: list[KalmanBoxTracker] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks_feat), len(detections_feat)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + # det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) # [detection_num, emd_dim] + # #for i, track in enumerate(tracks): + # #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + # track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) # [track_num, emd_dim] + # Nomalized features, metric: cosine, [track_num, detection_num] + cost_matrix = np.maximum(0.0, cdist(tracks_feat, detections_feat, metric)) + return cost_matrix diff --git a/src/juxtapose/trackers/boxmot/trackers/hybridsort/hybridsort.py b/src/juxtapose/trackers/boxmot/trackers/hybridsort/hybridsort.py new file mode 100644 index 0000000..73277a0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/hybridsort/hybridsort.py @@ -0,0 +1,737 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +""" + This script is adopted from the SORT script by Alex Bewley alex@bewley.ai +""" + +from collections import deque # [hgx0418] deque for reid feature + +import numpy as np + +from juxtapose.trackers.boxmot.appearance.reid_multibackend import ( + ReIDDetectMultiBackend, +) +from juxtapose.trackers.boxmot.motion.cmc import get_cmc_method +from juxtapose.trackers.boxmot.trackers.hybridsort.association import ( + associate_4_points_with_score, + associate_4_points_with_score_with_reid, + cal_score_dif_batch_two_score, + embedding_distance, + linear_assignment, +) +from juxtapose.trackers.boxmot.utils import PerClassDecorator +from juxtapose.trackers.boxmot.utils.iou import get_asso_func + +np.random.seed(0) + + +def k_previous_obs(observations, cur_age, k): + if len(observations) == 0: + return [-1, -1, -1, -1, -1] + for i in range(k): + dt = k - i + if cur_age - dt in observations: + return observations[cur_age - dt] + max_age = max(observations.keys()) + return observations[max_age] + + +def convert_bbox_to_z(bbox): + """ + Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form + [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is + the aspect ratio + """ + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w / 2.0 + y = bbox[1] + h / 2.0 + s = w * h # scale is just area + r = w / float(h + 1e-6) + score = bbox[4] + if score: + return np.array([x, y, s, score, r]).reshape((5, 1)) + else: + return np.array([x, y, s, r]).reshape((4, 1)) + + +def convert_x_to_bbox(x, score=None): + """ + Takes a bounding box in the centre form [x,y,s,r] and returns it in the form + [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right + """ + w = np.sqrt(x[2] * x[4]) + h = x[2] / w + score = x[3] + if score is None: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0] + ).reshape((1, 4)) + else: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score] + ).reshape((1, 5)) + + +def speed_direction(bbox1, bbox2): + cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0 + cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0 + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +def speed_direction_lt(bbox1, bbox2): + cx1, cy1 = bbox1[0], bbox1[1] + cx2, cy2 = bbox2[0], bbox2[1] + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +def speed_direction_rt(bbox1, bbox2): + cx1, cy1 = bbox1[0], bbox1[3] + cx2, cy2 = bbox2[0], bbox2[3] + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +def speed_direction_lb(bbox1, bbox2): + cx1, cy1 = bbox1[2], bbox1[1] + cx2, cy2 = bbox2[2], bbox2[1] + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +def speed_direction_rb(bbox1, bbox2): + cx1, cy1 = bbox1[2], bbox1[3] + cx2, cy2 = bbox2[2], bbox2[3] + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +class KalmanBoxTracker(object): + """ + This class represents the internal state of individual tracked objects observed as bbox. + """ + + count = 0 + + def __init__( + self, + bbox, + cls, + det_ind, + temp_feat, + delta_t=3, + orig=False, + buffer_size=30, + longterm_bank_length=30, + alpha=0.8, + ): # 'temp_feat' and 'buffer_size' for reid feature + """ + Initialises a tracker using initial bounding box. + + """ + # define constant velocity model + # if not orig and not args.kalman_GPR: + if not orig: + from juxtapose.trackers.boxmot.motion.kalman_filters.adapters.hybridsort_kf_adapter import ( + KalmanFilterNew_score_new as KalmanFilter_score_new, + ) + + self.kf = KalmanFilter_score_new(dim_x=9, dim_z=5) + else: + from filterpy.kalman import KalmanFilter + + self.kf = KalmanFilter(dim_x=7, dim_z=4) + # u, v, s, c, r, ~u, ~v, ~s, ~c + self.kf.F = np.array( + [ + [1, 0, 0, 0, 0, 1, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 1], + ] + ) + self.kf.H = np.array( + [ + [1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0], + ] + ) + + self.kf.R[2:, 2:] *= 10.0 + self.kf.P[ + 5:, 5: + ] *= 1000.0 # give high uncertainty to the unobservable initial velocities + self.kf.P *= 10.0 + self.kf.Q[-1, -1] *= 0.01 + self.kf.Q[-2, -2] *= 0.01 + self.kf.Q[5:, 5:] *= 0.01 + + self.kf.x[:5] = convert_bbox_to_z(bbox) + + self.time_since_update = 0 + self.id = KalmanBoxTracker.count + KalmanBoxTracker.count += 1 + self.history = [] + self.hits = 0 + self.hit_streak = 0 + self.age = 0 + self.conf = bbox[4] + self.cls = cls + self.det_ind = det_ind + self.adapfs = False + """ + NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of + function k_previous_obs. It is ugly and I do not like it. But to support generate observation array in a + fast and unified way, which you would see below k_observations = np.array([k_previous_obs(...]]), + let's bear it for now. + """ + self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder + self.last_observation_save = np.array([-1, -1, -1, -1, -1]) + self.observations = dict() + self.history_observations = [] + self.velocity_lt = None + self.velocity_rt = None + self.velocity_lb = None + self.velocity_rb = None + self.delta_t = delta_t + self.confidence_pre = None + self.confidence = bbox[4] + + # add the following values and functions + self.smooth_feat = None + buffer_size = longterm_bank_length + self.features = deque([], maxlen=buffer_size) + self.update_features(temp_feat) + + # momentum of embedding update + self.alpha = alpha + + # ReID. for update embeddings during tracking + def update_features(self, feat, score=-1): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + if self.adapfs: + assert score > 0 + pre_w = self.alpha * (self.confidence / (self.confidence + score)) + cur_w = (1 - self.alpha) * (score / (self.confidence + score)) + sum_w = pre_w + cur_w + pre_w = pre_w / sum_w + cur_w = cur_w / sum_w + self.smooth_feat = pre_w * self.smooth_feat + cur_w * feat + else: + self.smooth_feat = ( + self.alpha * self.smooth_feat + (1 - self.alpha) * feat + ) + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def camera_update(self, warp_matrix): + """ + update 'self.mean' of current tracklet with ecc results. + Parameters + ---------- + warp_matrix: warp matrix computed by ECC. + """ + x1, y1, x2, y2, s = convert_x_to_bbox(self.kf.x)[0] + x1_, y1_ = warp_matrix @ np.array([x1, y1, 1]).T + x2_, y2_ = warp_matrix @ np.array([x2, y2, 1]).T + # w, h = x2_ - x1_, y2_ - y1_ + # cx, cy = x1_ + w / 2, y1_ + h / 2 + self.kf.x[:5] = convert_bbox_to_z([x1_, y1_, x2_, y2_, s]) + + def update(self, bbox, cls, det_ind, id_feature, update_feature=True): + """ + Updates the state vector with observed bbox. + """ + velocity_lt = None + velocity_rt = None + velocity_lb = None + velocity_rb = None + if bbox is not None: + self.conf = bbox[-1] + self.cls = cls + self.det_ind = det_ind + if self.last_observation.sum() >= 0: # no previous observation + previous_box = None + for i in range(self.delta_t): + # dt = self.delta_t - i + if self.age - i - 1 in self.observations: + previous_box = self.observations[self.age - i - 1] + if velocity_lt is not None: + velocity_lt += speed_direction_lt(previous_box, bbox) + velocity_rt += speed_direction_rt(previous_box, bbox) + velocity_lb += speed_direction_lb(previous_box, bbox) + velocity_rb += speed_direction_rb(previous_box, bbox) + else: + velocity_lt = speed_direction_lt(previous_box, bbox) + velocity_rt = speed_direction_rt(previous_box, bbox) + velocity_lb = speed_direction_lb(previous_box, bbox) + velocity_rb = speed_direction_rb(previous_box, bbox) + # break + if previous_box is None: + previous_box = self.last_observation + self.velocity_lt = speed_direction_lt(previous_box, bbox) + self.velocity_rt = speed_direction_rt(previous_box, bbox) + self.velocity_lb = speed_direction_lb(previous_box, bbox) + self.velocity_rb = speed_direction_rb(previous_box, bbox) + else: + self.velocity_lt = velocity_lt + self.velocity_rt = velocity_rt + self.velocity_lb = velocity_lb + self.velocity_rb = velocity_rb + """ + Insert new observations. This is a ugly way to maintain both self.observations + and self.history_observations. Bear it for the moment. + """ + self.last_observation = bbox + self.last_observation_save = bbox + self.observations[self.age] = bbox + self.history_observations.append(bbox) + + self.time_since_update = 0 + self.history = [] + self.hits += 1 + self.hit_streak += 1 + self.kf.update(convert_bbox_to_z(bbox)) + # add interface for update feature or not + if update_feature: + if self.adapfs: + self.update_features(id_feature, score=bbox[4]) + else: + self.update_features(id_feature) + self.confidence_pre = self.confidence + self.confidence = bbox[4] + else: + self.kf.update(bbox) + self.confidence_pre = None + + def predict(self, track_thresh=0.6): + """ + Advances the state vector and returns the predicted bounding box estimate. + """ + if (self.kf.x[7] + self.kf.x[2]) <= 0: + self.kf.x[7] *= 0.0 + + self.kf.predict() + self.age += 1 + if self.time_since_update > 0: + self.hit_streak = 0 + self.time_since_update += 1 + self.history.append(convert_x_to_bbox(self.kf.x)) + if not self.confidence_pre: + return ( + self.history[-1], + np.clip(self.kf.x[3], track_thresh, 1.0), + np.clip(self.confidence, 0.1, track_thresh), + ) + else: + return ( + self.history[-1], + np.clip(self.kf.x[3], track_thresh, 1.0), + np.clip( + self.confidence - (self.confidence_pre - self.confidence), + 0.1, + track_thresh, + ), + ) + + def get_state(self): + """ + Returns the current bounding box estimate. + """ + return convert_x_to_bbox(self.kf.x) + + +class HybridSORT(object): + def __init__( + self, + reid_weights, + device, + half, + det_thresh, + max_age=30, + min_hits=3, + iou_threshold=0.3, + delta_t=3, + asso_func="iou", + inertia=0.2, + use_byte=False, + ): + """ + Sets key parameters for SORT + """ + self.max_age = max_age + self.min_hits = min_hits + self.iou_threshold = iou_threshold + self.trackers = [] + self.per_class = True + self.frame_count = 0 + self.det_thresh = det_thresh + self.delta_t = delta_t + self.asso_func = get_asso_func(asso_func) + self.inertia = inertia + self.use_byte = use_byte + self.low_thresh = 0.1 + self.EG_weight_high_score = 1.3 + self.EG_weight_low_score = 1.2 + self.TCM_first_step = True + self.with_longterm_reid = True + self.with_longterm_reid_correction = True + self.longterm_reid_weight = 0 + self.TCM_first_step_weight = 0 + self.high_score_matching_thresh = 0.8 + self.longterm_reid_correction_thresh = 0.4 + self.longterm_reid_correction_thresh_low = 0.4 + self.TCM_byte_step = True + self.TCM_byte_step_weight = 1.0 + self.dataset = "dancetrack" + self.ECC = False + KalmanBoxTracker.count = 0 + + self.model = ReIDDetectMultiBackend( + weights=reid_weights, device=device, fp16=half + ) + self.cmc = get_cmc_method("ecc")() + + def camera_update(self, trackers, warp_matrix): + for tracker in trackers: + tracker.camera_update(warp_matrix) + + @PerClassDecorator + def update(self, dets, im): + """ + Params: + dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...] + Requires: this method must be called once for each frame even with empty detections + (use np.empty((0, 5)) for frames without detections). + Returns the a similar array, where the last column is the object ID. + NOTE: The number of objects returned may differ from the number of detections provided. + """ + if dets is None: + return np.empty((0, 7)) + + if self.ECC: + warp_matrix = self.cmc.apply(im, dets) + if warp_matrix is not None: + self.camera_update(self.trackers, warp_matrix) + + self.frame_count += 1 + scores = dets[:, 4] + bboxes = dets[:, :4] + + dets_embs = self.model.get_features(bboxes, im) + dets0 = np.concatenate((dets, np.expand_dims(scores, axis=-1)), axis=1) + dets = np.concatenate((bboxes, np.expand_dims(scores, axis=-1)), axis=1) + inds_low = scores > self.low_thresh + inds_high = scores < self.det_thresh + inds_second = np.logical_and( + inds_low, inds_high + ) # self.det_thresh > score > 0.1, for second matching + dets_second = dets[inds_second] # detections for second matching + remain_inds = scores > self.det_thresh + dets = dets[remain_inds] + id_feature_keep = dets_embs[remain_inds] # ID feature of 1st stage matching + id_feature_second = dets_embs[inds_second] # ID feature of 2nd stage matching + + trks = np.zeros((len(self.trackers), 8)) + to_del = [] + ret = [] + for t, trk in enumerate(trks): + pos, kalman_score, simple_score = self.trackers[t].predict() + try: + trk[:6] = [ + pos[0][0], + pos[0][1], + pos[0][2], + pos[0][3], + kalman_score, + simple_score[0], + ] + except Exception: + trk[:6] = [ + pos[0][0], + pos[0][1], + pos[0][2], + pos[0][3], + kalman_score, + simple_score, + ] + if np.any(np.isnan(pos)): + to_del.append(t) + trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) + for t in reversed(to_del): + self.trackers.pop(t) + + velocities_lt = np.array( + [ + trk.velocity_lt if trk.velocity_lt is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + velocities_rt = np.array( + [ + trk.velocity_rt if trk.velocity_rt is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + velocities_lb = np.array( + [ + trk.velocity_lb if trk.velocity_lb is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + velocities_rb = np.array( + [ + trk.velocity_rb if trk.velocity_rb is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + last_boxes = np.array([trk.last_observation for trk in self.trackers]) + k_observations = np.array( + [ + k_previous_obs(trk.observations, trk.age, self.delta_t) + for trk in self.trackers + ] + ) + + """ + First round of association + """ + if self.EG_weight_high_score > 0 and self.TCM_first_step: + track_features = np.asarray( + [track.smooth_feat for track in self.trackers], dtype=np.float + ) + emb_dists = embedding_distance(track_features, id_feature_keep).T + if self.with_longterm_reid or self.with_longterm_reid_correction: + long_track_features = np.asarray( + [ + np.vstack(list(track.features)).mean(0) + for track in self.trackers + ], + dtype=np.float, + ) + assert track_features.shape == long_track_features.shape + long_emb_dists = embedding_distance( + long_track_features, id_feature_keep + ).T + assert emb_dists.shape == long_emb_dists.shape + ( + matched, + unmatched_dets, + unmatched_trks, + ) = associate_4_points_with_score_with_reid( + dets, + trks, + self.iou_threshold, + velocities_lt, + velocities_rt, + velocities_lb, + velocities_rb, + k_observations, + self.inertia, + self.TCM_first_step_weight, + self.asso_func, + emb_cost=emb_dists, + weights=(1.0, self.EG_weight_high_score), + thresh=self.high_score_matching_thresh, + long_emb_dists=long_emb_dists, + with_longterm_reid=self.with_longterm_reid, + longterm_reid_weight=self.longterm_reid_weight, + with_longterm_reid_correction=self.with_longterm_reid_correction, + longterm_reid_correction_thresh=self.longterm_reid_correction_thresh, + dataset=self.dataset, + ) + else: + ( + matched, + unmatched_dets, + unmatched_trks, + ) = associate_4_points_with_score_with_reid( + dets, + trks, + self.iou_threshold, + velocities_lt, + velocities_rt, + velocities_lb, + velocities_rb, + k_observations, + self.inertia, + self.TCM_first_step_weight, + self.asso_func, + emb_cost=emb_dists, + weights=(1.0, self.EG_weight_high_score), + thresh=self.high_score_matching_thresh, + ) + elif self.TCM_first_step: + matched, unmatched_dets, unmatched_trks = associate_4_points_with_score( + dets, + trks, + self.iou_threshold, + velocities_lt, + velocities_rt, + velocities_lb, + velocities_rb, + k_observations, + self.inertia, + self.TCM_first_step_weight, + self.asso_func, + ) + + # update with id feature + for m in matched: + self.trackers[m[1]].update( + dets[m[0], :], dets0[m[0], 5], dets0[m[0], 6], id_feature_keep[m[0], :] + ) + + """ + Second round of associaton by OCR + """ + # BYTE association + if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0: + u_trks = trks[unmatched_trks] + u_tracklets = [self.trackers[index] for index in unmatched_trks] + iou_left = self.asso_func(dets_second, u_trks) + iou_left = np.array(iou_left) + if iou_left.max() > self.iou_threshold: + """ + NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may + get a higher performance especially on MOT17/MOT20 datasets. But we keep it + uniform here for simplicity + """ + if self.TCM_byte_step: + iou_left -= np.array( + cal_score_dif_batch_two_score(dets_second, u_trks) + * self.TCM_byte_step_weight + ) + iou_left_thre = iou_left + if self.EG_weight_low_score > 0: + u_track_features = np.asarray( + [track.smooth_feat for track in u_tracklets], dtype=np.float + ) + emb_dists_low_score = embedding_distance( + u_track_features, id_feature_second + ).T + matched_indices = linear_assignment( + -iou_left + self.EG_weight_low_score * emb_dists_low_score, + ) + else: + matched_indices = linear_assignment(-iou_left) + to_remove_trk_indices = [] + for m in matched_indices: + det_ind, trk_ind = m[0], unmatched_trks[m[1]] + if ( + self.with_longterm_reid_correction + and self.EG_weight_low_score > 0 + ): + if (iou_left_thre[m[0], m[1]] < self.iou_threshold) or ( + emb_dists_low_score[m[0], m[1]] + > self.longterm_reid_correction_thresh_low + ): + print("correction 2nd:", emb_dists_low_score[m[0], m[1]]) + continue + else: + if iou_left_thre[m[0], m[1]] < self.iou_threshold: + continue + self.trackers[trk_ind].update( + dets_second[det_ind, :], + id_feature_second[det_ind, :], + update_feature=False, + ) # [hgx0523] do not update with id feature + to_remove_trk_indices.append(trk_ind) + unmatched_trks = np.setdiff1d( + unmatched_trks, np.array(to_remove_trk_indices) + ) + + if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0: + left_dets = dets[unmatched_dets] + # left_id_feature = id_feature_keep[unmatched_dets] # update id feature, if needed + left_trks = last_boxes[unmatched_trks] + iou_left = self.asso_func(left_dets, left_trks) + iou_left = np.array(iou_left) + + if iou_left.max() > self.iou_threshold: + """ + NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may + get a higher performance especially on MOT17/MOT20 datasets. But we keep it + uniform here for simplicity + """ + rematched_indices = linear_assignment(-iou_left) + to_remove_det_indices = [] + to_remove_trk_indices = [] + for m in rematched_indices: + det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]] + if iou_left[m[0], m[1]] < self.iou_threshold: + continue + self.trackers[trk_ind].update( + dets[det_ind, :], + dets0[det_ind, 5], + dets0[det_ind, 6], + id_feature_keep[det_ind, :], + update_feature=False, + ) + to_remove_det_indices.append(det_ind) + to_remove_trk_indices.append(trk_ind) + unmatched_dets = np.setdiff1d( + unmatched_dets, np.array(to_remove_det_indices) + ) + unmatched_trks = np.setdiff1d( + unmatched_trks, np.array(to_remove_trk_indices) + ) + + for m in unmatched_trks: + self.trackers[m].update(None, None, None, None) + + # create and initialise new trackers for unmatched detections + for i in unmatched_dets: + trk = KalmanBoxTracker( + dets[i, :], + dets0[i, 5], + dets0[i, 6], + id_feature_keep[i, :], + delta_t=self.delta_t, + ) + self.trackers.append(trk) + i = len(self.trackers) + for trk in reversed(self.trackers): + if trk.last_observation.sum() < 0: + d = trk.get_state()[0][:4] + else: + """ + this is optional to use the recent observation or the kalman filter prediction, + we didn't notice significant difference here + """ + d = trk.last_observation[:4] + if (trk.time_since_update < 1) and ( + trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits + ): + # +1 as MOT benchmark requires positive + ret.append( + np.concatenate( + (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind]) + ).reshape(1, -1) + ) + i -= 1 + # remove dead tracklet + if trk.time_since_update > self.max_age: + self.trackers.pop(i) + if len(ret) > 0: + return np.concatenate(ret) + return np.empty((0, 7)) diff --git a/src/juxtapose/trackers/boxmot/trackers/ocsort/__init__.py b/src/juxtapose/trackers/boxmot/trackers/ocsort/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/ocsort/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/trackers/ocsort/ocsort.py b/src/juxtapose/trackers/boxmot/trackers/ocsort/ocsort.py new file mode 100644 index 0000000..d1a7cbe --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/ocsort/ocsort.py @@ -0,0 +1,385 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +""" + This script is adopted from the SORT script by Alex Bewley alex@bewley.ai +""" +import numpy as np + +from juxtapose.trackers.boxmot.motion.kalman_filters.adapters import ( + OCSortKalmanFilterAdapter, +) +from juxtapose.trackers.boxmot.utils.association import associate, linear_assignment +from juxtapose.trackers.boxmot.utils.iou import get_asso_func + + +def k_previous_obs(observations, cur_age, k): + if len(observations) == 0: + return [-1, -1, -1, -1, -1] + for i in range(k): + dt = k - i + if cur_age - dt in observations: + return observations[cur_age - dt] + max_age = max(observations.keys()) + return observations[max_age] + + +def convert_bbox_to_z(bbox): + """ + Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form + [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is + the aspect ratio + """ + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w / 2.0 + y = bbox[1] + h / 2.0 + s = w * h # scale is just area + r = w / float(h + 1e-6) + return np.array([x, y, s, r]).reshape((4, 1)) + + +def convert_x_to_bbox(x, score=None): + """ + Takes a bounding box in the centre form [x,y,s,r] and returns it in the form + [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right + """ + w = np.sqrt(x[2] * x[3]) + h = x[2] / w + if score is None: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0] + ).reshape((1, 4)) + else: + return np.array( + [x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score] + ).reshape((1, 5)) + + +def speed_direction(bbox1, bbox2): + cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0 + cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0 + speed = np.array([cy2 - cy1, cx2 - cx1]) + norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 + return speed / norm + + +class KalmanBoxTracker(object): + """ + This class represents the internal state of individual tracked objects observed as bbox. + """ + + count = 0 + + def __init__(self, bbox, cls, det_ind, delta_t=3): + """ + Initialises a tracker using initial bounding box. + + """ + # define constant velocity model + self.det_ind = det_ind + self.kf = OCSortKalmanFilterAdapter(dim_x=7, dim_z=4) + self.kf.F = np.array( + [ + [1, 0, 0, 0, 1, 0, 0], + [0, 1, 0, 0, 0, 1, 0], + [0, 0, 1, 0, 0, 0, 1], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 1], + ] + ) + self.kf.H = np.array( + [ + [1, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + ] + ) + + self.kf.R[2:, 2:] *= 10.0 + self.kf.P[ + 4:, 4: + ] *= 1000.0 # give high uncertainty to the unobservable initial velocities + self.kf.P *= 10.0 + self.kf.Q[-1, -1] *= 0.01 + self.kf.Q[4:, 4:] *= 0.01 + + self.kf.x[:4] = convert_bbox_to_z(bbox) + self.time_since_update = 0 + self.id = KalmanBoxTracker.count + KalmanBoxTracker.count += 1 + self.history = [] + self.hits = 0 + self.hit_streak = 0 + self.age = 0 + self.conf = bbox[-1] + self.cls = cls + """ + NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of + function k_previous_obs. It is ugly and I do not like it. But to support generate observation array in a + fast and unified way, which you would see below k_observations = np.array([k_previous_obs(...]]), + let's bear it for now. + """ + self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder + self.observations = dict() + self.history_observations = [] + self.velocity = None + self.delta_t = delta_t + + def update(self, bbox, cls, det_ind): + """ + Updates the state vector with observed bbox. + """ + self.det_ind = det_ind + if bbox is not None: + self.conf = bbox[-1] + self.cls = cls + if self.last_observation.sum() >= 0: # no previous observation + previous_box = None + for i in range(self.delta_t): + dt = self.delta_t - i + if self.age - dt in self.observations: + previous_box = self.observations[self.age - dt] + break + if previous_box is None: + previous_box = self.last_observation + """ + Estimate the track speed direction with observations \Delta t steps away + """ + self.velocity = speed_direction(previous_box, bbox) + + """ + Insert new observations. This is a ugly way to maintain both self.observations + and self.history_observations. Bear it for the moment. + """ + self.last_observation = bbox + self.observations[self.age] = bbox + self.history_observations.append(bbox) + + self.time_since_update = 0 + self.history = [] + self.hits += 1 + self.hit_streak += 1 + self.kf.update(convert_bbox_to_z(bbox)) + else: + self.kf.update(bbox) + + def predict(self): + """ + Advances the state vector and returns the predicted bounding box estimate. + """ + if (self.kf.x[6] + self.kf.x[2]) <= 0: + self.kf.x[6] *= 0.0 + + self.kf.predict() + self.age += 1 + if self.time_since_update > 0: + self.hit_streak = 0 + self.time_since_update += 1 + self.history.append(convert_x_to_bbox(self.kf.x)) + return self.history[-1] + + def get_state(self): + """ + Returns the current bounding box estimate. + """ + return convert_x_to_bbox(self.kf.x) + + +class OCSort(object): + def __init__( + self, + per_class=True, + det_thresh=0.2, + max_age=30, + min_hits=3, + iou_threshold=0.3, + delta_t=3, + asso_func="iou", + inertia=0.2, + use_byte=False, + ): + """ + Sets key parameters for SORT + """ + self.max_age = max_age + self.min_hits = min_hits + self.iou_threshold = iou_threshold + self.trackers = [] + self.frame_count = 0 + self.det_thresh = det_thresh + self.delta_t = delta_t + self.asso_func = get_asso_func(asso_func) + self.inertia = inertia + self.use_byte = use_byte + KalmanBoxTracker.count = 0 + + def update(self, dets, _): + """ + Params: + dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...] + Requires: this method must be called once for each frame even with empty detections + (use np.empty((0, 5)) for frames without detections). + Returns the a similar array, where the last column is the object ID. + NOTE: The number of objects returned may differ from the number of detections provided. + """ + + assert isinstance( + dets, np.ndarray + ), f"Unsupported 'dets' input format '{type(dets)}', valid format is np.ndarray" + assert ( + len(dets.shape) == 2 + ), "Unsupported 'dets' dimensions, valid number of dimensions is two" + assert ( + dets.shape[1] == 6 + ), "Unsupported 'dets' 2nd dimension lenght, valid lenghts is 6" + + self.frame_count += 1 + + dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)]) + confs = dets[:, 4] + + inds_low = confs > 0.1 + inds_high = confs < self.det_thresh + inds_second = np.logical_and( + inds_low, inds_high + ) # self.det_thresh > score > 0.1, for second matching + dets_second = dets[inds_second] # detections for second matching + remain_inds = confs > self.det_thresh + dets = dets[remain_inds] + + # get predicted locations from existing trackers. + trks = np.zeros((len(self.trackers), 5)) + to_del = [] + ret = [] + for t, trk in enumerate(trks): + pos = self.trackers[t].predict()[0] + trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] + if np.any(np.isnan(pos)): + to_del.append(t) + trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) + for t in reversed(to_del): + self.trackers.pop(t) + + velocities = np.array( + [ + trk.velocity if trk.velocity is not None else np.array((0, 0)) + for trk in self.trackers + ] + ) + last_boxes = np.array([trk.last_observation for trk in self.trackers]) + k_observations = np.array( + [ + k_previous_obs(trk.observations, trk.age, self.delta_t) + for trk in self.trackers + ] + ) + + """ + First round of association + """ + matched, unmatched_dets, unmatched_trks = associate( + dets, trks, self.iou_threshold, velocities, k_observations, self.inertia + ) + for m in matched: + self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5], dets[m[0], 6]) + + """ + Second round of associaton by OCR + """ + # BYTE association + if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0: + u_trks = trks[unmatched_trks] + iou_left = self.asso_func( + dets_second, u_trks + ) # iou between low score detections and unmatched tracks + iou_left = np.array(iou_left) + if iou_left.max() > self.iou_threshold: + """ + NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may + get a higher performance especially on MOT17/MOT20 datasets. But we keep it + uniform here for simplicity + """ + matched_indices = linear_assignment(-iou_left) + to_remove_trk_indices = [] + for m in matched_indices: + det_ind, trk_ind = m[0], unmatched_trks[m[1]] + if iou_left[m[0], m[1]] < self.iou_threshold: + continue + self.trackers[trk_ind].update( + dets_second[det_ind, :5], + dets_second[det_ind, 5], + dets_second[det_ind, 6], + ) + to_remove_trk_indices.append(trk_ind) + unmatched_trks = np.setdiff1d( + unmatched_trks, np.array(to_remove_trk_indices) + ) + + if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0: + left_dets = dets[unmatched_dets] + left_trks = last_boxes[unmatched_trks] + iou_left = self.asso_func(left_dets, left_trks) + iou_left = np.array(iou_left) + if iou_left.max() > self.iou_threshold: + """ + NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may + get a higher performance especially on MOT17/MOT20 datasets. But we keep it + uniform here for simplicity + """ + rematched_indices = linear_assignment(-iou_left) + to_remove_det_indices = [] + to_remove_trk_indices = [] + for m in rematched_indices: + det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]] + if iou_left[m[0], m[1]] < self.iou_threshold: + continue + self.trackers[trk_ind].update( + dets[det_ind, :5], dets[det_ind, 5], dets[det_ind, 6] + ) + to_remove_det_indices.append(det_ind) + to_remove_trk_indices.append(trk_ind) + unmatched_dets = np.setdiff1d( + unmatched_dets, np.array(to_remove_det_indices) + ) + unmatched_trks = np.setdiff1d( + unmatched_trks, np.array(to_remove_trk_indices) + ) + + for m in unmatched_trks: + self.trackers[m].update(None, None, None) + + # create and initialise new trackers for unmatched detections + for i in unmatched_dets: + trk = KalmanBoxTracker( + dets[i, :5], dets[i, 5], dets[i, 6], delta_t=self.delta_t + ) + self.trackers.append(trk) + i = len(self.trackers) + for trk in reversed(self.trackers): + if trk.last_observation.sum() < 0: + d = trk.get_state()[0] + else: + """ + this is optional to use the recent observation or the kalman filter prediction, + we didn't notice significant difference here + """ + d = trk.last_observation[:4] + if (trk.time_since_update < 1) and ( + trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits + ): + # +1 as MOT benchmark requires positive + ret.append( + np.concatenate( + (d, [trk.id + 1], [trk.conf], [trk.cls], [trk.det_ind]) + ).reshape(1, -1) + ) + i -= 1 + # remove dead tracklet + if trk.time_since_update > self.max_age: + self.trackers.pop(i) + if len(ret) > 0: + return np.concatenate(ret) + return np.array([]) diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/__init__.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/__init__.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/__init__.py new file mode 100644 index 0000000..4f7a4d0 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/__init__.py @@ -0,0 +1 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/detection.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/detection.py new file mode 100644 index 0000000..5f94b53 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/detection.py @@ -0,0 +1,41 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + + """ + + def __init__(self, tlwh, conf, cls, det_ind, feat): + self.tlwh = tlwh + self.conf = conf + self.cls = cls + self.det_ind = det_ind + self.feat = feat + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/iou_matching.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/iou_matching.py new file mode 100644 index 0000000..3483ce4 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/iou_matching.py @@ -0,0 +1,87 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from __future__ import absolute_import + +import numpy as np + +from . import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. + + """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[ + np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis], + ] + br = np.c_[ + np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis], + ] + wh = np.maximum(0.0, br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, detection_indices=None): + """An intersection over union distance metric. + + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1.0 - iou(bbox, candidates) + return cost_matrix diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/linear_assignment.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/linear_assignment.py new file mode 100644 index 0000000..0816213 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/linear_assignment.py @@ -0,0 +1,194 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from __future__ import absolute_import + +import numpy as np +from scipy.optimize import linear_sum_assignment + +from ....utils.matching import chi2inv95 + +INFTY_COST = 1e5 + + +def min_cost_matching( + distance_metric, + max_distance, + tracks, + detections, + track_indices=None, + detection_indices=None, +): + """Solve linear assignment problem. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. + + cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + row_indices, col_indices = linear_sum_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in col_indices: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in row_indices: + unmatched_tracks.append(track_idx) + for row, col in zip(row_indices, col_indices): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, + max_distance, + cascade_depth, + tracks, + detections, + track_indices=None, + detection_indices=None, +): + """Run matching cascade. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + track_indices_l = [k for k in track_indices] + matches_l, _, unmatched_detections = min_cost_matching( + distance_metric, + max_distance, + tracks, + detections, + track_indices_l, + unmatched_detections, + ) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + cost_matrix, + tracks, + detections, + track_indices, + detection_indices, + mc_lambda, + gated_cost=INFTY_COST, + only_position=False, +): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + Returns + ------- + ndarray + Returns the modified cost matrix. + """ + gating_threshold = chi2inv95[4] + measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = track.kf.gating_distance(measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + cost_matrix[row] = ( + mc_lambda * cost_matrix[row] + (1 - mc_lambda) * gating_distance + ) + return cost_matrix diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/track.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/track.py new file mode 100644 index 0000000..0b68aa9 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/track.py @@ -0,0 +1,200 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + +from juxtapose.trackers.boxmot.motion.kalman_filters.adapters import ( + StrongSortKalmanFilterAdapter, +) + + +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. + + """ + + def __init__( + self, + detection, + id, + n_init, + max_age, + ema_alpha, + ): + self.id = id + self.bbox = detection.to_xyah() + self.conf = detection.conf + self.cls = detection.cls + self.det_ind = detection.det_ind + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + self.ema_alpha = ema_alpha + + self.state = TrackState.Tentative + self.features = [] + if detection.feat is not None: + detection.feat /= np.linalg.norm(detection.feat) + self.features.append(detection.feat) + + self._n_init = n_init + self._max_age = max_age + + self.kf = StrongSortKalmanFilterAdapter() + self.mean, self.covariance = self.kf.initiate(self.bbox) + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + + Returns + ------- + ndarray + The bounding box. + + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get kf estimated current position in bounding box format `(min x, miny, max x, + max y)`. + + Returns + ------- + ndarray + The predicted kf bounding box. + + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def camera_update(self, warp_matrix): + [a, b] = warp_matrix + warp_matrix = np.array([a, b, [0, 0, 1]]) + warp_matrix = warp_matrix.tolist() + x1, y1, x2, y2 = self.to_tlbr() + x1_, y1_, _ = warp_matrix @ np.array([x1, y1, 1]).T + x2_, y2_, _ = warp_matrix @ np.array([x2, y2, 1]).T + w, h = x2_ - x1_, y2_ - y1_ + cx, cy = x1_ + w / 2, y1_ + h / 2 + self.mean[:4] = [cx, cy, w / h, h] + + def increment_age(self): + self.age += 1 + self.time_since_update += 1 + + def predict(self): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + """ + self.mean, self.covariance = self.kf.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + Parameters + ---------- + detection : Detection + The associated detection. + """ + self.bbox = detection.to_xyah() + self.conf = detection.conf + self.cls = detection.cls + self.det_ind = detection.det_ind + self.mean, self.covariance = self.kf.update( + self.mean, self.covariance, self.bbox, self.conf + ) + + feature = detection.feat / np.linalg.norm(detection.feat) + + smooth_feat = ( + self.ema_alpha * self.features[-1] + (1 - self.ema_alpha) * feature + ) + smooth_feat /= np.linalg.norm(smooth_feat) + self.features = [smooth_feat] + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step).""" + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed).""" + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/tracker.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/tracker.py new file mode 100644 index 0000000..d359442 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/sort/tracker.py @@ -0,0 +1,182 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +from __future__ import absolute_import + +import numpy as np + +from juxtapose.trackers.boxmot.motion.cmc import get_cmc_method +from juxtapose.trackers.boxmot.trackers.strongsort.sort import ( + iou_matching, + linear_assignment, +) +from juxtapose.trackers.boxmot.trackers.strongsort.sort.track import Track +from juxtapose.trackers.boxmot.utils.matching import chi2inv95 + + +class Tracker: + """ + This is the multi-target tracker. + Parameters + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + A distance metric for measurement-to-track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + Attributes + ---------- + metric : nn_matching.NearestNeighborDistanceMetric + The distance metric used for measurement to track association. + max_age : int + Maximum number of missed misses before a track is deleted. + n_init : int + Number of frames that a track remains in initialization phase. + tracks : List[Track] + The list of active tracks at the current time step. + """ + + GATING_THRESHOLD = np.sqrt(chi2inv95[4]) + + def __init__( + self, + metric, + max_iou_dist=0.9, + max_age=30, + n_init=3, + _lambda=0, + ema_alpha=0.9, + mc_lambda=0.995, + ): + self.metric = metric + self.max_iou_dist = max_iou_dist + self.max_age = max_age + self.n_init = n_init + self._lambda = _lambda + self.ema_alpha = ema_alpha + self.mc_lambda = mc_lambda + + self.tracks = [] + self._next_id = 1 + self.cmc = get_cmc_method("ecc")() + + def predict(self): + """Propagate track state distributions one time step forward. + + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict() + + def increment_ages(self): + for track in self.tracks: + track.increment_age() + track.mark_missed() + + def update(self, detections): + """Perform measurement update and track management. + + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = self._match(detections) + + # Update track set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update(detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. + active_targets = [t.id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.id for _ in track.features] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets + ) + + def _match(self, detections): + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feat for i in detection_indices]) + targets = np.array([tracks[i].id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + cost_matrix, + tracks, + dets, + track_indices, + detection_indices, + self.mc_lambda, + ) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed() + ] + + # Associate confirmed tracks using appearance features. + ( + matches_a, + unmatched_tracks_a, + unmatched_detections, + ) = linear_assignment.matching_cascade( + gated_metric, + self.metric.matching_threshold, + self.max_age, + self.tracks, + detections, + confirmed_tracks, + ) + + # Associate remaining tracks together with unconfirmed tracks using IOU. + iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if self.tracks[k].time_since_update == 1 + ] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if self.tracks[k].time_since_update != 1 + ] + + ( + matches_b, + unmatched_tracks_b, + unmatched_detections, + ) = linear_assignment.min_cost_matching( + iou_matching.iou_cost, + self.max_iou_dist, + self.tracks, + detections, + iou_track_candidates, + unmatched_detections, + ) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + self.tracks.append( + Track( + detection, + self._next_id, + self.n_init, + self.max_age, + self.ema_alpha, + ) + ) + self._next_id += 1 diff --git a/src/juxtapose/trackers/boxmot/trackers/strongsort/strong_sort.py b/src/juxtapose/trackers/boxmot/trackers/strongsort/strong_sort.py new file mode 100644 index 0000000..60dac5c --- /dev/null +++ b/src/juxtapose/trackers/boxmot/trackers/strongsort/strong_sort.py @@ -0,0 +1,102 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + +from juxtapose.trackers.boxmot.appearance.reid_multibackend import ( + ReIDDetectMultiBackend, +) +from juxtapose.trackers.boxmot.motion.cmc import get_cmc_method +from juxtapose.trackers.boxmot.trackers.strongsort.sort.detection import Detection +from juxtapose.trackers.boxmot.trackers.strongsort.sort.tracker import Tracker +from juxtapose.trackers.boxmot.utils.matching import NearestNeighborDistanceMetric +from juxtapose.trackers.boxmot.utils.ops import xyxy2tlwh + + +class StrongSORT(object): + def __init__( + self, + model_weights, + device, + fp16, + max_dist=0.2, + max_iou_dist=0.7, + max_age=30, + n_init=1, + nn_budget=100, + mc_lambda=0.995, + ema_alpha=0.9, + ): + self.model = ReIDDetectMultiBackend( + weights=model_weights, device=device, fp16=fp16 + ) + self.tracker = Tracker( + metric=NearestNeighborDistanceMetric("cosine", max_dist, nn_budget), + max_iou_dist=max_iou_dist, + max_age=max_age, + n_init=n_init, + mc_lambda=mc_lambda, + ema_alpha=ema_alpha, + ) + self.cmc = get_cmc_method("ecc")() + + def update(self, dets, img): + assert isinstance( + dets, np.ndarray + ), f"Unsupported 'dets' input format '{type(dets)}', valid format is np.ndarray" + assert isinstance( + img, np.ndarray + ), f"Unsupported 'img' input format '{type(img)}', valid format is np.ndarray" + assert ( + len(dets.shape) == 2 + ), "Unsupported 'dets' dimensions, valid number of dimensions is two" + assert ( + dets.shape[1] == 6 + ), "Unsupported 'dets' 2nd dimension lenght, valid lenghts is 6" + + dets = np.hstack([dets, np.arange(len(dets)).reshape(-1, 1)]) + xyxy = dets[:, 0:4] + confs = dets[:, 4] + clss = dets[:, 5] + det_ind = dets[:, 6] + + if len(self.tracker.tracks) >= 1: + warp_matrix = self.cmc.apply(img, xyxy) + for track in self.tracker.tracks: + track.camera_update(warp_matrix) + + # extract appearance information for each detection + features = self.model.get_features(xyxy, img) + + tlwh = xyxy2tlwh(xyxy) + detections = [ + Detection(box, conf, cls, det_ind, feat) + for box, conf, cls, det_ind, feat in zip( + tlwh, confs, clss, det_ind, features + ) + ] + + # update tracker + self.tracker.predict() + self.tracker.update(detections) + + # output bbox identities + outputs = [] + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update >= 1: + continue + + x1, y1, x2, y2 = track.to_tlbr() + + id = track.id + conf = track.conf + cls = track.cls + det_ind = track.det_ind + + outputs.append( + np.concatenate( + ([x1, y1, x2, y2], [id], [conf], [cls], [det_ind]) + ).reshape(1, -1) + ) + if len(outputs) > 0: + return np.concatenate(outputs) + return np.array([]) diff --git a/src/juxtapose/trackers/boxmot/utils/__init__.py b/src/juxtapose/trackers/boxmot/utils/__init__.py new file mode 100644 index 0000000..8ada8f6 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/__init__.py @@ -0,0 +1,61 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import sys +from pathlib import Path + +import numpy as np + +FILE = Path(__file__).resolve() +ROOT = FILE.parents[2] # root directory +BOXMOT = ROOT / "boxmot" +EXAMPLES = ROOT / "examples" +WEIGHTS = ROOT / "examples" / "weights" +REQUIREMENTS = ROOT / "requirements.txt" + +# global logger +from loguru import logger + +logger.remove() +logger.add(sys.stderr, colorize=True, level="INFO") + + +class PerClassDecorator: + def __init__(self, method): + self.update = method + + def __get__(self, instance, owner): + def wrapper(*args, **kwargs): + modified_args = list(args) + dets = modified_args[0] + im = modified_args[1] + + # input one class of detections at a time in order to not mix them up + if instance.per_class is True and dets.size != 0: + dets_dict = { + class_id: np.array([det for det in dets if det[5] == class_id]) + for class_id in set(det[5] for det in dets) + } + # get unique classes in predictions + detected_classes = set(dets_dict.keys()) + # get unque classes with active trackers + active_classes = set([tracker.cls for tracker in instance.trackers]) + # get tracks that are both active and in the current detections + relevant_classes = active_classes.union(detected_classes) + + mc_dets = np.empty(shape=(0, 8)) + for class_id in relevant_classes: + modified_args[0] = np.array( + dets_dict.get(int(class_id), np.empty((0, 6))) + ) + logger.debug( + f"Feeding class {int(class_id)}: {modified_args[0].shape}" + ) + dets = self.update(instance, modified_args[0], im) + if dets.size != 0: + mc_dets = np.append(mc_dets, dets, axis=0) + logger.debug(f"Per class updates output: {mc_dets.shape}") + else: + mc_dets = self.update(instance, dets, im) + return mc_dets + + return wrapper diff --git a/src/juxtapose/trackers/boxmot/utils/association.py b/src/juxtapose/trackers/boxmot/utils/association.py new file mode 100644 index 0000000..b1e7d5d --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/association.py @@ -0,0 +1,283 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + +from juxtapose.trackers.boxmot.utils.iou import iou_batch + + +def speed_direction_batch(dets, tracks): + tracks = tracks[..., np.newaxis] + CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0 + CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0 + dx = CX1 - CX2 + dy = CY1 - CY2 + norm = np.sqrt(dx**2 + dy**2) + 1e-6 + dx = dx / norm + dy = dy / norm + return dy, dx # size: num_track x num_det + + +def linear_assignment(cost_matrix): + try: + import lap + + _, x, y = lap.lapjv(cost_matrix, extend_cost=True) + return np.array([[y[i], i] for i in x if i >= 0]) # + except ImportError: + from scipy.optimize import linear_sum_assignment + + x, y = linear_sum_assignment(cost_matrix) + return np.array([list(zip(x, y))]) + + +def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3): + """ + Assigns detections to tracked object (both represented as bounding boxes) + Returns 3 lists of matches, unmatched_detections and unmatched_trackers + """ + if len(trackers) == 0: + return ( + np.empty((0, 2), dtype=int), + np.arange(len(detections)), + np.empty((0, 5), dtype=int), + ) + + iou_matrix = iou_batch(detections, trackers) + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-iou_matrix) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if d not in matched_indices[:, 0]: + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if t not in matched_indices[:, 1]: + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if iou_matrix[m[0], m[1]] < iou_threshold: + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if len(matches) == 0: + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +def compute_aw_max_metric(emb_cost, w_association_emb, bottom=0.5): + w_emb = np.full_like(emb_cost, w_association_emb) + + for idx in range(emb_cost.shape[0]): + inds = np.argsort(-emb_cost[idx]) + # If there's less than two matches, just keep original weight + if len(inds) < 2: + continue + if emb_cost[idx, inds[0]] == 0: + row_weight = 0 + else: + row_weight = 1 - max( + (emb_cost[idx, inds[1]] / emb_cost[idx, inds[0]]) - bottom, 0 + ) / (1 - bottom) + w_emb[idx] *= row_weight + + for idj in range(emb_cost.shape[1]): + inds = np.argsort(-emb_cost[:, idj]) + # If there's less than two matches, just keep original weight + if len(inds) < 2: + continue + if emb_cost[inds[0], idj] == 0: + col_weight = 0 + else: + col_weight = 1 - max( + (emb_cost[inds[1], idj] / emb_cost[inds[0], idj]) - bottom, 0 + ) / (1 - bottom) + w_emb[:, idj] *= col_weight + + return w_emb * emb_cost + + +def associate( + detections, + trackers, + iou_threshold, + velocities, + previous_obs, + vdc_weight, + emb_cost=None, + w_assoc_emb=None, + aw_off=None, + aw_param=None, +): + if len(trackers) == 0: + return ( + np.empty((0, 2), dtype=int), + np.arange(len(detections)), + np.empty((0, 5), dtype=int), + ) + + Y, X = speed_direction_batch(detections, previous_obs) + inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1] + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + + iou_matrix = iou_batch(detections, trackers) + scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1) + # iou_matrix = iou_matrix * scores # a trick sometiems works, we don't encourage this + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost.T + angle_diff_cost = angle_diff_cost * scores + + if min(iou_matrix.shape): + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + if emb_cost is None: + emb_cost = 0 + else: + emb_cost = emb_cost + emb_cost[iou_matrix <= 0] = 0 + if not aw_off: + emb_cost = compute_aw_max_metric( + emb_cost, w_assoc_emb, bottom=aw_param + ) + else: + emb_cost *= w_assoc_emb + + final_cost = -(iou_matrix + angle_diff_cost + emb_cost) + matched_indices = linear_assignment(final_cost) + if matched_indices.size == 0: + matched_indices = np.empty(shape=(0, 2)) + + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if d not in matched_indices[:, 0]: + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if t not in matched_indices[:, 1]: + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if iou_matrix[m[0], m[1]] < iou_threshold: + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if len(matches) == 0: + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +def associate_kitti( + detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight +): + if len(trackers) == 0: + return ( + np.empty((0, 2), dtype=int), + np.arange(len(detections)), + np.empty((0, 5), dtype=int), + ) + + """ + Cost from the velocity direction consistency + """ + Y, X = speed_direction_batch(detections, previous_obs) + inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1] + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1) + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost.T + angle_diff_cost = angle_diff_cost * scores + + """ + Cost from IoU + """ + iou_matrix = iou_batch(detections, trackers) + + """ + With multiple categories, generate the cost for catgory mismatch + """ + num_dets = detections.shape[0] + num_trk = trackers.shape[0] + cate_matrix = np.zeros((num_dets, num_trk)) + for i in range(num_dets): + for j in range(num_trk): + if det_cates[i] != trackers[j, 4]: + cate_matrix[i][j] = -1e6 + + cost_matrix = -iou_matrix - angle_diff_cost - cate_matrix + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(cost_matrix) + else: + matched_indices = np.empty(shape=(0, 2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if d not in matched_indices[:, 0]: + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if t not in matched_indices[:, 1]: + unmatched_trackers.append(t) + + # filter out matched with low IOU + matches = [] + for m in matched_indices: + if iou_matrix[m[0], m[1]] < iou_threshold: + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1, 2)) + if len(matches) == 0: + matches = np.empty((0, 2), dtype=int) + else: + matches = np.concatenate(matches, axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) diff --git a/src/juxtapose/trackers/boxmot/utils/checks.py b/src/juxtapose/trackers/boxmot/utils/checks.py new file mode 100644 index 0000000..bcd725d --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/checks.py @@ -0,0 +1,38 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import subprocess + +import pkg_resources + +from juxtapose.trackers.boxmot.utils import REQUIREMENTS, logger + + +class TestRequirements: + def check_requirements(self): + requirements = pkg_resources.parse_requirements(REQUIREMENTS.open()) + self.check_packages(requirements) + + def check_packages(self, requirements, cmds=""): + """Test that each required package is available.""" + # Ref: https://stackoverflow.com/a/45474387/ + + s = "" # missing packages + for r in requirements: + r = str(r) + try: + pkg_resources.require(r) + except Exception as e: + logger.error(f"{e}") + s += f'"{r}" ' + if s: + logger.warning(f"\nMissing packages: {s}\nAtempting installation...") + try: + subprocess.check_output( + f"pip install --no-cache {s} {cmds}", + shell=True, + stderr=subprocess.STDOUT, + ) + except Exception as e: + logger.error(e) + exit() + logger.success("All the missing packages were installed successfully") diff --git a/src/juxtapose/trackers/boxmot/utils/convert_rtm_boxmot.py b/src/juxtapose/trackers/boxmot/utils/convert_rtm_boxmot.py new file mode 100644 index 0000000..6b57fdd --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/convert_rtm_boxmot.py @@ -0,0 +1,18 @@ +import numpy as np +from juxtapose.utils.core import Detections + + +def convert_boxmot_tracker_to_rtm(outputs, detections) -> Detections: + if len(outputs) > 0: + # get the index of track_id that is "", remove it from outputs, don't add to detections + outputs = outputs[outputs[:, 4] != ""] + return Detections( + xyxy=outputs[:, :4], + track_id=outputs[:, 4].astype("float").astype("int"), + confidence=outputs[:, 5], + labels=outputs[:, 6], + ) + else: + # return empty Detections object + detections.track_id = np.array([""] * len(detections.xyxy)) + return detections diff --git a/src/juxtapose/trackers/boxmot/utils/iou.py b/src/juxtapose/trackers/boxmot/utils/iou.py new file mode 100644 index 0000000..6890e53 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/iou.py @@ -0,0 +1,172 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np + + +def iou_batch(bboxes1, bboxes2): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + wh = w * h + o = wh / ( + (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - + wh + ) + return o + + +def giou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + wh = w * h + iou = wh / ( + (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - + wh + ) + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + wc = xxc2 - xxc1 + hc = yyc2 - yyc1 + assert (wc > 0).all() and (hc > 0).all() + area_enclose = wc * hc + giou = iou - (area_enclose - wh) / area_enclose + giou = (giou + 1.0) / 2.0 # resize from (-1,1) to (0,1) + return giou + + +def diou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + # calculate the intersection box + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + wh = w * h + iou = wh / ( + (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - + wh + ) + + centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0 + centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0 + centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0 + centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0 + + inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2 + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + + outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2 + diou = iou - inner_diag / outer_diag + + return (diou + 1) / 2.0 # resize from (-1,1) to (0,1) + + +def ciou_batch(bboxes1, bboxes2): + """ + :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2) + :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2) + :return: + """ + # for details should go to https://arxiv.org/pdf/1902.09630.pdf + # ensure predict's bbox form + bboxes2 = np.expand_dims(bboxes2, 0) + bboxes1 = np.expand_dims(bboxes1, 1) + + # calculate the intersection box + xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0]) + yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1]) + xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2]) + yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3]) + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + wh = w * h + iou = wh / ( + (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1]) + + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - + wh + ) + + centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0 + centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0 + centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0 + centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0 + + inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2 + + xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0]) + yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1]) + xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2]) + yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3]) + + outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2 + + w1 = bboxes1[..., 2] - bboxes1[..., 0] + h1 = bboxes1[..., 3] - bboxes1[..., 1] + w2 = bboxes2[..., 2] - bboxes2[..., 0] + h2 = bboxes2[..., 3] - bboxes2[..., 1] + + # prevent dividing over zero. add one pixel shift + h2 = h2 + 1.0 + h1 = h1 + 1.0 + arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1) + v = (4 / (np.pi**2)) * (arctan**2) + S = 1 - iou + alpha = v / (S + v) + ciou = iou - inner_diag / outer_diag - alpha * v + + return (ciou + 1) / 2.0 # resize from (-1,1) to (0,1) + + +def get_asso_func(asso_mode): + ASSO_FUNCS = { + "iou": iou_batch, + "giou": giou_batch, + "ciou": ciou_batch, + "diou": diou_batch, + } + + return ASSO_FUNCS[asso_mode] diff --git a/src/juxtapose/trackers/boxmot/utils/matching.py b/src/juxtapose/trackers/boxmot/utils/matching.py new file mode 100644 index 0000000..0b09e04 --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/matching.py @@ -0,0 +1,411 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import lap +import numpy as np +import scipy +import torch +from scipy.spatial.distance import cdist + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919, +} + + +def merge_matches(m1, m2, shape): + O, P, Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1 * M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = matched_cost <= thresh + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return ( + np.empty((0, 2), dtype=int), + tuple(range(cost_matrix.shape[0])), + tuple(range(cost_matrix.shape[1])), + ) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float32), + np.ascontiguousarray(btlbrs, dtype=np.float32), + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or ( + len(btracks) > 0 and isinstance(btracks[0], np.ndarray) + ): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.xyxy for track in atracks] + btlbrs = [track.xyxy for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def v_iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or ( + len(btracks) > 0 and isinstance(btracks[0], np.ndarray) + ): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] + btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def embedding_distance(tracks, detections, metric="cosine"): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray( + [track.curr_feat for track in detections], dtype=np.float32 + ) + # for i, track in enumerate(tracks): + # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray( + [track.smooth_feat for track in tracks], dtype=np.float32 + ) + cost_matrix = np.maximum( + 0.0, cdist(track_features, det_features, metric) + ) # Nomalized features + return cost_matrix + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position + ) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric="maha" + ) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix + + +def fuse_iou(cost_matrix, tracks, detections): + if cost_matrix.size == 0: + return cost_matrix + reid_sim = 1 - cost_matrix + iou_dist = iou_distance(tracks, detections) + iou_sim = 1 - iou_dist + fuse_sim = reid_sim * (1 + iou_sim) / 2 + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + # fuse_sim = fuse_sim * (1 + det_scores) / 2 + fuse_cost = 1 - fuse_sim + return fuse_cost + + +def fuse_score(cost_matrix, detections): + if cost_matrix.size == 0: + return cost_matrix + iou_sim = 1 - cost_matrix + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + fuse_sim = iou_sim * det_scores + fuse_cost = 1 - fuse_sim + return fuse_cost + + +def bbox_ious(boxes, query_boxes): + """ + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of overlap between boxes and query_boxes + """ + N = boxes.shape[0] + K = query_boxes.shape[0] + overlaps = np.zeros((N, K), dtype=np.float32) + + for k in range(K): + box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * ( + query_boxes[k, 3] - query_boxes[k, 1] + 1 + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) * + (boxes[n, 3] - boxes[n, 1] + 1) + + box_area - + iw * ih + ) + overlaps[n, k] = iw * ih / ua + return overlaps + + +def _pdist(a, b): + """Compute pair-wise squared distance between points in `a` and `b`. + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + """ + a, b = np.asarray(a), np.asarray(b) + if len(a) == 0 or len(b) == 0: + return np.zeros((len(a), len(b))) + a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) + r2 = -2.0 * np.dot(a, b.T) + a2[:, None] + b2[None, :] + r2 = np.clip(r2, 0.0, float(np.inf)) + return r2 + + +def _cosine_distance(a, b, data_is_normalized=False): + """Compute pair-wise cosine distance between points in `a` and `b`. + Parameters + ---------- + a : array_like + An NxM matrix of N samples of dimensionality M. + b : array_like + An LxM matrix of L samples of dimensionality M. + data_is_normalized : Optional[bool] + If True, assumes rows in a and b are unit length vectors. + Otherwise, a and b are explicitly normalized to lenght 1. + Returns + ------- + ndarray + Returns a matrix of size len(a), len(b) such that eleement (i, j) + contains the squared distance between `a[i]` and `b[j]`. + """ + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1.0 - np.dot(a, b.T) + + +def _nn_euclidean_distance(x, y): + """Helper function for nearest neighbor distance metric (Euclidean). + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest Euclidean distance to a sample in `x`. + """ + # x_ = torch.from_numpy(np.asarray(x) / np.linalg.norm(x, axis=1, keepdims=True)) + # y_ = torch.from_numpy(np.asarray(y) / np.linalg.norm(y, axis=1, keepdims=True)) + distances = distances = _pdist(x, y) + return np.maximum(0.0, torch.min(distances, axis=0)[0].numpy()) + + +def _nn_cosine_distance(x, y): + """Helper function for nearest neighbor distance metric (cosine). + Parameters + ---------- + x : ndarray + A matrix of N row-vectors (sample points). + y : ndarray + A matrix of M row-vectors (query points). + Returns + ------- + ndarray + A vector of length M that contains for each entry in `y` the + smallest cosine distance to a sample in `x`. + """ + x_ = torch.from_numpy(np.asarray(x)) + y_ = torch.from_numpy(np.asarray(y)) + distances = _cosine_distance(x_, y_) + distances = distances + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + Parameters + ---------- + metric : str + Either "euclidean" or "cosine". + matching_threshold: float + The matching threshold. Samples with larger distance are considered an + invalid match. + budget : Optional[int] + If not None, fix samples per class to at most this number. Removes + the oldest samples when the budget is reached. + Attributes + ---------- + samples : Dict[int -> List[ndarray]] + A dictionary that maps from target identities to the list of samples + that have been observed so far. + """ + + def __init__(self, metric, matching_threshold, budget=None): + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError("Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """Update the distance metric with new data. + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : ndarray + An integer array of associated target identities. + active_targets : List[int] + A list of targets that are currently present in the scene. + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """Compute distance between features and targets. + Parameters + ---------- + features : ndarray + An NxM matrix of N features of dimensionality M. + targets : List[int] + A list of targets to match the given `features` against. + Returns + ------- + ndarray + Returns a cost matrix of shape len(targets), len(features), where + element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix diff --git a/src/juxtapose/trackers/boxmot/utils/ops.py b/src/juxtapose/trackers/boxmot/utils/ops.py new file mode 100644 index 0000000..99d319e --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/ops.py @@ -0,0 +1,97 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import numpy as np +import torch + + +def xyxy2xywh(x): + """ + Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format. + + Args: + x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x1, y1, x2, y2) format. + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x, y, width, height) format. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center + y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center + y[..., 2] = x[..., 2] - x[..., 0] # width + y[..., 3] = x[..., 3] - x[..., 1] # height + return y + + +def xywh2xyxy(x): + """ + Convert bounding box coordinates from (x_c, y_c, width, height) format to + (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) + is the bottom-right corner. + + Args: + x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x + y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y + y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x + y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y + return y + + +def xywh2tlwh(x): + """ + Convert bounding box coordinates from (x c, y c, w, h) format to (t, l, w, h) format where (t, l) is the + top-left corner and (w, h) is width and height. + + Args: + x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format. + Returns: + y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] - x[..., 2] / 2.0 # xc --> t + y[..., 1] = x[..., 1] - x[..., 3] / 2.0 # yc --> l + y[..., 2] = x[..., 2] # width + y[..., 3] = x[..., 3] # height + return y + + +def tlwh2xyxy(x): + """ + Convert bounding box coordinates from (t, l ,w ,h) format to (t, l, w, h) format where (t, l) is the + top-left corner and (w, h) is width and height. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] + y[..., 1] = x[..., 1] + y[..., 2] = x[..., 0] + x[..., 2] + y[..., 3] = x[..., 1] + x[..., 3] + return y + + +def xyxy2tlwh(x): + """ + Convert bounding box coordinates from (t, l ,w ,h) format to (t, l, w, h) format where (t, l) is the + top-left corner and (w, h) is width and height. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] + y[..., 1] = x[..., 1] + y[..., 2] = x[..., 2] - x[..., 0] + y[..., 3] = x[..., 3] - x[..., 1] + return y + + +def tlwh2xyah(x): + """ + Convert bounding box coordinates from (t, l ,w ,h) + to (center x, center y, aspect ratio, height)`, where the aspect ratio is `width / height`. + """ + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[..., 0] = x[..., 0] + (x[..., 2] / 2) + y[..., 1] = x[..., 1] + (x[..., 3] / 2) + y[..., 2] = x[..., 2] / x[..., 3] + y[..., 3] = x[..., 3] + return y diff --git a/src/juxtapose/trackers/boxmot/utils/torch_utils.py b/src/juxtapose/trackers/boxmot/utils/torch_utils.py new file mode 100644 index 0000000..b5d9fff --- /dev/null +++ b/src/juxtapose/trackers/boxmot/utils/torch_utils.py @@ -0,0 +1,76 @@ +# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license + +import os +import platform + +import torch + +from .. import __version__ +from . import logger as LOGGER + + +def select_device(device="", batch=0, newline=False, verbose=True): + """Selects PyTorch Device. Options are device = None or 'cpu' or 0 or '0' or '0,1,2,3'.""" + s = f"Yolo Tracking v{__version__} 🚀 Python-{platform.python_version()} torch-{torch.__version__} " + device = str(device).lower() + for remove in "cuda:", "none", "(", ")", "[", "]", "'", " ": + device = device.replace( + remove, "" + ) # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1' + cpu = device == "cpu" + mps = device == "mps" # Apple Metal Performance Shaders (MPS) + if cpu or mps: + os.environ[ + "CUDA_VISIBLE_DEVICES" + ] = "-1" # force torch.cuda.is_available() = False + elif device: # non-cpu device requested + visible = os.environ.get("CUDA_VISIBLE_DEVICES", None) + os.environ[ + "CUDA_VISIBLE_DEVICES" + ] = device # set environment variable - must be before assert is_available() + if not ( + torch.cuda.is_available() + and torch.cuda.device_count() >= len(device.replace(",", "")) + ): + install = ( + "See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no " + "CUDA devices are seen by torch.\n" + if torch.cuda.device_count() == 0 + else "" + ) + raise ValueError( + f"Invalid CUDA 'device={device}' requested." + f" Use 'device=cpu' or pass valid CUDA device(s) if available," + f" i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.\n" + f"\ntorch.cuda.is_available(): {torch.cuda.is_available()}" + f"\ntorch.cuda.device_count(): {torch.cuda.device_count()}" + f"\nos.environ['CUDA_VISIBLE_DEVICES']: {visible}\n" + f"{install}" + ) + + if not cpu and not mps and torch.cuda.is_available(): # prefer GPU if available + devices = ( + device.split(",") if device else "0" + ) # range(torch.cuda.device_count()) # i.e. 0,1,6,7 + n = len(devices) # device count + if ( + n > 1 and batch > 0 and batch % n != 0 + ): # check batch_size is divisible by device_count + raise ValueError( + f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or " + f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}." + ) + space = " " * (len(s) + 1) + for i, d in enumerate(devices): + p = torch.cuda.get_device_properties(i) + s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n" # bytes to MB + arg = "cuda:0" + elif mps: + s += "MPS\n" + arg = "mps" + else: # revert to CPU + s += "CPU\n" + arg = "cpu" + + LOGGER.info(s) + return torch.device(arg) diff --git a/src/juxtapose/trackers/track.py b/src/juxtapose/trackers/track.py index 7e8da80..5d3faaf 100644 --- a/src/juxtapose/trackers/track.py +++ b/src/juxtapose/trackers/track.py @@ -1,8 +1,14 @@ import torch - +import numpy as np +import inspect +from pathlib import Path from .bot_sort import BOTSORT from .byte_tracker import BYTETracker - +from .boxmot import StrongSORT, DeepOCSORT, OCSORT +from juxtapose.utils.core import Detections +from juxtapose.trackers.boxmot.utils.convert_rtm_boxmot import ( + convert_boxmot_tracker_to_rtm, +) from typing import Union ARGS_MAP = { @@ -33,10 +39,70 @@ } TRACKER_MAP = {"bytetrack": BYTETracker, "botsort": BOTSORT} +BOXMOT_TRACKER_MAP = { + "strongsort": StrongSORT, + "deepocsort": DeepOCSORT, + "ocsort": OCSORT, +} class Tracker: - def __init__(self, type: str = "bytetrack") -> None: - self.tracker: Union[BYTETracker, BOTSORT] = TRACKER_MAP[type]( - args=ARGS_MAP[type], frame_rate=30 - ) + def __init__(self, type: str = "bytetrack", device: str = "cpu") -> None: + self.type = type + if ( + type in BOXMOT_TRACKER_MAP.keys() + and "model_weights" + in inspect.signature(BOXMOT_TRACKER_MAP[type].__init__).parameters + ): + self.tracker = BOXMOT_TRACKER_MAP[type]( + model_weights=Path("model/osnet_x0_25_msmt17.pt"), + device=device, + fp16=torch.cuda.is_available() and device == "cuda", + ) + self.tracker.model.warmup() + elif type in BOXMOT_TRACKER_MAP.keys(): + self.tracker = BOXMOT_TRACKER_MAP[type]() + elif type in TRACKER_MAP.keys(): + self.tracker: Union[BYTETracker, BOTSORT] = TRACKER_MAP[type]( + args=ARGS_MAP[type], frame_rate=30 + ) + + def update(self, detections, im): + # Triggered only if self.tracker_type == True + # If self.tracker_type is strongsort, deepocsort, ocsort, transform detections into 2D dets array format + if self.type in BOXMOT_TRACKER_MAP.keys(): + # clamp to image size and 0 + detections.xyxy[:, 0] = np.clip(detections.xyxy[:, 0], 0, im.shape[1]) + detections.xyxy[:, 1] = np.clip(detections.xyxy[:, 1], 0, im.shape[0]) + detections.xyxy[:, 2] = np.clip(detections.xyxy[:, 2], 0, im.shape[1]) + detections.xyxy[:, 3] = np.clip(detections.xyxy[:, 3], 0, im.shape[0]) + dets = np.array( + [ + detections.xyxy[:, 0], + detections.xyxy[:, 1], + detections.xyxy[:, 2], + detections.xyxy[:, 3], + detections.confidence, + detections.labels, + ] + ).T + # if dets.xyxy is None: + # dets = np.empty((0, 6)) + temp_outputs: Detections = self.tracker.update(dets, im) + detections = convert_boxmot_tracker_to_rtm(temp_outputs, detections) + return detections + + # If self.tracker_type is bortsort of byte tracker, just put in detections and no need + elif self.type in TRACKER_MAP.keys(): + detections: Detections = self.tracker.update( + bboxes=detections.xyxy, + confidence=detections.confidence, + labels=detections.labels, + img=im, + ) + # return detection type + return detections + + # base case + else: + return detections