From aa9c185e8469e0f7d92abcf5b1b713be4aaccd5c Mon Sep 17 00:00:00 2001
From: topguns837 <arjun.k@orangewood.co>
Date: Tue, 6 Feb 2024 23:33:55 +0530
Subject: [PATCH] Re-wrote YOLOv5 Plugin using torch instead of OpenCV

---
 object_detection/config/params.yaml           |   6 +-
 .../object_detection/Detectors/YOLOv5.py      | 151 ++++--------------
 .../object_detection/Detectors/YOLOv8.py      |  84 +++++-----
 3 files changed, 73 insertions(+), 168 deletions(-)

diff --git a/object_detection/config/params.yaml b/object_detection/config/params.yaml
index eb69547..ee0563b 100644
--- a/object_detection/config/params.yaml
+++ b/object_detection/config/params.yaml
@@ -5,8 +5,8 @@ object_detection:
     output_img_topic: object_detection/img
     publish_output_img: 1
     model_params:
-      detector_type: YOLOv5
-      model_dir_path: /root/percep_ws/models/yolov5
-      weight_file_name: yolov5.onnx
+      detector_type: YOLOv8
+      model_dir_path: /root/percep_ws/models/yolov8
+      weight_file_name: yolov8s.pt
       confidence_threshold : 0.5
       show_fps : 1
\ No newline at end of file
diff --git a/object_detection/object_detection/Detectors/YOLOv5.py b/object_detection/object_detection/Detectors/YOLOv5.py
index 4363c33..e126dbe 100644
--- a/object_detection/object_detection/Detectors/YOLOv5.py
+++ b/object_detection/object_detection/Detectors/YOLOv5.py
@@ -1,148 +1,53 @@
+import torch
 import os
-import cv2
-import numpy as np
 
 from ..DetectorBase import DetectorBase
 
 
 class YOLOv5(DetectorBase):
-    def __init__(self, conf_threshold = 0.7, score_threshold = 0.4, nms_threshold = 0.25, is_cuda = 1):
+    def __init__(self, conf_threshold = 0.7):
 
         super().__init__()
 
-        # opencv img input
-        self.frame = None
-        self.net = None
-        self.INPUT_WIDTH = 640
-        self.INPUT_HEIGHT = 640
-        self.CONFIDENCE_THRESHOLD = conf_threshold
+        self.conf_threshold = conf_threshold
 
-        self.is_cuda = is_cuda   
-
-    
-    # load model and prepare its backend to either run on GPU or CPU, see if it can be added in constructor
     def build_model(self, model_dir_path, weight_file_name):
-        model_path = os.path.join(model_dir_path, weight_file_name)
-
-        try:
-            self.net = cv2.dnn.readNet(model_path)
-        except:
-            raise Exception("Error loading given model from path: {}. Maybe the file doesn't exist?".format(model_path))
-
-        if self.is_cuda:
-            print("is_cuda was set to True. Attempting to use CUDA")
-            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
-            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
-        else:
-            print("is_cuda was set to False. Running on CPU")
-            self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
-            self.net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
 
+        try : 
+            model_path = os.path.join(model_dir_path, weight_file_name)
+            self.model = torch.hub.load('ultralytics/yolov5:v6.0', 'custom', path = model_path, force_reload = True)
 
-    # load classes.txt that contains mapping of model with labels
-    # TODO: add try/except to raise exception that tells the use to check the name if it is classes.txt
+        except :
+            raise Exception("Error loading given model from path: {}. Maybe the file doesn't exist?".format(model_path))
+        
     def load_classes(self, model_dir_path):
+
         self.class_list = []
-        with open(model_dir_path + "/classes.txt", "r") as f:
+
+        with open(os.path.join(model_dir_path, 'classes.txt')) as f :
             self.class_list = [cname.strip() for cname in f.readlines()]
-        return self.class_list
 
+        return self.class_list
     
-    def detect(self, image):
-        # convert image to 640x640 
-        blob = cv2.dnn.blobFromImage(image, 1/255.0, (self.INPUT_WIDTH, self.INPUT_HEIGHT), swapRB=True, crop=False)
-        self.net.setInput(blob)
-        preds = self.net.forward()
-        return preds
-
-
-    # extract bounding box, class IDs and confidences of detected objects
-    # YOLOv5 returns a 3D tensor of dimension 25200*(5 + n_classes)
-    def wrap_detection(self, input_image, output_data):
-        class_ids = []
-        confidences = []
-        boxes = []
-
-        rows = output_data.shape[0]
-
-        image_width, image_height, _ = input_image.shape
-
-        x_factor = image_width / self.INPUT_WIDTH
-        y_factor =  image_height / self.INPUT_HEIGHT
-
-        # Iterate through all the 25200 vectors
-        for r in range(rows):
-            row = output_data[r]
-
-            # Continue only if Pc > conf_threshold
-            confidence = row[4]
-            if confidence >= self.CONFIDENCE_THRESHOLD:
-                
-                # One-hot encoded vector representing class of object
-                classes_scores = row[5:]
-
-                # Returns min and max values in a array alongwith their indices
-                _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
-
-                # Extract the column index of the maximum values in classes_scores
-                class_id = max_indx[1]
-
-                # Continue of the class score is greater than a threshold
-                # class_score represents the probability of an object belonging to that class
-                if (classes_scores[class_id] > .25):
-
-                    confidences.append(confidence)
-
-                    class_ids.append(class_id)
-
-                    x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item() 
-                    left = int((x - 0.5 * w) * x_factor)
-                    top = int((y - 0.5 * h) * y_factor)
-                    width = int(w * x_factor)
-                    height = int(h * y_factor)
-                    box = np.array([left, top, width, height])
-                    boxes.append(box)
-
-        # removing intersecting bounding boxes
-        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45) 
-
-        result_class_ids = []
-        result_confidences = []
-        result_boxes = []
-
-        for i in indexes:
-            result_confidences.append(confidences[i])
-            result_class_ids.append(class_ids[i])
-            result_boxes.append(boxes[i])
-
-        return result_class_ids, result_confidences, result_boxes
-
-
-    # makes image square with dimension max(h, w)
-    def format_yolov5(self):
-        row, col, _ = self.frame.shape
-        _max = max(col, row)
-        result = np.zeros((_max, _max, 3), np.uint8)
-        result[0:row, 0:col] = self.frame
-        return result
-
-
     def get_predictions(self, cv_image):
-        #Clear list
-        self.predictions = []
 
-        if cv_image is None:
+        if cv_image is None :
             # TODO: show warning message (different color, maybe)
-            return None,None
-        else:
+            return None, None
+        
+        else :
             self.frame = cv_image
+            class_id = []
+            confidence = []
+            boxes = []
 
-            # make image square
-            inputImage = self.format_yolov5()
+            results = self.model(self.frame)
+            
+            for *xyxy, conf, id in results.xyxy[0]:
+                class_id.append(int(id))
+                confidence.append(conf.item())
+                boxes.append([int(xy) for xy in xyxy])
 
-            outs = self.detect(inputImage)
-            class_ids, confidences, boxes = self.wrap_detection(inputImage, outs[0])
+            super().create_predictions_list(class_id, confidence, boxes)
 
-            super().create_predictions_list(class_ids, confidences, boxes)  
-            
-            return self.predictions
\ No newline at end of file
+            return self.predictions
diff --git a/object_detection/object_detection/Detectors/YOLOv8.py b/object_detection/object_detection/Detectors/YOLOv8.py
index e1f7d59..269bb5e 100755
--- a/object_detection/object_detection/Detectors/YOLOv8.py
+++ b/object_detection/object_detection/Detectors/YOLOv8.py
@@ -5,51 +5,51 @@
 
 
 class YOLOv8(DetectorBase):
-  def __init__(self, conf_threshold = 0.7):
-    
-    super().__init__()
-    
-    self.conf_threshold = conf_threshold
-
-  def build_model(self, model_dir_path, weight_file_name) :
-
-    try :
-      model_path = os.path.join(model_dir_path, weight_file_name)
-      self.model = YOLO(model_path)
+    def __init__(self, conf_threshold = 0.7):
+       
+        super().__init__()
+       
+        self.conf_threshold = conf_threshold
+  
+    def build_model(self, model_dir_path, weight_file_name) :
+  
+        try :
+            model_path = os.path.join(model_dir_path, weight_file_name)
+            self.model = YOLO(model_path)
+      
+        except :
+            raise Exception("Error loading given model from path: {}. Maybe the file doesn't exist?".format(model_path))
     
-    except :
-      raise Exception("Error loading given model from path: {}. Maybe the file doesn't exist?".format(model_path))
   
+    def load_classes(self, model_dir_path):
+  
+        self.class_list = []
+  
+        with open(model_dir_path + "/classes.txt", "r") as f:
+            self.class_list = [cname.strip() for cname in f.readlines()]
+  
+        return self.class_list
+  
+    def get_predictions(self, cv_image):
+      
+        if cv_image is None:
+            # TODO: show warning message (different color, maybe)
+            return None
+      
+        else :     
+            self.frame = cv_image   
+            class_id = []
+            confidence = []
+            boxes = []
 
-  def load_classes(self, model_dir_path):
-
-    self.class_list = []
-
-    with open(model_dir_path + "/classes.txt", "r") as f:
-        self.class_list = [cname.strip() for cname in f.readlines()]
-
-    return self.class_list
-
-  def get_predictions(self, cv_image):
-    
-    if cv_image is None:
-      # TODO: show warning message (different color, maybe)
-      return None,None
-    
-    else :     
-      self.frame = cv_image   
-      class_id = []
-      confidence = []
-      boxes = []
+            result = self.model.predict(self.frame, conf = self.conf_threshold, verbose = False) # Perform object detection on image
+            row = result[0].boxes.cpu()
 
-      result = self.model.predict(self.frame, conf = self.conf_threshold, verbose = False) # Perform object detection on image
-      row = result[0].boxes.cpu()
+            for box in row:
+                class_id.append(box.cls.numpy().tolist()[0])
+                confidence.append(box.conf.numpy().tolist()[0])
+                boxes.append(box.xyxy.numpy().tolist()[0])
 
-      for box in row:
-        class_id.append(box.cls.numpy().tolist()[0])
-        confidence.append(box.conf.numpy().tolist()[0])
-        boxes.append(box.xyxy.numpy().tolist()[0])
+            super().create_predictions_list(class_id, confidence, boxes)
 
-      super().create_predictions_list(class_id, confidence, boxes)
-      
-      return self.predictions
\ No newline at end of file
+            return self.predictions
\ No newline at end of file