benchmark: add vittrack benchmark results (#214)
* create vittrack.py for benchmark; modify demo.py to use VitTrack
* add benchmark config
* add benchmark results on some hardware
* update benchmark results on jetson, jetson orin, vim4
* update results on sunrise x3, 200i dk, rv1126
* update benchmark results on atlas 200 and maix
* improve doc; update results on cuda
* doc correction
- README.md +7 -6
- demo.py +38 -12
- vittrack.py +39 -0
README.md
CHANGED

````diff
@@ -1,18 +1,19 @@
 # VIT tracker
 
-VIT tracker(vision transformer tracker) is a much better model for
-
-video
-
+VIT tracker (vision transformer tracker) is a much better model for real-time object tracking. VIT tracker can achieve speeds exceeding nanotrack by 20% in single-threaded mode on an ARM chip, and the advantage becomes even more pronounced in multi-threaded mode. In addition, vit tracker demonstrates better performance than nanotrack on the dataset. Moreover, vit tracker provides confidence values during the tracking process, which can be used to determine whether the target is currently lost.
+
+In object tracking tasks, the score is an important indicator of whether the current target is lost. In the video, vit tracker tracks the target and displays the current score in the upper left corner. When the target is lost, the score drops significantly. Nanotrack, by contrast, returns a score of 0.9 in every situation, so its score cannot tell us whether the target is lost.
+
+Video demo: https://youtu.be/MJiPnu1ZQRI
 
 This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC 2023 project [**Realtime object tracking models**](https://github.com/opencv/opencv/wiki/GSoC_2023#idea-realtime-object-tracking-models)
 
-**NOTE: OpenCV > 4.8.0
+**NOTE: OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
 
 # Demo
 
 ```bash
-#
+# tracking on video
 python demo.py --input /path/to/video
 
 # get help regarding various parameters
@@ -59,4 +60,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 
 OSTrack: https://github.com/botaoye/OSTrack
 
-OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/vit_tracker.cpp
+OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/vit_tracker.cpp
````
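The score-based lost-target behaviour described above maps directly onto OpenCV's `TrackerVit` API (the same calls the new vittrack.py wrapper below uses). A minimal sketch, assuming a local copy of the demo's default model file; the 0.3 threshold is an illustrative choice, not a value from this PR:

```python
# Minimal sketch: flag a lost target from the VIT tracker's score.
# The 0.3 threshold is an illustrative assumption.
import cv2 as cv

params = cv.TrackerVit_Params()
params.net = 'object_tracking_vittrack_2023sep.onnx'
tracker = cv.TrackerVit_create(params)

video = cv.VideoCapture('/path/to/video')
ok, frame = video.read()
roi = cv.selectROI('select target', frame)  # draw the initial box
tracker.init(frame, roi)

while True:
    ok, frame = video.read()
    if not ok:
        break
    located, bbox = tracker.update(frame)
    score = tracker.getTrackingScore()
    if not located or score < 0.3:  # low score -> target likely lost
        print('target likely lost (score={:.2f})'.format(score))
```

This is exactly what nanotrack's flat 0.9 score cannot support: with vit tracker the score collapses on loss, so a simple threshold works.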
demo.py
CHANGED

````diff
@@ -1,17 +1,45 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+
+import argparse
+
 import numpy as np
 import cv2 as cv
-import argparse
+
+from vittrack import VitTrack
 
 # Check OpenCV version
 assert cv.__version__ > "4.8.0", \
     "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser(
     description="VIT track opencv API")
 parser.add_argument('--input', '-i', type=str,
                     help='Usage: Set path to the input video. Omit for using default camera.')
 parser.add_argument('--model_path', type=str, default='object_tracking_vittrack_2023sep.onnx',
                     help='Usage: Set model path')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
@@ -35,16 +63,16 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
 
-
-
-
+    model = VitTrack(
+        model_path=args.model_path,
+        backend_id=backend_id,
+        target_id=target_id)
 
     # Read from args.input
-    _input = args.input
-    if args.input is None:
-        device_id = 0
-        _input = device_id
+    _input = 0 if args.input is None else args.input
     video = cv.VideoCapture(_input)
 
     # Select an object
@@ -75,11 +103,9 @@ if __name__ == '__main__':
            break
        # Inference
        tm.start()
-       isLocated, bbox = model.
-       score = model.getTrackingScore()
+       isLocated, bbox, score = model.infer(frame)
        tm.stop()
        # Visualize
        frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
-       cv.imshow('
+       cv.imshow('VitTrack Demo', frame)
        tm.reset()
-
````
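For reference, the new `--backend_target` flag is just an index into `backend_target_pairs`. A hedged sketch of what `python demo.py -bt 1` (CUDA + GPU) amounts to when written out without argparse, using the demo's default model path:

```python
# What `-bt 1` resolves to in demo.py: CUDA backend + CUDA target.
import cv2 as cv
from vittrack import VitTrack  # the wrapper added in this PR

model = VitTrack(
    model_path='object_tracking_vittrack_2023sep.onnx',
    backend_id=cv.dnn.DNN_BACKEND_CUDA,
    target_id=cv.dnn.DNN_TARGET_CUDA)
```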
vittrack.py
ADDED

```python
# This file is part of OpenCV Zoo project.
# It is subject to the license terms in the LICENSE file found in the same directory.

import numpy as np
import cv2 as cv

class VitTrack:
    def __init__(self, model_path, backend_id=0, target_id=0):
        self.model_path = model_path
        self.backend_id = backend_id
        self.target_id = target_id

        self.params = cv.TrackerVit_Params()
        self.params.net = self.model_path
        self.params.backend = self.backend_id
        self.params.target = self.target_id

        self.model = cv.TrackerVit_create(self.params)

    @property
    def name(self):
        return self.__class__.__name__

    def setBackendAndTarget(self, backend_id, target_id):
        self.backend_id = backend_id
        self.target_id = target_id

        self.params.backend = self.backend_id
        self.params.target = self.target_id

        self.model = cv.TrackerVit_create(self.params)

    def init(self, image, roi):
        self.model.init(image, roi)

    def infer(self, image):
        is_located, bbox = self.model.update(image)
        score = self.model.getTrackingScore()
        return is_located, bbox, score
```
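The wrapper keeps the `init`/`infer` interface that the zoo's demos (and, per the commit messages, the benchmark) drive. A minimal usage sketch; the video path and the first-frame (x, y, w, h) box are placeholders:

```python
# Usage sketch for the VitTrack wrapper; the video path and the
# initial (x, y, w, h) box below are placeholders.
import cv2 as cv
from vittrack import VitTrack

model = VitTrack(model_path='object_tracking_vittrack_2023sep.onnx')

video = cv.VideoCapture('/path/to/video')
has_frame, frame = video.read()
model.init(frame, (50, 50, 100, 100))  # first-frame ROI

while True:
    has_frame, frame = video.read()
    if not has_frame:
        break
    is_located, bbox, score = model.infer(frame)
    print(is_located, bbox, '{:.2f}'.format(score))
```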