benchmark: add vittrack benchmark results (#214)
* create vittrack.py for benchmark; modify demo.py to use VitTrack
* add benchmark config
* add benchmark results on some hardware
* update benchmark results on jetson, jetson orin, vim4
* update results on sunrise x3, 200i dk, rv1126
* update benchmark results on atlas 200 and maix
* improve doc; update results on cuda
* doc correction
- README.md +7 -6
- demo.py +38 -12
- vittrack.py +39 -0
README.md
CHANGED

````diff
@@ -1,18 +1,19 @@
 # VIT tracker
 
-VIT tracker(vision transformer tracker) is a much better model for
-
-video
-
+VIT tracker (vision transformer tracker) is a much better model for real-time object tracking. VIT tracker can achieve speeds exceeding nanotrack by 20% in single-threaded mode on an ARM chip, and the advantage becomes even more pronounced in multi-threaded mode. In addition, vit tracker demonstrates better performance than nanotrack on the dataset. Moreover, vit tracker provides confidence values during the tracking process, which can be used to determine whether the target is currently lost.
+
+In object tracking tasks, the score is an important indicator of whether the current target is lost. In the video, vit tracker tracks the target and displays the current score in the upper left corner. When the target is lost, the score drops significantly. Nanotrack, by contrast, returns a score of 0.9 in every situation, so its score cannot tell us whether the target is lost.
+
+Video demo: https://youtu.be/MJiPnu1ZQRI
 
 This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC 2023 project [**Realtime object tracking models**](https://github.com/opencv/opencv/wiki/GSoC_2023#idea-realtime-object-tracking-models)
 
-**NOTE: OpenCV > 4.8.0
+**NOTE: OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
 
 # Demo
 
 ```bash
-#
+# tracking on video
 python demo.py --input /path/to/video
 
 # get help regarding various parameters
@@ -59,4 +60,4 @@ All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
 
 OSTrack: https://github.com/botaoye/OSTrack
 
-OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/vit_tracker.cpp
+OpenCV Sample: https://github.com/opencv/opencv/blob/4.x/samples/dnn/vit_tracker.cpp
````
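The score-based lost-target behaviour described above maps directly onto OpenCV's `TrackerVit` API (the same calls the new vittrack.py wrapper below uses). A minimal sketch, assuming a local copy of the demo's default model file; the 0.3 threshold is an illustrative choice, not a value from this PR:

```python
# Minimal sketch: flag a lost target from the VIT tracker's score.
# The 0.3 threshold is an illustrative assumption.
import cv2 as cv

params = cv.TrackerVit_Params()
params.net = 'object_tracking_vittrack_2023sep.onnx'
tracker = cv.TrackerVit_create(params)

video = cv.VideoCapture('/path/to/video')
ok, frame = video.read()
roi = cv.selectROI('select target', frame)  # draw the initial box
tracker.init(frame, roi)

while True:
    ok, frame = video.read()
    if not ok:
        break
    located, bbox = tracker.update(frame)
    score = tracker.getTrackingScore()
    if not located or score < 0.3:  # low score -> target likely lost
        print('target likely lost (score={:.2f})'.format(score))
```

This is exactly what nanotrack's flat 0.9 score cannot support: with vit tracker the score collapses on loss, so a simple threshold works.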
demo.py
CHANGED

````diff
@@ -1,17 +1,45 @@
+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+
+import argparse
+
 import numpy as np
 import cv2 as cv
-import argparse
+
+from vittrack import VitTrack
 
 # Check OpenCV version
 assert cv.__version__ > "4.8.0", \
     "Please install latest opencv-python to try this demo: python3 -m pip install --upgrade opencv-python"
 
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU]
+]
+
 parser = argparse.ArgumentParser(
     description="VIT track opencv API")
 parser.add_argument('--input', '-i', type=str,
                     help='Usage: Set path to the input video. Omit for using default camera.')
 parser.add_argument('--model_path', type=str, default='object_tracking_vittrack_2023sep.onnx',
                     help='Usage: Set model path')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save a file with results. Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
 args = parser.parse_args()
 
 def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
@@ -35,16 +63,16 @@ def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),tex
     return output
 
 if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
 
-
-
-
+    model = VitTrack(
+        model_path=args.model_path,
+        backend_id=backend_id,
+        target_id=target_id)
 
     # Read from args.input
-    _input = args.input
-    if args.input is None:
-        device_id = 0
-        _input = device_id
+    _input = 0 if args.input is None else args.input
     video = cv.VideoCapture(_input)
 
     # Select an object
@@ -75,11 +103,9 @@ if __name__ == '__main__':
            break
        # Inference
        tm.start()
-       isLocated, bbox = model.
-       score = model.getTrackingScore()
+       isLocated, bbox, score = model.infer(frame)
        tm.stop()
        # Visualize
        frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
-       cv.imshow('
+       cv.imshow('VitTrack Demo', frame)
        tm.reset()
-
````
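For reference, the new `--backend_target` flag is just an index into `backend_target_pairs`. A hedged sketch of what `python demo.py -bt 1` (CUDA + GPU) amounts to when written out without argparse, using the demo's default model path:

```python
# What `-bt 1` resolves to in demo.py: CUDA backend + CUDA target.
import cv2 as cv
from vittrack import VitTrack  # the wrapper added in this PR

model = VitTrack(
    model_path='object_tracking_vittrack_2023sep.onnx',
    backend_id=cv.dnn.DNN_BACKEND_CUDA,
    target_id=cv.dnn.DNN_TARGET_CUDA)
```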
vittrack.py
ADDED

```python
# This file is part of OpenCV Zoo project.
# It is subject to the license terms in the LICENSE file found in the same directory.

import numpy as np
import cv2 as cv

class VitTrack:
    def __init__(self, model_path, backend_id=0, target_id=0):
        self.model_path = model_path
        self.backend_id = backend_id
        self.target_id = target_id

        self.params = cv.TrackerVit_Params()
        self.params.net = self.model_path
        self.params.backend = self.backend_id
        self.params.target = self.target_id

        self.model = cv.TrackerVit_create(self.params)

    @property
    def name(self):
        return self.__class__.__name__

    def setBackendAndTarget(self, backend_id, target_id):
        self.backend_id = backend_id
        self.target_id = target_id

        self.params.backend = self.backend_id
        self.params.target = self.target_id

        self.model = cv.TrackerVit_create(self.params)

    def init(self, image, roi):
        self.model.init(image, roi)

    def infer(self, image):
        is_located, bbox = self.model.update(image)
        score = self.model.getTrackingScore()
        return is_located, bbox, score
```
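The wrapper keeps the `init`/`infer` interface that the zoo's demos (and, per the commit messages, the benchmark) drive. A minimal usage sketch; the video path and the first-frame (x, y, w, h) box are placeholders:

```python
# Usage sketch for the VitTrack wrapper; the video path and the
# initial (x, y, w, h) box below are placeholders.
import cv2 as cv
from vittrack import VitTrack

model = VitTrack(model_path='object_tracking_vittrack_2023sep.onnx')

video = cv.VideoCapture('/path/to/video')
has_frame, frame = video.read()
model.init(frame, (50, 50, 100, 100))  # first-frame ROI

while True:
    has_frame, frame = video.read()
    if not has_frame:
        break
    is_located, bbox, score = model.infer(frame)
    print(is_located, bbox, '{:.2f}'.format(score))
```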