Ryan Lee
committed on
Commit
·
be1656e
1
Parent(s):
1528081
C++ Demo - Object Tracking (VitTrack) (#240)
Browse files* Preliminary attempt at C++ demo.
* Update README documentation
* Fixed text spacing and removed unused arguments. Cleaned up to not use tuple.
* Update offsets to match C++ to prevent overlapping text
* Add help functionality
* Add using namespace for standalone C++ demo file for readability.
* Update formatting and add save/visualization functionality
* More formatting changes
models/object_tracking_vittrack/CMakeLists.txt
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cmake_minimum_required(VERSION 3.24)
set(project_name "opencv_zoo_object_tracking_vittrack")

PROJECT (${project_name})

# Set C++ compilation standard to C++11.
# CMAKE_CXX_STANDARD only initializes the CXX_STANDARD property of targets
# that are created AFTER it is set, so it has to appear before add_executable().
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Find OpenCV, you may need to set OpenCV_DIR variable
# to the absolute path to the directory containing OpenCVConfig.cmake file
# via the command line or GUI
set(OPENCV_VERSION "4.9.0")
set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})

file(GLOB SourceFile
    "demo.cpp")
# If the package has been found, several variables will
# be set, you can find the full list with descriptions
# in the OpenCVConfig.cmake file.
# Print some message showing some of them
message(STATUS "OpenCV library status:")
message(STATUS "    config: ${OpenCV_DIR}")
message(STATUS "    version: ${OpenCV_VERSION}")
message(STATUS "    libraries: ${OpenCV_LIBS}")
message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")

# Declare the executable target built from your sources
add_executable(${project_name} ${SourceFile})

# Link your application with OpenCV libraries
target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
|
models/object_tracking_vittrack/README.md
CHANGED
@@ -11,14 +11,34 @@ This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC
|
|
11 |
**NOTE: OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
|
12 |
|
13 |
# Demo
|
14 |
-
|
15 |
```bash
|
|
|
|
|
|
|
16 |
# tracking on video
|
17 |
python demo.py --input /path/to/video
|
18 |
|
19 |
# get help regarding various parameters
|
20 |
python demo.py --help
|
21 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
# Example outputs
|
24 |
|
|
|
11 |
**NOTE: OpenCV >= 4.9.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
|
12 |
|
13 |
# Demo
|
14 |
+
## Python
|
15 |
```bash
|
16 |
+
# tracking on camera input
|
17 |
+
python demo.py
|
18 |
+
|
19 |
# tracking on video
|
20 |
python demo.py --input /path/to/video
|
21 |
|
22 |
# get help regarding various parameters
|
23 |
python demo.py --help
|
24 |
```
|
25 |
+
## C++
|
26 |
+
Install latest OpenCV and CMake >= 3.24.0 to get started.
|
27 |
+
|
28 |
+
```shell
|
29 |
+
# A typical and default installation path of OpenCV is /usr/local
|
30 |
+
cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
|
31 |
+
cmake --build build
|
32 |
+
|
33 |
+
# tracking on camera input
|
34 |
+
./build/opencv_zoo_object_tracking_vittrack
|
35 |
+
|
36 |
+
# tracking on video
|
37 |
+
./build/opencv_zoo_object_tracking_vittrack -i=/path/to/video
|
38 |
+
|
39 |
+
# get help messages
|
40 |
+
./build/opencv_zoo_object_tracking_vittrack -h
|
41 |
+
```
|
42 |
|
43 |
# Example outputs
|
44 |
|
models/object_tracking_vittrack/demo.cpp
ADDED
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#include <iostream>
|
2 |
+
#include <opencv2/opencv.hpp>
|
3 |
+
|
4 |
+
using namespace std;
|
5 |
+
using namespace cv;
|
6 |
+
using namespace dnn;
|
7 |
+
|
8 |
+
struct TrackingResult
|
9 |
+
{
|
10 |
+
bool isLocated;
|
11 |
+
Rect bbox;
|
12 |
+
float score;
|
13 |
+
};
|
14 |
+
|
15 |
+
class VitTrack
|
16 |
+
{
|
17 |
+
public:
|
18 |
+
|
19 |
+
VitTrack(const string& model_path, int backend_id = 0, int target_id = 0)
|
20 |
+
{
|
21 |
+
params.net = model_path;
|
22 |
+
params.backend = backend_id;
|
23 |
+
params.target = target_id;
|
24 |
+
model = TrackerVit::create(params);
|
25 |
+
}
|
26 |
+
|
27 |
+
void init(const Mat& image, const Rect& roi)
|
28 |
+
{
|
29 |
+
model->init(image, roi);
|
30 |
+
}
|
31 |
+
|
32 |
+
TrackingResult infer(const Mat& image)
|
33 |
+
{
|
34 |
+
TrackingResult result;
|
35 |
+
result.isLocated = model->update(image, result.bbox);
|
36 |
+
result.score = model->getTrackingScore();
|
37 |
+
return result;
|
38 |
+
}
|
39 |
+
|
40 |
+
private:
|
41 |
+
TrackerVit::Params params;
|
42 |
+
Ptr<TrackerVit> model;
|
43 |
+
};
|
44 |
+
|
45 |
+
// Returns an annotated copy of `image`; the input itself is left untouched.
//  - When fps >= 0, an FPS label is drawn in the top-left corner.
//  - When the target is located with a score of at least 0.3, the bounding
//    box and the score are drawn; otherwise a red "Target lost!" message is
//    placed around the middle of the frame.
Mat visualize(const Mat& image, const Rect& bbox, float score, bool isLocated, double fps = -1.0,
              const Scalar& box_color = Scalar(0, 255, 0), const Scalar& text_color = Scalar(0, 255, 0),
              double fontScale = 1.0, int fontSize = 1)
{
    const int font = FONT_HERSHEY_DUPLEX;
    Mat canvas = image.clone();

    if (fps >= 0)
    {
        const string fps_label = "FPS: " + to_string(fps);
        putText(canvas, fps_label, Point(0, 30), font, fontScale, text_color, fontSize);
    }

    const bool tracked = isLocated && score >= 0.3;
    if (!tracked)
    {
        const string lost_msg = "Target lost!";
        const Size msg_size = getTextSize(lost_msg, font, fontScale, fontSize, nullptr);
        const Point origin((canvas.cols - msg_size.width) / 2,
                           (canvas.rows - msg_size.height) / 2);
        putText(canvas, lost_msg, origin, font, fontScale, Scalar(0, 0, 255), fontSize);
        return canvas;
    }

    rectangle(canvas, bbox, box_color, 2);
    const Point score_origin(bbox.x, bbox.y + 25);
    putText(canvas, format("%.2f", score), score_origin, font, fontScale, text_color, fontSize);
    return canvas;
}
|
74 |
+
|
75 |
+
int main(int argc, char** argv)
|
76 |
+
{
|
77 |
+
CommandLineParser parser(argc, argv,
|
78 |
+
"{help h | | Print help message. }"
|
79 |
+
"{input i | |Set path to the input video. Omit for using default camera.}"
|
80 |
+
"{model_path |object_tracking_vittrack_2023sep.onnx |Set model path}"
|
81 |
+
"{backend_target bt |0 |Choose backend-target pair: 0 - OpenCV implementation + CPU, 1 - CUDA + GPU (CUDA), 2 - CUDA + GPU (CUDA FP16), 3 - TIM-VX + NPU, 4 - CANN + NPU}"
|
82 |
+
"{save s |false |Specify to save a file with results.}"
|
83 |
+
"{vis v |true |Specify to open a new window to show results.}");
|
84 |
+
if (parser.has("help"))
|
85 |
+
{
|
86 |
+
parser.printMessage();
|
87 |
+
return 0;
|
88 |
+
}
|
89 |
+
|
90 |
+
string input = parser.get<string>("input");
|
91 |
+
string model_path = parser.get<string>("model_path");
|
92 |
+
int backend_target = parser.get<int>("backend_target");
|
93 |
+
bool save = parser.get<bool>("save");
|
94 |
+
bool vis = parser.get<bool>("vis");
|
95 |
+
|
96 |
+
vector<vector<int>> backend_target_pairs =
|
97 |
+
{
|
98 |
+
{DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
|
99 |
+
{DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
|
100 |
+
{DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
|
101 |
+
{DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
|
102 |
+
{DNN_BACKEND_CANN, DNN_TARGET_NPU}
|
103 |
+
};
|
104 |
+
|
105 |
+
int backend_id = backend_target_pairs[backend_target][0];
|
106 |
+
int target_id = backend_target_pairs[backend_target][1];
|
107 |
+
|
108 |
+
// Create VitTrack tracker
|
109 |
+
VitTrack tracker(model_path, backend_id, target_id);
|
110 |
+
|
111 |
+
// Open video capture
|
112 |
+
VideoCapture video;
|
113 |
+
if (input.empty())
|
114 |
+
{
|
115 |
+
video.open(0); // Default camera
|
116 |
+
}
|
117 |
+
else
|
118 |
+
{
|
119 |
+
video.open(input);
|
120 |
+
}
|
121 |
+
|
122 |
+
if (!video.isOpened())
|
123 |
+
{
|
124 |
+
cerr << "Error: Could not open video source" << endl;
|
125 |
+
return -1;
|
126 |
+
}
|
127 |
+
|
128 |
+
// Select an object
|
129 |
+
Mat first_frame;
|
130 |
+
video >> first_frame;
|
131 |
+
|
132 |
+
if (first_frame.empty())
|
133 |
+
{
|
134 |
+
cerr << "No frames grabbed!" << endl;
|
135 |
+
return -1;
|
136 |
+
}
|
137 |
+
|
138 |
+
Mat first_frame_copy = first_frame.clone();
|
139 |
+
putText(first_frame_copy, "1. Drag a bounding box to track.", Point(0, 25), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
|
140 |
+
putText(first_frame_copy, "2. Press ENTER to confirm", Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
|
141 |
+
Rect roi = selectROI("VitTrack Demo", first_frame_copy);
|
142 |
+
|
143 |
+
if (roi.area() == 0)
|
144 |
+
{
|
145 |
+
cerr << "No ROI is selected! Exiting..." << endl;
|
146 |
+
return -1;
|
147 |
+
}
|
148 |
+
else
|
149 |
+
{
|
150 |
+
cout << "Selected ROI: " << roi << endl;
|
151 |
+
}
|
152 |
+
|
153 |
+
// Create VideoWriter if save option is specified
|
154 |
+
VideoWriter output_video;
|
155 |
+
if (save)
|
156 |
+
{
|
157 |
+
Size frame_size = first_frame.size();
|
158 |
+
output_video.open("output.mp4", VideoWriter::fourcc('m', 'p', '4', 'v'), video.get(CAP_PROP_FPS), frame_size);
|
159 |
+
if (!output_video.isOpened())
|
160 |
+
{
|
161 |
+
cerr << "Error: Could not create output video stream" << endl;
|
162 |
+
return -1;
|
163 |
+
}
|
164 |
+
}
|
165 |
+
|
166 |
+
// Initialize tracker with ROI
|
167 |
+
tracker.init(first_frame, roi);
|
168 |
+
|
169 |
+
// Track frame by frame
|
170 |
+
TickMeter tm;
|
171 |
+
while (waitKey(1) < 0)
|
172 |
+
{
|
173 |
+
video >> first_frame;
|
174 |
+
if (first_frame.empty())
|
175 |
+
{
|
176 |
+
cout << "End of video" << endl;
|
177 |
+
break;
|
178 |
+
}
|
179 |
+
|
180 |
+
// Inference
|
181 |
+
tm.start();
|
182 |
+
TrackingResult result = tracker.infer(first_frame);
|
183 |
+
tm.stop();
|
184 |
+
|
185 |
+
// Visualize
|
186 |
+
Mat frame = first_frame.clone();
|
187 |
+
frame = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS());
|
188 |
+
|
189 |
+
if (save)
|
190 |
+
{
|
191 |
+
output_video.write(frame);
|
192 |
+
}
|
193 |
+
|
194 |
+
if (vis)
|
195 |
+
{
|
196 |
+
imshow("VitTrack Demo", frame);
|
197 |
+
}
|
198 |
+
tm.reset();
|
199 |
+
}
|
200 |
+
|
201 |
+
if (save)
|
202 |
+
{
|
203 |
+
output_video.release();
|
204 |
+
}
|
205 |
+
|
206 |
+
video.release();
|
207 |
+
destroyAllWindows();
|
208 |
+
|
209 |
+
return 0;
|
210 |
+
}
|
models/object_tracking_vittrack/demo.py
CHANGED
@@ -35,12 +35,11 @@ parser.add_argument('--backend_target', '-bt', type=int, default=0,
|
|
35 |
{:d}: TIM-VX + NPU,
|
36 |
{:d}: CANN + NPU
|
37 |
'''.format(*[x for x in range(len(backend_target_pairs))]))
|
38 |
-
parser.add_argument('--save', '-s', action='store_true',
|
39 |
-
help='Usage: Specify to save a file with results.
|
40 |
-
parser.add_argument('--vis', '-v', action='store_true',
|
41 |
-
help='Usage: Specify to open a new window to show results.
|
42 |
args = parser.parse_args()
|
43 |
-
|
44 |
def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
|
45 |
output = image.copy()
|
46 |
h, w, _ = output.shape
|
@@ -80,16 +79,21 @@ if __name__ == '__main__':
|
|
80 |
print('No frames grabbed!')
|
81 |
exit()
|
82 |
first_frame_copy = first_frame.copy()
|
83 |
-
cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0,
|
84 |
-
cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0,
|
85 |
-
roi = cv.selectROI('
|
86 |
|
87 |
if np.all(np.array(roi) == 0):
|
88 |
-
print("No
|
89 |
exit()
|
90 |
else:
|
91 |
print("Selected ROI: {}".format(roi))
|
92 |
|
|
|
|
|
|
|
|
|
|
|
93 |
# Init tracker with ROI
|
94 |
model.init(first_frame, roi)
|
95 |
|
@@ -106,5 +110,15 @@ if __name__ == '__main__':
|
|
106 |
tm.stop()
|
107 |
# Visualize
|
108 |
frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
|
109 |
-
|
|
|
|
|
|
|
|
|
110 |
tm.reset()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
{:d}: TIM-VX + NPU,
|
36 |
{:d}: CANN + NPU
|
37 |
'''.format(*[x for x in range(len(backend_target_pairs))]))
|
38 |
+
parser.add_argument('--save', '-s', action='store_true', default=False,
|
39 |
+
help='Usage: Specify to save a file with results.')
|
40 |
+
parser.add_argument('--vis', '-v', action='store_true', default=True,
|
41 |
+
help='Usage: Specify to open a new window to show results.')
|
42 |
args = parser.parse_args()
|
|
|
43 |
def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
|
44 |
output = image.copy()
|
45 |
h, w, _ = output.shape
|
|
|
79 |
print('No frames grabbed!')
|
80 |
exit()
|
81 |
first_frame_copy = first_frame.copy()
|
82 |
+
cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
|
83 |
+
cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
|
84 |
+
roi = cv.selectROI('VitTrack Demo', first_frame_copy)
|
85 |
|
86 |
if np.all(np.array(roi) == 0):
|
87 |
+
print("No ROI is selected! Exiting ...")
|
88 |
exit()
|
89 |
else:
|
90 |
print("Selected ROI: {}".format(roi))
|
91 |
|
92 |
+
if args.save:
|
93 |
+
fps = video.get(cv.CAP_PROP_FPS)
|
94 |
+
frame_size = (first_frame.shape[1], first_frame.shape[0])
|
95 |
+
output_video = cv.VideoWriter('output.mp4', cv.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
|
96 |
+
|
97 |
# Init tracker with ROI
|
98 |
model.init(first_frame, roi)
|
99 |
|
|
|
110 |
tm.stop()
|
111 |
# Visualize
|
112 |
frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
|
113 |
+
if args.save:
|
114 |
+
output_video.write(frame)
|
115 |
+
|
116 |
+
if args.vis:
|
117 |
+
cv.imshow('VitTrack Demo', frame)
|
118 |
tm.reset()
|
119 |
+
|
120 |
+
if args.save:
|
121 |
+
output_video.release()
|
122 |
+
|
123 |
+
video.release()
|
124 |
+
cv.destroyAllWindows()
|