Ryan Lee committed
Commit be1656e · 1 Parent(s): 1528081

C++ Demo - Object Tracking (VitTrack) (#240)


* Preliminary attempt at C++ demo.

* Update README documentation

* Fixed text spacing and removed unused arguments. Cleaned up to avoid returning a tuple.

* Update text offsets to match the C++ demo and prevent overlapping text

* Add help functionality

* Add using-namespace declarations to the standalone C++ demo file for readability.

* Update formatting and add save/visualization functionality

* More formatting changes

models/object_tracking_vittrack/CMakeLists.txt ADDED
@@ -0,0 +1,32 @@
+ cmake_minimum_required(VERSION 3.24)
+ set(project_name "opencv_zoo_object_tracking_vittrack")
+
+ PROJECT (${project_name})
+
+ set(OPENCV_VERSION "4.9.0")
+ set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+ find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+ # Find OpenCV, you may need to set OpenCV_DIR variable
+ # to the absolute path to the directory containing OpenCVConfig.cmake file
+ # via the command line or GUI
+
+ file(GLOB SourceFile
+     "demo.cpp")
+ # If the package has been found, several variables will
+ # be set, you can find the full list with descriptions
+ # in the OpenCVConfig.cmake file.
+ # Print some message showing some of them
+ message(STATUS "OpenCV library status:")
+ message(STATUS "    config: ${OpenCV_DIR}")
+ message(STATUS "    version: ${OpenCV_VERSION}")
+ message(STATUS "    libraries: ${OpenCV_LIBS}")
+ message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
+
+ # Declare the executable target built from your sources
+ add_executable(${project_name} ${SourceFile})
+
+ # Set C++ compilation standard to C++11
+ set(CMAKE_CXX_STANDARD 11)
+
+ # Link your application with OpenCV libraries
+ target_link_libraries(${project_name} PRIVATE ${OpenCV_LIBS})
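
For context, `OPENCV_INSTALLATION_PATH` above is only a `HINTS` entry for `find_package(OpenCV ...)`; as the comments note, setting `OpenCV_DIR` to the directory containing `OpenCVConfig.cmake` also works. A minimal configure/build sketch, with placeholder paths rather than required ones:

```shell
# Configure out of source into ./build, hinting where OpenCV is installed
cmake -B build -D OPENCV_INSTALLATION_PATH=/usr/local .
cmake --build build

# Alternative: point OpenCV_DIR straight at the folder holding OpenCVConfig.cmake
# (the exact path depends on how OpenCV was installed)
cmake -B build -D OpenCV_DIR=/usr/local/lib/cmake/opencv4 .
```
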
models/object_tracking_vittrack/README.md CHANGED
@@ -11,14 +11,34 @@ This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC
  **NOTE: OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
 
  # Demo
-
+ ## Python
  ```bash
+ # tracking on camera input
+ python demo.py
+
  # tracking on video
  python demo.py --input /path/to/video
 
  # get help regarding various parameters
  python demo.py --help
  ```
+ ## C++
+ Install latest OpenCV and CMake >= 3.24.0 to get started.
+
+ ```shell
+ # A typical and default installation path of OpenCV is /usr/local
+ cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+ cmake --build build
+
+ # tracking on camera input
+ ./build/opencv_zoo_object_tracking_vittrack
+
+ # tracking on video
+ ./build/opencv_zoo_object_tracking_vittrack -i=/path/to/video
+
+ # get help messages
+ ./build/opencv_zoo_object_tracking_vittrack -h
+ ```
 
  # Example outputs
 
models/object_tracking_vittrack/demo.cpp ADDED
@@ -0,0 +1,210 @@
+ #include <iostream>
+ #include <opencv2/opencv.hpp>
+
+ using namespace std;
+ using namespace cv;
+ using namespace dnn;
+
+ struct TrackingResult
+ {
+     bool isLocated;
+     Rect bbox;
+     float score;
+ };
+
+ class VitTrack
+ {
+ public:
+
+     VitTrack(const string& model_path, int backend_id = 0, int target_id = 0)
+     {
+         params.net = model_path;
+         params.backend = backend_id;
+         params.target = target_id;
+         model = TrackerVit::create(params);
+     }
+
+     void init(const Mat& image, const Rect& roi)
+     {
+         model->init(image, roi);
+     }
+
+     TrackingResult infer(const Mat& image)
+     {
+         TrackingResult result;
+         result.isLocated = model->update(image, result.bbox);
+         result.score = model->getTrackingScore();
+         return result;
+     }
+
+ private:
+     TrackerVit::Params params;
+     Ptr<TrackerVit> model;
+ };
+
+ Mat visualize(const Mat& image, const Rect& bbox, float score, bool isLocated, double fps = -1.0,
+               const Scalar& box_color = Scalar(0, 255, 0), const Scalar& text_color = Scalar(0, 255, 0),
+               double fontScale = 1.0, int fontSize = 1)
+ {
+     Mat output = image.clone();
+     int h = output.rows;
+     int w = output.cols;
+
+     if (fps >= 0)
+     {
+         putText(output, "FPS: " + to_string(fps), Point(0, 30), FONT_HERSHEY_DUPLEX, fontScale, text_color, fontSize);
+     }
+
+     if (isLocated && score >= 0.3)
+     {
+         rectangle(output, bbox, box_color, 2);
+         putText(output, format("%.2f", score), Point(bbox.x, bbox.y + 25),
+                 FONT_HERSHEY_DUPLEX, fontScale, text_color, fontSize);
+     }
+     else
+     {
+         Size text_size = getTextSize("Target lost!", FONT_HERSHEY_DUPLEX, fontScale, fontSize, nullptr);
+         int text_x = (w - text_size.width) / 2;
+         int text_y = (h - text_size.height) / 2;
+         putText(output, "Target lost!", Point(text_x, text_y), FONT_HERSHEY_DUPLEX, fontScale, Scalar(0, 0, 255), fontSize);
+     }
+
+     return output;
+ }
+
+ int main(int argc, char** argv)
+ {
+     CommandLineParser parser(argc, argv,
+         "{help h | | Print help message. }"
+         "{input i | |Set path to the input video. Omit for using default camera.}"
+         "{model_path |object_tracking_vittrack_2023sep.onnx |Set model path}"
+         "{backend_target bt |0 |Choose backend-target pair: 0 - OpenCV implementation + CPU, 1 - CUDA + GPU (CUDA), 2 - CUDA + GPU (CUDA FP16), 3 - TIM-VX + NPU, 4 - CANN + NPU}"
+         "{save s |false |Specify to save a file with results.}"
+         "{vis v |true |Specify to open a new window to show results.}");
+     if (parser.has("help"))
+     {
+         parser.printMessage();
+         return 0;
+     }
+
+     string input = parser.get<string>("input");
+     string model_path = parser.get<string>("model_path");
+     int backend_target = parser.get<int>("backend_target");
+     bool save = parser.get<bool>("save");
+     bool vis = parser.get<bool>("vis");
+
+     vector<vector<int>> backend_target_pairs =
+     {
+         {DNN_BACKEND_OPENCV, DNN_TARGET_CPU},
+         {DNN_BACKEND_CUDA, DNN_TARGET_CUDA},
+         {DNN_BACKEND_CUDA, DNN_TARGET_CUDA_FP16},
+         {DNN_BACKEND_TIMVX, DNN_TARGET_NPU},
+         {DNN_BACKEND_CANN, DNN_TARGET_NPU}
+     };
+
+     int backend_id = backend_target_pairs[backend_target][0];
+     int target_id = backend_target_pairs[backend_target][1];
+
+     // Create VitTrack tracker
+     VitTrack tracker(model_path, backend_id, target_id);
+
+     // Open video capture
+     VideoCapture video;
+     if (input.empty())
+     {
+         video.open(0); // Default camera
+     }
+     else
+     {
+         video.open(input);
+     }
+
+     if (!video.isOpened())
+     {
+         cerr << "Error: Could not open video source" << endl;
+         return -1;
+     }
+
+     // Select an object
+     Mat first_frame;
+     video >> first_frame;
+
+     if (first_frame.empty())
+     {
+         cerr << "No frames grabbed!" << endl;
+         return -1;
+     }
+
+     Mat first_frame_copy = first_frame.clone();
+     putText(first_frame_copy, "1. Drag a bounding box to track.", Point(0, 25), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
+     putText(first_frame_copy, "2. Press ENTER to confirm", Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0));
+     Rect roi = selectROI("VitTrack Demo", first_frame_copy);
+
+     if (roi.area() == 0)
+     {
+         cerr << "No ROI is selected! Exiting..." << endl;
+         return -1;
+     }
+     else
+     {
+         cout << "Selected ROI: " << roi << endl;
+     }
+
+     // Create VideoWriter if save option is specified
+     VideoWriter output_video;
+     if (save)
+     {
+         Size frame_size = first_frame.size();
+         output_video.open("output.mp4", VideoWriter::fourcc('m', 'p', '4', 'v'), video.get(CAP_PROP_FPS), frame_size);
+         if (!output_video.isOpened())
+         {
+             cerr << "Error: Could not create output video stream" << endl;
+             return -1;
+         }
+     }
+
+     // Initialize tracker with ROI
+     tracker.init(first_frame, roi);
+
+     // Track frame by frame
+     TickMeter tm;
+     while (waitKey(1) < 0)
+     {
+         video >> first_frame;
+         if (first_frame.empty())
+         {
+             cout << "End of video" << endl;
+             break;
+         }
+
+         // Inference
+         tm.start();
+         TrackingResult result = tracker.infer(first_frame);
+         tm.stop();
+
+         // Visualize
+         Mat frame = first_frame.clone();
+         frame = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS());
+
+         if (save)
+         {
+             output_video.write(frame);
+         }
+
+         if (vis)
+         {
+             imshow("VitTrack Demo", frame);
+         }
+         tm.reset();
+     }
+
+     if (save)
+     {
+         output_video.release();
+     }
+
+     video.release();
+     destroyAllWindows();
+
+     return 0;
+ }
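
The `VitTrack` class above is a thin wrapper around OpenCV's `cv::TrackerVit`. Stripped of the CLI parsing, interactive ROI selection, and drawing, the tracking loop reduces to roughly the sketch below; the fixed ROI, camera index, and model path are placeholders for illustration, not values the demo prescribes.

```cpp
#include <opencv2/opencv.hpp>

int main()
{
    // Default model filename from the demo's CLI; adjust the path if the file lives elsewhere.
    cv::TrackerVit::Params params;
    params.net = "object_tracking_vittrack_2023sep.onnx";
    cv::Ptr<cv::TrackerVit> tracker = cv::TrackerVit::create(params);

    cv::VideoCapture cap(0);   // default camera, as in the demo's no-input case
    cv::Mat frame;
    cap >> frame;
    if (frame.empty())
        return -1;

    // Fixed example ROI instead of the interactive selectROI() used by the demo.
    cv::Rect roi(100, 100, 80, 80);
    tracker->init(frame, roi);

    for (;;)
    {
        cap >> frame;
        if (frame.empty())
            break;

        cv::Rect bbox;
        bool located = tracker->update(frame, bbox);  // false once the target is lost
        float score = tracker->getTrackingScore();    // confidence of the current estimate

        if (located && score >= 0.3f)
            cv::rectangle(frame, bbox, cv::Scalar(0, 255, 0), 2);
        cv::imshow("TrackerVit sketch", frame);
        if (cv::waitKey(1) >= 0)
            break;
    }
    return 0;
}
```
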
models/object_tracking_vittrack/demo.py CHANGED
@@ -35,12 +35,11 @@ parser.add_argument('--backend_target', '-bt', type=int, default=0,
                          {:d}: TIM-VX + NPU,
                          {:d}: CANN + NPU
                      '''.format(*[x for x in range(len(backend_target_pairs))]))
- parser.add_argument('--save', '-s', action='store_true',
-                     help='Usage: Specify to save a file with results. Invalid in case of camera input.')
- parser.add_argument('--vis', '-v', action='store_true',
-                     help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
+ parser.add_argument('--save', '-s', action='store_true', default=False,
+                     help='Usage: Specify to save a file with results.')
+ parser.add_argument('--vis', '-v', action='store_true', default=True,
+                     help='Usage: Specify to open a new window to show results.')
  args = parser.parse_args()
-
  def visualize(image, bbox, score, isLocated, fps=None, box_color=(0, 255, 0),text_color=(0, 255, 0), fontScale = 1, fontSize = 1):
      output = image.copy()
      h, w, _ = output.shape
@@ -80,16 +79,21 @@ if __name__ == '__main__':
          print('No frames grabbed!')
          exit()
      first_frame_copy = first_frame.copy()
-     cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0, 15), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
-     cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0, 35), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
-     roi = cv.selectROI('vitTrack Demo', first_frame_copy)
+     cv.putText(first_frame_copy, "1. Drag a bounding box to track.", (0, 25), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
+     cv.putText(first_frame_copy, "2. Press ENTER to confirm", (0, 50), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0))
+     roi = cv.selectROI('VitTrack Demo', first_frame_copy)
 
      if np.all(np.array(roi) == 0):
-         print("No roi is selected! Exiting ...")
+         print("No ROI is selected! Exiting ...")
          exit()
      else:
          print("Selected ROI: {}".format(roi))
 
+     if args.save:
+         fps = video.get(cv.CAP_PROP_FPS)
+         frame_size = (first_frame.shape[1], first_frame.shape[0])
+         output_video = cv.VideoWriter('output.mp4', cv.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
+
      # Init tracker with ROI
      model.init(first_frame, roi)
 
@@ -106,5 +110,15 @@ if __name__ == '__main__':
          tm.stop()
          # Visualize
          frame = visualize(frame, bbox, score, isLocated, fps=tm.getFPS())
-         cv.imshow('VitTrack Demo', frame)
+         if args.save:
+             output_video.write(frame)
+
+         if args.vis:
+             cv.imshow('VitTrack Demo', frame)
          tm.reset()
+
+     if args.save:
+         output_video.release()
+
+     video.release()
+     cv.destroyAllWindows()
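
With the new `--save` and `--vis` arguments, the Python demo mirrors the C++ behaviour: `--save` writes the rendered frames to `output.mp4` in the working directory, and `--vis` (on by default) controls the preview window. A typical invocation, with a placeholder video path:

```bash
# track a video and also save the visualized result to output.mp4
python demo.py --input /path/to/video --save
```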