redhecker, Gongjunzhe12210401 committed
Commit e4da79f · 1 Parent(s): bd48bb3

C++ Demo for person_reid_youtureid (#277)

* add demo.cpp

* add CMakeLists.txt

* Update README.md

* turn standard to c++11

---------

Co-authored-by: Gongjunzhe12210401 <[email protected]>

models/person_reid_youtureid/CMakeLists.txt ADDED
@@ -0,0 +1,11 @@
+ cmake_minimum_required(VERSION 3.24.0)
+ project(opencv_zoo_person_reid_youtureid)
+
+ set(OPENCV_VERSION "4.10.0")
+ set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+
+ # Find OpenCV
+ find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+
+ add_executable(demo demo.cpp)
+ target_link_libraries(demo ${OpenCV_LIBS})
models/person_reid_youtureid/README.md CHANGED
@@ -10,6 +10,7 @@ This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/openc

Run the following command to try the demo:

+ ### Python
```shell
python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery -v

@@ -17,6 +18,18 @@ python demo.py --query_dir /path/to/query --gallery_dir /path/to/gallery -v
python demo.py --help
```

+ ### C++
+ ```shell
+ # A typical and default installation path of OpenCV is /usr/local
+ cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+ cmake --build build
+
+ ./build/demo --query_dir=/path/to/query --gallery_dir=/path/to/gallery -v
+
+ # get help regarding various parameters
+ ./build/demo --help
+ ```
+
### License

All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
models/person_reid_youtureid/demo.cpp ADDED
@@ -0,0 +1,308 @@
+ #include <opencv2/opencv.hpp>
+ #include "opencv2/dnn.hpp"
+ #include <iostream>
+ #include <vector>
+ #include <map>
+ #include <string>
+ #include <numeric>
+
+
+ // YoutuReID class for person re-identification
+ class YoutuReID {
+ public:
+     YoutuReID(const std::string& model_path,
+               const cv::Size& input_size = cv::Size(128, 256),
+               int output_dim = 768,
+               const cv::Scalar& mean = cv::Scalar(0.485, 0.456, 0.406),
+               const cv::Scalar& std = cv::Scalar(0.229, 0.224, 0.225),
+               int backend_id = 0,
+               int target_id = 0)
+         : model_path_(model_path), input_size_(input_size),
+           output_dim_(output_dim), mean_(mean), std_(std),
+           backend_id_(backend_id), target_id_(target_id)
+     {
+
+         model_ = cv::dnn::readNet(model_path_);
+         model_.setPreferableBackend(backend_id_);
+         model_.setPreferableTarget(target_id_);
+     }
+
+     void setBackendAndTarget(int backend_id, int target_id) {
+         backend_id_ = backend_id;
+         target_id_ = target_id;
+         model_.setPreferableBackend(backend_id_);
+         model_.setPreferableTarget(target_id_);
+     }
+
+     void setInputSize(const cv::Size& input_size) {
+         input_size_ = input_size;
+     }
+
+     // Preprocess image by converting BGR to RGB, scaling to [0, 1], normalizing per channel, and creating a blob
+     cv::Mat preprocess(const cv::Mat& image) {
+         cv::Mat img;
+         cv::cvtColor(image, img, cv::COLOR_BGR2RGB);
+         img.convertTo(img, CV_32F, 1.0 / 255.0);
+
+         // Normalize each channel separately
+         std::vector<cv::Mat> channels(3);
+         cv::split(img, channels);
+         channels[0] = (channels[0] - mean_[0]) / std_[0];
+         channels[1] = (channels[1] - mean_[1]) / std_[1];
+         channels[2] = (channels[2] - mean_[2]) / std_[2];
+         cv::merge(channels, img);
+
+         return cv::dnn::blobFromImage(img);
+     }
+
+     // Run inference to extract feature vector
+     cv::Mat infer(const cv::Mat& image) {
+         cv::Mat input_blob = preprocess(image);
+         model_.setInput(input_blob);
+         cv::Mat features = model_.forward();
+
+         if (features.dims == 4 && features.size[2] == 1 && features.size[3] == 1) {
+             features = features.reshape(1, {1, features.size[1]});
+         }
+
+         return features;
+     }
+
+     // Perform query, comparing each query image to each gallery image
+     std::vector<std::vector<int>> query(const std::vector<cv::Mat>& query_img_list,
+                                         const std::vector<cv::Mat>& gallery_img_list,
+                                         int topK = 5) {
+         std::vector<cv::Mat> query_features_list, gallery_features_list;
+         cv::Mat query_features, gallery_features;
+
+         for (size_t i = 0; i < query_img_list.size(); ++i) {
+             cv::Mat feature = infer(query_img_list[i]);
+             query_features_list.push_back(feature.clone());
+         }
+         cv::vconcat(query_features_list, query_features);
+         normalizeFeatures(query_features);
+
+         for (size_t i = 0; i < gallery_img_list.size(); ++i) {
+             cv::Mat feature = infer(gallery_img_list[i]);
+             gallery_features_list.push_back(feature.clone());
+         }
+         cv::vconcat(gallery_features_list, gallery_features);
+         normalizeFeatures(gallery_features);
+
+         cv::Mat dist = query_features * gallery_features.t();
+         return getTopK(dist, topK);
+     }
+
+ private:
+     // Normalize feature vectors row-wise to unit length
+     void normalizeFeatures(cv::Mat& features) {
+         const float epsilon = 1e-6;
+         for (int i = 0; i < features.rows; ++i) {
+             cv::Mat featureRow = features.row(i);
+             float norm = cv::norm(featureRow, cv::NORM_L2);
+             if (norm < epsilon) {
+                 norm = epsilon;
+             }
+             featureRow /= norm;
+         }
+     }
+
+     // Retrieve Top-K indices from similarity matrix
+     std::vector<std::vector<int>> getTopK(const cv::Mat& dist, int topK) {
+         std::vector<std::vector<int>> indices(dist.rows);
+
+         for (int i = 0; i < dist.rows; ++i) {
+             std::vector<std::pair<float, int>> sim_index_pairs;
+             for (int j = 0; j < dist.cols; ++j) {
+                 sim_index_pairs.emplace_back(dist.at<float>(i, j), j);
+             }
+             std::sort(sim_index_pairs.begin(), sim_index_pairs.end(),
+                       [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
+                           return a.first > b.first;
+                       });
+
+             for (int k = 0; k < topK && k < sim_index_pairs.size(); ++k) {
+                 indices[i].push_back(sim_index_pairs[k].second);
+             }
+         }
+         return indices;
+     }
+
+     std::string model_path_;
+     cv::Size input_size_;
+     int output_dim_;
+     cv::Scalar mean_, std_;
+     int backend_id_;
+     int target_id_;
+     cv::dnn::Net model_;
+ };
+
+ // Read images from directory and return a pair of image list and file list
+ std::pair<std::vector<cv::Mat>, std::vector<std::string>> readImagesFromDirectory(const std::string& img_dir, int w = 128, int h = 256) {
+     std::vector<cv::Mat> img_list;
+     std::vector<std::string> file_list;
+
+     std::vector<std::string> file_names;
+     cv::glob(img_dir + "/*", file_names, false);
+
+     for (size_t i = 0; i < file_names.size(); ++i) {
+         std::string file_name = file_names[i].substr(file_names[i].find_last_of("/\\") + 1);
+         cv::Mat img = cv::imread(file_names[i]);
+         if (!img.empty()) {
+             cv::resize(img, img, cv::Size(w, h));
+             img_list.push_back(img);
+             file_list.push_back(file_name);
+         }
+     }
+     return std::make_pair(img_list, file_list);
+ }
+
+ // Visualize query and gallery results by creating concatenated images
+ std::map<std::string, cv::Mat> visualize(
+     const std::map<std::string, std::vector<std::string>>& results,
+     const std::string& query_dir,
+     const std::string& gallery_dir,
+     const cv::Size& output_size = cv::Size(128, 384)) {
+
+     std::map<std::string, cv::Mat> results_vis;
+
+     for (std::map<std::string, std::vector<std::string>>::const_iterator it = results.begin(); it != results.end(); ++it) {
+         const std::string& query_file = it->first;
+         const std::vector<std::string>& top_matches = it->second;
+
+         cv::Mat query_img = cv::imread(query_dir + "/" + query_file);
+         if (query_img.empty()) continue;
+
+         cv::resize(query_img, query_img, output_size);
+         cv::copyMakeBorder(query_img, query_img, 5, 5, 5, 5,
+                            cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
+         cv::putText(query_img, "Query", cv::Point(10, 30),
+                     cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);
+
+         cv::Mat concat_img = query_img;
+
+         for (size_t i = 0; i < top_matches.size(); ++i) {
+             cv::Mat gallery_img = cv::imread(gallery_dir + "/" + top_matches[i]);
+             if (gallery_img.empty()) continue;
+
+             cv::resize(gallery_img, gallery_img, output_size);
+             cv::copyMakeBorder(gallery_img, gallery_img, 5, 5, 5, 5,
+                                cv::BORDER_CONSTANT, cv::Scalar(255, 255, 255));
+             cv::putText(gallery_img, "G" + std::to_string(i), cv::Point(10, 30),
+                         cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 0), 2);
+
+             cv::hconcat(concat_img, gallery_img, concat_img);
+         }
+         results_vis[query_file] = concat_img;
+     }
+     return results_vis;
+ }
+
+ void printHelpMessage() {
+     std::cout << "usage: demo.cpp [-h] [--query_dir QUERY_DIR] [--gallery_dir GALLERY_DIR] "
+               << "[--backend_target BACKEND_TARGET] [--topk TOPK] [--model MODEL] [--save] [--vis]\n\n"
+               << "ReID baseline models from Tencent Youtu Lab\n\n"
+               << "optional arguments:\n"
+               << " -h, --help show this help message and exit\n"
+               << " --query_dir QUERY_DIR, -q QUERY_DIR\n"
+               << " Query directory.\n"
+               << " --gallery_dir GALLERY_DIR, -g GALLERY_DIR\n"
+               << " Gallery directory.\n"
+               << " --backend_target BACKEND_TARGET, -bt BACKEND_TARGET\n"
+               << " Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + "
+                  "CPU, 1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU\n"
+               << " --topk TOPK Top-K closest from gallery for each query.\n"
+               << " --model MODEL, -m MODEL\n"
+               << " Path to the model.\n"
+               << " --save, -s Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in "
+                  "case of camera input.\n"
+               << " --vis, -v Usage: Specify to open a new window to show results. Invalid in case of camera input.\n";
+ }
+
+ int main(int argc, char** argv) {
+     // CommandLineParser setup
+     cv::CommandLineParser parser(argc, argv,
+         "{help h | | Show help message.}"
+         "{query_dir q | | Query directory.}"
+         "{gallery_dir g | | Gallery directory.}"
+         "{backend_target bt | 0 | Choose one of the backend-target pair to run this demo: 0: (default) OpenCV implementation + CPU, "
+         "1: CUDA + GPU (CUDA), 2: CUDA + GPU (CUDA FP16), 3: TIM-VX + NPU, 4: CANN + NPU}"
+         "{topk k | 10 | Top-K closest from gallery for each query.}"
+         "{model m | person_reid_youtu_2021nov.onnx | Path to the model.}"
+         "{save s | false | Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.}"
+         "{vis v | false | Usage: Specify to open a new window to show results. Invalid in case of camera input.}");
+
+     if (parser.has("help")) {
+         printHelpMessage();
+         return 0;
+     }
+
+     std::string query_dir = parser.get<std::string>("query_dir");
+     std::string gallery_dir = parser.get<std::string>("gallery_dir");
+     int backend_target = parser.get<int>("backend_target");
+     int topK = parser.get<int>("topk");
+     std::string model_path = parser.get<std::string>("model");
+     bool save_flag = parser.get<bool>("save");
+     bool vis_flag = parser.get<bool>("vis");
+
+     if (!parser.check()) {
+         parser.printErrors();
+         return 1;
+     }
+
+     const std::vector<std::pair<int, int>> backend_target_pairs = {
+         {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
+         {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
+         {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},
+         {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},
+         {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}
+     };
+
+     int backend_id = backend_target_pairs[backend_target].first;
+     int target_id = backend_target_pairs[backend_target].second;
+
+     YoutuReID reid(model_path, cv::Size(128, 256), 768,
+                    cv::Scalar(0.485, 0.456, 0.406),
+                    cv::Scalar(0.229, 0.224, 0.225),
+                    backend_id, target_id);
+
+     std::pair<std::vector<cv::Mat>, std::vector<std::string>> query_data = readImagesFromDirectory(query_dir);
+     std::pair<std::vector<cv::Mat>, std::vector<std::string>> gallery_data = readImagesFromDirectory(gallery_dir);
+
+     std::vector<std::vector<int>> indices = reid.query(query_data.first, gallery_data.first, topK);
+
+     std::map<std::string, std::vector<std::string>> results;
+     for (size_t i = 0; i < query_data.second.size(); ++i) {
+         std::vector<std::string> top_matches;
+         for (int idx : indices[i]) {
+             top_matches.push_back(gallery_data.second[idx]);
+         }
+         results[query_data.second[i]] = top_matches;
+         std::cout << "Query: " << query_data.second[i] << "\n";
+         std::cout << "\tTop-" << topK << " from gallery: ";
+         for (size_t j = 0; j < top_matches.size(); ++j) {
+             std::cout << top_matches[j] << " ";
+         }
+         std::cout << std::endl;
+     }
+
+     std::map<std::string, cv::Mat> results_vis = visualize(results, query_dir, gallery_dir);
+
+     if (save_flag) {
+         for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
+             std::string save_path = "result-" + it->first;
+             cv::imwrite(save_path, it->second);
+         }
+     }
+
+     if (vis_flag) {
+         for (std::map<std::string, cv::Mat>::iterator it = results_vis.begin(); it != results_vis.end(); ++it) {
+             cv::namedWindow("result-" + it->first, cv::WINDOW_AUTOSIZE);
+             cv::imshow("result-" + it->first, it->second);
+             cv::waitKey(0);
+             cv::destroyAllWindows();
+         }
+     }
+
+     return 0;
+ }
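
For reference, the matching step in `demo.cpp` reduces to cosine similarity between L2-normalized feature rows followed by a top-K sort, which is what `normalizeFeatures`, the `query_features * gallery_features.t()` product, and `getTopK` implement. The standalone sketch below illustrates that logic on toy feature values (made up for illustration; this file is not part of the commit):

```cpp
#include <opencv2/core.hpp>

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// L2-normalize every row so that row-wise dot products become cosine similarities.
static void l2NormalizeRows(cv::Mat& features) {
    for (int i = 0; i < features.rows; ++i)
        cv::normalize(features.row(i), features.row(i));
}

int main() {
    // Toy feature matrices: 2 query rows and 3 gallery rows of dimension 4 (made-up values).
    cv::Mat query = (cv::Mat_<float>(2, 4) << 1, 0, 0, 0,
                                              0, 1, 1, 0);
    cv::Mat gallery = (cv::Mat_<float>(3, 4) << 1, 0, 0, 1,
                                                0, 1, 0, 0,
                                                0, 0, 1, 1);

    l2NormalizeRows(query);
    l2NormalizeRows(gallery);

    // With unit-length rows, this product is the query-by-gallery cosine-similarity matrix.
    cv::Mat sim = query * gallery.t();

    // For each query row, report the indices of the K most similar gallery rows (K = 2 here).
    const int topK = 2;
    for (int i = 0; i < sim.rows; ++i) {
        std::vector<std::pair<float, int>> scored;
        for (int j = 0; j < sim.cols; ++j)
            scored.emplace_back(sim.at<float>(i, j), j);
        std::partial_sort(scored.begin(), scored.begin() + topK, scored.end(),
                          [](const std::pair<float, int>& a, const std::pair<float, int>& b) {
                              return a.first > b.first;
                          });
        std::cout << "query " << i << " -> gallery indices:";
        for (int k = 0; k < topK; ++k)
            std::cout << " " << scored[k].second;
        std::cout << "\n";
    }
    return 0;
}
```

On most OpenCV 4 installations the sketch builds with something like `g++ -std=c++11 sketch.cpp $(pkg-config --cflags --libs opencv4)` (the file name is hypothetical); the demo itself is built with the CMake project added above.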