Yuantao Feng committed on
Commit
00c0329
·
1 Parent(s): bf7b7bf

Improve benchmark configurations (#2)

Browse files

* Improve benchmark configurations:

* Move data downloading from configs to download_data.py. Add an alternative download link.

* Add Data class to handle data loading and indexing.

* Add Metric class to manage benchmark runs.

* Benchmark results are now the median or geometric mean of benchmark
runs.

benchmark/README.md CHANGED
@@ -10,6 +10,10 @@ Time is measured from data preprocess (resize is excluded), to a forward pass of
10
 
11
  1. Install `python >= 3.6`.
12
  2. Install dependencies: `pip install -r requirements.txt`.
 
 
 
 
13
 
14
  ## Benchmarking
15
 
 
10
 
11
  1. Install `python >= 3.6`.
12
  2. Install dependencies: `pip install -r requirements.txt`.
13
+ 3. Download data for benchmarking.
14
+ 1. Download all data: `python download_data.py`
15
+ 2. Download one or more specific datasets: `python download_data.py face text`. The available names are listed in `download_data.py`.
16
+ 3. If the download fails, you can download all data from https://pan.baidu.com/s/18sV8D4vXUb2xC9EG45k7bg (code: pvrw). Please place the data packages under [./data](./data) and extract them there.
17
 
18
  ## Benchmarking
19
 
benchmark/benchmark.py CHANGED
@@ -7,7 +7,6 @@ import numpy as np
7
  import cv2 as cv
8
 
9
  from models import MODELS
10
- from download import Downloader
11
 
12
  parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
13
  parser.add_argument('--cfg', '-c', type=str,
@@ -15,11 +14,11 @@ parser.add_argument('--cfg', '-c', type=str,
15
  args = parser.parse_args()
16
 
17
  class Timer:
18
- def __init__(self):
 
 
19
  self._tm = cv.TickMeter()
20
-
21
  self._time_record = []
22
- self._average_time = 0
23
  self._calls = 0
24
 
25
  def start(self):
@@ -29,22 +28,121 @@ class Timer:
29
  self._tm.stop()
30
  self._calls += 1
31
  self._time_record.append(self._tm.getTimeMilli())
32
- self._average_time = sum(self._time_record) / self._calls
33
  self._tm.reset()
34
 
35
  def reset(self):
36
  self._time_record = []
37
- self._average_time = 0
38
  self._calls = 0
39
 
40
- def getAverageTime(self):
41
- return self._average_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  class Benchmark:
45
  def __init__(self, **kwargs):
46
- self._fileList = kwargs.pop('fileList', None)
47
- assert self._fileList, 'fileList cannot be empty'
 
 
 
 
48
 
49
  backend_id = kwargs.pop('backend', 'default')
50
  available_backends = dict(
@@ -71,76 +169,22 @@ class Benchmark:
71
  )
72
  self._target = available_targets[target_id]
73
 
74
- self._sizes = kwargs.pop('sizes', None)
75
- self._repeat = kwargs.pop('repeat', 100)
76
- self._parentPath = kwargs.pop('parentPath', 'benchmark/data')
77
- self._useGroundTruth = kwargs.pop('useDetectionLabel', False) # If it is enable, 'sizes' will not work
78
- assert (self._sizes and not self._useGroundTruth) or (not self._sizes and self._useGroundTruth), 'If \'useDetectionLabel\' is True, \'sizes\' should not exist.'
79
-
80
- self._timer = Timer()
81
- self._benchmark_results = dict.fromkeys(self._fileList, dict())
82
-
83
- if self._useGroundTruth:
84
- self.loadLabel()
85
-
86
- def loadLabel(self):
87
- self._labels = dict.fromkeys(self._fileList, None)
88
- for imgName in self._fileList:
89
- self._labels[imgName] = np.loadtxt(os.path.join(self._parentPath, '{}.txt'.format(imgName[:-4])))
90
 
91
  def run(self, model):
92
  model.setBackend(self._backend)
93
  model.setTarget(self._target)
94
 
95
- for imgName in self._fileList:
96
- img = cv.imread(os.path.join(self._parentPath, imgName))
97
- if self._useGroundTruth:
98
- for idx, gt in enumerate(self._labels[imgName]):
99
- self._benchmark_results[imgName]['gt{}'.format(idx)] = self._run(
100
- model,
101
- img,
102
- gt,
103
- pbar_msg=' {}, gt{}'.format(imgName, idx)
104
- )
105
- else:
106
- if self._sizes is None:
107
- h, w, _ = img.shape
108
- model.setInputSize([w, h])
109
- self._benchmark_results[imgName][str([w, h])] = self._run(
110
- model,
111
- img,
112
- pbar_msg=' {}, original size {}'.format(imgName, str([w, h]))
113
- )
114
- else:
115
- for size in self._sizes:
116
- imgResized = cv.resize(img, size)
117
- model.setInputSize(size)
118
- self._benchmark_results[imgName][str(size)] = self._run(
119
- model,
120
- imgResized,
121
- pbar_msg=' {}, size {}'.format(imgName, str(size))
122
- )
123
 
124
  def printResults(self):
125
- print(' Results:')
126
  for imgName, results in self._benchmark_results.items():
127
- print(' image: {}'.format(imgName))
128
  total_latency = 0
129
  for key, latency in results.items():
130
  total_latency += latency
131
- print(' {}, latency: {:.4f} ms'.format(key, latency))
132
- print(' Average latency: {:.4f} ms'.format(total_latency / len(results)))
133
-
134
- def _run(self, model, *args, **kwargs):
135
- self._timer.reset()
136
- pbar = tqdm.tqdm(range(self._repeat))
137
- for _ in pbar:
138
- pbar.set_description(kwargs.get('pbar_msg', None))
139
-
140
- self._timer.start()
141
- results = model.infer(*args)
142
- self._timer.stop()
143
- return self._timer.getAverageTime()
144
 
145
 
146
  def build_from_cfg(cfg, registery):
@@ -160,16 +204,9 @@ if __name__ == '__main__':
160
  cfg = yaml.safe_load(f)
161
 
162
  # prepend PYTHONPATH to each path
163
- prepend_pythonpath(cfg, key1='Data', key2='parentPath')
164
- prepend_pythonpath(cfg, key1='Benchmark', key2='parentPath')
165
  prepend_pythonpath(cfg, key1='Model', key2='modelPath')
166
 
167
-
168
- # Download data if not exist
169
- print('Loading data:')
170
- downloader = Downloader(**cfg['Data'])
171
- downloader.get()
172
-
173
  # Instantiate benchmarking
174
  benchmark = Benchmark(**cfg['Benchmark'])
175
 
 
7
  import cv2 as cv
8
 
9
  from models import MODELS
 
10
 
11
  parser = argparse.ArgumentParser("Benchmarks for OpenCV Zoo.")
12
  parser.add_argument('--cfg', '-c', type=str,
 
14
  args = parser.parse_args()
15
 
16
  class Timer:
17
+ def __init__(self, warmup=0, reduction='median'):
18
+ self._warmup = warmup
19
+ self._reduction = reduction
20
  self._tm = cv.TickMeter()
 
21
  self._time_record = []
 
22
  self._calls = 0
23
 
24
  def start(self):
 
28
  self._tm.stop()
29
  self._calls += 1
30
  self._time_record.append(self._tm.getTimeMilli())
 
31
  self._tm.reset()
32
 
33
  def reset(self):
34
  self._time_record = []
 
35
  self._calls = 0
36
 
37
+ def getResult(self):
38
+ if self._reduction == 'median':
39
+ return self._getMedian(self._time_record[self._warmup:])
40
+ elif self._reduction == 'gmean':
41
+ return self._getGMean(self._time_record[self._warmup:])
42
+ else:
43
+ raise NotImplementedError()
44
+
45
+ def _getMedian(self, records):
46
+ ''' Return median time
47
+ '''
48
+ l = len(records)
49
+ mid = int(l / 2)
50
+ if l % 2 == 0:
51
+ return (records[mid] + records[mid - 1]) / 2
52
+ else:
53
+ return records[mid]
54
+
55
+ def _getGMean(self, records, drop_largest=3):
56
+ ''' Return geometric mean of time
57
+ '''
58
+ time_record_sorted = sorted(records, reverse=True)
59
+ return sum(records[drop_largest:]) / (self._calls - drop_largest)
60
+
61
+ class Data:
62
+ def __init__(self, **kwargs):
63
+ self._path = kwargs.pop('path', None)
64
+ assert self._path, 'Benchmark[\'data\'][\'path\'] cannot be empty.'
65
+
66
+ self._files = kwargs.pop('files', None)
67
+ if not self._files:
68
+ print('Benchmark[\'data\'][\'files\'] is empty, loading all images by default.')
69
+ self._files = list()
70
+ for filename in os.listdir(self._path):
71
+ if filename.endswith('jpg') or filename.endswith('png'):
72
+ self._files.append(filename)
73
+
74
+ self._use_label = kwargs.pop('useLabel', False)
75
+ if self._use_label:
76
+ self._labels = self._load_label()
77
+
78
+ def _load_label(self):
79
+ labels = dict.fromkeys(self._files, None)
80
+ for filename in self._files:
81
+ labels[filename] = np.loadtxt(os.path.join(self._path, '{}.txt'.format(filename[:-4])))
82
+ return labels
83
+
84
+ def __getitem__(self, idx):
85
+ image = cv.imread(os.path.join(self._path, self._files[idx]))
86
+ if self._use_label:
87
+ return self._files[idx], image, self._labels[self._files[idx]]
88
+ else:
89
+ return self._files[idx], image
90
+
91
+ class Metric:
92
+ def __init__(self, **kwargs):
93
+ self._sizes = kwargs.pop('sizes', None)
94
+ self._warmup = kwargs.pop('warmup', 3)
95
+ self._repeat = kwargs.pop('repeat', 10)
96
+ assert self._warmup < self._repeat, 'The value of warmup must be smaller than the value of repeat.'
97
+ self._batch_size = kwargs.pop('batchSize', 1)
98
+ self._reduction = kwargs.pop('reduction', 'median')
99
+
100
+ self._timer = Timer(self._warmup, self._reduction)
101
 
102
+ def getReduction(self):
103
+ return self._reduction
104
+
105
+ def forward(self, model, *args, **kwargs):
106
+ img = args[0]
107
+ h, w, _ = img.shape
108
+ if not self._sizes:
109
+ self._sizes = [[w, h]]
110
+
111
+ results = dict()
112
+ self._timer.reset()
113
+ if len(args) == 1:
114
+ for size in self._sizes:
115
+ img_r = cv.resize(img, size)
116
+ model.setInputSize(size)
117
+ # TODO: batched inference
118
+ # input_data = [img] * self._batch_size
119
+ input_data = img_r
120
+ for _ in range(self._repeat+self._warmup):
121
+ self._timer.start()
122
+ model.infer(input_data)
123
+ self._timer.stop()
124
+ results[str(size)] = self._timer.getResult()
125
+ else:
126
+ # TODO: batched inference
127
+ # input_data = [args] * self._batch_size
128
+ bboxes = args[1]
129
+ for idx, bbox in enumerate(bboxes):
130
+ for _ in range(self._repeat+self._warmup):
131
+ self._timer.start()
132
+ model.infer(img, bbox)
133
+ self._timer.stop()
134
+ results['bbox{}'.format(idx)] = self._timer.getResult()
135
+
136
+ return results
137
 
138
  class Benchmark:
139
  def __init__(self, **kwargs):
140
+ self._data_dict = kwargs.pop('data', None)
141
+ assert self._data_dict, 'Benchmark[\'data\'] cannot be empty and must have path and files.'
142
+ self._data = Data(**self._data_dict)
143
+
144
+ self._metric_dict = kwargs.pop('metric', None)
145
+ self._metric = Metric(**self._metric_dict)
146
 
147
  backend_id = kwargs.pop('backend', 'default')
148
  available_backends = dict(
 
169
  )
170
  self._target = available_targets[target_id]
171
 
172
+ self._benchmark_results = dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  def run(self, model):
175
  model.setBackend(self._backend)
176
  model.setTarget(self._target)
177
 
178
+ for data in self._data:
179
+ self._benchmark_results[data[0]] = self._metric.forward(model, *data[1:])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  def printResults(self):
 
182
  for imgName, results in self._benchmark_results.items():
183
+ print(' image: {}'.format(imgName))
184
  total_latency = 0
185
  for key, latency in results.items():
186
  total_latency += latency
187
+ print(' {}, latency ({}): {:.4f} ms'.format(key, self._metric.getReduction(), latency))
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
 
190
  def build_from_cfg(cfg, registery):
 
204
  cfg = yaml.safe_load(f)
205
 
206
  # prepend PYTHONPATH to each path
207
+ prepend_pythonpath(cfg['Benchmark'], key1='data', key2='path')
 
208
  prepend_pythonpath(cfg, key1='Model', key2='modelPath')
209
 
 
 
 
 
 
 
210
  # Instantiate benchmarking
211
  benchmark = Benchmark(**cfg['Benchmark'])
212
 
benchmark/config/face_detection_yunet.yaml CHANGED
@@ -1,23 +1,18 @@
1
- Data:
2
- name: "Images for Face Detection"
3
- url: "https://drive.google.com/u/0/uc?id=1lOAliAIeOv4olM65YDzE55kn6XjiX2l6&export=download"
4
- sha: "0ba67a9cfd60f7fdb65cdb7c55a1ce76c1193df1"
5
- filename: "face_detection.zip"
6
- parentPath: "benchmark/data"
7
-
8
  Benchmark:
9
  name: "Face Detection Benchmark"
10
- parentPath: "benchmark/data/face_detection"
11
- fileList:
12
- - "group.jpg"
13
- - "concerts.jpg"
14
- - "dance.jpg"
 
 
 
 
 
 
15
  backend: "default"
16
  target: "cpu"
17
- sizes: # [w, h], Omit to run at original scale
18
- - [160, 120]
19
- - [640, 480]
20
- repeat: 100 # default 100
21
 
22
  Model:
23
  name: "YuNet"
 
 
 
 
 
 
 
 
1
  Benchmark:
2
  name: "Face Detection Benchmark"
3
+ data:
4
+ path: "benchmark/data/face"
5
+ files: ["group.jpg", "concerts.jpg", "dance.jpg"]
6
+ metric:
7
+ sizes: # [[w1, h1], ...], Omit to run at original scale
8
+ - [160, 120]
9
+ - [640, 480]
10
+ warmup: 3
11
+ repeat: 10
12
+ batchSize: 1
13
+ reduction: 'median'
14
  backend: "default"
15
  target: "cpu"
 
 
 
 
16
 
17
  Model:
18
  name: "YuNet"
benchmark/config/text_detection_db.yaml CHANGED
@@ -1,22 +1,17 @@
1
- Data:
2
- name: "Images for Text Detection"
3
- url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
4
- sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
5
- filename: "text.zip"
6
- parentPath: "benchmark/data"
7
-
8
  Benchmark:
9
  name: "Text Detection Benchmark"
10
- parentPath: "benchmark/data/text"
11
- fileList:
12
- - "1.jpg"
13
- - "2.jpg"
14
- - "3.jpg"
 
 
 
 
 
15
  backend: "default"
16
  target: "cpu"
17
- sizes: # [w, h], default original scale
18
- - [640, 480]
19
- repeat: 100
20
 
21
  Model:
22
  name: "DB"
 
 
 
 
 
 
 
 
1
  Benchmark:
2
  name: "Text Detection Benchmark"
3
+ data:
4
+ path: "benchmark/data/text"
5
+ files: ["1.jpg", "2.jpg", "3.jpg"]
6
+ metric:
7
+ sizes: # [[w1, h1], ...], Omit to run at original scale
8
+ - [640, 480]
9
+ warmup: 3
10
+ repeat: 10
11
+ batchSize: 1
12
+ reduction: 'median'
13
  backend: "default"
14
  target: "cpu"
 
 
 
15
 
16
  Model:
17
  name: "DB"
benchmark/config/text_recognition_crnn.yaml CHANGED
@@ -1,21 +1,16 @@
1
- Data:
2
- name: "Images for Text Detection"
3
- url: "https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download"
4
- sha: "a40cf095ceb77159ddd2a5902f3b4329696dd866"
5
- filename: "text.zip"
6
- parentPath: "benchmark/data"
7
-
8
  Benchmark:
9
  name: "Text Recognition Benchmark"
10
- parentPath: "benchmark/data/text"
11
- fileList:
12
- - "1.jpg"
13
- - "2.jpg"
14
- - "3.jpg"
 
 
 
 
15
  backend: "default"
16
  target: "cpu"
17
- useDetectionLabel: True
18
- repeat: 100
19
 
20
  Model:
21
  name: "CRNN"
 
 
 
 
 
 
 
 
1
  Benchmark:
2
  name: "Text Recognition Benchmark"
3
+ data:
4
+ path: "benchmark/data/text"
5
+ files: ["1.jpg", "2.jpg", "3.jpg"]
6
+ useLabel: True
7
+ metric: # 'sizes' is omitted since this model requires input of fixed size
8
+ warmup: 3
9
+ repeat: 10
10
+ batchSize: 1
11
+ reduction: 'median'
12
  backend: "default"
13
  target: "cpu"
 
 
14
 
15
  Model:
16
  name: "CRNN"
benchmark/{download.py → download_data.py} RENAMED
@@ -32,7 +32,7 @@ class Downloader:
32
  if c in d:
33
  return int(d[c]) / self.MB
34
  return '<unknown>'
35
- print(' {} {} [{} Mb]'.format(r.getcode(), r.msg, getMB(r)))
36
 
37
  def verifyHash(self):
38
  if not self._sha:
@@ -46,44 +46,45 @@ class Downloader:
46
  break
47
  sha.update(buf)
48
  if self._sha != sha.hexdigest():
49
- print(' actual {}'.format(sha.hexdigest()))
50
- print(' expect {}'.format(self._sha))
51
  return self._sha == sha.hexdigest()
52
  except Exception as e:
53
- print(' catch {}'.format(e))
54
 
55
  def get(self):
 
56
  if self.verifyHash():
57
- print(' hash match - skipping download')
58
  else:
59
  basedir = os.path.dirname(self._saveTo)
60
  if basedir and not os.path.exists(basedir):
61
- print(' creating directory: ' + basedir)
62
  os.makedirs(basedir, exist_ok=True)
63
 
64
- print(' hash check failed - downloading')
65
  if 'drive.google.com' in self._url:
66
  urlquery = urlparse(self._url).query.split('&')
67
  for q in urlquery:
68
  if 'id=' in q:
69
  gid = q[3:]
70
  sz = GDrive(gid)(osp.join(self._saveTo, self._filename))
71
- print(' size = %.2f Mb' % (sz / (1024.0 * 1024)))
72
  else:
73
- print(' get {}'.format(self._url))
74
  self.download()
75
 
76
  # Verify hash after download
77
- print(' done')
78
- print(' file {}'.format(self._filename))
79
  if self.verifyHash():
80
- print(' hash match - extracting')
81
  else:
82
- print(' hash check failed - exiting')
83
 
84
  # Extract
85
  if '.zip' in self._filename:
86
- print(' extracting - ', end='')
87
  self.extract()
88
  print('done')
89
 
@@ -161,3 +162,32 @@ def GDrive(gid):
161
  print('')
162
  return sz
163
  return download_gdrive
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  if c in d:
33
  return int(d[c]) / self.MB
34
  return '<unknown>'
35
+ print(' {} {} [{} Mb]'.format(r.getcode(), r.msg, getMB(r)))
36
 
37
  def verifyHash(self):
38
  if not self._sha:
 
46
  break
47
  sha.update(buf)
48
  if self._sha != sha.hexdigest():
49
+ print(' actual {}'.format(sha.hexdigest()))
50
+ print(' expect {}'.format(self._sha))
51
  return self._sha == sha.hexdigest()
52
  except Exception as e:
53
+ print(' catch {}'.format(e))
54
 
55
  def get(self):
56
+ print(' {}: {}'.format(self._name, self._filename))
57
  if self.verifyHash():
58
+ print(' hash match - skipping download')
59
  else:
60
  basedir = os.path.dirname(self._saveTo)
61
  if basedir and not os.path.exists(basedir):
62
+ print(' creating directory: ' + basedir)
63
  os.makedirs(basedir, exist_ok=True)
64
 
65
+ print(' hash check failed - downloading')
66
  if 'drive.google.com' in self._url:
67
  urlquery = urlparse(self._url).query.split('&')
68
  for q in urlquery:
69
  if 'id=' in q:
70
  gid = q[3:]
71
  sz = GDrive(gid)(osp.join(self._saveTo, self._filename))
72
+ print(' size = %.2f Mb' % (sz / (1024.0 * 1024)))
73
  else:
74
+ print(' get {}'.format(self._url))
75
  self.download()
76
 
77
  # Verify hash after download
78
+ print(' done')
79
+ print(' file {}'.format(self._filename))
80
  if self.verifyHash():
81
+ print(' hash match - extracting')
82
  else:
83
+ print(' hash check failed - exiting')
84
 
85
  # Extract
86
  if '.zip' in self._filename:
87
+ print(' extracting - ', end='')
88
  self.extract()
89
  print('done')
90
 
 
162
  print('')
163
  return sz
164
  return download_gdrive
165
+
166
+ # Data will be downloaded and extracted to ./data by default
167
+ data_downloaders = dict(
168
+ face=Downloader(name='face',
169
+ url='https://drive.google.com/u/0/uc?id=1lOAliAIeOv4olM65YDzE55kn6XjiX2l6&export=download',
170
+ sha='8397f115c0d4447e55ea05488579e71a813e2691',
171
+ filename='face.zip'),
172
+ text=Downloader(name='text',
173
+ url='https://drive.google.com/u/0/uc?id=1lTQdZUau7ujHBqp0P6M1kccnnJgO-dRj&export=download',
174
+ sha='a40cf095ceb77159ddd2a5902f3b4329696dd866',
175
+ filename='text.zip'),
176
+ )
177
+
178
+ if __name__ == '__main__':
179
+ selected_data_names = []
180
+ for i in range(1, len(sys.argv)):
181
+ selected_data_names.append(sys.argv[i])
182
+ if not selected_data_names:
183
+ selected_data_names = list(data_downloaders.keys())
184
+ print('Data will be downloaded: {}'.format(str(selected_data_names)))
185
+
186
+ download_failed = []
187
+ for selected_data_name in selected_data_names:
188
+ downloader = data_downloaders[selected_data_name]
189
+ if not downloader.get():
190
+ download_failed.append(downloader._name)
191
+
192
+ if download_failed:
193
+ print('Data have not been downloaded: {}'.format(str(download_failed)))