Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| from mmengine import get_file_backend, list_from_file | |
| from mmpretrain.registry import DATASETS | |
| from .base_dataset import BaseDataset | |
# NOTE(review): ``DATASETS`` is imported by this file but no
# ``@DATASETS.register_module()`` decorator is visible on this class --
# confirm whether the registration was dropped unintentionally.
class InShop(BaseDataset):
    """InShop Dataset for Image Retrieval.

    Please download the images from the homepage
    'https://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/InShopRetrieval.html'
    (In-shop Clothes Retrieval Benchmark -> Img -> img.zip,
    Eval/list_eval_partition.txt), and organize them as follows: ::

        In-shop Clothes Retrieval Benchmark (data_root)/
           ├── Eval /
           │    └── list_eval_partition.txt (ann_file)
           ├── Img (img_prefix)
           │    └── img/
           ├── README.txt
           └── .....

    Args:
        data_root (str): The root directory for dataset.
        split (str): Choose from 'train', 'query' and 'gallery'.
            Defaults to 'train'.
        data_prefix (str | dict): Prefix for the image data.
            Defaults to 'Img'.
        ann_file (str): Annotation file path, path relative to
            ``data_root``. Defaults to 'Eval/list_eval_partition.txt'.
        **kwargs: Other keyword arguments in :class:`BaseDataset`.

    Raises:
        AssertionError: If ``split`` is not one of 'train', 'query' or
            'gallery'.

    Examples:
        >>> from mmpretrain.datasets import InShop
        >>>
        >>> # build train InShop dataset
        >>> inshop_train_cfg = dict(data_root='data/inshop', split='train')
        >>> inshop_train = InShop(**inshop_train_cfg)
        >>> inshop_train
        Dataset InShop
            Number of samples:  25882
            The `CLASSES` meta info is not set.
            Root of dataset:    data/inshop
        >>>
        >>> # build query InShop dataset
        >>> inshop_query_cfg = dict(data_root='data/inshop', split='query')
        >>> inshop_query = InShop(**inshop_query_cfg)
        >>> inshop_query
        Dataset InShop
            Number of samples:  14218
            The `CLASSES` meta info is not set.
            Root of dataset:    data/inshop
        >>>
        >>> # build gallery InShop dataset
        >>> inshop_gallery_cfg = dict(data_root='data/inshop', split='gallery')
        >>> inshop_gallery = InShop(**inshop_gallery_cfg)
        >>> inshop_gallery
        Dataset InShop
            Number of samples:  12612
            The `CLASSES` meta info is not set.
            Root of dataset:    data/inshop
    """

    def __init__(self,
                 data_root: str,
                 split: str = 'train',
                 data_prefix: str = 'Img',
                 ann_file: str = 'Eval/list_eval_partition.txt',
                 **kwargs):
        # Raise explicitly instead of using ``assert`` so the check is not
        # stripped under ``python -O``; AssertionError is kept so existing
        # callers that catch it keep working.
        if split not in ('train', 'query', 'gallery'):
            raise AssertionError(
                "'split' of `InShop` must be one of "
                f"['train', 'query', 'gallery'], but got '{split}'")

        # These attributes are read by ``_process_annotations``; they are set
        # before ``super().__init__`` (presumably because the base
        # initializer may already trigger ``load_data_list`` -- confirm
        # against ``BaseDataset``).
        self.backend = get_file_backend(data_root, enable_singleton=True)
        self.split = split

        super().__init__(
            data_root=data_root,
            data_prefix=data_prefix,
            ann_file=ann_file,
            **kwargs)

    def _process_annotations(self) -> dict:
        """Parse the annotation file and build the data of ``self.split``.

        Returns:
            dict: A dict with two keys. ``data_list`` holds one dict per
            image and ``metainfo`` holds ``sample_number`` (plus
            ``class_number`` for the train split).

        Raises:
            KeyError: If a query image refers to an item that has no
                gallery image in the annotation file.
        """
        lines = list_from_file(self.ann_file)

        train_list = []
        gallery_list = []
        # (img_path, item_id) of every query row; the labels are resolved
        # after the loop, once all gallery rows have been seen.
        pending_queries = []

        # item_id -> class label; every training item is its own class.
        item_to_class = {}
        # item_id -> indices (``sample_idx``) of all gallery images that
        # show this item.
        item_to_gallery_idx = {}

        # (lines[0], lines[1]) is the image number and the field name;
        # Each line format as 'image_name, item_id, evaluation_status'
        for line in lines[2:]:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            img_name, item_id, status = line.split()
            img_path = self.backend.join_path(self.img_prefix, img_name)

            if status == 'train':
                # Labels are handed out in order of first appearance.
                label = item_to_class.setdefault(item_id, len(item_to_class))
                train_list.append(dict(img_path=img_path, gt_label=label))
            elif status == 'gallery':
                gallery_idx = len(gallery_list)
                # Since there are multiple images for each item,
                # record every gallery image index under its item.
                item_to_gallery_idx.setdefault(item_id,
                                               []).append(gallery_idx)
                gallery_list.append(
                    dict(img_path=img_path, sample_idx=gallery_idx))
            elif status == 'query':
                pending_queries.append((img_path, item_id))

        if self.split == 'train':
            return dict(
                metainfo=dict(
                    class_number=len(item_to_class),
                    sample_number=len(train_list)),
                data_list=train_list)

        if self.split == 'gallery':
            return dict(
                metainfo=dict(sample_number=len(gallery_list)),
                data_list=gallery_list)

        # Generate the label for the query(val) set: the ground truth of a
        # query image is the list of gallery indices showing the same item.
        query_list = []
        for img_path, item_id in pending_queries:
            if item_id not in item_to_gallery_idx:
                raise KeyError(
                    f"Query image '{img_path}' refers to item '{item_id}', "
                    f"which has no gallery image in '{self.ann_file}'.")
            query_list.append(
                dict(img_path=img_path,
                     gt_label=item_to_gallery_idx[item_id]))

        return dict(
            metainfo=dict(sample_number=len(query_list)),
            data_list=query_list)

    def load_data_list(self) -> list:
        """Load the data list of the selected split.

        For the train set, return image and ground truth label. For the query
        set, return image and ids of images in gallery. For the gallery set,
        return image and its id.

        Returns:
            list: One dict per image, as built by
            ``_process_annotations``.
        """
        return self._process_annotations()['data_list']

    def extra_repr(self) -> list:
        """The extra repr information of the dataset.

        Returns:
            list: A single-element list with the dataset root line.
        """
        return [f'Root of dataset: \t{self.data_root}']