""" | |
Copied from RT-DETR (https://github.com/lyuwenyu/RT-DETR) | |
Copyright(c) 2023 lyuwenyu. All Rights Reserved. | |
""" | |

import os
from typing import Callable, Optional

import torch
import torchvision
import torchvision.transforms.functional as TVF
from PIL import Image

# Prefer defusedxml when available to guard against maliciously crafted XML.
try:
    from defusedxml.ElementTree import parse as ET_parse
except ImportError:
    from xml.etree.ElementTree import parse as ET_parse

from ...core import register
from .._misc import convert_to_tv_tensor
from ._dataset import DetDataset


@register()
class VOCDetection(torchvision.datasets.VOCDetection, DetDataset):
    __inject__ = [
        "transforms",
    ]
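
    # `ann_file` is assumed to list one sample per line as
    # "image_path annotation_path" (both relative to `root`), and
    # `label_file` one class name per line. This format is inferred from
    # the parsing in __init__ below, not from a documented spec.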
    def __init__(
        self,
        root: str,
        ann_file: str = "trainval.txt",
        label_file: str = "label_list.txt",
        transforms: Optional[Callable] = None,
    ):
        with open(os.path.join(root, ann_file), "r") as f:
            lines = [x.strip() for x in f.readlines()]
            lines = [x.split(" ") for x in lines]

        self.images = [os.path.join(root, lin[0]) for lin in lines]
        self.targets = [os.path.join(root, lin[1]) for lin in lines]
        assert len(self.images) == len(self.targets)

        with open(os.path.join(root, label_file), "r") as f:
            labels = f.readlines()
            labels = [lab.strip() for lab in labels]

        self.transforms = transforms
        self.labels_map = {lab: i for i, lab in enumerate(labels)}
    def __getitem__(self, index: int):
        image, target = self.load_item(index)
        if self.transforms is not None:
            image, target, _ = self.transforms(image, target, self)
        # target["orig_size"] = torch.tensor(TVF.get_image_size(image))
        return image, target
    def load_item(self, index: int):
        image = Image.open(self.images[index]).convert("RGB")
        # `annotations` is a property on torchvision's VOCDetection that
        # aliases `self.targets`, i.e. the XML paths collected in __init__.
        target = self.parse_voc_xml(ET_parse(self.annotations[index]).getroot())

        output = {}
        output["image_id"] = torch.tensor([index])
        for k in ["area", "boxes", "labels", "iscrowd"]:
            output[k] = []

        # parse_voc_xml always yields a (possibly empty) list for "object"
        # and preserves XML element order, so `bndbox.values()` comes out as
        # (xmin, ymin, xmax, ymax).
        for blob in target["annotation"]["object"]:
            box = [float(v) for v in blob["bndbox"].values()]
            output["boxes"].append(box)
            output["labels"].append(blob["name"])
            output["area"].append((box[2] - box[0]) * (box[3] - box[1]))
            output["iscrowd"].append(0)

        w, h = image.size
        boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4)
        output["boxes"] = convert_to_tv_tensor(
            boxes, "boxes", box_format="xyxy", spatial_size=[h, w]
        )
        output["labels"] = torch.tensor([self.labels_map[lab] for lab in output["labels"]])
        output["area"] = torch.tensor(output["area"])
        output["iscrowd"] = torch.tensor(output["iscrowd"])
        output["orig_size"] = torch.tensor([w, h])

        return image, output
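

if __name__ == "__main__":
    # Minimal usage sketch, not part of the upstream file. The dataset root
    # below is a hypothetical placeholder; point it at a VOC-style layout
    # matching the list-file format parsed in __init__. Because this module
    # uses relative imports, run it as a module from inside the package
    # (python -m <package>.voc_detection) rather than as a standalone script.
    dataset = VOCDetection(root="./dataset/voc/")
    image, target = dataset[0]
    print(image.size, target["boxes"].shape, target["labels"], target["orig_size"])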