import os

import numpy as np
import torch
from tqdm import tqdm
from detectron2.data.catalog import MetadataCatalog
from detectron2.structures import Boxes, BoxMode
from cubercnn import data
from cubercnn.util.math_util import estimate_truncation, mat2euler, R_to_allocentric

def perp_vector(a, b):
    # 2D vector perpendicular to (a, b), i.e. (a, b) rotated by -90 degrees
    return np.array([b, -a])

def rotate_vector(x, y, theta):
    # rotate (x, y) counter-clockwise by theta (standard 2D rotation matrix)
    x_rotated = x * np.cos(theta) - y * np.sin(theta)
    y_rotated = x * np.sin(theta) + y * np.cos(theta)
    return np.array([x_rotated, y_rotated])

def calculate_alpha(location, ry):
    '''
    location: x, y, z coordinates of the box center in camera coordinates
    ry: rotation around the y-axis (negative is counter-clockwise;
        the positive x-axis points to the right)
    Returns the KITTI observation angle alpha: the signed angle between a
    line perpendicular to the camera-to-object ray and the object's heading.
    '''
    ry = -ry
    x, y, z = location
    # work entirely in 2D (top-down view); the vector from the camera at
    # [0, 0] to the center of the bounding box is (x, z)
    # vector perpendicular to the camera-to-object ray
    perpendicular = perp_vector(x, z)
    # unit vector corresponding to ry
    ry_vector = np.array([np.cos(ry), np.sin(ry)])
    # signed angle between perpendicular and ry_vector
    dot = perpendicular[0] * ry_vector[0] + perpendicular[1] * ry_vector[1]
    det = perpendicular[0] * ry_vector[1] - perpendicular[1] * ry_vector[0]
    alpha = -np.arctan2(det, dot)
    # wrap to [-pi, pi]
    if alpha > np.pi:
        alpha -= 2 * np.pi
    if alpha < -np.pi:
        alpha += 2 * np.pi
    return alpha
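
# Cross-check (a minimal sketch, not used by the conversion): under the usual
# KITTI convention the observation angle can equivalently be written as
# alpha = ry - arctan2(x, z), wrapped to [-pi, pi]. This closed form is an
# assumption based on the KITTI devkit convention; it agrees with
# calculate_alpha above on the test cases below.
def calculate_alpha_closed_form(location, ry):
    x, _, z = location
    alpha = ry - np.arctan2(x, z)
    # wrap to [-pi, pi]
    return (alpha + np.pi) % (2 * np.pi) - np.pi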

def test_calculate_alpha():
    # (location, ry, expected alpha) triples; the expected values are rounded
    # to two decimals, hence the loose tolerance
    cases = [
        ([-3.67, 1.67, 6.05], -1.24, -0.72),
        ([-9.48, 2.08, 26.41], 1.77, 2.11),
        ([4.19, 1.46, 44.41], -1.35, -1.45),
        ([-6.41, 2.04, 46.74], 1.68, 1.82),
        ([0.28, 2.08, 17.74], -1.58, -1.59),
        ([-3.21, 1.97, 11.22], -0.13, 0.15),
    ]
    for location, ry, expected in cases:
        result = calculate_alpha(location, ry)
        assert np.isclose(result, expected, atol=0.03), (result, expected)

def main():
    test_calculate_alpha()  # sanity-check the alpha computation first
    name = 'KITTI'
    split = 'test'
    dataset_paths_to_json = [f'datasets/Omni3D/{name}_{split}.json']
    # Example 1. load all images
    dataset = data.Omni3D(dataset_paths_to_json)
    imgIds = dataset.getImgIds()
    imgs = dataset.loadImgs(imgIds)
    # Example 2. load annotations for image index 0
    annIds = dataset.getAnnIds(imgIds=imgs[0]['id'])
    anns = dataset.loadAnns(annIds)
    data.register_and_store_model_metadata(dataset, 'output')
    thing_classes = MetadataCatalog.get('omni3d_model').thing_classes
    dataset_id_to_contiguous_id = MetadataCatalog.get('omni3d_model').thing_dataset_id_to_contiguous_id
    cats = {'pedestrian', 'car', 'cyclist', 'van', 'truck'}
    input_folder = 'kitti_omni_eq'
    out_path = 'output/' + input_folder + '/KITTI_formatted_predictions/'
    in_path = 'output/' + input_folder + '/KITTI_pred/instances_predictions.pth'
    print('saving to', out_path)
    data_json = torch.load(in_path)
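    # Assumed schema, inferred from the field accesses below rather than from
    # a spec: data_json is a list of per-image dicts with keys 'K' (3x3
    # intrinsics), 'width', 'height', 'image_id', and 'instances', where each
    # instance carries 'category_id', 'pose' (3x3 rotation), 'bbox'
    # (XYWH_ABS), 'dimensions', 'center_cam', 'center_2D' and 'score'.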
    # reference:
    # https://github.com/ZrrSkywalker/MonoDETR/blob/c724572bddbc067832a0e0d860a411003f36c2fa/lib/helpers/tester_helper.py#L114
    files = {}
    for image in tqdm(data_json):
        K = image['K']
        # only needed for the commented-out unprojection check below
        K_inv = np.linalg.inv(K)
        width, height = image['width'], image['height']
        image_id = image['image_id']
        rows = []
        for pred in image['instances']:
            category = thing_classes[pred['category_id']]
            if category not in cats:
                continue
            occluded = 0
            # truncation = estimate_truncation(K, torch.tensor([x3d, y3d, z3d, w3d, h3d, l3d]), pred['pose'], width, height)
            truncation = 0.0  # it does not matter for the evaluation
            rotation_y = mat2euler(np.array(pred['pose']))[1]
            # (x, y, w, h) -> (x1, y1, x2, y2), i.e. left, top, right, bottom
            bbox = BoxMode.convert(pred['bbox'], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
            h3d, w3d, l3d = pred['dimensions']
            # unprojecting the 2D center should yield the same 3D center:
            # cen_2d = np.array(pred['center_2D'] + [1])
            # z3d = pred['center_cam'][2]
            # x3d, y3d, z3d = K_inv @ (z3d * cen_2d)
            x3d, y3d, z3d = pred['center_cam']
            location = pred['center_cam']
            score = pred['score']
            alpha = calculate_alpha(location, rotation_y)
            # assemble one KITTI label row (see format reference at the bottom)
            row = [category, truncation, occluded, alpha, bbox[0], bbox[1], bbox[2], bbox[3],
                   h3d, w3d, l3d, x3d, y3d, z3d, rotation_y, score]
            rows.append(row)
        # sort rows by depth (z3d)
        rows = sorted(rows, key=lambda r: r[13])
        files[image_id] = rows
    # 7518 test images
    os.makedirs(out_path, exist_ok=True)
    for img_id, rows in files.items():
        img_id_str = str(img_id).zfill(6)
        with open(os.path.join(out_path, img_id_str + '.txt'), 'w') as f:
            for row in rows:
                (category, truncation, occluded, alpha, x1, y1, x2, y2,
                 h3d, w3d, l3d, x3d, y3d, z3d, rotation_y, score) = row
                category = category[0].upper() + category[1:]  # e.g. 'car' -> 'Car'
                f.write(f'{category} {truncation:.2f} {occluded} {alpha:.2f} '
                        f'{x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f} '
                        f'{h3d:.2f} {w3d:.2f} {l3d:.2f} '
                        f'{x3d:.2f} {y3d:.2f} {z3d:.2f} {rotation_y:.2f} {score:.2f}\n')
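
# Round-trip sketch (hypothetical helper, not called by the conversion):
# splits one written label line back into typed fields, handy for
# spot-checking the output. Field slices follow the format reference below.
def parse_kitti_line(line):
    parts = line.split()
    return {
        'type': parts[0],
        'truncated': float(parts[1]),
        'occluded': int(parts[2]),
        'alpha': float(parts[3]),
        'bbox': [float(v) for v in parts[4:8]],         # left, top, right, bottom
        'dimensions': [float(v) for v in parts[8:11]],  # height, width, length
        'location': [float(v) for v in parts[11:14]],   # x, y, z
        'rotation_y': float(parts[14]),
        # score is present only in result files, not in ground-truth labels
        'score': float(parts[15]) if len(parts) > 15 else None,
    }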

if __name__ == '__main__':
    main()

# KITTI label format reference (one file per image):
# #Values    Name         Description
# ----------------------------------------------------------------------------
#    1       type         Describes the type of object: 'Car', 'Van', 'Truck',
#                         'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
#                         'Misc' or 'DontCare'
#    1       truncated    Float from 0 (non-truncated) to 1 (truncated), where
#                         truncated refers to the object leaving image boundaries
#    1       occluded     Integer (0,1,2,3) indicating occlusion state:
#                         0 = fully visible, 1 = partly occluded
#                         2 = largely occluded, 3 = unknown
#    1       alpha        Observation angle of object, ranging [-pi..pi]
#    4       bbox         2D bounding box of object in the image (0-based index):
#                         contains left, top, right, bottom pixel coordinates
#    3       dimensions   3D object dimensions: height, width, length (in meters)
#    3       location     3D object location x,y,z in camera coordinates (in meters)
#    1       rotation_y   Rotation ry around Y-axis in camera coordinates [-pi..pi]
#    1       score        Only for results: Float, indicating confidence in
#                         detection, needed for p/r curves, higher is better.
# output to files 000000.txt 000001.txt ...
# example file:
# Car 0.00 0 -1.56 564.62 174.59 616.43 224.74 1.61 1.66 3.20 -0.69 1.69 25.01 -1.59
# Car 0.00 0 1.71 481.59 180.09 512.55 202.42 1.40 1.51 3.70 -7.43 1.88 47.55 1.55
# Car 0.00 0 1.64 542.05 175.55 565.27 193.79 1.46 1.66 4.05 -4.71 1.71 60.52 1.56
# Cyclist 0.00 0 1.89 330.60 176.09 355.61 213.60 1.72 0.50 1.95 -12.63 1.88 34.09 1.54
# DontCare -1 -1 -10 753.33 164.32 798.00 186.74 -1 -1 -1 -1000 -1000 -1000 -10
# DontCare -1 -1 -10 738.50 171.32 753.27 184.42 -1 -1 -1 -1000 -1000 -1000 -10