# Source provenance (from upstream commit 2df809d by liguang0115):
# "Add initial project structure with core files, configurations, and sample images"
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from eval.video_depth.tools import depth_evaluation, group_by_directory
import numpy as np
import cv2
from tqdm import tqdm
import glob
from PIL import Image
import argparse
import json
from eval.video_depth.metadata import dataset_metadata
def get_args_parser():
    """Build the CLI parser for the video depth evaluation script.

    Options:
        --output_dir:   directory holding per-sequence ``frame_*.npy`` predictions
                        and where the result JSON is written.
        --eval_dataset: which dataset to evaluate (keys of ``dataset_metadata``).
        --align:        alignment strategy applied before computing metrics.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_dir", type=str, default="", help="value for outdir")
    parser.add_argument(
        "--eval_dataset",
        type=str,
        default="nyu",
        choices=list(dataset_metadata.keys()),
    )
    parser.add_argument(
        "--align",
        type=str,
        default="scale&shift",
        choices=["scale&shift", "scale", "metric"],
    )
    return parser
def _read_depth_sintel(filename):
    """Read a Sintel ``.dpt`` depth file and return it as a (H, W) float32 array.

    File layout: a float32 sanity tag, int32 width, int32 height, then
    width*height float32 depth values.
    """
    TAG_FLOAT = 202021.25  # magic tag written at the start of every .dpt file
    # Use a context manager so the handle is closed even on assertion failure
    # (the original implementation leaked the file descriptor).
    with open(filename, "rb") as f:
        check = np.fromfile(f, dtype=np.float32, count=1)[0]
        assert (
            check == TAG_FLOAT
        ), " depth_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine? ".format(
            TAG_FLOAT, check
        )
        width = np.fromfile(f, dtype=np.int32, count=1)[0]
        height = np.fromfile(f, dtype=np.int32, count=1)[0]
        size = width * height
        assert (
            width > 0 and height > 0 and size > 1 and size < 100000000
        ), " depth_read:: Wrong input size (width = {0}, height = {1}).".format(
            width, height
        )
        return np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))


def _read_depth_png(filename, scale):
    """Read a 16-bit PNG depth map and return it as a (H, W) float64 array.

    Raw values are divided by *scale* (5000.0 for Bonn, 256.0 for KITTI);
    zero pixels encode "no measurement" and are mapped to -1.0.
    """
    depth_png = np.asarray(Image.open(filename))
    # make sure we have a proper 16bit depth map here.. not 8bit!
    assert np.max(depth_png) > 255
    depth = depth_png.astype(np.float64) / scale
    depth[depth_png == 0] = -1.0
    return depth


def _load_depth_pair(depth_read, gt_pathes, pd_pathes):
    """Stack GT depths and predictions into (T, H, W) arrays.

    Each prediction is bicubically resized to the GT resolution
    (cv2.resize takes dsize as (width, height)).
    """
    gt_depth = np.stack([depth_read(gt_path) for gt_path in gt_pathes], axis=0)
    pr_depth = np.stack(
        [
            cv2.resize(
                np.load(pd_path),
                (gt_depth.shape[2], gt_depth.shape[1]),
                interpolation=cv2.INTER_CUBIC,
            )
            for pd_path in pd_pathes
        ],
        axis=0,
    )
    return gt_depth, pr_depth


def _evaluate_sequence(align, pr_depth, gt_depth, max_depth, post_clip_max=None):
    """Run ``depth_evaluation`` with the alignment selected by *align*.

    align == "scale&shift" uses LAD scale-and-shift alignment,
    "scale" uses scale-only alignment, "metric" uses the raw metric scale.
    Returns only the metrics dict; the error map and aligned depths are discarded.
    """
    kwargs = {"max_depth": max_depth, "use_gpu": True}
    if post_clip_max is not None:
        kwargs["post_clip_max"] = post_clip_max
    if align == "scale&shift":
        kwargs["align_with_lad2"] = True
    elif align == "scale":
        kwargs["align_with_scale"] = True
    elif align == "metric":
        kwargs["metric_scale"] = True
    depth_results, _error_map, _depth_predict, _depth_gt = depth_evaluation(
        pr_depth, gt_depth, **kwargs
    )
    return depth_results


def _save_average_metrics(args, gathered_depth_metrics):
    """Average per-sequence metrics (weighted by valid pixel count) and save JSON.

    The result is written to ``{output_dir}/result_{align}.json``.
    """
    weights = [metrics["valid_pixels"] for metrics in gathered_depth_metrics]
    average_metrics = {
        key: np.average(
            [metrics[key] for metrics in gathered_depth_metrics], weights=weights
        )
        for key in gathered_depth_metrics[0].keys()
        if key != "valid_pixels"
    }
    print("Average depth evaluation metrics:", average_metrics)
    depth_log_path = f"{args.output_dir}/result_{args.align}.json"
    with open(depth_log_path, "w") as f:
        f.write(json.dumps(average_metrics))


def _eval_sintel(args):
    """Evaluate predictions on Sintel (full set or the 14-sequence subset)."""
    pred_pathes = sorted(
        glob.glob(f"{args.output_dir}/*/frame_*.npy")
    )  # TODO: update the path to your prediction
    # More than 643 predicted frames means the full training split was run;
    # otherwise fall back to the standard 14-sequence subset.
    if len(pred_pathes) > 643:
        depth_pathes = sorted(glob.glob("data/sintel/training/depth/*/*.dpt"))
    else:
        seq_list = [
            "alley_2",
            "ambush_4",
            "ambush_5",
            "ambush_6",
            "cave_2",
            "cave_4",
            "market_2",
            "market_5",
            "market_6",
            "shaman_3",
            "sleeping_1",
            "sleeping_2",
            "temple_2",
            "temple_3",
        ]
        depth_pathes = []
        for seq in seq_list:
            depth_pathes += glob.glob(f"data/sintel/training/depth/{seq}/*.dpt")
        depth_pathes = sorted(depth_pathes)

    grouped_pred_depth = group_by_directory(pred_pathes)
    grouped_gt_depth = group_by_directory(depth_pathes)
    gathered_depth_metrics = []
    for key in tqdm(grouped_pred_depth.keys()):
        # GT groups are keyed without the "_pred_depth" suffix.
        gt_pathes = grouped_gt_depth[key.replace("_pred_depth", "")]
        gt_depth, pr_depth = _load_depth_pair(
            _read_depth_sintel, gt_pathes, grouped_pred_depth[key]
        )
        gathered_depth_metrics.append(
            _evaluate_sequence(
                args.align, pr_depth, gt_depth, max_depth=70, post_clip_max=70
            )
        )
    _save_average_metrics(args, gathered_depth_metrics)


def _eval_bonn(args):
    """Evaluate predictions on the Bonn RGB-D dynamic dataset (5 sequences)."""
    seq_list = ["balloon2", "crowd2", "crowd3", "person_tracking2", "synchronous"]
    depth_pathes = []
    for seq in seq_list:
        depth_pathes += glob.glob(
            f"data/bonn/rgbd_bonn_dataset/rgbd_bonn_{seq}/depth_110/*.png"
        )
    depth_pathes = sorted(depth_pathes)
    pred_pathes = sorted(
        glob.glob(f"{args.output_dir}/*/frame*.npy")
    )  # TODO: update the path to your prediction

    grouped_pred_depth = group_by_directory(pred_pathes)
    # idx=-2: depth files live one directory deeper, group by the parent dir.
    grouped_gt_depth = group_by_directory(depth_pathes, idx=-2)
    gathered_depth_metrics = []
    for key in tqdm(grouped_gt_depth.keys()):
        # Prediction groups are keyed without the "rgbd_bonn_" prefix (10 chars).
        pd_pathes = grouped_pred_depth[key[10:]]
        gt_depth, pr_depth = _load_depth_pair(
            lambda p: _read_depth_png(p, 5000.0), grouped_gt_depth[key], pd_pathes
        )
        gathered_depth_metrics.append(
            _evaluate_sequence(args.align, pr_depth, gt_depth, max_depth=70)
        )
    _save_average_metrics(args, gathered_depth_metrics)


def _eval_kitti(args):
    """Evaluate predictions on the KITTI depth-selection validation split."""
    depth_pathes = sorted(
        glob.glob(
            "data/kitti/depth_selection/val_selection_cropped/groundtruth_depth_gathered/*/*.png"
        )
    )
    pred_pathes = sorted(
        glob.glob(f"{args.output_dir}/*/frame_*.npy")
    )  # TODO: update the path to your prediction

    grouped_pred_depth = group_by_directory(pred_pathes)
    grouped_gt_depth = group_by_directory(depth_pathes)
    gathered_depth_metrics = []
    for key in tqdm(grouped_pred_depth.keys()):
        gt_depth, pr_depth = _load_depth_pair(
            lambda p: _read_depth_png(p, 256.0),
            grouped_gt_depth[key],
            grouped_pred_depth[key],
        )
        # No max_depth cap for KITTI.
        gathered_depth_metrics.append(
            _evaluate_sequence(args.align, pr_depth, gt_depth, max_depth=None)
        )
    _save_average_metrics(args, gathered_depth_metrics)


def main(args):
    """Dispatch video depth evaluation for the dataset named in ``args.eval_dataset``.

    Reads per-frame ``.npy`` predictions from ``args.output_dir``, compares them
    against dataset ground truth, and writes weighted-average metrics to
    ``{output_dir}/result_{align}.json``. Datasets other than sintel/bonn/kitti
    are silently ignored (matching the original behavior).
    """
    if args.eval_dataset == "sintel":
        _eval_sintel(args)
    elif args.eval_dataset == "bonn":
        _eval_bonn(args)
    elif args.eval_dataset == "kitti":
        _eval_kitti(args)
if __name__ == "__main__":
args = get_args_parser()
args = args.parse_args()
main(args)