"""
Copyright (c) 2024 The D-FINE Authors. All Rights Reserved.
"""
import argparse
import json
import os


def update_image_paths(images, new_prefix):
    print("Updating image paths with new prefix...")
    for img in images:
        split = img["file_name"].split("/")[1:]
        img["file_name"] = os.path.join(new_prefix, *split)
    print("Image paths updated.")
    return images
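
# Illustrative example of the rewrite above (the Objects365 file_name layout shown is an
# assumption; only the "drop the first path component, re-root under new_prefix" behavior
# comes from the code):
#   "images/v1/patch0/obj365_000001.jpg" with new_prefix="images_from_val"
#   -> "images_from_val/v1/patch0/obj365_000001.jpg"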


def create_split_annotations(original_annotations, split_image_ids, new_prefix, output_file):
    print(f"Creating split annotations for {output_file}...")
    # Use a set so the per-image and per-annotation membership tests below are O(1);
    # with Objects365-scale annotation lists, list membership would be impractically slow.
    split_image_ids = set(split_image_ids)
    new_images = [img for img in original_annotations["images"] if img["id"] in split_image_ids]
    print(f"Number of images selected: {len(new_images)}")
    if new_prefix is not None:
        new_images = update_image_paths(new_images, new_prefix)
    new_annotations = {
        "images": new_images,
        "annotations": [
            ann for ann in original_annotations["annotations"] if ann["image_id"] in split_image_ids
        ],
        "categories": original_annotations["categories"],
    }
    print(f'Number of annotations selected: {len(new_annotations["annotations"])}')
    with open(output_file, "w") as f:
        json.dump(new_annotations, f)
    print(f"Annotations saved to {output_file}")
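
# Minimal sketch of the COCO-style layout this function writes (field values are
# illustrative, not taken from a real Objects365 annotation file):
#   {"images":      [{"id": 1, "file_name": "..."}],
#    "annotations": [{"image_id": 1, "category_id": 2, "bbox": [x, y, w, h]}],
#    "categories":  [{"id": 2, "name": "..."}]}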


def parse_arguments():
    parser = argparse.ArgumentParser(description="Split and update dataset annotations.")
    parser.add_argument(
        "--base_dir",
        type=str,
        required=True,
        help="Base directory of the dataset, e.g., /data/Objects365/data",
    )
    parser.add_argument(
        "--new_val_size",
        type=int,
        default=5000,
        help="Number of images to include in the new validation set (default: 5000)",
    )
    parser.add_argument(
        "--output_suffix",
        type=str,
        default="new",
        help="Suffix to add to new annotation files (default: new)",
    )
    return parser.parse_args()
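
# Example invocation (the script filename is hypothetical):
#   python split_objects365.py --base_dir /data/Objects365/data --new_val_size 5000 --output_suffix new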


def main():
    args = parse_arguments()
    base_dir = args.base_dir
    new_val_size = args.new_val_size
    output_suffix = args.output_suffix

    # Define paths based on the base directory
    original_train_ann_file = os.path.join(base_dir, "train", "zhiyuan_objv2_train.json")
    original_val_ann_file = os.path.join(base_dir, "val", "zhiyuan_objv2_val.json")
    new_val_ann_file = os.path.join(base_dir, "val", f"{output_suffix}_zhiyuan_objv2_val.json")
    new_train_ann_file = os.path.join(
        base_dir, "train", f"{output_suffix}_zhiyuan_objv2_train.json"
    )

    # Check if original annotation files exist
    if not os.path.isfile(original_train_ann_file):
        print(f"Error: Training annotation file not found at {original_train_ann_file}")
        return
    if not os.path.isfile(original_val_ann_file):
        print(f"Error: Validation annotation file not found at {original_val_ann_file}")
        return

    # Load the original training and validation annotations
    print("Loading original training annotations...")
    with open(original_train_ann_file, "r") as f:
        train_annotations = json.load(f)
    print("Training annotations loaded.")

    print("Loading original validation annotations...")
    with open(original_val_ann_file, "r") as f:
        val_annotations = json.load(f)
    print("Validation annotations loaded.")

    # Extract image IDs from the original validation set
    print("Extracting image IDs from the validation set...")
    val_image_ids = [img["id"] for img in val_annotations["images"]]
    print(f"Total validation images: {len(val_image_ids)}")

    # Split image IDs for the new training and validation sets
    print(
        f"Splitting validation images into new validation set of size {new_val_size} and training set..."
    )
    new_val_image_ids = val_image_ids[:new_val_size]
    # Keep the training IDs in a set: the filters below test membership once per image
    # and once per annotation, so O(1) lookups matter at this dataset's scale.
    new_train_image_ids = set(val_image_ids[new_val_size:])
    print(f"New validation set size: {len(new_val_image_ids)}")
    print(f"New training set size from validation images: {len(new_train_image_ids)}")

    # Create new validation annotation file
    print("Creating new validation annotations...")
    create_split_annotations(val_annotations, new_val_image_ids, None, new_val_ann_file)
    print("New validation annotations created.")

    # Combine the remaining validation images and annotations with the original training data
    print("Preparing new training images and annotations...")
    new_train_images = [
        img for img in val_annotations["images"] if img["id"] in new_train_image_ids
    ]
    print(f"Number of images from validation to add to training: {len(new_train_images)}")
    new_train_images = update_image_paths(new_train_images, "images_from_val")
    new_train_annotations = [
        ann for ann in val_annotations["annotations"] if ann["image_id"] in new_train_image_ids
    ]
    print(f"Number of annotations from validation to add to training: {len(new_train_annotations)}")

    # Add the original training images and annotations
    print("Adding original training images and annotations...")
    new_train_images.extend(train_annotations["images"])
    new_train_annotations.extend(train_annotations["annotations"])
    print(f"Total training images: {len(new_train_images)}")
    print(f"Total training annotations: {len(new_train_annotations)}")

    # Create a new training annotation dictionary
    print("Creating new training annotations dictionary...")
    new_train_annotations_dict = {
        "images": new_train_images,
        "annotations": new_train_annotations,
        "categories": train_annotations["categories"],
    }
    print("New training annotations dictionary created.")

    # Save the new training annotations
    print("Saving new training annotations...")
    with open(new_train_ann_file, "w") as f:
        json.dump(new_train_annotations_dict, f)
    print(f"New training annotations saved to {new_train_ann_file}")
    print("Processing completed successfully.")


if __name__ == "__main__":
    main()