Spaces:
Sleeping
Sleeping
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import argparse | |
| import os | |
| import os.path as osp | |
| import shutil | |
| import xml.etree.ElementTree as ET | |
| import zipfile | |
| from xml.etree.ElementTree import ParseError | |
def extract(root_path):
    """Unpack the KAIST archives and collect valid image/annotation pairs.

    For every language (English, Korean, Mixed) and camera (Digital_Camera,
    Mobile_Phone) sub-directory of ``<root_path>/KAIST``, each archive is
    extracted into ``<root_path>/tmp/<archive name>``. Every XML annotation
    that parses successfully and has a sibling ``.jpg``/``.JPG`` image is
    moved, together with that image, to ``<root_path>/annotations`` and
    ``<root_path>/imgs`` under a shared zero-padded running index
    (``00000.xml`` / ``00000.jpg``, ...).

    Args:
        root_path (str): Directory that contains the ``KAIST`` folder and
            receives the ``annotations``, ``imgs`` and ``tmp`` folders.
    """
    ann_dir = osp.join(root_path, 'annotations')
    img_dir = osp.join(root_path, 'imgs')
    # shutil.move does not create missing parent directories.
    os.makedirs(ann_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    idx = 0
    for language in ['English', 'Korean', 'Mixed']:
        for camera in ['Digital_Camera', 'Mobile_Phone']:
            crt_path = osp.join(root_path, 'KAIST', language, camera)
            # Sorted so that the index assignment is reproducible instead of
            # depending on the filesystem's listing order.
            for zip_name in sorted(os.listdir(crt_path)):
                extracted_path = osp.join(root_path, 'tmp', zip_name)
                extract_zipfile(osp.join(crt_path, zip_name), extracted_path)
                for file_name in sorted(os.listdir(extracted_path)):
                    if not file_name.endswith('.xml'):
                        continue
                    src_ann = osp.join(extracted_path, file_name)
                    # Filtering broken annotations
                    try:
                        ET.parse(src_ann)
                    except ParseError:
                        continue
                    # Swap only the extension; a plain str.replace would also
                    # rewrite an 'xml' substring inside the base name.
                    stem = osp.splitext(file_name)[0]
                    src_img = None
                    for suffix in ('.jpg', '.JPG'):
                        img_path = osp.join(extracted_path, stem + suffix)
                        if osp.exists(img_path):
                            src_img = img_path
                    if src_img is None:
                        # Annotation without a matching image is dropped.
                        continue
                    out_name = str(idx).zfill(5)
                    shutil.move(src_ann, osp.join(ann_dir, out_name + '.xml'))
                    shutil.move(src_img, osp.join(img_dir, out_name + '.jpg'))
                    idx += 1
def extract_zipfile(zip_path, dst_dir, delete=True):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_path (str): Path of the zip archive to unpack.
        dst_dir (str): Directory that receives the extracted members.
        delete (bool): Whether to remove the archive once it has been
            extracted. Defaults to True.
    """
    # The archive must be closed before it is removed: an open handle leaks
    # a file descriptor and blocks deletion on Windows.
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(dst_dir)
    if delete:
        os.remove(zip_path)
def parse_args():
    """Parse the command-line arguments of the KAIST extraction script.

    Returns:
        argparse.Namespace: Parsed arguments; ``root_path`` holds the single
        positional argument.
    """
    arg_parser = argparse.ArgumentParser(description='Extract KAIST zips')
    arg_parser.add_argument('root_path', help='Root path of KAIST')
    return arg_parser.parse_args()
def main():
    """Extract the KAIST archives under the given root, then clean up.

    Reads ``root_path`` from the command line, runs :func:`extract` on it and
    finally deletes the ``tmp`` and ``KAIST`` folders below the root.

    Raises:
        AssertionError: If ``root_path`` does not exist.
    """
    args = parse_args()
    root_path = args.root_path
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O``; the exception type is kept for compatibility.
    if not osp.exists(root_path):
        raise AssertionError(
            'Root path does not exist: {}'.format(root_path))
    extract(root_path)
    # ``tmp`` only exists if at least one archive was extracted; skipping it
    # when absent keeps the KAIST cleanup below from being aborted.
    tmp_dir = osp.join(root_path, 'tmp')
    if osp.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    shutil.rmtree(osp.join(root_path, 'KAIST'))
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()