| import sys | |
| import traceback | |
| import pickle | |
| import os | |
| import concurrent.futures | |
| from tqdm import tqdm | |
| from font_dataset.font import load_fonts | |
| from font_dataset.layout import generate_font_image | |
| from font_dataset.text import CorpusGeneratorManager | |
| from font_dataset.background import background_image_generator | |
# Multiplier applied to the per-font image counts for CJK fonts.
cjk_ratio = 3

# Base number of images per font for each dataset split.
train_cnt = 100
val_cnt = 5
test_cnt = 30

# Per-font image counts for CJK fonts: the base counts scaled by cjk_ratio.
train_cnt_cjk, val_cnt_cjk, test_cnt_cjk = (
    int(base_cnt * cjk_ratio) for base_cnt in (train_cnt, val_cnt, test_cnt)
)

# Root directory of the generated dataset; created up front if it is missing.
dataset_path = "./dataset/font_img"
os.makedirs(dataset_path, exist_ok=True)
fonts, exclusion_rule = load_fonts()

# Weighted font count: every non-excluded font contributes 1, except CJK
# fonts, which contribute cjk_ratio.
cnt = 0
for font in fonts:
    if exclusion_rule(font):
        print(f"Excluded font: {font.path}")
        continue
    cnt += cjk_ratio if font.language == "CJK" else 1

# Expected number of images per split, given the weighted font count.
print("Total training images:", train_cnt * cnt)
print("Total validation images:", val_cnt * cnt)
print("Total testing images:", test_cnt * cnt)
def _count_entries(directory):
    """Return the number of entries in *directory*, or 0 if it does not exist."""
    if not os.path.exists(directory):
        return 0
    return len(os.listdir(directory))


# Number of entries already present on disk for each split; a split whose
# directory has not been created yet counts as zero.
num_file_train = _count_entries(os.path.join(dataset_path, "train"))
num_file_val = _count_entries(os.path.join(dataset_path, "val"))
num_file_test = _count_entries(os.path.join(dataset_path, "test"))

num_file_generated = num_file_train + num_file_val + num_file_test
# NOTE(review): the factor of 2 presumably means each sample is stored as two
# files (e.g. an image plus a label file) -- confirm against the writer code.
num_file_target = (train_cnt + val_cnt + test_cnt) * cnt * 2

print("Total files generated:", num_file_generated)
print("Total files target:", num_file_target)

# The target is zero when no font survives the exclusion rule; report 0%
# in that case instead of failing with ZeroDivisionError.
if num_file_target:
    completion = num_file_generated / num_file_target * 100
else:
    completion = 0.0
print(f"{completion:.2f}% completed")