jbilcke-hf's picture
jbilcke-hf HF Staff
we are going to hack into finetrainers
9fd1204
import csv
import json
import pathlib
from typing import List

from diffusers.utils import export_to_video
from PIL import Image

from finetrainers.data.dataset import COMMON_CAPTION_FILES, COMMON_IMAGE_FILES, COMMON_VIDEO_FILES # noqa
def create_dummy_directory_structure(
    directory_structure: List[str], tmpdir, num_data_files: int, caption: str, metadata_extension: str
):
    """Populate ``tmpdir`` with placeholder dataset files for tests.

    Each entry of ``directory_structure`` is a path relative to ``tmpdir.name``
    and is materialized according to the first rule that matches:

    * name in ``COMMON_CAPTION_FILES``: text file holding ``caption`` repeated
      ``num_data_files`` times, one per line.
    * name in ``COMMON_IMAGE_FILES``: text file listing ``images/{i}.jpg``.
    * name in ``COMMON_VIDEO_FILES``: text file listing ``videos/{i}.mp4``.
    * ``metadata.csv``: ``file_name,caption`` header plus one row per file.
    * ``metadata.jsonl``: one JSON object per file with the same two keys.
    * ``*.txt``: a file containing ``caption`` (no trailing newline).
    * ``*.jpg`` / ``*.png``: a 64x64 RGB image.
    * ``*.mp4``: a 4-frame, 2 fps video of 64x64 RGB frames.
    * anything else: created as a directory.

    Missing parent directories of file entries are created on demand, so the
    result does not depend on directories being listed before their contents.

    Args:
        directory_structure: Relative paths to create.
        tmpdir: Object whose ``name`` attribute is the root directory path
            (presumably a ``tempfile.TemporaryDirectory``; confirm with callers).
        num_data_files: Number of entries written to list and metadata files.
        caption: Caption text written to caption and metadata files.
        metadata_extension: File extension (without the dot) used for the
            ``file_name`` values in the metadata files.
    """
    for item in directory_structure:
        data_file = pathlib.Path(tmpdir.name) / item

        # TODO(aryan): this should be improved
        is_caption_list = item in COMMON_CAPTION_FILES
        is_image_list = item in COMMON_IMAGE_FILES
        is_video_list = item in COMMON_VIDEO_FILES
        is_file = (
            is_caption_list
            or is_image_list
            or is_video_list
            or item in ("metadata.csv", "metadata.jsonl")
            or item.endswith((".txt", ".jpg", ".png", ".mp4"))
        )

        if not is_file:
            # Unrecognized entries are treated as directories.
            data_file.mkdir(exist_ok=True, parents=True)
            continue

        # Nested files (e.g. "images/0.jpg") must not fail just because their
        # directory entry comes later in the list or is missing altogether.
        data_file.parent.mkdir(exist_ok=True, parents=True)

        # The list-file checks come first: such names may also end in ".txt".
        if is_caption_list:
            with open(data_file.as_posix(), "w") as f:
                f.writelines(f"{caption}\n" for _ in range(num_data_files))
        elif is_image_list:
            with open(data_file.as_posix(), "w") as f:
                f.writelines(f"images/{i}.jpg\n" for i in range(num_data_files))
        elif is_video_list:
            with open(data_file.as_posix(), "w") as f:
                f.writelines(f"videos/{i}.mp4\n" for i in range(num_data_files))
        elif item == "metadata.csv":
            # csv.writer quotes captions containing commas, quotes or newlines;
            # newline="" is what the csv module expects of the file object.
            with open(data_file.as_posix(), "w", newline="") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(["file_name", "caption"])
                writer.writerows([f"{i}.{metadata_extension}", caption] for i in range(num_data_files))
        elif item == "metadata.jsonl":
            # json.dumps escapes quotes/backslashes/newlines so every line is
            # valid JSON; ensure_ascii=False keeps non-ASCII captions verbatim.
            with open(data_file.as_posix(), "w") as f:
                for i in range(num_data_files):
                    record = {"file_name": f"{i}.{metadata_extension}", "caption": caption}
                    f.write(json.dumps(record, ensure_ascii=False) + "\n")
        elif item.endswith(".txt"):
            with open(data_file.as_posix(), "w") as f:
                f.write(caption)
        elif item.endswith((".jpg", ".png")):
            Image.new("RGB", (64, 64)).save(data_file.as_posix())
        else:
            # Only ".mp4" remains among the recognized file kinds.
            export_to_video([Image.new("RGB", (64, 64))] * 4, data_file.as_posix(), fps=2)