Spaces:
Running
Running
File size: 2,479 Bytes
9fd1204 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import csv
import json
import pathlib
from typing import List

from diffusers.utils import export_to_video
from PIL import Image

from finetrainers.data.dataset import COMMON_CAPTION_FILES, COMMON_IMAGE_FILES, COMMON_VIDEO_FILES  # noqa
def create_dummy_directory_structure(
    directory_structure: List[str], tmpdir, num_data_files: int, caption: str, metadata_extension: str
):
    """Populate a temporary directory with placeholder dataset files.

    Every entry of ``directory_structure`` is a path relative to ``tmpdir.name``.
    What is created for an entry is decided by the first matching rule:

    * entry is in ``COMMON_CAPTION_FILES``: ``num_data_files`` lines, each ``caption``;
    * entry is in ``COMMON_IMAGE_FILES``: the lines ``images/0.jpg``, ``images/1.jpg``, ...;
    * entry is in ``COMMON_VIDEO_FILES``: the lines ``videos/0.mp4``, ``videos/1.mp4``, ...;
    * entry is exactly ``metadata.csv``: a ``file_name,caption`` header followed by
      one ``{i}.{metadata_extension},{caption}`` row per data file;
    * entry is exactly ``metadata.jsonl``: one JSON object per data file with the
      keys ``file_name`` and ``caption``;
    * entry ends with ``.txt``: a file holding ``caption`` with no trailing newline;
    * entry ends with ``.jpg`` or ``.png``: a 64x64 RGB image;
    * entry ends with ``.mp4``: a video built from four 64x64 RGB frames at 2 fps;
    * anything else: a directory (created with its parents).

    Args:
        directory_structure: Relative paths of the files/directories to create.
        tmpdir: Object whose ``name`` attribute is the root directory path
            (presumably a ``tempfile.TemporaryDirectory`` -- confirm with callers).
        num_data_files: Number of rows/lines written to list and metadata files.
        caption: Caption text written to caption and metadata files.
        metadata_extension: File extension (without the dot) used for the
            ``file_name`` column of the metadata files.
    """
    root = pathlib.Path(tmpdir.name)

    for item in directory_structure:
        data_file = root / item
        # A file entry may live in a sub-directory that was not listed (or not
        # listed yet); create it up front so the writes below cannot fail with
        # FileNotFoundError. Harmless for directory entries.
        data_file.parent.mkdir(parents=True, exist_ok=True)

        # TODO(aryan): this should be improved
        if item in COMMON_CAPTION_FILES:
            data_file.write_text(f"{caption}\n" * num_data_files, encoding="utf-8")
        elif item in COMMON_IMAGE_FILES:
            data_file.write_text(
                "".join(f"images/{i}.jpg\n" for i in range(num_data_files)), encoding="utf-8"
            )
        elif item in COMMON_VIDEO_FILES:
            data_file.write_text(
                "".join(f"videos/{i}.mp4\n" for i in range(num_data_files)), encoding="utf-8"
            )
        elif item == "metadata.csv":
            # csv.writer quotes captions containing commas, quotes or newlines,
            # which plain string interpolation would turn into a corrupt row.
            # newline="" + lineterminator="\n" keeps the rows "\n"-terminated.
            with open(data_file, "w", encoding="utf-8", newline="") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(["file_name", "caption"])
                writer.writerows([f"{i}.{metadata_extension}", caption] for i in range(num_data_files))
        elif item == "metadata.jsonl":
            # json.dumps escapes quotes/backslashes/newlines in the caption so
            # every line stays a valid JSON document; ensure_ascii=False keeps
            # non-ASCII captions readable instead of \u-escaping them.
            with open(data_file, "w", encoding="utf-8") as f:
                for i in range(num_data_files):
                    record = {"file_name": f"{i}.{metadata_extension}", "caption": caption}
                    f.write(json.dumps(record, ensure_ascii=False) + "\n")
        elif item.endswith(".txt"):
            data_file.write_text(caption, encoding="utf-8")
        elif item.endswith((".jpg", ".png")):
            Image.new("RGB", (64, 64)).save(data_file.as_posix())
        elif item.endswith(".mp4"):
            export_to_video([Image.new("RGB", (64, 64))] * 4, data_file.as_posix(), fps=2)
        else:
            # Anything unrecognised is treated as a directory.
            data_file.mkdir(exist_ok=True, parents=True)
|