File size: 2,479 Bytes
9fd1204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import csv
import json
import pathlib
from typing import List

from diffusers.utils import export_to_video
from PIL import Image

from finetrainers.data.dataset import COMMON_CAPTION_FILES, COMMON_IMAGE_FILES, COMMON_VIDEO_FILES  # noqa


def _write_lines(path: pathlib.Path, lines) -> None:
    """Write every entry of *lines* to *path* as UTF-8 text, one entry per line."""
    with open(path.as_posix(), "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in lines)


def create_dummy_directory_structure(
    directory_structure: List[str], tmpdir, num_data_files: int, caption: str, metadata_extension: str
) -> None:
    """Populate ``tmpdir`` with a dummy dataset layout.

    Each entry of ``directory_structure`` is a path relative to ``tmpdir.name``.
    What gets created is decided by the first matching rule below:

    * entry in ``COMMON_CAPTION_FILES``: text file holding ``caption`` repeated
      ``num_data_files`` times, one per line.
    * entry in ``COMMON_IMAGE_FILES``: text file listing ``images/{i}.jpg`` for
      ``i`` in ``range(num_data_files)``.
    * entry in ``COMMON_VIDEO_FILES``: text file listing ``videos/{i}.mp4`` for
      ``i`` in ``range(num_data_files)``.
    * ``"metadata.csv"``: CSV with a ``file_name,caption`` header and one
      ``{i}.{metadata_extension}`` row per data file.
    * ``"metadata.jsonl"``: one JSON object per line with ``file_name`` and
      ``caption`` keys.
    * ``*.txt``: text file containing ``caption`` (no trailing newline).
    * ``*.jpg`` / ``*.png``: a 64x64 RGB image.
    * ``*.mp4``: a 4-frame video of 64x64 RGB images exported at 2 fps.
    * anything else: a directory (created with its parents).

    Missing parent directories of every entry are created, so files inside a
    sub-directory do not require that directory to be listed first.

    Args:
        directory_structure: Relative paths to create, processed in order.
        tmpdir: Object whose ``name`` attribute is the root directory path.
        num_data_files: Number of rows/lines written to listing and metadata files.
        caption: Caption text written to caption and metadata files.
        metadata_extension: File extension (without dot) used for the
            ``file_name`` column of the metadata files.
    """
    root = pathlib.Path(tmpdir.name)

    for item in directory_structure:
        data_file = root / item
        # Harmless for directory entries; required for files nested in sub-directories.
        data_file.parent.mkdir(parents=True, exist_ok=True)

        # TODO(aryan): this should be improved
        # NOTE: branch order matters - the COMMON_* names are checked before the
        # generic suffix rules so that they are not shadowed by them.
        if item in COMMON_CAPTION_FILES:
            _write_lines(data_file, (caption for _ in range(num_data_files)))
        elif item in COMMON_IMAGE_FILES:
            _write_lines(data_file, (f"images/{i}.jpg" for i in range(num_data_files)))
        elif item in COMMON_VIDEO_FILES:
            _write_lines(data_file, (f"videos/{i}.mp4" for i in range(num_data_files)))
        elif item == "metadata.csv":
            # newline="" lets the csv module control line endings; the writer quotes
            # captions that contain commas, quotes or newlines.
            with open(data_file.as_posix(), "w", encoding="utf-8", newline="") as f:
                writer = csv.writer(f, lineterminator="\n")
                writer.writerow(["file_name", "caption"])
                writer.writerows([f"{i}.{metadata_extension}", caption] for i in range(num_data_files))
        elif item == "metadata.jsonl":
            # json.dumps escapes quotes/backslashes/newlines in the caption so every
            # line stays valid JSON; ensure_ascii=False keeps non-ASCII text readable.
            _write_lines(
                data_file,
                (
                    json.dumps({"file_name": f"{i}.{metadata_extension}", "caption": caption}, ensure_ascii=False)
                    for i in range(num_data_files)
                ),
            )
        elif item.endswith(".txt"):
            with open(data_file.as_posix(), "w", encoding="utf-8") as f:
                f.write(caption)
        elif item.endswith((".jpg", ".png")):
            Image.new("RGB", (64, 64)).save(data_file.as_posix())
        elif item.endswith(".mp4"):
            export_to_video([Image.new("RGB", (64, 64))] * 4, data_file.as_posix(), fps=2)
        else:
            data_file.mkdir(exist_ok=True, parents=True)