|
import sys |
|
from pathlib import Path |
|
|
|
sys.path.append(str(Path(__file__).resolve().parent.parent.parent)) |
|
import argparse |
|
import os |
|
|
|
from jean_zay.launch import JeanZayExperiment |
|
|
|
|
|
def parse_mode():
    """Parse the command-line options of this launcher script.

    Returns:
        argparse.Namespace with the attributes ``launch`` (bool),
        ``src_json`` and ``dest`` (str or None when omitted),
        ``num_samples_per_tar`` (int, default 10000) and
        ``number_of_jobs`` (int, default 10).
    """
    # The previous description mentioned DINOv2 embedding extraction, which
    # does not match what this launcher runs (inaturalist_to_wds.py).
    parser = argparse.ArgumentParser(
        description="Launch the iNaturalist to WebDataset preprocessing jobs"
    )
    parser.add_argument(
        "--launch",
        action="store_true",
        help="Launch the experiment",
    )
    parser.add_argument("--src_json", help="path to src json")
    parser.add_argument("--dest", help="path to dest")
    parser.add_argument(
        "--num_samples_per_tar",
        help="number of samples per tar",
        type=int,
        default=10000,
    )
    parser.add_argument(
        "--number_of_jobs",
        help="number of jobs",
        type=int,
        default=10,
    )
    return parser.parse_args()
|
|
|
|
|
# Command-line options are read at import time; everything below depends on them.
args = parse_mode()

# The experiment name and the job name are the same label.
exp_name = job_name = "inaturalist_preprocessing"

# One experiment description, sized by --number_of_jobs.
# NOTE(review): presumably this maps onto a SLURM job array -- confirm against
# jean_zay.launch.JeanZayExperiment.
jz_exp = JeanZayExperiment(
    exp_name,
    job_name,
    slurm_array_nb_jobs=args.number_of_jobs,
    cmd_path="data/to_webdataset/inaturalist_to_wds.py",
    num_nodes=1,
    num_gpus_per_node=1,
    qos="t3",
    account="syq",
    gpu_type="v100",
    time="1:00:00",
)
exps = [jz_exp]

# No trainer-level overrides for this job; kept so the merge below has a base.
trainer_modifiers = {}

# Options forwarded to the target script.
exp_modifier = {
    "--src_json": args.src_json,
    "--dest": args.dest,
    "--num_samples_per_tar": args.num_samples_per_tar,
    "--number_of_jobs": args.number_of_jobs,
    # Passed through as a literal string; presumably expanded by the shell
    # inside each array task -- TODO confirm.
    "--job_offset": "${SLURM_ARRAY_TASK_ID}",
}

# Entries of exp_modifier win over trainer_modifiers on key collisions.
cmd_modifiers = [{**trainer_modifiers, **exp_modifier}]
|
|
|
|
|
if __name__ == "__main__":
    # Build the command for every experiment; launch it only when --launch
    # was passed on the command line.
    for exp, cmd_modifier in zip(exps, cmd_modifiers):
        exp.build_cmd(cmd_modifier)
        # args.launch comes from a store_true flag, so it is already a bool.
        if args.launch:
            exp.launch()
|
|