Spaces:
Runtime error
Runtime error
""" | |
Utilities for submitting jobs to compute clusters, such as Slurm.
""" | |
import os | |
from omegaconf import DictConfig, OmegaConf | |
from datetime import datetime | |
from pathlib import Path | |
from utils.print_utils import cyan | |
# Module-level placeholder, initialised to None here. The original note says
# it "is set below"; no later assignment is visible in this section --
# TODO confirm where (or whether) it is populated.
REPO_DIR = None
def submit_slurm_job(
    cfg: DictConfig,
    python_args: str,
    project_root: Path,
) -> Path:
    """Render the cluster launch template into a Slurm script and submit it.

    A timestamped directory ``<project_root>/slurm_logs/<timestamp>-<cfg.name>``
    is created, ``slurm_logs/latest`` is re-pointed at it, the rendered script
    is written there as ``job.slurm``, marked executable and handed to
    ``sbatch``.

    Args:
        cfg: Config providing ``name``, ``cluster.launch_template`` (a
            ``str.format`` template) and ``cluster.params`` (extra template
            fields; these override the built-in fields on a key clash).
        python_args: Passed to the template as the ``python_args`` field.
        project_root: Directory under which ``slurm_logs`` is created; also
            passed to the template as the ``project_root`` field.

    Returns:
        The log directory created for this submission.

    Note:
        A failing ``chmod``/``sbatch`` does not raise; a warning is written
        to stderr and the function still returns the log directory.
    """
    # Function-scope imports keep this block self-contained; the module's
    # import section is left untouched.
    import subprocess
    import sys

    logs_root = project_root / "slurm_logs"
    log_dir = logs_root / f"{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}-{cfg.name}"
    log_dir.mkdir(exist_ok=True, parents=True)

    # A relative symlink target is resolved against the directory that holds
    # the link, so a relative ``project_root`` would yield a dangling
    # ``latest``. Linking to the absolute path avoids that and leaves the
    # target unchanged when ``project_root`` is already absolute.
    latest_link = logs_root / "latest"
    latest_link.unlink(missing_ok=True)
    latest_link.symlink_to(log_dir.absolute(), target_is_directory=True)

    params = dict(name=cfg.name, log_dir=log_dir, project_root=project_root, python_args=python_args)
    params.update(cfg.cluster.params)
    slurm_script = cfg.cluster.launch_template.format(**params)

    slurm_script_path = log_dir / "job.slurm"
    with slurm_script_path.open("w") as f:
        f.write(slurm_script)

    # Argument lists (no shell) so that spaces or shell metacharacters in the
    # path are passed through verbatim instead of being re-parsed by a shell.
    commands = (
        ["chmod", "+x", str(slurm_script_path)],
        ["sbatch", str(slurm_script_path)],
    )
    for command in commands:
        try:
            exit_code = subprocess.run(command, check=False).returncode
        except OSError as err:
            # Typically the executable is missing from PATH. Stay best-effort
            # like the original, but say so instead of failing silently.
            print(f"warning: could not run {command[0]!r}: {err}", file=sys.stderr)
            continue
        if exit_code != 0:
            print(f"warning: {command[0]!r} exited with status {exit_code}", file=sys.stderr)

    print(f"\n{cyan('script:')} {slurm_script_path}\n{cyan('slurm errors and logs:')} {log_dir}\n")
    return log_dir