# Spaces: Sleeping
# Command-line interface definition.
#
# Builds the module-level ``parser`` object. Sections below group the options
# by topic; the order of the ``add_argument`` calls determines the order in
# which options appear in ``--help``.
#
# NOTE: argparse applies %-formatting to every ``help`` string, so a literal
# percent sign in a help text must be written as ``%%``.
import argparse

parser = argparse.ArgumentParser()

######### Data-related arguments #########
parser.add_argument(
    "-c",
    "--data-config",
    type=str,
    help="data config YAML file",
    default="config_files/config_jets.yaml",
)
parser.add_argument(
    "-train",
    "--data-train",
    nargs="*",
    default=[],
    help="training files; supported syntax:"
    " (a) plain list, `--data-train /path/to/a/* /path/to/b/*`;"
    " (b) (named) groups [Recommended], `--data-train a:/path/to/a/* b:/path/to/b/*`,"
    " the file splitting (for each dataloader worker) will be performed per group,"
    " and then mixed together, to ensure a uniform mixing from all groups for each worker.",
)
# No default is given here, so ``args.data_val`` is None when the flag is
# omitted (unlike --data-train / --data-test, which default to an empty list).
parser.add_argument(
    "-val",
    "--data-val",
    nargs="*",
    help="validation files",
)
parser.add_argument(
    "-tag",
    "--tag",
    type=str,
)
parser.add_argument(
    "-ckpt-step",
    "--ckpt-step",
    type=int,
    default=0,
)  # to make it easier to find the actual number of steps
parser.add_argument(
    "-load-from-run",
    "--load-from-run",
    default="",
    type=str,
    help="WandB run name from which to pull the training settings",
)
parser.add_argument(
    "--train-dataset-size",
    type=int,
    default=None,
    help="number of events to use from the training dataset",
)
parser.add_argument(
    "--val-dataset-size",
    type=int,
    default=None,
    help="number of events to use from the validation dataset",
)
parser.add_argument(
    "--test-dataset-max-size",
    type=int,
    default=None,
    help="number of events to use from the testing dataset (per signal hypothesis)",
)
parser.add_argument(
    "-test",
    "--data-test",
    nargs="*",
    default=[],
    # The percent sign in `a%%10:` is escaped: argparse %-formats help strings
    # and an unescaped `%10:` makes `--help` raise ValueError.
    help="testing files; supported syntax:"
    " (a) plain list, `--data-test /path/to/a/* /path/to/b/*`;"
    " (b) keyword-based, `--data-test a:/path/to/a/* b:/path/to/b/*`, will produce output_a, output_b;"
    " (c) split output per N input files, `--data-test a%%10:/path/to/a/*`, will split per 10 input files",
)

######### Model and training-related arguments #########
parser.add_argument(
    "-net",
    "--network-config",
    type=str,
    help="network architecture configuration file; the path must be relative to the current dir",
)
parser.add_argument(
    "-n-blocks",
    "--num-blocks",
    type=int,
    help="Number of blocks for GATr/LGATr/Transformer",
    default=10,
)

##### Transformer-specific arguments #####
parser.add_argument(
    "-internal-dim",
    "--internal-dim",
    type=int,
    help="Internal dim for transformer",
    default=128,
)
parser.add_argument(
    "--no-pid",
    "-np",
    action="store_true",
    help="If turned on, the PID is not going to be used as an input feature",
)
parser.add_argument(
    "-heads",
    "--n-heads",
    type=int,
    help="N attention heads for transformer",
    default=4,
)

##### L-GATr-specific arguments #####
parser.add_argument(
    "-mv-ch",
    "--hidden-mv-channels",
    type=int,
    help="Hidden multivector channels for GATr and L-GATr",
    default=16,
)
parser.add_argument(
    "-s-ch",
    "--hidden-s-channels",
    type=int,
    help="Hidden scalar channels for GATr and L-GATr",
    default=64,
)
parser.add_argument(
    "--load-model-weights",
    type=str,
    default=None,
    help="initialize model with pre-trained weights",
)
parser.add_argument(
    "--run-name",
    type=str,
    help="The name of the run. The wandb name and the folder it gets saved to will be this name + timestamp.",
)
parser.add_argument(
    "--prefix",
    type=str,
    default="",
    help="Path to the results folder, if empty, it will be set to the results folder in the current environment.",
)
parser.add_argument(
    "--debug",
    action="store_true",
    help="quickly test the setup by running over only a small number of events - use for debugging",
)
parser.add_argument(
    "--wandb-projectname",
    type=str,
    help="project where the run is stored inside wandb",
    default="svj_clustering",
)
parser.add_argument("--batch-size", "-bs", type=int, default=128, help="batch size")
parser.add_argument("--num-epochs", type=int, default=20, help="number of epochs")
parser.add_argument(
    "--num-steps",
    type=int,
    default=-1,
    help="Number of steps. If set to -1, it will be ignored and only num_epochs will be considered. Otherwise, training will stop after the reached number of steps.",
)
parser.add_argument(
    "--gpus",
    type=str,
    default="0",
    help='device for the training/testing; to use CPU, set to empty string (""); to use multiple gpu, set it as a comma separated list, e.g., `1,2,3,4`',
)
parser.add_argument(
    "--num-workers",
    type=int,
    default=1,
    help="number of threads to load the dataset; memory consumption and disk access load increases (~linearly) with this numbers",
)

### Loss-related arguments ###
parser.add_argument(
    "--loss",
    type=str,
    default="oc",
    choices=["oc", "quark_distance"],
    help="Loss function to use (oc is object condensation, quark_distance aims to cluster things around the corresponding dark quark)",
)
parser.add_argument(
    "--gt-radius",
    type=float,
    default=0.8,
    help="GT radius R - within the radius of a dark quark, GT points to the dark quark, out of the radius it's noise",
)
parser.add_argument("--attr-loss-weight", type=float, default=1.0, help="weight for the attractive loss")
parser.add_argument("--repul-loss-weight", type=float, default=1.0, help="weight for the repulsive loss")
parser.add_argument("--coord-loss-weight", type=float, default=0.0, help="weight for the coordinate loss")
parser.add_argument(
    "--beta-type",
    type=str,
    default="default",
    choices=["default", "pt", "pt+bc"],
    help="How to predict betas",
)
parser.add_argument(
    "--lorentz-norm",
    help="Whether the norm in clustering space should be the Lorentz one (otherwise it's usual Euclidean 2-norm)",
    action="store_true",
)
parser.add_argument(
    "--scalars-oc",
    help="For L-GATr, use scalar virtual coordinates in the OC loss",
    action="store_true",
)
parser.add_argument(
    "--spatial-part-only",
    help="For L-GATr: if turned on, the spatial part is only going to be used for the loss.",
    action="store_true",
)

# HDBSCAN clustering parameters.
# NOTE(review): an older comment here listed
# `--min-cluster-size 11 --min-samples 18 --epsilon 0.48` as the defaults;
# the defaults actually registered below are 2, 1 and 0.3.
parser.add_argument(
    "--min-cluster-size",
    help="parameter of the HDBSCAN clustering",
    type=int,
    default=2,
)
parser.add_argument(
    "--min-samples",
    help="parameter of the HDBSCAN clustering",
    type=int,
    default=1,
)
parser.add_argument(
    "--parton-level",
    help="Run on parton-level particles",
    action="store_true",
)
parser.add_argument(
    "--gen-level",
    help="Run on gen-level final particles",
    action="store_true",
)
parser.add_argument(
    "--epsilon",
    help="parameter of the HDBSCAN clustering",
    type=float,
    default=0.3,
)
parser.add_argument(
    "-embed-as-vectors",
    "--embed-as-vectors",
    action="store_true",
    help="Whether to embed the input p_xyz as vectors (translations) or points",
)

#### Optimizer and LR-related arguments ####
parser.add_argument(
    "--optimizer",
    type=str,
    default="ranger",
    choices=["adam", "adamW", "radam", "ranger"],
    help="optimizer for the training",
)
# Each occurrence contributes one [name, value] pair (both kept as strings).
parser.add_argument(
    "--optimizer-option",
    nargs=2,
    action="append",
    default=[],
    help="options to pass to the optimizer class constructor, e.g., `--optimizer-option weight_decay 1e-4`",
)
parser.add_argument(
    "--lr-scheduler",
    type=str,
    default="flat+decay",
    choices=[
        "none",
        "steps",
        "flat+decay",
        "flat+linear",
        "flat+cos",
        "one-cycle",
        "reduceplateau",
    ],
    help="learning rate scheduler",
)
parser.add_argument("--start-lr", type=float, default=5e-3, help="start learning rate")
# NOTE(review): parsed as float although the help text describes a step
# count; the type is left unchanged so existing invocations keep working.
parser.add_argument(
    "--validation-steps",
    type=float,
    default=1000,
    help="Run eval on validation set every x steps",
)
parser.add_argument(
    "--backend",
    type=str,
    choices=["gloo", "nccl", "mpi"],
    default=None,
    help="backend for distributed training",
)
parser.add_argument(
    "--log",
    type=str,
    default="",
    help="path to the log file; `{auto}` can be used as part of the path to auto-generate a name, based on the timestamp and network configuration",
)
parser.add_argument(
    "--use-amp",
    action="store_true",
    help="use mixed precision training (fp16)",
)

# Objectness score submodel settings
parser.add_argument(
    "-obj-score",
    "--train-objectness-score",
    action="store_true",
    help="Whether to train the objectness classifier next to the usual clustering loss",
)
parser.add_argument(
    "--obj-score-module",
    default="src/models/transformer/transformer.py",
    help="Path to the objectness score model",
    type=str,
)
# The help text used to be a copy of the --train-objectness-score help.
parser.add_argument(
    "-obj-score-gt",
    "--objectness-score-gt-mode",
    default="all_in_radius",
    choices=["all_in_radius", "closest_only"],
    help="How the ground truth for the objectness score is defined",
)
parser.add_argument(
    "-obj-score-weights",
    "--load-objectness-score-weights",
    type=str,
    help="Ckpt file for the objectness score model",
    default=None,
)
parser.add_argument(
    "--global-features-obj-score",
    "-global-features-os",
    action="store_true",
    help="Whether to use global features in the objectness score model",
)
parser.add_argument(
    "--augment-soft-particles",
    "-aug-soft",
    help="add soft particles to the event - will add 10, 100, 1000, 10000 soft particles to the events (alternating in this order) and will split an energy of 0.5 GeV evenly among them",
    action="store_true",
)
parser.add_argument(
    "--irc-safety-loss",
    "-irc",
    help="add an IRC safety loss term",
    action="store_true",
)