# edited from https://github.com/fastai/imagenet-fast/blob/master/imagenet_nv/distributed.py
import torch
import torch.distributed as dist

def reduce_tensor(tensor, num_gpus):
    # Average `tensor` across all ranks: every process ends up with the
    # same (summed, then divided by num_gpus) result.
    rt = tensor.clone()
    # dist.ReduceOp.SUM replaces the deprecated dist.reduce_op.SUM.
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= num_gpus
    return rt
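
# Typical use during training (a hypothetical sketch, not part of the original
# file): average a per-rank loss so that logged values agree across GPUs.
# `loss`, `num_gpus`, and `args.distributed` are assumed to exist in the caller.
#
#     if args.distributed:
#         reduced_loss = reduce_tensor(loss.data, num_gpus).item()
#     else:
#         reduced_loss = loss.item()
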
def init_distributed(rank, num_gpus, group_name, dist_backend, dist_url):
    assert torch.cuda.is_available(), "Distributed mode requires CUDA."

    # Set the CUDA device so everything is done on the right GPU.
    torch.cuda.set_device(rank % torch.cuda.device_count())

    # Initialize distributed communication.
    dist.init_process_group(
        dist_backend, init_method=dist_url, world_size=num_gpus,
        rank=rank, group_name=group_name)
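

# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hypothetical demo of wiring these helpers together. It assumes the
# script is launched with one process per GPU, e.g.
#   torchrun --nproc_per_node=<num_gpus> distributed.py
# so that RANK, WORLD_SIZE, MASTER_ADDR, and MASTER_PORT are set in the
# environment. The "nccl" backend, "env://" URL, and group name used below are
# placeholder choices, not values taken from the original repository.
if __name__ == "__main__":
    import os

    rank = int(os.environ.get("RANK", "0"))
    num_gpus = int(os.environ.get("WORLD_SIZE", "1"))

    init_distributed(rank, num_gpus, group_name="demo",
                     dist_backend="nccl", dist_url="env://")

    # Each rank contributes its own rank index; after reduce_tensor every
    # process holds the mean of 0..num_gpus-1.
    value = torch.tensor([float(rank)], device="cuda")
    averaged = reduce_tensor(value, num_gpus)
    print("rank {}: averaged value = {:.3f}".format(rank, averaged.item()))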