import time
from dataclasses import dataclass
from enum import Enum

import torch

from finetrainers.constants import FINETRAINERS_ENABLE_TIMING
from finetrainers.logging import get_logger


logger = get_logger()


class TimerDevice(str, Enum):
    CPU = "cpu"
    CUDA = "cuda"


@dataclass
class TimerData:
    name: str
    device: TimerDevice
    start_time: float = 0.0
    end_time: float = 0.0


class Timer:
    def __init__(self, name: str, device: TimerDevice, device_sync: bool = False):
        self.data = TimerData(name=name, device=device)

        # When True, torch.cuda.synchronize() is called before recording the end
        # event so that all pending GPU work is included in the measurement.
        self._device_sync = device_sync
        self._start_event = None
        self._end_event = None
        self._active = False
        # Timers are no-ops unless timing is enabled via the environment.
        self._enabled = FINETRAINERS_ENABLE_TIMING

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end()
        return False

    def start(self):
        if self._active:
            logger.warning(f"Timer {self.data.name} is already running. Please stop it before starting again.")
            return
        self._active = True
        if not self._enabled:
            return
        if self.data.device == TimerDevice.CUDA and torch.cuda.is_available():
            self._start_cuda()
        else:
            self._start_cpu()
            if self.data.device != TimerDevice.CPU:
                logger.warning(
                    f"Timer device {self.data.device} is not supported or not available. Falling back to CPU timing."
                )

    def end(self):
        if not self._active:
            logger.warning(f"Timer {self.data.name} is not running. Please start it before stopping.")
            return
        self._active = False
        if not self._enabled:
            return
        if self.data.device == TimerDevice.CUDA and torch.cuda.is_available():
            self._end_cuda()
        else:
            self._end_cpu()
            if self.data.device != TimerDevice.CPU:
                logger.warning(
                    f"Timer device {self.data.device} is not supported or not available. Falling back to CPU timing."
                )

    @property
    def elapsed_time(self) -> float:
        # Elapsed time in seconds; safe to query while the timer is still running.
        if not self._enabled:
            # start()/end() skip all bookkeeping when timing is disabled, so there
            # is nothing meaningful to measure.
            return 0.0
        if self._active:
            if self.data.device == TimerDevice.CUDA and torch.cuda.is_available():
                # Record a temporary event so the running timer can be sampled
                # without stopping it.
                premature_end_event = torch.cuda.Event(enable_timing=True)
                premature_end_event.record()
                premature_end_event.synchronize()
                return self._start_event.elapsed_time(premature_end_event) / 1000.0
            else:
                return time.time() - self.data.start_time
        else:
            if self.data.device == TimerDevice.CUDA and torch.cuda.is_available():
                # Event.elapsed_time requires both events to have completed, so wait
                # for the end event before measuring. CUDA reports milliseconds.
                self._end_event.synchronize()
                return self._start_event.elapsed_time(self._end_event) / 1000.0
            else:
                return self.data.end_time - self.data.start_time

    def _start_cpu(self):
        self.data.start_time = time.time()

    def _start_cuda(self):
        # Drain previously queued work so it does not leak into the measurement.
        torch.cuda.synchronize()
        self._start_event = torch.cuda.Event(enable_timing=True)
        self._end_event = torch.cuda.Event(enable_timing=True)
        self._start_event.record()

    def _end_cpu(self):
        self.data.end_time = time.time()

    def _end_cuda(self):
        # With device_sync, include all outstanding GPU work in the measurement
        # before marking the end point.
        if self._device_sync:
            torch.cuda.synchronize()
        self._end_event.record()
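

# A minimal usage sketch (illustrative only, not part of the module's API): the
# context manager pairs start()/end(), and elapsed_time can be read both while the
# timer is running and after it has stopped. Assumes FINETRAINERS_ENABLE_TIMING is
# truthy in the environment; otherwise elapsed_time reports 0.0.
if __name__ == "__main__":
    with Timer("matmul", device=TimerDevice.CUDA, device_sync=True) as timer:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        x = torch.randn(1024, 1024, device=device)
        _ = x @ x
    print(f"{timer.data.name} took {timer.elapsed_time:.4f}s")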