File size: 5,394 Bytes
96a0788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
"""
import contextlib
from contextvars import ContextVar
from io import BytesIO
from typing import Any
from typing import Callable
from typing import ParamSpec
from typing import TypeVar
from typing import cast
from unittest.mock import patch

import torch
from torch.utils._pytree import tree_map_only
from torch._inductor.package.package import package_aoti
from torch.export.pt2_archive._package import AOTICompiledModel
from torch.export.pt2_archive._package_weights import TensorProperties
from torch.export.pt2_archive._package_weights import Weights


# Generic placeholders used by this module's annotations: `P` stands for the
# parameters of the callable wrapped by `cudagraph`, and `T` for the static
# type that `_cast_as` attaches to a value.
P = ParamSpec('P')
T = TypeVar('T')


# Inductor options that `aoti_compile` always applies: they are merged on top
# of any caller-supplied `inductor_configs`, so these values win on conflict.
# NOTE(review): presumably these keep the model constants out of the compiled
# shared object so they can be handled as a separate `Weights` artifact —
# confirm against the torch._inductor config documentation.
INDUCTOR_CONFIGS_OVERRIDES = {
    'aot_inductor.package_constants_in_so': False,
    'aot_inductor.package_constants_on_disk': True,
    'aot_inductor.package': True,
}


class ZeroGPUCompiledModel:
    def __init__(self, archive_file: torch.types.FileLike, weights: Weights, cuda: bool = False):
        self.archive_file = archive_file
        self.weights = weights
        if cuda:
            self.weights_to_cuda_()
        self.compiled_model: ContextVar[AOTICompiledModel | None] = ContextVar('compiled_model', default=None)
    def weights_to_cuda_(self):
        for name in self.weights:
            tensor, properties = self.weights.get_weight(name)
            self.weights[name] = (tensor.to('cuda'), properties)
    def __call__(self, *args, **kwargs):
        if (compiled_model := self.compiled_model.get()) is None:
            constants_map = {name: value[0] for name, value in self.weights.items()}
            compiled_model = cast(AOTICompiledModel, torch._inductor.aoti_load_package(self.archive_file))
            compiled_model.load_constants(constants_map, check_full_update=True, user_managed=True)
            self.compiled_model.set(compiled_model)
        return compiled_model(*args, **kwargs)
    def __reduce__(self):
        weight_dict: dict[str, tuple[torch.Tensor, TensorProperties]] = {}
        for name in self.weights:
            tensor, properties = self.weights.get_weight(name)
            tensor_ = torch.empty_like(tensor, device='cpu').pin_memory()
            weight_dict[name] = (tensor_.copy_(tensor).detach().share_memory_(), properties)
        return ZeroGPUCompiledModel, (self.archive_file, Weights(weight_dict), True)


def aoti_compile(
    exported_program: torch.export.ExportedProgram,
    inductor_configs: dict[str, Any] | None = None,
):
    """Compile an exported program with AOTInductor and wrap the result.

    Args:
        exported_program: Program to compile; its ``example_inputs`` must be
            set, as they are used as the compilation inputs.
        inductor_configs: Optional Inductor options. Entries of
            ``INDUCTOR_CONFIGS_OVERRIDES`` take precedence over them.

    Returns:
        A ``ZeroGPUCompiledModel`` holding an in-memory archive of the
        compiled files together with the ``Weights`` artifact.

    Raises:
        ValueError: If the compilation artifacts do not contain exactly one
            ``Weights`` object.
    """
    caller_options = inductor_configs or {}
    options = caller_options | INDUCTOR_CONFIGS_OVERRIDES

    graph_module = cast(torch.fx.GraphModule, exported_program.module())
    example_inputs = exported_program.example_inputs
    assert example_inputs is not None
    example_args, example_kwargs = example_inputs

    artifacts = torch._inductor.aot_compile(
        graph_module,
        example_args,
        example_kwargs,
        options=options,
    )

    # String artifacts are packaged into the archive; the Weights artifact
    # is kept aside and passed to the wrapper directly.
    packaged_files: list[str | Weights] = [item for item in artifacts if isinstance(item, str)]
    archive_file = BytesIO()
    package_aoti(archive_file, packaged_files)

    # Single-element unpacking doubles as a check for exactly one Weights.
    (weights,) = (item for item in artifacts if isinstance(item, Weights))
    return ZeroGPUCompiledModel(archive_file, weights)


def cudagraph(fn: Callable[P, list[torch.Tensor]]):
    """Wrap ``fn`` so that its calls are captured into CUDA graphs and replayed.

    One graph is captured per distinct combination of tensor-argument shapes
    (positional tensors identified by position, keyword tensors by name). On
    the first call with a given combination the tensor arguments are cloned
    into static buffers, ``fn`` is run once outside capture and once under
    ``torch.cuda.graph``. That call and every later call with the same
    combination copy the new tensor values into the static buffers, replay
    the graph and return clones of the captured outputs.

    Only top-level tensor arguments are refreshed before a replay; non-tensor
    arguments and tensors nested inside containers keep the values seen at
    capture time.

    Args:
        fn: Callable returning a list of tensors.

    Returns:
        A callable taking the same arguments as ``fn`` and returning a fresh
        list of output tensors on every call.
    """

    graphs = {}

    def fn_(*args: P.args, **kwargs: P.kwargs):

        # The key is the shape signature itself rather than its hash, so two
        # different signatures can never alias through a hash collision. It
        # covers positional tensors and keyword names as well, so a graph is
        # never replayed for inputs its static buffers were not created for.
        key = (
            tuple(
                (index, tuple(arg.shape))
                for index, arg in enumerate(args)
                if isinstance(arg, torch.Tensor)
            ),
            tuple(
                (name, tuple(kwarg.shape))
                for name in sorted(kwargs)
                if isinstance((kwarg := kwargs[name]), torch.Tensor)
            ),
        )

        cached = graphs.get(key)
        if cached is not None:
            wrapped, *_ = cached
            return wrapped(*args, **kwargs)

        graph = torch.cuda.CUDAGraph()
        # Static input buffers: the graph always reads from these clones.
        in_args, in_kwargs = tree_map_only(torch.Tensor, lambda t: t.clone(), (args, kwargs))
        in_args, in_kwargs = _cast_as((args, kwargs), (in_args, in_kwargs))

        # Eager run before capture (presumably a warm-up so that one-time
        # initialisation happens outside the graph).
        fn(*in_args, **in_kwargs)
        with torch.cuda.graph(graph):
            out_tensors = fn(*in_args, **in_kwargs)

        def wrapped(*args: P.args, **kwargs: P.kwargs):
            for static_arg, arg in zip(in_args, args):
                if isinstance(static_arg, torch.Tensor):
                    assert isinstance(arg, torch.Tensor)
                    static_arg.copy_(arg)
            for name, kwarg in kwargs.items():
                if isinstance(kwarg, torch.Tensor):
                    # Bound outside the assert: a name assigned inside an
                    # `assert` expression is never bound under `python -O`.
                    static_kwarg = in_kwargs[name]
                    assert isinstance(static_kwarg, torch.Tensor)
                    static_kwarg.copy_(kwarg)
            graph.replay()
            # Clone so callers do not alias the graph's static output buffers.
            return [tensor.clone() for tensor in out_tensors]

        # Everything the graph depends on is stored alongside the replay
        # function so it stays referenced for as long as the cache entry does.
        graphs[key] = (wrapped, graph, in_args, in_kwargs, out_tensors)
        return wrapped(*args, **kwargs)

    return fn_


@contextlib.contextmanager
def capture_component_call(
    pipeline: Any,
    component_name: str,
    component_method='forward',
):
    """Intercept a call to a pipeline component and record its arguments.

    While the context is active, the ``component_method`` attribute of
    ``getattr(pipeline, component_name)`` is replaced by a stub that raises
    an internal exception carrying the call's arguments. That exception is
    swallowed at the ``with`` boundary, so the block ends at the intercepted
    call; any other exception propagates unchanged.

    Args:
        pipeline: Object owning the component.
        component_name: Attribute name of the component on ``pipeline``.
        component_method: Name of the component method to intercept.

    Yields:
        An object with ``args`` (tuple) and ``kwargs`` (dict) attributes.
        Both are empty until an intercepted call has ended the block.
    """

    class CapturedCall:
        def __init__(self):
            self.args: tuple[Any, ...] = ()
            self.kwargs: dict[str, Any] = {}

    class CapturedCallException(Exception):
        def __init__(self, *args, **kwargs):
            # Passing the positionals up stores them as `self.args`.
            super().__init__(*args)
            self.kwargs = kwargs

    def raise_with_arguments(*args, **kwargs):
        raise CapturedCallException(*args, **kwargs)

    target = getattr(pipeline, component_name)
    record = CapturedCall()
    patcher = patch.object(target, component_method, new=raise_with_arguments)

    with patcher:
        try:
            yield record
        except CapturedCallException as intercepted:
            record.args = intercepted.args
            record.kwargs = intercepted.kwargs


def _cast_as(type_from: T, value: Any) -> T:
    """Return ``value`` unchanged, annotated with the static type of ``type_from``.

    Typing-only helper: ``type_from`` is never read at runtime; it only lets
    a type checker infer the return type from it.
    """
    return value