darabos commited on
Commit
bbd029e
·
1 Parent(s): a961ac6

Split ops from infrastructure code.

Browse files
lynxkite-graph-analytics/src/lynxkite_graph_analytics/__init__.py CHANGED
@@ -13,7 +13,7 @@ pd.options.mode.copy_on_write = True # Prepare for Pandas 3.0.
13
  from .core import * # noqa (easier access for core classes)
14
  from . import lynxkite_ops # noqa (imported to trigger registration)
15
  from . import networkx_ops # noqa (imported to trigger registration)
16
- from . import pytorch_model_ops # noqa (imported to trigger registration)
17
 
18
  if os.environ.get("LYNXKITE_BIONEMO_INSTALLED", "").strip().lower() == "true":
19
  from . import bionemo_ops # noqa (imported to trigger registration)
 
13
  from .core import * # noqa (easier access for core classes)
14
  from . import lynxkite_ops # noqa (imported to trigger registration)
15
  from . import networkx_ops # noqa (imported to trigger registration)
16
+ from . import pytorch # noqa (imported to trigger registration)
17
 
18
  if os.environ.get("LYNXKITE_BIONEMO_INSTALLED", "").strip().lower() == "true":
19
  from . import bionemo_ops # noqa (imported to trigger registration)
lynxkite-graph-analytics/src/lynxkite_graph_analytics/lynxkite_ops.py CHANGED
@@ -8,7 +8,7 @@ from lynxkite.core import ops
8
  from collections import deque
9
 
10
  from tqdm import tqdm
11
- from . import core, pytorch_model_ops
12
  from lynxkite.core import workspace
13
  import grandcypher
14
  import joblib
@@ -347,7 +347,7 @@ def define_model(
347
  assert model_workspace, "Model workspace is unset."
348
  ws = load_ws(model_workspace)
349
  # Build the model without inputs, to get its interface.
350
- m = pytorch_model_ops.build_model(ws)
351
  m.source_workspace = model_workspace
352
  bundle = bundle.copy()
353
  bundle.other[save_as] = m
@@ -356,15 +356,15 @@ def define_model(
356
 
357
  # These contain the same mapping, but they get different UIs.
358
  # For inputs, you select existing columns. For outputs, you can create new columns.
359
- class ModelInferenceInputMapping(pytorch_model_ops.ModelMapping):
360
  pass
361
 
362
 
363
- class ModelTrainingInputMapping(pytorch_model_ops.ModelMapping):
364
  pass
365
 
366
 
367
- class ModelOutputMapping(pytorch_model_ops.ModelMapping):
368
  pass
369
 
370
 
@@ -379,7 +379,7 @@ def train_model(
379
  ):
380
  """Trains the selected model on the selected dataset. Most training parameters are set in the model definition."""
381
  m = bundle.other[model_name].copy()
382
- inputs = pytorch_model_ops.to_tensors(bundle, input_mapping)
383
  t = tqdm(range(epochs), desc="Training model")
384
  losses = []
385
  for _ in t:
@@ -406,7 +406,7 @@ def model_inference(
406
  return ops.Result(bundle, error="Mapping is unset.")
407
  m = bundle.other[model_name]
408
  assert m.trained, "The model is not trained."
409
- inputs = pytorch_model_ops.to_tensors(bundle, input_mapping)
410
  outputs = m.inference(inputs)
411
  bundle = bundle.copy()
412
  copied = set()
 
8
  from collections import deque
9
 
10
  from tqdm import tqdm
11
+ from . import core, pytorch
12
  from lynxkite.core import workspace
13
  import grandcypher
14
  import joblib
 
347
  assert model_workspace, "Model workspace is unset."
348
  ws = load_ws(model_workspace)
349
  # Build the model without inputs, to get its interface.
350
+ m = pytorch.core.build_model(ws)
351
  m.source_workspace = model_workspace
352
  bundle = bundle.copy()
353
  bundle.other[save_as] = m
 
356
 
357
  # These contain the same mapping, but they get different UIs.
358
  # For inputs, you select existing columns. For outputs, you can create new columns.
359
+ class ModelInferenceInputMapping(pytorch.core.ModelMapping):
360
  pass
361
 
362
 
363
+ class ModelTrainingInputMapping(pytorch.core.ModelMapping):
364
  pass
365
 
366
 
367
+ class ModelOutputMapping(pytorch.core.ModelMapping):
368
  pass
369
 
370
 
 
379
  ):
380
  """Trains the selected model on the selected dataset. Most training parameters are set in the model definition."""
381
  m = bundle.other[model_name].copy()
382
+ inputs = pytorch.core.to_tensors(bundle, input_mapping)
383
  t = tqdm(range(epochs), desc="Training model")
384
  losses = []
385
  for _ in t:
 
406
  return ops.Result(bundle, error="Mapping is unset.")
407
  m = bundle.other[model_name]
408
  assert m.trained, "The model is not trained."
409
+ inputs = pytorch.core.to_tensors(bundle, input_mapping)
410
  outputs = m.inference(inputs)
411
  bundle = bundle.copy()
412
  copied = set()
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from . import core # noqa
2
+ from . import ops # noqa
lynxkite-graph-analytics/src/lynxkite_graph_analytics/{pytorch_model_ops.py → pytorch/core.py} RENAMED
@@ -1,16 +1,14 @@
1
  """Boxes for defining PyTorch models."""
2
 
3
  import copy
4
- import enum
5
  import graphlib
6
 
7
  import pydantic
8
  from lynxkite.core import ops, workspace
9
- from lynxkite.core.ops import Parameter as P
10
  import torch
11
  import torch_geometric.nn as pyg_nn
12
  import dataclasses
13
- from . import core
14
 
15
  ENV = "PyTorch model"
16
 
@@ -42,117 +40,6 @@ def reg(name, inputs=[], outputs=None, params=[]):
42
  )
43
 
44
 
45
- reg("Input: tensor", outputs=["output"], params=[P.basic("name")])
46
- reg("Input: graph edges", outputs=["edges"])
47
- reg("Input: sequential", outputs=["y"])
48
-
49
- reg("LSTM", inputs=["x", "h"], outputs=["x", "h"])
50
- reg(
51
- "Neural ODE",
52
- inputs=["x"],
53
- params=[
54
- P.basic("relative_tolerance"),
55
- P.basic("absolute_tolerance"),
56
- P.options(
57
- "method",
58
- [
59
- "dopri8",
60
- "dopri5",
61
- "bosh3",
62
- "fehlberg2",
63
- "adaptive_heun",
64
- "euler",
65
- "midpoint",
66
- "rk4",
67
- "explicit_adams",
68
- "implicit_adams",
69
- ],
70
- ),
71
- ],
72
- )
73
-
74
-
75
- reg("Attention", inputs=["q", "k", "v"], outputs=["x", "weights"])
76
- reg("LayerNorm", inputs=["x"])
77
- reg("Dropout", inputs=["x"], params=[P.basic("p", 0.5)])
78
-
79
-
80
- @op("Linear")
81
- def linear(x, *, output_dim=1024):
82
- return pyg_nn.Linear(-1, output_dim)
83
-
84
-
85
- class ActivationTypes(enum.Enum):
86
- ReLU = "ReLU"
87
- Leaky_ReLU = "Leaky ReLU"
88
- Tanh = "Tanh"
89
- Mish = "Mish"
90
-
91
-
92
- @op("Activation")
93
- def activation(x, *, type: ActivationTypes = ActivationTypes.ReLU):
94
- return getattr(torch.nn.functional, type.name.lower().replace(" ", "_"))
95
-
96
-
97
- @op("MSE loss")
98
- def mse_loss(x, y):
99
- return torch.nn.functional.mse_loss
100
-
101
-
102
- reg("Softmax", inputs=["x"])
103
- reg(
104
- "Graph conv",
105
- inputs=["x", "edges"],
106
- outputs=["x"],
107
- params=[P.options("type", ["GCNConv", "GATConv", "GATv2Conv", "SAGEConv"])],
108
- )
109
- reg("Concatenate", inputs=["a", "b"], outputs=["x"])
110
- reg("Add", inputs=["a", "b"], outputs=["x"])
111
- reg("Subtract", inputs=["a", "b"], outputs=["x"])
112
- reg("Multiply", inputs=["a", "b"], outputs=["x"])
113
- reg("Triplet margin loss", inputs=["x", "x_pos", "x_neg"], outputs=["loss"])
114
- reg("Cross-entropy loss", inputs=["x", "y"], outputs=["loss"])
115
- reg(
116
- "Optimizer",
117
- inputs=["loss"],
118
- outputs=[],
119
- params=[
120
- P.options(
121
- "type",
122
- [
123
- "AdamW",
124
- "Adafactor",
125
- "Adagrad",
126
- "SGD",
127
- "Lion",
128
- "Paged AdamW",
129
- "Galore AdamW",
130
- ],
131
- ),
132
- P.basic("lr", 0.001),
133
- ],
134
- )
135
-
136
- ops.register_passive_op(
137
- ENV,
138
- "Repeat",
139
- inputs=[ops.Input(name="input", position="top", type="tensor")],
140
- outputs=[ops.Output(name="output", position="bottom", type="tensor")],
141
- params=[
142
- ops.Parameter.basic("times", 1, int),
143
- ops.Parameter.basic("same_weights", False, bool),
144
- ],
145
- )
146
-
147
- ops.register_passive_op(
148
- ENV,
149
- "Recurrent chain",
150
- inputs=[ops.Input(name="input", position="top", type="tensor")],
151
- outputs=[ops.Output(name="output", position="bottom", type="tensor")],
152
- params=[],
153
- )
154
-
155
-
156
  def _to_id(*strings: str) -> str:
157
  """Replaces all non-alphanumeric characters with underscores."""
158
  return "_".join("".join(c if c.isalnum() else "_" for c in s) for s in strings)
 
1
  """Boxes for defining PyTorch models."""
2
 
3
  import copy
 
4
  import graphlib
5
 
6
  import pydantic
7
  from lynxkite.core import ops, workspace
 
8
  import torch
9
  import torch_geometric.nn as pyg_nn
10
  import dataclasses
11
+ from .. import core
12
 
13
  ENV = "PyTorch model"
14
 
 
40
  )
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def _to_id(*strings: str) -> str:
44
  """Replaces all non-alphanumeric characters with underscores."""
45
  return "_".join("".join(c if c.isalnum() else "_" for c in s) for s in strings)
lynxkite-graph-analytics/src/lynxkite_graph_analytics/pytorch/ops.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Boxes for defining PyTorch models."""
2
+
3
+ import enum
4
+ from lynxkite.core import ops
5
+ from lynxkite.core.ops import Parameter as P
6
+ import torch
7
+ import torch_geometric.nn as pyg_nn
8
+ from .core import op, reg, ENV
9
+
10
+ reg("Input: tensor", outputs=["output"], params=[P.basic("name")])
11
+ reg("Input: graph edges", outputs=["edges"])
12
+ reg("Input: sequential", outputs=["y"])
13
+
14
+ reg("LSTM", inputs=["x", "h"], outputs=["x", "h"])
15
+ reg(
16
+ "Neural ODE",
17
+ inputs=["x"],
18
+ params=[
19
+ P.basic("relative_tolerance"),
20
+ P.basic("absolute_tolerance"),
21
+ P.options(
22
+ "method",
23
+ [
24
+ "dopri8",
25
+ "dopri5",
26
+ "bosh3",
27
+ "fehlberg2",
28
+ "adaptive_heun",
29
+ "euler",
30
+ "midpoint",
31
+ "rk4",
32
+ "explicit_adams",
33
+ "implicit_adams",
34
+ ],
35
+ ),
36
+ ],
37
+ )
38
+
39
+
40
+ reg("Attention", inputs=["q", "k", "v"], outputs=["x", "weights"])
41
+ reg("LayerNorm", inputs=["x"])
42
+ reg("Dropout", inputs=["x"], params=[P.basic("p", 0.5)])
43
+
44
+
45
+ @op("Linear")
46
+ def linear(x, *, output_dim=1024):
47
+ return pyg_nn.Linear(-1, output_dim)
48
+
49
+
50
+ class ActivationTypes(enum.Enum):
51
+ ReLU = "ReLU"
52
+ Leaky_ReLU = "Leaky ReLU"
53
+ Tanh = "Tanh"
54
+ Mish = "Mish"
55
+
56
+
57
+ @op("Activation")
58
+ def activation(x, *, type: ActivationTypes = ActivationTypes.ReLU):
59
+ return getattr(torch.nn.functional, type.name.lower().replace(" ", "_"))
60
+
61
+
62
+ @op("MSE loss")
63
+ def mse_loss(x, y):
64
+ return torch.nn.functional.mse_loss
65
+
66
+
67
+ reg("Softmax", inputs=["x"])
68
+ reg(
69
+ "Graph conv",
70
+ inputs=["x", "edges"],
71
+ outputs=["x"],
72
+ params=[P.options("type", ["GCNConv", "GATConv", "GATv2Conv", "SAGEConv"])],
73
+ )
74
+ reg("Concatenate", inputs=["a", "b"], outputs=["x"])
75
+ reg("Add", inputs=["a", "b"], outputs=["x"])
76
+ reg("Subtract", inputs=["a", "b"], outputs=["x"])
77
+ reg("Multiply", inputs=["a", "b"], outputs=["x"])
78
+ reg("Triplet margin loss", inputs=["x", "x_pos", "x_neg"], outputs=["loss"])
79
+ reg("Cross-entropy loss", inputs=["x", "y"], outputs=["loss"])
80
+ reg(
81
+ "Optimizer",
82
+ inputs=["loss"],
83
+ outputs=[],
84
+ params=[
85
+ P.options(
86
+ "type",
87
+ [
88
+ "AdamW",
89
+ "Adafactor",
90
+ "Adagrad",
91
+ "SGD",
92
+ "Lion",
93
+ "Paged AdamW",
94
+ "Galore AdamW",
95
+ ],
96
+ ),
97
+ P.basic("lr", 0.001),
98
+ ],
99
+ )
100
+
101
+ ops.register_passive_op(
102
+ ENV,
103
+ "Repeat",
104
+ inputs=[ops.Input(name="input", position="top", type="tensor")],
105
+ outputs=[ops.Output(name="output", position="bottom", type="tensor")],
106
+ params=[
107
+ ops.Parameter.basic("times", 1, int),
108
+ ops.Parameter.basic("same_weights", False, bool),
109
+ ],
110
+ )
111
+
112
+ ops.register_passive_op(
113
+ ENV,
114
+ "Recurrent chain",
115
+ inputs=[ops.Input(name="input", position="top", type="tensor")],
116
+ outputs=[ops.Output(name="output", position="bottom", type="tensor")],
117
+ params=[],
118
+ )
lynxkite-graph-analytics/tests/test_pytorch_model_ops.py CHANGED
@@ -1,5 +1,5 @@
1
  from lynxkite.core import workspace
2
- from lynxkite_graph_analytics import pytorch_model_ops
3
  import torch
4
  import pytest
5
 
@@ -33,11 +33,11 @@ def make_ws(env, nodes: dict[str, dict], edges: list[tuple[str, str]]):
33
  return ws
34
 
35
 
36
- def summarize_layers(m: pytorch_model_ops.ModelConfig) -> str:
37
  return "".join(str(e)[0] for e in m.model)
38
 
39
 
40
- def summarize_connections(m: pytorch_model_ops.ModelConfig) -> str:
41
  return " ".join(
42
  "".join(n[0] for n in c.param_names) + "->" + "".join(n[0] for n in c.return_names)
43
  for c in m.model._children
@@ -46,7 +46,7 @@ def summarize_connections(m: pytorch_model_ops.ModelConfig) -> str:
46
 
47
  async def test_build_model():
48
  ws = make_ws(
49
- pytorch_model_ops.ENV,
50
  {
51
  "emb": {"title": "Input: tensor"},
52
  "lin": {"title": "Linear", "output_dim": 4},
@@ -65,7 +65,7 @@ async def test_build_model():
65
  )
66
  x = torch.rand(100, 4)
67
  y = x + 1
68
- m = pytorch_model_ops.build_model(ws)
69
  for i in range(1000):
70
  loss = m.train({"emb_output": x, "label_output": y})
71
  assert loss < 0.1
@@ -77,7 +77,7 @@ async def test_build_model():
77
  async def test_build_model_with_repeat():
78
  def repeated_ws(times):
79
  return make_ws(
80
- pytorch_model_ops.ENV,
81
  {
82
  "emb": {"title": "Input: tensor"},
83
  "lin": {"title": "Linear", "output_dim": 8},
@@ -99,17 +99,17 @@ async def test_build_model_with_repeat():
99
  )
100
 
101
  # 1 repetition
102
- m = pytorch_model_ops.build_model(repeated_ws(1))
103
  assert summarize_layers(m) == "IL<II"
104
  assert summarize_connections(m) == "e->S S->l l->a a->E E->E"
105
 
106
  # 2 repetitions
107
- m = pytorch_model_ops.build_model(repeated_ws(2))
108
  assert summarize_layers(m) == "IL<IL<II"
109
  assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->E E->E"
110
 
111
  # 3 repetitions
112
- m = pytorch_model_ops.build_model(repeated_ws(3))
113
  assert summarize_layers(m) == "IL<IL<IL<II"
114
  assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->S S->l l->a a->E E->E"
115
 
 
1
  from lynxkite.core import workspace
2
+ from lynxkite_graph_analytics import pytorch
3
  import torch
4
  import pytest
5
 
 
33
  return ws
34
 
35
 
36
+ def summarize_layers(m: pytorch.core.ModelConfig) -> str:
37
  return "".join(str(e)[0] for e in m.model)
38
 
39
 
40
+ def summarize_connections(m: pytorch.core.ModelConfig) -> str:
41
  return " ".join(
42
  "".join(n[0] for n in c.param_names) + "->" + "".join(n[0] for n in c.return_names)
43
  for c in m.model._children
 
46
 
47
  async def test_build_model():
48
  ws = make_ws(
49
+ pytorch.core.ENV,
50
  {
51
  "emb": {"title": "Input: tensor"},
52
  "lin": {"title": "Linear", "output_dim": 4},
 
65
  )
66
  x = torch.rand(100, 4)
67
  y = x + 1
68
+ m = pytorch.core.build_model(ws)
69
  for i in range(1000):
70
  loss = m.train({"emb_output": x, "label_output": y})
71
  assert loss < 0.1
 
77
  async def test_build_model_with_repeat():
78
  def repeated_ws(times):
79
  return make_ws(
80
+ pytorch.core.ENV,
81
  {
82
  "emb": {"title": "Input: tensor"},
83
  "lin": {"title": "Linear", "output_dim": 8},
 
99
  )
100
 
101
  # 1 repetition
102
+ m = pytorch.core.build_model(repeated_ws(1))
103
  assert summarize_layers(m) == "IL<II"
104
  assert summarize_connections(m) == "e->S S->l l->a a->E E->E"
105
 
106
  # 2 repetitions
107
+ m = pytorch.core.build_model(repeated_ws(2))
108
  assert summarize_layers(m) == "IL<IL<II"
109
  assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->E E->E"
110
 
111
  # 3 repetitions
112
+ m = pytorch.core.build_model(repeated_ws(3))
113
  assert summarize_layers(m) == "IL<IL<IL<II"
114
  assert summarize_connections(m) == "e->S S->l l->a a->S S->l l->a a->S S->l l->a a->E E->E"
115