Spaces:
Sleeping
Sleeping
Merge pull request #95 from MilesCranmer/state-saving
Browse files
- README.md +3 -1
- docs/start.md +3 -1
- pysr/sr.py +46 -5
- setup.py +2 -2
- test/test.py +8 -1
README.md
CHANGED
|
@@ -74,7 +74,7 @@ Most common issues at this stage are solved
|
|
| 74 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
| 75 |
to use up-to-date packages.
|
| 76 |
|
| 77 |
-
#
|
| 78 |
|
| 79 |
Let's create a PySR example. First, let's import
|
| 80 |
numpy to generate some test data:
|
|
@@ -144,6 +144,8 @@ This arrow in the `pick` column indicates which equation is currently selected b
|
|
| 144 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
| 145 |
(both of which are differentiable).
|
| 146 |
|
|
|
|
|
|
|
| 147 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
| 148 |
feature selection (e.g., `select_k_features=3`).
|
| 149 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
|
|
|
| 74 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
| 75 |
to use up-to-date packages.
|
| 76 |
|
| 77 |
+
# Introduction
|
| 78 |
|
| 79 |
Let's create a PySR example. First, let's import
|
| 80 |
numpy to generate some test data:
|
|
|
|
| 144 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
| 145 |
(both of which are differentiable).
|
| 146 |
|
| 147 |
+
Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`. This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
|
| 148 |
+
|
| 149 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
| 150 |
feature selection (e.g., `select_k_features=3`).
|
| 151 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
docs/start.md
CHANGED
|
@@ -19,7 +19,7 @@ Most common issues at this stage are solved
|
|
| 19 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
| 20 |
to use up-to-date packages.
|
| 21 |
|
| 22 |
-
#
|
| 23 |
|
| 24 |
Let's create a PySR example. First, let's import
|
| 25 |
numpy to generate some test data:
|
|
@@ -89,6 +89,8 @@ This arrow in the `pick` column indicates which equation is currently selected b
|
|
| 89 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
| 90 |
(both of which are differentiable).
|
| 91 |
|
|
|
|
|
|
|
| 92 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
| 93 |
feature selection (e.g., `select_k_features=3`).
|
| 94 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
|
|
|
| 19 |
by [tweaking the Julia package server](https://github.com/MilesCranmer/PySR/issues/27).
|
| 20 |
to use up-to-date packages.
|
| 21 |
|
| 22 |
+
# Introduction
|
| 23 |
|
| 24 |
Let's create a PySR example. First, let's import
|
| 25 |
numpy to generate some test data:
|
|
|
|
| 89 |
SymPy format (`sympy_format`), and even JAX and PyTorch format
|
| 90 |
(both of which are differentiable).
|
| 91 |
|
| 92 |
+
Note that `PySRRegressor` stores the state of the last search, and will restart from where you left off the next time you call `.fit()`. This will cause problems if significant changes are made to the search parameters (like changing the operators). You can run `model.reset()` to reset the state.
|
| 93 |
+
|
| 94 |
There are several other useful features such as denoising (e.g., `denoising=True`),
|
| 95 |
feature selection (e.g., `select_k_features=3`).
|
| 96 |
For a summary of features and options, see [this docs page](https://pysr.readthedocs.io/en/latest/docs/options/).
|
pysr/sr.py
CHANGED
|
@@ -12,6 +12,8 @@ from datetime import datetime
|
|
| 12 |
import warnings
|
| 13 |
from multiprocessing import cpu_count
|
| 14 |
from sklearn.base import BaseEstimator, RegressorMixin
|
|
|
|
|
|
|
| 15 |
|
| 16 |
is_julia_warning_silenced = False
|
| 17 |
|
|
@@ -320,7 +322,7 @@ def _write_project_file(tmp_dir):
|
|
| 320 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
| 321 |
|
| 322 |
[compat]
|
| 323 |
-
SymbolicRegression = "0.7.
|
| 324 |
julia = "1.5"
|
| 325 |
"""
|
| 326 |
|
|
@@ -636,9 +638,10 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 636 |
|
| 637 |
# Stored equations:
|
| 638 |
self.equations = None
|
|
|
|
|
|
|
| 639 |
|
| 640 |
self.multioutput = None
|
| 641 |
-
self.raw_julia_output = None
|
| 642 |
self.equation_file = equation_file
|
| 643 |
self.n_features = None
|
| 644 |
self.extra_sympy_mappings = extra_sympy_mappings
|
|
@@ -654,7 +657,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 654 |
self.surface_parameters = [
|
| 655 |
"model_selection",
|
| 656 |
"multioutput",
|
| 657 |
-
"raw_julia_output",
|
| 658 |
"equation_file",
|
| 659 |
"n_features",
|
| 660 |
"extra_sympy_mappings",
|
|
@@ -727,7 +729,6 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 727 |
else:
|
| 728 |
self.params[key] = value
|
| 729 |
|
| 730 |
-
self.refresh()
|
| 731 |
return self
|
| 732 |
|
| 733 |
def get_params(self, deep=True):
|
|
@@ -858,6 +859,12 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 858 |
return [eq["torch_format"] for eq in best]
|
| 859 |
return best["torch_format"]
|
| 860 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 861 |
def _run(self, X, y, weights, variable_names):
|
| 862 |
global already_ran
|
| 863 |
global Main
|
|
@@ -1046,6 +1053,38 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1046 |
float(weightDoNothing),
|
| 1047 |
]
|
| 1048 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1049 |
options = Main.Options(
|
| 1050 |
binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")),
|
| 1051 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
|
@@ -1085,6 +1124,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1085 |
optimizer_iterations=self.params["optimizer_iterations"],
|
| 1086 |
perturbationFactor=self.params["perturbationFactor"],
|
| 1087 |
annealing=self.params["annealing"],
|
|
|
|
| 1088 |
)
|
| 1089 |
|
| 1090 |
np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[
|
|
@@ -1106,7 +1146,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1106 |
|
| 1107 |
cprocs = 0 if multithreading else procs
|
| 1108 |
|
| 1109 |
-
self.raw_julia_output = Main.EquationSearch(
|
| 1110 |
Main.X,
|
| 1111 |
Main.y,
|
| 1112 |
weights=Main.weights,
|
|
@@ -1119,6 +1159,7 @@ class PySRRegressor(BaseEstimator, RegressorMixin):
|
|
| 1119 |
options=options,
|
| 1120 |
numprocs=int(cprocs),
|
| 1121 |
multithreading=bool(multithreading),
|
|
|
|
| 1122 |
)
|
| 1123 |
|
| 1124 |
self.variable_names = variable_names
|
|
|
|
| 12 |
import warnings
|
| 13 |
from multiprocessing import cpu_count
|
| 14 |
from sklearn.base import BaseEstimator, RegressorMixin
|
| 15 |
+
from collections import OrderedDict
|
| 16 |
+
from hashlib import sha256
|
| 17 |
|
| 18 |
is_julia_warning_silenced = False
|
| 19 |
|
|
|
|
| 322 |
SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
|
| 323 |
|
| 324 |
[compat]
|
| 325 |
+
SymbolicRegression = "0.7.3"
|
| 326 |
julia = "1.5"
|
| 327 |
"""
|
| 328 |
|
|
|
|
| 638 |
|
| 639 |
# Stored equations:
|
| 640 |
self.equations = None
|
| 641 |
+
self.params_hash = None
|
| 642 |
+
self.raw_julia_state = None
|
| 643 |
|
| 644 |
self.multioutput = None
|
|
|
|
| 645 |
self.equation_file = equation_file
|
| 646 |
self.n_features = None
|
| 647 |
self.extra_sympy_mappings = extra_sympy_mappings
|
|
|
|
| 657 |
self.surface_parameters = [
|
| 658 |
"model_selection",
|
| 659 |
"multioutput",
|
|
|
|
| 660 |
"equation_file",
|
| 661 |
"n_features",
|
| 662 |
"extra_sympy_mappings",
|
|
|
|
| 729 |
else:
|
| 730 |
self.params[key] = value
|
| 731 |
|
|
|
|
| 732 |
return self
|
| 733 |
|
| 734 |
def get_params(self, deep=True):
|
|
|
|
| 859 |
return [eq["torch_format"] for eq in best]
|
| 860 |
return best["torch_format"]
|
| 861 |
|
| 862 |
+
def reset(self):
|
| 863 |
+
"""Reset the search state."""
|
| 864 |
+
self.equations = None
|
| 865 |
+
self.params_hash = None
|
| 866 |
+
self.raw_julia_state = None
|
| 867 |
+
|
| 868 |
def _run(self, X, y, weights, variable_names):
|
| 869 |
global already_ran
|
| 870 |
global Main
|
|
|
|
| 1053 |
float(weightDoNothing),
|
| 1054 |
]
|
| 1055 |
|
| 1056 |
+
params_to_hash = {
|
| 1057 |
+
**{k: self.__getattribute__(k) for k in self.surface_parameters},
|
| 1058 |
+
**self.params,
|
| 1059 |
+
}
|
| 1060 |
+
params_excluded_from_hash = [
|
| 1061 |
+
"niterations",
|
| 1062 |
+
]
|
| 1063 |
+
# Delete these^ from params_to_hash:
|
| 1064 |
+
params_to_hash = {
|
| 1065 |
+
k: v
|
| 1066 |
+
for k, v in params_to_hash.items()
|
| 1067 |
+
if k not in params_excluded_from_hash
|
| 1068 |
+
}
|
| 1069 |
+
|
| 1070 |
+
# Sort params_to_hash by key:
|
| 1071 |
+
params_to_hash = OrderedDict(sorted(params_to_hash.items()))
|
| 1072 |
+
# Hash all parameters:
|
| 1073 |
+
cur_hash = sha256(str(params_to_hash).encode()).hexdigest()
|
| 1074 |
+
|
| 1075 |
+
if self.params_hash is not None:
|
| 1076 |
+
if cur_hash != self.params_hash:
|
| 1077 |
+
warnings.warn(
|
| 1078 |
+
"Warning: PySR options have changed since the last run. "
|
| 1079 |
+
"This is experimental and may not work. "
|
| 1080 |
+
"For example, if the operators change, or even their order,"
|
| 1081 |
+
" the saved equations will be in the wrong format."
|
| 1082 |
+
"\n\n"
|
| 1083 |
+
"To reset the search state, run `.reset()`. "
|
| 1084 |
+
)
|
| 1085 |
+
|
| 1086 |
+
self.params_hash = cur_hash
|
| 1087 |
+
|
| 1088 |
options = Main.Options(
|
| 1089 |
binary_operators=Main.eval(str(tuple(binary_operators)).replace("'", "")),
|
| 1090 |
unary_operators=Main.eval(str(tuple(unary_operators)).replace("'", "")),
|
|
|
|
| 1124 |
optimizer_iterations=self.params["optimizer_iterations"],
|
| 1125 |
perturbationFactor=self.params["perturbationFactor"],
|
| 1126 |
annealing=self.params["annealing"],
|
| 1127 |
+
stateReturn=True, # Required for state saving.
|
| 1128 |
)
|
| 1129 |
|
| 1130 |
np_dtype = {16: np.float16, 32: np.float32, 64: np.float64}[
|
|
|
|
| 1146 |
|
| 1147 |
cprocs = 0 if multithreading else procs
|
| 1148 |
|
| 1149 |
+
self.raw_julia_state = Main.EquationSearch(
|
| 1150 |
Main.X,
|
| 1151 |
Main.y,
|
| 1152 |
weights=Main.weights,
|
|
|
|
| 1159 |
options=options,
|
| 1160 |
numprocs=int(cprocs),
|
| 1161 |
multithreading=bool(multithreading),
|
| 1162 |
+
saved_state=self.raw_julia_state,
|
| 1163 |
)
|
| 1164 |
|
| 1165 |
self.variable_names = variable_names
|
setup.py
CHANGED
|
@@ -8,14 +8,14 @@ except FileNotFoundError:
|
|
| 8 |
|
| 9 |
setuptools.setup(
|
| 10 |
name="pysr",
|
| 11 |
-
version="0.7.0",
|
| 12 |
author="Miles Cranmer",
|
| 13 |
author_email="[email protected]",
|
| 14 |
description="Simple and efficient symbolic regression",
|
| 15 |
long_description=long_description,
|
| 16 |
long_description_content_type="text/markdown",
|
| 17 |
url="https://github.com/MilesCranmer/pysr",
|
| 18 |
-
install_requires=["julia", "numpy", "pandas", "sympy", "scikit-learn"],
|
| 19 |
packages=setuptools.find_packages(),
|
| 20 |
package_data={"pysr": ["../Project.toml", "../datasets/*"]},
|
| 21 |
include_package_data=False,
|
|
|
|
| 8 |
|
| 9 |
setuptools.setup(
|
| 10 |
name="pysr",
|
| 11 |
+
version="0.7.0-1",
|
| 12 |
author="Miles Cranmer",
|
| 13 |
author_email="[email protected]",
|
| 14 |
description="Simple and efficient symbolic regression",
|
| 15 |
long_description=long_description,
|
| 16 |
long_description_content_type="text/markdown",
|
| 17 |
url="https://github.com/MilesCranmer/pysr",
|
| 18 |
+
install_requires=["julia>=0.5.7", "numpy", "pandas", "sympy", "scikit-learn"],
|
| 19 |
packages=setuptools.find_packages(),
|
| 20 |
package_data={"pysr": ["../Project.toml", "../datasets/*"]},
|
| 21 |
include_package_data=False,
|
test/test.py
CHANGED
|
@@ -77,7 +77,7 @@ class TestPipeline(unittest.TestCase):
|
|
| 77 |
model.predict(self.X)[:, 1], self.X[:, 1] ** 2, decimal=4
|
| 78 |
)
|
| 79 |
|
| 80 |
-
def
|
| 81 |
X = np.random.randn(100, 1)
|
| 82 |
y = X[:, 0] + 3.0
|
| 83 |
regressor = PySRRegressor(
|
|
@@ -94,6 +94,13 @@ class TestPipeline(unittest.TestCase):
|
|
| 94 |
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
| 95 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
# Tweak model selection:
|
| 98 |
regressor.set_params(model_selection="best")
|
| 99 |
self.assertEqual(regressor.get_params()["model_selection"], "best")
|
|
|
|
| 77 |
model.predict(self.X)[:, 1], self.X[:, 1] ** 2, decimal=4
|
| 78 |
)
|
| 79 |
|
| 80 |
+
def test_empty_operators_single_input_multirun(self):
|
| 81 |
X = np.random.randn(100, 1)
|
| 82 |
y = X[:, 0] + 3.0
|
| 83 |
regressor = PySRRegressor(
|
|
|
|
| 94 |
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
| 95 |
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 96 |
|
| 97 |
+
# Test if repeated fit works:
|
| 98 |
+
regressor.set_params(niterations=0)
|
| 99 |
+
regressor.fit(X, y)
|
| 100 |
+
|
| 101 |
+
self.assertLessEqual(regressor.equations.iloc[-1]["loss"], 1e-4)
|
| 102 |
+
np.testing.assert_almost_equal(regressor.predict(X), y, decimal=1)
|
| 103 |
+
|
| 104 |
# Tweak model selection:
|
| 105 |
regressor.set_params(model_selection="best")
|
| 106 |
self.assertEqual(regressor.get_params()["model_selection"], "best")
|