Upload 31 files
- Backend/Benchmarks/Ackley.py +58 -0
- Backend/Benchmarks/Adjiman.py +44 -0
- Backend/Benchmarks/Base.py +62 -0
- Backend/Benchmarks/Brent.py +36 -0
- Backend/Benchmarks/Himmelblau.py +38 -0
- Backend/Benchmarks/__init__.py +6 -0
- Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Base.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/ML_Tasks/CIFAR10Runner.py +37 -0
- Backend/ML_Tasks/MNISTRunner.py +36 -0
- Backend/ML_Tasks/__init__.py +4 -0
- Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc +0 -0
- Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc +0 -0
- Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/optimizers/RMSprop.py +36 -0
- Backend/optimizers/SGD.py +43 -0
- Backend/optimizers/__init__.py +13 -0
- Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/SGD.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/adam.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/base.cpython-310.pyc +0 -0
- Backend/optimizers/adam.py +49 -0
- Backend/optimizers/adamw.py +39 -0
- Backend/optimizers/azure_optim.py +134 -0
- Backend/optimizers/base.py +36 -0
Backend/Benchmarks/Ackley.py
ADDED
@@ -0,0 +1,58 @@
# Ackley "N 2" function benchmark
# (Note: the formula below is the standard d-dimensional Ackley function, evaluated here in 10 dimensions.)

import numpy as np
from scipy.optimize import minimize
from .Base import BaseBenchmark

class AckleyN2(BaseBenchmark):
    """Ackley N 2 function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Ackley N 2"
        self.dimensions = 10
        self.global_minimum = [0] * self.dimensions
        self.global_minimum_value = 0.0

    @staticmethod
    def evaluate(x):
        """Evaluate the Ackley N 2 function."""
        a = 20
        b = 0.2
        c = 2 * np.pi
        n = len(x)

        sum1 = sum(xi**2 for xi in x)
        sum2 = sum(np.cos(c * xi) for xi in x)

        term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
        term2 = -np.exp(sum2 / n)

        return term1 + term2 + a + np.exp(1)

def ackley_n2(x):
    """Ackley N 2 function."""
    a = 20
    b = 0.2
    c = 2 * np.pi
    n = len(x)

    sum1 = sum(xi**2 for xi in x)
    sum2 = sum(np.cos(c * xi) for xi in x)

    term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
    term2 = -np.exp(sum2 / n)

    return term1 + term2 + a + np.exp(1)

def benchmark_ackley_n2():
    """Benchmark the Ackley N 2 function."""
    x0 = np.random.uniform(-5, 5, size=10)
    result = minimize(ackley_n2, x0, method='BFGS')

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_ackley_n2()
Backend/Benchmarks/Adjiman.py
ADDED
@@ -0,0 +1,44 @@
# Adjiman function benchmark

import numpy as np
from scipy.optimize import minimize
from .Base import BaseBenchmark

class Adjiman(BaseBenchmark):
    """Adjiman's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Adjiman"
        self.dimensions = 2
        self.global_minimum = [0, 0]
        self.global_minimum_value = 0.5

    @staticmethod
    def evaluate(x):
        """Evaluate Adjiman's function."""
        x1, x2 = x
        term1 = (x1**2 + x2**2)**0.5
        term2 = np.sin(term1)
        term3 = np.exp(-term1)
        return 0.5 * (term1 + term2 + term3)

def adjiman(x):
    """Adjiman's function."""
    x1, x2 = x
    term1 = (x1**2 + x2**2)**0.5
    term2 = np.sin(term1)
    term3 = np.exp(-term1)
    return 0.5 * (term1 + term2 + term3)

def benchmark_adjiman():
    """Benchmark the Adjiman function."""
    x0 = np.random.uniform(-5, 5, size=2)
    result = minimize(adjiman, x0, method='BFGS')

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_adjiman()
Backend/Benchmarks/Base.py
ADDED
@@ -0,0 +1,62 @@
# Base class for the benchmarks in Backend/Benchmarks. It defines the shared state and
# helper methods used by every benchmark, which minimizes code duplication.
import numpy as np

class BaseBenchmark:
    def __init__(self):
        self.global_min = None
        self.initial_guess = None
        self.path = []
        self.loss_values = []

    def set_global_min(self, point):
        self.global_min = point

    def set_initial_guess(self, guess):
        self.initial_guess = guess

    def add_to_path(self, point):
        self.path.append(point)

    def add_loss_value(self, value):
        self.loss_values.append(value)

    def reset(self):
        self.global_min = None
        self.initial_guess = None
        self.path.clear()
        self.loss_values.clear()

    def get_metrics(self):
        if self.global_min is None or not self.path or not self.loss_values:
            raise ValueError("Metrics cannot be calculated. Ensure global_min, path, and loss_values are set.")

        distance = np.linalg.norm(self.path[-1] - self.global_min)
        convergence_rate = len(self.path) if self.loss_values[-1] < 1e-5 else float('inf')
        return {
            'distance': float(distance),
            'final_loss': float(self.loss_values[-1]),
            'convergence_rate': convergence_rate
        }

    def __str__(self):
        return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"

    def __repr__(self):
        return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"

    def __eq__(self, other):
        if not isinstance(other, BaseBenchmark):
            return False
        return (self.global_min == other.global_min and
                self.initial_guess == other.initial_guess and
                self.path == other.path and
                self.loss_values == other.loss_values)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.global_min, tuple(self.initial_guess), tuple(self.path), tuple(self.loss_values)))

    def __len__(self):
        return len(self.path)
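The bookkeeping above (path, loss values, get_metrics) is easiest to see in use. Below is a minimal sketch, not part of the commit, assuming the Backend directory is importable as a package; it drives Himmelblau's function with a hand-rolled gradient-descent loop and then reads the metrics.

import numpy as np
from Backend.Benchmarks import Himmelblau

bench = Himmelblau()
bench.set_global_min(np.array([3.0, 2.0]))
bench.set_initial_guess(np.array([0.0, 0.0]))

x = bench.initial_guess.copy()
for _ in range(1000):
    # Analytic gradient of Himmelblau's function
    g = np.array([
        4 * x[0] * (x[0]**2 + x[1] - 11) + 2 * (x[0] + x[1]**2 - 7),
        2 * (x[0]**2 + x[1] - 11) + 4 * x[1] * (x[0] + x[1]**2 - 7),
    ])
    x = x - 0.01 * g                     # plain gradient descent, illustrative only
    bench.add_to_path(x)
    bench.add_loss_value(Himmelblau.evaluate(x))

# From this start the iterates usually settle near (3, 2); Himmelblau has four minima,
# so other starting points may report a larger 'distance'.
print(bench.get_metrics())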
Backend/Benchmarks/Brent.py
ADDED
@@ -0,0 +1,36 @@
# Brent function benchmark

import numpy as np
import scipy.optimize as opt
from .Base import BaseBenchmark

class Brent(BaseBenchmark):
    """Brent's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Brent"
        self.dimensions = 1
        self.global_minimum = 1.0
        self.global_minimum_value = 0.0

    @staticmethod
    def evaluate(x):
        """Evaluate Brent's function."""
        return (x - 1)**2 * (x + 1)**2 * (x - 2)**2

def brent_function(x):
    """Brent's function."""
    return (x - 1)**2 * (x + 1)**2 * (x - 2)**2

def benchmark_brent():
    """Benchmark the Brent function."""
    # The bounded scalar search needs no starting point, only the bounds.
    result = opt.minimize_scalar(brent_function, bounds=(-2, 2), method='bounded')

    print(f"Optimized parameter: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_brent()
Backend/Benchmarks/Himmelblau.py
ADDED
@@ -0,0 +1,38 @@
# Himmelblau function benchmark

from time import time
from .Base import BaseBenchmark
from numpy.random import default_rng
from scipy.optimize import minimize

class Himmelblau(BaseBenchmark):
    """Himmelblau's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Himmelblau"
        self.dimensions = 2
        self.global_minimum = [3, 2]
        self.global_minimum_value = 0

    @staticmethod
    def evaluate(x):
        """Evaluate the Himmelblau function."""
        return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2

def himmelblau(x):
    """Himmelblau's function."""
    return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2

def benchmark_himmelblau():
    """Benchmark the Himmelblau function."""
    rng = default_rng()
    x0 = rng.uniform(-5, 5, size=2)
    start_time = time()
    result = minimize(himmelblau, x0, method='BFGS')
    end_time = time()

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print(f"Time taken: {end_time - start_time:.4f} seconds")
Backend/Benchmarks/__init__.py
ADDED
@@ -0,0 +1,6 @@
from .Base import BaseBenchmark
from .Himmelblau import Himmelblau
from .Ackley import AckleyN2
from .Adjiman import Adjiman
from .Brent import Brent

__all__ = ['BaseBenchmark', 'Himmelblau', 'AckleyN2', 'Adjiman', 'Brent']
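A quick way to sanity-check these exports, not part of the commit, assuming Backend is importable as a package; the expected values follow directly from the formulas in the files above.

import numpy as np
from Backend.Benchmarks import Himmelblau, AckleyN2, Adjiman, Brent

print(Himmelblau.evaluate([3, 2]))      # 0 at the stated minimum [3, 2]
print(AckleyN2.evaluate(np.zeros(10)))  # ~0 (up to floating-point error)
print(Adjiman.evaluate([0, 0]))         # 0.5, matching global_minimum_value
print(Brent.evaluate(1.0))              # 0.0 at x = 1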
Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc
ADDED
Binary file (2.24 kB)
Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc
ADDED
Binary file (1.64 kB)
Backend/Benchmarks/__pycache__/Base.cpython-310.pyc
ADDED
Binary file (2.85 kB)
Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc
ADDED
Binary file (1.56 kB)
Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc
ADDED
Binary file (1.69 kB)
Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (382 Bytes)
Backend/ML_Tasks/CIFAR10Runner.py
ADDED
@@ -0,0 +1,37 @@
# CIFAR-10 dataset loading in PyTorch

from torch.utils.data import DataLoader
from torchvision import datasets, transforms


def load_cifar10(batch_size=64, num_workers=2, download=True):
    """Load CIFAR-10 dataset."""
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = datasets.CIFAR10(root='./data', train=True, download=download, transform=transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, download=download, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader

def run_cifar10():
    """Run CIFAR-10 dataset loading and basic iteration."""
    train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches

if __name__ == "__main__":
    train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches
Backend/ML_Tasks/MNISTRunner.py
ADDED
@@ -0,0 +1,36 @@
# Fashion-MNIST dataset loading in PyTorch

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def load_fashion_mnist(batch_size=64, num_workers=2, download=True):
    """Load Fashion MNIST dataset."""
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_dataset = datasets.FashionMNIST(root='./data', train=True, download=download, transform=transform)
    test_dataset = datasets.FashionMNIST(root='./data', train=False, download=download, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader

def run_fashion_mnist():
    """Run Fashion MNIST dataset loading and basic iteration."""
    train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches

if __name__ == "__main__":
    train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches
Backend/ML_Tasks/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .CIFAR10Runner import load_cifar10, run_cifar10
from .MNISTRunner import load_fashion_mnist, run_fashion_mnist

__all__ = ['load_cifar10', 'run_cifar10', 'load_fashion_mnist', 'run_fashion_mnist']
Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc
ADDED
Binary file (1.31 kB)
Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc
ADDED
Binary file (1.32 kB)
Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (357 Bytes)
Backend/optimizers/RMSprop.py
ADDED
@@ -0,0 +1,36 @@
# RMSprop optimizer implementation
from abc import ABC

from .base import BaseOptimizer

class RMSpropOptimizer(BaseOptimizer, ABC):
    """
    RMSprop optimizer implementation.
    This optimizer uses a moving average of squared gradients to normalize the gradient.
    """

    def __init__(self, params, lr=0.001, alpha=0.99, eps=1e-8):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.alpha = alpha
        self.eps = eps
        self.state = {p: {'mean_square': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['mean_square'] = self.alpha * state['mean_square'] + (1 - self.alpha) * (p.grad ** 2)
            p.data -= self.lr * p.grad / (state['mean_square'].sqrt() + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def __repr__(self):
        return f"RMSpropOptimizer(lr={self.lr}, alpha={self.alpha}, eps={self.eps})"

    def state_dict(self):
        return {p: {'mean_square': state['mean_square']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        # Required by BaseOptimizer's abstract interface; restores (or re-initializes) per-parameter state.
        for p in self.params:
            self.state[p] = state_dict.get(p, {'mean_square': 0})
Backend/optimizers/SGD.py
ADDED
@@ -0,0 +1,43 @@
# SGD implementation

from .base import BaseOptimizer

class SGDOptimizer(BaseOptimizer):
    """
    Stochastic Gradient Descent (SGD) optimizer implementation.
    This optimizer updates parameters using the gradient of the loss function.
    """

    def __init__(self, params, lr=0.01, momentum=0.0):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.momentum = momentum
        self.state = {p: {'velocity': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['velocity'] = self.momentum * state['velocity'] - self.lr * p.grad
            p.data += state['velocity']

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def __repr__(self):
        return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"

    def state_dict(self):
        return {p: {'velocity': state['velocity']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        for p in self.params:
            if p in state_dict:
                self.state[p] = state_dict[p]
            else:
                self.state[p] = {'velocity': 0}

    def __str__(self):
        return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"
Backend/optimizers/__init__.py
ADDED
@@ -0,0 +1,13 @@
from .adam import AdamOptimizer
from .SGD import SGDOptimizer
from .azure_optim import Azure
from .RMSprop import RMSpropOptimizer
from .base import BaseOptimizer

__all__ = [
    'AdamOptimizer',
    'SGDOptimizer',
    'Azure',
    'RMSpropOptimizer',
    'BaseOptimizer'
]
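A minimal end-to-end sketch of the exported optimizers, not part of the commit. It assumes the parameters are torch tensors exposing .data and .grad, which is what the update rules above rely on, and that Backend is importable as a package.

import torch
from Backend.optimizers import SGDOptimizer

# Toy problem: minimize f(w) = (w - 3)^2 for a single parameter.
w = torch.tensor([0.0], requires_grad=True)
opt = SGDOptimizer([w], lr=0.1, momentum=0.9)

for _ in range(100):
    w.grad = None                      # clear the gradient so backward() does not accumulate
    loss = ((w - 3.0) ** 2).sum()
    loss.backward()
    opt.step()                         # momentum update applied directly to w.data

print(w.data)                          # approaches 3.0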
Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc
ADDED
Binary file (2.01 kB)
Backend/optimizers/__pycache__/SGD.cpython-310.pyc
ADDED
Binary file (2.18 kB)
Backend/optimizers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (423 Bytes)
Backend/optimizers/__pycache__/adam.cpython-310.pyc
ADDED
Binary file (2.22 kB)
Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc
ADDED
Binary file (4.72 kB)
Backend/optimizers/__pycache__/base.cpython-310.pyc
ADDED
Binary file (1.21 kB)
Backend/optimizers/adam.py
ADDED
@@ -0,0 +1,49 @@
# Adam optimizer implementation

from .base import BaseOptimizer

class AdamOptimizer(BaseOptimizer):
    """
    Adam optimizer implementation.
    """

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.betas = betas
        self.eps = eps
        self.state = {p: {'m': 0, 'v': 0, 't': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:  # skip parameters that did not receive a gradient
                continue

            state = self.state[p]
            state['t'] += 1

            # Update biased first moment estimate
            state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad

            # Update biased second raw moment estimate
            state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)

            # Compute bias-corrected first moment estimate
            m_hat = state['m'] / (1 - self.betas[0] ** state['t'])

            # Compute bias-corrected second raw moment estimate
            v_hat = state['v'] / (1 - self.betas[1] ** state['t'])

            # Update parameters
            p.data -= self.lr * m_hat / (v_hat.sqrt() + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def state_dict(self):
        return {p: {'m': state['m'], 'v': state['v'], 't': state['t']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        for p in self.params:
            if p in state_dict:
                self.state[p] = state_dict[p]

    def __repr__(self):
        return f"AdamOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps})"
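To make the bias correction above concrete, here is a single step worked by hand, not part of the commit; torch tensors are assumed only because the update calls .sqrt() on the second-moment estimate.

import torch
from Backend.optimizers import AdamOptimizer

p = torch.tensor([1.0], requires_grad=True)
p.grad = torch.tensor([0.5])           # pretend a backward pass produced this gradient
opt = AdamOptimizer([p], lr=0.001)
opt.step()

# With betas = (0.9, 0.999): m = 0.1 * 0.5 = 0.05, v = 0.001 * 0.25 = 0.00025,
# m_hat = 0.05 / 0.1 = 0.5, v_hat = 0.00025 / 0.001 = 0.25,
# so the update is -0.001 * 0.5 / (0.5 + 1e-8) ≈ -0.001.
print(p.data)                          # ~0.9990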
Backend/optimizers/adamw.py
ADDED
@@ -0,0 +1,39 @@
# AdamW implementation

from .adam import AdamOptimizer

class AdamWOptimizer(AdamOptimizer):
    """
    AdamW optimizer implementation.
    This optimizer decouples weight decay from the optimization steps.
    """

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.01):
        super().__init__(params, lr, betas, eps)
        self.weight_decay = weight_decay

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['t'] += 1

            # Update biased first moment estimate
            state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad

            # Update biased second raw moment estimate
            state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)

            # Compute bias-corrected first moment estimate
            m_hat = state['m'] / (1 - self.betas[0] ** state['t'])

            # Compute bias-corrected second raw moment estimate
            v_hat = state['v'] / (1 - self.betas[1] ** state['t'])

            # Update parameters with weight decay
            p.data -= self.lr * (m_hat / (v_hat.sqrt() + self.eps) + self.weight_decay * p.data)

    def __repr__(self):
        return f"AdamWOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps}, weight_decay={self.weight_decay})"
Backend/optimizers/azure_optim.py
ADDED
@@ -0,0 +1,134 @@
import torch
import torch.optim as optim
import numpy as np
import logging

# Configure logging for loss monitoring
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class Azure(optim.Optimizer):
    def __init__(self, params, lr=0.0007518383921113902, T0=2.2723218904585964, sigma=0.17181058166567398,
                 betas=(0.9, 0.999), eps=1e-8, sa_steps=5, sa_momentum=0.6612913488540948, clip_grad_norm=1.0):
        """
        Azure Sky Optimizer: A hybrid optimizer combining Simulated Annealing (SA) and Adam.

        Args:
            params (iterable): Iterable of parameters or dicts defining parameter groups.
            lr (float): Learning rate for Adam phase (default: 0.0007518383921113902).
            T0 (float): Initial temperature for SA (default: 2.2723218904585964).
            sigma (float): Perturbation strength for SA (default: 0.17181058166567398).
            betas (tuple): Adam's exponential decay rates (default: (0.9, 0.999)).
            eps (float): Adam's epsilon for numerical stability (default: 1e-8).
            sa_steps (int): Number of steps for SA phase (default: 5).
            sa_momentum (float): Momentum for SA updates (default: 0.6612913488540948).
            clip_grad_norm (float): Max norm for gradient clipping (default: 1.0).
        """
        # Process params to handle various input formats
        if isinstance(params, (list, tuple)) and isinstance(params[0], dict):
            # Handle parameter groups (e.g., [{'params': ..., 'lr': ...}, ...])
            param_groups = []
            for group in params:
                group_dict = group.copy()
                if 'params' not in group_dict:
                    raise ValueError("Each parameter group must contain a 'params' key")
                # Convert named_parameters() to a list of parameters if necessary
                if isinstance(group_dict['params'], (list, tuple)) and isinstance(group_dict['params'][0], tuple):
                    group_dict['params'] = [p for _, p in group_dict['params']]
                param_groups.append(group_dict)
            params = param_groups
        else:
            # Handle direct parameter lists or named_parameters()
            if isinstance(params, (list, tuple)) and isinstance(params[0], tuple):
                params = [p for _, p in params]  # Convert named_parameters() to parameter list
            params = [{'params': params}]

        # Set defaults for each parameter group
        defaults = dict(lr=lr, T0=T0, sigma=sigma, betas=betas, eps=eps, sa_steps=sa_steps,
                        sa_momentum=sa_momentum, clip_grad_norm=clip_grad_norm)
        super().__init__(params, defaults)
        self.step_count = 0
        self.sa_active = True
        self.losses = []
        self.loss_window = 5
        self.loss_spike_threshold = 10.0

    def step(self, closure=None):
        """Performs a single optimization step."""
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        # Loss spike monitoring
        if loss is not None:
            self._monitor_loss(loss.item())

        for group in self.param_groups:
            # Gradient clipping
            if group['clip_grad_norm'] is not None:
                torch.nn.utils.clip_grad_norm_(group['params'], group['clip_grad_norm'])

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                # Dynamic Temperature Scaling
                T = self._compute_temperature(group)
                # Exploration-Exploitation Fusion
                alpha = self._compute_alpha(group)

                if self.sa_active:
                    noise = torch.randn_like(p.data) * group['sigma'] * T
                    sa_update = noise
                else:
                    sa_update = torch.zeros_like(p.data)

                # Adam update
                state = self.state[p]
                if 'm' not in state:
                    state['m'] = torch.zeros_like(p.data)
                    state['v'] = torch.zeros_like(p.data)
                    state['step'] = 0
                m, v = state['m'], state['v']
                beta1, beta2 = group['betas']
                state['step'] += 1
                m.mul_(beta1).add_(grad, alpha=1 - beta1)
                v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                m_hat = m / (1 - beta1 ** state['step'])
                v_hat = v / (1 - beta2 ** state['step'])
                # Use group-specific learning rate if provided
                lr = group.get('lr', self.defaults['lr'])
                adam_update = -lr * m_hat / (v_hat.sqrt() + group['eps'])

                # Combined update
                update = alpha * adam_update + (1 - alpha) * sa_update
                p.data.add_(update)

        self.step_count += 1
        if self.step_count >= self.param_groups[0]['sa_steps']:
            self.sa_active = False
        return loss

    def _compute_temperature(self, group):
        """Dynamic Temperature Scaling based on step progress."""
        epoch_decay = 0.05  # Adjustable decay rate
        return group['T0'] * (1.0 / (1.0 + epoch_decay * self.step_count))

    def _compute_alpha(self, group):
        """Exploration-Exploitation Fusion Schedule using sigmoid."""
        midpoint = group['sa_steps'] / 2
        return 1 / (1 + np.exp(-(self.step_count - midpoint) / (midpoint / 5)))

    def _monitor_loss(self, loss):
        """Monitors for loss spikes and logs warnings."""
        self.losses.append(loss)
        if len(self.losses) > self.loss_window:
            self.losses.pop(0)
            # Once the window is full, compare the newest loss with the average of the previous ones
            avg_loss = sum(self.losses[:-1]) / (len(self.losses) - 1)
            current_loss = self.losses[-1]
            if current_loss > avg_loss * self.loss_spike_threshold:
                logger.warning(
                    f"Loss spike detected: {current_loss:.4f} > {avg_loss:.4f} * {self.loss_spike_threshold}")
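Because Azure subclasses torch.optim.Optimizer and can monitor the loss through a closure, it drops into a standard PyTorch training loop. A minimal sketch, not part of the commit; the model and data here are placeholders, and Backend is assumed to be importable as a package.

import torch
import torch.nn as nn
from Backend.optimizers import Azure

model = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = Azure(model.parameters())   # SA noise for the first sa_steps steps, then Adam-style updates

x = torch.randn(32, 10)
y = torch.randn(32, 1)

for step in range(20):
    def closure():
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        return loss

    loss = optimizer.step(closure)      # the closure lets the optimizer watch for loss spikes
    if step % 5 == 0:
        print(step, loss.item())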
Backend/optimizers/base.py
ADDED
@@ -0,0 +1,36 @@
# The base class for all optimizers. Acts as an interface for the optimizers.

from abc import ABC, abstractmethod

class BaseOptimizer(ABC):
    """
    Base class for all optimizers.
    """

    @abstractmethod
    def step(self):
        """
        Perform a single optimization step.
        """
        pass

    @abstractmethod
    def zero_grad(self):
        """
        Clear the gradients of all optimized parameters.
        """
        pass

    @abstractmethod
    def state_dict(self):
        """
        Return the state of the optimizer as a dictionary.
        """
        pass

    @abstractmethod
    def load_state_dict(self, state_dict):
        """
        Load the optimizer state from a dictionary.
        """
        pass
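The interface above is small enough to show in full. A minimal sketch of a conforming optimizer, not part of the commit; the class name and behavior are illustrative only.

from Backend.optimizers import BaseOptimizer

class PlainGD(BaseOptimizer):
    """Vanilla gradient descent implementing the BaseOptimizer contract."""

    def __init__(self, params, lr=0.01):
        self.params = list(params)
        self.lr = lr

    def step(self):
        for p in self.params:
            if p.grad is not None:
                p.data -= self.lr * p.grad   # follow the negative gradient

    def zero_grad(self):
        for p in self.params:
            p.grad = None

    def state_dict(self):
        return {'lr': self.lr}

    def load_state_dict(self, state_dict):
        self.lr = state_dict.get('lr', self.lr)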