Allanatrix committed
Commit bc75bfa · verified · 1 Parent(s): 1b579d5

Upload 31 files

Files changed (31)
  1. Backend/Benchmarks/Ackley.py +58 -0
  2. Backend/Benchmarks/Adjiman.py +44 -0
  3. Backend/Benchmarks/Base.py +62 -0
  4. Backend/Benchmarks/Brent.py +36 -0
  5. Backend/Benchmarks/Himmelblau.py +38 -0
  6. Backend/Benchmarks/__init__.py +6 -0
  7. Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc +0 -0
  8. Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc +0 -0
  9. Backend/Benchmarks/__pycache__/Base.cpython-310.pyc +0 -0
  10. Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc +0 -0
  11. Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc +0 -0
  12. Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc +0 -0
  13. Backend/ML_Tasks/CIFAR10Runner.py +37 -0
  14. Backend/ML_Tasks/MNISTRunner.py +36 -0
  15. Backend/ML_Tasks/__init__.py +4 -0
  16. Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc +0 -0
  17. Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc +0 -0
  18. Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc +0 -0
  19. Backend/optimizers/RMSprop.py +36 -0
  20. Backend/optimizers/SGD.py +43 -0
  21. Backend/optimizers/__init__.py +13 -0
  22. Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc +0 -0
  23. Backend/optimizers/__pycache__/SGD.cpython-310.pyc +0 -0
  24. Backend/optimizers/__pycache__/__init__.cpython-310.pyc +0 -0
  25. Backend/optimizers/__pycache__/adam.cpython-310.pyc +0 -0
  26. Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc +0 -0
  27. Backend/optimizers/__pycache__/base.cpython-310.pyc +0 -0
  28. Backend/optimizers/adam.py +49 -0
  29. Backend/optimizers/adamw.py +39 -0
  30. Backend/optimizers/azure_optim.py +134 -0
  31. Backend/optimizers/base.py +36 -0
Backend/Benchmarks/Ackley.py ADDED
@@ -0,0 +1,58 @@
+ # Ackley N 2 function Benchmark
+
+ import numpy as np
+ from scipy.optimize import minimize
+ from .Base import BaseBenchmark
+
+ class AckleyN2(BaseBenchmark):
+     """Ackley N 2 function benchmark."""
+
+     def __init__(self):
+         super().__init__()
+         self.name = "Ackley N 2"
+         self.dimensions = 10
+         self.global_minimum = [0] * self.dimensions
+         self.global_minimum_value = 0.0
+
+     @staticmethod
+     def evaluate(x):
+         """Evaluate the Ackley N 2 function."""
+         a = 20
+         b = 0.2
+         c = 2 * np.pi
+         n = len(x)
+
+         sum1 = sum(xi**2 for xi in x)
+         sum2 = sum(np.cos(c * xi) for xi in x)
+
+         term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
+         term2 = -np.exp(sum2 / n)
+
+         return term1 + term2 + a + np.exp(1)
+
+ def ackley_n2(x):
+     """Ackley N 2 function."""
+     a = 20
+     b = 0.2
+     c = 2 * np.pi
+     n = len(x)
+
+     sum1 = sum(xi**2 for xi in x)
+     sum2 = sum(np.cos(c * xi) for xi in x)
+
+     term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
+     term2 = -np.exp(sum2 / n)
+
+     return term1 + term2 + a + np.exp(1)
+
+ def benchmark_ackley_n2():
+     """Benchmark the Ackley N 2 function."""
+     x0 = np.random.uniform(-5, 5, size=10)
+     result = minimize(ackley_n2, x0, method='BFGS')
+
+     print(f"Optimized parameters: {result.x}")
+     print(f"Function value at optimum: {result.fun}")
+     print("Optimization successful:", result.success)
+
+ if __name__ == "__main__":
+     benchmark_ackley_n2()
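
For reference, the quantity computed by `evaluate` (and by the module-level `ackley_n2`) is the Ackley form

$$ f(x) = -a \exp\!\left(-b \sqrt{\tfrac{1}{n}\sum_{i=1}^{n} x_i^2}\right) - \exp\!\left(\tfrac{1}{n}\sum_{i=1}^{n} \cos(c\,x_i)\right) + a + e, \qquad a = 20,\; b = 0.2,\; c = 2\pi, $$

whose global minimum is f(0, ..., 0) = 0, matching the `global_minimum` and `global_minimum_value` attributes set in `__init__`.
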
Backend/Benchmarks/Adjiman.py ADDED
@@ -0,0 +1,44 @@
+ # Adjiman function benchmark
+
+ import numpy as np
+ from scipy.optimize import minimize
+ from .Base import BaseBenchmark
+
+ class Adjiman(BaseBenchmark):
+     """Adjiman's function benchmark."""
+
+     def __init__(self):
+         super().__init__()
+         self.name = "Adjiman"
+         self.dimensions = 2
+         self.global_minimum = [0, 0]
+         self.global_minimum_value = 0.5
+
+     @staticmethod
+     def evaluate(x):
+         """Evaluate Adjiman's function."""
+         x1, x2 = x
+         term1 = (x1**2 + x2**2)**0.5
+         term2 = np.sin(term1)
+         term3 = np.exp(-term1)
+         return 0.5 * (term1 + term2 + term3)
+
+ def adjiman(x):
+     """Adjiman's function."""
+     x1, x2 = x
+     term1 = (x1**2 + x2**2)**0.5
+     term2 = np.sin(term1)
+     term3 = np.exp(-term1)
+     return 0.5 * (term1 + term2 + term3)
+
+ def benchmark_adjiman():
+     """Benchmark the Adjiman function."""
+     x0 = np.random.uniform(-5, 5, size=2)
+     result = minimize(adjiman, x0, method='BFGS')
+
+     print(f"Optimized parameters: {result.x}")
+     print(f"Function value at optimum: {result.fun}")
+     print("Optimization successful:", result.success)
+
+ if __name__ == "__main__":
+     benchmark_adjiman()
Backend/Benchmarks/Base.py ADDED
@@ -0,0 +1,62 @@
+ # Base class for all benchmarks in the Backend/Benchmarks directory. It defines shared state and helper methods used by every benchmark, which minimizes code duplication.
+ import numpy as np
+
+ class BaseBenchmark:
+     def __init__(self):
+         self.global_min = None
+         self.initial_guess = None
+         self.path = []
+         self.loss_values = []
+
+     def set_global_min(self, point):
+         self.global_min = point
+
+     def set_initial_guess(self, guess):
+         self.initial_guess = guess
+
+     def add_to_path(self, point):
+         self.path.append(point)
+
+     def add_loss_value(self, value):
+         self.loss_values.append(value)
+
+     def reset(self):
+         self.global_min = None
+         self.initial_guess = None
+         self.path.clear()
+         self.loss_values.clear()
+
+     def get_metrics(self):
+         if self.global_min is None or not self.path or not self.loss_values:
+             raise ValueError("Metrics cannot be calculated. Ensure global_min, path, and loss_values are set.")
+
+         distance = np.linalg.norm(self.path[-1] - self.global_min)
+         convergence_rate = len(self.path) if self.loss_values[-1] < 1e-5 else float('inf')
+         return {
+             'distance': float(distance),
+             'final_loss': float(self.loss_values[-1]),
+             'convergence_rate': convergence_rate
+         }
+
+     def __str__(self):
+         return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"
+
+     def __repr__(self):
+         return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"
+
+     def __eq__(self, other):
+         if not isinstance(other, BaseBenchmark):
+             return False
+         return (self.global_min == other.global_min and
+                 self.initial_guess == other.initial_guess and
+                 self.path == other.path and
+                 self.loss_values == other.loss_values)
+
+     def __ne__(self, other):
+         return not self.__eq__(other)
+
+     def __hash__(self):
+         return hash((self.global_min, tuple(self.initial_guess), tuple(self.path), tuple(self.loss_values)))
+
+     def __len__(self):
+         return len(self.path)
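
A minimal sketch of the intended bookkeeping flow, assuming the `Backend` package is importable and NumPy arrays are used for iterates; the quadratic objective and the fixed-step gradient-descent loop are hypothetical stand-ins, and only the `BaseBenchmark` calls come from the class above:

import numpy as np
from Backend.Benchmarks.Base import BaseBenchmark   # assumes the repository root is on the import path

bench = BaseBenchmark()
bench.set_global_min(np.zeros(2))
x = np.array([4.0, -3.0])
bench.set_initial_guess(x.copy())

for _ in range(100):            # plain gradient descent on f(x) = ||x||^2
    x = x - 0.1 * (2 * x)       # gradient of ||x||^2 is 2x
    bench.add_to_path(x.copy())
    bench.add_loss_value(float(x @ x))

print(bench.get_metrics())      # {'distance': ..., 'final_loss': ..., 'convergence_rate': ...}
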
Backend/Benchmarks/Brent.py ADDED
@@ -0,0 +1,36 @@
+ # Brent function benchmark
+
+ import numpy as np
+ import scipy.optimize as opt
+ from .Base import BaseBenchmark
+
+ class Brent(BaseBenchmark):
+     """Brent's function benchmark."""
+
+     def __init__(self):
+         super().__init__()
+         self.name = "Brent"
+         self.dimensions = 1
+         self.global_minimum = 1.0
+         self.global_minimum_value = 0.0
+
+     @staticmethod
+     def evaluate(x):
+         """Evaluate Brent's function."""
+         return (x - 1)**2 * (x + 1)**2 * (x - 2)**2
+
+ def brent_function(x):
+     """Brent's function."""
+     return (x - 1)**2 * (x + 1)**2 * (x - 2)**2
+
+ def benchmark_brent():
+     """Benchmark the Brent function."""
+     # The bounded scalar minimizer needs only the bounds, not a random initial guess.
+     result = opt.minimize_scalar(brent_function, bounds=(-2, 2), method='bounded')
+
+     print(f"Optimized parameter: {result.x}")
+     print(f"Function value at optimum: {result.fun}")
+     print("Optimization successful:", result.success)
+
+ if __name__ == "__main__":
+     benchmark_brent()
Backend/Benchmarks/Himmelblau.py ADDED
@@ -0,0 +1,38 @@
+ # Himmelblau function benchmark
+
+ from time import time
+ from .Base import BaseBenchmark
+ from numpy.random import default_rng
+ from scipy.optimize import minimize
+
+ class Himmelblau(BaseBenchmark):
+     """Himmelblau's function benchmark."""
+
+     def __init__(self):
+         super().__init__()
+         self.name = "Himmelblau"
+         self.dimensions = 2
+         self.global_minimum = [3, 2]
+         self.global_minimum_value = 0
+
+     @staticmethod
+     def evaluate(x):
+         """Evaluate the Himmelblau function."""
+         return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2
+
+
+ def himmelblau(x):
+     """Himmelblau's function."""
+     return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2
+
+ def benchmark_himmelblau():
+     """Benchmark the Himmelblau function."""
+     rng = default_rng()
+     x0 = rng.uniform(-5, 5, size=2)
+     start_time = time()
+     result = minimize(himmelblau, x0, method='BFGS')
+     end_time = time()
+
+     print(f"Optimized parameters: {result.x}")
+     print(f"Function value at optimum: {result.fun}")
+     print(f"Time taken: {end_time - start_time:.4f} seconds")
Backend/Benchmarks/__init__.py ADDED
@@ -0,0 +1,6 @@
+ from .Base import BaseBenchmark
+ from .Himmelblau import Himmelblau
+ from .Ackley import AckleyN2
+ from .Adjiman import Adjiman
+ from .Brent import Brent
+ __all__ = ['BaseBenchmark', 'Himmelblau', 'AckleyN2', 'Adjiman', 'Brent']
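
A quick sanity check of the package exports, assuming the repository root is on the Python path; the expected values follow directly from the formulas in the files above:

from Backend.Benchmarks import Himmelblau, Brent

print(Himmelblau.evaluate([3, 2]))   # 0 at the listed global minimum (3, 2)
print(Brent.evaluate(1.0))           # 0.0 at x = 1
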
Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc ADDED
Binary file (2.24 kB).
 
Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc ADDED
Binary file (1.64 kB).
 
Backend/Benchmarks/__pycache__/Base.cpython-310.pyc ADDED
Binary file (2.85 kB).
 
Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc ADDED
Binary file (1.56 kB).
 
Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc ADDED
Binary file (1.69 kB).
 
Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (382 Bytes).
 
Backend/ML_Tasks/CIFAR10Runner.py ADDED
@@ -0,0 +1,37 @@
+ # CIFAR-10 dataset loading in PyTorch
+
+ from torch.utils.data import DataLoader
+ from torchvision import datasets, transforms
+
+
+ def load_cifar10(batch_size=64, num_workers=2, download=True):
+     """Load CIFAR-10 dataset."""
+     transform = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+     ])
+
+     train_dataset = datasets.CIFAR10(root='./data', train=True, download=download, transform=transform)
+     test_dataset = datasets.CIFAR10(root='./data', train=False, download=download, transform=transform)
+
+     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
+     test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
+
+     return train_loader, test_loader
+
+ def run_cifar10():
+     """Run CIFAR-10 dataset loading and basic iteration."""
+     train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)
+
+     # Example: Iterate through the training data
+     for images, labels in train_loader:
+         print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
+         break  # Remove this break to iterate through all batches
+
+ if __name__ == "__main__":
+     train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)
+
+     # Example: Iterate through the training data
+     for images, labels in train_loader:
+         print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
+         break  # Remove this break to iterate through all batches
Backend/ML_Tasks/MNISTRunner.py ADDED
@@ -0,0 +1,36 @@
+ # Fashion MNIST dataset loading in PyTorch
+
+ from torch.utils.data import DataLoader
+ from torchvision import datasets, transforms
+
+ def load_fashion_mnist(batch_size=64, num_workers=2, download=True):
+     """Load Fashion MNIST dataset."""
+     transform = transforms.Compose([
+         transforms.ToTensor(),
+         transforms.Normalize((0.5,), (0.5,))
+     ])
+
+     train_dataset = datasets.FashionMNIST(root='./data', train=True, download=download, transform=transform)
+     test_dataset = datasets.FashionMNIST(root='./data', train=False, download=download, transform=transform)
+
+     train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
+     test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
+
+     return train_loader, test_loader
+
+ def run_fashion_mnist():
+     """Run Fashion MNIST dataset loading and basic iteration."""
+     train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)
+
+     # Example: Iterate through the training data
+     for images, labels in train_loader:
+         print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
+         break  # Remove this break to iterate through all batches
+
+ if __name__ == "__main__":
+     train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)
+
+     # Example: Iterate through the training data
+     for images, labels in train_loader:
+         print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
+         break  # Remove this break to iterate through all batches
Backend/ML_Tasks/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .CIFAR10Runner import load_cifar10, run_cifar10
+ from .MNISTRunner import load_fashion_mnist, run_fashion_mnist
+
+ __all__ = ['load_cifar10', 'run_cifar10', 'load_fashion_mnist', 'run_fashion_mnist']
Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc ADDED
Binary file (1.31 kB).
 
Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc ADDED
Binary file (1.32 kB).
 
Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (357 Bytes).
 
Backend/optimizers/RMSprop.py ADDED
@@ -0,0 +1,36 @@
+ # RMSprop optimizer implementation
+ from abc import ABC
+
+ from .base import BaseOptimizer
+
+ class RMSpropOptimizer(BaseOptimizer, ABC):
+     """
+     RMSprop optimizer implementation.
+     This optimizer uses a moving average of squared gradients to normalize the gradient.
+     """
+
+     def __init__(self, params, lr=0.001, alpha=0.99, eps=1e-8):
+         self.params = params
+         self.lr = lr
+         self.alpha = alpha
+         self.eps = eps
+         self.state = {p: {'mean_square': 0} for p in params}
+
+     def step(self):
+         for p in self.params:
+             if p.grad is None:
+                 continue
+
+             state = self.state[p]
+             state['mean_square'] = self.alpha * state['mean_square'] + (1 - self.alpha) * (p.grad ** 2)
+             p.data -= self.lr * p.grad / (state['mean_square'].sqrt() + self.eps)
+
+     def zero_grad(self):
+         for p in self.params:
+             p.grad = 0
+
+     def __repr__(self):
+         return f"RMSpropOptimizer(lr={self.lr}, alpha={self.alpha}, eps={self.eps})"
+
+     def state_dict(self):
+         return {p: {'mean_square': state['mean_square']} for p, state in self.state.items()}
+
+     def load_state_dict(self, state_dict):
+         # Implements the abstract method from BaseOptimizer so the class can be instantiated.
+         for p in self.params:
+             self.state[p] = state_dict.get(p, {'mean_square': 0})
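
In symbols, the update applied by `step` above, per parameter θ with gradient g_t, learning rate η (`lr`), decay α (`alpha`), and running average s (`mean_square`), is

$$ s_t = \alpha\, s_{t-1} + (1-\alpha)\, g_t^2, \qquad \theta_t = \theta_{t-1} - \frac{\eta\, g_t}{\sqrt{s_t} + \epsilon}. $$
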
Backend/optimizers/SGD.py ADDED
@@ -0,0 +1,43 @@
+ # SGD implementation
+
+ from .base import BaseOptimizer
+
+ class SGDOptimizer(BaseOptimizer):
+     """
+     Stochastic Gradient Descent (SGD) optimizer implementation.
+     This optimizer updates parameters using the gradient of the loss function.
+     """
+
+     def __init__(self, params, lr=0.01, momentum=0.0):
+         self.params = params
+         self.lr = lr
+         self.momentum = momentum
+         self.state = {p: {'velocity': 0} for p in params}
+
+     def step(self):
+         for p in self.params:
+             if p.grad is None:
+                 continue
+
+             state = self.state[p]
+             state['velocity'] = self.momentum * state['velocity'] - self.lr * p.grad
+             p.data += state['velocity']
+
+     def zero_grad(self):
+         for p in self.params:
+             p.grad = 0
+
+     def __repr__(self):
+         return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"
+
+     def state_dict(self):
+         return {p: {'velocity': state['velocity']} for p, state in self.state.items()}
+
+     def load_state_dict(self, state_dict):
+         for p in self.params:
+             if p in state_dict:
+                 self.state[p] = state_dict[p]
+             else:
+                 self.state[p] = {'velocity': 0}
+
+     def __str__(self):
+         return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"
Backend/optimizers/__init__.py ADDED
@@ -0,0 +1,13 @@
+ from .adam import AdamOptimizer
+ from .SGD import SGDOptimizer
+ from .azure_optim import Azure
+ from .RMSprop import RMSpropOptimizer
+ from .base import BaseOptimizer
+
+ __all__ = [
+     'AdamOptimizer',
+     'SGDOptimizer',
+     'Azure',
+     'RMSpropOptimizer',
+     'BaseOptimizer'
+ ]
Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc ADDED
Binary file (2.01 kB).
 
Backend/optimizers/__pycache__/SGD.cpython-310.pyc ADDED
Binary file (2.18 kB).
 
Backend/optimizers/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (423 Bytes).
 
Backend/optimizers/__pycache__/adam.cpython-310.pyc ADDED
Binary file (2.22 kB).
 
Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc ADDED
Binary file (4.72 kB).
 
Backend/optimizers/__pycache__/base.cpython-310.pyc ADDED
Binary file (1.21 kB).
 
Backend/optimizers/adam.py ADDED
@@ -0,0 +1,49 @@
+ # Adam optimizer implementation
+
+ from .base import BaseOptimizer
+
+ class AdamOptimizer(BaseOptimizer):
+     """
+     Adam optimizer implementation.
+     """
+
+     def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
+         self.params = params
+         self.lr = lr
+         self.betas = betas
+         self.eps = eps
+         self.state = {p: {'m': 0, 'v': 0, 't': 0} for p in params}
+
+     def step(self):
+         for p in self.params:
+             if p.grad is None:
+                 continue
+
+             state = self.state[p]
+             state['t'] += 1
+
+             # Update biased first moment estimate
+             state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad
+
+             # Update biased second raw moment estimate
+             state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)
+
+             # Compute bias-corrected first moment estimate
+             m_hat = state['m'] / (1 - self.betas[0] ** state['t'])
+
+             # Compute bias-corrected second raw moment estimate
+             v_hat = state['v'] / (1 - self.betas[1] ** state['t'])
+
+             # Update parameters
+             p.data -= self.lr * m_hat / (v_hat.sqrt() + self.eps)
+
+     def zero_grad(self):
+         for p in self.params:
+             p.grad = 0
+
+     def state_dict(self):
+         return {p: {'m': state['m'], 'v': state['v'], 't': state['t']} for p, state in self.state.items()}
+
+     def load_state_dict(self, state_dict):
+         for p in self.params:
+             if p in state_dict:
+                 self.state[p] = state_dict[p]
+
+     def __repr__(self):
+         return f"AdamOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps})"
Backend/optimizers/adamw.py ADDED
@@ -0,0 +1,39 @@
+ # AdamW implementation
+
+ from .adam import AdamOptimizer
+
+ class AdamWOptimizer(AdamOptimizer):
+     """
+     AdamW optimizer implementation.
+     This optimizer decouples weight decay from the optimization steps.
+     """
+
+     def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.01):
+         super().__init__(params, lr, betas, eps)
+         self.weight_decay = weight_decay
+
+     def step(self):
+         for p in self.params:
+             if p.grad is None:
+                 continue
+
+             state = self.state[p]
+             state['t'] += 1
+
+             # Update biased first moment estimate
+             state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad
+
+             # Update biased second raw moment estimate
+             state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)
+
+             # Compute bias-corrected first moment estimate
+             m_hat = state['m'] / (1 - self.betas[0] ** state['t'])
+
+             # Compute bias-corrected second raw moment estimate
+             v_hat = state['v'] / (1 - self.betas[1] ** state['t'])
+
+             # Update parameters with weight decay
+             p.data -= self.lr * (m_hat / (v_hat.sqrt() + self.eps) + self.weight_decay * p.data)
+
+     def __repr__(self):
+         return f"AdamWOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps}, weight_decay={self.weight_decay})"
Backend/optimizers/azure_optim.py ADDED
@@ -0,0 +1,134 @@
+ import torch
+ import torch.optim as optim
+ import numpy as np
+ import logging
+
+ # Configure logging for loss monitoring
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+
+ class Azure(optim.Optimizer):
+     def __init__(self, params, lr=0.0007518383921113902, T0=2.2723218904585964, sigma=0.17181058166567398,
+                  betas=(0.9, 0.999), eps=1e-8, sa_steps=5, sa_momentum=0.6612913488540948, clip_grad_norm=1.0):
+         """
+         Azure Sky Optimizer: A hybrid optimizer combining Simulated Annealing (SA) and Adam.
+
+         Args:
+             params (iterable): Iterable of parameters or dicts defining parameter groups.
+             lr (float): Learning rate for Adam phase (default: 0.0007518383921113902).
+             T0 (float): Initial temperature for SA (default: 2.2723218904585964).
+             sigma (float): Perturbation strength for SA (default: 0.17181058166567398).
+             betas (tuple): Adam's exponential decay rates (default: (0.9, 0.999)).
+             eps (float): Adam's epsilon for numerical stability (default: 1e-8).
+             sa_steps (int): Number of steps for SA phase (default: 5).
+             sa_momentum (float): Momentum for SA updates (default: 0.6612913488540948).
+             clip_grad_norm (float): Max norm for gradient clipping (default: 1.0).
+         """
+         # Process params to handle various input formats
+         if isinstance(params, (list, tuple)) and isinstance(params[0], dict):
+             # Handle parameter groups (e.g., [{'params': ..., 'lr': ...}, ...])
+             param_groups = []
+             for group in params:
+                 group_dict = group.copy()
+                 if 'params' not in group_dict:
+                     raise ValueError("Each parameter group must contain a 'params' key")
+                 # Convert named_parameters() to a list of parameters if necessary
+                 if isinstance(group_dict['params'], (list, tuple)) and isinstance(group_dict['params'][0], tuple):
+                     group_dict['params'] = [p for _, p in group_dict['params']]
+                 param_groups.append(group_dict)
+             params = param_groups
+         else:
+             # Handle direct parameter lists or named_parameters()
+             if isinstance(params, (list, tuple)) and isinstance(params[0], tuple):
+                 params = [p for _, p in params]  # Convert named_parameters() to parameter list
+             params = [{'params': params}]
+
+         # Set defaults for each parameter group
+         defaults = dict(lr=lr, T0=T0, sigma=sigma, betas=betas, eps=eps, sa_steps=sa_steps,
+                         sa_momentum=sa_momentum, clip_grad_norm=clip_grad_norm)
+         super().__init__(params, defaults)
+         self.step_count = 0
+         self.sa_active = True
+         self.losses = []
+         self.loss_window = 5
+         self.loss_spike_threshold = 10.0
+
+     def step(self, closure=None):
+         """Performs a single optimization step."""
+         loss = None
+         if closure is not None:
+             with torch.enable_grad():
+                 loss = closure()
+
+         # Loss spike monitoring
+         if loss is not None:
+             self._monitor_loss(loss.item())
+
+         for group in self.param_groups:
+             # Gradient clipping
+             if group['clip_grad_norm'] is not None:
+                 torch.nn.utils.clip_grad_norm_(group['params'], group['clip_grad_norm'])
+
+             for p in group['params']:
+                 if p.grad is None:
+                     continue
+                 grad = p.grad.data
+
+                 # Dynamic Temperature Scaling
+                 T = self._compute_temperature(group)
+                 # Exploration-Exploitation Fusion
+                 alpha = self._compute_alpha(group)
+
+                 if self.sa_active:
+                     noise = torch.randn_like(p.data) * group['sigma'] * T
+                     sa_update = noise
+                 else:
+                     sa_update = torch.zeros_like(p.data)
+
+                 # Adam update
+                 state = self.state[p]
+                 if 'm' not in state:
+                     state['m'] = torch.zeros_like(p.data)
+                     state['v'] = torch.zeros_like(p.data)
+                     state['step'] = 0
+                 m, v = state['m'], state['v']
+                 beta1, beta2 = group['betas']
+                 state['step'] += 1
+                 m.mul_(beta1).add_(grad, alpha=1 - beta1)
+                 v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                 m_hat = m / (1 - beta1 ** state['step'])
+                 v_hat = v / (1 - beta2 ** state['step'])
+                 # Use group-specific learning rate if provided
+                 lr = group.get('lr', self.defaults['lr'])
+                 adam_update = -lr * m_hat / (v_hat.sqrt() + group['eps'])
+
+                 # Combined update
+                 update = alpha * adam_update + (1 - alpha) * sa_update
+                 p.data.add_(update)
+
+         self.step_count += 1
+         if self.step_count >= self.param_groups[0]['sa_steps']:
+             self.sa_active = False
+         return loss
+
+     def _compute_temperature(self, group):
+         """Dynamic Temperature Scaling based on step progress."""
+         epoch_decay = 0.05  # Adjustable decay rate
+         return group['T0'] * (1.0 / (1.0 + epoch_decay * self.step_count))
+
+     def _compute_alpha(self, group):
+         """Exploration-Exploitation Fusion Schedule using sigmoid."""
+         midpoint = group['sa_steps'] / 2
+         return 1 / (1 + np.exp(-(self.step_count - midpoint) / (midpoint / 5)))
+
+     def _monitor_loss(self, loss):
+         """Monitors for loss spikes and logs warnings."""
+         self.losses.append(loss)
+         if len(self.losses) > self.loss_window:
+             self.losses.pop(0)
+             avg_loss = sum(self.losses[:-1]) / (len(self.losses) - 1)
+             current_loss = self.losses[-1]
+             if current_loss > avg_loss * self.loss_spike_threshold:
+                 logger.warning(
+                     f"Loss spike detected: {current_loss:.4f} > {avg_loss:.4f} * {self.loss_spike_threshold}")
Backend/optimizers/base.py ADDED
@@ -0,0 +1,36 @@
+ # The base class for all optimizers. Acts as an interface for the optimizers.
+
+ from abc import ABC, abstractmethod
+
+ class BaseOptimizer(ABC):
+     """
+     Base class for all optimizers.
+     """
+
+     @abstractmethod
+     def step(self):
+         """
+         Perform a single optimization step.
+         """
+         pass
+
+     @abstractmethod
+     def zero_grad(self):
+         """
+         Clear the gradients of all optimized parameters.
+         """
+         pass
+
+     @abstractmethod
+     def state_dict(self):
+         """
+         Return the state of the optimizer as a dictionary.
+         """
+         pass
+
+     @abstractmethod
+     def load_state_dict(self, state_dict):
+         """
+         Load the optimizer state from a dictionary.
+         """
+         pass