Upload 31 files
- Backend/Benchmarks/Ackley.py +58 -0
- Backend/Benchmarks/Adjiman.py +44 -0
- Backend/Benchmarks/Base.py +62 -0
- Backend/Benchmarks/Brent.py +36 -0
- Backend/Benchmarks/Himmelblau.py +38 -0
- Backend/Benchmarks/__init__.py +6 -0
- Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Base.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc +0 -0
- Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/ML_Tasks/CIFAR10Runner.py +37 -0
- Backend/ML_Tasks/MNISTRunner.py +36 -0
- Backend/ML_Tasks/__init__.py +4 -0
- Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc +0 -0
- Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc +0 -0
- Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/optimizers/RMSprop.py +36 -0
- Backend/optimizers/SGD.py +43 -0
- Backend/optimizers/__init__.py +13 -0
- Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/SGD.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/__init__.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/adam.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc +0 -0
- Backend/optimizers/__pycache__/base.cpython-310.pyc +0 -0
- Backend/optimizers/adam.py +49 -0
- Backend/optimizers/adamw.py +39 -0
- Backend/optimizers/azure_optim.py +134 -0
- Backend/optimizers/base.py +36 -0
Backend/Benchmarks/Ackley.py
ADDED
@@ -0,0 +1,58 @@
# Ackley "N 2" function benchmark
# (Note: the formula below is the standard d-dimensional Ackley function, evaluated here in 10 dimensions.)

import numpy as np
from scipy.optimize import minimize
from .Base import BaseBenchmark

class AckleyN2(BaseBenchmark):
    """Ackley N 2 function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Ackley N 2"
        self.dimensions = 10
        self.global_minimum = [0] * self.dimensions
        self.global_minimum_value = 0.0

    @staticmethod
    def evaluate(x):
        """Evaluate the Ackley N 2 function."""
        a = 20
        b = 0.2
        c = 2 * np.pi
        n = len(x)

        sum1 = sum(xi**2 for xi in x)
        sum2 = sum(np.cos(c * xi) for xi in x)

        term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
        term2 = -np.exp(sum2 / n)

        return term1 + term2 + a + np.exp(1)

def ackley_n2(x):
    """Ackley N 2 function."""
    a = 20
    b = 0.2
    c = 2 * np.pi
    n = len(x)

    sum1 = sum(xi**2 for xi in x)
    sum2 = sum(np.cos(c * xi) for xi in x)

    term1 = -a * np.exp(-b * np.sqrt(sum1 / n))
    term2 = -np.exp(sum2 / n)

    return term1 + term2 + a + np.exp(1)

def benchmark_ackley_n2():
    """Benchmark the Ackley N 2 function."""
    x0 = np.random.uniform(-5, 5, size=10)
    result = minimize(ackley_n2, x0, method='BFGS')

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_ackley_n2()
Backend/Benchmarks/Adjiman.py
ADDED
@@ -0,0 +1,44 @@
# Adjiman function benchmark

import numpy as np
from scipy.optimize import minimize
from .Base import BaseBenchmark

class Adjiman(BaseBenchmark):
    """Adjiman's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Adjiman"
        self.dimensions = 2
        self.global_minimum = [0, 0]
        self.global_minimum_value = 0.5

    @staticmethod
    def evaluate(x):
        """Evaluate Adjiman's function."""
        x1, x2 = x
        term1 = (x1**2 + x2**2)**0.5
        term2 = np.sin(term1)
        term3 = np.exp(-term1)
        return 0.5 * (term1 + term2 + term3)

def adjiman(x):
    """Adjiman's function."""
    x1, x2 = x
    term1 = (x1**2 + x2**2)**0.5
    term2 = np.sin(term1)
    term3 = np.exp(-term1)
    return 0.5 * (term1 + term2 + term3)

def benchmark_adjiman():
    """Benchmark the Adjiman function."""
    x0 = np.random.uniform(-5, 5, size=2)
    result = minimize(adjiman, x0, method='BFGS')

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_adjiman()
Backend/Benchmarks/Base.py
ADDED
@@ -0,0 +1,62 @@
# Base class for the benchmarks in Backend/Benchmarks. It defines the shared state and
# helper methods used by every benchmark, which minimizes code duplication.
import numpy as np

class BaseBenchmark:
    def __init__(self):
        self.global_min = None
        self.initial_guess = None
        self.path = []
        self.loss_values = []

    def set_global_min(self, point):
        self.global_min = point

    def set_initial_guess(self, guess):
        self.initial_guess = guess

    def add_to_path(self, point):
        self.path.append(point)

    def add_loss_value(self, value):
        self.loss_values.append(value)

    def reset(self):
        self.global_min = None
        self.initial_guess = None
        self.path.clear()
        self.loss_values.clear()

    def get_metrics(self):
        if self.global_min is None or not self.path or not self.loss_values:
            raise ValueError("Metrics cannot be calculated. Ensure global_min, path, and loss_values are set.")

        distance = np.linalg.norm(self.path[-1] - self.global_min)
        convergence_rate = len(self.path) if self.loss_values[-1] < 1e-5 else float('inf')
        return {
            'distance': float(distance),
            'final_loss': float(self.loss_values[-1]),
            'convergence_rate': convergence_rate
        }

    def __str__(self):
        return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"

    def __repr__(self):
        return f"BaseBenchmark(global_min={self.global_min}, initial_guess={self.initial_guess}, path_length={len(self.path)}, loss_values_length={len(self.loss_values)})"

    def __eq__(self, other):
        if not isinstance(other, BaseBenchmark):
            return False
        return (self.global_min == other.global_min and
                self.initial_guess == other.initial_guess and
                self.path == other.path and
                self.loss_values == other.loss_values)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((self.global_min, tuple(self.initial_guess), tuple(self.path), tuple(self.loss_values)))

    def __len__(self):
        return len(self.path)
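The bookkeeping above (path, loss values, get_metrics) is easiest to see in use. Below is a minimal sketch, not part of the commit, assuming the Backend directory is importable as a package; it drives Himmelblau's function with a hand-rolled gradient-descent loop and then reads the metrics.

import numpy as np
from Backend.Benchmarks import Himmelblau

bench = Himmelblau()
bench.set_global_min(np.array([3.0, 2.0]))
bench.set_initial_guess(np.array([0.0, 0.0]))

x = bench.initial_guess.copy()
for _ in range(1000):
    # Analytic gradient of Himmelblau's function
    g = np.array([
        4 * x[0] * (x[0]**2 + x[1] - 11) + 2 * (x[0] + x[1]**2 - 7),
        2 * (x[0]**2 + x[1] - 11) + 4 * x[1] * (x[0] + x[1]**2 - 7),
    ])
    x = x - 0.01 * g                     # plain gradient descent, illustrative only
    bench.add_to_path(x)
    bench.add_loss_value(Himmelblau.evaluate(x))

# From this start the iterates usually settle near (3, 2); Himmelblau has four minima,
# so other starting points may report a larger 'distance'.
print(bench.get_metrics())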
Backend/Benchmarks/Brent.py
ADDED
@@ -0,0 +1,36 @@
# Brent function benchmark

import numpy as np
import scipy.optimize as opt
from .Base import BaseBenchmark

class Brent(BaseBenchmark):
    """Brent's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Brent"
        self.dimensions = 1
        self.global_minimum = 1.0
        self.global_minimum_value = 0.0

    @staticmethod
    def evaluate(x):
        """Evaluate Brent's function."""
        return (x - 1)**2 * (x + 1)**2 * (x - 2)**2

def brent_function(x):
    """Brent's function."""
    return (x - 1)**2 * (x + 1)**2 * (x - 2)**2

def benchmark_brent():
    """Benchmark the Brent function."""
    # The bounded scalar search needs no starting point, only the bounds.
    result = opt.minimize_scalar(brent_function, bounds=(-2, 2), method='bounded')

    print(f"Optimized parameter: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print("Optimization successful:", result.success)

if __name__ == "__main__":
    benchmark_brent()
Backend/Benchmarks/Himmelblau.py
ADDED
@@ -0,0 +1,38 @@
# Himmelblau function benchmark

from time import time
from .Base import BaseBenchmark
from numpy.random import default_rng
from scipy.optimize import minimize

class Himmelblau(BaseBenchmark):
    """Himmelblau's function benchmark."""

    def __init__(self):
        super().__init__()
        self.name = "Himmelblau"
        self.dimensions = 2
        self.global_minimum = [3, 2]
        self.global_minimum_value = 0

    @staticmethod
    def evaluate(x):
        """Evaluate the Himmelblau function."""
        return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2

def himmelblau(x):
    """Himmelblau's function."""
    return (x[0]**2 + x[1] - 11)**2 + (x[0] + x[1]**2 - 7)**2

def benchmark_himmelblau():
    """Benchmark the Himmelblau function."""
    rng = default_rng()
    x0 = rng.uniform(-5, 5, size=2)
    start_time = time()
    result = minimize(himmelblau, x0, method='BFGS')
    end_time = time()

    print(f"Optimized parameters: {result.x}")
    print(f"Function value at optimum: {result.fun}")
    print(f"Time taken: {end_time - start_time:.4f} seconds")
Backend/Benchmarks/__init__.py
ADDED
@@ -0,0 +1,6 @@
from .Base import BaseBenchmark
from .Himmelblau import Himmelblau
from .Ackley import AckleyN2
from .Adjiman import Adjiman
from .Brent import Brent

__all__ = ['BaseBenchmark', 'Himmelblau', 'AckleyN2', 'Adjiman', 'Brent']
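A quick way to sanity-check these exports, not part of the commit, assuming Backend is importable as a package; the expected values follow directly from the formulas in the files above.

import numpy as np
from Backend.Benchmarks import Himmelblau, AckleyN2, Adjiman, Brent

print(Himmelblau.evaluate([3, 2]))      # 0 at the stated minimum [3, 2]
print(AckleyN2.evaluate(np.zeros(10)))  # ~0 (up to floating-point error)
print(Adjiman.evaluate([0, 0]))         # 0.5, matching global_minimum_value
print(Brent.evaluate(1.0))              # 0.0 at x = 1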
Backend/Benchmarks/__pycache__/Ackley.cpython-310.pyc
ADDED
Binary file (2.24 kB)
Backend/Benchmarks/__pycache__/Adjiman.cpython-310.pyc
ADDED
Binary file (1.64 kB)
Backend/Benchmarks/__pycache__/Base.cpython-310.pyc
ADDED
Binary file (2.85 kB)
Backend/Benchmarks/__pycache__/Brent.cpython-310.pyc
ADDED
Binary file (1.56 kB)
Backend/Benchmarks/__pycache__/Himmelblau.cpython-310.pyc
ADDED
Binary file (1.69 kB)
Backend/Benchmarks/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (382 Bytes)
Backend/ML_Tasks/CIFAR10Runner.py
ADDED
@@ -0,0 +1,37 @@
# CIFAR-10 dataset loading in PyTorch

from torch.utils.data import DataLoader
from torchvision import datasets, transforms


def load_cifar10(batch_size=64, num_workers=2, download=True):
    """Load CIFAR-10 dataset."""
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_dataset = datasets.CIFAR10(root='./data', train=True, download=download, transform=transform)
    test_dataset = datasets.CIFAR10(root='./data', train=False, download=download, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader

def run_cifar10():
    """Run CIFAR-10 dataset loading and basic iteration."""
    train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches

if __name__ == "__main__":
    train_loader, test_loader = load_cifar10(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches
Backend/ML_Tasks/MNISTRunner.py
ADDED
@@ -0,0 +1,36 @@
# Fashion-MNIST dataset loading in PyTorch

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def load_fashion_mnist(batch_size=64, num_workers=2, download=True):
    """Load Fashion MNIST dataset."""
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    train_dataset = datasets.FashionMNIST(root='./data', train=True, download=download, transform=transform)
    test_dataset = datasets.FashionMNIST(root='./data', train=False, download=download, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return train_loader, test_loader

def run_fashion_mnist():
    """Run Fashion MNIST dataset loading and basic iteration."""
    train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches

if __name__ == "__main__":
    train_loader, test_loader = load_fashion_mnist(batch_size=64, num_workers=2, download=True)

    # Example: iterate through the training data
    for images, labels in train_loader:
        print(f"Batch size: {images.size(0)}, Image shape: {images.shape}, Labels: {labels}")
        break  # Remove this break to iterate through all batches
Backend/ML_Tasks/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .CIFAR10Runner import load_cifar10, run_cifar10
from .MNISTRunner import load_fashion_mnist, run_fashion_mnist

__all__ = ['load_cifar10', 'run_cifar10', 'load_fashion_mnist', 'run_fashion_mnist']
Backend/ML_Tasks/__pycache__/CIFAR10Runner.cpython-310.pyc
ADDED
Binary file (1.31 kB)
Backend/ML_Tasks/__pycache__/MNISTRunner.cpython-310.pyc
ADDED
Binary file (1.32 kB)
Backend/ML_Tasks/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (357 Bytes)
Backend/optimizers/RMSprop.py
ADDED
@@ -0,0 +1,36 @@
# RMSprop optimizer implementation
from abc import ABC

from .base import BaseOptimizer

class RMSpropOptimizer(BaseOptimizer, ABC):
    """
    RMSprop optimizer implementation.
    This optimizer uses a moving average of squared gradients to normalize the gradient.
    """

    def __init__(self, params, lr=0.001, alpha=0.99, eps=1e-8):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.alpha = alpha
        self.eps = eps
        self.state = {p: {'mean_square': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['mean_square'] = self.alpha * state['mean_square'] + (1 - self.alpha) * (p.grad ** 2)
            p.data -= self.lr * p.grad / (state['mean_square'].sqrt() + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def __repr__(self):
        return f"RMSpropOptimizer(lr={self.lr}, alpha={self.alpha}, eps={self.eps})"

    def state_dict(self):
        return {p: {'mean_square': state['mean_square']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        # Required by BaseOptimizer's abstract interface; restores (or re-initializes) per-parameter state.
        for p in self.params:
            self.state[p] = state_dict.get(p, {'mean_square': 0})
Backend/optimizers/SGD.py
ADDED
@@ -0,0 +1,43 @@
# SGD implementation

from .base import BaseOptimizer

class SGDOptimizer(BaseOptimizer):
    """
    Stochastic Gradient Descent (SGD) optimizer implementation.
    This optimizer updates parameters using the gradient of the loss function.
    """

    def __init__(self, params, lr=0.01, momentum=0.0):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.momentum = momentum
        self.state = {p: {'velocity': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['velocity'] = self.momentum * state['velocity'] - self.lr * p.grad
            p.data += state['velocity']

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def __repr__(self):
        return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"

    def state_dict(self):
        return {p: {'velocity': state['velocity']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        for p in self.params:
            if p in state_dict:
                self.state[p] = state_dict[p]
            else:
                self.state[p] = {'velocity': 0}

    def __str__(self):
        return f"SGDOptimizer(lr={self.lr}, momentum={self.momentum})"
Backend/optimizers/__init__.py
ADDED
@@ -0,0 +1,13 @@
from .adam import AdamOptimizer
from .SGD import SGDOptimizer
from .azure_optim import Azure
from .RMSprop import RMSpropOptimizer
from .base import BaseOptimizer

__all__ = [
    'AdamOptimizer',
    'SGDOptimizer',
    'Azure',
    'RMSpropOptimizer',
    'BaseOptimizer'
]
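A minimal end-to-end sketch of the exported optimizers, not part of the commit. It assumes the parameters are torch tensors exposing .data and .grad, which is what the update rules above rely on, and that Backend is importable as a package.

import torch
from Backend.optimizers import SGDOptimizer

# Toy problem: minimize f(w) = (w - 3)^2 for a single parameter.
w = torch.tensor([0.0], requires_grad=True)
opt = SGDOptimizer([w], lr=0.1, momentum=0.9)

for _ in range(100):
    w.grad = None                      # clear the gradient so backward() does not accumulate
    loss = ((w - 3.0) ** 2).sum()
    loss.backward()
    opt.step()                         # momentum update applied directly to w.data

print(w.data)                          # approaches 3.0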
Backend/optimizers/__pycache__/RMSprop.cpython-310.pyc
ADDED
Binary file (2.01 kB)
Backend/optimizers/__pycache__/SGD.cpython-310.pyc
ADDED
Binary file (2.18 kB)
Backend/optimizers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (423 Bytes)
Backend/optimizers/__pycache__/adam.cpython-310.pyc
ADDED
Binary file (2.22 kB)
Backend/optimizers/__pycache__/azure_optim.cpython-310.pyc
ADDED
Binary file (4.72 kB)
Backend/optimizers/__pycache__/base.cpython-310.pyc
ADDED
Binary file (1.21 kB)
Backend/optimizers/adam.py
ADDED
@@ -0,0 +1,49 @@
# Adam optimizer implementation

from .base import BaseOptimizer

class AdamOptimizer(BaseOptimizer):
    """
    Adam optimizer implementation.
    """

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8):
        self.params = list(params)  # materialize so the parameters can be iterated on every step
        self.lr = lr
        self.betas = betas
        self.eps = eps
        self.state = {p: {'m': 0, 'v': 0, 't': 0} for p in self.params}

    def step(self):
        for p in self.params:
            if p.grad is None:  # skip parameters that did not receive a gradient
                continue

            state = self.state[p]
            state['t'] += 1

            # Update biased first moment estimate
            state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad

            # Update biased second raw moment estimate
            state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)

            # Compute bias-corrected first moment estimate
            m_hat = state['m'] / (1 - self.betas[0] ** state['t'])

            # Compute bias-corrected second raw moment estimate
            v_hat = state['v'] / (1 - self.betas[1] ** state['t'])

            # Update parameters
            p.data -= self.lr * m_hat / (v_hat.sqrt() + self.eps)

    def zero_grad(self):
        for p in self.params:
            p.grad = 0

    def state_dict(self):
        return {p: {'m': state['m'], 'v': state['v'], 't': state['t']} for p, state in self.state.items()}

    def load_state_dict(self, state_dict):
        for p in self.params:
            if p in state_dict:
                self.state[p] = state_dict[p]

    def __repr__(self):
        return f"AdamOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps})"
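To make the bias correction above concrete, here is a single step worked by hand, not part of the commit; torch tensors are assumed only because the update calls .sqrt() on the second-moment estimate.

import torch
from Backend.optimizers import AdamOptimizer

p = torch.tensor([1.0], requires_grad=True)
p.grad = torch.tensor([0.5])           # pretend a backward pass produced this gradient
opt = AdamOptimizer([p], lr=0.001)
opt.step()

# With betas = (0.9, 0.999): m = 0.1 * 0.5 = 0.05, v = 0.001 * 0.25 = 0.00025,
# m_hat = 0.05 / 0.1 = 0.5, v_hat = 0.00025 / 0.001 = 0.25,
# so the update is -0.001 * 0.5 / (0.5 + 1e-8) ≈ -0.001.
print(p.data)                          # ~0.9990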
Backend/optimizers/adamw.py
ADDED
@@ -0,0 +1,39 @@
# AdamW implementation

from .adam import AdamOptimizer

class AdamWOptimizer(AdamOptimizer):
    """
    AdamW optimizer implementation.
    This optimizer decouples weight decay from the optimization steps.
    """

    def __init__(self, params, lr=0.001, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.01):
        super().__init__(params, lr, betas, eps)
        self.weight_decay = weight_decay

    def step(self):
        for p in self.params:
            if p.grad is None:
                continue

            state = self.state[p]
            state['t'] += 1

            # Update biased first moment estimate
            state['m'] = self.betas[0] * state['m'] + (1 - self.betas[0]) * p.grad

            # Update biased second raw moment estimate
            state['v'] = self.betas[1] * state['v'] + (1 - self.betas[1]) * (p.grad ** 2)

            # Compute bias-corrected first moment estimate
            m_hat = state['m'] / (1 - self.betas[0] ** state['t'])

            # Compute bias-corrected second raw moment estimate
            v_hat = state['v'] / (1 - self.betas[1] ** state['t'])

            # Update parameters with weight decay
            p.data -= self.lr * (m_hat / (v_hat.sqrt() + self.eps) + self.weight_decay * p.data)

    def __repr__(self):
        return f"AdamWOptimizer(lr={self.lr}, betas={self.betas}, eps={self.eps}, weight_decay={self.weight_decay})"
Backend/optimizers/azure_optim.py
ADDED
@@ -0,0 +1,134 @@
import torch
import torch.optim as optim
import numpy as np
import logging

# Configure logging for loss monitoring
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class Azure(optim.Optimizer):
    def __init__(self, params, lr=0.0007518383921113902, T0=2.2723218904585964, sigma=0.17181058166567398,
                 betas=(0.9, 0.999), eps=1e-8, sa_steps=5, sa_momentum=0.6612913488540948, clip_grad_norm=1.0):
        """
        Azure Sky Optimizer: A hybrid optimizer combining Simulated Annealing (SA) and Adam.

        Args:
            params (iterable): Iterable of parameters or dicts defining parameter groups.
            lr (float): Learning rate for Adam phase (default: 0.0007518383921113902).
            T0 (float): Initial temperature for SA (default: 2.2723218904585964).
            sigma (float): Perturbation strength for SA (default: 0.17181058166567398).
            betas (tuple): Adam's exponential decay rates (default: (0.9, 0.999)).
            eps (float): Adam's epsilon for numerical stability (default: 1e-8).
            sa_steps (int): Number of steps for SA phase (default: 5).
            sa_momentum (float): Momentum for SA updates (default: 0.6612913488540948).
            clip_grad_norm (float): Max norm for gradient clipping (default: 1.0).
        """
        # Process params to handle various input formats
        if isinstance(params, (list, tuple)) and isinstance(params[0], dict):
            # Handle parameter groups (e.g., [{'params': ..., 'lr': ...}, ...])
            param_groups = []
            for group in params:
                group_dict = group.copy()
                if 'params' not in group_dict:
                    raise ValueError("Each parameter group must contain a 'params' key")
                # Convert named_parameters() to a list of parameters if necessary
                if isinstance(group_dict['params'], (list, tuple)) and isinstance(group_dict['params'][0], tuple):
                    group_dict['params'] = [p for _, p in group_dict['params']]
                param_groups.append(group_dict)
            params = param_groups
        else:
            # Handle direct parameter lists or named_parameters()
            if isinstance(params, (list, tuple)) and isinstance(params[0], tuple):
                params = [p for _, p in params]  # Convert named_parameters() to parameter list
            params = [{'params': params}]

        # Set defaults for each parameter group
        defaults = dict(lr=lr, T0=T0, sigma=sigma, betas=betas, eps=eps, sa_steps=sa_steps,
                        sa_momentum=sa_momentum, clip_grad_norm=clip_grad_norm)
        super().__init__(params, defaults)
        self.step_count = 0
        self.sa_active = True
        self.losses = []
        self.loss_window = 5
        self.loss_spike_threshold = 10.0

    def step(self, closure=None):
        """Performs a single optimization step."""
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        # Loss spike monitoring
        if loss is not None:
            self._monitor_loss(loss.item())

        for group in self.param_groups:
            # Gradient clipping
            if group['clip_grad_norm'] is not None:
                torch.nn.utils.clip_grad_norm_(group['params'], group['clip_grad_norm'])

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data

                # Dynamic Temperature Scaling
                T = self._compute_temperature(group)
                # Exploration-Exploitation Fusion
                alpha = self._compute_alpha(group)

                if self.sa_active:
                    noise = torch.randn_like(p.data) * group['sigma'] * T
                    sa_update = noise
                else:
                    sa_update = torch.zeros_like(p.data)

                # Adam update
                state = self.state[p]
                if 'm' not in state:
                    state['m'] = torch.zeros_like(p.data)
                    state['v'] = torch.zeros_like(p.data)
                    state['step'] = 0
                m, v = state['m'], state['v']
                beta1, beta2 = group['betas']
                state['step'] += 1
                m.mul_(beta1).add_(grad, alpha=1 - beta1)
                v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                m_hat = m / (1 - beta1 ** state['step'])
                v_hat = v / (1 - beta2 ** state['step'])
                # Use group-specific learning rate if provided
                lr = group.get('lr', self.defaults['lr'])
                adam_update = -lr * m_hat / (v_hat.sqrt() + group['eps'])

                # Combined update
                update = alpha * adam_update + (1 - alpha) * sa_update
                p.data.add_(update)

        self.step_count += 1
        if self.step_count >= self.param_groups[0]['sa_steps']:
            self.sa_active = False
        return loss

    def _compute_temperature(self, group):
        """Dynamic Temperature Scaling based on step progress."""
        epoch_decay = 0.05  # Adjustable decay rate
        return group['T0'] * (1.0 / (1.0 + epoch_decay * self.step_count))

    def _compute_alpha(self, group):
        """Exploration-Exploitation Fusion Schedule using sigmoid."""
        midpoint = group['sa_steps'] / 2
        return 1 / (1 + np.exp(-(self.step_count - midpoint) / (midpoint / 5)))

    def _monitor_loss(self, loss):
        """Monitors for loss spikes and logs warnings."""
        self.losses.append(loss)
        if len(self.losses) > self.loss_window:
            self.losses.pop(0)
            # Once the window is full, compare the newest loss with the average of the previous ones
            avg_loss = sum(self.losses[:-1]) / (len(self.losses) - 1)
            current_loss = self.losses[-1]
            if current_loss > avg_loss * self.loss_spike_threshold:
                logger.warning(
                    f"Loss spike detected: {current_loss:.4f} > {avg_loss:.4f} * {self.loss_spike_threshold}")
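Because Azure subclasses torch.optim.Optimizer and can monitor the loss through a closure, it drops into a standard PyTorch training loop. A minimal sketch, not part of the commit; the model and data here are placeholders, and Backend is assumed to be importable as a package.

import torch
import torch.nn as nn
from Backend.optimizers import Azure

model = nn.Linear(10, 1)
criterion = nn.MSELoss()
optimizer = Azure(model.parameters())   # SA noise for the first sa_steps steps, then Adam-style updates

x = torch.randn(32, 10)
y = torch.randn(32, 1)

for step in range(20):
    def closure():
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        return loss

    loss = optimizer.step(closure)      # the closure lets the optimizer watch for loss spikes
    if step % 5 == 0:
        print(step, loss.item())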
Backend/optimizers/base.py
ADDED
@@ -0,0 +1,36 @@
# The base class for all optimizers. Acts as an interface for the optimizers.

from abc import ABC, abstractmethod

class BaseOptimizer(ABC):
    """
    Base class for all optimizers.
    """

    @abstractmethod
    def step(self):
        """
        Perform a single optimization step.
        """
        pass

    @abstractmethod
    def zero_grad(self):
        """
        Clear the gradients of all optimized parameters.
        """
        pass

    @abstractmethod
    def state_dict(self):
        """
        Return the state of the optimizer as a dictionary.
        """
        pass

    @abstractmethod
    def load_state_dict(self, state_dict):
        """
        Load the optimizer state from a dictionary.
        """
        pass
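The interface above is small enough to show in full. A minimal sketch of a conforming optimizer, not part of the commit; the class name and behavior are illustrative only.

from Backend.optimizers import BaseOptimizer

class PlainGD(BaseOptimizer):
    """Vanilla gradient descent implementing the BaseOptimizer contract."""

    def __init__(self, params, lr=0.01):
        self.params = list(params)
        self.lr = lr

    def step(self):
        for p in self.params:
            if p.grad is not None:
                p.data -= self.lr * p.grad   # follow the negative gradient

    def zero_grad(self):
        for p in self.params:
            p.grad = None

    def state_dict(self):
        return {'lr': self.lr}

    def load_state_dict(self, state_dict):
        self.lr = state_dict.get('lr', self.lr)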