Spaces:

Allanatrix
/

Nexa

Sleeping

App Files Files Community

Allanatrix commited on Jun 16

Commit

1b579d5

verified ·

1 Parent(s): 8ade09a

Upload 12 files

Browse files

Files changed (12) hide show

.gitignore +0 -0
App.py +158 -0
Engine.py +166 -0
Hooks.py +47 -0
LICENSE +9 -0
Metrics.py +22 -0
Plots.py +33 -0
README.md +195 -14
__init__.py +4 -0
pyproject.toml +15 -0
usage_example.py +74 -0
uv.lock +0 -0

.gitignore ADDED Viewed

File without changes

App.py ADDED Viewed

	@@ -0,0 +1,158 @@

+import gradio as gr
+import json
+import pandas as pd
+from Engine import Engine
+def run_study(mode, benchmark_func, optimizers, dim, dataset, epochs, batch_size, lr, use_sa, sa_temp, sa_cooling_rate):
+    if not optimizers:
+        raise gr.Error("Please select at least one optimizer.")
+    if mode == "Benchmark Optimization" and not benchmark_func:
+        raise gr.Error("Please select a benchmark function.")
+    if mode == "ML Task Training" and not dataset:
+        raise gr.Error("Please select a dataset.")
+    config = {
+        'mode': 'benchmark' if mode == 'Benchmark Optimization' else 'ml_task',
+        'benchmark_func': benchmark_func,
+        'optimizers': optimizers,
+        'dim': int(dim),
+        'dataset': dataset,
+        'epochs': int(epochs),
+        'batch_size': int(batch_size),
+        'lr': float(lr),
+        'use_sa': use_sa if 'AzureSky' in optimizers else None,
+        'sa_temp': float(sa_temp) if 'AzureSky' in optimizers and use_sa else None,
+        'sa_cooling_rate': float(sa_cooling_rate) if 'AzureSky' in optimizers and use_sa else None,
+        'max_iter': 100
+    }
+    runner = Engine()
+    results = runner.run(config)
+    if config['mode'] == 'benchmark':
+        metrics_df = pd.DataFrame(results['metrics'], index=config['optimizers'])
+        return results['plot'], None, metrics_df, results['metrics'], json.dumps(results, indent=2), "Study completed successfully."
+    else:
+        metrics_df = pd.DataFrame(results['metrics'], index=config['optimizers'])
+        return results['plot_acc'], results['plot_loss'], metrics_df, results['metrics'], json.dumps(results, indent=2), "Study completed successfully."
+def export_results(results_json):
+    return results_json, "results.json"
+def toggle_azure_settings(optimizers):
+    return gr.update(visible='AzureSky' in optimizers)
+with gr.Blocks(theme=gr.themes.Soft(), title="Nexa R&D Studio", css="""
+    .gr-button { margin-top: 10px; }
+    .gr-box { border-radius: 8px; }
+    .status-message { color: green; font-weight: bold; }
+""") as app:
+    gr.Markdown("""
+        # Nexa R&D Studio
+        A visual research tool for comparing and evaluating optimizers on benchmark functions and ML tasks.
+        Select a mode, configure your study, and analyze results with interactive plots and metrics.
+    """)
+    with gr.Tabs() as tabs:
+        with gr.TabItem("Study Configuration"):
+            mode = gr.Radio(
+                ['Benchmark Optimization', 'ML Task Training'],
+                label='Study Mode',
+                value='Benchmark Optimization',
+                info='Choose between optimizing benchmark functions or training on ML datasets.'
+            )
+            with gr.Row():
+                with gr.Column():
+                    optimizers = gr.CheckboxGroup(
+                        ['AzureSky', 'Adam', 'SGD', 'AdamW', 'RMSprop'],
+                        label='Optimizers',
+                        info='Select optimizers to compare. AzureSky includes a Simulated Annealing option.'
+                    )
+                    with gr.Accordion("AzureSky Ablation Settings", open=False, visible=False) as azure_settings:
+                        use_sa = gr.Checkbox(
+                            label='Enable Simulated Annealing (AzureSky)',
+                            value=True,
+                            info='Toggle Simulated Annealing for AzureSky optimizer.'
+                        )
+                        sa_temp = gr.Number(
+                            label='Initial SA Temperature',
+                            value=1.0,
+                            minimum=0.1,
+                            info='Controls exploration in Simulated Annealing (higher = more exploration).'
+                        )
+                        sa_cooling_rate = gr.Number(
+                            label='SA Cooling Rate',
+                            value=0.95,
+                            minimum=0.1,
+                            maximum=0.99,
+                            info='Rate at which SA temperature decreases (closer to 1 = slower cooling).'
+                        )
+                with gr.Column():
+                    with gr.Group(visible=True) as benchmark_tab:
+                        benchmark_func = gr.Dropdown(
+                            ['Himmelblau', 'Ackley', 'Adjiman', 'Brent'],
+                            label='Benchmark Function',
+                            info='Select a mathematical function to optimize.'
+                        )
+                        dim = gr.Number(
+                            label='Dimensionality',
+                            value=2,
+                            minimum=2,
+                            info='Number of dimensions for the benchmark function.'
+                        )
+                    with gr.Group(visible=False) as ml_task_tab:
+                        dataset = gr.Dropdown(
+                            ['MNIST', 'CIFAR-10'],
+                            label='Dataset',
+                            info='Select a dataset for ML training.'
+                        )
+                        epochs = gr.Number(
+                            label='Epochs',
+                            value=10,
+                            minimum=1,
+                            info='Number of training epochs.'
+                        )
+                        batch_size = gr.Number(
+                            label='Batch Size',
+                            value=32,
+                            minimum=1,
+                            info='Number of samples per training batch.'
+                        )
+                        lr = gr.Number(
+                            label='Learning Rate',
+                            value=0.001,
+                            minimum=0,
+                            info='Learning rate for optimizers.'
+                        )
+            run_button = gr.Button('Run Study', variant='primary')
+        with gr.TabItem("Results"):
+            status_message = gr.Markdown("Configure and run a study to view results.", elem_classes=["status-message"])
+            with gr.Row():
+                plot1 = gr.Plot(label='Main Plot (Benchmark or Accuracy)')
+                plot2 = gr.Plot(label='Loss Plot (ML Mode)')
+            metrics_df = gr.Dataframe(label='Metrics Table', headers=['Optimizer'] + [
+                'distance', 'final_loss', 'convergence_rate',
+                'final_train_acc', 'final_val_acc', 'generalization_gap',
+                'final_train_loss', 'final_val_loss', 'best_epoch'
+            ])
+            metrics_json = gr.JSON(label='Detailed Metrics')
+            export_data = gr.State()
+            export_button = gr.Button('Export Results as JSON')
+            export_file = gr.File(label='Download Results')
+    def toggle_tabs(mode):
+        return gr.update(visible=mode == 'Benchmark Optimization'), gr.update(visible=mode == 'ML Task Training')
+    mode.change(toggle_tabs, inputs=mode, outputs=[benchmark_tab, ml_task_tab])
+    optimizers.change(toggle_azure_settings, inputs=optimizers, outputs=azure_settings)
+    run_button.click(
+        run_study,
+        inputs=[mode, benchmark_func, optimizers, dim, dataset, epochs, batch_size, lr, use_sa, sa_temp, sa_cooling_rate],
+        outputs=[plot1, plot2, metrics_df, metrics_json, export_data, status_message]
+    )
+    export_button.click(export_results, inputs=[export_data], outputs=[export_file, gr.File()])
+app.launch()

Engine.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import numpy as np
+from Backend.Benchmarks import Himmelblau, Adjiman, Brent, Ackley
+from Backend.optimizers import adam, SGD, Azure, RMSprop
+from Backend.ML_Tasks import MNISTRunner, CIFAR10Runner
+from Metrics import calculate_benchmark_metrics, calculate_ml_metrics
+from Plots import plot_benchmark_surface, plot_ml_curves
+class Engine:
+    def __init__(self):
+        self.benchmarks = {
+            "Himmelblau": Himmelblau,
+            "Adjiman": Adjiman,
+            "Brent": Brent,
+            "Ackley": Ackley,
+        }
+        self.optimizers = {
+            "Adam": adam,
+            "SGD": SGD,
+            "AzureSky": Azure,
+            "RMSprop": RMSprop,
+        }
+        self.ml_tasks = {"MNIST": MNISTRunner, "CIFAR-10": CIFAR10Runner}
+    def run(self, config):
+        """Run a study based on the provided configuration."""
+        if config["mode"] == "benchmark":
+            return self.run_benchmark_study(config)
+        elif config["mode"] == "ml_task":
+            return self.run_ml_task_study(config)
+        else:
+            raise ValueError(f"Invalid mode: {config['mode']}")
+    def run_benchmark_study(self, config):
+        """Run a benchmark study comparing multiple optimizers."""
+        benchmark_class = self.benchmarks.get(config["benchmark_func"])
+        if not benchmark_class:
+            raise ValueError(f"Unknown benchmark: {config['benchmark_func']}")
+        benchmark = benchmark_class()
+        optimizers = []
+        for opt_name in config["optimizers"]:
+            opt_class = self.optimizers.get(opt_name)
+            if not opt_class:
+                raise ValueError(f"Unknown optimizer: {opt_name}")
+            # Pass use_sa for AzureSky if specified
+            kwargs = (
+                {"use_sa": config["use_sa"]}
+                if opt_name == "AzureSky" and "use_sa" in config
+                else {}
+            )
+            optimizers.append(opt_class(**kwargs))
+        initial_point = np.random.randn(config.get("dim", 2))
+        max_iter = config.get("max_iter", 100)
+        paths = []
+        loss_values = []
+        for opt in optimizers:
+            path = []
+            losses = []
+            x = initial_point.copy()
+            opt.reset()  # Reset optimizer state
+            for _ in range(max_iter):
+                grad = benchmark.grad_f(x)
+                x = opt.step(x, grad)
+                path.append(x.copy())
+                losses.append(benchmark.f(x))
+            paths.append(np.array(path))
+            loss_values.append(losses)
+        metrics = [
+            calculate_benchmark_metrics(path[-1], benchmark.global_min, path, losses)
+            for path, losses in zip(paths, loss_values)
+        ]
+        plot = plot_benchmark_surface(benchmark, paths, config["optimizers"])
+        return {"plot": plot, "metrics": metrics, "paths": paths}
+    def run_ml_task_study(self, config):
+        """Run an ML task study comparing multiple optimizers."""
+        task_class = self.ml_tasks.get(config["dataset"])
+        if not task_class:
+            raise ValueError(f"Unknown dataset: {config['dataset']}")
+        task_runner = task_class()
+        optimizers = []
+        for opt_name in config["optimizers"]:
+            opt_class = self.optimizers.get(opt_name)
+            if not opt_class:
+                raise ValueError(f"Unknown optimizer: {opt_name}")
+            kwargs = (
+                {"use_sa": config["use_sa"]}
+                if opt_name == "AzureSky" and "use_sa" in config
+                else {}
+            )
+            optimizers.append(opt_class(**kwargs))
+        histories = []
+        for opt in optimizers:
+            history = task_runner.run(
+                optimizer=opt,
+                epochs=config.get("epochs", 10),
+                batch_size=config.get("batch_size", 32),
+                lr=config.get("lr", 0.001),
+            )
+            histories.append(history)
+        metrics = [calculate_ml_metrics(h["train"], h["val"]) for h in histories]
+        plot_acc = plot_ml_curves(
+            [h["train"]["accuracy"] for h in histories],
+            [h["val"]["accuracy"] for h in histories],
+            config["optimizers"],
+            "Accuracy",
+        )
+        plot_loss = plot_ml_curves(
+            [h["train"]["loss"] for h in histories],
+            [h["val"]["loss"] for h in histories],
+            config["optimizers"],
+            "Loss",
+        )
+        return {
+            "plot_acc": plot_acc,
+            "plot_loss": plot_loss,
+            "metrics": metrics,
+            "histories": histories,
+        }
+    def list_benchmarks(self):
+        """Return available benchmark functions."""
+        return list(self.benchmarks.keys())
+    def list_optimizers(self):
+        """Return available optimizers."""
+        return list(self.optimizers.keys())
+    def list_ml_tasks(self):
+        """Return available ML tasks."""
+        return list(self.ml_tasks.keys())
+if __name__ == "__main__":
+    engine = Engine()
+    # Example benchmark study
+    config = {
+        "mode": "benchmark",
+        "benchmark_func": "Himmelblau",
+        "optimizers": ["Adam", "AzureSky"],
+        "dim": 2,
+        "max_iter": 100,
+        "use_sa": True,
+    }
+    results = engine.run(config)
+    print("Benchmark Results:", results["metrics"])
+    # Example ML task study
+    config = {
+        "mode": "ml_task",
+        "dataset": "MNIST",
+        "optimizers": ["Adam", "AzureSky"],
+        "epochs": 5,
+        "batch_size": 32,
+        "lr": 0.001,
+        "use_sa": True,
+    }
+    results = engine.run(config)
+    print("ML Task Results:", results['metrics'])

Hooks.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import torch
+import logging
+from torch import nn
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+class GradientClippingHook:
+    """Hook to clip gradients during training to prevent explosions."""
+    def __init__(self, max_norm=1.0):
+        self.max_norm = max_norm
+    def __call__(self, module, grad_input, grad_output):
+        if grad_input is not None:
+            for g in grad_input:
+                if g is not None:
+                    torch.nn.utils.clip_grad_norm_(g, self.max_norm)
+                    logger.debug(f"Gradient clipped to max norm {self.max_norm}")
+class LossSpikeDetectionHook:
+    """Hook to detect and log loss spikes during training."""
+    def __init__(self, threshold=10.0, window=5):
+        self.threshold = threshold
+        self.window = window
+        self.losses = []
+    def __call__(self, module, input, output):
+        if isinstance(module, nn.Module) and output is not None:
+            loss = output.mean() if output.requires_grad else None
+            if loss is not None:
+                self.losses.append(loss.item())
+                if len(self.losses) > self.window:
+                    self.losses.pop(0)
+                    avg_loss = sum(self.losses[:-1]) / (len(self.losses) - 1)
+                    current_loss = self.losses[-1]
+                    if current_loss > avg_loss * self.threshold:
+                        logger.warning(f"Loss spike detected: {current_loss:.4f} > {avg_loss:.4f} * {self.threshold}")
+# Utility function to register hooks on a model
+def register_azure_hooks(model):
+    grad_clip_hook = GradientClippingHook(max_norm=1.0)
+    loss_spike_hook = LossSpikeDetectionHook(threshold=10.0, window=5)
+    for module in model.modules():
+        module.register_full_backward_hook(grad_clip_hook)
+        module.register_forward_hook(loss_spike_hook)
+    logger.info("Azure hooks registered: GradientClippingHook and LossSpikeDetectionHook")

LICENSE ADDED Viewed

	@@ -0,0 +1,9 @@

+MIT License
+Copyright (c) 2025 Allan
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Metrics.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import numpy as np
+def calculate_benchmark_metrics(final_point, global_min, path, loss_values):
+    distance = np.linalg.norm(final_point - global_min)
+    convergence_rate = len(path) if loss_values[-1] < 1e-5 else float('inf')
+    return {'distance': float(distance), 'final_loss': float(loss_values[-1]), 'convergence_rate': convergence_rate}
+def calculate_ml_metrics(train_history, val_history):
+    final_train_acc = train_history['accuracy'][-1]
+    final_val_acc = val_history['accuracy'][-1]
+    generalization_gap = final_train_acc - final_val_acc
+    final_train_loss = train_history['loss'][-1]
+    final_val_loss = val_history['loss'][-1]
+    best_epoch = np.argmax(val_history['accuracy']) + 1
+    return {
+        'final_train_acc': final_train_acc,
+        'final_val_acc': final_val_acc,
+        'generalization_gap': generalization_gap,
+        'final_train_loss': final_train_loss,
+        'final_val_loss': final_val_loss,
+        'best_epoch': best_epoch
+    }

Plots.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import matplotlib.pyplot as plt
+import numpy as np
+def plot_benchmark_surface(benchmark, paths, optimizer_names):
+    fig = plt.figure(figsize=(10, 8))
+    ax = fig.add_subplot(111, projection='3d')
+    x = np.linspace(-5, 5, 100)
+    y = np.linspace(-5, 5, 100)
+    X, Y = np.meshgrid(x, y)
+    Z = np.array([[benchmark.f(np.array([xi, yi])) for xi in x] for yi in y])
+    ax.plot_surface(X, Y, Z, cmap='viridis', alpha=0.5)
+    for path, name in zip(paths, optimizer_names):
+        ax.plot(path[:, 0], path[:, 1], [benchmark.f(p) for p in path], label=name)
+    ax.set_xlabel('X')
+    ax.set_ylabel('Y')
+    ax.set_zlabel('Loss')
+    ax.legend()
+    plt.close()
+    return fig
+def plot_ml_curves(train_data, val_data, optimizer_names, metric='Accuracy'):
+    fig = plt.figure(figsize=(10, 6))
+    for t, v, name in zip(train_data, val_data, optimizer_names):
+        epochs = range(1, len(t) + 1)
+        plt.plot(epochs, t, label=f'{name} Train')
+        plt.plot(epochs, v, '--', label=f'{name} Val')
+    plt.xlabel('Epoch')
+    plt.ylabel(metric)
+    plt.title(f'{metric} vs Epoch')
+    plt.legend()
+    plt.grid(True)
+    plt.close()
+    return fig

README.md CHANGED Viewed

@@ -1,14 +1,195 @@
----
-title: Nexa R&D
-emoji: 💻
-colorFrom: yellow
-colorTo: green
-sdk: gradio
-sdk_version: 5.33.2
-app_file: app.py
-pinned: false
-license: bsl-1.0
-short_description: Frontier research, model development, optimization studies
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Azure Sky Optimizer
+Azure Sky Optimizer is a hybrid optimizer for PyTorch, integrating Simulated Annealing (SA) with Adam to provide robust exploration and precise exploitation in non-convex optimization tasks. Designed for complex machine learning challenges, Azure Sky excels in domains requiring deep exploration of rugged loss landscapes, such as scientific machine learning, symbolic reasoning, and protein folding.
+Developed as part of an R&D initiative, Azure Sky combines structured stochastic exploration with gradient-based refinement, achieving stable convergence and strong generalization in multi-modal search spaces.
+---
+## Overview
+Conventional optimizers like Adam and AdamW often converge prematurely to sharp local minima, compromising generalization. Azure Sky leverages SA’s global search in early stages and Adam’s local convergence later, ensuring both deep exploration and precise convergence.
+### Core Innovations
+- **Dynamic Temperature Scaling:** Adjusts SA temperature based on training progress for controlled exploration.
+- **Exploration-Exploitation Fusion:** Seamlessly transitions between SA and Adam using a sigmoid-based blending mechanism.
+- **Stability Enhancements:** Built-in gradient clipping and loss spike monitoring for robust training.
+---
+## Key Features
+- **Hybrid Optimization:** Combines SA’s global search with Adam’s local refinement.
+- **Optimized Hyperparameters:** Tuned via Optuna (the best trial: 0.0893 on Two Moons dataset).
+- **Flexible Parameter Handling:** Supports parameter lists, named parameters, and parameter groups with group-specific learning rates.
+- **Production-Ready Stability:** Includes gradient clipping and loss spike detection.
+- **PyTorch Compatibility:** Fully integrated with PyTorch’s `optim` module.
+---
+## Installation
+Clone the repository and install using [uv](https://github.com/astral-sh/uv):
+```bash
+git clone https://github.com/yourusername/azure-sky-optimizer.git
+cd azure-sky-optimizer
+uv pip install -e .
+```
+**Requirements:**
+- Python >= 3.8
+- PyTorch >= 1.10.0
+- NumPy >= 1.20.0
+> **Note:** Ensure `uv` is installed. See [uv documentation](https://github.com/astral-sh/uv) for instructions.
+---
+## Usage Examples
+Azure Sky integrates seamlessly into PyTorch workflows. Below are usage examples for various parameter configurations.
+### Basic Usage
+```python
+import torch
+import torch.nn as nn
+from azure_optimizer import Azure
+model = nn.Linear(10, 2)
+criterion = nn.CrossEntropyLoss()
+optimizer = Azure(model.parameters())
+inputs = torch.randn(32, 10)
+targets = torch.randint(0, 2, (32,))
+optimizer.zero_grad()
+outputs = model(inputs)
+loss = criterion(outputs, targets)
+loss.backward()
+optimizer.step()
+```
+### Parameter Lists
+```python
+var1 = torch.nn.Parameter(torch.randn(2, 2))
+var2 = torch.nn.Parameter(torch.randn(2, 2))
+optimizer = Azure([var1, var2])
+```
+### Parameter Groups with Custom Learning Rates
+```python
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.base = nn.Linear(10, 5)
+        self.classifier = nn.Linear(5, 2)
+    def forward(self, x):
+        x = torch.relu(self.base(x))
+        return self.classifier(x)
+model = SimpleModel()
+optimizer = Azure([
+    {'params': model.base.parameters(), 'lr': 1e-2},
+    {'params': model.classifier.parameters()}
+])
+```
+For additional examples, see `azure_optimizer/usage_example.py`.
+---
+## Hyperparameters
+Default hyperparameters (from Optuna Trial 99, the best validation loss: 0.0893 on Two Moons):
+| Parameter   | Value                 | Description                  |
+|-------------|-----------------------|------------------------------|
+| lr          | 0.0007518383921113902 | Learning rate for Adam phase |
+| T0          | 2.2723218904585964    | Initial temperature for SA   |
+| sigma       | 0.17181058166567398   | Perturbation strength for SA |
+| SA_steps    | 5                     | Steps for SA phase           |
+| sa_momentum | 0.6612913488540948    | Momentum for SA updates      |
+---
+## Performance
+Evaluated on the Two Moons dataset (5000 samples, 20% noise):
+- **Best Validation Loss:** 0.0919
+- **Final Validation Accuracy:** 96.7%
+- **Epochs to Convergence:** 50
+Compared to:
+- **Adam:** loss 0.0927, acc 96.8%
+- **AdamW:** loss 0.0917, acc 97.1%
+Azure Sky prioritizes robust generalization over rapid convergence, making it ideal for pre-training and tasks requiring deep exploration.
+---
+## Contributing
+Contributions are welcome!
+1. Fork the repository.
+2. Create a feature branch: `git checkout -b feature/your-feature`
+3. Commit your changes.
+4. Push to your branch.
+5. Open a pull request.
+Please follow PEP 8 standards. Tests are not yet implemented; contributions to add testing infrastructure are highly encouraged.
+---
+## License
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
+---
+## Citation
+If you use Azure Sky Optimizer in your research or engineering projects, please cite:
+```
+[Allan]. (2025). Azure Sky Optimizer: A Hybrid Approach for Exploration and Exploitation. GitHub Repository.
+```
+---
+## Project Status
+As of May 27, 2025, Azure Sky Optimizer is stable and production-ready.
+**Planned improvements:**
+- Testing on larger datasets (e.g., CIFAR-10, MNIST)
+- Ablation studies for hyperparameter impact
+- Integration with PyTorch Lightning
+- Adding a comprehensive test suite
+For questions or collaboration, please open an issue on GitHub.
+Kaggle Notebook: https://www.kaggle.com/code/allanwandia/non-convex-research
+Writeup It has old metrics so watch out: https://github.com/DarkStarStrix/CSE-Repo-of-Advanced-Computation-ML-and-Systems-Engineering/blob/main/Papers/Computer_Science/Optimization/Optimization_Algothrims_The_HimmelBlau_Function_Case_Study.pdf
+---
+## Repository Structure
+```
+azure-sky-optimizer/
+├── azure_optimizer/
+│   ├── __init__.py
+│   ├── azure.py        # Updated Azure class
+│   ├── hooks.py
+│   └── usage_example.py  # Usage demonstrations
+├── README.md
+└── LICENSE
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .azure import Azure
+from .hooks import register_azure_hooks, GradientClippingHook, LossSpikeDetectionHook
+__all__ = ['Azure', 'register_azure_hooks', 'GradientClippingHook', 'LossSpikeDetectionHook']

pyproject.toml ADDED Viewed

	@@ -0,0 +1,15 @@

+[project]
+name = "azure"
+version = "0.1.0"
+description = "Add your description here"
+requires-python = ">=3.10"
+dependencies = [
+    "benchmarkfcns==1.0.0",
+    "gradio>=5.34.0",
+    "matplotlib>=3.10.3",
+    "numpy==1.26.0",
+    "pytorch-lightning==2.5.1",
+    "scipy>=1.15.3",
+    "torch>=2.7.1",
+    "torchvision>=0.22.1",
+]

usage_example.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import torch
+import torch.nn as nn
+from azure_optimizer import Azure  # Assuming the above class is in a module named azure_optimizer
+# Define a simple model for demonstration
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.base = nn.Linear(10, 5)
+        self.classifier = nn.Linear(5, 2)
+    def forward(self, x):
+        x = torch.relu(self.base(x))
+        return self.classifier(x)
+# Initialize model and sample variables
+model = SimpleModel()
+var1 = torch.nn.Parameter(torch.randn(2, 2))
+var2 = torch.nn.Parameter(torch.randn(2, 2))
+inputs = torch.randn(32, 10)
+targets = torch.randint(0, 2, (32,))
+criterion = nn.CrossEntropyLoss()
+# Example 1: Basic usage with model.parameters()
+optimizer = Azure(model.parameters())
+optimizer.zero_grad()
+outputs = model(inputs)
+loss = criterion(outputs, targets)
+loss.backward()
+optimizer.step()
+# Example 2: List of parameters
+optimizer = Azure([var1, var2])
+optimizer.zero_grad()
+loss = criterion(var1 @ var2, torch.zeros_like(var1 @ var2))
+loss.backward()
+optimizer.step()
+# Example 3: Named parameters
+optimizer = Azure(model.named_parameters())
+optimizer.zero_grad()
+outputs = model(inputs)
+loss = criterion(outputs, targets)
+loss.backward()
+optimizer.step()
+# Example 4: Named parameters in a list (invalid, will be handled by the class)
+optimizer = Azure([('layer0', var1), ('layer1', var2)])  # The class converts this to a parameter list
+optimizer.zero_grad()
+loss = criterion(var1 @ var2, torch.zeros_like(var1 @ var2))
+loss.backward()
+optimizer.step()
+# Example 5: Parameter groups with different learning rates
+optimizer = Azure([
+    {'params': model.base.parameters(), 'lr': 1e-2},
+    {'params': model.classifier.parameters()}
+])
+optimizer.zero_grad()
+outputs = model(inputs)
+loss = criterion(outputs, targets)
+loss.backward()
+optimizer.step()
+# Example 6: Parameter groups with named parameters
+optimizer = Azure([
+    {'params': model.base.named_parameters(), 'lr': 1e-2},
+    {'params': model.classifier.named_parameters()}
+])
+optimizer.zero_grad()
+outputs = model(inputs)
+loss = criterion(outputs, targets)
+loss.backward()
+optimizer.step()

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff