Allanatrix commited on
Commit
1b579d5
·
verified ·
1 Parent(s): 8ade09a

Upload 12 files

Browse files
Files changed (12) hide show
  1. .gitignore +0 -0
  2. App.py +158 -0
  3. Engine.py +166 -0
  4. Hooks.py +47 -0
  5. LICENSE +9 -0
  6. Metrics.py +22 -0
  7. Plots.py +33 -0
  8. README.md +195 -14
  9. __init__.py +4 -0
  10. pyproject.toml +15 -0
  11. usage_example.py +74 -0
  12. uv.lock +0 -0
.gitignore ADDED
File without changes
App.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import pandas as pd
4
+ from Engine import Engine
5
+
6
+ def run_study(mode, benchmark_func, optimizers, dim, dataset, epochs, batch_size, lr, use_sa, sa_temp, sa_cooling_rate):
7
+ if not optimizers:
8
+ raise gr.Error("Please select at least one optimizer.")
9
+ if mode == "Benchmark Optimization" and not benchmark_func:
10
+ raise gr.Error("Please select a benchmark function.")
11
+ if mode == "ML Task Training" and not dataset:
12
+ raise gr.Error("Please select a dataset.")
13
+
14
+ config = {
15
+ 'mode': 'benchmark' if mode == 'Benchmark Optimization' else 'ml_task',
16
+ 'benchmark_func': benchmark_func,
17
+ 'optimizers': optimizers,
18
+ 'dim': int(dim),
19
+ 'dataset': dataset,
20
+ 'epochs': int(epochs),
21
+ 'batch_size': int(batch_size),
22
+ 'lr': float(lr),
23
+ 'use_sa': use_sa if 'AzureSky' in optimizers else None,
24
+ 'sa_temp': float(sa_temp) if 'AzureSky' in optimizers and use_sa else None,
25
+ 'sa_cooling_rate': float(sa_cooling_rate) if 'AzureSky' in optimizers and use_sa else None,
26
+ 'max_iter': 100
27
+ }
28
+ runner = Engine()
29
+ results = runner.run(config)
30
+
31
+ if config['mode'] == 'benchmark':
32
+ metrics_df = pd.DataFrame(results['metrics'], index=config['optimizers'])
33
+ return results['plot'], None, metrics_df, results['metrics'], json.dumps(results, indent=2), "Study completed successfully."
34
+ else:
35
+ metrics_df = pd.DataFrame(results['metrics'], index=config['optimizers'])
36
+ return results['plot_acc'], results['plot_loss'], metrics_df, results['metrics'], json.dumps(results, indent=2), "Study completed successfully."
37
+
38
+ def export_results(results_json):
39
+ return results_json, "results.json"
40
+
41
+ def toggle_azure_settings(optimizers):
42
+ return gr.update(visible='AzureSky' in optimizers)
43
+
44
+ with gr.Blocks(theme=gr.themes.Soft(), title="Nexa R&D Studio", css="""
45
+ .gr-button { margin-top: 10px; }
46
+ .gr-box { border-radius: 8px; }
47
+ .status-message { color: green; font-weight: bold; }
48
+ """) as app:
49
+ gr.Markdown("""
50
+ # Nexa R&D Studio
51
+ A visual research tool for comparing and evaluating optimizers on benchmark functions and ML tasks.
52
+ Select a mode, configure your study, and analyze results with interactive plots and metrics.
53
+ """)
54
+
55
+ with gr.Tabs() as tabs:
56
+ with gr.TabItem("Study Configuration"):
57
+ mode = gr.Radio(
58
+ ['Benchmark Optimization', 'ML Task Training'],
59
+ label='Study Mode',
60
+ value='Benchmark Optimization',
61
+ info='Choose between optimizing benchmark functions or training on ML datasets.'
62
+ )
63
+
64
+ with gr.Row():
65
+ with gr.Column():
66
+ optimizers = gr.CheckboxGroup(
67
+ ['AzureSky', 'Adam', 'SGD', 'AdamW', 'RMSprop'],
68
+ label='Optimizers',
69
+ info='Select optimizers to compare. AzureSky includes a Simulated Annealing option.'
70
+ )
71
+ with gr.Accordion("AzureSky Ablation Settings", open=False, visible=False) as azure_settings:
72
+ use_sa = gr.Checkbox(
73
+ label='Enable Simulated Annealing (AzureSky)',
74
+ value=True,
75
+ info='Toggle Simulated Annealing for AzureSky optimizer.'
76
+ )
77
+ sa_temp = gr.Number(
78
+ label='Initial SA Temperature',
79
+ value=1.0,
80
+ minimum=0.1,
81
+ info='Controls exploration in Simulated Annealing (higher = more exploration).'
82
+ )
83
+ sa_cooling_rate = gr.Number(
84
+ label='SA Cooling Rate',
85
+ value=0.95,
86
+ minimum=0.1,
87
+ maximum=0.99,
88
+ info='Rate at which SA temperature decreases (closer to 1 = slower cooling).'
89
+ )
90
+
91
+ with gr.Column():
92
+ with gr.Group(visible=True) as benchmark_tab:
93
+ benchmark_func = gr.Dropdown(
94
+ ['Himmelblau', 'Ackley', 'Adjiman', 'Brent'],
95
+ label='Benchmark Function',
96
+ info='Select a mathematical function to optimize.'
97
+ )
98
+ dim = gr.Number(
99
+ label='Dimensionality',
100
+ value=2,
101
+ minimum=2,
102
+ info='Number of dimensions for the benchmark function.'
103
+ )
104
+ with gr.Group(visible=False) as ml_task_tab:
105
+ dataset = gr.Dropdown(
106
+ ['MNIST', 'CIFAR-10'],
107
+ label='Dataset',
108
+ info='Select a dataset for ML training.'
109
+ )
110
+ epochs = gr.Number(
111
+ label='Epochs',
112
+ value=10,
113
+ minimum=1,
114
+ info='Number of training epochs.'
115
+ )
116
+ batch_size = gr.Number(
117
+ label='Batch Size',
118
+ value=32,
119
+ minimum=1,
120
+ info='Number of samples per training batch.'
121
+ )
122
+ lr = gr.Number(
123
+ label='Learning Rate',
124
+ value=0.001,
125
+ minimum=0,
126
+ info='Learning rate for optimizers.'
127
+ )
128
+
129
+ run_button = gr.Button('Run Study', variant='primary')
130
+
131
+ with gr.TabItem("Results"):
132
+ status_message = gr.Markdown("Configure and run a study to view results.", elem_classes=["status-message"])
133
+ with gr.Row():
134
+ plot1 = gr.Plot(label='Main Plot (Benchmark or Accuracy)')
135
+ plot2 = gr.Plot(label='Loss Plot (ML Mode)')
136
+ metrics_df = gr.Dataframe(label='Metrics Table', headers=['Optimizer'] + [
137
+ 'distance', 'final_loss', 'convergence_rate',
138
+ 'final_train_acc', 'final_val_acc', 'generalization_gap',
139
+ 'final_train_loss', 'final_val_loss', 'best_epoch'
140
+ ])
141
+ metrics_json = gr.JSON(label='Detailed Metrics')
142
+ export_data = gr.State()
143
+ export_button = gr.Button('Export Results as JSON')
144
+ export_file = gr.File(label='Download Results')
145
+
146
+ def toggle_tabs(mode):
147
+ return gr.update(visible=mode == 'Benchmark Optimization'), gr.update(visible=mode == 'ML Task Training')
148
+
149
+ mode.change(toggle_tabs, inputs=mode, outputs=[benchmark_tab, ml_task_tab])
150
+ optimizers.change(toggle_azure_settings, inputs=optimizers, outputs=azure_settings)
151
+ run_button.click(
152
+ run_study,
153
+ inputs=[mode, benchmark_func, optimizers, dim, dataset, epochs, batch_size, lr, use_sa, sa_temp, sa_cooling_rate],
154
+ outputs=[plot1, plot2, metrics_df, metrics_json, export_data, status_message]
155
+ )
156
+ export_button.click(export_results, inputs=[export_data], outputs=[export_file, gr.File()])
157
+
158
+ app.launch()
Engine.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from Backend.Benchmarks import Himmelblau, Adjiman, Brent, Ackley
3
+ from Backend.optimizers import adam, SGD, Azure, RMSprop
4
+ from Backend.ML_Tasks import MNISTRunner, CIFAR10Runner
5
+ from Metrics import calculate_benchmark_metrics, calculate_ml_metrics
6
+ from Plots import plot_benchmark_surface, plot_ml_curves
7
+
8
+
9
+ class Engine:
10
+ def __init__(self):
11
+ self.benchmarks = {
12
+ "Himmelblau": Himmelblau,
13
+ "Adjiman": Adjiman,
14
+ "Brent": Brent,
15
+ "Ackley": Ackley,
16
+ }
17
+ self.optimizers = {
18
+ "Adam": adam,
19
+ "SGD": SGD,
20
+ "AzureSky": Azure,
21
+ "RMSprop": RMSprop,
22
+ }
23
+ self.ml_tasks = {"MNIST": MNISTRunner, "CIFAR-10": CIFAR10Runner}
24
+
25
+ def run(self, config):
26
+ """Run a study based on the provided configuration."""
27
+ if config["mode"] == "benchmark":
28
+ return self.run_benchmark_study(config)
29
+ elif config["mode"] == "ml_task":
30
+ return self.run_ml_task_study(config)
31
+ else:
32
+ raise ValueError(f"Invalid mode: {config['mode']}")
33
+
34
+ def run_benchmark_study(self, config):
35
+ """Run a benchmark study comparing multiple optimizers."""
36
+ benchmark_class = self.benchmarks.get(config["benchmark_func"])
37
+ if not benchmark_class:
38
+ raise ValueError(f"Unknown benchmark: {config['benchmark_func']}")
39
+ benchmark = benchmark_class()
40
+
41
+ optimizers = []
42
+ for opt_name in config["optimizers"]:
43
+ opt_class = self.optimizers.get(opt_name)
44
+ if not opt_class:
45
+ raise ValueError(f"Unknown optimizer: {opt_name}")
46
+ # Pass use_sa for AzureSky if specified
47
+ kwargs = (
48
+ {"use_sa": config["use_sa"]}
49
+ if opt_name == "AzureSky" and "use_sa" in config
50
+ else {}
51
+ )
52
+ optimizers.append(opt_class(**kwargs))
53
+
54
+ initial_point = np.random.randn(config.get("dim", 2))
55
+ max_iter = config.get("max_iter", 100)
56
+ paths = []
57
+ loss_values = []
58
+
59
+ for opt in optimizers:
60
+ path = []
61
+ losses = []
62
+ x = initial_point.copy()
63
+ opt.reset() # Reset optimizer state
64
+ for _ in range(max_iter):
65
+ grad = benchmark.grad_f(x)
66
+ x = opt.step(x, grad)
67
+ path.append(x.copy())
68
+ losses.append(benchmark.f(x))
69
+ paths.append(np.array(path))
70
+ loss_values.append(losses)
71
+
72
+ metrics = [
73
+ calculate_benchmark_metrics(path[-1], benchmark.global_min, path, losses)
74
+ for path, losses in zip(paths, loss_values)
75
+ ]
76
+ plot = plot_benchmark_surface(benchmark, paths, config["optimizers"])
77
+ return {"plot": plot, "metrics": metrics, "paths": paths}
78
+
79
+ def run_ml_task_study(self, config):
80
+ """Run an ML task study comparing multiple optimizers."""
81
+ task_class = self.ml_tasks.get(config["dataset"])
82
+ if not task_class:
83
+ raise ValueError(f"Unknown dataset: {config['dataset']}")
84
+ task_runner = task_class()
85
+
86
+ optimizers = []
87
+ for opt_name in config["optimizers"]:
88
+ opt_class = self.optimizers.get(opt_name)
89
+ if not opt_class:
90
+ raise ValueError(f"Unknown optimizer: {opt_name}")
91
+ kwargs = (
92
+ {"use_sa": config["use_sa"]}
93
+ if opt_name == "AzureSky" and "use_sa" in config
94
+ else {}
95
+ )
96
+ optimizers.append(opt_class(**kwargs))
97
+
98
+ histories = []
99
+ for opt in optimizers:
100
+ history = task_runner.run(
101
+ optimizer=opt,
102
+ epochs=config.get("epochs", 10),
103
+ batch_size=config.get("batch_size", 32),
104
+ lr=config.get("lr", 0.001),
105
+ )
106
+ histories.append(history)
107
+
108
+ metrics = [calculate_ml_metrics(h["train"], h["val"]) for h in histories]
109
+ plot_acc = plot_ml_curves(
110
+ [h["train"]["accuracy"] for h in histories],
111
+ [h["val"]["accuracy"] for h in histories],
112
+ config["optimizers"],
113
+ "Accuracy",
114
+ )
115
+ plot_loss = plot_ml_curves(
116
+ [h["train"]["loss"] for h in histories],
117
+ [h["val"]["loss"] for h in histories],
118
+ config["optimizers"],
119
+ "Loss",
120
+ )
121
+ return {
122
+ "plot_acc": plot_acc,
123
+ "plot_loss": plot_loss,
124
+ "metrics": metrics,
125
+ "histories": histories,
126
+ }
127
+
128
+ def list_benchmarks(self):
129
+ """Return available benchmark functions."""
130
+ return list(self.benchmarks.keys())
131
+
132
+ def list_optimizers(self):
133
+ """Return available optimizers."""
134
+ return list(self.optimizers.keys())
135
+
136
+ def list_ml_tasks(self):
137
+ """Return available ML tasks."""
138
+ return list(self.ml_tasks.keys())
139
+
140
+
141
+ if __name__ == "__main__":
142
+ engine = Engine()
143
+ # Example benchmark study
144
+ config = {
145
+ "mode": "benchmark",
146
+ "benchmark_func": "Himmelblau",
147
+ "optimizers": ["Adam", "AzureSky"],
148
+ "dim": 2,
149
+ "max_iter": 100,
150
+ "use_sa": True,
151
+ }
152
+ results = engine.run(config)
153
+ print("Benchmark Results:", results["metrics"])
154
+
155
+ # Example ML task study
156
+ config = {
157
+ "mode": "ml_task",
158
+ "dataset": "MNIST",
159
+ "optimizers": ["Adam", "AzureSky"],
160
+ "epochs": 5,
161
+ "batch_size": 32,
162
+ "lr": 0.001,
163
+ "use_sa": True,
164
+ }
165
+ results = engine.run(config)
166
+ print("ML Task Results:", results['metrics'])
Hooks.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import logging
3
+ from torch import nn
4
+
5
+ # Configure logging
6
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
7
+ logger = logging.getLogger(__name__)
8
+
9
+ class GradientClippingHook:
10
+ """Hook to clip gradients during training to prevent explosions."""
11
+ def __init__(self, max_norm=1.0):
12
+ self.max_norm = max_norm
13
+
14
+ def __call__(self, module, grad_input, grad_output):
15
+ if grad_input is not None:
16
+ for g in grad_input:
17
+ if g is not None:
18
+ torch.nn.utils.clip_grad_norm_(g, self.max_norm)
19
+ logger.debug(f"Gradient clipped to max norm {self.max_norm}")
20
+
21
+ class LossSpikeDetectionHook:
22
+ """Hook to detect and log loss spikes during training."""
23
+ def __init__(self, threshold=10.0, window=5):
24
+ self.threshold = threshold
25
+ self.window = window
26
+ self.losses = []
27
+
28
+ def __call__(self, module, input, output):
29
+ if isinstance(module, nn.Module) and output is not None:
30
+ loss = output.mean() if output.requires_grad else None
31
+ if loss is not None:
32
+ self.losses.append(loss.item())
33
+ if len(self.losses) > self.window:
34
+ self.losses.pop(0)
35
+ avg_loss = sum(self.losses[:-1]) / (len(self.losses) - 1)
36
+ current_loss = self.losses[-1]
37
+ if current_loss > avg_loss * self.threshold:
38
+ logger.warning(f"Loss spike detected: {current_loss:.4f} > {avg_loss:.4f} * {self.threshold}")
39
+
40
+ # Utility function to register hooks on a model
41
+ def register_azure_hooks(model):
42
+ grad_clip_hook = GradientClippingHook(max_norm=1.0)
43
+ loss_spike_hook = LossSpikeDetectionHook(threshold=10.0, window=5)
44
+ for module in model.modules():
45
+ module.register_full_backward_hook(grad_clip_hook)
46
+ module.register_forward_hook(loss_spike_hook)
47
+ logger.info("Azure hooks registered: GradientClippingHook and LossSpikeDetectionHook")
LICENSE ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Allan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Metrics.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ def calculate_benchmark_metrics(final_point, global_min, path, loss_values):
4
+ distance = np.linalg.norm(final_point - global_min)
5
+ convergence_rate = len(path) if loss_values[-1] < 1e-5 else float('inf')
6
+ return {'distance': float(distance), 'final_loss': float(loss_values[-1]), 'convergence_rate': convergence_rate}
7
+
8
+ def calculate_ml_metrics(train_history, val_history):
9
+ final_train_acc = train_history['accuracy'][-1]
10
+ final_val_acc = val_history['accuracy'][-1]
11
+ generalization_gap = final_train_acc - final_val_acc
12
+ final_train_loss = train_history['loss'][-1]
13
+ final_val_loss = val_history['loss'][-1]
14
+ best_epoch = np.argmax(val_history['accuracy']) + 1
15
+ return {
16
+ 'final_train_acc': final_train_acc,
17
+ 'final_val_acc': final_val_acc,
18
+ 'generalization_gap': generalization_gap,
19
+ 'final_train_loss': final_train_loss,
20
+ 'final_val_loss': final_val_loss,
21
+ 'best_epoch': best_epoch
22
+ }
Plots.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+
4
+ def plot_benchmark_surface(benchmark, paths, optimizer_names):
5
+ fig = plt.figure(figsize=(10, 8))
6
+ ax = fig.add_subplot(111, projection='3d')
7
+ x = np.linspace(-5, 5, 100)
8
+ y = np.linspace(-5, 5, 100)
9
+ X, Y = np.meshgrid(x, y)
10
+ Z = np.array([[benchmark.f(np.array([xi, yi])) for xi in x] for yi in y])
11
+ ax.plot_surface(X, Y, Z, cmap='viridis', alpha=0.5)
12
+ for path, name in zip(paths, optimizer_names):
13
+ ax.plot(path[:, 0], path[:, 1], [benchmark.f(p) for p in path], label=name)
14
+ ax.set_xlabel('X')
15
+ ax.set_ylabel('Y')
16
+ ax.set_zlabel('Loss')
17
+ ax.legend()
18
+ plt.close()
19
+ return fig
20
+
21
+ def plot_ml_curves(train_data, val_data, optimizer_names, metric='Accuracy'):
22
+ fig = plt.figure(figsize=(10, 6))
23
+ for t, v, name in zip(train_data, val_data, optimizer_names):
24
+ epochs = range(1, len(t) + 1)
25
+ plt.plot(epochs, t, label=f'{name} Train')
26
+ plt.plot(epochs, v, '--', label=f'{name} Val')
27
+ plt.xlabel('Epoch')
28
+ plt.ylabel(metric)
29
+ plt.title(f'{metric} vs Epoch')
30
+ plt.legend()
31
+ plt.grid(True)
32
+ plt.close()
33
+ return fig
README.md CHANGED
@@ -1,14 +1,195 @@
1
- ---
2
- title: Nexa R&D
3
- emoji: 💻
4
- colorFrom: yellow
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.33.2
8
- app_file: app.py
9
- pinned: false
10
- license: bsl-1.0
11
- short_description: Frontier research, model development, optimization studies
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Azure Sky Optimizer
2
+
3
+ Azure Sky Optimizer is a hybrid optimizer for PyTorch, integrating Simulated Annealing (SA) with Adam to provide robust exploration and precise exploitation in non-convex optimization tasks. Designed for complex machine learning challenges, Azure Sky excels in domains requiring deep exploration of rugged loss landscapes, such as scientific machine learning, symbolic reasoning, and protein folding.
4
+
5
+ Developed as part of an R&D initiative, Azure Sky combines structured stochastic exploration with gradient-based refinement, achieving stable convergence and strong generalization in multi-modal search spaces.
6
+
7
+ ---
8
+
9
+ ## Overview
10
+
11
+ Conventional optimizers like Adam and AdamW often converge prematurely to sharp local minima, compromising generalization. Azure Sky leverages SA’s global search in early stages and Adam’s local convergence later, ensuring both deep exploration and precise convergence.
12
+
13
+ ### Core Innovations
14
+
15
+ - **Dynamic Temperature Scaling:** Adjusts SA temperature based on training progress for controlled exploration.
16
+ - **Exploration-Exploitation Fusion:** Seamlessly transitions between SA and Adam using a sigmoid-based blending mechanism.
17
+ - **Stability Enhancements:** Built-in gradient clipping and loss spike monitoring for robust training.
18
+
19
+ ---
20
+
21
+ ## Key Features
22
+
23
+ - **Hybrid Optimization:** Combines SA’s global search with Adam’s local refinement.
24
+ - **Optimized Hyperparameters:** Tuned via Optuna (the best trial: 0.0893 on Two Moons dataset).
25
+ - **Flexible Parameter Handling:** Supports parameter lists, named parameters, and parameter groups with group-specific learning rates.
26
+ - **Production-Ready Stability:** Includes gradient clipping and loss spike detection.
27
+ - **PyTorch Compatibility:** Fully integrated with PyTorch’s `optim` module.
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ Clone the repository and install using [uv](https://github.com/astral-sh/uv):
34
+
35
+ ```bash
36
+ git clone https://github.com/yourusername/azure-sky-optimizer.git
37
+ cd azure-sky-optimizer
38
+ uv pip install -e .
39
+ ```
40
+
41
+ **Requirements:**
42
+ - Python >= 3.8
43
+ - PyTorch >= 1.10.0
44
+ - NumPy >= 1.20.0
45
+
46
+ > **Note:** Ensure `uv` is installed. See [uv documentation](https://github.com/astral-sh/uv) for instructions.
47
+
48
+ ---
49
+
50
+ ## Usage Examples
51
+
52
+ Azure Sky integrates seamlessly into PyTorch workflows. Below are usage examples for various parameter configurations.
53
+
54
+ ### Basic Usage
55
+
56
+ ```python
57
+ import torch
58
+ import torch.nn as nn
59
+ from azure_optimizer import Azure
60
+
61
+ model = nn.Linear(10, 2)
62
+ criterion = nn.CrossEntropyLoss()
63
+ optimizer = Azure(model.parameters())
64
+
65
+ inputs = torch.randn(32, 10)
66
+ targets = torch.randint(0, 2, (32,))
67
+ optimizer.zero_grad()
68
+ outputs = model(inputs)
69
+ loss = criterion(outputs, targets)
70
+ loss.backward()
71
+ optimizer.step()
72
+ ```
73
+
74
+ ### Parameter Lists
75
+
76
+ ```python
77
+ var1 = torch.nn.Parameter(torch.randn(2, 2))
78
+ var2 = torch.nn.Parameter(torch.randn(2, 2))
79
+ optimizer = Azure([var1, var2])
80
+ ```
81
+
82
+ ### Parameter Groups with Custom Learning Rates
83
+
84
+ ```python
85
+ class SimpleModel(nn.Module):
86
+ def __init__(self):
87
+ super().__init__()
88
+ self.base = nn.Linear(10, 5)
89
+ self.classifier = nn.Linear(5, 2)
90
+
91
+ def forward(self, x):
92
+ x = torch.relu(self.base(x))
93
+ return self.classifier(x)
94
+
95
+ model = SimpleModel()
96
+ optimizer = Azure([
97
+ {'params': model.base.parameters(), 'lr': 1e-2},
98
+ {'params': model.classifier.parameters()}
99
+ ])
100
+ ```
101
+
102
+ For additional examples, see `azure_optimizer/usage_example.py`.
103
+
104
+ ---
105
+
106
+ ## Hyperparameters
107
+
108
+ Default hyperparameters (from Optuna Trial 99, the best validation loss: 0.0893 on Two Moons):
109
+
110
+ | Parameter | Value | Description |
111
+ |-------------|-----------------------|------------------------------|
112
+ | lr | 0.0007518383921113902 | Learning rate for Adam phase |
113
+ | T0 | 2.2723218904585964 | Initial temperature for SA |
114
+ | sigma | 0.17181058166567398 | Perturbation strength for SA |
115
+ | SA_steps | 5 | Steps for SA phase |
116
+ | sa_momentum | 0.6612913488540948 | Momentum for SA updates |
117
+
118
+ ---
119
+
120
+ ## Performance
121
+
122
+ Evaluated on the Two Moons dataset (5000 samples, 20% noise):
123
+
124
+ - **Best Validation Loss:** 0.0919
125
+ - **Final Validation Accuracy:** 96.7%
126
+ - **Epochs to Convergence:** 50
127
+
128
+ Compared to:
129
+ - **Adam:** loss 0.0927, acc 96.8%
130
+ - **AdamW:** loss 0.0917, acc 97.1%
131
+
132
+ Azure Sky prioritizes robust generalization over rapid convergence, making it ideal for pre-training and tasks requiring deep exploration.
133
+
134
+ ---
135
+
136
+ ## Contributing
137
+
138
+ Contributions are welcome!
139
+
140
+ 1. Fork the repository.
141
+ 2. Create a feature branch: `git checkout -b feature/your-feature`
142
+ 3. Commit your changes.
143
+ 4. Push to your branch.
144
+ 5. Open a pull request.
145
+
146
+ Please follow PEP 8 standards. Tests are not yet implemented; contributions to add testing infrastructure are highly encouraged.
147
+
148
+ ---
149
+
150
+ ## License
151
+
152
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
153
+
154
+ ---
155
+
156
+ ## Citation
157
+
158
+ If you use Azure Sky Optimizer in your research or engineering projects, please cite:
159
+
160
+ ```
161
+ [Allan]. (2025). Azure Sky Optimizer: A Hybrid Approach for Exploration and Exploitation. GitHub Repository.
162
+ ```
163
+
164
+ ---
165
+
166
+ ## Project Status
167
+
168
+ As of May 27, 2025, Azure Sky Optimizer is stable and production-ready.
169
+
170
+ **Planned improvements:**
171
+ - Testing on larger datasets (e.g., CIFAR-10, MNIST)
172
+ - Ablation studies for hyperparameter impact
173
+ - Integration with PyTorch Lightning
174
+ - Adding a comprehensive test suite
175
+
176
+ For questions or collaboration, please open an issue on GitHub.
177
+
178
+ Kaggle Notebook: https://www.kaggle.com/code/allanwandia/non-convex-research
179
+
180
+ Writeup It has old metrics so watch out: https://github.com/DarkStarStrix/CSE-Repo-of-Advanced-Computation-ML-and-Systems-Engineering/blob/main/Papers/Computer_Science/Optimization/Optimization_Algothrims_The_HimmelBlau_Function_Case_Study.pdf
181
+
182
+ ---
183
+
184
+ ## Repository Structure
185
+
186
+ ```
187
+ azure-sky-optimizer/
188
+ ├── azure_optimizer/
189
+ │ ├── __init__.py
190
+ │ ├── azure.py # Updated Azure class
191
+ │ ├── hooks.py
192
+ │ └── usage_example.py # Usage demonstrations
193
+ ├── README.md
194
+ └── LICENSE
195
+ ```
__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from .azure import Azure
2
+ from .hooks import register_azure_hooks, GradientClippingHook, LossSpikeDetectionHook
3
+
4
+ __all__ = ['Azure', 'register_azure_hooks', 'GradientClippingHook', 'LossSpikeDetectionHook']
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "azure"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ requires-python = ">=3.10"
6
+ dependencies = [
7
+ "benchmarkfcns==1.0.0",
8
+ "gradio>=5.34.0",
9
+ "matplotlib>=3.10.3",
10
+ "numpy==1.26.0",
11
+ "pytorch-lightning==2.5.1",
12
+ "scipy>=1.15.3",
13
+ "torch>=2.7.1",
14
+ "torchvision>=0.22.1",
15
+ ]
usage_example.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from azure_optimizer import Azure # Assuming the above class is in a module named azure_optimizer
4
+
5
+ # Define a simple model for demonstration
6
+ class SimpleModel(nn.Module):
7
+ def __init__(self):
8
+ super().__init__()
9
+ self.base = nn.Linear(10, 5)
10
+ self.classifier = nn.Linear(5, 2)
11
+
12
+ def forward(self, x):
13
+ x = torch.relu(self.base(x))
14
+ return self.classifier(x)
15
+
16
+ # Initialize model and sample variables
17
+ model = SimpleModel()
18
+ var1 = torch.nn.Parameter(torch.randn(2, 2))
19
+ var2 = torch.nn.Parameter(torch.randn(2, 2))
20
+ inputs = torch.randn(32, 10)
21
+ targets = torch.randint(0, 2, (32,))
22
+ criterion = nn.CrossEntropyLoss()
23
+
24
+ # Example 1: Basic usage with model.parameters()
25
+ optimizer = Azure(model.parameters())
26
+ optimizer.zero_grad()
27
+ outputs = model(inputs)
28
+ loss = criterion(outputs, targets)
29
+ loss.backward()
30
+ optimizer.step()
31
+
32
+ # Example 2: List of parameters
33
+ optimizer = Azure([var1, var2])
34
+ optimizer.zero_grad()
35
+ loss = criterion(var1 @ var2, torch.zeros_like(var1 @ var2))
36
+ loss.backward()
37
+ optimizer.step()
38
+
39
+ # Example 3: Named parameters
40
+ optimizer = Azure(model.named_parameters())
41
+ optimizer.zero_grad()
42
+ outputs = model(inputs)
43
+ loss = criterion(outputs, targets)
44
+ loss.backward()
45
+ optimizer.step()
46
+
47
+ # Example 4: Named parameters in a list (invalid, will be handled by the class)
48
+ optimizer = Azure([('layer0', var1), ('layer1', var2)]) # The class converts this to a parameter list
49
+ optimizer.zero_grad()
50
+ loss = criterion(var1 @ var2, torch.zeros_like(var1 @ var2))
51
+ loss.backward()
52
+ optimizer.step()
53
+
54
+ # Example 5: Parameter groups with different learning rates
55
+ optimizer = Azure([
56
+ {'params': model.base.parameters(), 'lr': 1e-2},
57
+ {'params': model.classifier.parameters()}
58
+ ])
59
+ optimizer.zero_grad()
60
+ outputs = model(inputs)
61
+ loss = criterion(outputs, targets)
62
+ loss.backward()
63
+ optimizer.step()
64
+
65
+ # Example 6: Parameter groups with named parameters
66
+ optimizer = Azure([
67
+ {'params': model.base.named_parameters(), 'lr': 1e-2},
68
+ {'params': model.classifier.named_parameters()}
69
+ ])
70
+ optimizer.zero_grad()
71
+ outputs = model(inputs)
72
+ loss = criterion(outputs, targets)
73
+ loss.backward()
74
+ optimizer.step()
uv.lock ADDED
The diff for this file is too large to render. See raw diff