Update app.py
app.py
CHANGED
@@ -2,18 +2,312 @@ import gradio as gr
 import numpy as np
 import matplotlib.pyplot as plt

-
 
-def run_snn(X, epochs, batch_size, l2_lambda, patience):
-    # Your SNN initialization and training code here
     snn = SwarmNeuralNetwork(layer_sizes=[1, 32, 16, 8, 1],
                              fractal_methods=[sierpinski_fractal, mandelbrot_fractal, julia_fractal, julia_fractal])
     snn.train(X, y, epochs=epochs, batch_size=batch_size, l2_lambda=l2_lambda, patience=patience)
     y_pred = snn.forward(X, training=False)
     fractal_outputs = snn.apply_fractals(X)
-    return y_pred, fractal_outputs

-def plot_results(X, y, y_pred, fractal_outputs):
     fig, axs = plt.subplots(2, 2, figsize=(15, 10))

     axs[0, 0].plot(X, y, label='True')
@@ -31,26 +325,21 @@ def plot_results(X, y, y_pred, fractal_outputs):
     axs[1, 1].plot(X, fractal_outputs[2])
     axs[1, 1].set_title('Julia Fractal Output')

     return fig

-
-
-
-
-
-
-
-
-
-

-
-    fn=main_interface,
-    inputs=[
-        gr.inputs.Slider(1, 10000, default=5000, label="Epochs"),
-        gr.inputs.Slider(1, 100, default=32, label="Batch Size"),
-        gr.inputs.Slider(0.0001, 0.1, default=0.00001, label="L2 Lambda"),
-        gr.inputs.Slider(1, 1000, default=50, label="Patience")
-    ],
-    outputs=gr.outputs.Plot()
-).launch()
 import numpy as np
 import matplotlib.pyplot as plt

+def relu(x):
+    return np.maximum(0, x)
+
+def relu_derivative(x):
+    return (x > 0).astype(float)
+
+def tanh(x):
+    return np.tanh(x)
+
+def tanh_derivative(x):
+    return 1 - np.tanh(x)**2
+
+class EveOptimizer:
+    def __init__(self, params, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
+        self.params = params
+        self.lr = learning_rate
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.t = 0
+        self.m = [np.zeros_like(p) for p in params]
+        self.v = [np.zeros_like(p) for p in params]
+        self.fractal_memory = [np.zeros_like(p) for p in params]
+
+    def step(self, grads):
+        self.t += 1
+        for i, (param, grad) in enumerate(zip(self.params, grads)):
+            self.m[i] = self.beta1 * self.m[i] + (1 - self.beta1) * grad
+            self.v[i] = self.beta2 * self.v[i] + (1 - self.beta2) * (grad ** 2)
+
+            m_hat = self.m[i] / (1 - self.beta1 ** self.t)
+            v_hat = self.v[i] / (1 - self.beta2 ** self.t)
+
+            fractal_factor = self.fractal_adjustment(param, grad)
+            self.fractal_memory[i] = 0.9 * self.fractal_memory[i] + 0.1 * fractal_factor
+
+            param -= self.lr * m_hat / (np.sqrt(v_hat) + self.epsilon) * self.fractal_memory[i]
+
+    def fractal_adjustment(self, param, grad):
+        c = np.mean(grad) + 1j * np.std(param)
+        z = 0
+        for _ in range(10):
+            z = z**2 + c
+            if abs(z) > 2:
+                break
+        return 1 / (1 + abs(z))
+
+class BatchNormalization:
+    def __init__(self, input_shape):
+        self.gamma = np.ones(input_shape)
+        self.beta = np.zeros(input_shape)
+        self.epsilon = 1e-5
+        self.moving_mean = np.zeros(input_shape)
+        self.moving_var = np.ones(input_shape)
+
+    def forward(self, x, training=True):
+        if training:
+            mean = np.mean(x, axis=0)
+            var = np.var(x, axis=0)
+            self.moving_mean = 0.99 * self.moving_mean + 0.01 * mean
+            self.moving_var = 0.99 * self.moving_var + 0.01 * var
+        else:
+            mean = self.moving_mean
+            var = self.moving_var
+
+        x_norm = (x - mean) / np.sqrt(var + self.epsilon)
+        out = self.gamma * x_norm + self.beta
+        if training:
+            self.cache = (x, x_norm, mean, var)
+        return out
+
+    def backward(self, dout):
+        x, x_norm, mean, var = self.cache
+        m = x.shape[0]
+
+        dx_norm = dout * self.gamma
+        dvar = np.sum(dx_norm * (x - mean) * -0.5 * (var + self.epsilon)**(-1.5), axis=0)
+        dmean = np.sum(dx_norm * -1 / np.sqrt(var + self.epsilon), axis=0) + dvar * np.mean(-2 * (x - mean), axis=0)
+
+        dx = dx_norm / np.sqrt(var + self.epsilon) + dvar * 2 * (x - mean) / m + dmean / m
+        dgamma = np.sum(dout * x_norm, axis=0)
+        dbeta = np.sum(dout, axis=0)
+
+        return dx, dgamma, dbeta
+
+class Reward:
+    def __init__(self):
+        self.lowest_avg_batch_loss = float('inf')
+        self.lowest_max_batch_loss = float('inf')
+        self.best_weights = None
+
+    def update(self, avg_batch_loss, max_batch_loss, network):
+        improved = False
+        if avg_batch_loss < self.lowest_avg_batch_loss:
+            self.lowest_avg_batch_loss = avg_batch_loss
+            improved = True
+        if max_batch_loss < self.lowest_max_batch_loss:
+            self.lowest_max_batch_loss = max_batch_loss
+            improved = True
+        if improved:
+            self.best_weights = self.get_network_weights(network)
+
+    def get_network_weights(self, network):
+        weights = []
+        for layer in network.layers:
+            layer_weights = []
+            for agent in layer.agents:
+                agent_weights = {
+                    'weights': agent.weights.copy(),
+                    'bias': agent.bias.copy(),
+                    'bn_gamma': agent.bn.gamma.copy(),
+                    'bn_beta': agent.bn.beta.copy()
+                }
+                layer_weights.append(agent_weights)
+            weights.append(layer_weights)
+        return weights
+
+    def apply_best_weights(self, network):
+        if self.best_weights is not None:
+            for layer, layer_weights in zip(network.layers, self.best_weights):
+                for agent, agent_weights in zip(layer.agents, layer_weights):
+                    agent.weights = agent_weights['weights'].copy()
+                    agent.bias = agent_weights['bias'].copy()
+                    agent.bn.gamma = agent_weights['bn_gamma'].copy()
+                    agent.bn.beta = agent_weights['bn_beta'].copy()
+
+class Agent:
+    def __init__(self, id, input_size, output_size, fractal_method):
+        self.id = id
+        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2. / input_size)
+        self.bias = np.zeros((1, output_size))
+        self.fractal_method = fractal_method
+        self.bn = BatchNormalization((output_size,))
+
+        self.optimizer = EveOptimizer([self.weights, self.bias, self.bn.gamma, self.bn.beta])
+
+    def forward(self, x, training=True):
+        self.last_input = x
+        z = np.dot(x, self.weights) + self.bias
+        z_bn = self.bn.forward(z, training)
+        self.last_output = relu(z_bn)
+        return self.last_output
+
+    def backward(self, error, l2_lambda=1e-5):
+        delta = error * relu_derivative(self.last_output)
+        delta, dgamma, dbeta = self.bn.backward(delta)
+
+        dw = np.dot(self.last_input.T, delta) + l2_lambda * self.weights
+        db = np.sum(delta, axis=0, keepdims=True)
+
+        self.optimizer.step([dw, db, dgamma, dbeta])
+
+        return np.dot(delta, self.weights.T)
+
+    def apply_fractal(self, x):
+        return self.fractal_method(x)
+
+class Swarm:
+    def __init__(self, num_agents, input_size, output_size, fractal_method):
+        self.agents = [Agent(i, input_size, output_size, fractal_method) for i in range(num_agents)]
+
+    def forward(self, x, training=True):
+        results = [agent.forward(x, training) for agent in self.agents]
+        return np.mean(results, axis=0)
+
+    def backward(self, error, l2_lambda):
+        errors = [agent.backward(error, l2_lambda) for agent in self.agents]
+        return np.mean(errors, axis=0)
+
+    def apply_fractal(self, x):
+        results = [agent.apply_fractal(x) for agent in self.agents]
+        return np.mean(results, axis=0)
+
+class SwarmNeuralNetwork:
+    def __init__(self, layer_sizes, fractal_methods):
+        self.layers = []
+        for i in range(len(layer_sizes) - 2):
+            self.layers.append(Swarm(num_agents=3,
+                                     input_size=layer_sizes[i],
+                                     output_size=layer_sizes[i+1],
+                                     fractal_method=fractal_methods[i]))
+        self.output_layer = Swarm(num_agents=1,
+                                  input_size=layer_sizes[-2],
+                                  output_size=layer_sizes[-1],
+                                  fractal_method=fractal_methods[-1])
+        self.reward = Reward()
+
+    def forward(self, x, training=True):
+        self.layer_outputs = [x]
+        for layer in self.layers:
+            x = layer.forward(x, training)
+            self.layer_outputs.append(x)
+        self.final_output = tanh(self.output_layer.forward(x, training))
+        return self.final_output
+
+    def backward(self, error, l2_lambda=1e-5):
+        error = error * tanh_derivative(self.final_output)
+        error = self.output_layer.backward(error, l2_lambda)
+        for i in reversed(range(len(self.layers))):
+            error = self.layers[i].backward(error, l2_lambda)
+
+    def train(self, X, y, epochs, batch_size=32, l2_lambda=1e-5, patience=50):
+        best_mse = float('inf')
+        patience_counter = 0
+
+        for epoch in range(epochs):
+            indices = np.arange(len(X))
+            np.random.shuffle(indices)
+
+            self.reward.apply_best_weights(self)
+
+            epoch_losses = []
+            for start_idx in range(0, len(X) - batch_size + 1, batch_size):
+                batch_indices = indices[start_idx:start_idx+batch_size]
+                X_batch = X[batch_indices]
+                y_batch = y[batch_indices]
+
+                output = self.forward(X_batch)
+                error = y_batch - output
+
+                error = np.clip(error, -1, 1)
+
+                self.backward(error, l2_lambda)
+
+                epoch_losses.append(np.mean(np.square(error)))
+
+            avg_batch_loss = np.mean(epoch_losses)
+            max_batch_loss = np.max(epoch_losses)
+            self.reward.update(avg_batch_loss, max_batch_loss, self)
+
+            mse = np.mean(np.square(y - self.forward(X, training=False)))
+
+            if epoch % 100 == 0:
+                print(f"Epoch {epoch}, MSE: {mse:.6f}, Avg Batch Loss: {avg_batch_loss:.6f}, Min Batch Loss: {np.min(epoch_losses):.6f}, Max Batch Loss: {max_batch_loss:.6f}")
+
+            if mse < best_mse:
+                best_mse = mse
+                patience_counter = 0
+            else:
+                patience_counter += 1
+
+            if patience_counter >= patience:
+                print(f"Early stopping at epoch {epoch}")
+                break
+
+        return best_mse
+
+    def apply_fractals(self, x):
+        fractal_outputs = []
+        for i, layer in enumerate(self.layers):
+            x = self.layer_outputs[i+1]
+            fractal_output = layer.apply_fractal(x)
+            fractal_outputs.append(fractal_output)
+        return fractal_outputs
+
+def sierpinski_fractal(input_data):
+    t = np.linspace(0, 2 * np.pi, input_data.shape[0])
+    x = np.mean(input_data) * np.cos(t)
+    y = np.mean(input_data) * np.sin(t)
+    return x, y
+
+def mandelbrot_fractal(input_data, max_iter=10):
+    output = np.zeros(input_data.shape[0])
+    for i in range(input_data.shape[0]):
+        c = input_data[i, 0] + 0.1j * np.std(input_data)
+        z = 0
+        for n in range(max_iter):
+            if abs(z) > 2:
+                output[i] = n
+                break
+            z = z*z + c
+        else:
+            output[i] = max_iter
+    return output
+
+def julia_fractal(input_data, max_iter=10):
+    output = np.zeros(input_data.shape[0])
+    c = -0.8 + 0.156j
+    for i in range(input_data.shape[0]):
+        z = input_data[i, 0] + 0.1j * np.std(input_data)
+        for n in range(max_iter):
+            if abs(z) > 2:
+                output[i] = n
+                break
+            z = z*z + c
+        else:
+            output[i] = max_iter
+    return output
+
+def run_snn(epochs, batch_size, l2_lambda, patience):
+    np.random.seed(42)
+
+    X = np.linspace(0, 10, 1000).reshape(-1, 1)
+    y = np.sin(X).reshape(-1, 1)
+
+    X = (X - X.min()) / (X.max() - X.min())
+    y = (y - y.min()) / (y.max() - y.min())

     snn = SwarmNeuralNetwork(layer_sizes=[1, 32, 16, 8, 1],
                              fractal_methods=[sierpinski_fractal, mandelbrot_fractal, julia_fractal, julia_fractal])
+
     snn.train(X, y, epochs=epochs, batch_size=batch_size, l2_lambda=l2_lambda, patience=patience)
+
     y_pred = snn.forward(X, training=False)
     fractal_outputs = snn.apply_fractals(X)

     fig, axs = plt.subplots(2, 2, figsize=(15, 10))

     axs[0, 0].plot(X, y, label='True')

     axs[1, 1].plot(X, fractal_outputs[2])
     axs[1, 1].set_title('Julia Fractal Output')

+    plt.tight_layout()
     return fig

+with gr.Blocks() as demo:
+    epochs = gr.Slider(1, 10000, value=5000, label="Epochs")
+    batch_size = gr.Slider(1, 100, value=32, label="Batch Size")
+    l2_lambda = gr.Slider(0.0001, 0.1, value=0.00001, label="L2 Lambda")
+    patience = gr.Slider(1, 1000, value=50, label="Patience")
+
+    plot = gr.Plot()
+
+    def update_plot(epochs, batch_size, l2_lambda, patience):
+        return run_snn(epochs, batch_size, l2_lambda, patience)
+
+    btn = gr.Button("Run SNN")
+    btn.click(update_plot, inputs=[epochs, batch_size, l2_lambda, patience], outputs=plot)

+demo.launch()
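A note on the added optimizer: EveOptimizer is an Adam-style update whose step is additionally scaled by a smoothed "fractal memory". The scale comes from a short Mandelbrot-style escape iteration seeded with gradient and parameter statistics, so it always lands in (0, 1]. Below is a minimal standalone sketch of just that factor, restated from fractal_adjustment above; the helper name fractal_factor is illustrative and not part of app.py.

import numpy as np

def fractal_factor(param, grad, iters=10):
    # Same escape iteration as EveOptimizer.fractal_adjustment:
    # seed c from the gradient mean and parameter spread, iterate z <- z^2 + c,
    # and stop early once |z| escapes the radius-2 disk.
    c = np.mean(grad) + 1j * np.std(param)
    z = 0
    for _ in range(iters):
        z = z**2 + c
        if abs(z) > 2:
            break
    return 1 / (1 + abs(z))  # bounded in (0, 1]

rng = np.random.default_rng(0)
w = rng.normal(size=(4, 3))
g = rng.normal(size=(4, 3))
print(fractal_factor(w, g))  # one scalar multiplier for the whole Adam step

Because the factor is a scalar (and fractal_memory starts at zeros), every element of a given parameter tensor ends up scaled by the same smoothed value, so the fractal term acts as a per-tensor learning-rate modulation rather than a per-weight one.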
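mandelbrot_fractal and julia_fractal both index input_data[i, 0], so they expect a 2-D array with at least one column; the layer activations that apply_fractals passes in have that shape. A self-contained sketch of the shared escape-time pattern on toy data follows (escape_time, data and spread are illustrative, not part of app.py):

import numpy as np

def escape_time(z0, c, max_iter=10):
    # Count iterations of z <- z^2 + c before |z| leaves the radius-2 disk;
    # return max_iter if it never escapes (the for/else in app.py does the same).
    z = z0
    for n in range(max_iter):
        if abs(z) > 2:
            return n
        z = z*z + c
    return max_iter

data = np.linspace(0.0, 1.0, 5).reshape(-1, 1)   # shape (n, 1), like one activation column
spread = np.std(data)

mandel = [escape_time(0.0, x + 0.1j * spread) for x in data[:, 0]]            # c varies per sample
julia  = [escape_time(x + 0.1j * spread, -0.8 + 0.156j) for x in data[:, 0]]  # fixed c, z0 varies
print(mandel, julia)

sierpinski_fractal is different in kind: it returns a pair of coordinate arrays (a circle scaled by the mean activation) rather than an escape-time vector, which is why its panel in the figure looks unlike the other two.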
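BatchNormalization.backward implements the standard batch-norm gradient through the per-batch mean and variance. If you want to convince yourself the formula is right, a small finite-difference check along these lines works; it restates the forward/backward math locally instead of importing app.py (importing the module would launch the Gradio app), and all names here are local to the sketch.

import numpy as np

eps = 1e-5
rng = np.random.default_rng(0)
x = rng.normal(size=(6, 4))
gamma = rng.normal(size=4)
dout = rng.normal(size=(6, 4))

def forward(x):
    # Batch-norm forward without beta; the additive beta does not affect dx.
    mean, var = x.mean(axis=0), x.var(axis=0)
    return gamma * (x - mean) / np.sqrt(var + eps)

# Analytic gradient, same formula as BatchNormalization.backward.
mean, var = x.mean(axis=0), x.var(axis=0)
m = x.shape[0]
dx_norm = dout * gamma
dvar = np.sum(dx_norm * (x - mean) * -0.5 * (var + eps)**(-1.5), axis=0)
dmean = np.sum(dx_norm * -1 / np.sqrt(var + eps), axis=0) + dvar * np.mean(-2 * (x - mean), axis=0)
dx = dx_norm / np.sqrt(var + eps) + dvar * 2 * (x - mean) / m + dmean / m

# Numerical gradient of the scalar loss sum(forward(x) * dout), via central differences.
num = np.zeros_like(x)
h = 1e-6
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        xp, xm = x.copy(), x.copy()
        xp[i, j] += h
        xm[i, j] -= h
        num[i, j] = (np.sum(forward(xp) * dout) - np.sum(forward(xm) * dout)) / (2 * h)

print(np.max(np.abs(dx - num)))  # should be tiny (~1e-6 or smaller) if the backward pass is correct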
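One practical caveat on the Blocks UI: depending on the Gradio version and the slider step settings, the slider values may be delivered as floats, while SwarmNeuralNetwork.train uses epochs, batch_size and patience inside range() and slicing. If that shows up in your environment, a defensive variant of the click handler is enough; this is a suggested tweak, not what the commit ships.

def update_plot(epochs, batch_size, l2_lambda, patience):
    # Cast the integer-valued controls explicitly before handing them to run_snn,
    # in case the slider components pass floats (e.g. 5000.0).
    return run_snn(int(epochs), int(batch_size), float(l2_lambda), int(patience))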