Update run.py
run.py
CHANGED
@@ -20,16 +20,31 @@ from google_solver.google_model import evaluate_google_model
 with open('params.json', 'r') as f:
     params = json.load(f)
 
-# Save params into a local file for tracking
-with open('params_saved.json', 'w') as f:
-    json.dump(params, f)
-
 # Settings
 device = params['device']
 run_tests = params['run_tests']
 save_results = params['save_results']
 dataset_path = params['dataset_path']
 
+# Create persistent directory
+results_dir = os.path.join('/data', 'results')
+os.makedirs(results_dir, exist_ok=True)
+
+now = datetime.now()
+dt_string = now.strftime("%d-%m-%y %H-%M-%S")
+experiment_path = os.path.join(results_dir, dt_string)
+os.makedirs(experiment_path, exist_ok=True)
+
+train_results_file = os.path.join(experiment_path, 'train_results.txt')
+test_results_file = os.path.join(experiment_path, 'test_results.txt')
+model_path = os.path.join(experiment_path, 'model_state_dict.pt')
+optimizer_path = os.path.join(experiment_path, 'optimizer_state_dict.pt')
+
+with open(train_results_file, 'w') as f: pass
+with open(test_results_file, 'w') as f: pass
+with open(os.path.join(experiment_path, 'params.json'), 'w') as f:
+    json.dump(params, f)
+
 # Dataset sizes
 train_dataset_size = params['train_dataset_size']
 validation_dataset_size = params['validation_dataset_size']
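Each run now writes into its own timestamped directory under `/data/results`, which is the persistent storage mount on Spaces, so results survive restarts. As an illustration only (the `latest_experiment` helper below is not part of run.py; it merely assumes the directory layout and the `%d-%m-%y %H-%M-%S` naming created above), later tooling could locate the most recent run like this:

```python
import os
from datetime import datetime

def latest_experiment(results_dir='/data/results'):
    """Return the newest experiment directory, parsing run.py's timestamp naming."""
    fmt = '%d-%m-%y %H-%M-%S'  # same format string used for dt_string above
    runs = []
    for name in os.listdir(results_dir):
        try:
            runs.append((datetime.strptime(name, fmt), name))
        except ValueError:
            continue  # ignore entries that don't follow the naming scheme
    return os.path.join(results_dir, max(runs)[1]) if runs else None
```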
@@ -122,7 +137,7 @@ for epoch in range(params['num_epochs']):
         result = f"{epoch}, {i}, {actor_nn_ratio:.4f}, {actor_baseline_ratio:.4f}, {train_batch_record:.4f}"
         print(result, flush=True)
         if save_results:
-            with open(
+            with open(train_results_file, 'a') as f:
                 f.write(result + '\n')
         del batch
 
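The old line appears truncated in the diff (`with open(`); the fix appends each training line to `train_results_file`. Given the comma-separated f-string written above, the log can be read back for analysis with a small parser; a minimal sketch (the helper and field names are assumptions for illustration, not part of run.py):

```python
def read_train_results(path):
    """Parse 'epoch, i, actor_nn_ratio, actor_baseline_ratio, train_batch_record' rows."""
    rows = []
    with open(path) as f:
        for line in f:
            epoch, i, nn_ratio, base_ratio, record = line.strip().split(', ')
            rows.append((int(epoch), int(i), float(nn_ratio),
                         float(base_ratio), float(record)))
    return rows
```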
@@ -144,33 +159,45 @@ for epoch in range(params['num_epochs']):
         baseline_actor.load_state_dict(actor.state_dict())
         print('\nNew baseline record\n')
 
-    # Test every 10 epochs
-    if (epoch % 10 == 0) and run_tests:
-        b = max(int(batch_size // sample_size**2), 1)
-        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
-        actor.beam_search(sample_size)
-        actor_output = actor(batch)
-        cost = actor_output['total_time']
+    # Test block (evaluate the model every 10 epochs)
+    if (epoch % 10 == 0) and run_tests:
+        b = max(int(batch_size // sample_size**2), 1)
+        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
+
+        tot_cost = 0
+        tot_nn_cost = 0
+        for batch in validation_dataloader:
+            with torch.no_grad():
+                actor.beam_search(sample_size)
+                actor_output = actor(batch)
+                cost = actor_output['total_time']
+
+                nn_actor.nearest_neighbors()
+                nn_output = nn_actor(batch)
+                nn_cost = nn_output['total_time']
+
+            tot_cost += cost.sum().item()
+            tot_nn_cost += nn_cost.sum().item()
+
+        ratio = tot_cost / tot_nn_cost
+        validation_record = min(validation_record, ratio)
+        actor_google_ratio = tot_cost / tot_google_scores
+
+        print(f"\nTest results:\nActor/Google: {actor_google_ratio:.4f}, Actor/NN: {ratio:.4f}, Best NN Ratio: {validation_record:.4f}\n")
+
+        # ✅ Save the results and the model after each test
+        if save_results:
+            with open(test_results_file, 'a') as f:
+                f.write(f"{epoch}, {actor_google_ratio:.4f}, {ratio:.4f}, {validation_record:.4f}\n")
+
+        # Always save the model and the optimizer
+        torch.save(actor.state_dict(), model_path)
+        torch.save(optimizer.state_dict(), optimizer_path)
+
+        # Backup copy every 10 epochs
+        torch.save(actor.state_dict(), f"model_epoch_{epoch}.pt")
+        torch.save(optimizer.state_dict(), f"optimizer_epoch_{epoch}.pt")
+
+print("End.")
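Since both state dicts now land on the persistent volume every test cycle, a restarted Space can pick up where it left off. A minimal resume sketch (assuming `actor`, `optimizer`, and `device` are constructed exactly as in run.py, and `model_path`/`optimizer_path` are the files written above):

```python
import os
import torch

# Restore the latest checkpoint from the persistent volume, if present.
if os.path.exists(model_path):
    actor.load_state_dict(torch.load(model_path, map_location=device))
    optimizer.load_state_dict(torch.load(optimizer_path, map_location=device))
```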