a-ragab-h-m committed (verified)
Commit 198da27 · 1 Parent(s): 5d8a58e

Update run.py

Files changed (1)
  1. run.py +56 -29
run.py CHANGED
@@ -20,16 +20,31 @@ from google_solver.google_model import evaluate_google_model
 with open('params.json', 'r') as f:
     params = json.load(f)
 
-# Save params into a local file for tracking
-with open('params_saved.json', 'w') as f:
-    json.dump(params, f)
-
 # Settings
 device = params['device']
 run_tests = params['run_tests']
 save_results = params['save_results']
 dataset_path = params['dataset_path']
 
+# Create persistent directory
+results_dir = os.path.join('/data', 'results')
+os.makedirs(results_dir, exist_ok=True)
+
+now = datetime.now()
+dt_string = now.strftime("%d-%m-%y %H-%M-%S")
+experiment_path = os.path.join(results_dir, dt_string)
+os.makedirs(experiment_path, exist_ok=True)
+
+train_results_file = os.path.join(experiment_path, 'train_results.txt')
+test_results_file = os.path.join(experiment_path, 'test_results.txt')
+model_path = os.path.join(experiment_path, 'model_state_dict.pt')
+optimizer_path = os.path.join(experiment_path, 'optimizer_state_dict.pt')
+
+with open(train_results_file, 'w') as f: pass
+with open(test_results_file, 'w') as f: pass
+with open(os.path.join(experiment_path, 'params.json'), 'w') as f:
+    json.dump(params, f)
+
 # Dataset sizes
 train_dataset_size = params['train_dataset_size']
 validation_dataset_size = params['validation_dataset_size']
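
For reference, this setup block assumes run.py already imports os, json, and datetime (the hunk adds no imports), and it gives each run its own timestamped folder on the persistent /data mount, roughly:

    /data/results/<dd-mm-yy HH-MM-SS>/
        params.json
        train_results.txt
        test_results.txt
        model_state_dict.pt
        optimizer_state_dict.pt

The two bare with open(..., 'w') as f: pass statements simply create (or truncate) the two result files before training starts.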
@@ -122,7 +137,7 @@ for epoch in range(params['num_epochs']):
         result = f"{epoch}, {i}, {actor_nn_ratio:.4f}, {actor_baseline_ratio:.4f}, {train_batch_record:.4f}"
         print(result, flush=True)
         if save_results:
-            with open('train_results.txt', 'a') as f:
+            with open(train_results_file, 'a') as f:
                 f.write(result + '\n')
         del batch
 
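Each appended line is a flat comma-separated record (epoch, batch index, then the three ratios), so the log can be read back with standard tools. A minimal sketch using pandas; the column names are illustrative guesses, not names defined in run.py:

    import pandas as pd

    # Illustrative column names matching the f-string written above.
    cols = ['epoch', 'batch', 'actor_nn_ratio', 'actor_baseline_ratio', 'train_batch_record']
    df = pd.read_csv('train_results.txt', names=cols)
    print(df.tail())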
@@ -144,33 +159,45 @@ for epoch in range(params['num_epochs']):
         baseline_actor.load_state_dict(actor.state_dict())
         print('\nNew baseline record\n')
 
-    # Test every 10 epochs
-    if (epoch % 10 == 0) and run_tests:
-        b = max(int(batch_size // sample_size**2), 1)
-        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
+
+    # Test block (test the model every 10 epochs)
+    if (epoch % 10 == 0) and run_tests:
+        b = max(int(batch_size // sample_size**2), 1)
+        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
 
-        tot_cost = 0
-        tot_nn_cost = 0
-        for batch in validation_dataloader:
-            with torch.no_grad():
-                actor.beam_search(sample_size)
-                actor_output = actor(batch)
-                cost = actor_output['total_time']
+        tot_cost = 0
+        tot_nn_cost = 0
+        for batch in validation_dataloader:
+            with torch.no_grad():
+                actor.beam_search(sample_size)
+                actor_output = actor(batch)
+                cost = actor_output['total_time']
 
-                nn_actor.nearest_neighbors()
-                nn_output = nn_actor(batch)
-                nn_cost = nn_output['total_time']
+                nn_actor.nearest_neighbors()
+                nn_output = nn_actor(batch)
+                nn_cost = nn_output['total_time']
 
-            tot_cost += cost.sum().item()
-            tot_nn_cost += nn_cost.sum().item()
+            tot_cost += cost.sum().item()
+            tot_nn_cost += nn_cost.sum().item()
 
-        ratio = tot_cost / tot_nn_cost
-        validation_record = min(validation_record, ratio)
+        ratio = tot_cost / tot_nn_cost
+        validation_record = min(validation_record, ratio)
+        actor_google_ratio = tot_cost / tot_google_scores
 
-        actor_google_ratio = tot_cost / tot_google_scores
-        print(f"\nTest results:\nActor/Google: {actor_google_ratio:.4f}, Actor/NN: {ratio:.4f}, Best NN Ratio: {validation_record:.4f}\n")
-
-        if save_results:
-            with open('test_results.txt', 'a') as f:
-                f.write(f"{epoch}, {actor_google_ratio:.4f}, {ratio:.4f}, {validation_record:.4f}\n")
-print("End")
+        print(f"\nTest results:\nActor/Google: {actor_google_ratio:.4f}, Actor/NN: {ratio:.4f}, Best NN Ratio: {validation_record:.4f}\n")
+
+        # Save the results and the model after each test
+        if save_results:
+            with open(test_results_file, 'a') as f:
+                f.write(f"{epoch}, {actor_google_ratio:.4f}, {ratio:.4f}, {validation_record:.4f}\n")
+
+        # Always save the model and the optimizer
+        torch.save(actor.state_dict(), model_path)
+        torch.save(optimizer.state_dict(), optimizer_path)
+
+        # Backup copy every 10 epochs
+        torch.save(actor.state_dict(), f"model_epoch_{epoch}.pt")
+        torch.save(optimizer.state_dict(), f"optimizer_epoch_{epoch}.pt")
+
+
+print("End.")