a-ragab-h-m committed · verified
Commit b0f79a7 · 1 Parent(s): 46d72f5

Update run.py

Files changed (1): run.py (+54 -75)
run.py CHANGED
@@ -8,6 +8,7 @@ import torch.optim as optim
 from torch.utils.data import DataLoader
 import json
 
+# Set up paths
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.join(dir_path, '..'))
 
@@ -16,41 +17,27 @@ from Actor.actor import Actor
 from dataloader import VRP_Dataset
 from google_solver.google_model import evaluate_google_model
 
-# Load params
+# Load the settings
 with open('params.json', 'r') as f:
     params = json.load(f)
 
-# Settings
+# Save a copy of the settings
+os.makedirs("/data", exist_ok=True)
+with open('/data/params_saved.json', 'w') as f:
+    json.dump(params, f)
+
+# Set up variables
 device = params['device']
 run_tests = params['run_tests']
 save_results = params['save_results']
 dataset_path = params['dataset_path']
 
-# Create persistent directory
-results_dir = os.path.join('/data', 'results')
-os.makedirs(results_dir, exist_ok=True)
-
-now = datetime.now()
-dt_string = now.strftime("%d-%m-%y %H-%M-%S")
-experiment_path = os.path.join(results_dir, dt_string)
-os.makedirs(experiment_path, exist_ok=True)
-
-train_results_file = os.path.join(experiment_path, 'train_results.txt')
-test_results_file = os.path.join(experiment_path, 'test_results.txt')
-model_path = os.path.join(experiment_path, 'model_state_dict.pt')
-optimizer_path = os.path.join(experiment_path, 'optimizer_state_dict.pt')
-
-with open(train_results_file, 'w') as f: pass
-with open(test_results_file, 'w') as f: pass
-with open(os.path.join(experiment_path, 'params.json'), 'w') as f:
-    json.dump(params, f)
-
-# Dataset sizes
+# Data sizes
 train_dataset_size = params['train_dataset_size']
 validation_dataset_size = params['validation_dataset_size']
 baseline_dataset_size = params['baseline_dataset_size']
 
-# Problem config
+# Problem properties
 num_nodes = params['num_nodes']
 num_depots = params['num_depots']
 embedding_size = params['embedding_size']
@@ -62,23 +49,19 @@ num_movers = params['num_movers']
 learning_rate = params['learning_rate']
 batch_size = params['batch_size']
 test_batch_size = params['test_batch_size']
-baseline_update_period = params['baseline_update_period']
 
-# Datasets
+# Load the datasets
 validation_dataset = VRP_Dataset(validation_dataset_size, num_nodes, num_depots, dataset_path, device)
 baseline_dataset = VRP_Dataset(train_dataset_size, num_nodes, num_depots, dataset_path, device)
-
 if params['overfit_test']:
-    train_dataset = VRP_Dataset(train_dataset_size, num_nodes, num_depots, dataset_path, device)
-    baseline_dataset = train_dataset
-    validation_dataset = train_dataset
+    train_dataset = baseline_dataset = validation_dataset = VRP_Dataset(train_dataset_size, num_nodes, num_depots, dataset_path, device)
 
-# Evaluate Google solver
+# Evaluate Google OR-Tools
 google_scores = evaluate_google_model(validation_dataset)
 tot_google_scores = google_scores.sum().item()
 input_size = validation_dataset.model_input_length()
 
-# Models
+# Define the models
 model = Model(input_size=input_size, embedding_size=embedding_size, decoder_input_size=params["decoder_input_size"])
 actor = Actor(model=model, num_movers=num_movers, num_neighbors_encoder=num_neighbors_encoder,
               num_neighbors_action=num_neighbors_action, device=device, normalize=False)
@@ -95,11 +78,17 @@ nn_actor.nearest_neighbors()
 
 optimizer = optim.Adam(params=actor.parameters(), lr=learning_rate)
 
+# Output files
+train_results_file = "/data/train_results.txt"
+test_results_file = "/data/test_results.txt"
+model_path = "/data/model_state_dict.pt"
+optimizer_path = "/data/optimizer_state_dict.pt"
+
 train_batch_record = 100
 validation_record = 100
 baseline_record = None
 
-# Training loop
+# Training
 for epoch in range(params['num_epochs']):
     if not params['overfit_test']:
         train_dataset = VRP_Dataset(train_dataset_size, num_nodes, num_depots, dataset_path, device)
@@ -107,7 +96,6 @@ for epoch in range(params['num_epochs']):
     train_dataloader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=train_dataset.collate)
     for i, batch in enumerate(train_dataloader):
         with torch.no_grad():
-            nn_actor.nearest_neighbors()
             nn_output = nn_actor(batch)
             tot_nn_cost = nn_output['total_time'].sum().item()
 
@@ -129,75 +117,66 @@ for epoch in range(params['num_epochs']):
 
         tot_actor_cost = actor_cost.sum().item()
         tot_baseline_cost = baseline_cost.sum().item()
-
         actor_nn_ratio = tot_actor_cost / tot_nn_cost
         actor_baseline_ratio = tot_actor_cost / tot_baseline_cost
         train_batch_record = min(train_batch_record, actor_nn_ratio)
 
         result = f"{epoch}, {i}, {actor_nn_ratio:.4f}, {actor_baseline_ratio:.4f}, {train_batch_record:.4f}"
-        print(result, flush=True)
+        print(result)
         if save_results:
             with open(train_results_file, 'a') as f:
                 f.write(result + '\n')
         del batch
 
-    # Validation
+    # Check performance
     if epoch % 5 == 0:
         baseline_dataloader = DataLoader(baseline_dataset, batch_size=batch_size, collate_fn=baseline_dataset.collate)
         tot_cost = []
         for batch in baseline_dataloader:
             with torch.no_grad():
                 actor.greedy_search()
-                actor_output = actor(batch)
-                cost = actor_output['total_time']
+                cost = actor(batch)['total_time']
             tot_cost.append(cost)
-            del batch
-
         tot_cost = torch.cat(tot_cost, dim=0)
         if baseline_record is None or (tot_cost < baseline_record).float().mean().item() > 0.9:
             baseline_record = tot_cost
             baseline_actor.load_state_dict(actor.state_dict())
             print('\nNew baseline record\n')
 
-    # Test block (test the model after every 10 epochs)
-    if (epoch % 10 == 0) and run_tests:
-        b = max(int(batch_size // sample_size**2), 1)
-        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
-
-        tot_cost = 0
-        tot_nn_cost = 0
-        for batch in validation_dataloader:
-            with torch.no_grad():
-                actor.beam_search(sample_size)
-                actor_output = actor(batch)
-                cost = actor_output['total_time']
-
-                nn_actor.nearest_neighbors()
-                nn_output = nn_actor(batch)
-                nn_cost = nn_output['total_time']
-
-            tot_cost += cost.sum().item()
-            tot_nn_cost += nn_cost.sum().item()
-
-        ratio = tot_cost / tot_nn_cost
-        validation_record = min(validation_record, ratio)
-        actor_google_ratio = tot_cost / tot_google_scores
-
-        print(f"\nTest results:\nActor/Google: {actor_google_ratio:.4f}, Actor/NN: {ratio:.4f}, Best NN Ratio: {validation_record:.4f}\n")
-
-        # Save results and the model after each test
-        if save_results:
-            with open(test_results_file, 'a') as f:
-                f.write(f"{epoch}, {actor_google_ratio:.4f}, {ratio:.4f}, {validation_record:.4f}\n")
-
-        # Always save the model and the optimizer
-        torch.save(actor.state_dict(), model_path)
-        torch.save(optimizer.state_dict(), optimizer_path)
-
-        # Backup copy every 10 epochs
-        torch.save(actor.state_dict(), f"model_epoch_{epoch}.pt")
-        torch.save(optimizer.state_dict(), f"optimizer_epoch_{epoch}.pt")
+    # Evaluate and save the model
+    if run_tests:
+        b = max(int(batch_size // sample_size**2), 1)
+        validation_dataloader = DataLoader(validation_dataset, batch_size=b, collate_fn=validation_dataset.collate)
+
+        tot_cost = 0
+        tot_nn_cost = 0
+        for batch in validation_dataloader:
+            with torch.no_grad():
+                actor.beam_search(sample_size)
+                actor_output = actor(batch)
+                cost = actor_output['total_time']
+
+                nn_output = nn_actor(batch)
+                nn_cost = nn_output['total_time']
+
+            tot_cost += cost.sum().item()
+            tot_nn_cost += nn_cost.sum().item()
+
+        ratio = tot_cost / tot_nn_cost
+        validation_record = min(validation_record, ratio)
+        actor_google_ratio = tot_cost / tot_google_scores
+        print(f"\nTest results:\nActor/Google: {actor_google_ratio:.4f}, Actor/NN: {ratio:.4f}, Best NN Ratio: {validation_record:.4f}\n")
+
+        if save_results:
+            # Always save the latest results and weights
+            with open(test_results_file, 'a') as f:
+                f.write(f"{epoch}, {actor_google_ratio:.4f}, {ratio:.4f}, {validation_record:.4f}\n")
+            torch.save(actor.state_dict(), model_path)
+            torch.save(optimizer.state_dict(), optimizer_path)
+
+            # Backup copy every 10 epochs
+            if epoch % 10 == 0:
+                torch.save(actor.state_dict(), f"/data/model_epoch_{epoch}.pt")
+                torch.save(optimizer.state_dict(), f"/data/optimizer_epoch_{epoch}.pt")
 
 print("End.")
 
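The script reads every setting from params.json, as shown in the settings hunk above. A sketch of a file with the keys run.py reads; every value is an illustrative assumption, not taken from this repository, and sample_size is included only because the test block uses that variable:

# Illustrative params.json contents; all values are assumptions.
import json

params = {
    "device": "cpu",
    "run_tests": True,
    "save_results": True,
    "dataset_path": "./data/",
    "train_dataset_size": 1000,
    "validation_dataset_size": 100,
    "baseline_dataset_size": 1000,
    "num_nodes": 20,
    "num_depots": 1,
    "embedding_size": 128,
    "decoder_input_size": 128,
    "num_neighbors_encoder": 10,
    "num_neighbors_action": 5,
    "num_movers": 1,
    "learning_rate": 1e-4,
    "batch_size": 32,
    "test_batch_size": 32,
    "sample_size": 10,
    "overfit_test": False,
    "num_epochs": 100,
}

with open("params.json", "w") as f:
    json.dump(params, f, indent=2)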
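The baseline refresh in the epoch % 5 == 0 block above follows a greedy-rollout scheme: the baseline actor's weights are replaced only when the current actor produces a cheaper tour on more than 90% of the baseline instances. A self-contained sketch of that criterion, with dummy cost tensors:

# Dummy illustration of the baseline-update rule used above.
import torch

baseline_record = torch.tensor([10.0, 12.0, 9.0, 11.0])  # stored greedy costs
tot_cost = torch.tensor([9.0, 11.0, 8.5, 11.5])          # current actor costs

win_rate = (tot_cost < baseline_record).float().mean().item()
if win_rate > 0.9:
    # corresponds to baseline_actor.load_state_dict(actor.state_dict())
    print("New baseline record")
else:
    print(f"Baseline kept (win rate {win_rate:.2f})")  # here: 0.75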
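In the test block, beam search keeps sample_size candidate solutions per instance, so the validation DataLoader shrinks its batch to bound the expanded workload. A worked example of the sizing rule, with assumed values:

# Worked example of the test-time batch sizing from the diff.
batch_size = 32    # assumed training batch size
sample_size = 10   # assumed beam width

b = max(int(batch_size // sample_size**2), 1)
print(b)  # max(32 // 100, 1) == 1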
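The state dicts written to /data can be used to restart a run. A minimal resume sketch, assuming actor, optimizer, and device are constructed exactly as in run.py; the paths match the ones this commit writes:

# Minimal resume sketch; actor/optimizer/device assumed already built.
import torch

actor.load_state_dict(torch.load("/data/model_state_dict.pt", map_location=device))
optimizer.load_state_dict(torch.load("/data/optimizer_state_dict.pt", map_location=device))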