a-ragab-h-m committed on
Commit
c389cb0
·
verified ·
1 Parent(s): 25ba527

Create dataloader.py

Browse files
Files changed (1) hide show
  1. dataloader.py +94 -0
dataloader.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch.utils.data import Dataset
4
+ from sklearn.decomposition import TruncatedSVD
5
+ import numpy as np
6
+ from random import shuffle
7
+ import os
8
+ import pandas as pd
9
+
10
class VRP_Dataset(Dataset):
    """Vehicle-routing dataset built from CSV coordinates plus random fleet data.

    Each of the ``dataset_size`` instances holds ``num_nodes`` 2-D node
    positions read from the ``longitude`` / ``latitude`` columns of a CSV
    file. Fleet start nodes and per-node time windows are generated randomly
    with ``torch``'s global RNG at construction time.

    Attributes set by ``__init__``:
        node_positions: float32 tensor of shape (dataset_size, num_nodes, 2).
        fleet_data: dict with ``'start_time'`` and ``'car_start_node'``.
        graph_data: dict with ``'start_times'``, ``'end_times'``, ``'depot'``,
            ``'node_vector'``, ``'distance_matrix'`` and ``'time_matrix'``.
    """

    def __init__(self, dataset_size, num_nodes, num_depots, dataset_path, device='cpu', *args, **kwargs):
        """Load coordinates from ``dataset_path`` and generate the instances.

        Args:
            dataset_size: number of problem instances.
            num_nodes: nodes per instance (also used as the number of cars).
            num_depots: car start nodes are drawn uniformly from
                ``[0, num_depots)``.
            dataset_path: anything ``pandas.read_csv`` accepts; the CSV must
                contain ``longitude`` and ``latitude`` columns.
            device: device that ``__getitem__`` moves tensors to.
            *args, **kwargs: accepted and ignored.

        Raises:
            ValueError: if the CSV has fewer than
                ``dataset_size * num_nodes`` rows.
        """
        super().__init__()

        self.device = device
        self.dataset_size = dataset_size
        self.num_nodes = num_nodes
        self.num_depots = num_depots

        # Every instance needs num_nodes coordinate rows, so the CSV must
        # supply dataset_size * num_nodes rows in total. Sampling only
        # dataset_size rows would make the reshape below fail whenever
        # num_nodes > 1.
        raw_data = pd.read_csv(dataset_path)
        rows_needed = dataset_size * num_nodes
        if len(raw_data) < rows_needed:
            raise ValueError(
                "Dataset size requested exceeds available data: "
                f"need {rows_needed} rows "
                f"({dataset_size} instances x {num_nodes} nodes), "
                f"found {len(raw_data)}"
            )

        # Fixed random_state keeps the sampled rows reproducible.
        sampled_data = raw_data.sample(n=rows_needed, random_state=42).reset_index(drop=True)

        coords = torch.tensor(sampled_data[['longitude', 'latitude']].values, dtype=torch.float32)
        node_positions = coords.view(dataset_size, num_nodes, 2)
        self.node_positions = node_positions

        # Fleet data: one car per node, all launching at time zero.
        num_cars = num_nodes
        launch_time = torch.zeros(dataset_size, num_cars, 1)
        car_start_node = torch.randint(low=0, high=num_depots, size=(dataset_size, num_cars, 1))
        self.fleet_data = {
            'start_time': launch_time,
            'car_start_node': car_start_node
        }

        # A node is a depot when at least one car starts there. Broadcasting
        # (1, 1, N) against (D, C, 1) compares every node id with every car's
        # start node without materialising repeated copies.
        node_ids = torch.arange(num_nodes).view(1, 1, -1)
        depot = (node_ids == car_start_node).any(dim=1).float().unsqueeze(2)

        # Time windows: non-depot nodes open in [3, 5) and stay open for
        # [0.1, 0.6); depot nodes get start == end == 0 via the (1 - depot) mask.
        start_times = (torch.rand(dataset_size, num_nodes, 1) * 2 + 3) * (1 - depot)
        end_times = start_times + (0.1 + 0.5 * torch.rand(dataset_size, num_nodes, 1)) * (1 - depot)

        distance_matrix = self.compute_distance_matrix(node_positions)
        # Travel time is taken to be equal to distance.
        time_matrix = distance_matrix.clone()

        self.graph_data = {
            'start_times': start_times,
            'end_times': end_times,
            'depot': depot,
            'node_vector': node_positions,
            'distance_matrix': distance_matrix,
            'time_matrix': time_matrix
        }

    def compute_distance_matrix(self, node_positions):
        """Return pairwise Euclidean distances between nodes of each instance.

        Args:
            node_positions: tensor of shape (batch, nodes, dims).

        Returns:
            Tensor of shape (batch, nodes, nodes) where entry ``[b, i, j]`` is
            the distance between nodes ``i`` and ``j`` of instance ``b``.
        """
        # (B, 1, N, D) - (B, N, 1, D) broadcasts to (B, N, N, D).
        delta = node_positions.unsqueeze(1) - node_positions.unsqueeze(2)
        return (delta ** 2).sum(dim=3) ** 0.5

    def __getitem__(self, idx):
        """Return ``(graph, fleet)`` dicts for instance ``idx``.

        Every tensor gets a leading batch dimension of size 1 and is moved to
        ``self.device``.
        """
        A = {key: self.graph_data[key][idx].unsqueeze(0).to(self.device) for key in self.graph_data}
        B = {key: self.fleet_data[key][idx].unsqueeze(0).to(self.device) for key in self.fleet_data}
        return A, B

    def __len__(self):
        """Return the number of instances."""
        return self.dataset_size

    def collate(self, batch):
        """Concatenate ``__getitem__`` results along the batch dimension.

        Args:
            batch: list of ``(graph, fleet)`` tuples as returned by
                ``__getitem__``.

        Returns:
            ``(graph_data, fleet_data)`` dicts of batched tensors.
        """
        graph_data = {key: torch.cat([item[0][key] for item in batch], dim=0) for key in self.graph_data}
        fleet_data = {key: torch.cat([item[1][key] for item in batch], dim=0) for key in self.fleet_data}
        return graph_data, fleet_data

    def get_batch(self, idx, batch_size=10):
        """Return the collated instances ``idx .. idx + batch_size - 1``.

        The range is not clamped: indexing past the end of the dataset raises
        ``IndexError`` from the underlying tensors.
        """
        return self.collate([self[i] for i in range(idx, idx + batch_size)])

    def get_data(self):
        """Return the full ``(graph_data, fleet_data)`` dicts (not copies)."""
        return self.graph_data, self.fleet_data

    def model_input_length(self):
        """Return 3 plus the coordinate dimension of ``'node_vector'``."""
        return 3 + self.graph_data['node_vector'].shape[2]

    def save_data(self, fp):
        """Serialise ``(graph_data, fleet_data)`` to ``fp`` with ``torch.save``."""
        data = (self.graph_data, self.fleet_data)
        with open(fp, 'wb') as f:
            torch.save(data, f)