# File size: 4,229 Bytes
# c3cc0a9
import json
import logging
import os
import random
from datetime import datetime, timedelta, timezone
# Configure the root logger to report INFO-level records and above.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by the generator below.
logger = logging.getLogger(__name__)
class IntelligentRoutingDataGenerator:
    """Generate synthetic grievance-routing samples and write them as JSON.

    Every sample describes one grievance (category, hostel, floor, room),
    candidate staff members whose department equals the grievance category,
    floor-level request metrics, and one availability slot each for the
    first staff member and the student.

    All values are drawn from the global ``random`` generator, so call
    ``random.seed(...)`` beforehand if reproducible output is needed.
    """

    def __init__(self):
        # Value pools that the generated fields are sampled from.
        self.categories = ['electricity', 'internet', 'plumber', 'water_cooler', 'sweeper', 'carpenter']
        self.availability_statuses = ['Available', 'Unavailable']
        self.hostel_names = ['bh1', 'bh2', 'bh3', 'ivh', 'gh']
        self.floor_numbers = [0, 1, 2, 3]
        self.time_slots = [
            "08:00-12:00", "12:00-16:00", "16:00-20:00"
        ]

    def generate_staff_members(self, category, count=2):
        """Return ``count`` staff records belonging to ``category``.

        Args:
            category: Grievance category; stored as each record's
                ``department`` so staff always match the grievance.
            count: Number of staff records to create (defaults to 2).

        Returns:
            A list of dicts with the keys ``staff_id``, ``department``,
            ``current_workload``, ``availability_status`` and
            ``past_resolution_rate``.
        """
        # Sampling without replacement guarantees distinct IDs within one
        # grievance; independent randint() calls could produce duplicates.
        id_numbers = random.sample(range(10000, 100000), count)
        return [
            {
                "staff_id": f"S{id_number}",
                "department": category,
                "current_workload": random.randint(0, 5),
                "availability_status": random.choice(self.availability_statuses),
                "past_resolution_rate": round(random.uniform(0.85, 0.99), 2),
            }
            for id_number in id_numbers
        ]

    def generate_availability_data(self, staff_id, student_id):
        """Return one random availability entry for a staff member and a student.

        Args:
            staff_id: Identifier placed in the ``staff_availability`` entry.
            student_id: Identifier placed in the ``student_availability`` entry.

        Returns:
            A dict with the keys ``staff_availability`` and
            ``student_availability``; each maps to a single-element list
            holding the id, a random time slot and a random status.
        """
        return {
            "staff_availability": [
                {
                    "staff_id": staff_id,
                    "time_slot": random.choice(self.time_slots),
                    "availability_status": random.choice(self.availability_statuses),
                }
            ],
            "student_availability": [
                {
                    "student_id": student_id,
                    "time_slot": random.choice(self.time_slots),
                    "availability_status": random.choice(self.availability_statuses),
                }
            ],
        }

    def generate_sample(self, index):
        """Build one grievance sample.

        Args:
            index: Integer offset used to derive the grievance id
                (``G{67890 + index}``) and the student id
                (``STU{200 + index}``).

        Returns:
            A JSON-serialisable dict describing the grievance.
        """
        grievance_id = f"G{67890 + index}"
        student_id = f"STU{200 + index}"

        # Pick the category first so the generated staff match it.
        selected_category = random.choice(self.categories)
        staff_members = self.generate_staff_members(selected_category)

        # Timezone-aware "now" (datetime.utcnow() is deprecated); the
        # submission time lies up to an hour in the past.
        base_time = datetime.now(timezone.utc)
        submission_time = base_time - timedelta(minutes=random.randint(0, 60))

        return {
            "grievance_id": grievance_id,
            "category": selected_category,
            "submission_timestamp": submission_time.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "student_room_no": str(random.randint(100, 499)),
            "hostel_name": random.choice(self.hostel_names),
            "floor_number": random.choice(self.floor_numbers),
            "current_staff_status": staff_members,
            "floor_metrics": {
                "number_of_requests": random.randint(0, 30),
                "total_delays": random.randint(0, 5),
            },
            # Availability is reported for the first candidate staff member.
            "availability_data": self.generate_availability_data(
                staff_members[0]["staff_id"],
                student_id,
            ),
        }

    def generate_dataset(self, num_samples, output_path):
        """Generate ``num_samples`` samples and write them to ``output_path``.

        Args:
            num_samples: Number of samples to generate.
            output_path: Destination JSON file. Missing parent directories
                are created; a bare filename is written to the current
                working directory.

        Returns:
            The list of generated sample dicts (also written to disk).
        """
        dataset = [self.generate_sample(i) for i in range(num_samples)]

        # os.makedirs('') raises FileNotFoundError, so only create the parent
        # directory when the path actually has one.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(dataset, f, indent=2)

        logger.info("Generated %d samples and saved to %s", len(dataset), output_path)
        return dataset
def main():
    """Generate the training and test datasets at their default locations.

    Writes 40,000 samples to the training JSON file and 8,000 samples to the
    test JSON file, then prints how many samples each split contains.
    """
    train_path = 'models/intelligent_routing/train_data/training_data.json'
    test_path = 'models/intelligent_routing/test_data/test_data.json'

    data_generator = IntelligentRoutingDataGenerator()

    # The training split is produced first, followed by the test split.
    training_set = data_generator.generate_dataset(num_samples=40000, output_path=train_path)
    held_out_set = data_generator.generate_dataset(num_samples=8000, output_path=test_path)

    train_count = len(training_set)
    test_count = len(held_out_set)
    print(f"Generated {train_count} training samples and {test_count} test samples")
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|