# Regino
# shdvfsdj
# 1773e4e
import os
import shutil
import random
# Default locations and split proportion used when this file is run as a script.
data_dir = "PlantVillage"
train_dir = "dataset/train"
test_dir = "dataset/test"
split_ratio = 0.8  # 80% train, 20% test


def split_dataset(data_dir, train_dir, test_dir, split_ratio=0.8, seed=None):
    """Move each category's files into per-category train and test folders.

    Every sub-directory of ``data_dir`` is treated as one category. Its files
    are shuffled, the first ``int(len(files) * split_ratio)`` of them are
    moved to ``train_dir/<category>`` and the remainder to
    ``test_dir/<category>``. Files are moved, not copied, so the category
    folders under ``data_dir`` no longer contain them afterwards.

    Args:
        data_dir: Directory containing one sub-directory per category.
        train_dir: Destination root for the training portion.
        test_dir: Destination root for the test portion.
        split_ratio: Fraction of each category sent to ``train_dir``; must be
            between 0 and 1 inclusive.
        seed: Optional seed for the shuffle. With the same seed and the same
            file names the split is identical between runs; ``None`` gives a
            different shuffle each run.

    Returns:
        A dict mapping each category name to ``(train_count, test_count)``.

    Raises:
        ValueError: If ``split_ratio`` is outside the range [0, 1].
        FileNotFoundError: If ``data_dir`` does not exist.
    """
    # Validate before touching the filesystem so a bad ratio has no side effects.
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(
            "split_ratio must be between 0 and 1, got %r" % (split_ratio,)
        )

    # A private generator keeps the seed from affecting the global random state.
    rng = random.Random(seed)

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    counts = {}
    # os.listdir() returns entries in arbitrary order; sorting makes a seeded
    # run reproducible.
    for category in sorted(os.listdir(data_dir)):
        category_path = os.path.join(data_dir, category)
        if not os.path.isdir(category_path):
            continue  # stray files next to the category folders are ignored

        # Only regular files are split; nested directories are left in place.
        images = sorted(
            name
            for name in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, name))
        )
        rng.shuffle(images)

        # int() truncates, so a very small category can end up with no
        # training files at all.
        split_index = int(len(images) * split_ratio)
        destinations = (
            (train_dir, images[:split_index]),
            (test_dir, images[split_index:]),
        )
        for target_root, batch in destinations:
            target_dir = os.path.join(target_root, category)
            os.makedirs(target_dir, exist_ok=True)
            for name in batch:
                shutil.move(
                    os.path.join(category_path, name),
                    os.path.join(target_dir, name),
                )

        counts[category] = (split_index, len(images) - split_index)

    return counts


if __name__ == "__main__":
    split_dataset(data_dir, train_dir, test_dir, split_ratio)
    print("✅ Dataset successfully split into train/test!")