Spaces:

CZerion
/

crop_health_monitor

Running

CZerion committed on May 17

Commit

cf62d40

verified ·

1 Parent(s): 9692df7

Update dataset.py

Files changed (1) hide show

dataset.py CHANGED Viewed

@@ -1,16 +1,19 @@
 from datasets import load_dataset, concatenate_datasets
-pv = load_dataset("DScomp380/plant_village", split="train")                # Disease
-pd = load_dataset("agyaatcoder/PlantDoc", split="train")                   # Disease
-# Normalize column names and combine
-def norm(ds, image_col, label_col):
-    return ds.rename_column(image_col, "image").rename_column(label_col, "label")
-disease_ds = concatenate_datasets([norm(pv, "image", "label"), norm(pd, "image", "label")])
-def preprocess(batch):
-    imgs = [augment(img.convert("RGB")) for img in batch["image"]]
-    return {"pixel_values": imgs, "label": batch["label"]}
-train_ds = disease_ds.with_transform(preprocess)

 from datasets import load_dataset, concatenate_datasets
+def load_disease_dataset():
+    pv = load_dataset("DScomp380/plant_village", split="train")
+    pd = load_dataset("agyaatcoder/PlantDoc", split="train")
+    # Normalize column names
+    def normalize(ds, image_col, label_col):
+        return ds.rename_column(image_col, "image").rename_column(label_col, "label")
+    # Combine into one dataset
+    disease_ds = concatenate_datasets([
+        normalize(pv, "image", "label"),
+        normalize(pd, "image", "label")
+    ])
+    # Split into train/validation
+    split = disease_ds.train_test_split(test_size=0.2, stratify_by_column="label")
+    return split["train"], split["test"]