wuhp committed on
Commit
99a318c
·
verified ·
1 Parent(s): b506212

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -55
app.py CHANGED
@@ -25,29 +25,22 @@ def parse_roboflow_url(url: str):
25
 
26
 
27
  def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
 
28
  rf = Roboflow(api_key=api_key)
29
  ws, proj_name, ver = parse_roboflow_url(dataset_url)
30
  version_obj = rf.workspace(ws).project(proj_name).version(ver)
31
  dataset = version_obj.download("coco-segmentation")
32
  root = dataset.location
33
 
34
- # find annotation JSON
35
  ann_file = None
36
  for dp, _, files in os.walk(root):
37
  for f in files:
38
- if 'train' in f.lower() and f.lower().endswith('.json'):
39
  ann_file = os.path.join(dp, f)
40
  break
41
  if ann_file:
42
  break
43
- if not ann_file:
44
- for dp, _, files in os.walk(root):
45
- for f in files:
46
- if f.lower().endswith('.json'):
47
- ann_file = os.path.join(dp, f)
48
- break
49
- if ann_file:
50
- break
51
  if not ann_file:
52
  raise FileNotFoundError(f"No JSON annotations under {root}")
53
 
@@ -56,14 +49,14 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
56
  cat_ids = sorted(c['id'] for c in coco.get('categories', []))
57
  id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}
58
 
59
- # prepare flat YOLO dirs
60
  out_root = tempfile.mkdtemp(prefix="yolov8_")
61
- img_out = os.path.join(out_root, "images")
62
- lbl_out = os.path.join(out_root, "labels")
63
- os.makedirs(img_out, exist_ok=True)
64
- os.makedirs(lbl_out, exist_ok=True)
65
 
66
- # convert segmentation bounding‐box labels
67
  annos = {}
68
  for anno in coco['annotations']:
69
  img_id = anno['image_id']
@@ -72,7 +65,7 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
72
  x_min, x_max = min(xs), max(xs)
73
  y_min, y_max = min(ys), max(ys)
74
  w, h = x_max - x_min, y_max - y_min
75
- cx, cy = x_min + w / 2, y_min + h / 2
76
 
77
  iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
78
  line = (
@@ -81,56 +74,61 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
81
  )
82
  annos.setdefault(img_id, []).append(line)
83
 
84
- # locate raw images folder
85
- train_img_dir = None
86
  for dp, _, files in os.walk(root):
87
- if any(f.lower().endswith(('.jpg', '.png', '.jpeg')) for f in files):
88
- train_img_dir = dp
89
  break
90
- if not train_img_dir:
91
- raise FileNotFoundError(f"No images under {root}")
92
 
93
- # copy images + write flat labels
94
  name_to_id = {img['file_name']: img['id'] for img in coco['images']}
95
  for fname, img_id in name_to_id.items():
96
- src = os.path.join(train_img_dir, fname)
97
- if not os.path.isfile(src):
98
  continue
99
- shutil.copy(src, os.path.join(img_out, fname))
100
- with open(os.path.join(lbl_out, fname.rsplit('.', 1)[0] + ".txt"), 'w') as lf:
101
  lf.write("\n".join(annos.get(img_id, [])))
102
 
103
- # split into train/valid/test
104
- all_images = sorted([f for f in os.listdir(img_out) if f.lower().endswith(('.jpg', '.png', '.jpeg'))])
105
- random.shuffle(all_images)
106
- n = len(all_images)
107
- n_train = int(n * split_ratios[0])
108
- n_valid = int(n * split_ratios[1])
 
 
109
  splits = {
110
- "train": all_images[:n_train],
111
- "valid": all_images[n_train:n_train + n_valid],
112
- "test": all_images[n_train + n_valid:]
113
  }
114
 
115
- for split_name, files in splits.items():
116
- img_dir = os.path.join(out_root, split_name, "images")
117
- lbl_dir = os.path.join(out_root, split_name, "labels")
 
 
 
118
  os.makedirs(img_dir, exist_ok=True)
119
  os.makedirs(lbl_dir, exist_ok=True)
120
- for fname in files:
121
- shutil.move(os.path.join(img_out, fname), os.path.join(img_dir, fname))
122
- lbl_fname = fname.rsplit(".", 1)[0] + ".txt"
123
- shutil.move(os.path.join(lbl_out, lbl_fname), os.path.join(lbl_dir, lbl_fname))
124
 
125
- # remove flat dirs
126
- shutil.rmtree(img_out)
127
- shutil.rmtree(lbl_out)
128
 
129
- # build before/after galleries for a few samples
130
  before, after = [], []
131
  sample = random.sample(list(name_to_id.keys()), min(5, len(name_to_id)))
132
  for fname in sample:
133
- src = os.path.join(train_img_dir, fname)
134
  img = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
135
 
136
  seg_vis = img.copy()
@@ -147,7 +145,7 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
147
  w0, h0 = int(wnorm * iw), int(hnorm * ih)
148
  x0 = int(cxn * iw - w0 / 2)
149
  y0 = int(cyn * ih - h0 / 2)
150
- cv2.rectangle(box_vis, (x0, y0), (x0 + w0, y0 + h0), (0, 255, 0), 2)
151
 
152
  before.append(Image.fromarray(seg_vis))
153
  after.append(Image.fromarray(box_vis))
@@ -166,7 +164,7 @@ def upload_and_train_detection(
166
  rf = Roboflow(api_key=api_key)
167
  ws = rf.workspace()
168
 
169
- # get or create project
170
  try:
171
  proj = ws.project(project_slug)
172
  except Exception:
@@ -177,7 +175,7 @@ def upload_and_train_detection(
177
  project_license=project_license
178
  )
179
 
180
- # upload folder with train/valid/test
181
  ws.upload_dataset(
182
  dataset_path,
183
  project_slug,
@@ -185,16 +183,16 @@ def upload_and_train_detection(
185
  project_type=project_type
186
  )
187
 
188
- # create new version
189
  version_num = proj.generate_version(settings={
190
  "augmentation": {},
191
  "preprocessing": {},
192
  })
193
 
194
- # enqueue training (now sees splits)
195
  proj.version(str(version_num)).train()
196
 
197
- # return endpoint
198
  m = proj.version(str(version_num)).model
199
  return f"{m['base_url']}{m['id']}?api_key={api_key}"
200
 
 
25
 
26
 
27
  def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
28
+ # --- download segmentation export
29
  rf = Roboflow(api_key=api_key)
30
  ws, proj_name, ver = parse_roboflow_url(dataset_url)
31
  version_obj = rf.workspace(ws).project(proj_name).version(ver)
32
  dataset = version_obj.download("coco-segmentation")
33
  root = dataset.location
34
 
35
+ # --- find the COCO JSON
36
  ann_file = None
37
  for dp, _, files in os.walk(root):
38
  for f in files:
39
+ if f.lower().endswith('.json'):
40
  ann_file = os.path.join(dp, f)
41
  break
42
  if ann_file:
43
  break
 
 
 
 
 
 
 
 
44
  if not ann_file:
45
  raise FileNotFoundError(f"No JSON annotations under {root}")
46
 
 
49
  cat_ids = sorted(c['id'] for c in coco.get('categories', []))
50
  id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}
51
 
52
+ # --- make a flat YOLO folder
53
  out_root = tempfile.mkdtemp(prefix="yolov8_")
54
+ flat_img = os.path.join(out_root, "flat_images")
55
+ flat_lbl = os.path.join(out_root, "flat_labels")
56
+ os.makedirs(flat_img, exist_ok=True)
57
+ os.makedirs(flat_lbl, exist_ok=True)
58
 
59
+ # --- convert each segmentation to a YOLO bbox line
60
  annos = {}
61
  for anno in coco['annotations']:
62
  img_id = anno['image_id']
 
65
  x_min, x_max = min(xs), max(xs)
66
  y_min, y_max = min(ys), max(ys)
67
  w, h = x_max - x_min, y_max - y_min
68
+ cx, cy = x_min + w/2, y_min + h/2
69
 
70
  iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
71
  line = (
 
74
  )
75
  annos.setdefault(img_id, []).append(line)
76
 
77
+ # --- locate the single images folder
78
+ img_src = None
79
  for dp, _, files in os.walk(root):
80
+ if any(f.lower().endswith(('.jpg','.png','.jpeg')) for f in files):
81
+ img_src = dp
82
  break
83
+ if not img_src:
84
+ raise FileNotFoundError(f"No images folder in {root}")
85
 
86
+ # --- copy images + write flat labels
87
  name_to_id = {img['file_name']: img['id'] for img in coco['images']}
88
  for fname, img_id in name_to_id.items():
89
+ src_path = os.path.join(img_src, fname)
90
+ if not os.path.isfile(src_path):
91
  continue
92
+ shutil.copy(src_path, os.path.join(flat_img, fname))
93
+ with open(os.path.join(flat_lbl, fname.rsplit('.',1)[0] + ".txt"), 'w') as lf:
94
  lf.write("\n".join(annos.get(img_id, [])))
95
 
96
+ # --- split filenames into train/valid/test lists
97
+ all_files = sorted([f for f in os.listdir(flat_img) if f.lower().endswith(('.jpg','.png','.jpeg'))])
98
+ random.shuffle(all_files)
99
+ n = len(all_files)
100
+ n_train = max(1, int(n * split_ratios[0]))
101
+ n_valid = max(1, int(n * split_ratios[1]))
102
+ # ensure we don’t overshoot
103
+ n_valid = min(n_valid, n - n_train - 1)
104
  splits = {
105
+ "train": all_files[:n_train],
106
+ "valid": all_files[n_train:n_train+n_valid],
107
+ "test": all_files[n_train+n_valid:]
108
  }
109
 
110
+ # --- create Roboflow‑friendly structure:
111
+ # out_root/images/{train,valid,test}
112
+ # out_root/labels/{train,valid,test}
113
+ for split, files in splits.items():
114
+ img_dir = os.path.join(out_root, "images", split)
115
+ lbl_dir = os.path.join(out_root, "labels", split)
116
  os.makedirs(img_dir, exist_ok=True)
117
  os.makedirs(lbl_dir, exist_ok=True)
118
+ for fn in files:
119
+ shutil.move(os.path.join(flat_img, fn), os.path.join(img_dir, fn))
120
+ shutil.move(os.path.join(flat_lbl, fn.rsplit('.',1)[0] + ".txt"),
121
+ os.path.join(lbl_dir, fn.rsplit('.',1)[0] + ".txt"))
122
 
123
+ # --- clean up flats
124
+ shutil.rmtree(flat_img)
125
+ shutil.rmtree(flat_lbl)
126
 
127
+ # --- build a few before/after previews
128
  before, after = [], []
129
  sample = random.sample(list(name_to_id.keys()), min(5, len(name_to_id)))
130
  for fname in sample:
131
+ src = os.path.join(img_src, fname)
132
  img = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
133
 
134
  seg_vis = img.copy()
 
145
  w0, h0 = int(wnorm * iw), int(hnorm * ih)
146
  x0 = int(cxn * iw - w0 / 2)
147
  y0 = int(cyn * ih - h0 / 2)
148
+ cv2.rectangle(box_vis, (x0, y0), (x0+w0, y0+h0), (0, 255, 0), 2)
149
 
150
  before.append(Image.fromarray(seg_vis))
151
  after.append(Image.fromarray(box_vis))
 
164
  rf = Roboflow(api_key=api_key)
165
  ws = rf.workspace()
166
 
167
+ # get-or-create your detection project
168
  try:
169
  proj = ws.project(project_slug)
170
  except Exception:
 
175
  project_license=project_license
176
  )
177
 
178
+ # upload the properly‑split folder
179
  ws.upload_dataset(
180
  dataset_path,
181
  project_slug,
 
183
  project_type=project_type
184
  )
185
 
186
+ # create a new version
187
  version_num = proj.generate_version(settings={
188
  "augmentation": {},
189
  "preprocessing": {},
190
  })
191
 
192
+ # enqueue training (now finds train/valid/test)
193
  proj.version(str(version_num)).train()
194
 
195
+ # return the hosted endpoint URL
196
  m = proj.version(str(version_num)).model
197
  return f"{m['base_url']}{m['id']}?api_key={api_key}"
198