wuhp committed · verified
Commit 8193bd7 · 1 Parent(s): 5bacccd

Update app.py

Files changed (1)
  1. app.py +73 -74
app.py CHANGED
@@ -25,11 +25,11 @@ def parse_roboflow_url(url: str):
     return workspace, project, version
 
 
-def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
+def convert_seg_to_bbox(api_key: str, dataset_url: str):
     """
     1) Download segmentation dataset from Roboflow
     2) Convert each mask to its bounding box (YOLO format)
-    3) Split into train/valid/test
+    3) Preserve original train/valid/test splits
     4) Return before/after visuals plus (dataset_path, detection_slug)
     """
     rf = Roboflow(api_key=api_key)
@@ -38,41 +38,48 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
     dataset = version_obj.download("coco-segmentation")
     root = dataset.location
 
-    # find the COCO JSON
-    ann_file = None
+    # 1) Locate all three split JSON files
+    json_files = {}
     for dp, _, files in os.walk(root):
         for f in files:
-            if f.lower().endswith('.json'):
-                ann_file = os.path.join(dp, f)
-                break
-        if ann_file:
-            break
-    if not ann_file:
-        raise FileNotFoundError(f"No JSON found under {root}")
-
-    coco = json.load(open(ann_file, 'r'))
-    images_info = {img['id']: img for img in coco['images']}
-    cat_ids = sorted(c['id'] for c in coco.get('categories', []))
+            lf = f.lower()
+            if not lf.endswith('.json'):
+                continue
+            if 'train' in lf:
+                json_files['train'] = os.path.join(dp, f)
+            elif 'valid' in lf or 'val' in lf:
+                json_files['valid'] = os.path.join(dp, f)
+            elif 'test' in lf:
+                json_files['test'] = os.path.join(dp, f)
+    if any(k not in json_files for k in ('train', 'valid', 'test')):
+        raise RuntimeError(f"Missing one of train/valid/test JSONs: {json_files}")
+
+    # 2) Build category → index mapping from the train split
+    train_coco = json.load(open(json_files['train'], 'r'))
+    cat_ids = sorted(c['id'] for c in train_coco.get('categories', []))
     id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}
 
-    # build YOLO bboxes
-    annos = {}
-    for anno in coco['annotations']:
-        img_id = anno['image_id']
-        xs, ys = anno['segmentation'][0][0::2], anno['segmentation'][0][1::2]
-        xmin, xmax = min(xs), max(xs)
-        ymin, ymax = min(ys), max(ys)
-        w, h = xmax - xmin, ymax - ymin
-        cx, cy = xmin + w/2, ymin + h/2
-        iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
-        line = (
-            f"{id_to_index[anno['category_id']]} "
-            f"{cx/iw:.6f} {cy/ih:.6f} {w/iw:.6f} {h/ih:.6f}"
-        )
-        annos.setdefault(img_id, []).append(line)
-
-    # gather all filenames and paths
-    name_to_id = {img['file_name']: img['id'] for img in coco['images']}
+    # 3) Aggregate ALL image info & annotations into global dicts
+    global_images_info = {}
+    global_annos = {}
+    for split, jf in json_files.items():
+        coco = json.load(open(jf, 'r'))
+        for img in coco['images']:
+            global_images_info[img['id']] = img
+        for anno in coco['annotations']:
+            xs = anno['segmentation'][0][0::2]
+            ys = anno['segmentation'][0][1::2]
+            xmin, xmax = min(xs), max(xs)
+            ymin, ymax = min(ys), max(ys)
+            w, h = xmax - xmin, ymax - ymin
+            cx, cy = xmin + w/2, ymin + h/2
+            iw = global_images_info[anno['image_id']]['width']
+            ih = global_images_info[anno['image_id']]['height']
+            line = f"{id_to_index[anno['category_id']]} {cx/iw:.6f} {cy/ih:.6f} {w/iw:.6f} {h/ih:.6f}"
+            global_annos.setdefault(anno['image_id'], []).append(line)
+
+    # 4) Build a quick map of filename → full path
+    name_to_id = {img['file_name']: img['id'] for img in global_images_info.values()}
     file_paths = {
         f: os.path.join(dp, f)
         for dp, _, files in os.walk(root)
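For reference, the bbox math in this hunk can be checked in isolation. A minimal sketch, run on a made-up 3-point polygon in a hypothetical 640×480 image (the values and the `seg` name are illustrative, not part of the commit):

# Toy check of the polygon -> YOLO-bbox conversion used in the hunk above.
seg = [10.0, 20.0, 110.0, 20.0, 60.0, 80.0]   # flat COCO polygon: [x1, y1, x2, y2, ...]
iw, ih = 640, 480                             # hypothetical image width/height
xs, ys = seg[0::2], seg[1::2]
xmin, xmax = min(xs), max(xs)
ymin, ymax = min(ys), max(ys)
w, h = xmax - xmin, ymax - ymin               # 100.0, 60.0
cx, cy = xmin + w / 2, ymin + h / 2           # 60.0, 50.0
print(f"0 {cx/iw:.6f} {cy/ih:.6f} {w/iw:.6f} {h/ih:.6f}")
# -> 0 0.093750 0.104167 0.156250 0.125000  (class index, then normalized x-center, y-center, width, height)

Like the converted code, this uses only the first polygon of an annotation and takes the tight axis-aligned box around its vertices.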
@@ -80,57 +87,49 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
         if f in name_to_id
     }
 
-    # split filenames
-    all_files = list(name_to_id.keys())
-    random.shuffle(all_files)
-    n = len(all_files)
-    n_train = max(1, int(n * split_ratios[0]))
-    n_valid = max(1, int(n * split_ratios[1]))
-    n_valid = min(n_valid, n - n_train - 1)
-    splits = {
-        "train": all_files[:n_train],
-        "valid": all_files[n_train:n_train+n_valid],
-        "test": all_files[n_train+n_valid:]
-    }
-
-    # create Roboflow‐style dataset folder:
-    #   out_root/
-    #     train/images, train/labels,
-    #     valid/images, valid/labels,
-    #     test/images,  test/labels
+    # 5) Copy images & write YOLO .txt labels, preserving original splits
     out_root = tempfile.mkdtemp(prefix="yolov8_")
-    for split, files in splits.items():
+    for split in ('train', 'valid', 'test'):
+        coco = json.load(open(json_files[split], 'r'))
         img_dir = os.path.join(out_root, split, "images")
         lbl_dir = os.path.join(out_root, split, "labels")
         os.makedirs(img_dir, exist_ok=True)
         os.makedirs(lbl_dir, exist_ok=True)
-        for fname in files:
-            # copy image
+        for img in coco['images']:
+            fname = img['file_name']
             shutil.copy(file_paths[fname], os.path.join(img_dir, fname))
-            # write label
-            txt = "\n".join(annos.get(name_to_id[fname], []))
-            with open(os.path.join(lbl_dir, fname.rsplit('.',1)[0] + ".txt"), 'w') as f:
-                f.write(txt)
+            with open(os.path.join(lbl_dir, fname.rsplit('.', 1)[0] + ".txt"), 'w') as f:
+                f.write("\n".join(global_annos.get(img['id'], [])))
 
-    # prepare a few before/after examples
+    # 6) Prepare a few before/after examples (random sample across all splits)
     before, after = [], []
-    sample = random.sample(all_files, min(5, len(all_files)))
-    for fname in sample:
+    all_ids = list(global_images_info.keys())
+    sample_ids = random.sample(all_ids, min(5, len(all_ids)))
+    for img_id in sample_ids:
+        fname = global_images_info[img_id]['file_name']
         img = cv2.cvtColor(cv2.imread(file_paths[fname]), cv2.COLOR_BGR2RGB)
+
+        # draw segmentation outlines
         seg_vis = img.copy()
-        for anno in coco['annotations']:
-            if anno['image_id'] != name_to_id[fname]:
-                continue
-            pts = np.array(anno['segmentation'][0], np.int32).reshape(-1,2)
-            cv2.polylines(seg_vis, [pts], True, (255,0,0), 2)
+        for jf in json_files.values():
+            coco = json.load(open(jf, 'r'))
+            for anno in coco['annotations']:
+                if anno['image_id'] != img_id:
+                    continue
+                pts = np.array(anno['segmentation'][0], np.int32).reshape(-1, 2)
+                cv2.polylines(seg_vis, [pts], True, (255, 0, 0), 2)
+
+        # draw bounding boxes
         box_vis = img.copy()
-        for line in annos.get(name_to_id[fname], []):
+        for line in global_annos.get(img_id, []):
             _, cxn, cyn, wnorm, hnorm = map(float, line.split())
-            iw, ih = images_info[name_to_id[fname]]['width'], images_info[name_to_id[fname]]['height']
-            w0, h0 = int(wnorm*iw), int(hnorm*ih)
-            x0 = int(cxn*iw - w0/2)
-            y0 = int(cyn*ih - h0/2)
-            cv2.rectangle(box_vis, (x0,y0), (x0+w0,y0+h0), (0,255,0), 2)
+            iw = global_images_info[img_id]['width']
+            ih = global_images_info[img_id]['height']
+            w0, h0 = int(wnorm * iw), int(hnorm * ih)
+            x0 = int(cxn * iw - w0 / 2)
+            y0 = int(cyn * ih - h0 / 2)
+            cv2.rectangle(box_vis, (x0, y0), (x0 + w0, y0 + h0), (0, 255, 0), 2)
+
         before.append(Image.fromarray(seg_vis))
         after.append(Image.fromarray(box_vis))
 
@@ -146,7 +145,7 @@ def upload_and_train_detection(
     project_type: str = "object-detection"
 ):
     """
-    Uploads the converted dataset (with train/valid/test splits) to Roboflow,
+    Uploads the converted dataset (with preserved splits) to Roboflow,
     creates or fetches a detection project, and kicks off training.
     Returns the hosted model URL.
     """
@@ -214,7 +213,7 @@ def upload_and_train_detection(
 
 # --- Gradio UI ---
 with gr.Blocks() as app:
-    gr.Markdown("## 🔄 Seg→BBox + AutoUpload/Train")
+    gr.Markdown("## 🔄 Seg→BBox + Auto-Upload/Train")
 
     api_input = gr.Textbox(label="Roboflow API Key", type="password")
     url_input = gr.Textbox(label="Segmentation Dataset URL")