wuhp committed · verified
Commit 66be7dd · 1 Parent(s): 23838aa

Update app.py

Files changed (1)
  1. app.py +68 -74
app.py CHANGED
@@ -15,23 +15,29 @@ from roboflow import Roboflow
 def parse_roboflow_url(url: str):
     parsed = urlparse(url)
     parts = parsed.path.strip('/').split('/')
-    ws = parts[0]
-    proj = parts[1]
+    workspace = parts[0]
+    project = parts[1]
     try:
-        ver = int(parts[-1])
+        version = int(parts[-1])
     except ValueError:
-        ver = int(parts[-2])
-    return ws, proj, ver
+        version = int(parts[-2])
+    return workspace, project, version


 def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
-    rf = Roboflow(api_key=api_key)
-    workspace, proj_name, ver = parse_roboflow_url(dataset_url)
-    version_obj = rf.workspace(workspace).project(proj_name).version(ver)
-    dataset = version_obj.download("coco-segmentation")
-    root = dataset.location
-
-    # find COCO JSON
+    """
+    1) Download segmentation dataset from Roboflow
+    2) Convert each mask to its bounding box (YOLO format)
+    3) Split into train/valid/test
+    4) Return before/after visuals plus (dataset_path, detection_slug)
+    """
+    rf = Roboflow(api_key=api_key)
+    ws, proj, ver = parse_roboflow_url(dataset_url)
+    version_obj = rf.workspace(ws).project(proj).version(ver)
+    dataset = version_obj.download("coco-segmentation")
+    root = dataset.location
+
+    # find the COCO JSON
     ann_file = None
     for dp, _, files in os.walk(root):
         for f in files:
@@ -41,29 +47,22 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
         if ann_file:
             break
     if not ann_file:
-        raise FileNotFoundError(f"No JSON annotations found under {root}")
+        raise FileNotFoundError(f"No JSON found under {root}")

     coco = json.load(open(ann_file, 'r'))
     images_info = {img['id']: img for img in coco['images']}
     cat_ids = sorted(c['id'] for c in coco.get('categories', []))
     id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}

-    # flatten & convert to YOLO bboxes
-    out_root = tempfile.mkdtemp(prefix="yolov8_")
-    flat_img = os.path.join(out_root, "flat_images")
-    flat_lbl = os.path.join(out_root, "flat_labels")
-    os.makedirs(flat_img, exist_ok=True)
-    os.makedirs(flat_lbl, exist_ok=True)
-
+    # build YOLO bboxes
     annos = {}
     for anno in coco['annotations']:
         img_id = anno['image_id']
         xs, ys = anno['segmentation'][0][0::2], anno['segmentation'][0][1::2]
         xmin, xmax = min(xs), max(xs)
         ymin, ymax = min(ys), max(ys)
-        w, h = xmax - xmin, ymax - ymin
-        cx, cy = xmin + w/2, ymin + h/2
-
+        w, h = xmax - xmin, ymax - ymin
+        cx, cy = xmin + w/2, ymin + h/2
         iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
         line = (
             f"{id_to_index[anno['category_id']]} "
@@ -71,6 +70,13 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
         )
         annos.setdefault(img_id, []).append(line)

+    # copy and write out flat images + labels
+    out_root = tempfile.mkdtemp(prefix="yolov8_")
+    flat_img = os.path.join(out_root, "flat_images")
+    flat_lbl = os.path.join(out_root, "flat_labels")
+    os.makedirs(flat_img, exist_ok=True)
+    os.makedirs(flat_lbl, exist_ok=True)
+
     name_to_id = {img['file_name']: img['id'] for img in coco['images']}
     file_paths = {
         f: os.path.join(dp, f)
@@ -84,48 +90,44 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
         if not src:
             continue
         shutil.copy(src, os.path.join(flat_img, fname))
-        with open(os.path.join(flat_lbl, fname.rsplit('.',1)[0] + ".txt"), 'w') as lf:
+        lbl_path = os.path.join(flat_lbl, fname.rsplit('.',1)[0] + ".txt")
+        with open(lbl_path, 'w') as lf:
             lf.write("\n".join(annos.get(img_id, [])))

-    # split into train/valid/test
-    all_files = sorted(
-        f for f in os.listdir(flat_img)
-        if f.lower().endswith(('.jpg','.png','.jpeg'))
-    )
+    # split filenames
+    all_files = [f for f in os.listdir(flat_img) if f.lower().endswith(('.jpg','.png','.jpeg'))]
     random.shuffle(all_files)
     n = len(all_files)
     n_train = max(1, int(n * split_ratios[0]))
     n_valid = max(1, int(n * split_ratios[1]))
     n_valid = min(n_valid, n - n_train - 1)
-
     splits = {
         "train": all_files[:n_train],
         "valid": all_files[n_train:n_train+n_valid],
         "test": all_files[n_train+n_valid:]
     }

+    # move into final folder structure
     for split, files in splits.items():
-        idir = os.path.join(out_root, "images", split)
-        ldir = os.path.join(out_root, "labels", split)
-        os.makedirs(idir, exist_ok=True)
-        os.makedirs(ldir, exist_ok=True)
+        img_dir = os.path.join(out_root, "images", split)
+        lbl_dir = os.path.join(out_root, "labels", split)
+        os.makedirs(img_dir, exist_ok=True)
+        os.makedirs(lbl_dir, exist_ok=True)
         for fn in files:
-            shutil.move(os.path.join(flat_img, fn), os.path.join(idir, fn))
+            shutil.move(os.path.join(flat_img, fn), os.path.join(img_dir, fn))
             lbl = fn.rsplit('.',1)[0] + ".txt"
-            shutil.move(os.path.join(flat_lbl, lbl), os.path.join(ldir, lbl))
+            shutil.move(os.path.join(flat_lbl, lbl), os.path.join(lbl_dir, lbl))

     shutil.rmtree(flat_img)
     shutil.rmtree(flat_lbl)

-    # before/after visuals
+    # prepare a few before/after images for display
     before, after = [], []
     sample = random.sample(list(name_to_id.keys()), min(5, len(name_to_id)))
     for fname in sample:
-        src = file_paths.get(fname)
-        if not src:
-            continue
-        img = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
+        img = cv2.cvtColor(cv2.imread(file_paths[fname]), cv2.COLOR_BGR2RGB)

+        # original segmentation overlay
         seg_vis = img.copy()
         for anno in coco['annotations']:
             if anno['image_id'] != name_to_id[fname]:
@@ -133,6 +135,7 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
             pts = np.array(anno['segmentation'][0], np.int32).reshape(-1,2)
             cv2.polylines(seg_vis, [pts], True, (255,0,0), 2)

+        # bbox overlay
         box_vis = img.copy()
         for line in annos.get(name_to_id[fname], []):
             _, cxn, cyn, wnorm, hnorm = map(float, line.split())
@@ -145,27 +148,33 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
         before.append(Image.fromarray(seg_vis))
         after.append(Image.fromarray(box_vis))

-    return before, after, out_root, proj_name + "-detection", workspace
+    detection_slug = proj + "-detection"
+    return before, after, out_root, detection_slug


 def upload_and_train_detection(
     api_key: str,
-    workspace: str,
-    project_slug: str,
+    detection_slug: str,
     dataset_path: str,
     project_license: str = "MIT",
     project_type: str = "object-detection"
 ):
+    """
+    Uploads your converted dataset into *your* active Roboflow workspace,
+    creates (or finds) a project named `detection_slug`, and kicks off training.
+    Returns the hosted endpoint URL.
+    """
     rf = Roboflow(api_key=api_key)
-    ws = rf.workspace(workspace)
+    # use your active workspace (no name needed)
+    ws = rf.workspace()

-    # 1) Try to fetch existing project
+    # 1) get-or-create
     try:
-        proj = ws.project(project_slug)
+        proj = ws.project(detection_slug)
     except Exception as e:
         if "does not exist" in str(e).lower():
             proj = ws.create_project(
-                project_slug,
+                detection_slug,
                 annotation=project_type,
                 project_type=project_type,
                 project_license=project_license
@@ -173,26 +182,15 @@ def upload_and_train_detection(
         else:
             raise

-    # 2) If it exists but as the wrong annotation type, spin up <slug>-v2
-    if getattr(proj, "annotation", None) != project_type:
-        new_slug = project_slug + "-v2"
-        proj = ws.create_project(
-            new_slug,
-            annotation=project_type,
-            project_type=project_type,
-            project_license=project_license
-        )
-        project_slug = new_slug
-
-    # 3) Upload train/valid/test
+    # 2) upload everything under dataset_path
     ws.upload_dataset(
         dataset_path,
-        project_slug,
+        proj.slug,
         project_license=project_license,
         project_type=project_type
     )

-    # 4) Generate new version & train using the `settings` parameter
+    # 3) generate a new version
     try:
         version_num = proj.generate_version(settings={
             "augmentation": {},
@@ -201,18 +199,17 @@
     except RuntimeError as e:
         msg = str(e).lower()
         if "unsupported request" in msg or "does not exist" in msg:
-            suffix = "-v3" if project_slug.endswith("-v2") else "-v2"
-            new_slug = project_slug + suffix
+            # bump slug and retry
+            new_slug = proj.slug + "-v2"
             proj = ws.create_project(
                 new_slug,
                 annotation=project_type,
                 project_type=project_type,
                 project_license=project_license
             )
-            project_slug = new_slug
             ws.upload_dataset(
                 dataset_path,
-                project_slug,
+                proj.slug,
                 project_license=project_license,
                 project_type=project_type
             )
@@ -223,10 +220,8 @@
         else:
             raise

-    # 5) Kick off training
+    # 4) train & return endpoint
     model = proj.version(str(version_num)).train()
-
-    # 6) Return the hosted endpoint URL
     return f"{model['base_url']}{model['id']}?api_key={api_key}"


@@ -239,14 +234,13 @@ with gr.Blocks() as app:
     run_btn = gr.Button("Convert to BBoxes")
     before_g = gr.Gallery(columns=5, label="Before")
     after_g = gr.Gallery(columns=5, label="After")
-    ds_state = gr.Textbox(visible=False)
-    slug_state = gr.Textbox(visible=False)
-    ws_state = gr.Textbox(visible=False)
+    ds_state = gr.Textbox(visible=False, label="Converted Dataset Path")
+    slug_state = gr.Textbox(visible=False, label="Detection Project Slug")

     run_btn.click(
         convert_seg_to_bbox,
         inputs=[api_input, url_input],
-        outputs=[before_g, after_g, ds_state, slug_state, ws_state]
+        outputs=[before_g, after_g, ds_state, slug_state]
     )

     gr.Markdown("## 🚀 Upload & Train Detection Model")
@@ -255,7 +249,7 @@ with gr.Blocks() as app:

     train_btn.click(
         upload_and_train_detection,
-        inputs=[api_input, ws_state, slug_state, ds_state],
+        inputs=[api_input, slug_state, ds_state],
         outputs=[url_out]
     )
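
For reference, below is a minimal usage sketch of the two functions as they stand after this commit, driven outside the Gradio UI. It assumes `convert_seg_to_bbox` and `upload_and_train_detection` are importable from app.py (importing the module also builds the Gradio Blocks), and the API key and dataset URL shown are placeholders, not values from this commit.

# Minimal sketch, not part of the commit: runs the convert-then-train flow
# end to end with placeholder credentials.
from app import convert_seg_to_bbox, upload_and_train_detection  # importing app.py also constructs the Gradio UI

API_KEY = "YOUR_ROBOFLOW_API_KEY"  # placeholder
DATASET_URL = "https://universe.roboflow.com/some-workspace/some-project/1"  # placeholder

# Download the segmentation dataset, convert masks to YOLO bboxes, and split it;
# returns preview galleries plus the converted dataset path and a detection slug.
before_imgs, after_imgs, dataset_path, detection_slug = convert_seg_to_bbox(API_KEY, DATASET_URL)

# Upload the converted dataset into the active workspace and start training;
# the return value is the hosted inference endpoint URL.
endpoint_url = upload_and_train_detection(API_KEY, detection_slug, dataset_path)
print(endpoint_url)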