wuhp committed on
Commit
99a318c
·
verified ·
1 Parent(s): b506212

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -55
app.py CHANGED
@@ -25,29 +25,22 @@ def parse_roboflow_url(url: str):
25
 
26
 
27
  def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
 
28
  rf = Roboflow(api_key=api_key)
29
  ws, proj_name, ver = parse_roboflow_url(dataset_url)
30
  version_obj = rf.workspace(ws).project(proj_name).version(ver)
31
  dataset = version_obj.download("coco-segmentation")
32
  root = dataset.location
33
 
34
- # find annotation JSON
35
  ann_file = None
36
  for dp, _, files in os.walk(root):
37
  for f in files:
38
- if 'train' in f.lower() and f.lower().endswith('.json'):
39
  ann_file = os.path.join(dp, f)
40
  break
41
  if ann_file:
42
  break
43
- if not ann_file:
44
- for dp, _, files in os.walk(root):
45
- for f in files:
46
- if f.lower().endswith('.json'):
47
- ann_file = os.path.join(dp, f)
48
- break
49
- if ann_file:
50
- break
51
  if not ann_file:
52
  raise FileNotFoundError(f"No JSON annotations under {root}")
53
 
@@ -56,14 +49,14 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
56
  cat_ids = sorted(c['id'] for c in coco.get('categories', []))
57
  id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}
58
 
59
- # prepare flat YOLO dirs
60
  out_root = tempfile.mkdtemp(prefix="yolov8_")
61
- img_out = os.path.join(out_root, "images")
62
- lbl_out = os.path.join(out_root, "labels")
63
- os.makedirs(img_out, exist_ok=True)
64
- os.makedirs(lbl_out, exist_ok=True)
65
 
66
- # convert segmentation bounding‐box labels
67
  annos = {}
68
  for anno in coco['annotations']:
69
  img_id = anno['image_id']
@@ -72,7 +65,7 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
72
  x_min, x_max = min(xs), max(xs)
73
  y_min, y_max = min(ys), max(ys)
74
  w, h = x_max - x_min, y_max - y_min
75
- cx, cy = x_min + w / 2, y_min + h / 2
76
 
77
  iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
78
  line = (
@@ -81,56 +74,61 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
81
  )
82
  annos.setdefault(img_id, []).append(line)
83
 
84
- # locate raw images folder
85
- train_img_dir = None
86
  for dp, _, files in os.walk(root):
87
- if any(f.lower().endswith(('.jpg', '.png', '.jpeg')) for f in files):
88
- train_img_dir = dp
89
  break
90
- if not train_img_dir:
91
- raise FileNotFoundError(f"No images under {root}")
92
 
93
- # copy images + write flat labels
94
  name_to_id = {img['file_name']: img['id'] for img in coco['images']}
95
  for fname, img_id in name_to_id.items():
96
- src = os.path.join(train_img_dir, fname)
97
- if not os.path.isfile(src):
98
  continue
99
- shutil.copy(src, os.path.join(img_out, fname))
100
- with open(os.path.join(lbl_out, fname.rsplit('.', 1)[0] + ".txt"), 'w') as lf:
101
  lf.write("\n".join(annos.get(img_id, [])))
102
 
103
- # split into train/valid/test
104
- all_images = sorted([f for f in os.listdir(img_out) if f.lower().endswith(('.jpg', '.png', '.jpeg'))])
105
- random.shuffle(all_images)
106
- n = len(all_images)
107
- n_train = int(n * split_ratios[0])
108
- n_valid = int(n * split_ratios[1])
 
 
109
  splits = {
110
- "train": all_images[:n_train],
111
- "valid": all_images[n_train:n_train + n_valid],
112
- "test": all_images[n_train + n_valid:]
113
  }
114
 
115
- for split_name, files in splits.items():
116
- img_dir = os.path.join(out_root, split_name, "images")
117
- lbl_dir = os.path.join(out_root, split_name, "labels")
 
 
 
118
  os.makedirs(img_dir, exist_ok=True)
119
  os.makedirs(lbl_dir, exist_ok=True)
120
- for fname in files:
121
- shutil.move(os.path.join(img_out, fname), os.path.join(img_dir, fname))
122
- lbl_fname = fname.rsplit(".", 1)[0] + ".txt"
123
- shutil.move(os.path.join(lbl_out, lbl_fname), os.path.join(lbl_dir, lbl_fname))
124
 
125
- # remove flat dirs
126
- shutil.rmtree(img_out)
127
- shutil.rmtree(lbl_out)
128
 
129
- # build before/after galleries for a few samples
130
  before, after = [], []
131
  sample = random.sample(list(name_to_id.keys()), min(5, len(name_to_id)))
132
  for fname in sample:
133
- src = os.path.join(train_img_dir, fname)
134
  img = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
135
 
136
  seg_vis = img.copy()
@@ -147,7 +145,7 @@ def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1,
147
  w0, h0 = int(wnorm * iw), int(hnorm * ih)
148
  x0 = int(cxn * iw - w0 / 2)
149
  y0 = int(cyn * ih - h0 / 2)
150
- cv2.rectangle(box_vis, (x0, y0), (x0 + w0, y0 + h0), (0, 255, 0), 2)
151
 
152
  before.append(Image.fromarray(seg_vis))
153
  after.append(Image.fromarray(box_vis))
@@ -166,7 +164,7 @@ def upload_and_train_detection(
166
  rf = Roboflow(api_key=api_key)
167
  ws = rf.workspace()
168
 
169
- # get or create project
170
  try:
171
  proj = ws.project(project_slug)
172
  except Exception:
@@ -177,7 +175,7 @@ def upload_and_train_detection(
177
  project_license=project_license
178
  )
179
 
180
- # upload folder with train/valid/test
181
  ws.upload_dataset(
182
  dataset_path,
183
  project_slug,
@@ -185,16 +183,16 @@ def upload_and_train_detection(
185
  project_type=project_type
186
  )
187
 
188
- # create new version
189
  version_num = proj.generate_version(settings={
190
  "augmentation": {},
191
  "preprocessing": {},
192
  })
193
 
194
- # enqueue training (now sees splits)
195
  proj.version(str(version_num)).train()
196
 
197
- # return endpoint
198
  m = proj.version(str(version_num)).model
199
  return f"{m['base_url']}{m['id']}?api_key={api_key}"
200
 
 
25
 
26
 
27
  def convert_seg_to_bbox(api_key: str, dataset_url: str, split_ratios=(0.8, 0.1, 0.1)):
28
+ # --- download segmentation export
29
  rf = Roboflow(api_key=api_key)
30
  ws, proj_name, ver = parse_roboflow_url(dataset_url)
31
  version_obj = rf.workspace(ws).project(proj_name).version(ver)
32
  dataset = version_obj.download("coco-segmentation")
33
  root = dataset.location
34
 
35
+ # --- find the COCO JSON
36
  ann_file = None
37
  for dp, _, files in os.walk(root):
38
  for f in files:
39
+ if f.lower().endswith('.json'):
40
  ann_file = os.path.join(dp, f)
41
  break
42
  if ann_file:
43
  break
 
 
 
 
 
 
 
 
44
  if not ann_file:
45
  raise FileNotFoundError(f"No JSON annotations under {root}")
46
 
 
49
  cat_ids = sorted(c['id'] for c in coco.get('categories', []))
50
  id_to_index = {cid: idx for idx, cid in enumerate(cat_ids)}
51
 
52
+ # --- make a flat YOLO folder
53
  out_root = tempfile.mkdtemp(prefix="yolov8_")
54
+ flat_img = os.path.join(out_root, "flat_images")
55
+ flat_lbl = os.path.join(out_root, "flat_labels")
56
+ os.makedirs(flat_img, exist_ok=True)
57
+ os.makedirs(flat_lbl, exist_ok=True)
58
 
59
+ # --- convert each segmentation to a YOLO bbox line
60
  annos = {}
61
  for anno in coco['annotations']:
62
  img_id = anno['image_id']
 
65
  x_min, x_max = min(xs), max(xs)
66
  y_min, y_max = min(ys), max(ys)
67
  w, h = x_max - x_min, y_max - y_min
68
+ cx, cy = x_min + w/2, y_min + h/2
69
 
70
  iw, ih = images_info[img_id]['width'], images_info[img_id]['height']
71
  line = (
 
74
  )
75
  annos.setdefault(img_id, []).append(line)
76
 
77
+ # --- locate the single images folder
78
+ img_src = None
79
  for dp, _, files in os.walk(root):
80
+ if any(f.lower().endswith(('.jpg','.png','.jpeg')) for f in files):
81
+ img_src = dp
82
  break
83
+ if not img_src:
84
+ raise FileNotFoundError(f"No images folder in {root}")
85
 
86
+ # --- copy images + write flat labels
87
  name_to_id = {img['file_name']: img['id'] for img in coco['images']}
88
  for fname, img_id in name_to_id.items():
89
+ src_path = os.path.join(img_src, fname)
90
+ if not os.path.isfile(src_path):
91
  continue
92
+ shutil.copy(src_path, os.path.join(flat_img, fname))
93
+ with open(os.path.join(flat_lbl, fname.rsplit('.',1)[0] + ".txt"), 'w') as lf:
94
  lf.write("\n".join(annos.get(img_id, [])))
95
 
96
+ # --- split filenames into train/valid/test lists
97
+ all_files = sorted([f for f in os.listdir(flat_img) if f.lower().endswith(('.jpg','.png','.jpeg'))])
98
+ random.shuffle(all_files)
99
+ n = len(all_files)
100
+ n_train = max(1, int(n * split_ratios[0]))
101
+ n_valid = max(1, int(n * split_ratios[1]))
102
+ # ensure we don’t overshoot
103
+ n_valid = min(n_valid, n - n_train - 1)
104
  splits = {
105
+ "train": all_files[:n_train],
106
+ "valid": all_files[n_train:n_train+n_valid],
107
+ "test": all_files[n_train+n_valid:]
108
  }
109
 
110
+ # --- create Roboflow‑friendly structure:
111
+ # out_root/images/{train,valid,test}
112
+ # out_root/labels/{train,valid,test}
113
+ for split, files in splits.items():
114
+ img_dir = os.path.join(out_root, "images", split)
115
+ lbl_dir = os.path.join(out_root, "labels", split)
116
  os.makedirs(img_dir, exist_ok=True)
117
  os.makedirs(lbl_dir, exist_ok=True)
118
+ for fn in files:
119
+ shutil.move(os.path.join(flat_img, fn), os.path.join(img_dir, fn))
120
+ shutil.move(os.path.join(flat_lbl, fn.rsplit('.',1)[0] + ".txt"),
121
+ os.path.join(lbl_dir, fn.rsplit('.',1)[0] + ".txt"))
122
 
123
+ # --- clean up flats
124
+ shutil.rmtree(flat_img)
125
+ shutil.rmtree(flat_lbl)
126
 
127
+ # --- build a few before/after previews
128
  before, after = [], []
129
  sample = random.sample(list(name_to_id.keys()), min(5, len(name_to_id)))
130
  for fname in sample:
131
+ src = os.path.join(img_src, fname)
132
  img = cv2.cvtColor(cv2.imread(src), cv2.COLOR_BGR2RGB)
133
 
134
  seg_vis = img.copy()
 
145
  w0, h0 = int(wnorm * iw), int(hnorm * ih)
146
  x0 = int(cxn * iw - w0 / 2)
147
  y0 = int(cyn * ih - h0 / 2)
148
+ cv2.rectangle(box_vis, (x0, y0), (x0+w0, y0+h0), (0, 255, 0), 2)
149
 
150
  before.append(Image.fromarray(seg_vis))
151
  after.append(Image.fromarray(box_vis))
 
164
  rf = Roboflow(api_key=api_key)
165
  ws = rf.workspace()
166
 
167
+ # get-or-create your detection project
168
  try:
169
  proj = ws.project(project_slug)
170
  except Exception:
 
175
  project_license=project_license
176
  )
177
 
178
+ # upload the properly‑split folder
179
  ws.upload_dataset(
180
  dataset_path,
181
  project_slug,
 
183
  project_type=project_type
184
  )
185
 
186
+ # create a new version
187
  version_num = proj.generate_version(settings={
188
  "augmentation": {},
189
  "preprocessing": {},
190
  })
191
 
192
+ # enqueue training (now finds train/valid/test)
193
  proj.version(str(version_num)).train()
194
 
195
+ # return the hosted endpoint URL
196
  m = proj.version(str(version_num)).model
197
  return f"{m['base_url']}{m['id']}?api_key={api_key}"
198