wuhp committed (verified)
Commit 3e12066 · 1 Parent(s): 2078af3

Update app.py

Files changed (1):
  1. app.py +257 -115
app.py CHANGED
@@ -17,7 +17,7 @@ REPO_URL = "https://github.com/supervisely-ecosystem/RT-DETRv2"
17
  REPO_DIR = os.path.join(os.getcwd(), "third_party", "RT-DETRv2")
18
  PY_IMPL_DIR = os.path.join(REPO_DIR, "rtdetrv2_pytorch") # Supervisely keeps PyTorch impl here
19
 
20
- # Core deps + your requested packages; pinned as lower-bounds to avoid downgrades
21
  COMMON_REQUIREMENTS = [
22
  "gradio>=4.36.1",
23
  "ultralytics>=8.2.0",
@@ -30,9 +30,9 @@ COMMON_REQUIREMENTS = [
30
  "torchvision>=0.15.2",
31
  "pyyaml>=6.0.1",
32
  "Pillow>=10.0.0",
33
- "supervisely>=6.0.0", # <- fixes ModuleNotFoundError from repo trainer
34
- "tensorboard>=2.13.0", # convenience: sometimes used by forks
35
- "pycocotools>=2.0.7", # convenience: ensure wheels are present
36
  ]
37
 
38
  # === bootstrap (clone + pip) ===================================================
@@ -51,15 +51,17 @@ def ensure_repo_and_requirements():
51
  except Exception:
52
  logging.warning("git pull failed; continuing with current checkout")
53
 
54
- # Make sure all our app/runtime deps (incl. supervisely & ultralytics) are present
55
- pip_install(COMMON_REQUIREMENTS)
 
 
56
 
57
- # Then install repo-specific extras (pycocotools/tensorboard etc. if required)
 
58
  req_file = os.path.join(PY_IMPL_DIR, "requirements.txt")
59
  if os.path.exists(req_file):
60
  pip_install(["-r", req_file])
61
 
62
- # Double-check supervisely importability; if not, try again explicitly.
63
  try:
64
  import supervisely # noqa: F401
65
  except Exception:
@@ -77,8 +79,10 @@ DEFAULT_MODEL_KEY = "rtdetrv2_s"
77
 
78
  # === utilities ================================================================
79
  def handle_remove_readonly(func, path, exc_info):
80
- try: os.chmod(path, stat.S_IWRITE)
81
- except Exception: pass
 
 
82
  func(path)
83
 
84
  _ROBO_URL_RX = re.compile(r"""
@@ -104,8 +108,10 @@ def parse_roboflow_url(s: str):
104
  version = None
105
  if len(parts) >= 3:
106
  v = parts[2]
107
- if v.lower().startswith('v') and v[1:].isdigit(): version = int(v[1:])
108
- elif v.isdigit(): version = int(v)
 
 
109
  return parts[0], parts[1], version
110
  if '/' in s and 'roboflow' not in s:
111
  p = s.split('/')
@@ -113,8 +119,10 @@ def parse_roboflow_url(s: str):
113
  version = None
114
  if len(p) >= 3:
115
  v = p[2]
116
- if v.lower().startswith('v') and v[1:].isdigit(): version = int(v[1:])
117
- elif v.isdigit(): version = int(v)
 
 
118
  return p[0], p[1], version
119
  return None, None, None
120
 
@@ -132,8 +140,10 @@ def _extract_class_names(data_yaml):
132
  names = data_yaml.get('names', None)
133
  if isinstance(names, dict):
134
  def _k(x):
135
- try: return int(x)
136
- except Exception: return str(x)
 
 
137
  keys = sorted(names.keys(), key=_k)
138
  names_list = [names[k] for k in keys]
139
  elif isinstance(names, list):
@@ -150,7 +160,8 @@ def download_dataset(api_key, workspace, project, version):
150
  ver = proj.version(int(version))
151
  dataset = ver.download("yolov8") # labels in YOLO format (we'll convert to COCO)
152
  data_yaml_path = os.path.join(dataset.location, 'data.yaml')
153
- with open(data_yaml_path, 'r') as f: data_yaml = yaml.safe_load(f)
 
154
  class_names = _extract_class_names(data_yaml)
155
  splits = [s for s in ['train', 'valid', 'test'] if os.path.exists(os.path.join(dataset.location, s))]
156
  return dataset.location, class_names, splits, f"{project}-v{version}"
@@ -170,7 +181,8 @@ def yolo_to_coco(split_dir_images, split_dir_labels, class_names, out_json):
170
  ann_id = 1
171
  img_id = 1
172
  for fname in sorted(os.listdir(split_dir_images)):
173
- if not fname.lower().endswith((".jpg",".jpeg",".png")): continue
 
174
  img_path = os.path.join(split_dir_images, fname)
175
  try:
176
  with Image.open(img_path) as im:
@@ -183,19 +195,28 @@ def yolo_to_coco(split_dir_images, split_dir_labels, class_names, out_json):
183
  with open(label_file, "r") as f:
184
  for line in f:
185
  parts = line.strip().split()
186
- if len(parts) < 5: continue
187
- cls = int(float(parts[0]))
188
- cx, cy, bw, bh = map(float, parts[1:5])
189
- x = (cx - bw/2.0) * w
190
- y = (cy - bh/2.0) * h
191
- ww = bw * w
192
- hh = bh * h
193
  annotations.append({
194
  "id": ann_id,
195
  "image_id": img_id,
196
  "category_id": cls,
197
- "bbox": [max(0.0,x), max(0.0,y), max(1.0,ww), max(1.0,hh)],
198
- "area": max(1.0, ww*hh),
199
  "iscrowd": 0,
200
  "segmentation": []
201
  })
@@ -203,7 +224,8 @@ def yolo_to_coco(split_dir_images, split_dir_labels, class_names, out_json):
203
  img_id += 1
204
  coco = {"images": images, "annotations": annotations, "categories": categories}
205
  os.makedirs(os.path.dirname(out_json), exist_ok=True)
206
- with open(out_json, "w") as f: json.dump(coco, f)
 
207
 
208
  def make_coco_annotations(merged_dir, class_names):
209
  ann_dir = os.path.join(merged_dir, "annotations")
@@ -219,28 +241,34 @@ def make_coco_annotations(merged_dir, class_names):
219
 
220
  # === dataset merging ==========================================================
221
  def gather_class_counts(dataset_info, class_mapping):
222
- if not dataset_info: return {}
 
223
  final_names = set(v for v in class_mapping.values() if v is not None)
224
  counts = {name: 0 for name in final_names}
225
  for loc, names, splits, _ in dataset_info:
226
  id_to_name = {idx: class_mapping.get(n, None) for idx, n in enumerate(names)}
227
  for split in splits:
228
  labels_dir = os.path.join(loc, split, 'labels')
229
- if not os.path.exists(labels_dir): continue
 
230
  for label_file in os.listdir(labels_dir):
231
- if not label_file.endswith('.txt'): continue
 
232
  found = set()
233
  with open(os.path.join(labels_dir, label_file), 'r') as f:
234
  for line in f:
235
  parts = line.strip().split()
236
- if not parts: continue
 
237
  try:
238
  cls_id = int(parts[0])
239
  mapped = id_to_name.get(cls_id, None)
240
- if mapped: found.add(mapped)
 
241
  except Exception:
242
  continue
243
- for m in found: counts[m] += 1
 
244
  return counts
245
 
246
  def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=gr.Progress()):
@@ -260,7 +288,8 @@ def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=
260
  for loc, _, splits, _ in dataset_info:
261
  for split in splits:
262
  img_dir = os.path.join(loc, split, 'images')
263
- if not os.path.exists(img_dir): continue
 
264
  for img_file in os.listdir(img_dir):
265
  if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
266
  all_images.append((os.path.join(img_dir, img_file), split, loc))
@@ -272,24 +301,30 @@ def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=
272
 
273
  for img_path, split, source_loc in progress.tqdm(all_images, desc="Analyzing images"):
274
  lbl_path = label_path_for(img_path)
275
- if not os.path.exists(lbl_path): continue
 
276
  source_names = loc_to_names.get(source_loc, [])
277
  image_classes = set()
278
  with open(lbl_path, 'r') as f:
279
  for line in f:
280
  parts = line.strip().split()
281
- if not parts: continue
 
282
  try:
283
  cls_id = int(parts[0])
284
  orig = source_names[cls_id]
285
  mapped = class_mapping.get(orig, orig)
286
- if mapped in active_classes: image_classes.add(mapped)
 
287
  except Exception:
288
  continue
289
- if not image_classes: continue
290
- if any(current_counts[c] >= class_limits[c] for c in image_classes): continue
 
 
291
  selected_images.append((img_path, split))
292
- for c in image_classes: current_counts[c] += 1
 
293
 
294
  progress(0.6, desc=f"Copying {len(selected_images)} files...")
295
  for img_path, split in progress.tqdm(selected_images, desc="Finalizing files"):
@@ -300,13 +335,16 @@ def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=
300
 
301
  source_loc = None
302
  for info in dataset_info:
303
- if img_path.startswith(info[0]): source_loc = info[0]; break
 
 
304
  source_names = loc_to_names.get(source_loc, [])
305
 
306
  with open(lbl_path, 'r') as f_in, open(out_lbl, 'w') as f_out:
307
  for line in f_in:
308
  parts = line.strip().split()
309
- if not parts: continue
 
310
  try:
311
  old_id = int(parts[0])
312
  original_name = source_names[old_id]
@@ -334,10 +372,19 @@ def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=
334
 
335
  # === entrypoint + config detection/generation =================================
336
  def find_training_script(repo_root):
337
  candidates = []
338
- for pat in ["**/tools/train.py", "**/train.py"]:
339
  candidates.extend(glob(os.path.join(repo_root, pat), recursive=True))
340
- candidates.sort(key=lambda p: (0 if "rtdetrv2_pytorch" in p else 1, len(p)))
341
  return candidates[0] if candidates else None
342
 
343
  def find_model_config_template(model_key):
@@ -353,17 +400,37 @@ def find_model_config_template(model_key):
353
  def score(p):
354
  pl = p.lower()
355
  s = 0
356
- if "/rtdetrv2_pytorch/" in pl: s += 4
357
- if "/config" in pl: s += 3
 
 
358
  for token in want_tokens:
359
- if token in os.path.basename(pl): s += 3
360
- if token in pl: s += 2
361
- if "coco" in pl: s += 1
 
 
 
362
  return -s, len(p)
363
 
364
  yamls.sort(key=score)
365
  return yamls[0] if yamls else None
366
 
367
  def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
368
  epochs, batch, imgsz, lr, optimizer):
369
  if not base_cfg_path or not os.path.exists(base_cfg_path):
@@ -383,7 +450,7 @@ def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
383
  "out_dir": os.path.abspath(os.path.join("runs", "train", run_name)),
384
  }
385
 
386
- # dataset block
387
  for root_key in ["dataset", "data"]:
388
  if root_key in cfg and isinstance(cfg[root_key], dict):
389
  ds = cfg[root_key]
@@ -393,21 +460,31 @@ def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
393
  ("test", "test_json", "test_img"),
394
  ]:
395
  if split in ds and isinstance(ds[split], dict):
396
- ds[split]["name"] = ds[split].get("name", "coco")
397
- for k in ["ann_file", "ann_path", "annotation", "annotations"]:
398
- if k in ds[split] or k in ["ann_file", "ann_path"]:
399
- ds[split][k] = paths[jf]; break
400
- for k in ["img_prefix", "img_dir", "image_root", "data_root"]:
401
- if k in ds[split] or k in ["img_prefix", "img_dir"]:
402
- ds[split][k] = paths[ip]; break
403
 
404
  # num_classes
405
  def set_num_classes(node, n):
406
- if not isinstance(node, dict): return False
 
407
  if "num_classes" in node:
408
- node["num_classes"] = int(n); return True
 
409
  for k, v in node.items():
410
- if isinstance(v, dict) and set_num_classes(v, n): return True
 
411
  return False
412
 
413
  if "model" in cfg and isinstance(cfg["model"], dict):
@@ -420,17 +497,23 @@ def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
420
  updated_epoch = False
421
  for key in ["max_epoch", "epochs", "num_epochs"]:
422
  if key in cfg:
423
- cfg[key] = int(epochs); updated_epoch = True; break
 
 
424
  if "solver" in cfg and isinstance(cfg["solver"], dict):
425
  for key in ["max_epoch", "epochs", "num_epochs"]:
426
  if key in cfg["solver"]:
427
- cfg["solver"][key] = int(epochs); updated_epoch = True; break
 
 
428
  if not updated_epoch:
429
  cfg["max_epoch"] = int(epochs)
430
 
431
  for key in ["input_size", "img_size", "imgsz"]:
432
- if key in cfg: cfg[key] = int(imgsz)
433
- if "input_size" not in cfg: cfg["input_size"] = int(imgsz)
 
 
434
 
435
  # lr / optimizer / batch
436
  if "solver" not in cfg or not isinstance(cfg["solver"], dict):
@@ -438,7 +521,8 @@ def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
438
  sol = cfg["solver"]
439
  for key in ["base_lr", "lr", "learning_rate"]:
440
  if key in sol:
441
- sol[key] = float(lr); break
 
442
  else:
443
  sol["base_lr"] = float(lr)
444
 
@@ -456,9 +540,11 @@ def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
456
  else:
457
  cfg["output_dir"] = paths["out_dir"]
458
 
459
- cfg_out_dir = os.path.join("generated_configs"); os.makedirs(cfg_out_dir, exist_ok=True)
 
460
  out_path = os.path.join(cfg_out_dir, f"{run_name}.yaml")
461
- with open(out_path, "w") as f: yaml.safe_dump(cfg, f, sort_keys=False)
 
462
  return out_path
463
 
464
  def find_best_checkpoint(out_dir):
@@ -470,16 +556,21 @@ def find_best_checkpoint(out_dir):
470
  ]
471
  for p in pats:
472
  f = sorted(glob(p, recursive=True))
473
- if f: return f[0]
474
- any_ckpt = sorted(glob(os.path.join(out_dir, "**", "*.pt"), recursive=True) +
475
- glob(os.path.join(out_dir, "**", "*.pth"), recursive=True))
 
 
 
476
  return any_ckpt[-1] if any_ckpt else None
477
 
478
  # === Gradio handlers ==========================================================
479
  def load_datasets_handler(api_key, url_file, progress=gr.Progress()):
480
  api_key = api_key or os.getenv("ROBOFLOW_API_KEY", "")
481
- if not api_key: raise gr.Error("Roboflow API Key is required (or set ROBOFLOW_API_KEY).")
482
- if not url_file: raise gr.Error("Upload a .txt with Roboflow URLs or 'workspace/project[/vN]' lines.")
 
 
483
 
484
  with open(url_file.name, 'r', encoding='utf-8', errors='ignore') as f:
485
  urls = [line.strip() for line in f if line.strip()]
@@ -497,8 +588,10 @@ def load_datasets_handler(api_key, url_file, progress=gr.Progress()):
497
  failures.append((raw, f"No latest version for {ws}/{proj}"))
498
  continue
499
  loc, names, splits, name_str = download_dataset(api_key, ws, proj, int(ver))
500
- if loc: dataset_info.append((loc, names, splits, name_str))
501
- else: failures.append((raw, f"DownloadError: {ws}/{proj}/v{ver}"))
 
 
502
 
503
  if not dataset_info:
504
  msg = "No datasets loaded.\n" + "\n".join([f"- {u}: {why}" for u, why in failures[:10]])
@@ -510,11 +603,13 @@ def load_datasets_handler(api_key, url_file, progress=gr.Progress()):
510
  df = pd.DataFrame([[n, n, counts.get(n, 0), False] for n in all_names],
511
  columns=["Original Name", "Rename To", "Max Images", "Remove"])
512
  status = "Datasets loaded successfully."
513
- if failures: status += f" ({len(dataset_info)} OK, {len(failures)} failed; see logs)."
 
514
  return status, dataset_info, df
515
 
516
  def update_class_counts_handler(class_df, dataset_info):
517
- if class_df is None or not dataset_info: return None
 
518
  class_df = pd.DataFrame(class_df)
519
  mapping = {row["Original Name"]: (None if bool(row["Remove"]) else row["Rename To"])
520
  for _, row in class_df.iterrows()}
@@ -524,41 +619,34 @@ def update_class_counts_handler(class_df, dataset_info):
524
  id_to_final = {idx: mapping.get(n, None) for idx, n in enumerate(names)}
525
  for split in splits:
526
  labels_dir = os.path.join(loc, split, 'labels')
527
- if not os.path.exists(labels_dir): continue
 
528
  for label_file in os.listdir(labels_dir):
529
- if not label_file.endswith('.txt'): continue
 
530
  found = set()
531
  with open(os.path.join(labels_dir, label_file), 'r') as f:
532
  for line in f:
533
  parts = line.strip().split()
534
- if not parts: continue
 
535
  try:
536
  cls_id = int(parts[0])
537
  mapped = id_to_final.get(cls_id, None)
538
- if mapped: found.add(mapped)
 
539
  except Exception:
540
  continue
541
- for m in found: counts[m] += 1
 
542
  return pd.DataFrame(list(counts.items()), columns=["Final Class Name", "Est. Total Images"])
543
 
544
- def finalize_handler(dataset_info, class_df, progress=gr.Progress()):
545
- if not dataset_info: raise gr.Error("Load datasets first in Tab 1.")
546
- if class_df is None: raise gr.Error("Class data is missing.")
547
- class_df = pd.DataFrame(class_df)
548
- class_mapping, class_limits = {}, {}
549
- for _, row in class_df.iterrows():
550
- orig = row["Original Name"]
551
- if bool(row["Remove"]): continue
552
- final_name = row["Rename To"]
553
- class_mapping[orig] = final_name
554
- class_limits[final_name] = class_limits.get(final_name, 0) + int(row["Max Images"])
555
- status, path = finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress)
556
- return status, path
557
-
558
  def training_handler(dataset_path, model_key, run_name, epochs, batch, imgsz, lr, opt, progress=gr.Progress()):
559
- if not dataset_path: raise gr.Error("Finalize a dataset in Tab 2 before training.")
 
560
 
561
  train_script = find_training_script(REPO_DIR)
 
562
  if not train_script:
563
  raise gr.Error("RT-DETRv2 training script not found inside the repo (looked for **/tools/train.py).")
564
 
@@ -567,7 +655,8 @@ def training_handler(dataset_path, model_key, run_name, epochs, batch, imgsz, lr
567
  raise gr.Error("Could not find a matching RT-DETRv2 config in the repo (S/L/X).")
568
 
569
  data_yaml = os.path.join(dataset_path, "data.yaml")
570
- with open(data_yaml, "r") as f: dy = yaml.safe_load(f)
 
571
  class_names = [str(x) for x in dy.get("names", [])]
572
  make_coco_annotations(dataset_path, class_names)
573
 
@@ -600,7 +689,8 @@ def training_handler(dataset_path, model_key, run_name, epochs, batch, imgsz, lr
600
  proc = subprocess.Popen(cmd, cwd=os.path.dirname(train_script),
601
  stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
602
  bufsize=1, text=True, env=env)
603
- for line in proc.stdout: q.put(line.rstrip())
 
604
  proc.wait()
605
  q.put(f"__EXITCODE__:{proc.returncode}")
606
  except Exception as e:
@@ -610,38 +700,78 @@ def training_handler(dataset_path, model_key, run_name, epochs, batch, imgsz, lr
610
 
611
  log_tail, last_epoch, total_epochs = [], 0, int(epochs)
612
  first_lines = []
 
613
  while True:
614
  line = q.get()
615
  if line.startswith("__EXITCODE__"):
616
- code = int(line.split(":",1)[1])
617
  if code != 0:
618
  head = "\n".join(first_lines[:60])
619
  raise gr.Error(f"Training exited with code {code}.\nLast output:\n{head or 'No logs captured.'}")
620
  break
621
  if line.startswith("__ERROR__"):
622
- raise gr.Error(f"Training failed: {line.split(':',1)[1]}")
623
 
624
- if len(first_lines) < 120: first_lines.append(line)
625
- log_tail.append(line); log_tail = log_tail[-40:]
 
 
626
 
627
  m = re.search(r"[Ee]poch\s+(\d+)\s*/\s*(\d+)", line)
628
  if m:
629
  try:
630
- last_epoch = int(m.group(1)); total_epochs = max(total_epochs, int(m.group(2)))
631
- except Exception: pass
632
- progress(min(max(last_epoch / max(1,total_epochs),0.0),1.0), desc=f"Epoch {last_epoch}/{total_epochs}")
633
 
634
- fig1 = plt.figure(); plt.title("Loss (see logs)")
635
- fig2 = plt.figure(); plt.title("mAP (see logs)")
636
  yield "\n".join(log_tail), fig1, fig2, None
637
 
638
  ckpt = find_best_checkpoint(out_dir) or find_best_checkpoint("runs")
639
  if not ckpt or not os.path.exists(ckpt):
640
  raise gr.Error("Training finished, but checkpoint file not found. Check logs/output directory.")
641
  yield "Training complete!", None, None, gr.File.update(value=ckpt, visible=True)
642
 
643
  def upload_handler(model_file, hf_token, hf_repo, gh_token, gh_repo, progress=gr.Progress()):
644
- if not model_file: raise gr.Error("No trained model file to upload.")
 
645
  from huggingface_hub import HfApi, HfFolder
646
  hf_status = "Skipped Hugging Face."
647
  if hf_token and hf_repo:
@@ -658,21 +788,27 @@ def upload_handler(model_file, hf_token, hf_repo, gh_token, gh_repo, progress=gr
658
  if gh_token and gh_repo:
659
  progress(0.5, desc="Uploading to GitHub...")
660
  try:
661
- if '/' not in gh_repo: raise ValueError("GitHub repo must be 'username/repo'.")
 
662
  username, repo_name = gh_repo.split('/')
663
  api_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{os.path.basename(model_file.name)}"
664
  headers = {"Authorization": f"token {gh_token}"}
665
- with open(model_file.name, "rb") as f: content = base64.b64encode(f.read()).decode()
 
666
  get_resp = requests.get(api_url, headers=headers, timeout=30)
667
  sha = get_resp.json().get('sha') if get_resp.ok else None
668
  data = {"message": "Upload trained model from Rolo app", "content": content}
669
- if sha: data["sha"] = sha
 
670
  put_resp = requests.put(api_url, headers=headers, json=data, timeout=60)
671
- if put_resp.ok: gh_status = f"Success! {put_resp.json()['content']['html_url']}"
672
- else: gh_status = f"GitHub Error: {put_resp.json().get('message','Unknown')}"
 
 
673
  except Exception as e:
674
  gh_status = f"GitHub Error: {e}"
675
- progress(1); return hf_status, gh_status
 
676
 
677
  # === UI =======================================================================
678
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky")) as app:
@@ -751,4 +887,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky")) as app:
751
 
752
  if __name__ == "__main__":
753
  os.environ.setdefault("YOLO_CONFIG_DIR", "/tmp/Ultralytics") # silence stray warnings from other libs
754
  app.launch(debug=True)
 
17
  REPO_DIR = os.path.join(os.getcwd(), "third_party", "RT-DETRv2")
18
  PY_IMPL_DIR = os.path.join(REPO_DIR, "rtdetrv2_pytorch") # Supervisely keeps PyTorch impl here
19
 
20
+ # Core deps + your requested packages; pinned as lower-bounds to avoid downgrades (local runs only)
21
  COMMON_REQUIREMENTS = [
22
  "gradio>=4.36.1",
23
  "ultralytics>=8.2.0",
 
30
  "torchvision>=0.15.2",
31
  "pyyaml>=6.0.1",
32
  "Pillow>=10.0.0",
33
+ "supervisely>=6.0.0",
34
+ "tensorboard>=2.13.0",
35
+ "pycocotools>=2.0.7",
36
  ]
37
 
38
  # === bootstrap (clone + pip) ===================================================
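Note: the bootstrap below calls a pip_install() helper that falls outside this diff's context. A minimal sketch of what it is assumed to do (the helper name is from the code; its body here is an assumption):

    import subprocess, sys

    def pip_install(args):
        # args is either a list of requirement specs or ["-r", "path/to/requirements.txt"]
        subprocess.check_call([sys.executable, "-m", "pip", "install", *args])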
 
51
  except Exception:
52
  logging.warning("git pull failed; continuing with current checkout")
53
 
54
+ # On HF Spaces: expect requirements.txt to be used at build time; skip heavy runtime installs
55
+ if os.getenv("HF_SPACE") == "1" or os.getenv("SPACE_ID"):
56
+ logging.info("Detected Hugging Face Space — skipping runtime pip installs.")
57
+ return
58
 
59
+ # Local fallback (non-Spaces)
60
+ pip_install(COMMON_REQUIREMENTS)
61
  req_file = os.path.join(PY_IMPL_DIR, "requirements.txt")
62
  if os.path.exists(req_file):
63
  pip_install(["-r", req_file])
64
 
 
65
  try:
66
  import supervisely # noqa: F401
67
  except Exception:
 
79
 
80
  # === utilities ================================================================
81
  def handle_remove_readonly(func, path, exc_info):
82
+ try:
83
+ os.chmod(path, stat.S_IWRITE)
84
+ except Exception:
85
+ pass
86
  func(path)
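This is the usual onerror callback for clearing read-only files on Windows before deletion; it is presumably wired up elsewhere in the file along the lines of (illustrative, not shown in this diff):

    shutil.rmtree(REPO_DIR, onerror=handle_remove_readonly)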
87
 
88
  _ROBO_URL_RX = re.compile(r"""
 
108
  version = None
109
  if len(parts) >= 3:
110
  v = parts[2]
111
+ if v.lower().startswith('v') and v[1:].isdigit():
112
+ version = int(v[1:])
113
+ elif v.isdigit():
114
+ version = int(v)
115
  return parts[0], parts[1], version
116
  if '/' in s and 'roboflow' not in s:
117
  p = s.split('/')
 
119
  version = None
120
  if len(p) >= 3:
121
  v = p[2]
122
+ if v.lower().startswith('v') and v[1:].isdigit():
123
+ version = int(v[1:])
124
+ elif v.isdigit():
125
+ version = int(v)
126
  return p[0], p[1], version
127
  return None, None, None
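For the plain 'workspace/project[/vN]' branch shown above (the roboflow.com URL regex branch sits outside this hunk), the parser behaves as follows, with made-up workspace/project names:

    parse_roboflow_url("my-team/traffic-signs/v3")  # -> ("my-team", "traffic-signs", 3)
    parse_roboflow_url("my-team/traffic-signs/3")   # -> ("my-team", "traffic-signs", 3)
    parse_roboflow_url("my-team/traffic-signs")     # -> ("my-team", "traffic-signs", None)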
128
 
 
140
  names = data_yaml.get('names', None)
141
  if isinstance(names, dict):
142
  def _k(x):
143
+ try:
144
+ return int(x)
145
+ except Exception:
146
+ return str(x)
147
  keys = sorted(names.keys(), key=_k)
148
  names_list = [names[k] for k in keys]
149
  elif isinstance(names, list):
 
160
  ver = proj.version(int(version))
161
  dataset = ver.download("yolov8") # labels in YOLO format (we'll convert to COCO)
162
  data_yaml_path = os.path.join(dataset.location, 'data.yaml')
163
+ with open(data_yaml_path, 'r') as f:
164
+ data_yaml = yaml.safe_load(f)
165
  class_names = _extract_class_names(data_yaml)
166
  splits = [s for s in ['train', 'valid', 'test'] if os.path.exists(os.path.join(dataset.location, s))]
167
  return dataset.location, class_names, splits, f"{project}-v{version}"
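_extract_class_names (above) accepts both shapes that Roboflow/Ultralytics data.yaml files use for names; illustrative calls, assuming the list branch returns the list unchanged:

    _extract_class_names({"names": ["car", "person"]})        # -> ["car", "person"]
    _extract_class_names({"names": {1: "person", 0: "car"}})  # -> ["car", "person"]  (dict keys sorted numerically)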
 
181
  ann_id = 1
182
  img_id = 1
183
  for fname in sorted(os.listdir(split_dir_images)):
184
+ if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
185
+ continue
186
  img_path = os.path.join(split_dir_images, fname)
187
  try:
188
  with Image.open(img_path) as im:
 
195
  with open(label_file, "r") as f:
196
  for line in f:
197
  parts = line.strip().split()
198
+ if len(parts) < 5:
199
+ continue
200
+ try:
201
+ cls = int(float(parts[0]))
202
+ cx, cy, bw, bh = map(float, parts[1:5])
203
+ except Exception:
204
+ continue
205
+ x = max(0.0, (cx - bw / 2.0) * w)
206
+ y = max(0.0, (cy - bh / 2.0) * h)
207
+ ww = max(1.0, bw * w)
208
+ hh = max(1.0, bh * h)
209
+ # clamp right/bottom to image bounds
210
+ if x + ww > w:
211
+ ww = max(1.0, w - x)
212
+ if y + hh > h:
213
+ hh = max(1.0, h - y)
214
  annotations.append({
215
  "id": ann_id,
216
  "image_id": img_id,
217
  "category_id": cls,
218
+ "bbox": [x, y, ww, hh],
219
+ "area": max(1.0, ww * hh),
220
  "iscrowd": 0,
221
  "segmentation": []
222
  })
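Worked example for the conversion above: on a 640x480 image, the YOLO line "0 0.5 0.5 0.25 0.5" (class, cx, cy, w, h, all normalized) becomes

    x  = (0.5 - 0.25/2) * 640 = 240.0
    y  = (0.5 - 0.5/2)  * 480 = 120.0
    ww = 0.25 * 640 = 160.0
    hh = 0.5  * 480 = 240.0
    # COCO bbox = [240.0, 120.0, 160.0, 240.0], area = 38400.0; the clamps above do not trigger here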
 
224
  img_id += 1
225
  coco = {"images": images, "annotations": annotations, "categories": categories}
226
  os.makedirs(os.path.dirname(out_json), exist_ok=True)
227
+ with open(out_json, "w") as f:
228
+ json.dump(coco, f)
229
 
230
  def make_coco_annotations(merged_dir, class_names):
231
  ann_dir = os.path.join(merged_dir, "annotations")
 
241
 
242
  # === dataset merging ==========================================================
243
  def gather_class_counts(dataset_info, class_mapping):
244
+ if not dataset_info:
245
+ return {}
246
  final_names = set(v for v in class_mapping.values() if v is not None)
247
  counts = {name: 0 for name in final_names}
248
  for loc, names, splits, _ in dataset_info:
249
  id_to_name = {idx: class_mapping.get(n, None) for idx, n in enumerate(names)}
250
  for split in splits:
251
  labels_dir = os.path.join(loc, split, 'labels')
252
+ if not os.path.exists(labels_dir):
253
+ continue
254
  for label_file in os.listdir(labels_dir):
255
+ if not label_file.endswith('.txt'):
256
+ continue
257
  found = set()
258
  with open(os.path.join(labels_dir, label_file), 'r') as f:
259
  for line in f:
260
  parts = line.strip().split()
261
+ if not parts:
262
+ continue
263
  try:
264
  cls_id = int(parts[0])
265
  mapped = id_to_name.get(cls_id, None)
266
+ if mapped:
267
+ found.add(mapped)
268
  except Exception:
269
  continue
270
+ for m in found:
271
+ counts[m] += 1
272
  return counts
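Here class_mapping maps each original class name to its final name (None means the class is dropped), and the returned counts tally label files per final class, counting each file at most once. Illustrative, with made-up names and numbers:

    class_mapping = {"car": "vehicle", "truck": "vehicle", "crosswalk": None}
    gather_class_counts(dataset_info, class_mapping)
    # -> {"vehicle": 1523}  i.e. 1523 images contain at least one box that maps to "vehicle"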
273
 
274
  def finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress=gr.Progress()):
 
288
  for loc, _, splits, _ in dataset_info:
289
  for split in splits:
290
  img_dir = os.path.join(loc, split, 'images')
291
+ if not os.path.exists(img_dir):
292
+ continue
293
  for img_file in os.listdir(img_dir):
294
  if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
295
  all_images.append((os.path.join(img_dir, img_file), split, loc))
 
301
 
302
  for img_path, split, source_loc in progress.tqdm(all_images, desc="Analyzing images"):
303
  lbl_path = label_path_for(img_path)
304
+ if not os.path.exists(lbl_path):
305
+ continue
306
  source_names = loc_to_names.get(source_loc, [])
307
  image_classes = set()
308
  with open(lbl_path, 'r') as f:
309
  for line in f:
310
  parts = line.strip().split()
311
+ if not parts:
312
+ continue
313
  try:
314
  cls_id = int(parts[0])
315
  orig = source_names[cls_id]
316
  mapped = class_mapping.get(orig, orig)
317
+ if mapped in active_classes:
318
+ image_classes.add(mapped)
319
  except Exception:
320
  continue
321
+ if not image_classes:
322
+ continue
323
+ if any(current_counts[c] >= class_limits[c] for c in image_classes):
324
+ continue
325
  selected_images.append((img_path, split))
326
+ for c in image_classes:
327
+ current_counts[c] += 1
328
 
329
  progress(0.6, desc=f"Copying {len(selected_images)} files...")
330
  for img_path, split in progress.tqdm(selected_images, desc="Finalizing files"):
 
335
 
336
  source_loc = None
337
  for info in dataset_info:
338
+ if img_path.startswith(info[0]):
339
+ source_loc = info[0]
340
+ break
341
  source_names = loc_to_names.get(source_loc, [])
342
 
343
  with open(lbl_path, 'r') as f_in, open(out_lbl, 'w') as f_out:
344
  for line in f_in:
345
  parts = line.strip().split()
346
+ if not parts:
347
+ continue
348
  try:
349
  old_id = int(parts[0])
350
  original_name = source_names[old_id]
 
372
 
373
  # === entrypoint + config detection/generation =================================
374
  def find_training_script(repo_root):
375
+ # Hard-prefer the canonical path widely used in the repo/issues
376
+ canonical = os.path.join(repo_root, "rtdetrv2_pytorch", "tools", "train.py")
377
+ if os.path.exists(canonical):
378
+ return canonical
379
+
380
  candidates = []
381
+ for pat in ["**/tools/train.py", "**/train.py", "**/tools/train_net.py"]:
382
  candidates.extend(glob(os.path.join(repo_root, pat), recursive=True))
383
+ # Prefer anything inside rtdetrv2_pytorch, then shorter paths
384
+ def _score(p):
385
+ pl = p.replace("\\", "/").lower()
386
+ return (0 if "rtdetrv2_pytorch" in pl else 1, len(p))
387
+ candidates.sort(key=_score)
388
  return candidates[0] if candidates else None
389
 
390
  def find_model_config_template(model_key):
 
400
  def score(p):
401
  pl = p.lower()
402
  s = 0
403
+ if "/rtdetrv2_pytorch/" in pl:
404
+ s += 4
405
+ if "/config" in pl:
406
+ s += 3
407
  for token in want_tokens:
408
+ if token in os.path.basename(pl):
409
+ s += 3
410
+ if token in pl:
411
+ s += 2
412
+ if "coco" in pl:
413
+ s += 1
414
  return -s, len(p)
415
 
416
  yamls.sort(key=score)
417
  return yamls[0] if yamls else None
418
 
419
+ def _set_first_existing_key(d: dict, keys: list, value, fallback_key: str | None = None):
420
+ """
421
+ If any key from `keys` exists in dict `d`, set the first one found to `value`.
422
+ Otherwise, if `fallback_key` is given, create it with `value`.
423
+ Returns the key that was set, or None.
424
+ """
425
+ for k in keys:
426
+ if k in d:
427
+ d[k] = value
428
+ return k
429
+ if fallback_key:
430
+ d[fallback_key] = value
431
+ return fallback_key
432
+ return None
433
+
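Illustrative call, matching the docstring above:

    d = {"ann_path": "old.json"}
    _set_first_existing_key(d, keys=["ann_file", "ann_path"], value="new.json", fallback_key="ann_file")
    # -> returns "ann_path"; d is now {"ann_path": "new.json"} (fallback_key is only created when no listed key exists)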
434
  def patch_base_config(base_cfg_path, merged_dir, class_count, run_name,
435
  epochs, batch, imgsz, lr, optimizer):
436
  if not base_cfg_path or not os.path.exists(base_cfg_path):
 
450
  "out_dir": os.path.abspath(os.path.join("runs", "train", run_name)),
451
  }
452
 
453
+ # dataset block: set an existing alias if present, otherwise add a common key
454
  for root_key in ["dataset", "data"]:
455
  if root_key in cfg and isinstance(cfg[root_key], dict):
456
  ds = cfg[root_key]
 
460
  ("test", "test_json", "test_img"),
461
  ]:
462
  if split in ds and isinstance(ds[split], dict):
463
+ node = ds[split]
464
+ node["name"] = node.get("name", "coco")
465
+ _set_first_existing_key(
466
+ node,
467
+ keys=["ann_file", "ann_path", "annotation", "annotations"],
468
+ value=paths[jf],
469
+ fallback_key="ann_file",
470
+ )
471
+ _set_first_existing_key(
472
+ node,
473
+ keys=["img_prefix", "img_dir", "image_root", "data_root"],
474
+ value=paths[ip],
475
+ fallback_key="img_prefix",
476
+ )
477
 
478
  # num_classes
479
  def set_num_classes(node, n):
480
+ if not isinstance(node, dict):
481
+ return False
482
  if "num_classes" in node:
483
+ node["num_classes"] = int(n)
484
+ return True
485
  for k, v in node.items():
486
+ if isinstance(v, dict) and set_num_classes(v, n):
487
+ return True
488
  return False
489
 
490
  if "model" in cfg and isinstance(cfg["model"], dict):
 
497
  updated_epoch = False
498
  for key in ["max_epoch", "epochs", "num_epochs"]:
499
  if key in cfg:
500
+ cfg[key] = int(epochs)
501
+ updated_epoch = True
502
+ break
503
  if "solver" in cfg and isinstance(cfg["solver"], dict):
504
  for key in ["max_epoch", "epochs", "num_epochs"]:
505
  if key in cfg["solver"]:
506
+ cfg["solver"][key] = int(epochs)
507
+ updated_epoch = True
508
+ break
509
  if not updated_epoch:
510
  cfg["max_epoch"] = int(epochs)
511
 
512
  for key in ["input_size", "img_size", "imgsz"]:
513
+ if key in cfg:
514
+ cfg[key] = int(imgsz)
515
+ if "input_size" not in cfg:
516
+ cfg["input_size"] = int(imgsz)
517
 
518
  # lr / optimizer / batch
519
  if "solver" not in cfg or not isinstance(cfg["solver"], dict):
 
521
  sol = cfg["solver"]
522
  for key in ["base_lr", "lr", "learning_rate"]:
523
  if key in sol:
524
+ sol[key] = float(lr)
525
+ break
526
  else:
527
  sol["base_lr"] = float(lr)
528
 
 
540
  else:
541
  cfg["output_dir"] = paths["out_dir"]
542
 
543
+ cfg_out_dir = os.path.join("generated_configs")
544
+ os.makedirs(cfg_out_dir, exist_ok=True)
545
  out_path = os.path.join(cfg_out_dir, f"{run_name}.yaml")
546
+ with open(out_path, "w") as f:
547
+ yaml.safe_dump(cfg, f, sort_keys=False)
548
  return out_path
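After patching, a dataset split node ends up looking roughly like this (illustrative paths; the exact keys depend on which aliases the base config already defines):

    cfg["dataset"]["train"] = {
        "name": "coco",
        "ann_file": "<merged_dir>/annotations/train.json",  # or ann_path/annotation/annotations, whichever exists
        "img_prefix": "<merged_dir>/train/images",          # or img_dir/image_root/data_root, whichever exists
        # any other keys from the base config are left untouched
    }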
549
 
550
  def find_best_checkpoint(out_dir):
 
556
  ]
557
  for p in pats:
558
  f = sorted(glob(p, recursive=True))
559
+ if f:
560
+ return f[0]
561
+ any_ckpt = sorted(
562
+ glob(os.path.join(out_dir, "**", "*.pt"), recursive=True)
563
+ + glob(os.path.join(out_dir, "**", "*.pth"), recursive=True)
564
+ )
565
  return any_ckpt[-1] if any_ckpt else None
566
 
567
  # === Gradio handlers ==========================================================
568
  def load_datasets_handler(api_key, url_file, progress=gr.Progress()):
569
  api_key = api_key or os.getenv("ROBOFLOW_API_KEY", "")
570
+ if not api_key:
571
+ raise gr.Error("Roboflow API Key is required (or set ROBOFLOW_API_KEY).")
572
+ if not url_file:
573
+ raise gr.Error("Upload a .txt with Roboflow URLs or 'workspace/project[/vN]' lines.")
574
 
575
  with open(url_file.name, 'r', encoding='utf-8', errors='ignore') as f:
576
  urls = [line.strip() for line in f if line.strip()]
 
588
  failures.append((raw, f"No latest version for {ws}/{proj}"))
589
  continue
590
  loc, names, splits, name_str = download_dataset(api_key, ws, proj, int(ver))
591
+ if loc:
592
+ dataset_info.append((loc, names, splits, name_str))
593
+ else:
594
+ failures.append((raw, f"DownloadError: {ws}/{proj}/v{ver}"))
595
 
596
  if not dataset_info:
597
  msg = "No datasets loaded.\n" + "\n".join([f"- {u}: {why}" for u, why in failures[:10]])
 
603
  df = pd.DataFrame([[n, n, counts.get(n, 0), False] for n in all_names],
604
  columns=["Original Name", "Rename To", "Max Images", "Remove"])
605
  status = "Datasets loaded successfully."
606
+ if failures:
607
+ status += f" ({len(dataset_info)} OK, {len(failures)} failed; see logs)."
608
  return status, dataset_info, df
609
 
610
  def update_class_counts_handler(class_df, dataset_info):
611
+ if class_df is None or not dataset_info:
612
+ return None
613
  class_df = pd.DataFrame(class_df)
614
  mapping = {row["Original Name"]: (None if bool(row["Remove"]) else row["Rename To"])
615
  for _, row in class_df.iterrows()}
 
619
  id_to_final = {idx: mapping.get(n, None) for idx, n in enumerate(names)}
620
  for split in splits:
621
  labels_dir = os.path.join(loc, split, 'labels')
622
+ if not os.path.exists(labels_dir):
623
+ continue
624
  for label_file in os.listdir(labels_dir):
625
+ if not label_file.endswith('.txt'):
626
+ continue
627
  found = set()
628
  with open(os.path.join(labels_dir, label_file), 'r') as f:
629
  for line in f:
630
  parts = line.strip().split()
631
+ if not parts:
632
+ continue
633
  try:
634
  cls_id = int(parts[0])
635
  mapped = id_to_final.get(cls_id, None)
636
+ if mapped:
637
+ found.add(mapped)
638
  except Exception:
639
  continue
640
+ for m in found:
641
+ counts[m] += 1
642
  return pd.DataFrame(list(counts.items()), columns=["Final Class Name", "Est. Total Images"])
643
 
644
  def training_handler(dataset_path, model_key, run_name, epochs, batch, imgsz, lr, opt, progress=gr.Progress()):
645
+ if not dataset_path:
646
+ raise gr.Error("Finalize a dataset in Tab 2 before training.")
647
 
648
  train_script = find_training_script(REPO_DIR)
649
+ logging.info(f"Resolved training script: {train_script}")
650
  if not train_script:
651
  raise gr.Error("RT-DETRv2 training script not found inside the repo (looked for **/tools/train.py).")
652
 
 
655
  raise gr.Error("Could not find a matching RT-DETRv2 config in the repo (S/L/X).")
656
 
657
  data_yaml = os.path.join(dataset_path, "data.yaml")
658
+ with open(data_yaml, "r") as f:
659
+ dy = yaml.safe_load(f)
660
  class_names = [str(x) for x in dy.get("names", [])]
661
  make_coco_annotations(dataset_path, class_names)
662
 
 
689
  proc = subprocess.Popen(cmd, cwd=os.path.dirname(train_script),
690
  stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
691
  bufsize=1, text=True, env=env)
692
+ for line in proc.stdout:
693
+ q.put(line.rstrip())
694
  proc.wait()
695
  q.put(f"__EXITCODE__:{proc.returncode}")
696
  except Exception as e:
 
700
 
701
  log_tail, last_epoch, total_epochs = [], 0, int(epochs)
702
  first_lines = []
703
+ line_no = 0
704
  while True:
705
  line = q.get()
706
  if line.startswith("__EXITCODE__"):
707
+ code = int(line.split(":", 1)[1])
708
  if code != 0:
709
  head = "\n".join(first_lines[:60])
710
  raise gr.Error(f"Training exited with code {code}.\nLast output:\n{head or 'No logs captured.'}")
711
  break
712
  if line.startswith("__ERROR__"):
713
+ raise gr.Error(f"Training failed: {line.split(':', 1)[1]}")
714
 
715
+ if len(first_lines) < 120:
716
+ first_lines.append(line)
717
+ log_tail.append(line)
718
+ log_tail = log_tail[-40:]
719
 
720
  m = re.search(r"[Ee]poch\s+(\d+)\s*/\s*(\d+)", line)
721
  if m:
722
  try:
723
+ last_epoch = int(m.group(1))
724
+ total_epochs = max(total_epochs, int(m.group(2)))
725
+ except Exception:
726
+ pass
727
+ progress(min(max(last_epoch / max(1, total_epochs), 0.0), 1.0), desc=f"Epoch {last_epoch}/{total_epochs}")
728
+
729
+ # Throttle plotting; close figs after yield to avoid leaks
730
+ line_no += 1
731
+ fig1 = fig2 = None
732
+ if line_no % 80 == 0:
733
+ fig1 = plt.figure()
734
+ plt.title("Loss (see logs)")
735
+ plt.plot([0, last_epoch], [0, 0])
736
+ plt.tight_layout()
737
+
738
+ fig2 = plt.figure()
739
+ plt.title("mAP (see logs)")
740
+ plt.plot([0, last_epoch], [0, 0])
741
+ plt.tight_layout()
742
 
 
 
743
  yield "\n".join(log_tail), fig1, fig2, None
744
 
745
+ if fig1 is not None:
746
+ plt.close(fig1)
747
+ if fig2 is not None:
748
+ plt.close(fig2)
749
+
750
  ckpt = find_best_checkpoint(out_dir) or find_best_checkpoint("runs")
751
  if not ckpt or not os.path.exists(ckpt):
752
  raise gr.Error("Training finished, but checkpoint file not found. Check logs/output directory.")
753
  yield "Training complete!", None, None, gr.File.update(value=ckpt, visible=True)
754
 
755
+ def finalize_handler(dataset_info, class_df, progress=gr.Progress()):
756
+ if not dataset_info:
757
+ raise gr.Error("Load datasets first in Tab 1.")
758
+ if class_df is None:
759
+ raise gr.Error("Class data is missing.")
760
+ class_df = pd.DataFrame(class_df)
761
+ class_mapping, class_limits = {}, {}
762
+ for _, row in class_df.iterrows():
763
+ orig = row["Original Name"]
764
+ if bool(row["Remove"]):
765
+ continue
766
+ final_name = row["Rename To"]
767
+ class_mapping[orig] = final_name
768
+ class_limits[final_name] = class_limits.get(final_name, 0) + int(row["Max Images"])
769
+ status, path = finalize_merged_dataset(dataset_info, class_mapping, class_limits, progress)
770
+ return status, path
771
+
772
  def upload_handler(model_file, hf_token, hf_repo, gh_token, gh_repo, progress=gr.Progress()):
773
+ if not model_file:
774
+ raise gr.Error("No trained model file to upload.")
775
  from huggingface_hub import HfApi, HfFolder
776
  hf_status = "Skipped Hugging Face."
777
  if hf_token and hf_repo:
 
788
  if gh_token and gh_repo:
789
  progress(0.5, desc="Uploading to GitHub...")
790
  try:
791
+ if '/' not in gh_repo:
792
+ raise ValueError("GitHub repo must be 'username/repo'.")
793
  username, repo_name = gh_repo.split('/')
794
  api_url = f"https://api.github.com/repos/{username}/{repo_name}/contents/{os.path.basename(model_file.name)}"
795
  headers = {"Authorization": f"token {gh_token}"}
796
+ with open(model_file.name, "rb") as f:
797
+ content = base64.b64encode(f.read()).decode()
798
  get_resp = requests.get(api_url, headers=headers, timeout=30)
799
  sha = get_resp.json().get('sha') if get_resp.ok else None
800
  data = {"message": "Upload trained model from Rolo app", "content": content}
801
+ if sha:
802
+ data["sha"] = sha
803
  put_resp = requests.put(api_url, headers=headers, json=data, timeout=60)
804
+ if put_resp.ok:
805
+ gh_status = f"Success! {put_resp.json()['content']['html_url']}"
806
+ else:
807
+ gh_status = f"GitHub Error: {put_resp.json().get('message','Unknown')}"
808
  except Exception as e:
809
  gh_status = f"GitHub Error: {e}"
810
+ progress(1)
811
+ return hf_status, gh_status
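The GitHub branch above uses the REST contents API (PUT /repos/{owner}/{repo}/contents/{path} with base64-encoded content, plus the existing file's sha when updating). The Hugging Face branch is elided from this hunk; a minimal sketch of what it presumably does with huggingface_hub (repo handling here is an assumption):

    from huggingface_hub import HfApi
    api = HfApi(token=hf_token)
    api.create_repo(repo_id=hf_repo, exist_ok=True)  # assumed; no-op if the repo already exists
    api.upload_file(path_or_fileobj=model_file.name,
                    path_in_repo=os.path.basename(model_file.name),
                    repo_id=hf_repo)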
812
 
813
  # === UI =======================================================================
814
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="sky")) as app:
 
887
 
888
  if __name__ == "__main__":
889
  os.environ.setdefault("YOLO_CONFIG_DIR", "/tmp/Ultralytics") # silence stray warnings from other libs
890
+ # Log training script resolution at startup for quick troubleshooting
891
+ try:
892
+ ts = find_training_script(REPO_DIR)
893
+ logging.info(f"Startup check — training script at: {ts}")
894
+ except Exception as e:
895
+ logging.warning(f"Startup training-script check failed: {e}")
896
  app.launch(debug=True)