Spaces:

hanszhu
/

Dense-Captioning-Platform

Sleeping

App Files Files Community

hanszhu committed on 18 days ago

Commit

a39d1c3

1 Parent(s): eb4d305

deploy(space): push working Gradio app with API /predict, MedSAM auto-overlay, HF model downloads, cleaned requirements

Browse files

Files changed (3) hide show

Dockerfile +0 -29
README.md +1 -1
app.py +77 -119

Dockerfile DELETED Viewed

@@ -1,29 +0,0 @@
-FROM python:3.10-slim
-ENV DEBIAN_FRONTEND=noninteractive \
-	PIP_NO_CACHE_DIR=1 \
-	MPLBACKEND=Agg \
-	MIM_IGNORE_INSTALL_PYTORCH=1
-RUN apt-get update && apt-get install -y --no-install-recommends \
-	libgl1 libglib2.0-0 git && \
-	rm -rf /var/lib/apt/lists/*
-WORKDIR /app
-COPY requirements.txt /app/requirements.txt
-# Install pip deps and the mm stack with openmim
-RUN python -m pip install -U pip openmim && \
-	pip install -r requirements.txt && \
-	mim install "mmengine==0.10.4" && \
-	mim install "mmcv==2.1.0" && \
-	mim install "mmdet==3.3.0" && \
-	pip install git+https://github.com/facebookresearch/segment-anything.git
-# Copy the rest of the application
-COPY . /app
-EXPOSE 7860
-CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🐢
 colorFrom: purple
 colorTo: purple
 sdk: gradio
-sdk_version: 5.39.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: purple
 colorTo: purple
 sdk: gradio
+sdk_version: 5.38.2
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -47,8 +47,16 @@ class MedSAMIntegrator:
             import segment_anything  # noqa: F401
             return True
         except Exception as e:
-            print(f"⚠ segment_anything not available: {e}. It must be installed at build time (Dockerfile).")
-            return False
     def _load_medsam_model(self):
         try:
@@ -199,48 +207,6 @@ class MedSAMIntegrator:
 # Single global instance
 _medsam = MedSAMIntegrator()
-# Cache for SAM automatic mask generator
-_sam_auto_generator = None
-_sam_auto_ckpt_path = None
-def _get_sam_generator():
-    """Load and cache SAM ViT-H automatic mask generator with faster params if checkpoint exists."""
-    global _sam_auto_generator, _sam_auto_ckpt_path
-    if _sam_auto_generator is not None:
-        return _sam_auto_generator
-    try:
-        from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
-        ckpt = "models/sam_vit_h_4b8939.pth"
-        if not os.path.exists(ckpt):
-            try:
-                from huggingface_hub import hf_hub_download
-                ckpt = hf_hub_download(
-                    repo_id="Aniketg6/SAM",
-                    filename="sam_vit_h_4b8939.pth",
-                    cache_dir="./models"
-                )
-                print(f"✅ Downloaded SAM ViT-H checkpoint to: {ckpt}")
-            except Exception as e:
-                print(f"⚠ Failed to download SAM ViT-H checkpoint: {e}")
-                return None
-        _sam_auto_ckpt_path = ckpt
-        sam = sam_model_registry["vit_h"](checkpoint=ckpt)
-        # Speed-tuned generator params
-        _sam_auto_generator = SamAutomaticMaskGenerator(
-            sam,
-            points_per_side=16,
-            pred_iou_thresh=0.88,
-            stability_score_thresh=0.9,
-            crop_n_layers=0,
-            box_nms_thresh=0.7,
-            min_mask_region_area=512  # filter tiny masks
-        )
-        return _sam_auto_generator
-    except Exception as e:
-        print(f"_get_sam_generator failed: {e}")
-        return None
 def _extract_bboxes_from_mmdet_result(det_result):
     """Extract Nx4 xyxy bboxes from various MMDet result formats."""
@@ -668,54 +634,46 @@ def analyze(image):
     # Chart Element Detection (Cascade R-CNN)
     if element_model is not None:
         try:
-            # If medical image, skip heavy MMDet to speed up
-            if isinstance(result.get("chart_type_label"), str) and result["chart_type_label"].lower() == "medical image":
-                result["element_result"] = "skipped_for_medical"
             else:
-                # Convert PIL image to numpy array for MMDetection
-                np_img = np.array(image.convert("RGB"))[:, :, ::-1]  # PIL → BGR
-                element_result = inference_detector(element_model, np_img)
-                # Convert result to more API-friendly format
-                if isinstance(element_result, tuple):
-                    bbox_result, segm_result = element_result
-                    element_data = {
-                        "bboxes": bbox_result.tolist() if hasattr(bbox_result, 'tolist') else str(bbox_result),
-                        "segments": segm_result.tolist() if hasattr(segm_result, 'tolist') else str(segm_result)
-                    }
-                else:
-                    element_data = str(element_result)
-                result["element_result"] = element_data
-                result["status"] = "Chart classification + element detection completed"
         except Exception as e:
             result["element_result"] = f"Error: {str(e)}"
     # Chart Data Point Segmentation (Mask R-CNN)
     if datapoint_model is not None:
         try:
-            # If medical image, skip heavy MMDet to speed up
-            if isinstance(result.get("chart_type_label"), str) and result["chart_type_label"].lower() == "medical image":
-                result["datapoint_result"] = "skipped_for_medical"
             else:
-                # Convert PIL image to numpy array for MMDetection
-                np_img = np.array(image.convert("RGB"))[:, :, ::-1]  # PIL → BGR
-                datapoint_result = inference_detector(datapoint_model, np_img)
-                # Convert result to more API-friendly format
-                if isinstance(datapoint_result, tuple):
-                    bbox_result, segm_result = datapoint_result
-                    datapoint_data = {
-                        "bboxes": bbox_result.tolist() if hasattr(bbox_result, 'tolist') else str(bbox_result),
-                        "segments": segm_result.tolist() if hasattr(segm_result, 'tolist') else str(segm_result)
-                    }
-                else:
-                    datapoint_data = str(datapoint_result)
-                result["datapoint_result"] = datapoint_data
-                result["status"] = "Full analysis completed"
         except Exception as e:
             result["datapoint_result"] = f"Error: {str(e)}"
@@ -744,35 +702,46 @@ def analyze_with_medsam(base_result, image):
         if not isinstance(base_result, dict):
             return base_result, None
         label = str(base_result.get("chart_type_label", "")).strip().lower()
-        if label != "medical image":
             return base_result, None
         pil_img = Image.open(image).convert("RGB") if isinstance(image, str) else image
         if pil_img is None:
             return base_result, None
         segmentations = []
         masks_for_overlay = []
-        # Try fast SAM generator first; avoid MedSAM embedding when SAM is available
-        gen = _get_sam_generator()
-        if gen is not None and _sam_auto_ckpt_path is not None and os.path.exists(_sam_auto_ckpt_path):
-            try:
-                import cv2 as _cv2
-                img_path = image if isinstance(image, str) else None
-                if img_path is None:
-                    tmp_path = "./_tmp_input_image.png"
-                    pil_img.save(tmp_path)
-                    img_path = tmp_path
                 img_bgr = _cv2.imread(img_path)
-                masks = gen.generate(img_bgr)
-                # Keep top-K by stability_score or area
-                def _score(m):
-                    s = float(m.get('stability_score', 0.0))
-                    seg = m.get('segmentation', None)
-                    area = int(seg.sum()) if isinstance(seg, np.ndarray) else 0
-                    return (s, area)
-                masks = sorted(masks, key=_score, reverse=True)[:8]
                 for m in masks:
                     seg = m.get('segmentation', None)
                     if seg is None:
@@ -784,20 +753,9 @@ def analyze_with_medsam(base_result, image):
                         "method": "sam_auto"
                     })
                     masks_for_overlay.append({"mask": seg_u8})
-            except Exception as e:
-                print(f"SAM generator segmentation failed: {e}")
-        # Fallback to MedSAM boxes only if nothing produced
-        if not segmentations and _medsam.is_available():
-            try:
-                # Prepare embedding once
-                img_path = image if isinstance(image, str) else None
-                if img_path is None:
-                    tmp_path = "./_tmp_input_image.png"
-                    pil_img.save(tmp_path)
-                    img_path = tmp_path
-                _medsam.load_image(img_path)
-                cand_bboxes = _find_topk_foreground_bboxes(pil_img, max_regions=5, min_area=400)
                 for bbox in cand_bboxes:
                     m = _medsam.segment_with_box(bbox)
                     if m is None or not isinstance(m.get('mask'), np.ndarray):
@@ -808,8 +766,8 @@ def analyze_with_medsam(base_result, image):
                         "method": m.get("method", "medsam_box_auto")
                     })
                     masks_for_overlay.append(m)
-            except Exception as auto_e:
-                print(f"MedSAM fallback segmentation failed: {auto_e}")
         W, H = pil_img.size
         base_result["medsam"] = {

             import segment_anything  # noqa: F401
             return True
         except Exception as e:
+            print(f"⚠ segment_anything not available: {e}. Attempting install from Git...")
+            try:
+                import subprocess, sys
+                subprocess.check_call([sys.executable, "-m", "pip", "install", "git+https://github.com/facebookresearch/segment-anything.git"])
+                import segment_anything  # noqa: F401
+                print("✓ segment_anything installed")
+                return True
+            except Exception as install_err:
+                print(f"❌ Failed to install segment_anything: {install_err}")
+                return False
     def _load_medsam_model(self):
         try:
 # Single global instance
 _medsam = MedSAMIntegrator()
 def _extract_bboxes_from_mmdet_result(det_result):
     """Extract Nx4 xyxy bboxes from various MMDet result formats."""
     # Chart Element Detection (Cascade R-CNN)
     if element_model is not None:
         try:
+            # Convert PIL image to numpy array for MMDetection
+            np_img = np.array(image.convert("RGB"))[:, :, ::-1]  # PIL → BGR
+            element_result = inference_detector(element_model, np_img)
+            # Convert result to more API-friendly format
+            if isinstance(element_result, tuple):
+                bbox_result, segm_result = element_result
+                element_data = {
+                    "bboxes": bbox_result.tolist() if hasattr(bbox_result, 'tolist') else str(bbox_result),
+                    "segments": segm_result.tolist() if hasattr(segm_result, 'tolist') else str(segm_result)
+                }
             else:
+                element_data = str(element_result)
+            result["element_result"] = element_data
+            result["status"] = "Chart classification + element detection completed"
         except Exception as e:
             result["element_result"] = f"Error: {str(e)}"
     # Chart Data Point Segmentation (Mask R-CNN)
     if datapoint_model is not None:
         try:
+            # Convert PIL image to numpy array for MMDetection
+            np_img = np.array(image.convert("RGB"))[:, :, ::-1]  # PIL → BGR
+            datapoint_result = inference_detector(datapoint_model, np_img)
+            # Convert result to more API-friendly format
+            if isinstance(datapoint_result, tuple):
+                bbox_result, segm_result = datapoint_result
+                datapoint_data = {
+                    "bboxes": bbox_result.tolist() if hasattr(bbox_result, 'tolist') else str(bbox_result),
+                    "segments": segm_result.tolist() if hasattr(segm_result, 'tolist') else str(segm_result)
+                }
             else:
+                datapoint_data = str(datapoint_result)
+            result["datapoint_result"] = datapoint_data
+            result["status"] = "Full analysis completed"
         except Exception as e:
             result["datapoint_result"] = f"Error: {str(e)}"
         if not isinstance(base_result, dict):
             return base_result, None
         label = str(base_result.get("chart_type_label", "")).strip().lower()
+        if label != "medical image" or not _medsam.is_available():
             return base_result, None
         pil_img = Image.open(image).convert("RGB") if isinstance(image, str) else image
         if pil_img is None:
             return base_result, None
+        # Prepare embedding
+        img_path = image if isinstance(image, str) else None
+        if img_path is None:
+            tmp_path = "./_tmp_input_image.png"
+            pil_img.save(tmp_path)
+            img_path = tmp_path
+        _medsam.load_image(img_path)
         segmentations = []
         masks_for_overlay = []
+        # AUTO segmentation path
+        try:
+            from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+            import cv2 as _cv2
+            # If ViT-H checkpoint present, use SAM automatic mask generator (download if missing)
+            vit_h_ckpt = "models/sam_vit_h_4b8939.pth"
+            if not os.path.exists(vit_h_ckpt):
+                try:
+                    from huggingface_hub import hf_hub_download
+                    vit_h_ckpt = hf_hub_download(
+                        repo_id="Aniketg6/SAM",
+                        filename="sam_vit_h_4b8939.pth",
+                        cache_dir="./models"
+                    )
+                    print(f"✅ Downloaded SAM ViT-H checkpoint to: {vit_h_ckpt}")
+                except Exception as dlh:
+                    print(f"⚠ Failed to download SAM ViT-H checkpoint: {dlh}")
+            if os.path.exists(vit_h_ckpt):
                 img_bgr = _cv2.imread(img_path)
+                sam = sam_model_registry["vit_h"](checkpoint=vit_h_ckpt)
+                mask_generator = SamAutomaticMaskGenerator(sam)
+                masks = mask_generator.generate(img_bgr)
                 for m in masks:
                     seg = m.get('segmentation', None)
                     if seg is None:
                         "method": "sam_auto"
                     })
                     masks_for_overlay.append({"mask": seg_u8})
+            else:
+                # Fallback: derive candidate boxes and run MedSAM per box
+                cand_bboxes = _find_topk_foreground_bboxes(pil_img, max_regions=20, min_area=200)
                 for bbox in cand_bboxes:
                     m = _medsam.segment_with_box(bbox)
                     if m is None or not isinstance(m.get('mask'), np.ndarray):
                         "method": m.get("method", "medsam_box_auto")
                     })
                     masks_for_overlay.append(m)
+        except Exception as auto_e:
+            print(f"Automatic MedSAM segmentation failed: {auto_e}")
         W, H = pil_img.size
         base_result["medsam"] = {