Spaces:

pascal-maker
/

medical-vlm-sam2

Running

App Files Files Community

pascal-maker committed on Jun 7

Commit

80f05f9

verified ·

1 Parent(s): 60d85f0

update app.py

Browse files

Files changed (1) hide show

app.py +323 -400

app.py CHANGED Viewed

@@ -2,14 +2,43 @@
 # -*- coding: utf-8 -*-
 """
-Combined Medical-VLM, **SAM-2 automatic masking**, and CheXagent demo.
-⭑ Changes ⭑
------------
-1. Fixed SAM-2 installation and import issues
-2. Added proper error handling for missing dependencies
-3. Made SAM-2 functionality optional with graceful fallback
-4. Added installation instructions and requirements check
 """
 # ---------------------------------------------------------------------
@@ -21,6 +50,7 @@ import uuid
 import tempfile
 import subprocess
 import warnings
 from threading import Thread
 # Environment setup
@@ -36,74 +66,78 @@ from PIL import Image, ImageDraw
 import gradio as gr
 # =============================================================================
-# Dependency checker and installer
 # =============================================================================
 def check_and_install_sam2():
     """Check if SAM-2 is available and attempt installation if needed."""
     try:
-        # Try importing SAM-2
         from sam2.build_sam import build_sam2
-        from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-        return True, "SAM-2 already available"
     except ImportError:
-        print("SAM-2 not found. Attempting to install...")
         try:
-            # Clone SAM-2 repository
-            if not os.path.exists("segment-anything-2"):
-                subprocess.run([
-                    "git", "clone",
-                    "https://github.com/facebookresearch/segment-anything-2.git"
-                ], check=True)
-            # Install SAM-2
             original_dir = os.getcwd()
-            os.chdir("segment-anything-2")
-            subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], check=True)
             os.chdir(original_dir)
-            # Add to Python path
-            sys.path.insert(0, os.path.abspath("segment-anything-2"))
-            # Try importing again
             from sam2.build_sam import build_sam2
-            from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-            return True, "SAM-2 installed successfully"
         except Exception as e:
-            print(f"Failed to install SAM-2: {e}")
-            return False, f"SAM-2 installation failed: {e}"
-# Check SAM-2 availability
-SAM2_AVAILABLE, SAM2_STATUS = check_and_install_sam2()
-print(f"SAM-2 Status: {SAM2_STATUS}")
-# =============================================================================
-# SAM-2 imports (conditional)
-# =============================================================================
-if SAM2_AVAILABLE:
     try:
         from sam2.build_sam import build_sam2
         from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-        from sam2.modeling.sam2_base import SAM2Base
-        from sam2.utils.misc import get_device_index
-    except ImportError as e:
-        print(f"SAM-2 import error: {e}")
-        SAM2_AVAILABLE = False
-# =============================================================================
-# Qwen-VLM imports & helper
-# =============================================================================
-from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
-from qwen_vl_utils import process_vision_info
 # =============================================================================
-# CheXagent imports
 # =============================================================================
-from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-# ---------------------------------------------------------------------
-# Devices
-# ---------------------------------------------------------------------
 def get_device():
     if torch.cuda.is_available():
         return torch.device("cuda")
@@ -111,344 +145,249 @@ def get_device():
         return torch.device("mps")
     return torch.device("cpu")
-# =============================================================================
-# Qwen-VLM model & agent
-# =============================================================================
-_qwen_model = None
-_qwen_processor = None
-_qwen_device = None
-def load_qwen_model_and_processor(hf_token=None):
-    global _qwen_model, _qwen_processor, _qwen_device
-    if _qwen_model is None:
         _qwen_device = "mps" if torch.backends.mps.is_available() else "cpu"
-        print(f"[Qwen] loading model on {_qwen_device}")
-        auth_kwargs = {"use_auth_token": hf_token} if hf_token else {}
         _qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2.5-VL-3B-Instruct",
-            trust_remote_code=True,
-            attn_implementation="eager",
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True,
-            device_map=None,
-            **auth_kwargs,
         ).to(_qwen_device)
         _qwen_processor = AutoProcessor.from_pretrained(
-            "Qwen/Qwen2.5-VL-3B-Instruct",
-            trust_remote_code=True,
-            **auth_kwargs,
         )
-    return _qwen_model, _qwen_processor, _qwen_device
-class MedicalVLMAgent:
-    """Light wrapper around Qwen-VLM with an optional image."""
-    def __init__(self, model, processor, device):
         self.model = model
         self.processor = processor
-        self.device = device
         self.system_prompt = (
             "You are a medical information assistant with vision capabilities.\n"
             "Disclaimer: I am not a licensed medical professional. "
             "The information provided is for reference only and should not be taken as medical advice."
         )
     def run(self, user_text: str, image: Image.Image | None = None) -> str:
-        messages = [
-            {"role": "system", "content": [{"type": "text", "text": self.system_prompt}]}
-        ]
         user_content = []
         if image is not None:
-            tmp = f"/tmp/{uuid.uuid4()}.png"
-            image.save(tmp)
-            user_content.append({"type": "image", "image": tmp})
         user_content.append({"type": "text", "text": user_text or "Please describe the image."})
         messages.append({"role": "user", "content": user_content})
-        prompt_text = self.processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        img_inputs, vid_inputs = process_vision_info(messages)
-        inputs = self.processor(
-            text=[prompt_text],
-            images=img_inputs,
-            videos=vid_inputs,
-            padding=True,
-            return_tensors="pt",
-        ).to(self.device)
         with torch.no_grad():
             out = self.model.generate(**inputs, max_new_tokens=128)
-        trimmed = out[0][inputs.input_ids.shape[1] :]
         return self.processor.decode(trimmed, skip_special_tokens=True).strip()
-# =============================================================================
-# SAM-2 model + AutomaticMaskGenerator (conditional)
-# =============================================================================
-def download_sam2_checkpoint():
-    """Download SAM-2 checkpoint if not present."""
-    checkpoint_dir = "checkpoints"
-    checkpoint_file = "sam2.1_hiera_large.pt"
-    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_file)
-    if not os.path.exists(checkpoint_path):
-        os.makedirs(checkpoint_dir, exist_ok=True)
-        print("Downloading SAM-2 checkpoint...")
-        try:
-            import urllib.request
-            url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt"
-            urllib.request.urlretrieve(url, checkpoint_path)
-            print("SAM-2 checkpoint downloaded successfully")
-        except Exception as e:
-            print(f"Failed to download SAM-2 checkpoint: {e}")
-            return None
-    return checkpoint_path
-def initialize_sam2():
-    """Initialize SAM-2 model and mask generator."""
-    if not SAM2_AVAILABLE:
-        return None, None
-    try:
-        # Download checkpoint if needed
-        checkpoint_path = download_sam2_checkpoint()
-        if checkpoint_path is None:
-            return None, None
-        # Config path (you may need to adjust this)
-        config_path = "segment-anything-2/sam2/configs/sam2.1/sam2.1_hiera_l.yaml"
-        if not os.path.exists(config_path):
-            config_path = "configs/sam2.1/sam2.1_hiera_l.yaml"
-        device = get_device()
-        print(f"[SAM-2] building model on {device}")
-        sam2_model = build_sam2(
-            config_path,
-            checkpoint_path,
-            device=device,
-            apply_postprocessing=False,
-        )
-        mask_gen = SAM2AutomaticMaskGenerator(
-            model=sam2_model,
-            points_per_side=32,
-            pred_iou_thresh=0.86,
-            stability_score_thresh=0.92,
-            crop_n_layers=0,
-        )
-        return sam2_model, mask_gen
-    except Exception as e:
-        print(f"[SAM-2] Failed to initialize: {e}")
-        return None, None
-# Initialize SAM-2 (conditional)
-_sam2_model, _mask_generator = None, None
-if SAM2_AVAILABLE:
-    _sam2_model, _mask_generator = initialize_sam2()
-    if _sam2_model is not None:
-        print("[SAM-2] Successfully initialized!")
-    else:
-        print("[SAM-2] Initialization failed")
 def automatic_mask_overlay(image_np: np.ndarray) -> np.ndarray:
-    """Generate masks and alpha-blend them on top of the original image."""
-    if _mask_generator is None:
-        raise RuntimeError("SAM-2 mask generator not initialized")
     anns = _mask_generator.generate(image_np)
-    if not anns:
-        return image_np
     overlay = image_np.copy()
-    if overlay.ndim == 2:  # grayscale → RGB
-        overlay = np.stack([overlay] * 3, axis=2)
     for ann in sorted(anns, key=lambda x: x["area"], reverse=True):
         m = ann["segmentation"]
         color = np.random.randint(0, 255, 3, dtype=np.uint8)
         overlay[m] = (overlay[m] * 0.5 + color * 0.5).astype(np.uint8)
     return overlay
 def tumor_segmentation_interface(image: Image.Image | None):
-    """Tumor segmentation interface with proper error handling."""
-    if image is None:
-        return None, "Please upload an image."
-    if not SAM2_AVAILABLE:
-        return None, "SAM-2 is not available. Please check installation."
-    if _mask_generator is None:
-        return None, "SAM-2 not properly initialized. Check the console for errors."
     try:
         img_np = np.array(image.convert("RGB"))
         out_np = automatic_mask_overlay(img_np)
         n_masks = len(_mask_generator.generate(img_np))
         return Image.fromarray(out_np), f"{n_masks} masks found."
     except Exception as e:
-        return None, f"SAM-2 error: {e}"
-# =============================================================================
-# Simple fallback segmentation (when SAM-2 is not available)
-# =============================================================================
 def simple_segmentation_fallback(image: Image.Image | None):
-    """Simple fallback segmentation using basic image processing."""
-    if image is None:
-        return None, "Please upload an image."
     try:
         import cv2
-        from skimage import segmentation, color
-        # Convert to numpy array
         img_np = np.array(image.convert("RGB"))
-        # Simple watershed segmentation
         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
         _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-        # Remove noise
         kernel = np.ones((3,3), np.uint8)
         opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
-        # Sure background area
-        sure_bg = cv2.dilate(opening, kernel, iterations=3)
-        # Finding sure foreground area
         dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
         _, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
-        # Create overlay
         overlay = img_np.copy()
-        overlay[sure_fg > 0] = [255, 0, 0]  # Red overlay
-        # Alpha blend
         result = cv2.addWeighted(img_np, 0.7, overlay, 0.3, 0)
         return Image.fromarray(result), "Simple segmentation applied (SAM-2 not available)"
     except Exception as e:
-        return None, f"Fallback segmentation error: {e}"
-# =============================================================================
-# CheXagent set-up
-# =============================================================================
-try:
-    chex_name = "StanfordAIMI/CheXagent-2-3b"
-    chex_tok = AutoTokenizer.from_pretrained(chex_name, trust_remote_code=True)
-    chex_model = AutoModelForCausalLM.from_pretrained(
-        chex_name, device_map="auto", trust_remote_code=True
-    )
-    chex_model = chex_model.half() if torch.cuda.is_available() else chex_model.float()
-    chex_model.eval()
-    CHEXAGENT_AVAILABLE = True
-except Exception as e:
-    print(f"CheXagent not available: {e}")
-    CHEXAGENT_AVAILABLE = False
-    chex_tok, chex_model = None, None
 def get_model_device(model):
-    if model is None:
-        return torch.device("cpu")
-    for p in model.parameters():
-        return p.device
-    return torch.device("cpu")
-def clean_text(text):
-    return text.replace("</s>", "")
 @torch.no_grad()
 def response_report_generation(pil_image_1, pil_image_2):
-    """Structured chest-X-ray report (streaming)."""
-    if not CHEXAGENT_AVAILABLE:
-        yield "CheXagent is not available. Please check installation."
-        return
-    streamer = TextIteratorStreamer(chex_tok, skip_prompt=True, skip_special_tokens=True)
     paths = []
     for im in [pil_image_1, pil_image_2]:
-        if im is None:
-            continue
-        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
-            im.save(tfile.name)
-            paths.append(tfile.name)
     if not paths:
         yield "Please upload at least one image."
         return
-    device = get_model_device(chex_model)
-    anatomies = [
-        "View",
-        "Airway",
-        "Breathing",
-        "Cardiac",
-        "Diaphragm",
-        "Everything else (e.g., mediastinal contours, bones, soft tissues, tubes, valves, pacemakers)",
-    ]
-    prompts = [
-        "Determine the view of this CXR",
-        *[
-            f'Provide a detailed description of "{a}" in the chest X-ray'
-            for a in anatomies[1:]
-        ],
-    ]
     findings = ""
     partial = "## Generating Findings (step-by-step):\n\n"
     for idx, (anat, prompt) in enumerate(zip(anatomies, prompts)):
-        query = chex_tok.from_list_format(
-            [*[{"image": p} for p in paths], {"text": prompt}]
-        )
-        conv = [
-            {"from": "system", "value": "You are a helpful assistant."},
-            {"from": "human", "value": query},
-        ]
-        inp = chex_tok.apply_chat_template(
-            conv, add_generation_prompt=True, return_tensors="pt"
-        ).to(device)
-        generate_kwargs = dict(
-            input_ids=inp,
-            max_new_tokens=512,
-            do_sample=False,
-            num_beams=1,
-            streamer=streamer,
-        )
-        Thread(target=chex_model.generate, kwargs=generate_kwargs).start()
-        partial += f"**Step {idx}: {anat}...**\n\n"
         for tok in streamer:
-            if idx:
-                findings += tok
             partial += tok
             yield clean_text(partial)
         partial += "\n\n"
         findings += " "
     findings = findings.strip()
-    # Impression
     partial += "## Generating Impression\n\n"
     prompt = f"Write the Impression section for the following Findings: {findings}"
-    conv = [
-        {"from": "system", "value": "You are a helpful assistant."},
-        {"from": "human", "value": chex_tok.from_list_format([{"text": prompt}])},
-    ]
-    inp = chex_tok.apply_chat_template(
-        conv, add_generation_prompt=True, return_tensors="pt"
-    ).to(device)
-    Thread(
-        target=chex_model.generate,
-        kwargs=dict(
-            input_ids=inp,
-            do_sample=False,
-            num_beams=1,
-            max_new_tokens=512,
-            streamer=streamer,
-        ),
-    ).start()
     for tok in streamer:
         partial += tok
         yield clean_text(partial)
@@ -456,129 +395,113 @@ def response_report_generation(pil_image_1, pil_image_2):
 @torch.no_grad()
 def response_phrase_grounding(pil_image, prompt_text):
-    """Very simple visual-grounding placeholder."""
-    if not CHEXAGENT_AVAILABLE:
-        return "CheXagent is not available. Please check installation.", None
-    if pil_image is None:
-        return "Please upload an image.", None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
         pil_image.save(tfile.name)
         img_path = tfile.name
-    device = get_model_device(chex_model)
-    query = chex_tok.from_list_format([{"image": img_path}, {"text": prompt_text}])
-    conv = [
-        {"from": "system", "value": "You are a helpful assistant."},
-        {"from": "human", "value": query},
-    ]
-    inp = chex_tok.apply_chat_template(
-        conv, add_generation_prompt=True, return_tensors="pt"
-    ).to(device)
-    out = chex_model.generate(
-        input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512
-    )
-    resp = clean_text(chex_tok.decode(out[0][inp.shape[1] :]))
-    # simple center box (placeholder)
     w, h = pil_image.size
     cx, cy, sz = w // 2, h // 2, min(w, h) // 4
     draw = ImageDraw.Draw(pil_image)
     draw.rectangle([(cx - sz, cy - sz), (cx + sz, cy + sz)], outline="red", width=3)
     return resp, pil_image
 # =============================================================================
-# Gradio UI
 # =============================================================================
 def create_ui():
     """Create the Gradio interface."""
-    # Load Qwen model
-    try:
-        qwen_model, qwen_proc, qwen_dev = load_qwen_model_and_processor()
-        med_agent = MedicalVLMAgent(qwen_model, qwen_proc, qwen_dev)
-        qwen_available = True
-    except Exception as e:
-        print(f"Qwen model not available: {e}")
-        qwen_available = False
-        med_agent = None
-    with gr.Blocks(title="Medical AI Assistant") as demo:
         gr.Markdown("# Combined Medical Q&A · SAM-2 Automatic Masking · CheXagent")
-        # Status information
         with gr.Row():
             gr.Markdown(f"""
-            **System Status:**
-            - Qwen VLM: {'✅ Available' if qwen_available else '❌ Not Available'}
-            - SAM-2: {'✅ Available' if SAM2_AVAILABLE else '❌ Not Available'}
-            - CheXagent: {'✅ Available' if CHEXAGENT_AVAILABLE else '❌ Not Available'}
             """)
-        # Medical Q&A Tab
         with gr.Tab("Medical Q&A"):
-            if qwen_available:
                 q_in = gr.Textbox(label="Question / description", lines=3)
                 q_img = gr.Image(label="Optional image", type="pil")
-                q_btn = gr.Button("Submit")
-                q_out = gr.Textbox(label="Answer")
-                q_btn.click(fn=med_agent.run, inputs=[q_in, q_img], outputs=q_out)
             else:
-                gr.Markdown("❌ Medical Q&A is not available. Qwen model failed to load.")
-        # Segmentation Tab
-        with gr.Tab("Automatic masking"):
             seg_img = gr.Image(label="Upload medical image", type="pil")
-            seg_btn = gr.Button("Run segmentation")
-            seg_out = gr.Image(label="Segmentation result", type="pil")
             seg_status = gr.Textbox(label="Status", interactive=False)
-            if SAM2_AVAILABLE and _mask_generator is not None:
-                seg_btn.click(
-                    fn=tumor_segmentation_interface,
-                    inputs=seg_img,
-                    outputs=[seg_out, seg_status],
-                )
             else:
-                seg_btn.click(
-                    fn=simple_segmentation_fallback,
-                    inputs=seg_img,
-                    outputs=[seg_out, seg_status],
-                )
-        # CheXagent Tabs
-        with gr.Tab("CheXagent – Structured report"):
             if CHEXAGENT_AVAILABLE:
-                gr.Markdown("Upload one or two chest X-ray images; the report streams live.")
-                cx1 = gr.Image(label="Image 1", image_mode="L", type="pil")
-                cx2 = gr.Image(label="Image 2", image_mode="L", type="pil")
-                cx_report = gr.Markdown()
-                gr.Interface(
-                    fn=response_report_generation,
-                    inputs=[cx1, cx2],
-                    outputs=cx_report,
-                    live=True,
-                ).render()
             else:
-                gr.Markdown("❌ CheXagent structured report is not available.")
-        with gr.Tab("CheXagent – Visual grounding"):
             if CHEXAGENT_AVAILABLE:
                 vg_img = gr.Image(image_mode="L", type="pil")
-                vg_prompt = gr.Textbox(value="Locate the highlighted finding:")
-                vg_text = gr.Markdown()
-                vg_out_img = gr.Image()
-                gr.Interface(
-                    fn=response_phrase_grounding,
-                    inputs=[vg_img, vg_prompt],
-                    outputs=[vg_text, vg_out_img],
-                ).render()
             else:
-                gr.Markdown("❌ CheXagent visual grounding is not available.")
     return demo
 if __name__ == "__main__":
     demo = create_ui()
     demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

 # -*- coding: utf-8 -*-
 """
+Combined Medical-VLM, SAM-2 automatic masking, and CheXagent demo.
+This script integrates multiple AI models for medical imaging tasks. It is designed
+to be robust and provide helpful feedback if components fail to load.
+★★ Improvements in this version ★★
+------------------------------------
+1.  **Detailed Status Reporting**: Both the console and the UI now show *why* a
+    model failed to load (e.g., network error, missing dependency, out of memory).
+2.  **Proactive Dependency Checks**: The script checks for required tools like `git`
+    before attempting to use them.
+3.  **Robust Installation**: SAM-2 installation is more resilient, with clearer
+    error messages for common failure points.
+4.  **Centralized Initialization**: A single master function handles the setup of all
+    models for cleaner, more predictable behavior.
+5.  **Clear User Guidance**: Added detailed manual installation steps below for users
+    who encounter issues with the automatic setup.
+★★ Manual Installation Guide ★★
+--------------------------------
+If the automatic setup fails, please try the following in your terminal:
+1.  **Install Git**: Make sure `git` is installed on your system.
+2.  **Clone SAM-2 Repository**:
+    git clone https://github.com/facebookresearch/segment-anything-2.git
+3.  **Install SAM-2**:
+    cd segment-anything-2
+    pip install -e .
+    cd ..
+4.  **Install Other Dependencies**:
+    pip install transformers torch numpy Pillow gradio opencv-python scikit-image accelerate
+5.  **Run the Script**:
+    python app.py
 """
 # ---------------------------------------------------------------------
 import tempfile
 import subprocess
 import warnings
+import shutil
 from threading import Thread
 # Environment setup
 import gradio as gr
 # =============================================================================
+# Global Status Variables
+# These will be updated during initialization and displayed in the UI.
 # =============================================================================
+QWEN_AVAILABLE = False
+QWEN_STATUS = "Not initialized."
+SAM2_AVAILABLE = False
+SAM2_STATUS = "Not initialized."
+CHEXAGENT_AVAILABLE = False
+CHEXAGENT_STATUS = "Not initialized."
+FALLBACK_SEG_AVAILABLE = False
+# =============================================================================
+# 1. Dependency Checker & Installer
+# =============================================================================
+def check_system_dependencies():
+    """Report whether the external tools needed for automatic setup are present.
+
+    Only ``git`` is checked; it is looked up on PATH with ``shutil.which``.
+
+    Returns:
+        tuple[bool, str]: ``(True, message)`` when git is found, otherwise
+        ``(False, message)`` with an explanation meant for the user.
+    """
+    git_executable = shutil.which("git")
+    if git_executable:
+        return True, "System dependencies are OK."
+    return False, "git is not installed or not in your PATH. Please install it to enable automatic SAM-2 setup."
 def check_and_install_sam2():
+    """Ensure the ``sam2`` package is importable, cloning and installing it if needed.
+
+    When ``sam2.build_sam`` cannot be imported, the repository is cloned into
+    ``segment-anything-2`` (unless that directory already exists), installed in
+    editable mode with pip, and the import is retried.
+
+    Returns:
+        tuple[bool, str]: success flag and a human-readable status message.
+    """
     try:
         from sam2.build_sam import build_sam2
+        return True, "SAM-2 is already installed."
     except ImportError:
+        print("SAM-2 not found. Attempting to clone and install...")
+    import importlib
+    repo_dir = "segment-anything-2"
+    try:
+        if not os.path.exists(repo_dir):
+            subprocess.run(
+                ["git", "clone", "https://github.com/facebookresearch/segment-anything-2.git"],
+                check=True, capture_output=True, text=True
+            )
+        # Run pip with cwd=repo_dir instead of os.chdir(): a failed install can
+        # then never leave the whole process in the wrong working directory.
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "-e", "."],
+            cwd=repo_dir, check=True, capture_output=True, text=True
+        )
+        sys.path.insert(0, os.path.abspath(repo_dir))
+        # The package appeared on disk after the interpreter started, so the
+        # import system's cached directory listings have to be refreshed.
+        importlib.invalidate_caches()
+        from sam2.build_sam import build_sam2
+        return True, "SAM-2 installed successfully."
+    except subprocess.CalledProcessError as e:
+        error_message = f"Failed to run command.\nStderr: {e.stderr}\nStdout: {e.stdout}"
+        return False, f"SAM-2 installation failed. A command-line process failed. Please check console for details.\n{error_message}"
+    except Exception as e:
+        return False, f"SAM-2 installation failed: {e}. Please try manual installation."
+# Conditionally import SAM-2 modules after potential installation
+sam2_build_sam = None
+sam2_AutomaticMaskGenerator = None
+def import_sam2_modules():
+    """Bind the SAM-2 entry points to the module-level placeholders.
+
+    Returns:
+        bool: True when both SAM-2 imports succeed and the globals
+        ``sam2_build_sam`` / ``sam2_AutomaticMaskGenerator`` have been set;
+        False when either import raises ImportError (globals are left as-is).
+    """
+    global sam2_build_sam, sam2_AutomaticMaskGenerator
     try:
         from sam2.build_sam import build_sam2
         from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
+    except ImportError:
+        return False
+    sam2_build_sam = build_sam2
+    sam2_AutomaticMaskGenerator = SAM2AutomaticMaskGenerator
+    return True
 # =============================================================================
+# 2. Model Initializers
 # =============================================================================
+# --- Device Helper ---
 def get_device():
     if torch.cuda.is_available():
         return torch.device("cuda")
         return torch.device("mps")
     return torch.device("cpu")
+# --- Qwen-VLM ---
+_qwen_model, _qwen_processor, _qwen_device = None, None, None
+def initialize_qwen():
+    """Load the Qwen2.5-VL model and processor and record the outcome.
+
+    On success the module-level ``_qwen_model``, ``_qwen_processor`` and
+    ``_qwen_device`` are set and ``QWEN_AVAILABLE`` becomes True. On any
+    failure those globals are left untouched and the reason is stored in
+    ``QWEN_STATUS``.
+
+    Returns:
+        ``(model, processor)`` on success, ``(None, None)`` on failure.
+    """
+    global _qwen_model, _qwen_processor, _qwen_device, QWEN_AVAILABLE, QWEN_STATUS
+    try:
+        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+        # Imported only so a missing qwen_vl_utils is reported here, at load
+        # time, rather than on the first request.
+        from qwen_vl_utils import process_vision_info
+        # Use the same device helper as MedicalVLMAgent (which sends its inputs
+        # to get_device()); picking "mps or cpu" here put the model on CPU and
+        # the inputs on GPU whenever CUDA was available.
+        device = get_device()
+        print(f"[Qwen] Loading model on {device}...")
+        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True, attn_implementation="eager",
+            torch_dtype=torch.float32, low_cpu_mem_usage=True
+        ).to(device)
+        processor = AutoProcessor.from_pretrained(
+            "Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True
+        )
+    except Exception as e:
+        QWEN_STATUS = f"❌ Failed to load Qwen model. Reason: {e}"
+        print(f"[ERROR] {QWEN_STATUS}")
+        return None, None
+    # Publish only after both pieces loaded, so a processor failure cannot
+    # leave a model global behind while the function reports failure.
+    _qwen_model, _qwen_processor, _qwen_device = model, processor, device
+    QWEN_AVAILABLE = True
+    QWEN_STATUS = f"✅ Available (loaded on {_qwen_device})"
+    return _qwen_model, _qwen_processor
+# --- SAM-2 ---
+_sam2_model, _mask_generator = None, None
+def initialize_sam2():
+    """Make SAM-2 importable, fetch its checkpoint and build the mask generator.
+
+    Updates ``SAM2_AVAILABLE`` / ``SAM2_STATUS`` and, on success, the
+    module-level ``_sam2_model`` and ``_mask_generator``.
+
+    Returns:
+        ``(sam2_model, mask_generator)`` on success, ``(None, None)`` on any
+        failure; the reason is left in ``SAM2_STATUS``.
+    """
+    global _sam2_model, _mask_generator, SAM2_AVAILABLE, SAM2_STATUS
+    # Step 1: use an existing installation when there is one, so git is only
+    # demanded when SAM-2 really has to be cloned.
+    if not import_sam2_modules():
+        git_ok, git_msg = check_system_dependencies()
+        if not git_ok:
+            SAM2_STATUS = f"❌ {git_msg}"
+            return None, None
+        install_ok, install_msg = check_and_install_sam2()
+        if not install_ok:
+            SAM2_STATUS = f"❌ {install_msg}"
+            return None, None
+        print(f"[SAM-2] Install check: {install_msg}")
+        if not import_sam2_modules():
+            SAM2_STATUS = "❌ Failed to import SAM-2 modules after installation."
+            return None, None
+    # Step 2: download the checkpoint and build the model
+    try:
+        import urllib.request
+        checkpoint_dir = "checkpoints"
+        checkpoint_file = "sam2.1_hiera_large.pt"
+        checkpoint_path = os.path.join(checkpoint_dir, checkpoint_file)
+        if not os.path.exists(checkpoint_path):
+            os.makedirs(checkpoint_dir, exist_ok=True)
+            print("[SAM-2] Downloading checkpoint (sam2.1_hiera_large.pt)...")
+            url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt"
+            # Download to a side file and rename once complete; an interrupted
+            # download must not be mistaken for a valid checkpoint next run.
+            partial_path = checkpoint_path + ".part"
+            urllib.request.urlretrieve(url, partial_path)
+            os.replace(partial_path, checkpoint_path)
+            print("[SAM-2] Checkpoint downloaded successfully.")
+        # build_sam2 resolves its first argument through Hydra as a config
+        # *name* relative to the installed sam2 package, not as a filesystem
+        # path (see the SAM 2 README). The previous "sam2/sam2/configs/..."
+        # path did not even match the "segment-anything-2" clone directory.
+        # If the config is missing, Hydra's error is reported by the handler below.
+        config_name = "configs/sam2.1/sam2.1_hiera_l.yaml"
+        device = get_device()
+        print(f"[SAM-2] Building model on {device}...")
+        sam2_model = sam2_build_sam(config_name, checkpoint_path, device=device, apply_postprocessing=False)
+        mask_gen = sam2_AutomaticMaskGenerator(model=sam2_model, points_per_side=32, pred_iou_thresh=0.86, stability_score_thresh=0.92, crop_n_layers=0)
+        _sam2_model, _mask_generator = sam2_model, mask_gen
+        SAM2_AVAILABLE = True
+        SAM2_STATUS = f"✅ Available (loaded on {device})"
+        return sam2_model, mask_gen
+    except Exception as e:
+        SAM2_STATUS = f"❌ Failed to initialize SAM-2 model. Reason: {e}"
+        print(f"[ERROR] {SAM2_STATUS}")
+        return None, None
+# --- CheXagent ---
+_chex_model, _chex_tok = None, None
+def initialize_chexagent():
+    """Load the CheXagent tokenizer and model into the module-level globals.
+
+    The model is cast to half precision when CUDA is available and to float32
+    otherwise, then switched to eval mode. ``CHEXAGENT_AVAILABLE`` and
+    ``CHEXAGENT_STATUS`` record the outcome.
+
+    Returns:
+        ``(model, tokenizer)`` on success, ``(None, None)`` if anything raises.
+    """
+    global _chex_model, _chex_tok, CHEXAGENT_AVAILABLE, CHEXAGENT_STATUS
+    try:
+        from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+        print("[CheXagent] Loading model (this may take time and memory)...")
+        model_id = "StanfordAIMI/CheXagent-2-3b"
+        _chex_tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+        _chex_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)
+        has_cuda = torch.cuda.is_available()
+        if has_cuda:
+            _chex_model = _chex_model.half()
+        else:
+            _chex_model = _chex_model.float()
+        _chex_model.eval()
+        CHEXAGENT_AVAILABLE = True
+        device_label = "GPU" if has_cuda else get_device()
+        CHEXAGENT_STATUS = f"✅ Available (loaded on {device_label})"
+        return _chex_model, _chex_tok
+    except Exception as err:
+        CHEXAGENT_STATUS = f"❌ Failed to load CheXagent. Reason: {err}. Check internet connection, disk space, and memory."
+        print(f"[ERROR] {CHEXAGENT_STATUS}")
+        return None, None
+# --- Fallback Segmentation ---
def check_fallback_dependencies():
    """Record in ``FALLBACK_SEG_AVAILABLE`` whether OpenCV and scikit-image import.

    The imports are probes only; nothing imported here is used by this
    function. Returns ``None``.
    """
    global FALLBACK_SEG_AVAILABLE
    try:
        import cv2
        from skimage import segmentation, color
    except ImportError:
        FALLBACK_SEG_AVAILABLE = False
    else:
        FALLBACK_SEG_AVAILABLE = True
+# =============================================================================
+# 3. Model Logic and Agents (Code unchanged from here)
+# =============================================================================
+# --- Qwen Agent ---
class MedicalVLMAgent:
    """Thin chat wrapper around a vision-language model and its processor.

    Each call to :meth:`run` builds a two-message conversation (fixed system
    prompt + user turn with optional image) and returns the decoded reply.
    """

    def __init__(self, model, processor):
        """Store the model/processor pair and resolve the target device.

        Args:
            model: Generation model exposing ``generate``.
            processor: Matching processor exposing ``apply_chat_template``,
                ``__call__`` and ``decode``.
        """
        self.model = model
        self.processor = processor
        self.device = get_device()
        self.system_prompt = (
            "You are a medical information assistant with vision capabilities.\n"
            "Disclaimer: I am not a licensed medical professional. "
            "The information provided is for reference only and should not be taken as medical advice."
        )

    def run(self, user_text: str, image: Image.Image | None = None) -> str:
        """Answer *user_text*, optionally grounded on *image*.

        Args:
            user_text: The question; an empty value falls back to
                "Please describe the image.".
            image: Optional PIL image attached to the user turn.

        Returns:
            The model's reply with the prompt tokens and special tokens removed.
        """
        import contextlib
        import os

        from qwen_vl_utils import process_vision_info

        messages = [{"role": "system", "content": [{"type": "text", "text": self.system_prompt}]}]
        user_content = []
        image_path = None
        try:
            if image is not None:
                # Reserve a path, close the handle, then save: writing through
                # the name while the handle is still open fails on some
                # platforms, and delete=False means we own the cleanup.
                with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
                    image_path = tfile.name
                image.save(image_path)
                user_content.append({"type": "image", "image": image_path})
            user_content.append({"type": "text", "text": user_text or "Please describe the image."})
            messages.append({"role": "user", "content": user_content})
            prompt_text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            img_inputs, _ = process_vision_info(messages)
            inputs = self.processor(text=[prompt_text], images=img_inputs, padding=True, return_tensors="pt").to(self.device)
            with torch.no_grad():
                out = self.model.generate(**inputs, max_new_tokens=128)
        finally:
            # Previously the temporary PNG was never removed, leaking one file
            # per request; remove it once generation has finished or failed.
            if image_path is not None:
                with contextlib.suppress(OSError):
                    os.unlink(image_path)
        # Drop the echoed prompt so only newly generated tokens are decoded.
        trimmed = out[0][inputs.input_ids.shape[1]:]
        return self.processor.decode(trimmed, skip_special_tokens=True).strip()
+# --- SAM-2 Interface ---
 def automatic_mask_overlay(image_np: np.ndarray) -> np.ndarray:
+    if not _mask_generator: raise RuntimeError("SAM-2 mask generator not initialized")
     anns = _mask_generator.generate(image_np)
+    if not anns: return image_np
     overlay = image_np.copy()
+    if overlay.ndim == 2: overlay = np.stack([overlay] * 3, axis=2)
     for ann in sorted(anns, key=lambda x: x["area"], reverse=True):
         m = ann["segmentation"]
         color = np.random.randint(0, 255, 3, dtype=np.uint8)
         overlay[m] = (overlay[m] * 0.5 + color * 0.5).astype(np.uint8)
     return overlay
 def tumor_segmentation_interface(image: Image.Image | None):
+    if image is None: return None, "Please upload an image."
     try:
         img_np = np.array(image.convert("RGB"))
         out_np = automatic_mask_overlay(img_np)
         n_masks = len(_mask_generator.generate(img_np))
         return Image.fromarray(out_np), f"{n_masks} masks found."
     except Exception as e:
+        return None, f"SAM-2 processing error: {e}"
+# --- Fallback Segmentation ---
 def simple_segmentation_fallback(image: Image.Image | None):
+    if image is None: return None, "Please upload an image."
+    if not FALLBACK_SEG_AVAILABLE: return image, "Fallback libraries (OpenCV, Scikit-image) not installed."
     try:
         import cv2
         img_np = np.array(image.convert("RGB"))
         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
         _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
         kernel = np.ones((3,3), np.uint8)
         opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
         dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
         _, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
         overlay = img_np.copy()
+        overlay[sure_fg > 0] = [255, 0, 0]
         result = cv2.addWeighted(img_np, 0.7, overlay, 0.3, 0)
         return Image.fromarray(result), "Simple segmentation applied (SAM-2 not available)"
     except Exception as e:
+        return image, f"Fallback segmentation error: {e}"
+# --- CheXagent Interfaces ---
 def get_model_device(model):
+    return next(model.parameters()).device if model and next(model.parameters(), None) is not None else torch.device("cpu")
def clean_text(text):
    """Return *text* with every end-of-sequence marker removed."""
    eos_marker = "</s>"
    return text.replace(eos_marker, "")
 @torch.no_grad()
 def response_report_generation(pil_image_1, pil_image_2):
+    from transformers import TextIteratorStreamer
+    streamer = TextIteratorStreamer(_chex_tok, skip_prompt=True, skip_special_tokens=True)
     paths = []
     for im in [pil_image_1, pil_image_2]:
+        if im:
+            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
+                im.save(tfile.name)
+                paths.append(tfile.name)
     if not paths:
         yield "Please upload at least one image."
         return
+    device = get_model_device(_chex_model)
+    anatomies = ["View", "Airway", "Breathing", "Cardiac", "Diaphragm", "Everything else"]
+    prompts = ["Determine the view of this CXR", *[f'Provide a detailed description of "{a}" in the chest X-ray' for a in anatomies[1:]]]
     findings = ""
     partial = "## Generating Findings (step-by-step):\n\n"
     for idx, (anat, prompt) in enumerate(zip(anatomies, prompts)):
+        query = _chex_tok.from_list_format([*[{"image": p} for p in paths], {"text": prompt}])
+        conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": query}]
+        inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
+        generate_kwargs = dict(input_ids=inp, max_new_tokens=512, do_sample=False, num_beams=1, streamer=streamer)
+        Thread(target=_chex_model.generate, kwargs=generate_kwargs).start()
+        partial += f"**Step {idx+1}: {anat}...**\n\n"
         for tok in streamer:
+            if idx > 0: findings += tok
             partial += tok
             yield clean_text(partial)
         partial += "\n\n"
         findings += " "
     findings = findings.strip()
     partial += "## Generating Impression\n\n"
     prompt = f"Write the Impression section for the following Findings: {findings}"
+    conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": _chex_tok.from_list_format([{"text": prompt}])}]
+    inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
+    Thread(target=_chex_model.generate, kwargs=dict(input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512, streamer=streamer)).start()
     for tok in streamer:
         partial += tok
         yield clean_text(partial)
 @torch.no_grad()
 def response_phrase_grounding(pil_image, prompt_text):
+    if pil_image is None: return "Please upload an image.", None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
         pil_image.save(tfile.name)
         img_path = tfile.name
+    device = get_model_device(_chex_model)
+    query = _chex_tok.from_list_format([{"image": img_path}, {"text": prompt_text}])
+    conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": query}]
+    inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
+    out = _chex_model.generate(input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512)
+    resp = clean_text(_chex_tok.decode(out[0][inp.shape[1] :]))
     w, h = pil_image.size
     cx, cy, sz = w // 2, h // 2, min(w, h) // 4
     draw = ImageDraw.Draw(pil_image)
     draw.rectangle([(cx - sz, cy - sz), (cx + sz, cy + sz)], outline="red", width=3)
     return resp, pil_image
 # =============================================================================
+# 4. Gradio UI
 # =============================================================================
 def create_ui():
     """Create the Gradio interface."""
+    med_agent = MedicalVLMAgent(_qwen_model, _qwen_processor) if QWEN_AVAILABLE else None
+    with gr.Blocks(theme=gr.themes.Soft(), title="Medical AI Assistant") as demo:
         gr.Markdown("# Combined Medical Q&A · SAM-2 Automatic Masking · CheXagent")
         with gr.Row():
             gr.Markdown(f"""
+            ### System Status
+            - **Qwen VLM**: {QWEN_STATUS}
+            - **SAM-2**: {SAM2_STATUS}
+            - **CheXagent**: {CHEXAGENT_STATUS}
             """)
         with gr.Tab("Medical Q&A"):
+            if QWEN_AVAILABLE:
                 q_in = gr.Textbox(label="Question / description", lines=3)
                 q_img = gr.Image(label="Optional image", type="pil")
+                q_btn = gr.Button("Submit", variant="primary")
+                q_out = gr.Textbox(label="Answer", lines=5)
+                q_btn.click(fn=med_agent.run, inputs=[q_in, q_img], outputs=q_out, api_name="medical_qa")
             else:
+                gr.Markdown(f"### ❌ Medical Q&A is not available.\n**Reason:** {QWEN_STATUS}")
+        with gr.Tab("Automatic Masking (Segmentation)"):
             seg_img = gr.Image(label="Upload medical image", type="pil")
+            seg_btn = gr.Button("Run Segmentation", variant="primary")
+            seg_out = gr.Image(label="Segmentation Result", type="pil")
             seg_status = gr.Textbox(label="Status", interactive=False)
+            if SAM2_AVAILABLE:
+                seg_btn.click(fn=tumor_segmentation_interface, inputs=seg_img, outputs=[seg_out, seg_status], api_name="sam2_segmentation")
             else:
+                gr.Markdown(f"### ❌ SAM-2 is not available.\n**Reason:** {SAM2_STATUS}\n\n*Using a simple fallback segmentation method instead.*")
+                seg_btn.click(fn=simple_segmentation_fallback, inputs=seg_img, outputs=[seg_out, seg_status], api_name="fallback_segmentation")
+        with gr.Tab("CheXagent – Structured Report"):
             if CHEXAGENT_AVAILABLE:
+                gr.Markdown("Upload one or two chest X-ray images. The report will generate and stream live.")
+                with gr.Row():
+                    cx1 = gr.Image(label="Image 1 (Frontal)", image_mode="L", type="pil")
+                    cx2 = gr.Image(label="Image 2 (Lateral, optional)", image_mode="L", type="pil")
+                cx_report = gr.Markdown(label="Generated Report")
+                gr.Interface(fn=response_report_generation, inputs=[cx1, cx2], outputs=cx_report, live=True, allow_flagging="never").render()
             else:
+                gr.Markdown(f"### ❌ CheXagent is not available.\n**Reason:** {CHEXAGENT_STATUS}")
+        with gr.Tab("CheXagent – Visual Grounding"):
             if CHEXAGENT_AVAILABLE:
+                gr.Markdown("Upload an image and specify a finding to locate (placeholder functionality).")
                 vg_img = gr.Image(image_mode="L", type="pil")
+                vg_prompt = gr.Textbox(value="Locate the cardiomegaly")
+                vg_text = gr.Markdown(label="Finding Description")
+                vg_out_img = gr.Image(label="Image with Grounding")
+                gr.Interface(fn=response_phrase_grounding, inputs=[vg_img, vg_prompt], outputs=[vg_text, vg_out_img], allow_flagging="never").render()
             else:
+                gr.Markdown(f"### ❌ CheXagent is not available.\n**Reason:** {CHEXAGENT_STATUS}")
     return demo
+# =============================================================================
+# 5. Main Execution Block
+# =============================================================================
def initialize_all_models():
    """Run every initializer in order, then print a status summary banner.

    Order: Qwen, CheXagent, SAM-2, and finally the fallback-library probe.
    The status globals are read only after all initializers have run.
    """
    rule = "=" * 50

    print(rule)
    print("INITIALIZING ALL MODELS...")
    print(rule)

    # Order: smallest/fastest to largest/slowest; SAM-2 is complex, run last.
    startup_steps = (
        initialize_qwen,
        initialize_chexagent,
        initialize_sam2,
        check_fallback_dependencies,
    )
    for step in startup_steps:
        step()

    print("\n" + rule)
    print("INITIALIZATION COMPLETE. STATUS SUMMARY:")
    print(rule)
    summary_lines = (
        f"- Qwen VLM: {QWEN_STATUS}",
        f"- SAM-2: {SAM2_STATUS}",
        f"- CheXagent: {CHEXAGENT_STATUS}",
        f"- Fallback Segmentation Ready: {FALLBACK_SEG_AVAILABLE}",
    )
    for line in summary_lines:
        print(line)
    print(rule + "\n")
 if __name__ == "__main__":
+    initialize_all_models()
     demo = create_ui()
     demo.launch(server_name="0.0.0.0", server_port=7860, share=True)