Spaces:

pascal-maker
/

medical-vlm-sam2

Running

App Files Files Community

pascal-maker committed on Jun 7

Commit

2fb54d3

verified ·

1 Parent(s): 80f05f9

Update qpp.py

Browse files

Files changed (1) hide show

app.py +400 -323

app.py CHANGED Viewed

@@ -2,43 +2,14 @@
 # -*- coding: utf-8 -*-
 """
-Combined Medical-VLM, SAM-2 automatic masking, and CheXagent demo.
-This script integrates multiple AI models for medical imaging tasks. It is designed
-to be robust and provide helpful feedback if components fail to load.
-★★ Improvements in this version ★★
-------------------------------------
-1.  **Detailed Status Reporting**: Both the console and the UI now show *why* a
-    model failed to load (e.g., network error, missing dependency, out of memory).
-2.  **Proactive Dependency Checks**: The script checks for required tools like `git`
-    before attempting to use them.
-3.  **Robust Installation**: SAM-2 installation is more resilient, with clearer
-    error messages for common failure points.
-4.  **Centralized Initialization**: A single master function handles the setup of all
-    models for cleaner, more predictable behavior.
-5.  **Clear User Guidance**: Added detailed manual installation steps below for users
-    who encounter issues with the automatic setup.
-★★ Manual Installation Guide ★★
---------------------------------
-If the automatic setup fails, please try the following in your terminal:
-1.  **Install Git**: Make sure `git` is installed on your system.
-2.  **Clone SAM-2 Repository**:
-    git clone https://github.com/facebookresearch/segment-anything-2.git
-3.  **Install SAM-2**:
-    cd segment-anything-2
-    pip install -e .
-    cd ..
-4.  **Install Other Dependencies**:
-    pip install transformers torch numpy Pillow gradio opencv-python scikit-image accelerate
-5.  **Run the Script**:
-    python your_script_name.py
 """
 # ---------------------------------------------------------------------
@@ -50,7 +21,6 @@ import uuid
 import tempfile
 import subprocess
 import warnings
-import shutil
 from threading import Thread
 # Environment setup
@@ -66,78 +36,74 @@ from PIL import Image, ImageDraw
 import gradio as gr
 # =============================================================================
-# Global Status Variables
-# These will be updated during initialization and displayed in the UI.
 # =============================================================================
-QWEN_AVAILABLE = False
-QWEN_STATUS = "Not initialized."
-SAM2_AVAILABLE = False
-SAM2_STATUS = "Not initialized."
-CHEXAGENT_AVAILABLE = False
-CHEXAGENT_STATUS = "Not initialized."
-FALLBACK_SEG_AVAILABLE = False
-# =============================================================================
-# 1. Dependency Checker & Installer
-# =============================================================================
-def check_system_dependencies():
-    """Checks for system-level dependencies like git."""
-    if not shutil.which("git"):
-        return False, "git is not installed or not in your PATH. Please install it to enable automatic SAM-2 setup."
-    return True, "System dependencies are OK."
 def check_and_install_sam2():
     """Check if SAM-2 is available and attempt installation if needed."""
     try:
         from sam2.build_sam import build_sam2
-        return True, "SAM-2 is already installed."
     except ImportError:
-        print("SAM-2 not found. Attempting to clone and install...")
         try:
-            repo_dir = "segment-anything-2"
-            if not os.path.exists(repo_dir):
-                subprocess.run(
-                    ["git", "clone", "https://github.com/facebookresearch/segment-anything-2.git"],
-                    check=True, capture_output=True, text=True
-                )
             original_dir = os.getcwd()
-            os.chdir(repo_dir)
-            subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], check=True, capture_output=True, text=True)
             os.chdir(original_dir)
-            sys.path.insert(0, os.path.abspath(repo_dir))
             from sam2.build_sam import build_sam2
-            return True, "SAM-2 installed successfully."
-        except subprocess.CalledProcessError as e:
-            error_message = f"Failed to run command.\nStderr: {e.stderr}\nStdout: {e.stdout}"
-            return False, f"SAM-2 installation failed. A command-line process failed. Please check console for details.\n{error_message}"
         except Exception as e:
-            return False, f"SAM-2 installation failed: {e}. Please try manual installation."
-# Conditionally import SAM-2 modules after potential installation
-sam2_build_sam = None
-sam2_AutomaticMaskGenerator = None
-def import_sam2_modules():
-    global sam2_build_sam, sam2_AutomaticMaskGenerator
     try:
         from sam2.build_sam import build_sam2
         from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-        sam2_build_sam = build_sam2
-        sam2_AutomaticMaskGenerator = SAM2AutomaticMaskGenerator
-        return True
-    except ImportError:
-        return False
 # =============================================================================
-# 2. Model Initializers
 # =============================================================================
-# --- Device Helper ---
 def get_device():
     if torch.cuda.is_available():
         return torch.device("cuda")
@@ -145,249 +111,344 @@ def get_device():
         return torch.device("mps")
     return torch.device("cpu")
-# --- Qwen-VLM ---
-_qwen_model, _qwen_processor, _qwen_device = None, None, None
-def initialize_qwen():
-    global _qwen_model, _qwen_processor, _qwen_device, QWEN_AVAILABLE, QWEN_STATUS
-    try:
-        from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
-        from qwen_vl_utils import process_vision_info
         _qwen_device = "mps" if torch.backends.mps.is_available() else "cpu"
-        print(f"[Qwen] Loading model on {_qwen_device}...")
         _qwen_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-            "Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True, attn_implementation="eager",
-            torch_dtype=torch.float32, low_cpu_mem_usage=True
         ).to(_qwen_device)
         _qwen_processor = AutoProcessor.from_pretrained(
-            "Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True
         )
-        QWEN_AVAILABLE = True
-        QWEN_STATUS = f"✅ Available (loaded on {_qwen_device})"
-        return _qwen_model, _qwen_processor
-    except Exception as e:
-        QWEN_STATUS = f"❌ Failed to load Qwen model. Reason: {e}"
-        print(f"[ERROR] {QWEN_STATUS}")
-        return None, None
-# --- SAM-2 ---
-_sam2_model, _mask_generator = None, None
-def initialize_sam2():
-    global _sam2_model, _mask_generator, SAM2_AVAILABLE, SAM2_STATUS
-    # Step 1: Check system dependencies
-    git_ok, git_msg = check_system_dependencies()
-    if not git_ok:
-        SAM2_STATUS = f"❌ {git_msg}"
-        return None, None
-    # Step 2: Install SAM-2 if needed
-    install_ok, install_msg = check_and_install_sam2()
-    if not install_ok:
-        SAM2_STATUS = f"❌ {install_msg}"
-        return None, None
-    print(f"[SAM-2] Install check: {install_msg}")
-    # Step 3: Import modules now that it's installed
-    if not import_sam2_modules():
-        SAM2_STATUS = "❌ Failed to import SAM-2 modules after installation."
-        return None, None
-    # Step 4: Download checkpoint and initialize model
-    try:
-        checkpoint_dir = "checkpoints"
-        checkpoint_file = "sam2.1_hiera_large.pt"
-        checkpoint_path = os.path.join(checkpoint_dir, checkpoint_file)
-        if not os.path.exists(checkpoint_path):
-            os.makedirs(checkpoint_dir, exist_ok=True)
-            print("[SAM-2] Downloading checkpoint (sam2.1_hiera_large.pt)...")
-            import urllib.request
-            url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt"
-            urllib.request.urlretrieve(url, checkpoint_path)
-            print("[SAM-2] Checkpoint downloaded successfully.")
-        # ★★★ FIX IS HERE ★★★
-        # The cloned repository is named "segment-anything-2", not "sam2".
-        repo_dir = "sam2"
-        config_path = os.path.join(repo_dir, "sam2/configs/sam2.1/sam2.1_hiera_l.yaml")
-        if not os.path.exists(config_path):
-            SAM2_STATUS = f"❌ Config file not found at {config_path}. Check the repository structure."
-            return None, None
-        device = get_device()
-        print(f"[SAM-2] Building model on {device}...")
-        # NOTE: The build_sam function internally uses Hydra, which is why the error was complex.
-        # Passing the correct, full path to the config file is the right solution.
-        sam2_model = sam2_build_sam(config_path, checkpoint_path, device=device, apply_postprocessing=False)
-        mask_gen = sam2_AutomaticMaskGenerator(model=sam2_model, points_per_side=32, pred_iou_thresh=0.86, stability_score_thresh=0.92, crop_n_layers=0)
-        _sam2_model, _mask_generator = sam2_model, mask_gen
-        SAM2_AVAILABLE = True
-        SAM2_STATUS = f"✅ Available (loaded on {device})"
-        return sam2_model, mask_gen
-    except Exception as e:
-        SAM2_STATUS = f"❌ Failed to initialize SAM-2 model. Reason: {e}"
-        print(f"[ERROR] {SAM2_STATUS}")
-        return None, None
-# --- CheXagent ---
-_chex_model, _chex_tok = None, None
-def initialize_chexagent():
-    global _chex_model, _chex_tok, CHEXAGENT_AVAILABLE, CHEXAGENT_STATUS
-    try:
-        from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-        print("[CheXagent] Loading model (this may take time and memory)...")
-        chex_name = "StanfordAIMI/CheXagent-2-3b"
-        _chex_tok = AutoTokenizer.from_pretrained(chex_name, trust_remote_code=True)
-        _chex_model = AutoModelForCausalLM.from_pretrained(chex_name, device_map="auto", trust_remote_code=True)
-        _chex_model = _chex_model.half() if torch.cuda.is_available() else _chex_model.float()
-        _chex_model.eval()
-        CHEXAGENT_AVAILABLE = True
-        device = "GPU" if torch.cuda.is_available() else get_device()
-        CHEXAGENT_STATUS = f"✅ Available (loaded on {device})"
-        return _chex_model, _chex_tok
-    except Exception as e:
-        CHEXAGENT_STATUS = f"❌ Failed to load CheXagent. Reason: {e}. Check internet connection, disk space, and memory."
-        print(f"[ERROR] {CHEXAGENT_STATUS}")
-        return None, None
-# --- Fallback Segmentation ---
-def check_fallback_dependencies():
-    global FALLBACK_SEG_AVAILABLE
-    try:
-        import cv2
-        from skimage import segmentation, color
-        FALLBACK_SEG_AVAILABLE = True
-    except ImportError:
-        FALLBACK_SEG_AVAILABLE = False
-# =============================================================================
-# 3. Model Logic and Agents (Code unchanged from here)
-# =============================================================================
-# --- Qwen Agent ---
 class MedicalVLMAgent:
-    def __init__(self, model, processor):
         self.model = model
         self.processor = processor
-        self.device = get_device()
         self.system_prompt = (
             "You are a medical information assistant with vision capabilities.\n"
             "Disclaimer: I am not a licensed medical professional. "
             "The information provided is for reference only and should not be taken as medical advice."
         )
     def run(self, user_text: str, image: Image.Image | None = None) -> str:
-        from qwen_vl_utils import process_vision_info
-        messages = [{"role": "system", "content": [{"type": "text", "text": self.system_prompt}]}]
         user_content = []
         if image is not None:
-            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
-                image.save(tfile.name)
-                user_content.append({"type": "image", "image": tfile.name})
         user_content.append({"type": "text", "text": user_text or "Please describe the image."})
         messages.append({"role": "user", "content": user_content})
-        prompt_text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        img_inputs, _ = process_vision_info(messages)
-        inputs = self.processor(text=[prompt_text], images=img_inputs, padding=True, return_tensors="pt").to(self.device)
         with torch.no_grad():
             out = self.model.generate(**inputs, max_new_tokens=128)
-        trimmed = out[0][inputs.input_ids.shape[1]:]
         return self.processor.decode(trimmed, skip_special_tokens=True).strip()
-# --- SAM-2 Interface ---
 def automatic_mask_overlay(image_np: np.ndarray) -> np.ndarray:
-    if not _mask_generator: raise RuntimeError("SAM-2 mask generator not initialized")
     anns = _mask_generator.generate(image_np)
-    if not anns: return image_np
     overlay = image_np.copy()
-    if overlay.ndim == 2: overlay = np.stack([overlay] * 3, axis=2)
     for ann in sorted(anns, key=lambda x: x["area"], reverse=True):
         m = ann["segmentation"]
         color = np.random.randint(0, 255, 3, dtype=np.uint8)
         overlay[m] = (overlay[m] * 0.5 + color * 0.5).astype(np.uint8)
     return overlay
 def tumor_segmentation_interface(image: Image.Image | None):
-    if image is None: return None, "Please upload an image."
     try:
         img_np = np.array(image.convert("RGB"))
         out_np = automatic_mask_overlay(img_np)
         n_masks = len(_mask_generator.generate(img_np))
         return Image.fromarray(out_np), f"{n_masks} masks found."
     except Exception as e:
-        return None, f"SAM-2 processing error: {e}"
-# --- Fallback Segmentation ---
 def simple_segmentation_fallback(image: Image.Image | None):
-    if image is None: return None, "Please upload an image."
-    if not FALLBACK_SEG_AVAILABLE: return image, "Fallback libraries (OpenCV, Scikit-image) not installed."
     try:
         import cv2
         img_np = np.array(image.convert("RGB"))
         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
         _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
         kernel = np.ones((3,3), np.uint8)
         opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
         dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
         _, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
         overlay = img_np.copy()
-        overlay[sure_fg > 0] = [255, 0, 0]
         result = cv2.addWeighted(img_np, 0.7, overlay, 0.3, 0)
         return Image.fromarray(result), "Simple segmentation applied (SAM-2 not available)"
     except Exception as e:
-        return image, f"Fallback segmentation error: {e}"
-# --- CheXagent Interfaces ---
 def get_model_device(model):
-    return next(model.parameters()).device if model and next(model.parameters(), None) is not None else torch.device("cpu")
-def clean_text(text): return text.replace("</s>", "")
 @torch.no_grad()
 def response_report_generation(pil_image_1, pil_image_2):
-    from transformers import TextIteratorStreamer
-    streamer = TextIteratorStreamer(_chex_tok, skip_prompt=True, skip_special_tokens=True)
     paths = []
     for im in [pil_image_1, pil_image_2]:
-        if im:
-            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
-                im.save(tfile.name)
-                paths.append(tfile.name)
     if not paths:
         yield "Please upload at least one image."
         return
-    device = get_model_device(_chex_model)
-    anatomies = ["View", "Airway", "Breathing", "Cardiac", "Diaphragm", "Everything else"]
-    prompts = ["Determine the view of this CXR", *[f'Provide a detailed description of "{a}" in the chest X-ray' for a in anatomies[1:]]]
     findings = ""
     partial = "## Generating Findings (step-by-step):\n\n"
     for idx, (anat, prompt) in enumerate(zip(anatomies, prompts)):
-        query = _chex_tok.from_list_format([*[{"image": p} for p in paths], {"text": prompt}])
-        conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": query}]
-        inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
-        generate_kwargs = dict(input_ids=inp, max_new_tokens=512, do_sample=False, num_beams=1, streamer=streamer)
-        Thread(target=_chex_model.generate, kwargs=generate_kwargs).start()
-        partial += f"**Step {idx+1}: {anat}...**\n\n"
         for tok in streamer:
-            if idx > 0: findings += tok
             partial += tok
             yield clean_text(partial)
         partial += "\n\n"
         findings += " "
     findings = findings.strip()
     partial += "## Generating Impression\n\n"
     prompt = f"Write the Impression section for the following Findings: {findings}"
-    conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": _chex_tok.from_list_format([{"text": prompt}])}]
-    inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
-    Thread(target=_chex_model.generate, kwargs=dict(input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512, streamer=streamer)).start()
     for tok in streamer:
         partial += tok
         yield clean_text(partial)
@@ -395,113 +456,129 @@ def response_report_generation(pil_image_1, pil_image_2):
 @torch.no_grad()
 def response_phrase_grounding(pil_image, prompt_text):
-    if pil_image is None: return "Please upload an image.", None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
         pil_image.save(tfile.name)
         img_path = tfile.name
-    device = get_model_device(_chex_model)
-    query = _chex_tok.from_list_format([{"image": img_path}, {"text": prompt_text}])
-    conv = [{"from": "system", "value": "You are a helpful assistant."}, {"from": "human", "value": query}]
-    inp = _chex_tok.apply_chat_template(conv, add_generation_prompt=True, return_tensors="pt").to(device)
-    out = _chex_model.generate(input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512)
-    resp = clean_text(_chex_tok.decode(out[0][inp.shape[1] :]))
     w, h = pil_image.size
     cx, cy, sz = w // 2, h // 2, min(w, h) // 4
     draw = ImageDraw.Draw(pil_image)
     draw.rectangle([(cx - sz, cy - sz), (cx + sz, cy + sz)], outline="red", width=3)
-    return resp, pil_image
 # =============================================================================
-# 4. Gradio UI
 # =============================================================================
 def create_ui():
     """Create the Gradio interface."""
-    med_agent = MedicalVLMAgent(_qwen_model, _qwen_processor) if QWEN_AVAILABLE else None
-    with gr.Blocks(theme=gr.themes.Soft(), title="Medical AI Assistant") as demo:
         gr.Markdown("# Combined Medical Q&A · SAM-2 Automatic Masking · CheXagent")
         with gr.Row():
             gr.Markdown(f"""
-            ### System Status
-            - **Qwen VLM**: {QWEN_STATUS}
-            - **SAM-2**: {SAM2_STATUS}
-            - **CheXagent**: {CHEXAGENT_STATUS}
             """)
         with gr.Tab("Medical Q&A"):
-            if QWEN_AVAILABLE:
                 q_in = gr.Textbox(label="Question / description", lines=3)
                 q_img = gr.Image(label="Optional image", type="pil")
-                q_btn = gr.Button("Submit", variant="primary")
-                q_out = gr.Textbox(label="Answer", lines=5)
-                q_btn.click(fn=med_agent.run, inputs=[q_in, q_img], outputs=q_out, api_name="medical_qa")
             else:
-                gr.Markdown(f"### ❌ Medical Q&A is not available.\n**Reason:** {QWEN_STATUS}")
-        with gr.Tab("Automatic Masking (Segmentation)"):
             seg_img = gr.Image(label="Upload medical image", type="pil")
-            seg_btn = gr.Button("Run Segmentation", variant="primary")
-            seg_out = gr.Image(label="Segmentation Result", type="pil")
             seg_status = gr.Textbox(label="Status", interactive=False)
-            if SAM2_AVAILABLE:
-                seg_btn.click(fn=tumor_segmentation_interface, inputs=seg_img, outputs=[seg_out, seg_status], api_name="sam2_segmentation")
             else:
-                gr.Markdown(f"### ❌ SAM-2 is not available.\n**Reason:** {SAM2_STATUS}\n\n*Using a simple fallback segmentation method instead.*")
-                seg_btn.click(fn=simple_segmentation_fallback, inputs=seg_img, outputs=[seg_out, seg_status], api_name="fallback_segmentation")
-        with gr.Tab("CheXagent – Structured Report"):
             if CHEXAGENT_AVAILABLE:
-                gr.Markdown("Upload one or two chest X-ray images. The report will generate and stream live.")
-                with gr.Row():
-                    cx1 = gr.Image(label="Image 1 (Frontal)", image_mode="L", type="pil")
-                    cx2 = gr.Image(label="Image 2 (Lateral, optional)", image_mode="L", type="pil")
-                cx_report = gr.Markdown(label="Generated Report")
-                gr.Interface(fn=response_report_generation, inputs=[cx1, cx2], outputs=cx_report, live=True, allow_flagging="never").render()
             else:
-                gr.Markdown(f"### ❌ CheXagent is not available.\n**Reason:** {CHEXAGENT_STATUS}")
-        with gr.Tab("CheXagent – Visual Grounding"):
             if CHEXAGENT_AVAILABLE:
-                gr.Markdown("Upload an image and specify a finding to locate (placeholder functionality).")
                 vg_img = gr.Image(image_mode="L", type="pil")
-                vg_prompt = gr.Textbox(value="Locate the cardiomegaly")
-                vg_text = gr.Markdown(label="Finding Description")
-                vg_out_img = gr.Image(label="Image with Grounding")
-                gr.Interface(fn=response_phrase_grounding, inputs=[vg_img, vg_prompt], outputs=[vg_text, vg_out_img], allow_flagging="never").render()
             else:
-                gr.Markdown(f"### ❌ CheXagent is not available.\n**Reason:** {CHEXAGENT_STATUS}")
     return demo
-# =============================================================================
-# 5. Main Execution Block
-# =============================================================================
-def initialize_all_models():
-    """Run all model initializers and print status."""
-    print("="*50)
-    print("INITIALIZING ALL MODELS...")
-    print("="*50)
-    # Order: Smallest/fastest to largest/slowest
-    initialize_qwen()
-    initialize_chexagent()
-    initialize_sam2() # SAM-2 is complex, run last
-    check_fallback_dependencies()
-    print("\n" + "="*50)
-    print("INITIALIZATION COMPLETE. STATUS SUMMARY:")
-    print("="*50)
-    print(f"- Qwen VLM: {QWEN_STATUS}")
-    print(f"- SAM-2: {SAM2_STATUS}")
-    print(f"- CheXagent: {CHEXAGENT_STATUS}")
-    print(f"- Fallback Segmentation Ready: {FALLBACK_SEG_AVAILABLE}")
-    print("="*50 + "\n")
 if __name__ == "__main__":
-    initialize_all_models()
     demo = create_ui()
     demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

 # -*- coding: utf-8 -*-
 """
+Combined Medical-VLM, **SAM-2 automatic masking**, and CheXagent demo.
+⭑ Changes ⭑
+-----------
+1. Fixed SAM-2 installation and import issues
+2. Added proper error handling for missing dependencies
+3. Made SAM-2 functionality optional with graceful fallback
+4. Added installation instructions and requirements check
 """
 # ---------------------------------------------------------------------
 import tempfile
 import subprocess
 import warnings
 from threading import Thread
 # Environment setup
 import gradio as gr
 # =============================================================================
+# Dependency checker and installer
 # =============================================================================
 def check_and_install_sam2():
     """Make sure the ``sam2`` package is importable, installing it on demand.

     If the import fails, the SAM-2 repository is cloned into
     ``./segment-anything-2`` (unless that directory already exists), installed
     in editable mode with pip, and added to ``sys.path``.

     Returns:
         tuple[bool, str]: ``(available, status_message)``. Installation
         failures are reported through the message instead of being raised.
     """
     import importlib

     repo_dir = "segment-anything-2"

     try:
         # Try importing SAM-2
         from sam2.build_sam import build_sam2  # noqa: F401
         from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator  # noqa: F401
         return True, "SAM-2 already available"
     except ImportError:
         print("SAM-2 not found. Attempting to install...")

     try:
         # Clone SAM-2 repository
         if not os.path.exists(repo_dir):
             subprocess.run(
                 [
                     "git", "clone",
                     "https://github.com/facebookresearch/segment-anything-2.git",
                 ],
                 check=True,
             )

         # Install SAM-2. pip runs inside the checkout through ``cwd`` rather
         # than os.chdir(), so a failing install can never leave this process
         # stranded in the repository directory.
         subprocess.run(
             [sys.executable, "-m", "pip", "install", "-e", "."],
             check=True,
             cwd=repo_dir,
         )

         # Add to Python path (only once, even if this function runs again)
         repo_path = os.path.abspath(repo_dir)
         if repo_path not in sys.path:
             sys.path.insert(0, repo_path)
         # The package appeared after interpreter start-up; drop stale finder
         # caches before retrying the import.
         importlib.invalidate_caches()

         # Try importing again
         from sam2.build_sam import build_sam2  # noqa: F401
         from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator  # noqa: F401
         return True, "SAM-2 installed successfully"
     except Exception as e:
         # Boundary handler: any failure (git, pip, import) becomes a status
         # message so the application can start without SAM-2.
         print(f"Failed to install SAM-2: {e}")
         return False, f"SAM-2 installation failed: {e}"
# Check SAM-2 availability
SAM2_AVAILABLE, SAM2_STATUS = check_and_install_sam2()
print(f"SAM-2 Status: {SAM2_STATUS}")

# =============================================================================
# SAM-2 imports (conditional)
# =============================================================================
if SAM2_AVAILABLE:
    try:
        # These two names are what the model / mask-generator setup calls.
        from sam2.build_sam import build_sam2
        from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
    except ImportError as e:
        print(f"SAM-2 import error: {e}")
        SAM2_AVAILABLE = False
        # Keep the status text in step with the availability flag.
        SAM2_STATUS = f"SAM-2 import error: {e}"
    else:
        # Extra names kept for backward compatibility. They are not used by
        # the code visible in this section, so a missing one must not switch
        # SAM-2 off. NOTE(review): confirm these exist in the installed sam2
        # release and whether anything later in the file needs them.
        try:
            from sam2.modeling.sam2_base import SAM2Base
            from sam2.utils.misc import get_device_index
        except ImportError as e:
            print(f"SAM-2 optional import skipped: {e}")
 # =============================================================================
+# Qwen-VLM imports & helper
 # =============================================================================
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+# =============================================================================
+# CheXagent imports
+# =============================================================================
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+# ---------------------------------------------------------------------
+# Devices
+# ---------------------------------------------------------------------
 def get_device():
     """Return the preferred torch device: CUDA first, then Apple MPS, then CPU."""
     if torch.cuda.is_available():
         return torch.device("cuda")
     # The MPS return needs its own availability check; without it the
     # statement sits after the CUDA return and can never execute.
     mps_backend = getattr(torch.backends, "mps", None)
     if mps_backend is not None and mps_backend.is_available():
         return torch.device("mps")
     return torch.device("cpu")
+# =============================================================================
+# Qwen-VLM model & agent
+# =============================================================================
# Lazily populated cache for the Qwen model, its processor and the device name.
_qwen_model = None
_qwen_processor = None
_qwen_device = None


def load_qwen_model_and_processor(hf_token=None):
    """Load the Qwen2.5-VL model and processor once and cache them.

    Args:
        hf_token: Optional Hugging Face access token forwarded to both
            ``from_pretrained`` calls.

    Returns:
        tuple: ``(model, processor, device)`` where ``device`` is the string
        ``"mps"`` or ``"cpu"``.
    """
    global _qwen_model, _qwen_processor, _qwen_device
    if _qwen_model is None or _qwen_processor is None:
        # NOTE(review): CUDA is never selected here even though get_device()
        # prefers it; confirm whether running Qwen on CPU/MPS only is intended.
        device = "mps" if torch.backends.mps.is_available() else "cpu"
        print(f"[Qwen] loading model on {device}")
        # ``token`` replaces the deprecated ``use_auth_token`` keyword.
        auth_kwargs = {"token": hf_token} if hf_token else {}
        model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
            "Qwen/Qwen2.5-VL-3B-Instruct",
            trust_remote_code=True,
            attn_implementation="eager",
            torch_dtype=torch.float32,
            low_cpu_mem_usage=True,
            device_map=None,
            **auth_kwargs,
        ).to(device)
        processor = AutoProcessor.from_pretrained(
            "Qwen/Qwen2.5-VL-3B-Instruct",
            trust_remote_code=True,
            **auth_kwargs,
        )
        # Publish only after both loads succeeded, so a failed processor load
        # cannot leave a cached model paired with a ``None`` processor.
        _qwen_model, _qwen_processor, _qwen_device = model, processor, device
    return _qwen_model, _qwen_processor, _qwen_device
 class MedicalVLMAgent:
     """Light wrapper around Qwen-VLM with an optional image."""

     def __init__(self, model, processor, device):
         """Store the model, its processor and the device inputs are moved to."""
         self.model = model
         self.processor = processor
         self.device = device
         self.system_prompt = (
             "You are a medical information assistant with vision capabilities.\n"
             "Disclaimer: I am not a licensed medical professional. "
             "The information provided is for reference only and should not be taken as medical advice."
         )

     def run(self, user_text: str, image: Image.Image | None = None) -> str:
         """Answer *user_text*, optionally grounded on *image*.

         Args:
             user_text: The question; an empty value falls back to a generic
                 "describe the image" prompt.
             image: Optional PIL image passed to the model through a temporary
                 PNG file.

         Returns:
             The decoded model answer with the prompt tokens stripped.
         """
         messages = [
             {"role": "system", "content": [{"type": "text", "text": self.system_prompt}]}
         ]
         user_content = []
         tmp_path = None
         try:
             if image is not None:
                 # mkstemp picks the platform temp directory; the descriptor is
                 # closed immediately so PIL can write to the path itself.
                 fd, tmp_path = tempfile.mkstemp(suffix=".png")
                 os.close(fd)
                 image.save(tmp_path)
                 user_content.append({"type": "image", "image": tmp_path})
             user_content.append({"type": "text", "text": user_text or "Please describe the image."})
             messages.append({"role": "user", "content": user_content})

             prompt_text = self.processor.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
             )
             img_inputs, vid_inputs = process_vision_info(messages)
             inputs = self.processor(
                 text=[prompt_text],
                 images=img_inputs,
                 videos=vid_inputs,
                 padding=True,
                 return_tensors="pt",
             ).to(self.device)

             with torch.no_grad():
                 out = self.model.generate(**inputs, max_new_tokens=128)
             # Drop the echoed prompt tokens, keeping only the generated tail.
             trimmed = out[0][inputs.input_ids.shape[1]:]
             return self.processor.decode(trimmed, skip_special_tokens=True).strip()
         finally:
             # Remove the per-request PNG so repeated calls do not fill the temp
             # directory. Cleanup is best-effort and must not mask the result.
             if tmp_path is not None:
                 try:
                     os.remove(tmp_path)
                 except OSError:
                     pass
+# =============================================================================
+# SAM-2 model + AutomaticMaskGenerator (conditional)
+# =============================================================================
def download_sam2_checkpoint():
    """Download the SAM-2 checkpoint if it is not present.

    The file is stored at ``checkpoints/sam2.1_hiera_large.pt`` relative to the
    current working directory.

    Returns:
        The checkpoint path, or ``None`` when the download failed.
    """
    checkpoint_dir = "checkpoints"
    checkpoint_file = "sam2.1_hiera_large.pt"
    checkpoint_path = os.path.join(checkpoint_dir, checkpoint_file)
    if os.path.exists(checkpoint_path):
        return checkpoint_path

    os.makedirs(checkpoint_dir, exist_ok=True)
    print("Downloading SAM-2 checkpoint...")
    # Download next to the target and rename on success: an interrupted
    # transfer must not leave a truncated file at the final path, where the
    # existence check above would accept it on the next start.
    partial_path = checkpoint_path + ".part"
    try:
        import urllib.request
        url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_large.pt"
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, checkpoint_path)
        print("SAM-2 checkpoint downloaded successfully")
    except Exception as e:
        print(f"Failed to download SAM-2 checkpoint: {e}")
        # Best-effort removal of whatever was written before the failure.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        return None
    return checkpoint_path
def initialize_sam2():
    """Initialize the SAM-2 model and its automatic mask generator.

    Returns:
        tuple: ``(sam2_model, mask_generator)``, or ``(None, None)`` when SAM-2
        is unavailable, the checkpoint cannot be obtained, or building fails.
    """
    if not SAM2_AVAILABLE:
        return None, None

    try:
        # Download checkpoint if needed
        checkpoint_path = download_sam2_checkpoint()
        if checkpoint_path is None:
            return None, None

        # Use the package-relative config name, as in the SAM 2 README, rather
        # than a filesystem path into the local checkout. This is the same
        # value the previous code already fell back to when the checkout was
        # absent. NOTE(review): confirm against the installed sam2 release.
        config_path = "configs/sam2.1/sam2.1_hiera_l.yaml"

        device = get_device()
        print(f"[SAM-2] building model on {device}")

        sam2_model = build_sam2(
            config_path,
            checkpoint_path,
            device=device,
            apply_postprocessing=False,
        )

        mask_gen = SAM2AutomaticMaskGenerator(
            model=sam2_model,
            points_per_side=32,
            pred_iou_thresh=0.86,
            stability_score_thresh=0.92,
            crop_n_layers=0,
        )
        return sam2_model, mask_gen

    except Exception as e:
        # Boundary handler: report the failure and let the app run without SAM-2.
        print(f"[SAM-2] Failed to initialize: {e}")
        return None, None
# Build SAM-2 once at import time, but only when the package was importable;
# both globals stay None otherwise so callers can test for readiness.
if SAM2_AVAILABLE:
    _sam2_model, _mask_generator = initialize_sam2()
    print(
        "[SAM-2] Successfully initialized!"
        if _sam2_model is not None
        else "[SAM-2] Initialization failed"
    )
else:
    _sam2_model = _mask_generator = None
 def automatic_mask_overlay(image_np: np.ndarray) -> np.ndarray:
+    """Generate masks and alpha-blend them on top of the original image."""
+    if _mask_generator is None:
+        raise RuntimeError("SAM-2 mask generator not initialized")
     anns = _mask_generator.generate(image_np)
+    if not anns:
+        return image_np
     overlay = image_np.copy()
+    if overlay.ndim == 2:  # grayscale → RGB
+        overlay = np.stack([overlay] * 3, axis=2)
     for ann in sorted(anns, key=lambda x: x["area"], reverse=True):
         m = ann["segmentation"]
         color = np.random.randint(0, 255, 3, dtype=np.uint8)
         overlay[m] = (overlay[m] * 0.5 + color * 0.5).astype(np.uint8)
     return overlay
 def tumor_segmentation_interface(image: Image.Image | None):
+    """Tumor segmentation interface with proper error handling."""
+    if image is None:
+        return None, "Please upload an image."
+    if not SAM2_AVAILABLE:
+        return None, "SAM-2 is not available. Please check installation."
+    if _mask_generator is None:
+        return None, "SAM-2 not properly initialized. Check the console for errors."
     try:
         img_np = np.array(image.convert("RGB"))
         out_np = automatic_mask_overlay(img_np)
         n_masks = len(_mask_generator.generate(img_np))
         return Image.fromarray(out_np), f"{n_masks} masks found."
     except Exception as e:
+        return None, f"SAM-2 error: {e}"
+# =============================================================================
+# Simple fallback segmentation (when SAM-2 is not available)
+# =============================================================================
 def simple_segmentation_fallback(image: Image.Image | None):
+    """Simple fallback segmentation using basic image processing."""
+    if image is None:
+        return None, "Please upload an image."
     try:
         import cv2
+        from skimage import segmentation, color
+        # Convert to numpy array
         img_np = np.array(image.convert("RGB"))
+        # Simple watershed segmentation
         gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
         _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+        # Remove noise
         kernel = np.ones((3,3), np.uint8)
         opening = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
+        # Sure background area
+        sure_bg = cv2.dilate(opening, kernel, iterations=3)
+        # Finding sure foreground area
         dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
         _, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0)
+        # Create overlay
         overlay = img_np.copy()
+        overlay[sure_fg > 0] = [255, 0, 0]  # Red overlay
+        # Alpha blend
         result = cv2.addWeighted(img_np, 0.7, overlay, 0.3, 0)
         return Image.fromarray(result), "Simple segmentation applied (SAM-2 not available)"
     except Exception as e:
+        return None, f"Fallback segmentation error: {e}"
+# =============================================================================
+# CheXagent set-up
+# =============================================================================
# Load the CheXagent tokenizer and model once at import time.  Any failure
# disables the CheXagent features instead of aborting the application.
chex_name = "StanfordAIMI/CheXagent-2-3b"
try:
    chex_tok = AutoTokenizer.from_pretrained(chex_name, trust_remote_code=True)
    chex_model = AutoModelForCausalLM.from_pretrained(
        chex_name, device_map="auto", trust_remote_code=True
    )
    # Half precision when CUDA is present, full precision otherwise.
    if torch.cuda.is_available():
        chex_model = chex_model.half()
    else:
        chex_model = chex_model.float()
    chex_model.eval()
except Exception as e:
    print(f"CheXagent not available: {e}")
    CHEXAGENT_AVAILABLE = False
    chex_tok = chex_model = None
else:
    CHEXAGENT_AVAILABLE = True
 def get_model_device(model):
+    if model is None:
+        return torch.device("cpu")
+    for p in model.parameters():
+        return p.device
+    return torch.device("cpu")
def clean_text(text):
    """Return ``text`` with every ``</s>`` end-of-sequence marker removed."""
    eos_marker = "</s>"
    return "".join(text.split(eos_marker))
 @torch.no_grad()
 def response_report_generation(pil_image_1, pil_image_2):
+    """Structured chest-X-ray report (streaming)."""
+    if not CHEXAGENT_AVAILABLE:
+        yield "CheXagent is not available. Please check installation."
+        return
+    streamer = TextIteratorStreamer(chex_tok, skip_prompt=True, skip_special_tokens=True)
     paths = []
     for im in [pil_image_1, pil_image_2]:
+        if im is None:
+            continue
+        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
+            im.save(tfile.name)
+            paths.append(tfile.name)
     if not paths:
         yield "Please upload at least one image."
         return
+    device = get_model_device(chex_model)
+    anatomies = [
+        "View",
+        "Airway",
+        "Breathing",
+        "Cardiac",
+        "Diaphragm",
+        "Everything else (e.g., mediastinal contours, bones, soft tissues, tubes, valves, pacemakers)",
+    ]
+    prompts = [
+        "Determine the view of this CXR",
+        *[
+            f'Provide a detailed description of "{a}" in the chest X-ray'
+            for a in anatomies[1:]
+        ],
+    ]
     findings = ""
     partial = "## Generating Findings (step-by-step):\n\n"
     for idx, (anat, prompt) in enumerate(zip(anatomies, prompts)):
+        query = chex_tok.from_list_format(
+            [*[{"image": p} for p in paths], {"text": prompt}]
+        )
+        conv = [
+            {"from": "system", "value": "You are a helpful assistant."},
+            {"from": "human", "value": query},
+        ]
+        inp = chex_tok.apply_chat_template(
+            conv, add_generation_prompt=True, return_tensors="pt"
+        ).to(device)
+        generate_kwargs = dict(
+            input_ids=inp,
+            max_new_tokens=512,
+            do_sample=False,
+            num_beams=1,
+            streamer=streamer,
+        )
+        Thread(target=chex_model.generate, kwargs=generate_kwargs).start()
+        partial += f"**Step {idx}: {anat}...**\n\n"
         for tok in streamer:
+            if idx:
+                findings += tok
             partial += tok
             yield clean_text(partial)
         partial += "\n\n"
         findings += " "
     findings = findings.strip()
+    # Impression
     partial += "## Generating Impression\n\n"
     prompt = f"Write the Impression section for the following Findings: {findings}"
+    conv = [
+        {"from": "system", "value": "You are a helpful assistant."},
+        {"from": "human", "value": chex_tok.from_list_format([{"text": prompt}])},
+    ]
+    inp = chex_tok.apply_chat_template(
+        conv, add_generation_prompt=True, return_tensors="pt"
+    ).to(device)
+    Thread(
+        target=chex_model.generate,
+        kwargs=dict(
+            input_ids=inp,
+            do_sample=False,
+            num_beams=1,
+            max_new_tokens=512,
+            streamer=streamer,
+        ),
+    ).start()
     for tok in streamer:
         partial += tok
         yield clean_text(partial)
 @torch.no_grad()
 def response_phrase_grounding(pil_image, prompt_text):
+    """Very simple visual-grounding placeholder."""
+    if not CHEXAGENT_AVAILABLE:
+        return "CheXagent is not available. Please check installation.", None
+    if pil_image is None:
+        return "Please upload an image.", None
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tfile:
         pil_image.save(tfile.name)
         img_path = tfile.name
+    device = get_model_device(chex_model)
+    query = chex_tok.from_list_format([{"image": img_path}, {"text": prompt_text}])
+    conv = [
+        {"from": "system", "value": "You are a helpful assistant."},
+        {"from": "human", "value": query},
+    ]
+    inp = chex_tok.apply_chat_template(
+        conv, add_generation_prompt=True, return_tensors="pt"
+    ).to(device)
+    out = chex_model.generate(
+        input_ids=inp, do_sample=False, num_beams=1, max_new_tokens=512
+    )
+    resp = clean_text(chex_tok.decode(out[0][inp.shape[1] :]))
+    # simple center box (placeholder)
     w, h = pil_image.size
     cx, cy, sz = w // 2, h // 2, min(w, h) // 4
     draw = ImageDraw.Draw(pil_image)
     draw.rectangle([(cx - sz, cy - sz), (cx + sz, cy + sz)], outline="red", width=3)
+    return resp, pil_image
 # =============================================================================
+# Gradio UI
 # =============================================================================
 def create_ui():
     """Create the Gradio interface."""
+    # Load Qwen model
+    try:
+        qwen_model, qwen_proc, qwen_dev = load_qwen_model_and_processor()
+        med_agent = MedicalVLMAgent(qwen_model, qwen_proc, qwen_dev)
+        qwen_available = True
+    except Exception as e:
+        print(f"Qwen model not available: {e}")
+        qwen_available = False
+        med_agent = None
+    with gr.Blocks(title="Medical AI Assistant") as demo:
         gr.Markdown("# Combined Medical Q&A · SAM-2 Automatic Masking · CheXagent")
+        # Status information
         with gr.Row():
             gr.Markdown(f"""
+            **System Status:**
+            - Qwen VLM: {'✅ Available' if qwen_available else '❌ Not Available'}
+            - SAM-2: {'✅ Available' if SAM2_AVAILABLE else '❌ Not Available'}
+            - CheXagent: {'✅ Available' if CHEXAGENT_AVAILABLE else '❌ Not Available'}
             """)
+        # Medical Q&A Tab
         with gr.Tab("Medical Q&A"):
+            if qwen_available:
                 q_in = gr.Textbox(label="Question / description", lines=3)
                 q_img = gr.Image(label="Optional image", type="pil")
+                q_btn = gr.Button("Submit")
+                q_out = gr.Textbox(label="Answer")
+                q_btn.click(fn=med_agent.run, inputs=[q_in, q_img], outputs=q_out)
             else:
+                gr.Markdown("❌ Medical Q&A is not available. Qwen model failed to load.")
+        # Segmentation Tab
+        with gr.Tab("Automatic masking"):
             seg_img = gr.Image(label="Upload medical image", type="pil")
+            seg_btn = gr.Button("Run segmentation")
+            seg_out = gr.Image(label="Segmentation result", type="pil")
             seg_status = gr.Textbox(label="Status", interactive=False)
+            if SAM2_AVAILABLE and _mask_generator is not None:
+                seg_btn.click(
+                    fn=tumor_segmentation_interface,
+                    inputs=seg_img,
+                    outputs=[seg_out, seg_status],
+                )
             else:
+                seg_btn.click(
+                    fn=simple_segmentation_fallback,
+                    inputs=seg_img,
+                    outputs=[seg_out, seg_status],
+                )
+        # CheXagent Tabs
+        with gr.Tab("CheXagent – Structured report"):
             if CHEXAGENT_AVAILABLE:
+                gr.Markdown("Upload one or two chest X-ray images; the report streams live.")
+                cx1 = gr.Image(label="Image 1", image_mode="L", type="pil")
+                cx2 = gr.Image(label="Image 2", image_mode="L", type="pil")
+                cx_report = gr.Markdown()
+                gr.Interface(
+                    fn=response_report_generation,
+                    inputs=[cx1, cx2],
+                    outputs=cx_report,
+                    live=True,
+                ).render()
             else:
+                gr.Markdown("❌ CheXagent structured report is not available.")
+        with gr.Tab("CheXagent – Visual grounding"):
             if CHEXAGENT_AVAILABLE:
                 vg_img = gr.Image(image_mode="L", type="pil")
+                vg_prompt = gr.Textbox(value="Locate the highlighted finding:")
+                vg_text = gr.Markdown()
+                vg_out_img = gr.Image()
+                gr.Interface(
+                    fn=response_phrase_grounding,
+                    inputs=[vg_img, vg_prompt],
+                    outputs=[vg_text, vg_out_img],
+                ).render()
             else:
+                gr.Markdown("❌ CheXagent visual grounding is not available.")
     return demo
 if __name__ == "__main__":
     demo = create_ui()
     demo.launch(server_name="0.0.0.0", server_port=7860, share=True)