LiamKhoaLe commited on
Commit
da736bd
·
1 Parent(s): f672b04

GRADIO CLIENT MIGRATION:

Browse files
Files changed (1) hide show
  1. vlm.py +39 -55
vlm.py CHANGED
@@ -1,70 +1,54 @@
1
- # vlm.py
2
  import os, logging, traceback, json, base64
3
  from io import BytesIO
4
  from PIL import Image
5
- from huggingface_hub import InferenceClient # Render model on HF hub
6
- from transformers import pipeline # Render model on transformers
7
  from translation import translate_query
8
-
9
- # Initialise once
10
- HF_TOKEN = os.getenv("HF_TOKEN")
11
- # client = InferenceClient(provider="auto", api_key=HF_TOKEN) # comment in back
12
 
13
  logger = logging.getLogger("vlm-agent")
14
- logging.basicConfig(level=logging.INFO, format="%(asctime)s — %(name)s — %(levelname)s — %(message)s", force=True) # Change INFO to DEBUG for full-ctx JSON loader
15
 
16
# ✅ Load VLM pipeline once (lazy load: the heavy model download/initialisation
# only happens on the first image request, not at import time).
vlm_pipe = None
def load_vlm():
    """Return the shared MedGEMMA image-to-text pipeline, creating it on first call.

    Returns:
        transformers.Pipeline: an "image-to-text" pipeline for
        google/medgemma-4b-it, device-mapped automatically.
    """
    global vlm_pipe
    if vlm_pipe is None:
        logger.info("⏳ Loading MedGEMMA model via Transformers pipeline...")
        # Fix: `use_auth_token` is deprecated in recent transformers releases;
        # the supported kwarg for gated-model authentication is `token`.
        vlm_pipe = pipeline(
            "image-to-text",
            model="google/medgemma-4b-it",
            token=HF_TOKEN,
            device_map="auto",
        )
        logger.info(" MedGEMMA model ready.")
    return vlm_pipe
25
 
26
def process_medical_image(base64_image: str, prompt: str = None, lang: str = "EN") -> str:
    """
    Send a base64-encoded image + prompt to MedGEMMA and return the generated text.

    Args:
        base64_image: the image bytes, base64-encoded (no data-URL prefix).
        prompt: optional instruction for the model; a clinical default is used
            when omitted. Non-English prompts (VI/ZH) are translated first.
        lang: caller's language code; only "VI"/"ZH" trigger translation.

    Returns:
        The model's generated text, or a "[VLM] ⚠️ ..." error string on failure
        (this function never raises to the caller).
    """
    if not prompt:
        prompt = "Describe and investigate any clinical findings from this medical image."
    elif lang.upper() in {"VI", "ZH"}:
        # Fix: the original assigned `user_query = translate_query(user_query, ...)`,
        # referencing an undefined name (NameError) and never updating `prompt`.
        prompt = translate_query(prompt, lang.lower())
    try:
        # Decode base64 to a PIL image the pipeline can consume.
        image_data = base64.b64decode(base64_image)
        image = Image.open(BytesIO(image_data)).convert("RGB")
        pipe = load_vlm()
        # The image-to-text pipeline returns [{"generated_text": str}, ...];
        # `response` is therefore a plain string.
        response = pipe(image, prompt=prompt, max_new_tokens=100)[0]["generated_text"]
        # Fix: the original validated `response.choices[0].message.content` — the
        # HF-hub chat-completions shape — against this plain string, so every
        # successful pipeline call raised ValueError. Validate the string itself.
        if not response or not isinstance(response, str):
            raise ValueError("Empty or malformed response from MedGEMMA.")
        result = response.strip()
        logger.info(f"[VLM] MedGemma returned {result}")
        return result
    except Exception as e:
        logger.error(f"[VLM] ❌ Exception: {e}")
        logger.error(f"[VLM] 🔍 Traceback:\n{traceback.format_exc()}")
        # Best-effort dump of whatever we got back; `response` may be unbound
        # if decoding or inference failed before assignment.
        try:
            logger.error(f"[VLM] ⚠️ Raw response: {json.dumps(response, default=str, indent=2)}")
        except Exception:  # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
            logger.warning("[VLM] ⚠️ Response not serializable.")
        return f"[VLM] ⚠️ Image diagnosis failed: {str(e)}"
 
 
1
  import os, logging, traceback, json, base64
2
  from io import BytesIO
3
  from PIL import Image
 
 
4
  from translation import translate_query
5
+ from gradio_client import Client, handle_file
6
+ import tempfile
 
 
7
 
8
  logger = logging.getLogger("vlm-agent")
9
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s — %(name)s — %(levelname)s — %(message)s", force=True)
10
 
11
# ✅ Lazily-initialised singleton connection to the MedGEMMA Gradio Space.
gr_client = None
def load_gradio_client():
    """Return the shared Gradio client, connecting to the Space on first use."""
    global gr_client
    if gr_client is not None:
        return gr_client
    logger.info("[VLM] Connecting to MedGEMMA Gradio Space...")
    gr_client = Client("warshanks/medgemma-4b-it")
    logger.info("[VLM] Gradio MedGEMMA client ready.")
    return gr_client
20
 
21
def process_medical_image(base64_image: str, prompt: str = None, lang: str = "EN") -> str:
    """
    Send a base64-encoded image + prompt to the MedGEMMA Gradio Space and
    return its textual diagnosis.

    Args:
        base64_image: the image bytes, base64-encoded (no data-URL prefix).
        prompt: optional instruction for the model; a clinical default is used
            when omitted. Non-English prompts (VI/ZH) are translated first.
        lang: caller's language code; only "VI"/"ZH" trigger translation.

    Returns:
        The Space's reply (stripped), the stringified result if the reply is
        not a string, or a "[VLM] ⚠️ ..." error string on failure (this
        function never raises to the caller).
    """
    if not prompt:
        prompt = "Describe and investigate any clinical findings from this medical image."
    elif lang.upper() in {"VI", "ZH"}:
        prompt = translate_query(prompt, lang.lower())

    image_path = None
    try:
        # 1️⃣ Decode base64 image to a temp file — gradio_client uploads files,
        # not in-memory bytes. (No tmp.flush() needed: the `with` block closes
        # the handle, flushing it, before the path is used.)
        image_data = base64.b64decode(base64_image)
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            tmp.write(image_data)
            image_path = tmp.name

        # 2️⃣ Send to Gradio MedGEMMA
        client = load_gradio_client()
        logger.info(f"[VLM] Sending prompt: {prompt}")
        result = client.predict(
            message={"text": prompt, "files": [handle_file(image_path)]},
            # param_2/param_3 are the Space's auto-named system-prompt and
            # max-tokens inputs on the /chat endpoint.
            param_2="You analyze medical images and report abnormalities, diseases with clear diagnostic insight.",
            param_3=2048,
            api_name="/chat",
        )
        if isinstance(result, str):
            logger.info(f"[VLM] Response: {result}")
            return result.strip()
        else:
            logger.warning(f"[VLM] ⚠️ Unexpected result type: {type(result)} — {result}")
            return str(result)
    except Exception as e:
        logger.error(f"[VLM] ❌ Exception: {e}")
        logger.error(f"[VLM] 🔍 Traceback:\n{traceback.format_exc()}")
        return f"[VLM] ⚠️ Failed to process image: {e}"
    finally:
        # Fix: delete=False meant the temp file was never removed, leaking one
        # file per request for the lifetime of the container.
        if image_path:
            try:
                os.unlink(image_path)
            except OSError:
                pass