Spaces:

MdFaisalKarim
/

MedSegVLM_with_DeepSeek

Sleeping

App Files Files Community

Faisal committed 20 days ago

Commit

61663e8

1 Parent(s): 470d42b

Add requirements.txt and .gitignore for Hugging Face deployment

Browse files

Files changed (3) hide show

.gitignore +44 -0
app.py +157 -0
requirements.txt +12 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,44 @@

+# Virtual Environment
+.venv/
+venv/
+env/
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+# OS
+.DS_Store
+Thumbs.db
+# Logs
+*.log
+# Model files (if any)
+*.bin
+*.safetensors
+models/

app.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import gradio as gr
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, GenerationConfig
+from qwen_vl_utils import process_vision_info
+import torch
+import requests
+from IPython.display import Markdown
+# ----------------------------
+# MODEL LOADING (MedVLM-R1)
+# ----------------------------
+MODEL_PATH = 'JZPeterPan/MedVLM-R1'
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+processor = AutoProcessor.from_pretrained(MODEL_PATH)
+temp_generation_config = GenerationConfig(
+    max_new_tokens=1024,
+    do_sample=False,
+    temperature=1,
+    num_return_sequences=1,
+    pad_token_id=151643,
+)
+# ----------------------------
+# API SETTINGS (DeepSeek R1)
+# ----------------------------
+api_key = "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"
+deepseek_model = "deepseek/deepseek-r1"
+# ----------------------------
+# DEFAULT QUESTION
+# ----------------------------
+DEFAULT_QUESTION = "What abnormality is in the brain MRI and what is the location?\nA) Tumour\nB) No tumour"
+QUESTION_TEMPLATE = """
+{Question}
+Your task:
+1. Think through the question step by step, enclose your reasoning process in <think>...</think> tags.
+2. Then provide the correct single-letter choice (A, B, C, D,...) inside <answer>...</answer> tags.
+3. No extra information or text outside of these tags.
+"""
+# ----------------------------
+# PIPELINE FUNCTION
+# ----------------------------
+def process_pipeline(image, user_question):
+    if image is None or user_question.strip() == "":
+        return "Please upload an image and enter a question."
+    # Combine user's question with default
+    combined_question = user_question.strip() + "\n\n" + DEFAULT_QUESTION
+    message = [{
+        "role": "user",
+        "content": [
+            {"type": "image", "image": image},
+            {"type": "text", "text": QUESTION_TEMPLATE.format(Question=combined_question)}
+        ]
+    }]
+    # Prepare inputs for MedVLM
+    text = processor.apply_chat_template(message, tokenize=False, add_generation_prompt=True)
+    image_inputs, video_inputs = process_vision_info(message)
+    inputs = processor(
+        text=text,
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    ).to("cuda")
+    # Generate output from MedVLM
+    generated_ids = model.generate(
+        **inputs,
+        use_cache=True,
+        max_new_tokens=1024,
+        do_sample=False,
+        generation_config=temp_generation_config
+    )
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    output_text = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
+    # Send MedVLM output to DeepSeek R1
+    prompt = f"""The following is a medical AI's answer to a visual question.
+The answer is about having tumour or not, focus on that mostly.
+Keep the answer precise but more structured, and helpful for a medical professional.
+If possible, make a table using the details from the original answer.
+Original Answer:
+{output_text}
+"""
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    data = {
+        "model": deepseek_model,
+        "messages": [
+            {"role": "system", "content": "You are a highly skilled medical writer."},
+            {"role": "user", "content": prompt}
+        ]
+    }
+    response = requests.post(
+        "https://openrouter.ai/api/v1/chat/completions",
+        headers=headers,
+        json=data
+    )
+    try:
+        detailed_answer = response.json()["choices"][0]["message"]["content"]
+    except Exception as e:
+        return f"Error from DeepSeek: {str(e)}\nFull Response: {response.text}"
+    return detailed_answer
+# ----------------------------
+# GRADIO UI
+# ----------------------------
+with gr.Blocks(title="Brain MRI QA") as demo:
+    with gr.Row():
+        # Left column
+        with gr.Column():
+            image_input = gr.Image(type="filepath", label="Upload Medical Image")
+        # Right column
+        with gr.Column():
+            question_box = gr.Textbox(label="Your Question about the Image", placeholder="Type your question here...")
+            submit_btn = gr.Button("Submit")
+            clear_btn = gr.Button("Clear")
+            llm_output = gr.Textbox(label="Detailed LLM Answer", interactive=False, lines=10)
+    submit_btn.click(
+        fn=process_pipeline,
+        inputs=[image_input, question_box],
+        outputs=llm_output
+    )
+    clear_btn.click(
+        fn=lambda: ("", ""),
+        outputs=[question_box, llm_output]
+    )
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+gradio==5.42.0
+# app.py imports Qwen2VLForConditionalGeneration, which is only available
+# from transformers 4.45.0 onwards.
+transformers>=4.45.0
+torch>=2.0.0
+torchvision>=0.15.0
+requests>=2.31.0
+Pillow>=10.0.0
+accelerate>=0.20.0
+safetensors>=0.3.0
+tokenizers>=0.15.0
+numpy>=1.24.0
+scipy>=1.10.0
+qwen-vl-utils
+# app.py imports IPython.display at module level, so IPython must be installed.
+ipython