Commit 266f0ac
noeedc committed
Parent(s): 07b368a
Add initial implementation of Surgical Contaminant Classifier-Mix with model, config, and inference script
Files changed:
- README.md +69 -0
- classifier.py +86 -0
- config.json +9 -0
- example_inference.py +22 -0
- pytorch_model.bin +3 -0
README.md
ADDED
@@ -0,0 +1,69 @@
# Surgical Contaminant Classifier-Mix

This repository contains a PyTorch-based image classifier for identifying visual contaminants in surgical footage. The model distinguishes between five classes: `blur`, `smoke`, `clear`, `fluid`, and `oob` (out-of-body). It uses a MobileNetV2 backbone via [timm](https://github.com/huggingface/pytorch-image-models) and is compatible with Hugging Face Transformers' `AutoModel` and `AutoConfig` using `trust_remote_code=True`.

The name **"classifier-mix"** refers to the training data source: a mix of DaVinci and Medtronic RARP surgical frames.

> Training log:
> `gs://noee/mobileNet/Medtronic_28-04-2025/Run_13h20_Finetune_lr0.0001_ReduceLROnPlateau/training.log`

## Files

- `classifier.py`: Model and config implementation.
- `config.json`: Hugging Face model configuration.
- `pytorch_model.bin`: Model weights.
- `sample_img.png`: Example image for inference.
- `example_inference.py`: Example script for running inference.

## Usage

### Installation

Install the required dependencies:

```sh
pip install torch torchvision timm transformers pillow
```

### Model Details

- **Backbone:** MobileNetV2 (`mobilenetv2_100`)
- **Classes:** blur, smoke, clear, fluid, oob
- **Input size:** 224x224 RGB images
- **Normalization:** mean=[0.6075, 0.4093, 0.3609], std=[0.2066, 0.2036, 0.1991]

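For reference, the preprocessing implied by the values above can be reproduced with torchvision. This minimal sketch is only needed if you run the backbone outside the provided wrapper; the wrapper normalizes internally, so do not feed it an already-normalized tensor.

```python
# Sketch only: ClassifierWrapper already applies equivalent preprocessing to PIL inputs.
from PIL import Image
from torchvision import transforms

preprocess = transforms.Compose([
    transforms.Resize((224, 224)),                       # model input size
    transforms.ToTensor(),                               # HWC uint8 -> CHW float in [0, 1]
    transforms.Normalize(mean=[0.6075, 0.4093, 0.3609],  # dataset statistics listed above
                         std=[0.2066, 0.2036, 0.1991]),
])

img = Image.open("sample_img.png").convert("RGB")
batch = preprocess(img).unsqueeze(0)  # shape: (1, 3, 224, 224)
```
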
### Inference Example

You can run the provided inference script to classify the sample image:

```python
# example_inference.py
from transformers import AutoModel
from PIL import Image

# Load model
model = AutoModel.from_pretrained(
    "vopeai/classifier-mix",
    trust_remote_code=True
)
model.eval()

# Load and preprocess image
img = Image.open("sample_img.png").convert("RGB")

# Run inference
outputs = model(img)

print("Predicted class:", outputs[0]['label'])
print("Confidences:", outputs[0]['confidences'])
```

Or use the model in your own code by loading it as follows:

```python
from transformers import AutoModel

# Load model
model = AutoModel.from_pretrained("vopeai/classifier-mix", trust_remote_code=True)
```

For more details, see the code files in this repository.
classifier.py
ADDED
@@ -0,0 +1,86 @@
import torch.nn as nn
import timm
from torchvision import transforms
import torch
from PIL import Image
from torch.nn.functional import softmax
from transformers import PretrainedConfig, PreTrainedModel

LABEL_MAP = ["blur", "smoke", "clear", "fluid", "oob"]


class ClassifierConfig(PretrainedConfig):
    model_type = "classifier"

    def __init__(self, model_name="mobilenetv2_100", num_classes=len(LABEL_MAP), **kwargs):
        super().__init__(**kwargs)
        self.model_name = model_name
        self.num_classes = num_classes


class ClassifierModel(nn.Module):
    def __init__(self, model_name="mobilenetv2_100", num_classes=len(LABEL_MAP), pretrained=True):
        super().__init__()
        self.base_model = timm.create_model(model_name, pretrained=pretrained)
        num_features = self.base_model.classifier.in_features
        # Use Sequential to match saved model structure
        self.base_model.classifier = nn.Sequential(
            nn.Linear(num_features, num_classes)
        )
        if "mobilenetv2" in model_name:
            self.target_layer = self.base_model.conv_head
        else:
            raise NotImplementedError(f"Grad-CAM target layer not defined for model: {model_name}")

    def forward(self, x):
        return self.base_model(x)


class ClassifierWrapper(PreTrainedModel):
    config_class = ClassifierConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = ClassifierModel(
            model_name=config.model_name,
            num_classes=config.num_classes,
            pretrained=False  # Weights are loaded by from_pretrained
        )

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.Normalize(mean=[0.6075, 0.4093, 0.3609], std=[0.2066, 0.2036, 0.1991])
        ])

    def forward(self, input):
        # Ensure input is a tensor
        if isinstance(input, Image.Image):
            x = transforms.ToTensor()(input).unsqueeze(0)  # Convert PIL Image to tensor
        elif isinstance(input, torch.Tensor):
            if input.dim() == 3:
                x = input.unsqueeze(0)  # Single tensor image
            elif input.dim() == 4:
                x = input  # Batch
            else:
                raise ValueError("Unsupported tensor shape.")
        else:
            raise TypeError(f"Unsupported input type: {type(input)}. Expected PIL.Image or torch.Tensor.")

        # Apply transformations
        x = self.transform(x)

        # Forward pass through the model
        outputs = self.model(x)

        confs = softmax(outputs, dim=1)
        preds = torch.argmax(confs, dim=1)

        results = []
        for i in range(len(preds)):
            label = LABEL_MAP[preds[i]]
            confidences = {}
            for j in range(len(LABEL_MAP)):
                confidences[LABEL_MAP[j]] = round(float(confs[i][j]), 3)  # per-sample confidence for class j

            results.append({"label": label, "confidences": confidences})
        return results
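A minimal usage sketch for `ClassifierWrapper` on a tensor batch (assuming `classifier.py` is importable locally; weights here are untrained, since real weights are loaded via `from_pretrained`):

```python
# Sketch: build the wrapper from its default config and run a dummy batch.
# Assumes classifier.py is on the Python path; weights are untrained here.
import torch
from classifier import ClassifierConfig, ClassifierWrapper

model = ClassifierWrapper(ClassifierConfig())
model.eval()

dummy_batch = torch.rand(2, 3, 224, 224)  # two random RGB images in [0, 1]
with torch.no_grad():
    results = model(dummy_batch)          # list of {"label", "confidences"} dicts

for r in results:
    print(r["label"], r["confidences"])
```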
config.json
ADDED
@@ -0,0 +1,9 @@
{
  "model_type": "classifier",
  "architectures": ["ClassifierWrapper"],
  "auto_map": {
    "AutoModel": "classifier.ClassifierWrapper",
    "AutoConfig": "classifier.ClassifierConfig"
  },
  "trust_remote_code": true
}
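The `auto_map` entries are what route the generic `AutoConfig`/`AutoModel` classes to the custom code in `classifier.py` when `trust_remote_code=True` is passed; a brief sketch of that flow, using the repo id from the README:

```python
# Sketch: AutoConfig/AutoModel follow config.json's auto_map to the custom classes.
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained("vopeai/classifier-mix", trust_remote_code=True)
print(config.model_name, config.num_classes)  # expected: mobilenetv2_100 5

model = AutoModel.from_pretrained("vopeai/classifier-mix", trust_remote_code=True)
model.eval()
```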
example_inference.py
ADDED
@@ -0,0 +1,22 @@
import torch
from transformers import AutoModel, AutoConfig
from torchvision import transforms
from PIL import Image
import os

os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Load model and config
model = AutoModel.from_pretrained(
    "./classifier-mix",  # or path to your model directory
    trust_remote_code=True
)
model.eval()

# Load and preprocess image
img = Image.open("classifier-mix/sample_img.png").convert("RGB")

outputs = model(img)

print("Predicted class:", outputs[0]['label'])
print("Confidences:", outputs[0]['confidences'])
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1c834b0f9b606b5f04d377015d3fab1976097639eed81b6f91360838409b1d24
size 9158475