Spaces:

ysfad
/

mae-waste-classifier-demo

Runtime error

App Files Files Community

ysfad commited on Jun 29

Commit

905ac99

verified ·

1 Parent(s): de63d9f

Update: Enhanced classifier with temperature scaling

Browse files

Files changed (1) hide show

improved_mae_classifier.py +346 -0

improved_mae_classifier.py ADDED Viewed

	@@ -0,0 +1,346 @@

+#!/usr/bin/env python3
+"""
+Improved MAE Waste Classifier with temperature scaling and bias correction
+"""
+import os
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from PIL import Image
+from torchvision import transforms
+from huggingface_hub import hf_hub_download
+import warnings
+warnings.filterwarnings("ignore")
+# Import MAE model
+from mae.models_vit import vit_base_patch16
+class ImprovedMAEWasteClassifier:
+    def __init__(self,
+                 model_path=None,
+                 hf_model_id=None,
+                 device=None,
+                 temperature=2.5,  # Temperature scaling to reduce overconfidence
+                 cardboard_penalty=0.8):  # Penalty factor for cardboard predictions
+        """
+        Initialize improved MAE waste classifier with bias correction
+        Args:
+            model_path: Local path to model file
+            hf_model_id: Hugging Face model ID
+            device: Device to run on
+            temperature: Temperature scaling factor (>1 reduces confidence)
+            cardboard_penalty: Penalty factor for cardboard predictions
+        """
+        self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
+        self.temperature = temperature
+        self.cardboard_penalty = cardboard_penalty
+        # Class names (must match training order)
+        self.class_names = [
+            'Cardboard', 'Food Organics', 'Glass', 'Metal',
+            'Miscellaneous Trash', 'Paper', 'Plastic', 'Textile Trash', 'Vegetation'
+        ]
+        # Class-specific confidence thresholds
+        self.class_thresholds = {
+            'Cardboard': 0.8,  # Higher threshold for cardboard
+            'Plastic': 0.6,
+            'Metal': 0.6,
+            'Glass': 0.6,
+            'Paper': 0.6,
+            'Food Organics': 0.5,
+            'Miscellaneous Trash': 0.5,
+            'Textile Trash': 0.4,  # Lower threshold for underrepresented class
+            'Vegetation': 0.5
+        }
+        # Load model
+        self.model = self._load_model(model_path, hf_model_id)
+        self.model.eval()
+        # Data preprocessing
+        self.transform = transforms.Compose([
+            transforms.Resize((224, 224)),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225]
+            )
+        ])
+        print(f"✅ Improved MAE Classifier loaded on {self.device}")
+        print(f"🌡️ Temperature scaling: {self.temperature}")
+        print(f"🗂️ Cardboard penalty: {self.cardboard_penalty}")
+    def _load_model(self, model_path=None, hf_model_id=None):
+        """Load the finetuned MAE model"""
+        # Determine model path
+        if model_path and os.path.exists(model_path):
+            checkpoint_path = model_path
+            print(f"📁 Loading local model from {model_path}")
+        elif hf_model_id:
+            print(f"🌐 Downloading model from HF Hub: {hf_model_id}")
+            checkpoint_path = hf_hub_download(
+                repo_id=hf_model_id,
+                filename="best_model.pth",
+                cache_dir="./hf_cache"
+            )
+            print(f"✅ Downloaded model to: {checkpoint_path}")
+        else:
+            # Try local file
+            local_path = "output_simple_mae/best_model.pth"
+            if os.path.exists(local_path):
+                checkpoint_path = local_path
+                print(f"📁 Using local model: {local_path}")
+            else:
+                raise FileNotFoundError("No model found. Provide model_path or hf_model_id")
+        # Create model
+        model = vit_base_patch16(num_classes=len(self.class_names))
+        # Load checkpoint
+        checkpoint = torch.load(checkpoint_path, map_location=self.device)
+        # Handle different checkpoint formats
+        if 'model_state_dict' in checkpoint:
+            state_dict = checkpoint['model_state_dict']
+        elif 'model' in checkpoint:
+            state_dict = checkpoint['model']
+        else:
+            state_dict = checkpoint
+        # Load state dict
+        model.load_state_dict(state_dict, strict=False)
+        model = model.to(self.device)
+        print(f"✅ Loaded finetuned MAE model from {checkpoint_path}")
+        return model
+    def _apply_temperature_scaling(self, logits):
+        """Apply temperature scaling to reduce overconfidence"""
+        return logits / self.temperature
+    def _apply_class_bias_correction(self, probs):
+        """Apply bias correction to reduce cardboard overconfidence"""
+        probs_corrected = probs.clone()
+        # Find cardboard class index
+        cardboard_idx = self.class_names.index('Cardboard')
+        # Apply penalty to cardboard predictions
+        probs_corrected[cardboard_idx] *= self.cardboard_penalty
+        # Renormalize probabilities
+        probs_corrected = probs_corrected / probs_corrected.sum()
+        return probs_corrected
+    def _ensemble_prediction(self, image, num_crops=5):
+        """Use ensemble of augmented predictions for better stability"""
+        # Different augmentation transforms
+        augment_transforms = [
+            transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+            ]),
+            transforms.Compose([
+                transforms.Resize((224, 224)),
+                transforms.RandomHorizontalFlip(p=1.0),
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+            ]),
+            transforms.Compose([
+                transforms.Resize((256, 256)),
+                transforms.CenterCrop(224),
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+            ]),
+            transforms.Compose([
+                transforms.Resize((224, 224)),
+                transforms.ColorJitter(brightness=0.1, contrast=0.1),
+                transforms.ToTensor(),
+                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+            ]),
+            # Standard transform
+            self.transform
+        ]
+        all_probs = []
+        with torch.no_grad():
+            for transform in augment_transforms[:num_crops]:
+                # Apply transform
+                input_tensor = transform(image).unsqueeze(0).to(self.device)
+                # Get prediction
+                logits = self.model(input_tensor)
+                # Apply temperature scaling
+                scaled_logits = self._apply_temperature_scaling(logits)
+                # Get probabilities
+                probs = F.softmax(scaled_logits, dim=1).squeeze(0)
+                # Apply bias correction
+                corrected_probs = self._apply_class_bias_correction(probs)
+                all_probs.append(corrected_probs.cpu().numpy())
+        # Average ensemble predictions
+        ensemble_probs = np.mean(all_probs, axis=0)
+        return ensemble_probs
+    def classify_image(self, image, top_k=5, use_ensemble=True):
+        """
+        Classify a waste image with improved confidence calibration
+        Args:
+            image: PIL Image or path to image
+            top_k: Number of top predictions to return
+            use_ensemble: Whether to use ensemble prediction
+        Returns:
+            Dictionary with classification results
+        """
+        try:
+            # Load image if path provided
+            if isinstance(image, str):
+                image = Image.open(image).convert('RGB')
+            elif not isinstance(image, Image.Image):
+                raise ValueError("Image must be PIL Image or file path")
+            # Get predictions
+            if use_ensemble:
+                probs = self._ensemble_prediction(image)
+            else:
+                # Single prediction with improvements
+                input_tensor = self.transform(image).unsqueeze(0).to(self.device)
+                with torch.no_grad():
+                    logits = self.model(input_tensor)
+                    scaled_logits = self._apply_temperature_scaling(logits)
+                    probs = F.softmax(scaled_logits, dim=1).squeeze(0)
+                    probs = self._apply_class_bias_correction(probs)
+                    probs = probs.cpu().numpy()
+            # Get top predictions
+            top_indices = np.argsort(probs)[::-1][:top_k]
+            top_predictions = []
+            for idx in top_indices:
+                class_name = self.class_names[idx]
+                confidence = float(probs[idx])
+                top_predictions.append({
+                    'class': class_name,
+                    'confidence': confidence
+                })
+            # Determine final prediction with class-specific thresholds
+            predicted_class = top_predictions[0]['class']
+            predicted_confidence = top_predictions[0]['confidence']
+            # Check if prediction meets class-specific threshold
+            threshold = self.class_thresholds.get(predicted_class, 0.5)
+            if predicted_confidence < threshold:
+                # If below threshold, mark as uncertain
+                predicted_class = "Uncertain"
+                predicted_confidence = predicted_confidence
+            return {
+                'success': True,
+                'predicted_class': predicted_class,
+                'confidence': predicted_confidence,
+                'top_predictions': top_predictions,
+                'ensemble_used': use_ensemble,
+                'temperature': self.temperature
+            }
+        except Exception as e:
+            return {
+                'success': False,
+                'error': str(e)
+            }
+    def get_disposal_instructions(self, class_name):
+        """Get disposal instructions for a waste class"""
+        instructions = {
+            'Cardboard': 'Flatten and place in recycling bin. Remove any tape or staples.',
+            'Food Organics': 'Place in compost bin or organic waste collection.',
+            'Glass': 'Rinse and place in glass recycling bin. Remove caps and lids.',
+            'Metal': 'Rinse cans and place in metal recycling bin.',
+            'Miscellaneous Trash': 'Place in general waste bin.',
+            'Paper': 'Place in paper recycling bin. Remove any plastic components.',
+            'Plastic': 'Check recycling number and place in appropriate plastic recycling bin.',
+            'Textile Trash': 'Donate if in good condition, otherwise place in textile recycling.',
+            'Vegetation': 'Compost or place in yard waste collection.',
+            'Uncertain': 'Please take another photo from a different angle or with better lighting.'
+        }
+        return instructions.get(class_name, 'Please consult local waste management guidelines.')
+    def get_model_info(self):
+        """Get model information"""
+        return {
+            'model_name': 'Improved ViT-Base MAE',
+            'architecture': 'Vision Transformer (ViT-Base)',
+            'pretrained': 'MAE (Masked Autoencoder)',
+            'num_classes': len(self.class_names),
+            'device': str(self.device),
+            'temperature': self.temperature,
+            'cardboard_penalty': self.cardboard_penalty,
+            'improvements': [
+                'Temperature scaling for confidence calibration',
+                'Class-specific bias correction',
+                'Ensemble predictions for stability',
+                'Class-specific confidence thresholds'
+            ]
+        }
+def test_improved_classifier():
+    """Test the improved classifier"""
+    print("🧪 Testing Improved MAE Waste Classifier...")
+    # Load improved classifier
+    classifier = ImprovedMAEWasteClassifier(hf_model_id="ysfad/mae-waste-classifier")
+    # Test with a sample image
+    test_image = "fail_images/image.webp"
+    if os.path.exists(test_image):
+        print(f"\n🔍 Testing with {test_image}")
+        # Test both single and ensemble prediction
+        print("\n1. Single prediction:")
+        result1 = classifier.classify_image(test_image, use_ensemble=False)
+        if result1['success']:
+            print(f"🎯 Predicted: {result1['predicted_class']} ({result1['confidence']:.3f})")
+        print("\n2. Ensemble prediction:")
+        result2 = classifier.classify_image(test_image, use_ensemble=True)
+        if result2['success']:
+            print(f"🎯 Predicted: {result2['predicted_class']} ({result2['confidence']:.3f})")
+            print("📊 Top predictions:")
+            for i, pred in enumerate(result2['top_predictions'], 1):
+                print(f"  {i}. {pred['class']}: {pred['confidence']:.3f}")
+    print("\n🤖 Model Info:")
+    info = classifier.get_model_info()
+    for key, value in info.items():
+        if isinstance(value, list):
+            print(f"  {key}:")
+            for item in value:
+                print(f"    - {item}")
+        else:
+            print(f"  {key}: {value}")
+if __name__ == "__main__":
+    test_improved_classifier()