# Spaces: ysfad/mae-waste-classifier-demo (status: Runtime error)
# File size: 14,030 Bytes, revision 905ac99

#!/usr/bin/env python3
"""

Improved MAE Waste Classifier with temperature scaling and bias correction

"""

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from PIL import Image
from torchvision import transforms
from huggingface_hub import hf_hub_download
import warnings
# Installs a catch-all "ignore" filter: every warning category from every
# module is suppressed for the whole process from this point on, not only
# warnings raised by this file.
warnings.filterwarnings("ignore")

# Import MAE model
from mae.models_vit import vit_base_patch16

class ImprovedMAEWasteClassifier:
    """Waste-image classifier wrapping a fine-tuned MAE ViT-Base model.

    The raw network output is post-processed in this order:

    1. temperature scaling of the logits (``logits / temperature``),
    2. softmax,
    3. a multiplicative penalty on the 'Cardboard' probability followed by
       renormalization,
    4. optionally, averaging over several augmented views of the image,
    5. a per-class confidence threshold below which the final answer is
       reported as ``"Uncertain"``.
    """

    # Normalization statistics shared by every preprocessing pipeline.
    _NORM_MEAN = (0.485, 0.456, 0.406)
    _NORM_STD = (0.229, 0.224, 0.225)

    # Disposal advice per predicted label (including the "Uncertain" fallback).
    _DISPOSAL_INSTRUCTIONS = {
        'Cardboard': 'Flatten and place in recycling bin. Remove any tape or staples.',
        'Food Organics': 'Place in compost bin or organic waste collection.',
        'Glass': 'Rinse and place in glass recycling bin. Remove caps and lids.',
        'Metal': 'Rinse cans and place in metal recycling bin.',
        'Miscellaneous Trash': 'Place in general waste bin.',
        'Paper': 'Place in paper recycling bin. Remove any plastic components.',
        'Plastic': 'Check recycling number and place in appropriate plastic recycling bin.',
        'Textile Trash': 'Donate if in good condition, otherwise place in textile recycling.',
        'Vegetation': 'Compost or place in yard waste collection.',
        'Uncertain': 'Please take another photo from a different angle or with better lighting.'
    }

    def __init__(self,
                 model_path=None,
                 hf_model_id=None,
                 device=None,
                 temperature=2.5,  # Temperature scaling to reduce overconfidence
                 cardboard_penalty=0.8):  # Penalty factor for cardboard predictions
        """Initialize improved MAE waste classifier with bias correction.

        Args:
            model_path: Local path to model file.
            hf_model_id: Hugging Face model ID.
            device: Device to run on; defaults to 'cuda' when available,
                otherwise 'cpu'.
            temperature: Temperature scaling factor (>1 reduces confidence).
                Must be strictly positive.
            cardboard_penalty: Factor the cardboard probability is multiplied
                by before renormalization. Must not be negative.

        Raises:
            ValueError: If ``temperature`` or ``cardboard_penalty`` is out of
                range.
            FileNotFoundError: If no checkpoint can be located.
        """
        # Validate before the (expensive) model load: a non-positive
        # temperature would silently produce inf/NaN or inverted logits.
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")
        if cardboard_penalty < 0:
            raise ValueError(
                f"cardboard_penalty must be >= 0, got {cardboard_penalty}"
            )

        self.device = device or ('cuda' if torch.cuda.is_available() else 'cpu')
        self.temperature = temperature
        self.cardboard_penalty = cardboard_penalty

        # Class names (must match training order)
        self.class_names = [
            'Cardboard', 'Food Organics', 'Glass', 'Metal',
            'Miscellaneous Trash', 'Paper', 'Plastic', 'Textile Trash', 'Vegetation'
        ]

        # Class-specific confidence thresholds
        self.class_thresholds = {
            'Cardboard': 0.8,  # Higher threshold for cardboard
            'Plastic': 0.6,
            'Metal': 0.6,
            'Glass': 0.6,
            'Paper': 0.6,
            'Food Organics': 0.5,
            'Miscellaneous Trash': 0.5,
            'Textile Trash': 0.4,  # Lower threshold for underrepresented class
            'Vegetation': 0.5
        }

        # Load model
        self.model = self._load_model(model_path, hf_model_id)
        self.model.eval()

        # Data preprocessing
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)
        ])

        print(f"✅ Improved MAE Classifier loaded on {self.device}")
        print(f"🌡️ Temperature scaling: {self.temperature}")
        print(f"🗂️ Cardboard penalty: {self.cardboard_penalty}")

    def _load_model(self, model_path=None, hf_model_id=None):
        """Locate a checkpoint, build the ViT model and load the weights.

        The checkpoint is looked up in this order: ``model_path`` (if it
        exists on disk), ``hf_model_id`` (downloads ``best_model.pth`` from
        the Hub), then the default local file
        ``output_simple_mae/best_model.pth``.

        Args:
            model_path: Optional local checkpoint path.
            hf_model_id: Optional Hugging Face repository id.

        Returns:
            The model, moved to ``self.device``.

        Raises:
            FileNotFoundError: If none of the lookup locations yields a file.
        """
        default_local_path = "output_simple_mae/best_model.pth"

        if model_path and not os.path.exists(model_path):
            # Make the silent fall-through to other sources visible.
            print(f"⚠️ Model path not found, trying fallbacks: {model_path}")

        # Determine model path
        if model_path and os.path.exists(model_path):
            checkpoint_path = model_path
            print(f"📁 Loading local model from {model_path}")
        elif hf_model_id:
            print(f"🌐 Downloading model from HF Hub: {hf_model_id}")
            checkpoint_path = hf_hub_download(
                repo_id=hf_model_id,
                filename="best_model.pth",
                cache_dir="./hf_cache"
            )
            print(f"✅ Downloaded model to: {checkpoint_path}")
        elif os.path.exists(default_local_path):
            checkpoint_path = default_local_path
            print(f"📁 Using local model: {default_local_path}")
        else:
            raise FileNotFoundError("No model found. Provide model_path or hf_model_id")

        # Create model
        model = vit_base_patch16(num_classes=len(self.class_names))

        # NOTE(security): torch.load unpickles the file, so only checkpoints
        # from trusted sources should be loaded here.
        checkpoint = torch.load(checkpoint_path, map_location=self.device)

        # Handle different checkpoint formats
        if 'model_state_dict' in checkpoint:
            state_dict = checkpoint['model_state_dict']
        elif 'model' in checkpoint:
            state_dict = checkpoint['model']
        else:
            state_dict = checkpoint

        # strict=False tolerates key mismatches; report them instead of
        # discarding the information, since missing keys mean some weights
        # stay at their freshly-initialized values.
        load_result = model.load_state_dict(state_dict, strict=False)
        missing_keys = getattr(load_result, 'missing_keys', None) or []
        unexpected_keys = getattr(load_result, 'unexpected_keys', None) or []
        if missing_keys:
            print(f"⚠️ {len(missing_keys)} model keys missing from checkpoint: {missing_keys}")
        if unexpected_keys:
            print(f"⚠️ {len(unexpected_keys)} unexpected keys in checkpoint: {unexpected_keys}")

        model = model.to(self.device)

        print(f"✅ Loaded finetuned MAE model from {checkpoint_path}")
        return model

    def _apply_temperature_scaling(self, logits):
        """Divide ``logits`` by ``self.temperature`` to reduce overconfidence."""
        return logits / self.temperature

    def _apply_class_bias_correction(self, probs):
        """Down-weight the 'Cardboard' probability and renormalize.

        Args:
            probs: Tensor of class probabilities whose last dimension indexes
                the classes; may be a single vector or a batch.

        Returns:
            A new tensor of the same shape in which the cardboard entry has
            been multiplied by ``self.cardboard_penalty`` and every
            distribution along the last dimension sums to one again. The
            input tensor is not modified.
        """
        cardboard_idx = self.class_names.index('Cardboard')

        corrected = probs.clone()
        # Index the class axis (last dim) so batched input is handled per row.
        corrected[..., cardboard_idx] *= self.cardboard_penalty

        return corrected / corrected.sum(dim=-1, keepdim=True)

    def _predict_probs(self, input_tensor):
        """Return calibrated class probabilities for one preprocessed image.

        Args:
            input_tensor: Model input with a leading batch dimension of one,
                already on ``self.device``.

        Returns:
            1-D tensor of probabilities after temperature scaling, softmax
            and cardboard bias correction.
        """
        with torch.no_grad():
            logits = self.model(input_tensor)
            scaled_logits = self._apply_temperature_scaling(logits)
            probs = F.softmax(scaled_logits, dim=1).squeeze(0)
            return self._apply_class_bias_correction(probs)

    def _ensemble_prediction(self, image, num_crops=5):
        """Average calibrated predictions over several views of ``image``.

        The available views are, in order: random resized crop, horizontal
        flip, center crop, color jitter and the standard ``self.transform``.
        Only the first ``num_crops`` views are used. Two views are random,
        so repeated calls may return slightly different results.

        Args:
            image: PIL image to classify.
            num_crops: Number of views to average (at least 1).

        Returns:
            1-D numpy array with the mean probability per class.

        Raises:
            ValueError: If ``num_crops`` is smaller than 1 (averaging zero
                views would yield NaN).
        """
        if num_crops < 1:
            raise ValueError(f"num_crops must be at least 1, got {num_crops}")

        normalize = transforms.Normalize(mean=self._NORM_MEAN, std=self._NORM_STD)

        def build_view(*steps):
            # Every augmented view ends with the same tensor conversion.
            return transforms.Compose([*steps, transforms.ToTensor(), normalize])

        views = [
            build_view(
                transforms.Resize((256, 256)),
                transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),
            ),
            build_view(
                transforms.Resize((224, 224)),
                transforms.RandomHorizontalFlip(p=1.0),
            ),
            build_view(
                transforms.Resize((256, 256)),
                transforms.CenterCrop(224),
            ),
            build_view(
                transforms.Resize((224, 224)),
                transforms.ColorJitter(brightness=0.1, contrast=0.1),
            ),
            # Standard transform
            self.transform,
        ]

        all_probs = [
            self._predict_probs(
                view(image).unsqueeze(0).to(self.device)
            ).cpu().numpy()
            for view in views[:num_crops]
        ]

        # Average ensemble predictions
        return np.mean(all_probs, axis=0)

    def _prepare_image(self, image):
        """Return ``image`` as an RGB PIL image.

        Args:
            image: PIL image, or a path (``str`` / ``os.PathLike``) to one.

        Raises:
            ValueError: If ``image`` is neither a PIL image nor a path.
        """
        if isinstance(image, (str, os.PathLike)):
            # convert() loads the pixel data, so the file can be closed here.
            with Image.open(image) as opened:
                return opened.convert('RGB')
        if not isinstance(image, Image.Image):
            raise ValueError("Image must be PIL Image or file path")
        # The normalization expects exactly three channels; RGBA, grayscale
        # or palette images would otherwise fail inside the transform.
        return image if image.mode == 'RGB' else image.convert('RGB')

    def classify_image(self, image, top_k=5, use_ensemble=True):
        """Classify a waste image with improved confidence calibration.

        Args:
            image: PIL Image or path to image.
            top_k: Number of top predictions to return; ``None`` returns all
                classes, values below 1 return an empty list.
            use_ensemble: Whether to use ensemble prediction.

        Returns:
            On success a dictionary with ``success`` (True),
            ``predicted_class`` (a class name, or ``"Uncertain"`` when the
            best confidence is below that class's threshold),
            ``confidence``, ``top_predictions``, ``ensemble_used`` and
            ``temperature``. On failure a dictionary with ``success``
            (False) and ``error`` (the exception message); no exception is
            propagated to the caller.
        """
        try:
            image = self._prepare_image(image)

            # Get predictions
            if use_ensemble:
                probs = self._ensemble_prediction(image)
            else:
                input_tensor = self.transform(image).unsqueeze(0).to(self.device)
                probs = self._predict_probs(input_tensor).cpu().numpy()

            # Class indices ordered from most to least probable.
            ranking = np.argsort(probs)[::-1]

            limit = None if top_k is None else max(top_k, 0)
            top_predictions = [
                {
                    'class': self.class_names[idx],
                    'confidence': float(probs[idx])
                }
                for idx in ranking[:limit]
            ]

            # The final decision comes from the full ranking, so it does not
            # depend on how many entries the caller asked for via top_k.
            best_idx = ranking[0]
            predicted_class = self.class_names[best_idx]
            predicted_confidence = float(probs[best_idx])

            # Check if prediction meets class-specific threshold
            threshold = self.class_thresholds.get(predicted_class, 0.5)
            if predicted_confidence < threshold:
                # Below threshold: keep the confidence, relabel as uncertain.
                predicted_class = "Uncertain"

            return {
                'success': True,
                'predicted_class': predicted_class,
                'confidence': predicted_confidence,
                'top_predictions': top_predictions,
                'ensemble_used': use_ensemble,
                'temperature': self.temperature
            }

        except Exception as e:
            # Deliberate top-level boundary: callers receive an error dict
            # instead of an exception.
            return {
                'success': False,
                'error': str(e)
            }

    def get_disposal_instructions(self, class_name):
        """Get disposal instructions for a waste class.

        Args:
            class_name: Predicted label, including ``"Uncertain"``.

        Returns:
            The instruction text for ``class_name``, or a generic fallback
            message when the label is unknown.
        """
        return self._DISPOSAL_INSTRUCTIONS.get(
            class_name, 'Please consult local waste management guidelines.'
        )

    def get_model_info(self):
        """Return a dictionary describing the model and its calibration settings."""
        return {
            'model_name': 'Improved ViT-Base MAE',
            'architecture': 'Vision Transformer (ViT-Base)',
            'pretrained': 'MAE (Masked Autoencoder)',
            'num_classes': len(self.class_names),
            'device': str(self.device),
            'temperature': self.temperature,
            'cardboard_penalty': self.cardboard_penalty,
            'improvements': [
                'Temperature scaling for confidence calibration',
                'Class-specific bias correction',
                'Ensemble predictions for stability',
                'Class-specific confidence thresholds'
            ]
        }

def test_improved_classifier():
    """Smoke-test the improved classifier and print the results.

    Downloads the model ``ysfad/mae-waste-classifier`` from the Hub, and if
    the sample image ``fail_images/image.webp`` exists classifies it once
    without and once with ensembling. Failed classifications and a missing
    sample image are reported instead of being skipped silently. Finally the
    model information is printed.
    """
    print("🧪 Testing Improved MAE Waste Classifier...")

    # Load improved classifier
    classifier = ImprovedMAEWasteClassifier(hf_model_id="ysfad/mae-waste-classifier")

    # Test with a sample image
    test_image = "fail_images/image.webp"
    if os.path.exists(test_image):
        print(f"\n🔍 Testing with {test_image}")

        # (heading, use_ensemble) for each run; top predictions are listed
        # for the ensemble run only.
        runs = (
            ("\n1. Single prediction:", False),
            ("\n2. Ensemble prediction:", True),
        )
        for heading, use_ensemble in runs:
            print(heading)
            result = classifier.classify_image(test_image, use_ensemble=use_ensemble)
            if not result['success']:
                # classify_image reports errors via the result dict.
                print(f"❌ Classification failed: {result['error']}")
                continue

            print(f"🎯 Predicted: {result['predicted_class']} ({result['confidence']:.3f})")
            if use_ensemble:
                print("📊 Top predictions:")
                for i, pred in enumerate(result['top_predictions'], 1):
                    print(f"  {i}. {pred['class']}: {pred['confidence']:.3f}")
    else:
        print(f"\n⚠️ Test image not found, skipping classification: {test_image}")

    print("\n🤖 Model Info:")
    info = classifier.get_model_info()
    for key, value in info.items():
        if isinstance(value, list):
            print(f"  {key}:")
            for item in value:
                print(f"    - {item}")
        else:
            print(f"  {key}: {value}")

# Run the smoke test only when this file is executed as a script; importing
# the module does not trigger it.
if __name__ == "__main__":
    test_improved_classifier()