Initial commit

Files changed:
- assets/cat_dog.jpg +0 -0
- flagged/img ndarray/0.jpg +0 -0
- flagged/img ndarray/1.jpg +0 -0
- flagged/log.csv +3 -0
- flagged/output/0.png +0 -0
- flagged/output/1.png +0 -0
- gradcam/__pycache__/utils.cpython-38.pyc +0 -0
- gradcam/app.py +61 -0
- gradcam/utils.py +100 -0
- requirements.txt +6 -0
 
    	
assets/cat_dog.jpg ADDED
    (binary image file)
    	
flagged/img ndarray/0.jpg ADDED
    (binary image file)
    	
flagged/img ndarray/1.jpg ADDED
    (binary image file)
    	
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
+'text','img ndarray','output','timestamp'
+'big ship','img ndarray/0.jpg','output/0.png','2022-04-16 19:37:48.314750'
+'microphone','img ndarray/1.jpg','output/1.png','2022-04-16 21:45:35.413185'
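These rows are Gradio's default flagging log: when a user clicks Flag in the running app, the input/output pair is saved under flagged/ and a line is appended to log.csv. The fields are single-quoted, so inspecting it needs a matching quote character. A minimal sketch (pandas is an assumption; it is not pinned in requirements.txt):

    import pandas as pd

    # Read the flag log; the fields were written with single quotes.
    df = pd.read_csv('flagged/log.csv', quotechar="'")
    print(df.columns.tolist())  # ['text', 'img ndarray', 'output', 'timestamp']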
    	
flagged/output/0.png ADDED
    (binary image file)
    	
flagged/output/1.png ADDED
    (binary image file)
    	
gradcam/__pycache__/utils.cpython-38.pyc ADDED
    Binary file (2.77 kB)
    	
gradcam/app.py ADDED
@@ -0,0 +1,61 @@
+import gradio as gr
+import clip
+import torch
+
+import utils
+
+clip_model = "RN50x4"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, preprocess = clip.load(clip_model, device=device, jit=False)
+model.eval()
+
+
+def grad_cam_fn(text, img, saliency_layer):
+    resize = model.visual.input_resolution
+    img = img.resize((resize, resize))
+
+    text_input = clip.tokenize([text]).to(device)
+    text_feature = model.encode_text(text_input).float()
+    image_input = preprocess(img).unsqueeze(0).to(device)
+
+    attn_map = utils.gradCAM(
+        model.visual,
+        image_input,
+        text_feature,
+        getattr(model.visual, saliency_layer)
+    )
+    attn_map = attn_map.squeeze().detach().cpu().numpy()
+    attn_map = utils.getAttMap(img, attn_map)
+
+    return attn_map
+
+
+if __name__ == '__main__':
+    interface = gr.Interface(
+        fn=grad_cam_fn,
+        inputs=[
+            gr.inputs.Textbox(
+                label="Target Text",
+                lines=1),
+            gr.inputs.Image(
+                label='Input Image',
+                image_mode="RGB",
+                type='pil',
+                shape=(512, 512)),
+            gr.inputs.Dropdown(
+                ["layer4", "layer3", "layer2", "layer1"],
+                default="layer4",
+                label="Saliency Layer")
+        ],
+        outputs=gr.outputs.Image(
+            type="pil",
+            label="Attention Map"),
+        examples=[
+            ['a cat lying on the floor', 'assets/cat_dog.jpg', 'layer4'],
+            ['a dog sitting', 'assets/cat_dog.jpg', 'layer4']
+        ],
+        description="OpenAI CLIP Grad CAM")
+    interface.launch(
+        server_name='0.0.0.0',
+        server_port=7861,
+        share=False)
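For reference, grad_cam_fn can be exercised without launching the UI, since the `if __name__ == '__main__'` guard keeps the server out of imports. A minimal sketch, run from the gradcam/ directory (mirrors the first built-in example; not part of the commit):

    from PIL import Image

    from app import grad_cam_fn  # importing app triggers the module-level CLIP load

    img = Image.open('../assets/cat_dog.jpg').convert('RGB')
    heatmap = grad_cam_fn('a cat lying on the floor', img, 'layer4')
    heatmap.save('attention.png')  # 256x256 blend of image and Grad-CAM heatmap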
    	
gradcam/utils.py ADDED
@@ -0,0 +1,100 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import matplotlib.cm
+from PIL import Image
+
+
+class Hook:
+    """Attaches to a module and records its activations and gradients."""
+
+    def __init__(self, module: nn.Module):
+        self.data = None
+        self.hook = module.register_forward_hook(self.save_grad)
+
+    def save_grad(self, module, input, output):
+        self.data = output
+        output.requires_grad_(True)
+        output.retain_grad()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self.hook.remove()
+
+    @property
+    def activation(self) -> torch.Tensor:
+        return self.data
+
+    @property
+    def gradient(self) -> torch.Tensor:
+        return self.data.grad
+
+
+# Reference: https://arxiv.org/abs/1610.02391
+def gradCAM(
+    model: nn.Module,
+    input: torch.Tensor,
+    target: torch.Tensor,
+    layer: nn.Module
+) -> torch.Tensor:
+    # Zero out any gradients at the input.
+    if input.grad is not None:
+        input.grad.data.zero_()
+
+    # Disable gradient settings.
+    requires_grad = {}
+    for name, param in model.named_parameters():
+        requires_grad[name] = param.requires_grad
+        param.requires_grad_(False)
+
+    # Attach a hook to the model at the desired layer.
+    assert isinstance(layer, nn.Module)
+    with Hook(layer) as hook:
+        # Do a forward and backward pass.
+        output = model(input)
+        output.backward(target)
+
+        grad = hook.gradient.float()
+        act = hook.activation.float()
+
+        # Global average pool gradient across spatial dimension
+        # to obtain importance weights.
+        alpha = grad.mean(dim=(2, 3), keepdim=True)
+        # Weighted combination of activation maps over channel
+        # dimension.
+        gradcam = torch.sum(act * alpha, dim=1, keepdim=True)
+        # We only want neurons with positive influence so we
+        # clamp any negative ones.
+        gradcam = torch.clamp(gradcam, min=0)
+
+    # Resize gradcam to input resolution.
+    gradcam = F.interpolate(
+        gradcam,
+        input.shape[2:],
+        mode='bicubic',
+        align_corners=False)
+
+    # Restore gradient settings.
+    for name, param in model.named_parameters():
+        param.requires_grad_(requires_grad[name])
+
+    return gradcam
+
+
+# Modified from: https://github.com/salesforce/ALBEF/blob/main/visualization.ipynb
+def getAttMap(img, attn_map):
+    # Normalize attention map
+    attn_map = attn_map - attn_map.min()
+    if attn_map.max() > 0:
+        attn_map = attn_map / attn_map.max()
+
+    H = matplotlib.cm.jet(attn_map)
+    H = (H * 255).astype(np.uint8)[:, :, :3]
+    img_heatmap = Image.fromarray(H)
+    img_heatmap = img_heatmap.resize((256, 256))
+
+    return Image.blend(
+        img.resize((256, 256)), img_heatmap, 0.4)
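Nothing in gradCAM is CLIP-specific: it works for any backbone whose hooked layer produces an (N, C, H, W) activation, with target acting as the vector the output is back-propagated against. A minimal sketch with a torchvision ResNet (torchvision is an assumption; the class index is arbitrary, chosen only to illustrate a one-hot target):

    import torch
    import torchvision.models as models

    import utils

    net = models.resnet50(pretrained=False).eval()
    x = torch.randn(1, 3, 224, 224)   # stand-in for a preprocessed image
    onehot = torch.zeros(1, 1000)
    onehot[0, 281] = 1.0              # back-propagate the logit for class 281
    cam = utils.gradCAM(net, x, onehot, net.layer4)
    print(cam.shape)                  # torch.Size([1, 1, 224, 224])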
    	
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio>=2.9.0,<2.10.0
+torch>=1.10.0,<1.11.0
+git+https://github.com/openai/CLIP.git
+Pillow
+matplotlib
+numpy
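The gradio pin is load-bearing: app.py uses the legacy 2.x namespaces gr.inputs.* and gr.outputs.*, which were removed in Gradio 3. If the pin were lifted, the interface would need roughly this shape instead (a sketch against the 3.x-style API, not part of the commit; shape= is dropped since grad_cam_fn already resizes the image itself):

    import gradio as gr

    interface = gr.Interface(
        fn=grad_cam_fn,
        inputs=[
            gr.Textbox(label="Target Text", lines=1),
            gr.Image(label="Input Image", image_mode="RGB", type="pil"),
            gr.Dropdown(["layer4", "layer3", "layer2", "layer1"],
                        value="layer4", label="Saliency Layer"),
        ],
        outputs=gr.Image(type="pil", label="Attention Map"),
        description="OpenAI CLIP Grad CAM")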