sergiopaniego HF Staff committed on
Commit
153ddd7
·
1 Parent(s): bfd905c

Added attention

Browse files
Files changed (1) hide show
  1. app.py +58 -3
app.py CHANGED
@@ -32,6 +32,54 @@ def get_output_figure(pil_img, scores, labels, boxes):
32
 
33
  return plt.gcf()
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  @spaces.GPU
37
  def detect(image):
@@ -46,7 +94,7 @@ def detect(image):
46
  postprocessed_outputs = processor.post_process_object_detection(outputs, target_sizes=[(height, width)], threshold=0.9)
47
  results = postprocessed_outputs[0]
48
 
49
- print(results)
50
 
51
  output_figure = get_output_figure(image, results['scores'], results['labels'], results['boxes'])
52
 
@@ -55,9 +103,16 @@ def detect(image):
55
  buf.seek(0)
56
  output_pil_img = Image.open(buf)
57
 
58
- print(output_pil_img)
 
 
 
 
 
 
 
59
 
60
- return output_pil_img, output_pil_img
61
 
62
  demo = gr.Interface(
63
  fn=detect,
 
32
 
33
  return plt.gcf()
34
 
35
def get_output_attn_figure(image, encoding, results):
    """Visualize the decoder cross-attention for each detected object.

    Re-runs the model with ``output_attentions=True`` while a forward hook
    captures the backbone conv-encoder feature maps, then renders — for every
    query kept above the confidence threshold — its cross-attention heat map
    on top, and the input image with the predicted box below.

    Args:
        image: PIL image that was fed to the processor.
        encoding: processor output (pixel values, etc.) for ``image``,
            suitable for ``model(**encoding)``.
        results: post-processed detection dict; only ``results['boxes']``
            (rescaled boxes, one per kept query) is read here.

    Returns:
        The current matplotlib figure (``plt.gcf()``), one column per box.
    """
    # Hook-captured backbone feature maps land here; the hook fires once
    # per forward pass, so the list ends up with a single entry.
    conv_features = []

    hooks = [
        model.model.backbone.conv_encoder.register_forward_hook(
            lambda self, input, output: conv_features.append(output)
        )
    ]

    # Propagate through the model FIRST — the original code read
    # `outputs.logits` before `outputs` was assigned, which raises
    # UnboundLocalError. The forward pass must precede its use.
    outputs = model(**encoding, output_attentions=True)

    for hook in hooks:
        hook.remove()

    # Keep only predictions of queries with >0.9 confidence
    # (excluding the no-object class, which is the last logit).
    probas = outputs.logits.softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > 0.9

    bboxes_scaled = results['boxes']

    # Don't need the list anymore — unwrap the single hook capture.
    conv_features = conv_features[0]
    # Cross-attention weights of the last decoder layer:
    # shape (batch_size, num_heads, num_queries, width*height).
    dec_attn_weights = outputs.cross_attentions[-1]
    # Average over the attention heads and detach from the graph.
    dec_attn_weights = torch.mean(dec_attn_weights, dim=1).detach()

    # Spatial shape (h, w) of the last backbone feature map, used to
    # reshape the flattened attention back into an image-like grid.
    h, w = conv_features[-1][0].shape[-2:]

    fig, axs = plt.subplots(ncols=len(bboxes_scaled), nrows=2, figsize=(22, 7))
    for idx, ax_i, box in zip(keep.nonzero(), axs.T, bboxes_scaled):
        xmin, ymin, xmax, ymax = box.detach().numpy()
        # Top row: attention heat map for this query.
        ax = ax_i[0]
        ax.imshow(dec_attn_weights[0, idx].view(h, w))
        ax.axis('off')
        ax.set_title(f'query id: {idx.item()}')
        # Bottom row: the image with the predicted bounding box.
        ax = ax_i[1]
        ax.imshow(image)
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color='blue', linewidth=3))
        ax.axis('off')
        ax.set_title(model.config.id2label[probas[idx].argmax().item()])
    fig.tight_layout()
    return plt.gcf()
82
+
83
 
84
  @spaces.GPU
85
  def detect(image):
 
94
  postprocessed_outputs = processor.post_process_object_detection(outputs, target_sizes=[(height, width)], threshold=0.9)
95
  results = postprocessed_outputs[0]
96
 
97
+ #print(results)
98
 
99
  output_figure = get_output_figure(image, results['scores'], results['labels'], results['boxes'])
100
 
 
103
  buf.seek(0)
104
  output_pil_img = Image.open(buf)
105
 
106
+ output_figure_attn = get_output_attn_figure(image, encoding, results)
107
+
108
+ buf = io.BytesIO()
109
+ output_figure_attn.savefig(buf, bbox_inches='tight')
110
+ buf.seek(0)
111
+ output_pil_img_attn = Image.open(buf)
112
+
113
+ #print(output_pil_img)
114
 
115
+ return output_pil_img, output_pil_img_attn
116
 
117
  demo = gr.Interface(
118
  fn=detect,