File size: 4,566 Bytes
ecf8904
 
 
 
 
 
 
 
 
 
 
 
 
b4a4fd5
79c4fdc
b4a4fd5
 
 
 
 
 
ecf8904
b21a094
ecf8904
 
 
 
 
 
 
 
880ac53
b4a4fd5
 
 
 
ecf8904
57aeafc
b4a4fd5
 
ecf8904
79c4fdc
ecf8904
 
 
 
79c4fdc
 
 
ecf8904
 
 
 
 
 
 
b4a4fd5
79c4fdc
ecf8904
 
 
 
 
 
 
 
 
b4a4fd5
ecf8904
 
 
 
 
 
b4a4fd5
ecf8904
 
79c4fdc
 
 
ecf8904
b4a4fd5
 
ecf8904
1d91ad9
 
79c4fdc
 
 
ecf8904
79c4fdc
 
 
 
 
 
 
1d91ad9
 
 
 
 
 
 
 
79c4fdc
 
ecf8904
 
 
 
 
 
 
 
1644fb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ecf8904
9009886
 
 
 
 
 
 
 
 
79c4fdc
9009886
79c4fdc
9009886
 
 
2a7707c
79c4fdc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
Demonstrates integrating Rerun visualization with Gradio.

Provides example implementations of data streaming, keypoint annotation, and dynamic
visualization across multiple Gradio tabs using Rerun's recording and visualization capabilities.
"""

import math
import os
import tempfile
import time
import uuid

import subprocess
import sys

# Install the bundled gradio_rerun pre-release at startup (needed on hosts
# like Hugging Face Spaces where the local tarball is not pre-installed).
# An argument list with the current interpreter's pip avoids `shell=True`
# quoting/injection pitfalls, and `check=True` fails fast here instead of
# producing a confusing ImportError on `from gradio_rerun import ...` below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "gradio_rerun-0.23.0a2.tar.gz"],
    check=True,
)


import cv2
import gradio as gr
import rerun as rr
import rerun.blueprint as rrb
from gradio_rerun import Rerun
from gradio_rerun.events import (
    SelectionChange,
    TimelineChange,
    TimeUpdate,
)
import spaces
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests


# Load the pretrained DETR (ResNet-50 backbone) processor and detector once at
# import time, so every detection request reuses the same in-memory weights.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")


# Whenever we need a recording, we construct a new recording stream.
# As long as the app and recording IDs remain the same, the data
# will be merged by the Viewer.
def get_recording(recording_id: str) -> rr.RecordingStream:
    """Construct a recording stream bound to this session's recording id.

    Streams sharing the same application and recording ids are merged by
    the viewer, so repeated calls keep appending to one logical recording.
    """
    stream = rr.RecordingStream(
        recording_id=recording_id,
        application_id="rerun_example_gradio",
    )
    return stream


# A task can directly log to a binary stream, which is routed to the embedded viewer.
# Incremental chunks are yielded to the viewer using `yield stream.read()`.
#
# This is the preferred way to work with Rerun in Gradio since your data can be immediately and
# incrementally seen by the viewer. Also, there are no ephemeral RRDs to cleanup or manage.
@spaces.GPU
def streaming_object_detection(recording_id: str, img):
    """Run DETR object detection on ``img`` and stream results to the viewer.

    Args:
        recording_id: Session-unique id; all data logged under the same
            application/recording id pair is merged by the Rerun viewer.
        img: Input image from ``gr.Image`` (array-like with ``shape[:2]`` =
            (height, width)); ``None`` when the user submitted no image.

    Yields:
        Binary RRD chunks for the ``Rerun`` component to consume incrementally.

    Raises:
        gr.Error: If no image was provided.
    """
    # Here we get a recording using the provided recording id.
    rec = get_recording(recording_id)
    stream = rec.binary_stream()

    if img is None:
        # Fixed copy-paste from a blur demo: this handler detects objects.
        raise gr.Error("Must provide an image to run object detection on.")

    # Single 2D view rooted at the "image" entity, with side panels collapsed.
    blueprint = rrb.Blueprint(
        rrb.Horizontal(
            rrb.Spatial2DView(origin="image"),
        ),
        collapse_panels=True,
    )

    rec.send_blueprint(blueprint)
    rec.set_time("iteration", sequence=0)
    rec.log("image", rr.Image(img))
    # Push the raw image to the viewer immediately, before running the model.
    yield stream.read()

    with torch.inference_mode():
        inputs = processor(images=img, return_tensors="pt")
        outputs = model(**inputs)

    # Convert outputs (bounding boxes and class logits) to COCO API format,
    # keeping only detections with score > 0.9.
    height, width = img.shape[:2]
    target_sizes = torch.tensor([[height, width]])  # [height, width] order
    results = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0]

    rec.log(
        "image/objects",
        rr.Boxes2D(
            array=results["boxes"],
            array_format=rr.Box2DFormat.XYXY,
            labels=[model.config.id2label[label.item()] for label in results["labels"]],
            # Deterministic pseudo-color per class id so each class keeps a
            # stable color across runs.
            colors=[
                (
                    label.item() * 50 % 255,
                    (label.item() * 80 + 40) % 255,
                    (label.item() * 120 + 100) % 255,
                )
                for label in results["labels"]
            ],
        ),
    )

    # Ensure we consume everything from the recording.
    stream.flush()
    yield stream.read()


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: image input plus the button that triggers detection.
        with gr.Column(scale=1):
            with gr.Accordion("Your image", open=True):
                img = gr.Image(interactive=True, label="Image")
                detect_objects = gr.Button("Detect objects")

        # Right column: embedded Rerun viewer fed by the streamed chunks.
        with gr.Column(scale=4):
            viewer = Rerun(
                streaming=True,
                panel_states={
                    "time": "collapsed",
                    "blueprint": "hidden",
                    "selection": "hidden",
                },
                height=700,
            )

    # We make a new recording id, and store it in a Gradio's session state.
    # NOTE(review): uuid.uuid4() is evaluated once at app startup, so this
    # default value looks shared across all sessions — confirm whether a
    # per-session id (e.g. generated in a load event) is intended.
    recording_id = gr.State(uuid.uuid4())

    # Also store the current timeline and time of the viewer in the session state.
    # (Not referenced by the callback registered below.)
    current_timeline = gr.State("")
    current_time = gr.State(0.0)

    # When registering the event listeners, we pass the `recording_id` in as input in order to create
    # a recording stream using that id.
    detect_objects.click(
        # Using the `viewer` as an output allows us to stream data to it by yielding bytes from the callback.
        streaming_object_detection,
        inputs=[recording_id, img],
        outputs=[viewer],
    )
# Script entry point; SSR is disabled for the custom viewer component.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)