oxkitsune committed
Commit b4a4fd5 · 1 Parent(s): 57aeafc

add detr model

Files changed (2):
  1. app.py +27 -24
  2. requirements.txt +10 -2
app.py CHANGED
@@ -11,6 +11,13 @@ import tempfile
 import time
 import uuid
 
+import subprocess
+subprocess.run(
+    "pip install gradio_rerun-0.23.0a2.tar.gz",
+    shell=True,
+)
+
+
 import cv2
 import gradio as gr
 import rerun as rr
@@ -22,13 +29,16 @@ from gradio_rerun.events import (
     TimeUpdate,
 )
 import spaces
+from transformers import DetrImageProcessor, DetrForObjectDetection
+import torch
+from PIL import Image
+import requests
 
-import subprocess
-subprocess.run(
-    "pip install gradio_rerun-0.23.0a2-py3-none-any.whl",
-    shell=True,
-)
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
 
+processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
+model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
 
 # Whenever we need a recording, we construct a new recording stream.
 # As long as the app and recording IDs remain the same, the data
@@ -42,6 +52,7 @@ def get_recording(recording_id: str) -> rr.RecordingStream:
 #
 # This is the preferred way to work with Rerun in Gradio since your data can be immediately and
 # incrementally seen by the viewer. Also, there are no ephemeral RRDs to cleanup or manage.
+@spaces.GPU
 def streaming_repeated_blur(recording_id: str, img):
     # Here we get a recording using the provided recording id.
     rec = get_recording(recording_id)
@@ -52,30 +63,27 @@ def streaming_repeated_blur(recording_id: str, img):
 
     blueprint = rrb.Blueprint(
         rrb.Horizontal(
-            rrb.Spatial2DView(origin="image/original"),
-            rrb.Spatial2DView(origin="image/blurred"),
+            rrb.Spatial2DView(origin="image"),
         ),
         collapse_panels=True,
     )
 
     rec.send_blueprint(blueprint)
     rec.set_time("iteration", sequence=0)
-    rec.log("image/original", rr.Image(img))
+    rec.log("image", rr.Image(img))
     yield stream.read()
 
-    blur = img
-    for i in range(100):
-        rec.set_time("iteration", sequence=i)
 
-        # Pretend blurring takes a while so we can see streaming in action.
-        time.sleep(0.1)
-        blur = cv2.GaussianBlur(blur, (5, 5), 0)
-        rec.log("image/blurred", rr.Image(blur))
+    inputs = processor(images=image, return_tensors="pt")
+    outputs = model(**inputs)
 
-        # Each time we yield bytes from the stream back to Gradio, they
-        # are incrementally sent to the viewer. Make sure to yield any time
-        # you want the user to be able to see progress.
-        yield stream.read()
+    # convert outputs (bounding boxes and class logits) to COCO API
+    # let's only keep detections with score > 0.9
+    target_sizes = torch.tensor([image.size[::-1]])
+    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
+
+    print(results)
+    rec.log("image/objects", rr.Boxes2D(sizes=results["boxes"], labels=[model.config.id2label[label.item()] for label in results["labels"]]))
 
     # Ensure we consume everything from the recording.
     stream.flush()
@@ -162,11 +170,6 @@ def track_current_time(evt: TimeUpdate):
 def track_current_timeline_and_time(evt: TimelineChange):
     return evt.timeline, evt.time
 
-@spaces.GPU
-def run_inference(img):
-    print("running inference")
-    pass
-
 with gr.Blocks() as demo:
     with gr.Row():
         img = gr.Image(interactive=True, label="Image")
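
Note on the new inference hunk: it hands `results["boxes"]` to `rr.Boxes2D(sizes=...)`, but `post_process_object_detection` returns absolute `(xmin, ymin, xmax, ymax)` corners, so logging them as sizes likely misplaces the boxes. A minimal standalone sketch of the same detect-and-log step in corner format, assuming rerun-sdk's `array`/`array_format` parameters on `Boxes2D` and the hypothetical app id "detr_boxes_sketch":

import numpy as np
import requests
import rerun as rr
import torch
from PIL import Image
from transformers import DetrForObjectDetection, DetrImageProcessor

processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Rescale to pixel coordinates and keep detections with score > 0.9;
# the returned boxes are (xmin, ymin, xmax, ymax) corners.
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(
    outputs, target_sizes=target_sizes, threshold=0.9
)[0]

rr.init("detr_boxes_sketch", spawn=True)  # hypothetical app id, standalone viewer
rr.log("image", rr.Image(np.asarray(image)))
rr.log(
    "image/objects",
    rr.Boxes2D(
        array=results["boxes"].numpy(),    # corner coordinates, not sizes
        array_format=rr.Box2DFormat.XYXY,  # assumed corner-format enum in rerun-sdk
        labels=[model.config.id2label[i.item()] for i in results["labels"]],
    ),
)

Inside the Gradio handler, the same `rr.Boxes2D(...)` call would go through `rec.log(...)` on the per-session recording stream rather than the global `rr.log`.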
requirements.txt CHANGED
@@ -13,6 +13,7 @@ cffi==1.17.1
 charset-normalizer==3.4.1
 click==8.1.8
 cryptography==44.0.2
+datasets==3.5.0
 decorator==5.2.1
 dill==0.3.8
 exceptiongroup==1.2.2
@@ -53,6 +54,7 @@ nvidia-cufft-cu12==11.2.1.3
 nvidia-curand-cu12==10.3.5.147
 nvidia-cusolver-cu12==11.6.1.9
 nvidia-cusparse-cu12==12.3.1.170
+nvidia-cusparselt-cu12==0.6.2
 nvidia-nccl-cu12==2.21.5
 nvidia-nvjitlink-cu12==12.4.127
 nvidia-nvtx-cu12==12.4.127
@@ -79,11 +81,13 @@ python-dateutil==2.9.0.post0
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.2
+regex==2024.11.6
 requests==2.32.3
 rerun-sdk==0.23.0a2
 rich==14.0.0
 ruff==0.11.5
 safehttpx==0.1.6
+safetensors==0.5.3
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
@@ -92,11 +96,15 @@ spaces==0.34.2
 stack-data==0.6.3
 starlette==0.46.2
 sympy==1.13.1
+timm==1.0.15
+tokenizers==0.21.1
 tomlkit==0.13.2
-torch==2.5.1
+torch==2.6.0
+torchvision==0.21.0
 tqdm==4.67.1
 traitlets==5.14.3
-triton==3.1.0
+transformers==4.51.3
+triton==3.2.0
 typer==0.15.2
 typing-inspection==0.4.0
 typing_extensions==4.13.2
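
requirements.txt still does not pin `gradio_rerun` itself; the app installs the pre-release tarball at import time via `subprocess`. If the archive is committed alongside the app (an assumption, the diff only shows the changed install string), pip accepts a plain local-path line in requirements.txt, which would let the `subprocess.run("pip install ...")` block be dropped:

# requirements.txt (sketch): install a local sdist/wheel by relative path
./gradio_rerun-0.23.0a2.tar.gz

On Hugging Face Spaces this is resolved at build time rather than on first import, so the package is available before app.py executes.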